@@ -8,6 +8,8 @@ use core::ops;
88use super :: { DInt , HInt , Int , MinInt } ;
99
/// Mask selecting the low 64 bits of a `u128` (`u64::MAX` widened to `u128`).
1010const U128_LO_MASK : u128 = u64:: MAX as u128 ;
/// Number of `usize`-sized words needed to hold one `u128` (128 divided by the
/// pointer width in bits).
11+ const U128_WORDS : usize = ( u128:: BITS / usize:: BITS ) as usize ;
/// Number of `usize`-sized words needed to hold a 256-bit value: two `u128`
/// limbs' worth.
12+ const U256_WORDS : usize = U128_WORDS * 2 ;
1113
1214/// A 256-bit unsigned integer represented as two 128-bit native-endian limbs.
1315#[ allow( non_camel_case_types) ]
@@ -31,6 +33,26 @@ impl u256 {
3133 hi : self . hi as i128 ,
3234 }
3335 }
36+
37+ fn to_words ( self ) -> [ usize ; U256_WORDS ] {
38+ let mut ret = [ 0usize ; U256_WORDS ] ;
39+ for i in 0 ..U128_WORDS {
40+ let shift = i as u32 * usize:: BITS ;
41+ ret[ i] = ( self . lo >> shift) as usize ;
42+ ret[ i + U128_WORDS ] = ( self . hi >> shift) as usize ;
43+ }
44+ ret
45+ }
46+
47+ fn from_words ( words : [ usize ; U256_WORDS ] ) -> Self {
48+ let mut ret = u256:: ZERO ;
49+ for i in 0 ..U128_WORDS {
50+ let shift = i as u32 * usize:: BITS ;
51+ ret. lo |= ( words[ i] as u128 ) << shift;
52+ ret. hi |= ( words[ i + U128_WORDS ] as u128 ) << shift;
53+ }
54+ ret
55+ }
3456}
3557
3658/// A 256-bit signed integer represented as two 128-bit native-endian limbs.
@@ -156,34 +178,86 @@ macro_rules! impl_common {
156178 self
157179 }
158180 }
181+ } ;
182+ }
159183
160- impl ops:: Shr <u32 > for $ty {
161- type Output = Self ;
184+ impl ops:: Shr < u32 > for i256 {
185+ type Output = Self ;
162186
163- fn shr( mut self , rhs: u32 ) -> Self :: Output {
164- debug_assert!( rhs < Self :: BITS , "attempt to shift right with overflow" ) ;
187+ fn shr ( mut self , rhs : u32 ) -> Self :: Output {
188+ debug_assert ! ( rhs < Self :: BITS , "attempt to shift right with overflow" ) ;
165189
166- let half_bits = Self :: BITS / 2 ;
167- let low_mask = half_bits - 1 ;
168- let s = rhs & low_mask;
190+ let half_bits = Self :: BITS / 2 ;
191+ let low_mask = half_bits - 1 ;
192+ let s = rhs & low_mask;
169193
170- let lo = self . lo;
171- let hi = self . hi;
194+ let lo = self . lo ;
195+ let hi = self . hi ;
172196
173- self . hi = hi >> s;
197+ self . hi = hi >> s;
174198
175- #[ allow( unused_comparisons) ]
176- if rhs & half_bits == 0 {
177- self . lo = ( hi << ( low_mask ^ s) << 1 ) as _;
178- self . lo |= lo >> s;
179- } else {
180- self . lo = self . hi as _;
181- self . hi = if hi < 0 { !0 } else { 0 } ;
199+ #[ allow( unused_comparisons) ]
200+ if rhs & half_bits == 0 {
201+ self . lo = ( hi << ( low_mask ^ s) << 1 ) as _ ;
202+ self . lo |= lo >> s;
203+ } else {
204+ self . lo = self . hi as _ ;
205+ self . hi = if hi < 0 { !0 } else { 0 } ;
206+ }
207+ self
208+ }
209+ }
210+
211+ impl ops:: Shr < u32 > for u256 {
212+ type Output = Self ;
213+
214+ fn shr ( self , rhs : u32 ) -> Self :: Output {
215+ debug_assert ! ( rhs < Self :: BITS , "attempt to shift right with overflow" ) ;
216+
217+ // Set up an array with the input in the low half, zeros in the upper half
218+ let mut words = [ 0usize ; U256_WORDS * 2 ] ;
219+ words[ ..U256_WORDS ] . copy_from_slice ( & self . to_words ( ) ) ;
220+
221+ let shift = rhs & 255 ; // limit to 255 in cases of overflow
222+ let word_shift = ( shift / usize:: BITS ) as usize ;
223+ let bit_shift = shift % usize:: BITS ;
224+
225+ let mut ret = [ 0usize ; U256_WORDS ] ;
226+
227+ // Each output word is a coarse (word-sized) shift plus a small bit shift. Note that
228+ // these loops get unrolled.
229+ cfg_if ! {
230+ if #[ cfg( intrinsics_enabled) ] {
231+ // Use funnel shifts if available to handle the two-word input, which
232+ // can be a single instruction (`shrd` on x86).
233+ for i in 0 ..U256_WORDS {
234+ if i < ( U256_WORDS - 1 ) {
235+ let hi = words[ word_shift + i + 1 ] ;
236+ let lo = words[ word_shift + i] ;
237+ ret[ i] = hi. funnel_shr( lo, bit_shift) ;
238+ } else {
239+ ret[ i] = words[ word_shift + i] >> bit_shift
240+ }
182241 }
183- self
242+ } else {
243+ // Otherwise, perform the narrowing shift as a combined left and right shift.
244+ // This doesn't get optimized quite as well.
245+ for i in 0 ..U256_WORDS {
246+ ret[ i] = words[ word_shift + i] >> bit_shift;
247+ }
248+
249+ if bit_shift != 0 {
250+ let lshift = usize :: BITS - bit_shift as u32 ;
251+ for i in 0 ..( U256_WORDS - 1 ) {
252+ ret[ i] |= words[ word_shift + i + 1 ] << lshift;
253+ }
254+ }
255+
184256 }
185257 }
186- } ;
258+
259+ u256:: from_words ( ret)
260+ }
187261}
188262
189263impl_common ! ( i256) ;
0 commit comments