Skip to content

Commit 759376f

Browse files
committed
Auto merge of #147250 - Kmeakin:km/optimize-ilog-base-power-of-two, r=<try>
Optimize `checked_ilog` and `pow` when `base` is a power of two
2 parents: 4f84d9f + e845f15 · commit 759376f

9 files changed

Lines changed: 649 additions & 251 deletions

File tree

library/core/src/num/imp/overflow_panic.rs

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,12 @@ pub(in crate::num) const fn shl() -> ! {
5050
panic!("attempt to shift left with overflow")
5151
}
5252

53+
#[cold]
54+
#[track_caller]
55+
pub(in crate::num) const fn pow() -> ! {
56+
panic!("attempt to exponentiate with overflow")
57+
}
58+
5359
#[cold]
5460
#[track_caller]
5561
pub(in crate::num) const fn cast_integer() -> ! {

library/core/src/num/int_macros.rs

Lines changed: 80 additions & 106 deletions
Original file line number | Diff line number | Diff line change
@@ -1829,11 +1829,38 @@ macro_rules! int_impl {
18291829
without modifying the original"]
18301830
#[inline]
18311831
pub const fn checked_pow(self, mut exp: u32) -> Option<Self> {
1832+
let mut base = self;
1833+
let mut acc: Self = 1;
1834+
1835+
if intrinsics::is_val_statically_known(base) && base.unsigned_abs().is_power_of_two() {
1836+
let k = base.unsigned_abs().ilog2();
1837+
let shift = try_opt!(k.checked_mul(exp));
1838+
return if base < 0 && (exp % 2) == 1 {
1839+
(-1 as Self).shl_exact(shift)
1840+
} else {
1841+
(1 as Self).shl_exact(shift)
1842+
}
1843+
}
1844+
18321845
if exp == 0 {
18331846
return Some(1);
18341847
}
1835-
let mut base = self;
1836-
let mut acc: Self = 1;
1848+
1849+
if intrinsics::is_val_statically_known(exp) {
1850+
while exp > 1 {
1851+
if (exp & 1) == 1 {
1852+
acc = try_opt!(acc.checked_mul(base));
1853+
}
1854+
exp /= 2;
1855+
base = try_opt!(base.checked_mul(base));
1856+
}
1857+
1858+
// since exp!=0, finally the exp must be 1.
1859+
// Deal with the final bit of the exponent separately, since
1860+
// squaring the base afterwards is not necessary and may cause a
1861+
// needless overflow.
1862+
return acc.checked_mul(base);
1863+
}
18371864

18381865
loop {
18391866
if (exp & 1) == 1 {
@@ -1875,23 +1902,10 @@ macro_rules! int_impl {
18751902
without modifying the original"]
18761903
#[inline]
18771904
#[track_caller]
1878-
pub const fn strict_pow(self, mut exp: u32) -> Self {
1879-
if exp == 0 {
1880-
return 1;
1881-
}
1882-
let mut base = self;
1883-
let mut acc: Self = 1;
1884-
1885-
loop {
1886-
if (exp & 1) == 1 {
1887-
acc = acc.strict_mul(base);
1888-
// since exp!=0, finally the exp must be 1.
1889-
if exp == 1 {
1890-
return acc;
1891-
}
1892-
}
1893-
exp /= 2;
1894-
base = base.strict_mul(base);
1905+
pub const fn strict_pow(self, exp: u32) -> Self {
1906+
match self.checked_pow(exp) {
1907+
Some(x) => x,
1908+
None => imp::overflow_panic::pow(),
18951909
}
18961910
}
18971911

@@ -2503,43 +2517,9 @@ macro_rules! int_impl {
25032517
#[must_use = "this returns the result of the operation, \
25042518
without modifying the original"]
25052519
#[inline]
2506-
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
2507-
if exp == 0 {
2508-
return 1;
2509-
}
2510-
let mut base = self;
2511-
let mut acc: Self = 1;
2512-
2513-
if intrinsics::is_val_statically_known(exp) {
2514-
while exp > 1 {
2515-
if (exp & 1) == 1 {
2516-
acc = acc.wrapping_mul(base);
2517-
}
2518-
exp /= 2;
2519-
base = base.wrapping_mul(base);
2520-
}
2521-
2522-
// since exp!=0, finally the exp must be 1.
2523-
// Deal with the final bit of the exponent separately, since
2524-
// squaring the base afterwards is not necessary.
2525-
acc.wrapping_mul(base)
2526-
} else {
2527-
// This is faster than the above when the exponent is not known
2528-
// at compile time. We can't use the same code for the constant
2529-
// exponent case because LLVM is currently unable to unroll
2530-
// this loop.
2531-
loop {
2532-
if (exp & 1) == 1 {
2533-
acc = acc.wrapping_mul(base);
2534-
// since exp!=0, finally the exp must be 1.
2535-
if exp == 1 {
2536-
return acc;
2537-
}
2538-
}
2539-
exp /= 2;
2540-
base = base.wrapping_mul(base);
2541-
}
2542-
}
2520+
pub const fn wrapping_pow(self, exp: u32) -> Self {
2521+
let (a, _) = self.overflowing_pow(exp);
2522+
a
25432523
}
25442524

25452525
/// Calculates `self` + `rhs`.
@@ -3075,30 +3055,56 @@ macro_rules! int_impl {
30753055
without modifying the original"]
30763056
#[inline]
30773057
pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) {
3078-
if exp == 0 {
3079-
return (1,false);
3080-
}
30813058
let mut base = self;
30823059
let mut acc: Self = 1;
3083-
let mut overflown = false;
3084-
// Scratch space for storing results of overflowing_mul.
3085-
let mut r;
3060+
let mut overflow = false;
3061+
let mut tmp_overflow;
3062+
3063+
if intrinsics::is_val_statically_known(base) && base.unsigned_abs().is_power_of_two() {
3064+
let k = base.unsigned_abs().ilog2();
3065+
let Some(shift) = k.checked_mul(exp) else {
3066+
return (0, true)
3067+
};
3068+
let base: Self = if base < 0 && (exp % 2) != 0 { -1 } else { 1 };
3069+
return (base.unbounded_shl(shift), base.shl_exact(shift).is_none());
3070+
}
3071+
3072+
if exp == 0 {
3073+
return (1, false);
3074+
}
3075+
3076+
if intrinsics::is_val_statically_known(exp) {
3077+
while exp > 1 {
3078+
if (exp & 1) == 1 {
3079+
(acc, tmp_overflow) = acc.overflowing_mul(base);
3080+
overflow |= tmp_overflow;
3081+
}
3082+
exp /= 2;
3083+
(base, tmp_overflow) = base.overflowing_mul(base);
3084+
overflow |= tmp_overflow;
3085+
}
3086+
3087+
// since exp!=0, finally the exp must be 1.
3088+
// Deal with the final bit of the exponent separately, since
3089+
// squaring the base afterwards is not necessary and may cause a
3090+
// needless overflow.
3091+
(acc, tmp_overflow) = acc.overflowing_mul(base);
3092+
overflow |= tmp_overflow;
3093+
return (acc, overflow);
3094+
}
30863095

30873096
loop {
30883097
if (exp & 1) == 1 {
3089-
r = acc.overflowing_mul(base);
3098+
(acc, tmp_overflow) = acc.overflowing_mul(base);
3099+
overflow |= tmp_overflow;
30903100
// since exp!=0, finally the exp must be 1.
30913101
if exp == 1 {
3092-
r.1 |= overflown;
3093-
return r;
3102+
return (acc, overflow);
30943103
}
3095-
acc = r.0;
3096-
overflown |= r.1;
30973104
}
30983105
exp /= 2;
3099-
r = base.overflowing_mul(base);
3100-
base = r.0;
3101-
overflown |= r.1;
3106+
(base, tmp_overflow) = base.overflowing_mul(base);
3107+
overflow |= tmp_overflow;
31023108
}
31033109
}
31043110

@@ -3118,43 +3124,11 @@ macro_rules! int_impl {
31183124
without modifying the original"]
31193125
#[inline]
31203126
#[rustc_inherit_overflow_checks]
3121-
pub const fn pow(self, mut exp: u32) -> Self {
3122-
if exp == 0 {
3123-
return 1;
3124-
}
3125-
let mut base = self;
3126-
let mut acc = 1;
3127-
3128-
if intrinsics::is_val_statically_known(exp) {
3129-
while exp > 1 {
3130-
if (exp & 1) == 1 {
3131-
acc = acc * base;
3132-
}
3133-
exp /= 2;
3134-
base = base * base;
3135-
}
3136-
3137-
// since exp!=0, finally the exp must be 1.
3138-
// Deal with the final bit of the exponent separately, since
3139-
// squaring the base afterwards is not necessary and may cause a
3140-
// needless overflow.
3141-
acc * base
3127+
pub const fn pow(self, exp: u32) -> Self {
3128+
if intrinsics::overflow_checks() {
3129+
self.strict_pow(exp)
31423130
} else {
3143-
// This is faster than the above when the exponent is not known
3144-
// at compile time. We can't use the same code for the constant
3145-
// exponent case because LLVM is currently unable to unroll
3146-
// this loop.
3147-
loop {
3148-
if (exp & 1) == 1 {
3149-
acc = acc * base;
3150-
// since exp!=0, finally the exp must be 1.
3151-
if exp == 1 {
3152-
return acc;
3153-
}
3154-
}
3155-
exp /= 2;
3156-
base = base * base;
3157-
}
3131+
self.wrapping_pow(exp)
31583132
}
31593133
}
31603134

0 commit comments

Comments
 (0)