Skip to content

Commit a0b573e

Browse files
committed
perf: use borrowing/carrying ops in add/sub, remove bound checks in shifts
1 parent 41c45f8 commit a0b573e

File tree

6 files changed

+64
-27
lines changed

6 files changed

+64
-27
lines changed

CHANGELOG.md

+6-1
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
## [Unreleased]
1111

12+
### Changed
13+
14+
- Use borrowing/carrying ops in add/sub, remove bound checks in shifts ([#366])
15+
1216
### Fixed
1317

14-
- add `alloc` requirement to `num-traits` feature [#363]
18+
- Add `alloc` requirement to `num-traits` feature ([#363])
1519

1620
[#363]: https://github.com/recmo/uint/pull/363
21+
[#366]: https://github.com/recmo/uint/pull/366
1722

1823
## [1.12.1] - 2024-03-12
1924

src/add.rs

+22-13
Original file line numberDiff line numberDiff line change
@@ -56,19 +56,24 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
5656
#[inline]
5757
#[must_use]
5858
pub const fn overflowing_add(mut self, rhs: Self) -> (Self, bool) {
59+
// TODO: Replace with `u64::carrying_add` once stable.
60+
// Single-limb add with carry-in: returns the wrapped sum of `lhs + rhs + carry`
// and `true` if either of the two partial additions overflowed.
#[inline]
61+
const fn u64_carrying_add(lhs: u64, rhs: u64, carry: bool) -> (u64, bool) {
62+
let (a, b) = lhs.overflowing_add(rhs);
63+
let (c, d) = a.overflowing_add(carry as u64);
64+
(c, b || d)
65+
}
66+
5967
// Early return for `BITS == 0`, before any limb (including `limbs[LIMBS - 1]`)
// is indexed.
if BITS == 0 {
6068
return (Self::ZERO, false);
6169
}
62-
let mut carry = 0_u128;
70+
let mut carry = false;
6371
let mut i = 0;
64-
#[allow(clippy::cast_possible_truncation)] // Intentional
6572
// Walk the limbs in index order, feeding each limb's carry-out into the next
// limb's addition. An index `while` loop is used because this is a `const fn`.
while i < LIMBS {
66-
carry += self.limbs[i] as u128 + rhs.limbs[i] as u128;
67-
self.limbs[i] = carry as u64;
68-
carry >>= 64;
73+
(self.limbs[i], carry) = u64_carrying_add(self.limbs[i], rhs.limbs[i], carry);
6974
i += 1;
7075
}
71-
let overflow = carry != 0 || self.limbs[LIMBS - 1] > Self::MASK;
76+
// Overflow is reported when a carry leaves the last limb or when the last limb
// exceeds `Self::MASK`; the last limb is then reduced with `Self::MASK`.
let overflow = carry || self.limbs[LIMBS - 1] > Self::MASK;
7277
self.limbs[LIMBS - 1] &= Self::MASK;
7378
(self, overflow)
7479
}
@@ -93,20 +98,24 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
9398
#[inline]
9499
#[must_use]
95100
pub const fn overflowing_sub(mut self, rhs: Self) -> (Self, bool) {
101+
// TODO: Replace with `u64::borrowing_sub` once stable.
102+
// Single-limb subtract with borrow-in: returns the wrapped value of
// `lhs - rhs - borrow` and `true` if either of the two partial subtractions
// overflowed.
#[inline]
103+
const fn u64_borrowing_sub(lhs: u64, rhs: u64, borrow: bool) -> (u64, bool) {
104+
let (a, b) = lhs.overflowing_sub(rhs);
105+
let (c, d) = a.overflowing_sub(borrow as u64);
106+
(c, b || d)
107+
}
108+
96109
// Early return for `BITS == 0`, before any limb (including `limbs[LIMBS - 1]`)
// is indexed.
if BITS == 0 {
97110
return (Self::ZERO, false);
98111
}
99-
let mut carry = 0_i128;
112+
let mut borrow = false;
100113
let mut i = 0;
101-
#[allow(clippy::cast_possible_truncation)] // Intentional
102-
#[allow(clippy::cast_sign_loss)] // Intentional
103114
// Walk the limbs in index order, feeding each limb's borrow-out into the next
// limb's subtraction. An index `while` loop is used because this is a `const fn`.
while i < LIMBS {
104-
carry += self.limbs[i] as i128 - rhs.limbs[i] as i128;
105-
self.limbs[i] = carry as u64;
106-
carry >>= 64;
115+
(self.limbs[i], borrow) = u64_borrowing_sub(self.limbs[i], rhs.limbs[i], borrow);
107116
i += 1;
108117
}
109-
let overflow = carry != 0 || self.limbs[LIMBS - 1] > Self::MASK;
118+
// Overflow is reported when a borrow leaves the last limb or when the last limb
// exceeds `Self::MASK`; the last limb is then reduced with `Self::MASK`.
let overflow = borrow || self.limbs[LIMBS - 1] > Self::MASK;
110119
self.limbs[LIMBS - 1] &= Self::MASK;
111120
(self, overflow)
112121
}

src/algorithms/div/knuth.rs

-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ use crate::{
55
algorithms::{add::adc_n, mul::submul_nx1},
66
utils::{likely, unlikely},
77
};
8-
use core::u64;
98

109
/// ⚠️ In-place Knuth normalized long division with reciprocals.
1110
///

src/bits.rs

+7-12
Original file line numberDiff line numberDiff line change
@@ -271,11 +271,10 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
271271

272272
// Shift
273273
for i in (limbs..LIMBS).rev() {
274+
assume!(i >= limbs && i - limbs < LIMBS);
274275
self.limbs[i] = self.limbs[i - limbs];
275276
}
276-
for i in 0..limbs {
277-
self.limbs[i] = 0;
278-
}
277+
self.limbs[..limbs].fill(0);
279278
self.limbs[LIMBS - 1] &= Self::MASK;
280279
return (self, overflow);
281280
}
@@ -294,13 +293,12 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
294293

295294
// Shift
296295
for i in (limbs + 1..LIMBS).rev() {
296+
assume!(i >= limbs + 1 && i - limbs < LIMBS && i - limbs - 1 < LIMBS);
297297
self.limbs[i] = self.limbs[i - limbs] << bits;
298298
self.limbs[i] |= self.limbs[i - limbs - 1] >> (64 - bits);
299299
}
300300
self.limbs[limbs] = self.limbs[0] << bits;
301-
for i in 0..limbs {
302-
self.limbs[i] = 0;
303-
}
301+
self.limbs[..limbs].fill(0);
304302
self.limbs[LIMBS - 1] &= Self::MASK;
305303
(self, overflow)
306304
}
@@ -367,9 +365,7 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
367365
for i in 0..(LIMBS - limbs) {
368366
self.limbs[i] = self.limbs[i + limbs];
369367
}
370-
for i in (LIMBS - limbs)..LIMBS {
371-
self.limbs[i] = 0;
372-
}
368+
self.limbs[LIMBS - limbs..].fill(0);
373369
return (self, overflow);
374370
}
375371

@@ -378,13 +374,12 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
378374

379375
// Shift
380376
for i in 0..(LIMBS - limbs - 1) {
377+
assume!(i + limbs < LIMBS && i + limbs + 1 < LIMBS);
381378
self.limbs[i] = self.limbs[i + limbs] >> bits;
382379
self.limbs[i] |= self.limbs[i + limbs + 1] << (64 - bits);
383380
}
384381
self.limbs[LIMBS - limbs - 1] = self.limbs[LIMBS - 1] >> bits;
385-
for i in (LIMBS - limbs)..LIMBS {
386-
self.limbs[i] = 0;
387-
}
382+
self.limbs[LIMBS - limbs..].fill(0);
388383
(self, overflow)
389384
}
390385

src/lib.rs

+5
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ pub use self::{
7979
#[doc(inline)]
8080
pub use ruint_macro::uint;
8181

82+
#[inline(never)]
83+
pub fn u256_binop(a: &mut aliases::U256, b: usize) {
84+
*a = a.wrapping_shr(b);
85+
}
86+
8287
/// Extra features that are nightly only.
8388
#[cfg(feature = "generic_const_exprs")]
8489
pub mod nightly {

src/macros.rs

+24
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,30 @@ macro_rules! impl_bin_op {
7575
};
7676
}
7777

78+
/// Tells the optimizer that the condition `$e` always holds.
///
/// Forms:
/// - `assume!(cond)`: on failure the message is the source text of `cond`.
/// - `assume!(cond, "fmt", args...)`: on failure the message is the given
///   format string and arguments.
///
/// If `cond` is false, control reaches `debug_unreachable!`: a panic when
/// `debug_assertions` is enabled, and `core::hint::unreachable_unchecked()`
/// (undefined behavior) otherwise.
///
/// # Safety
///
/// This macro is invoked without an `unsafe` block, but it expands to one.
/// Every call site must guarantee that `cond` is true.
macro_rules! assume {
    ($e:expr $(,)?) => {
        if !$e {
            // Pass the expression text as a format *argument*, not as the format
            // string itself: otherwise any `{`/`}` in the expression (blocks,
            // struct patterns, ...) would be parsed as format placeholders by
            // `unreachable!` on edition 2021 and later.
            debug_unreachable!("{}", stringify!($e));
        }
    };

    ($e:expr, $($t:tt)+) => {
        if !$e {
            debug_unreachable!($($t)+);
        }
    };
}

/// Marks a code path as unreachable.
///
/// With `debug_assertions` enabled this expands to `unreachable!` with the given
/// message tokens, so a violated assumption panics. Without `debug_assertions`
/// it expands to `core::hint::unreachable_unchecked()`, so reaching it is
/// undefined behavior.
macro_rules! debug_unreachable {
    ($($t:tt)*) => {
        if cfg!(debug_assertions) {
            unreachable!($($t)*);
        } else {
            // SAFETY: the invoker of this macro guarantees that this point is
            // never reached at runtime.
            unsafe { core::hint::unreachable_unchecked() };
        }
    };
}
101+
78102
#[cfg(test)]
79103
mod tests {
80104
// https://github.com/recmo/uint/issues/359

0 commit comments

Comments
 (0)