aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJosh Bleecher Snyder <josharian@gmail.com>2019-03-03 13:08:40 -0800
committerJosh Bleecher Snyder <josharian@gmail.com>2019-03-04 19:30:57 +0000
commit87cc56718adb43340b24fcf9e5f14d87c028ce88 (patch)
treea5dd02624e6f05888102b82fc173e09273775d2b /src
parentabf8e355a8fe4b77009cb55f6bef11f74e6ade03 (diff)
downloadgo-87cc56718adb43340b24fcf9e5f14d87c028ce88.tar.xz
math/big: optimize shlVU_g and shrVU_g
Special case shifts by zero. Provide hints to the compiler that shifts are bounded. There are no existing benchmarks for shifts, but the Float implementation uses shifts, so we can use those. Benchmarks on amd64 with -tags=math_big_pure_go. name old time/op new time/op delta FloatString/100-8 869ns ± 3% 872ns ± 4% +0.40% (p=0.001 n=94+83) FloatString/1000-8 26.5µs ± 1% 26.4µs ± 1% -0.46% (p=0.000 n=87+96) FloatString/10000-8 2.18ms ± 2% 2.18ms ± 2% ~ (p=0.687 n=90+89) FloatString/100000-8 200ms ± 7% 197ms ± 5% -1.47% (p=0.000 n=100+90) FloatAdd/10-8 65.9ns ± 4% 64.0ns ± 4% -2.94% (p=0.000 n=92+93) FloatAdd/100-8 71.3ns ± 4% 67.4ns ± 4% -5.51% (p=0.000 n=96+93) FloatAdd/1000-8 128ns ± 1% 121ns ± 0% -5.69% (p=0.000 n=91+80) FloatAdd/10000-8 718ns ± 4% 626ns ± 4% -12.83% (p=0.000 n=99+99) FloatAdd/100000-8 6.43µs ± 3% 5.50µs ± 1% -14.50% (p=0.000 n=98+83) FloatSub/10-8 57.7ns ± 2% 57.0ns ± 4% -1.20% (p=0.000 n=89+96) FloatSub/100-8 59.9ns ± 3% 58.7ns ± 4% -2.10% (p=0.000 n=100+98) FloatSub/1000-8 94.5ns ± 1% 88.6ns ± 0% -6.16% (p=0.000 n=74+70) FloatSub/10000-8 456ns ± 1% 416ns ± 5% -8.83% (p=0.000 n=87+95) FloatSub/100000-8 4.00µs ± 1% 3.57µs ± 1% -10.87% (p=0.000 n=68+85) FloatSqrt/64-8 585ns ± 1% 579ns ± 1% -0.99% (p=0.000 n=92+90) FloatSqrt/128-8 1.26µs ± 1% 1.23µs ± 2% -2.42% (p=0.000 n=91+81) FloatSqrt/256-8 1.45µs ± 3% 1.40µs ± 1% -3.61% (p=0.000 n=96+90) FloatSqrt/1000-8 4.03µs ± 1% 3.91µs ± 1% -3.05% (p=0.000 n=90+93) FloatSqrt/10000-8 48.0µs ± 0% 47.3µs ± 1% -1.55% (p=0.000 n=90+90) FloatSqrt/100000-8 1.23ms ± 3% 1.22ms ± 4% -1.00% (p=0.000 n=99+99) FloatSqrt/1000000-8 96.7ms ± 4% 98.0ms ±10% ~ (p=0.322 n=89+99) Change-Id: I0f941c05b7c324256d7f0674559b6ba906e92ba8 Reviewed-on: https://go-review.googlesource.com/c/go/+/164967 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
Diffstat (limited to 'src')
-rw-r--r--src/math/big/arith.go54
1 files changed, 34 insertions, 20 deletions
diff --git a/src/math/big/arith.go b/src/math/big/arith.go
index f9db9118eb..611193ef18 100644
--- a/src/math/big/arith.go
+++ b/src/math/big/arith.go
@@ -204,32 +204,46 @@ func subVW_g(z, x []Word, y Word) (c Word) {
}
func shlVU_g(z, x []Word, s uint) (c Word) {
- if n := len(z); n > 0 {
- ŝ := _W - s
- w1 := x[n-1]
- c = w1 >> ŝ
- for i := n - 1; i > 0; i-- {
- w := w1
- w1 = x[i-1]
- z[i] = w<<s | w1>>ŝ
- }
- z[0] = w1 << s
+ if s == 0 {
+ copy(z, x)
+ return
+ }
+ if len(z) == 0 {
+ return
+ }
+ s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
+ ŝ := _W - s
+ ŝ &= _W - 1 // ditto
+ w1 := x[len(z)-1]
+ c = w1 >> ŝ
+ for i := len(z) - 1; i > 0; i-- {
+ w := w1
+ w1 = x[i-1]
+ z[i] = w<<s | w1>>ŝ
}
+ z[0] = w1 << s
return
}
func shrVU_g(z, x []Word, s uint) (c Word) {
- if n := len(z); n > 0 {
- ŝ := _W - s
- w1 := x[0]
- c = w1 << ŝ
- for i := 0; i < n-1; i++ {
- w := w1
- w1 = x[i+1]
- z[i] = w>>s | w1<<ŝ
- }
- z[n-1] = w1 >> s
+ if s == 0 {
+ copy(z, x)
+ return
+ }
+ if len(z) == 0 {
+ return
+ }
+ s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
+ ŝ := _W - s
+ ŝ &= _W - 1 // ditto
+ w1 := x[0]
+ c = w1 << ŝ
+ for i := 0; i < len(z)-1; i++ {
+ w := w1
+ w1 = x[i+1]
+ z[i] = w>>s | w1<<ŝ
}
+ z[len(z)-1] = w1 >> s
return
}