aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorIlya Tocar <ilya.tocar@intel.com>2018-03-23 15:45:03 -0500
committerIlya Tocar <ilya.tocar@intel.com>2018-04-13 21:33:52 +0000
commite53cc7ae931eeec0863d066d98972fc52fcd09f0 (patch)
tree31d6a5753e68fb262a9dc4a7c4b04e808b291dfb /src
parent70d83eda99d0687ba07348ca773ee8bf3e9bbfa9 (diff)
downloadgo-e53cc7ae931eeec0863d066d98972fc52fcd09f0.tar.xz
runtime: avoid division in growslice
Add a special case for power-of-2 sized elements. We can replace div/mul with left/right shift and avoid expensive operation. growslice is hotter for short slices of small elements, such as int16, so add an int16 version for GrowSlice benchmark. name old time/op new time/op delta GrowSlice/Byte-6 61.3ns ± 3% 60.5ns ± 4% -1.33% (p=0.002 n=30+30) GrowSlice/Int16-6 94.0ns ± 4% 84.7ns ± 2% -9.82% (p=0.000 n=30+30) GrowSlice/Int-6 100ns ± 1% 99ns ± 1% -0.25% (p=0.032 n=29+28) GrowSlice/Ptr-6 197ns ± 2% 195ns ± 2% -0.94% (p=0.001 n=30+29) GrowSlice/Struct/24-6 168ns ± 1% 166ns ± 2% -1.09% (p=0.000 n=25+30) GrowSlice/Struct/32-6 187ns ± 2% 180ns ± 1% -3.59% (p=0.000 n=30+30) GrowSlice/Struct/40-6 241ns ± 2% 238ns ± 2% -1.41% (p=0.000 n=30+30) Change-Id: I31e8388d73fd9356e2dcc091d8d92eef3e3ccdbc Reviewed-on: https://go-review.googlesource.com/102279 Run-TryBot: Ilya Tocar <ilya.tocar@intel.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Diffstat (limited to 'src')
-rw-r--r--src/runtime/slice.go28
-rw-r--r--src/runtime/slice_test.go6
2 files changed, 31 insertions, 3 deletions
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index 9f35a89400..0924d1d8e6 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -5,6 +5,7 @@
package runtime
import (
+ "runtime/internal/sys"
"unsafe"
)
@@ -128,19 +129,36 @@ func growslice(et *_type, old slice, cap int) slice {
var overflow bool
var lenmem, newlenmem, capmem uintptr
const ptrSize = unsafe.Sizeof((*byte)(nil))
- switch et.size {
- case 1:
+ // Specialize for common values of et.size.
+ // For 1 we don't need any division/multiplication.
+ // For ptrSize, compiler will optimize division/multiplication into a shift by a constant.
+ // For powers of 2, use a variable shift.
+ switch {
+ case et.size == 1:
lenmem = uintptr(old.len)
newlenmem = uintptr(cap)
capmem = roundupsize(uintptr(newcap))
overflow = uintptr(newcap) > maxAlloc
newcap = int(capmem)
- case ptrSize:
+ case et.size == ptrSize:
lenmem = uintptr(old.len) * ptrSize
newlenmem = uintptr(cap) * ptrSize
capmem = roundupsize(uintptr(newcap) * ptrSize)
overflow = uintptr(newcap) > maxAlloc/ptrSize
newcap = int(capmem / ptrSize)
+ case isPowerOfTwo(et.size):
+ var shift uintptr
+ if ptrSize == 8 {
+ // Mask shift for better code generation.
+ shift = uintptr(sys.Ctz64(uint64(et.size))) & 63
+ } else {
+ shift = uintptr(sys.Ctz32(uint32(et.size))) & 31
+ }
+ lenmem = uintptr(old.len) << shift
+ newlenmem = uintptr(cap) << shift
+ capmem = roundupsize(uintptr(newcap) << shift)
+ overflow = uintptr(newcap) > (maxAlloc >> shift)
+ newcap = int(capmem >> shift)
default:
lenmem = uintptr(old.len) * et.size
newlenmem = uintptr(cap) * et.size
@@ -189,6 +207,10 @@ func growslice(et *_type, old slice, cap int) slice {
return slice{p, old.len, newcap}
}
+func isPowerOfTwo(x uintptr) bool {
+ return x&(x-1) == 0
+}
+
func slicecopy(to, fm slice, width uintptr) int {
if fm.len == 0 || to.len == 0 {
return 0
diff --git a/src/runtime/slice_test.go b/src/runtime/slice_test.go
index ef1e812c0d..46db071ebe 100644
--- a/src/runtime/slice_test.go
+++ b/src/runtime/slice_test.go
@@ -31,6 +31,12 @@ func BenchmarkGrowSlice(b *testing.B) {
_ = append([]byte(nil), x...)
}
})
+ b.Run("Int16", func(b *testing.B) {
+ x := make([]int16, 9)
+ for i := 0; i < b.N; i++ {
+ _ = append([]int16(nil), x...)
+ }
+ })
b.Run("Int", func(b *testing.B) {
x := make([]int, 9)
for i := 0; i < b.N; i++ {