diff options
| author | Rick Hudson <rlh@golang.org> | 2015-04-15 17:08:58 -0400 |
|---|---|---|
| committer | Rick Hudson <rlh@golang.org> | 2015-04-20 21:39:06 +0000 |
| commit | 899a4ad47e452ede041fdb99204575a407dd94f2 (patch) | |
| tree | 298cb71b818c4674b0aff88727012f3efbc9ffa6 /src/runtime/mbitmap.go | |
| parent | e7ffafdb6e76f62382d47c1bd21626ec7dae4594 (diff) | |
| download | go-899a4ad47e452ede041fdb99204575a407dd94f2.tar.xz | |
runtime: Speed up heapBitsForObject
Optimized heapBitsForObject by special casing
objects whose size is a power of two. When a
span holding such objects is initialized I
added a mask that when &ed with an interior pointer
results in the base of the object containing the pointer. For the garbage
benchmark this resulted in CPU_CLK_UNHALTED in
heapBitsForObject going from 7.7% down to 5.9%
of the total, INST_RETIRED went from 12.2 -> 8.7.
Here are the benchmarks that changed by at least plus or minus 1%.
benchmark old ns/op new ns/op delta
BenchmarkFmtFprintfString 249 221 -11.24%
BenchmarkFmtFprintfInt 247 223 -9.72%
BenchmarkFmtFprintfEmpty 76.5 69.6 -9.02%
BenchmarkBinaryTree17 4106631412 3744550160 -8.82%
BenchmarkFmtFprintfFloat 424 399 -5.90%
BenchmarkGoParse 4484421 4242115 -5.40%
BenchmarkGobEncode 8803668 8449107 -4.03%
BenchmarkFmtManyArgs 1494 1436 -3.88%
BenchmarkGobDecode 10431051 10032606 -3.82%
BenchmarkFannkuch11 2591306713 2517400464 -2.85%
BenchmarkTimeParse 361 371 +2.77%
BenchmarkJSONDecode 70620492 68830357 -2.53%
BenchmarkRegexpMatchMedium_1K 54693 53343 -2.47%
BenchmarkTemplate 90008879 91929940 +2.13%
BenchmarkTimeFormat 380 387 +1.84%
BenchmarkRegexpMatchEasy1_32 111 113 +1.80%
BenchmarkJSONEncode 21359159 21007583 -1.65%
BenchmarkRegexpMatchEasy1_1K 603 613 +1.66%
BenchmarkRegexpMatchEasy0_32 127 129 +1.57%
BenchmarkFmtFprintfIntInt 399 393 -1.50%
BenchmarkRegexpMatchEasy0_1K 373 378 +1.34%
Change-Id: I78e297161026f8b5cc7507c965fd3e486f81ed29
Reviewed-on: https://go-review.googlesource.com/8980
Reviewed-by: Austin Clements <austin@google.com>
Diffstat (limited to 'src/runtime/mbitmap.go')
| -rw-r--r-- | src/runtime/mbitmap.go | 41 |
1 files changed, 20 insertions, 21 deletions
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 5dad2a0782..f0704bdb5d 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -154,17 +154,16 @@ func heapBitsForSpan(base uintptr) (hbits heapBits) { // return base == 0 // otherwise return the base of the object. func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits, s *mspan) { - if p < mheap_.arena_start || p >= mheap_.arena_used { + arenaStart := mheap_.arena_start + if p < arenaStart || p >= mheap_.arena_used { return } - + off := p - arenaStart + idx := off >> _PageShift // p points into the heap, but possibly to the middle of an object. // Consult the span table to find the block beginning. - // TODO(rsc): Factor this out. k := p >> _PageShift - x := k - x -= mheap_.arena_start >> _PageShift - s = h_spans[x] + s = h_spans[idx] if s == nil || pageID(k) < s.start || p >= s.limit || s.state != mSpanInUse { if s == nil || s.state == _MSpanStack { // If s is nil, the virtual address has never been part of the heap. @@ -188,23 +187,23 @@ func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits, s *mspan) { printunlock() throw("objectstart: bad pointer in unexpected span") } - return } - base = s.base() - if p-base >= s.elemsize { - // n := (p - base) / s.elemsize, using division by multiplication - n := uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2) - - const debugMagic = false - if debugMagic { - n2 := (p - base) / s.elemsize - if n != n2 { - println("runtime: bad div magic", (p - base), s.elemsize, s.divShift, s.divMul, s.divShift2) - throw("bad div magic") - } + // If this span holds object of a power of 2 size, just mask off the bits to + // the interior of the object. Otherwise use the size to get the base. + if s.baseMask != 0 { + // optimize for power of 2 sized objects. + base = s.base() + base = base + (p-base)&s.baseMask + // base = p & s.baseMask is faster for small spans, + // but doesn't work for large spans. 
+ // Overall, it's faster to use the more general computation above. + } else { + base = s.base() + if p-base >= s.elemsize { + // n := (p - base) / s.elemsize, using division by multiplication + n := uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2) + base += n * s.elemsize } - - base += n * s.elemsize } // Now that we know the actual base, compute heapBits to return to caller. hbits = heapBitsForAddr(base) |
