diff options
| author | Rick Hudson <rlh@golang.org> | 2015-04-15 17:08:58 -0400 |
|---|---|---|
| committer | Rick Hudson <rlh@golang.org> | 2015-04-20 21:39:06 +0000 |
| commit | 899a4ad47e452ede041fdb99204575a407dd94f2 (patch) | |
| tree | 298cb71b818c4674b0aff88727012f3efbc9ffa6 /src/runtime/mbitmap.go | |
| parent | e7ffafdb6e76f62382d47c1bd21626ec7dae4594 (diff) | |
| download | go-899a4ad47e452ede041fdb99204575a407dd94f2.tar.xz | |
runtime: Speed up heapBitsForObject
Optimized heapBitsForObject by special casing
objects whose size is a power of two. When a
span holding such objects is initialized I
added a mask that when &ed with an interior pointer
results in the base of the object containing the pointer. For the garbage
benchmark this resulted in CPU_CLK_UNHALTED in
heapBitsForObject going from 7.7% down to 5.9%
of the total, INST_RETIRED went from 12.2 -> 8.7.
Here are the benchmarks that changed by at least plus or minus 1%.
benchmark old ns/op new ns/op delta
BenchmarkFmtFprintfString 249 221 -11.24%
BenchmarkFmtFprintfInt 247 223 -9.72%
BenchmarkFmtFprintfEmpty 76.5 69.6 -9.02%
BenchmarkBinaryTree17 4106631412 3744550160 -8.82%
BenchmarkFmtFprintfFloat 424 399 -5.90%
BenchmarkGoParse 4484421 4242115 -5.40%
BenchmarkGobEncode 8803668 8449107 -4.03%
BenchmarkFmtManyArgs 1494 1436 -3.88%
BenchmarkGobDecode 10431051 10032606 -3.82%
BenchmarkFannkuch11 2591306713 2517400464 -2.85%
BenchmarkTimeParse 361 371 +2.77%
BenchmarkJSONDecode 70620492 68830357 -2.53%
BenchmarkRegexpMatchMedium_1K 54693 53343 -2.47%
BenchmarkTemplate 90008879 91929940 +2.13%
BenchmarkTimeFormat 380 387 +1.84%
BenchmarkRegexpMatchEasy1_32 111 113 +1.80%
BenchmarkJSONEncode 21359159 21007583 -1.65%
BenchmarkRegexpMatchEasy1_1K 603 613 +1.66%
BenchmarkRegexpMatchEasy0_32 127 129 +1.57%
BenchmarkFmtFprintfIntInt 399 393 -1.50%
BenchmarkRegexpMatchEasy0_1K 373 378 +1.34%
Change-Id: I78e297161026f8b5cc7507c965fd3e486f81ed29
Reviewed-on: https://go-review.googlesource.com/8980
Reviewed-by: Austin Clements <austin@google.com>
Diffstat (limited to 'src/runtime/mbitmap.go')
| -rw-r--r-- | src/runtime/mbitmap.go | 41 |
1 files changed, 20 insertions, 21 deletions
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 5dad2a0782..f0704bdb5d 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -154,17 +154,16 @@ func heapBitsForSpan(base uintptr) (hbits heapBits) { // return base == 0 // otherwise return the base of the object. func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits, s *mspan) { - if p < mheap_.arena_start || p >= mheap_.arena_used { + arenaStart := mheap_.arena_start + if p < arenaStart || p >= mheap_.arena_used { return } - + off := p - arenaStart + idx := off >> _PageShift // p points into the heap, but possibly to the middle of an object. // Consult the span table to find the block beginning. - // TODO(rsc): Factor this out. k := p >> _PageShift - x := k - x -= mheap_.arena_start >> _PageShift - s = h_spans[x] + s = h_spans[idx] if s == nil || pageID(k) < s.start || p >= s.limit || s.state != mSpanInUse { if s == nil || s.state == _MSpanStack { // If s is nil, the virtual address has never been part of the heap. @@ -188,23 +187,23 @@ func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits, s *mspan) { printunlock() throw("objectstart: bad pointer in unexpected span") } - return } - base = s.base() - if p-base >= s.elemsize { - // n := (p - base) / s.elemsize, using division by multiplication - n := uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2) - - const debugMagic = false - if debugMagic { - n2 := (p - base) / s.elemsize - if n != n2 { - println("runtime: bad div magic", (p - base), s.elemsize, s.divShift, s.divMul, s.divShift2) - throw("bad div magic") - } + // If this span holds object of a power of 2 size, just mask off the bits to + // the interior of the object. Otherwise use the size to get the base. + if s.baseMask != 0 { + // optimize for power of 2 sized objects. + base = s.base() + base = base + (p-base)&s.baseMask + // base = p & s.baseMask is faster for small spans, + // but doesn't work for large spans. 
+ // Overall, it's faster to use the more general computation above. + } else { + base = s.base() + if p-base >= s.elemsize { + // n := (p - base) / s.elemsize, using division by multiplication + n := uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2) + base += n * s.elemsize } - - base += n * s.elemsize } // Now that we know the actual base, compute heapBits to return to caller. hbits = heapBitsForAddr(base) |
