aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/mbitmap.go
diff options
context:
space:
mode:
authorMartin Möhrmann <moehrmann@google.com>2020-07-13 18:12:20 +0200
committerMartin Möhrmann <martisch@uos.de>2020-09-14 19:21:56 +0000
commit14c7caae5074fdf0d97a3ad995e20c63e4065cbf (patch)
tree628b26fe7af8c2be350c88c4c32e2daeb97a9a87 /src/runtime/mbitmap.go
parentc0c396bd6ad2aea40f7f302711c8b89e20feb371 (diff)
downloadgo-14c7caae5074fdf0d97a3ad995e20c63e4065cbf.tar.xz
runtime: add 24 byte allocation size class
This CL introduces a 24 byte allocation size class which fits 3 pointers on 64 bit and 6 pointers on 32 bit architectures. Notably this new size class fits a slice header on 64 bit architectures exactly while previously a 32 byte size class would have been used for allocating a slice header on the heap. The main complexity added with this CL is that heapBitsSetType needs to handle objects that aren't 16-byte aligned but contain more than a single pointer on 64-bit architectures. Due to having a non 16 byte aligned size class on 32 bit a h.shift of 2 is now possible which means a heap bitmap byte might only be partially written. Due to this already having been possible on 64 bit before the heap bitmap code only needed minor adjustments for 32 bit doublecheck code paths. Note that this CL changes the slice capacity allocated by append for slice growth to a target capacity of 17 to 24 bytes. On 64 bit architectures the capacity of the slice returned by append([]byte{}, make([]byte, 24)...)) is 32 bytes before and 24 bytes after this CL. Depending on allocation patterns of the specific Go program this can increase the number of total alloctions as subsequent appends to the slice can trigger slice growth earlier than before. On the other side if the slice is never appended to again above its capacity this will lower heap usage by 8 bytes. This CL changes the set of size classes reported in the runtime.MemStats.BySize array due to it being limited to a total of 61 size classes. The new 24 byte size class is now included and the 20480 byte size class is not included anymore. Fixes #8885 name old time/op new time/op delta Template 196ms ± 3% 194ms ± 2% ~ (p=0.247 n=10+10) Unicode 85.6ms ±16% 88.1ms ± 1% ~ (p=0.165 n=10+10) GoTypes 673ms ± 2% 668ms ± 2% ~ (p=0.258 n=9+9) Compiler 3.14s ± 6% 3.08s ± 1% ~ (p=0.243 n=10+9) SSA 6.82s ± 1% 6.76s ± 1% -0.87% (p=0.006 n=9+10) Flate 128ms ± 7% 127ms ± 3% ~ (p=0.739 n=10+10) GoParser 154ms ± 3% 153ms ± 4% ~ (p=0.730 n=9+9) Reflect 404ms ± 1% 412ms ± 4% +1.99% (p=0.022 n=9+10) Tar 172ms ± 4% 170ms ± 4% ~ (p=0.065 n=10+9) XML 231ms ± 4% 230ms ± 3% ~ (p=0.912 n=10+10) LinkCompiler 341ms ± 1% 339ms ± 1% ~ (p=0.243 n=9+10) ExternalLinkCompiler 1.72s ± 1% 1.72s ± 1% ~ (p=0.661 n=9+10) LinkWithoutDebugCompiler 221ms ± 2% 221ms ± 2% ~ (p=0.529 n=10+10) StdCmd 18.4s ± 3% 18.2s ± 1% ~ (p=0.515 n=10+8) name old user-time/op new user-time/op delta Template 238ms ± 4% 243ms ± 6% ~ (p=0.661 n=9+10) Unicode 116ms ± 6% 113ms ± 3% -3.37% (p=0.035 n=9+10) GoTypes 854ms ± 2% 848ms ± 2% ~ (p=0.604 n=9+10) Compiler 4.10s ± 1% 4.11s ± 1% ~ (p=0.481 n=8+9) SSA 9.49s ± 1% 9.41s ± 1% -0.92% (p=0.001 n=9+10) Flate 149ms ± 6% 151ms ± 7% ~ (p=0.481 n=10+10) GoParser 189ms ± 2% 190ms ± 2% ~ (p=0.497 n=9+10) Reflect 511ms ± 2% 508ms ± 2% ~ (p=0.211 n=9+10) Tar 215ms ± 4% 212ms ± 3% ~ (p=0.105 n=10+10) XML 288ms ± 2% 288ms ± 2% ~ (p=0.971 n=10+10) LinkCompiler 559ms ± 4% 557ms ± 1% ~ (p=0.968 n=9+10) ExternalLinkCompiler 1.78s ± 1% 1.77s ± 1% ~ (p=0.055 n=8+10) LinkWithoutDebugCompiler 245ms ± 3% 245ms ± 2% ~ (p=0.684 n=10+10) name old alloc/op new alloc/op delta Template 34.8MB ± 0% 34.4MB ± 0% -0.95% (p=0.000 n=9+10) Unicode 28.6MB ± 0% 28.3MB ± 0% -0.95% (p=0.000 n=10+10) GoTypes 115MB ± 0% 114MB ± 0% -1.02% (p=0.000 n=10+9) Compiler 554MB ± 0% 549MB ± 0% -0.86% (p=0.000 n=9+10) SSA 1.28GB ± 0% 1.27GB ± 0% -0.83% (p=0.000 n=10+10) Flate 21.8MB ± 0% 21.6MB ± 0% -0.87% (p=0.000 n=8+10) GoParser 26.7MB ± 0% 26.4MB ± 0% -0.97% (p=0.000 n=10+9) Reflect 75.0MB ± 0% 74.1MB ± 0% -1.18% (p=0.000 n=10+10) Tar 32.6MB ± 0% 32.3MB ± 0% -0.94% (p=0.000 n=10+7) XML 41.5MB ± 0% 41.2MB ± 0% -0.90% (p=0.000 n=10+8) LinkCompiler 105MB ± 0% 104MB ± 0% -0.94% (p=0.000 n=10+10) ExternalLinkCompiler 153MB ± 0% 152MB ± 0% -0.69% (p=0.000 n=10+10) LinkWithoutDebugCompiler 63.7MB ± 0% 63.6MB ± 0% -0.13% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 336k ± 0% 336k ± 0% +0.02% (p=0.002 n=10+10) Unicode 332k ± 0% 332k ± 0% ~ (p=0.447 n=10+10) GoTypes 1.16M ± 0% 1.16M ± 0% +0.01% (p=0.001 n=10+10) Compiler 4.92M ± 0% 4.92M ± 0% +0.01% (p=0.000 n=10+10) SSA 11.9M ± 0% 11.9M ± 0% +0.02% (p=0.000 n=9+10) Flate 214k ± 0% 214k ± 0% +0.02% (p=0.032 n=10+8) GoParser 270k ± 0% 270k ± 0% +0.02% (p=0.004 n=10+9) Reflect 877k ± 0% 877k ± 0% +0.01% (p=0.000 n=10+10) Tar 313k ± 0% 313k ± 0% ~ (p=0.075 n=9+10) XML 387k ± 0% 387k ± 0% +0.02% (p=0.007 n=10+10) LinkCompiler 455k ± 0% 456k ± 0% +0.08% (p=0.000 n=10+9) ExternalLinkCompiler 670k ± 0% 671k ± 0% +0.06% (p=0.000 n=10+10) LinkWithoutDebugCompiler 113k ± 0% 113k ± 0% ~ (p=0.149 n=10+10) name old maxRSS/op new maxRSS/op delta Template 34.1M ± 1% 34.1M ± 1% ~ (p=0.853 n=10+10) Unicode 35.1M ± 1% 34.6M ± 1% -1.43% (p=0.000 n=10+10) GoTypes 72.8M ± 3% 73.3M ± 2% ~ (p=0.724 n=10+10) Compiler 288M ± 3% 295M ± 4% ~ (p=0.393 n=10+10) SSA 630M ± 1% 622M ± 1% -1.18% (p=0.001 n=10+10) Flate 26.0M ± 1% 26.2M ± 2% ~ (p=0.493 n=10+10) GoParser 28.6M ± 1% 28.5M ± 2% ~ (p=0.256 n=10+10) Reflect 55.5M ± 2% 55.4M ± 1% ~ (p=0.436 n=10+10) Tar 33.0M ± 1% 32.8M ± 2% ~ (p=0.075 n=10+10) XML 38.7M ± 1% 39.0M ± 1% ~ (p=0.053 n=9+10) LinkCompiler 164M ± 1% 164M ± 1% -0.27% (p=0.029 n=10+10) ExternalLinkCompiler 174M ± 0% 173M ± 0% -0.33% (p=0.002 n=9+10) LinkWithoutDebugCompiler 137M ± 0% 136M ± 2% ~ (p=0.825 n=9+10) Change-Id: I9ecf2a10024513abef8fbfbe519e44e0b29b6167 Reviewed-on: https://go-review.googlesource.com/c/go/+/242258 Trust: Martin Möhrmann <moehrmann@google.com> Trust: Michael Knyszek <mknyszek@google.com> Run-TryBot: Martin Möhrmann <martisch@uos.de> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com> Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/runtime/mbitmap.go')
-rw-r--r--src/runtime/mbitmap.go112
1 files changed, 96 insertions, 16 deletions
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 8de44c14b9..51c3625c3d 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -30,10 +30,9 @@
// indicates scanning can ignore the rest of the allocation.
//
// The 2-bit entries are split when written into the byte, so that the top half
-// of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
-// bits.
-// This form allows a copy from the 1-bit to the 4-bit form to keep the
-// pointer bits contiguous, instead of having to space them out.
+// of the byte contains 4 high (scan) bits and the bottom half contains 4 low
+// (pointer) bits. This form allows a copy from the 1-bit to the 4-bit form to
+// keep the pointer bits contiguous, instead of having to space them out.
//
// The code makes use of the fact that the zero value for a heap
// bitmap means scalar/dead. This property must be preserved when
@@ -816,6 +815,12 @@ func (s *mspan) countAlloc() int {
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
const doubleCheck = false // slow but helpful; enable to test modifications to this code
+ const (
+ mask1 = bitPointer | bitScan // 00010001
+ mask2 = bitPointer | bitScan | mask1<<heapBitsShift // 00110011
+ mask3 = bitPointer | bitScan | mask2<<heapBitsShift // 01110111
+ )
+
// dataSize is always size rounded up to the next malloc size class,
// except in the case of allocating a defer block, in which case
// size is sizeof(_defer{}) (at least 6 words) and dataSize may be
@@ -844,11 +849,12 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
h := heapBitsForAddr(x)
ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)
- // Heap bitmap bits for 2-word object are only 4 bits,
- // so also shared with objects next to it.
- // This is called out as a special case primarily for 32-bit systems,
- // so that on 32-bit systems the code below can assume all objects
- // are 4-word aligned (because they're all 16-byte aligned).
+ // 2-word objects only have 4 bitmap bits and 3-word objects only have 6 bitmap bits.
+ // Therefore, these objects share a heap bitmap byte with the objects next to them.
+ // These are called out as a special case primarily so the code below can assume all
+ // objects are at least 4 words long and that their bitmaps start either at the beginning
+ // of a bitmap byte, or half-way in (h.shift of 0 and 2 respectively).
+
if size == 2*sys.PtrSize {
if typ.size == sys.PtrSize {
// We're allocating a block big enough to hold two pointers.
@@ -865,7 +871,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
*h.bitp &^= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
*h.bitp |= (bitPointer | bitScan) << h.shift
} else {
- // 2-element slice of pointer.
+ // 2-element array of pointer.
*h.bitp |= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
}
return
@@ -886,6 +892,70 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
*h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
*h.bitp |= uint8(hb << h.shift)
return
+ } else if size == 3*sys.PtrSize {
+ b := uint8(*ptrmask)
+ if doubleCheck {
+ if b == 0 {
+ println("runtime: invalid type ", typ.string())
+ throw("heapBitsSetType: called with non-pointer type")
+ }
+ if sys.PtrSize != 8 {
+ throw("heapBitsSetType: unexpected 3 pointer wide size class on 32 bit")
+ }
+ if typ.kind&kindGCProg != 0 {
+ throw("heapBitsSetType: unexpected GC prog for 3 pointer wide size class")
+ }
+ if typ.size == 2*sys.PtrSize {
+ print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, "\n")
+ throw("heapBitsSetType: inconsistent object sizes")
+ }
+ }
+ if typ.size == sys.PtrSize {
+ // The type contains a pointer otherwise heapBitsSetType wouldn't have been called.
+ // Since the type is only 1 pointer wide and contains a pointer, its gcdata must be exactly 1.
+ if doubleCheck && *typ.gcdata != 1 {
+ print("runtime: heapBitsSetType size=", size, " typ.size=", typ.size, "but *typ.gcdata", *typ.gcdata, "\n")
+ throw("heapBitsSetType: unexpected gcdata for 1 pointer wide type size in 3 pointer wide size class")
+ }
+ // 3 element array of pointers. Unrolling ptrmask 3 times into p yields 00000111.
+ b = 7
+ }
+
+ hb := b & 7
+ // Set bitScan bits for all pointers.
+ hb |= hb << wordsPerBitmapByte
+ // First bitScan bit is always set since the type contains pointers.
+ hb |= bitScan
+ // Second bitScan bit needs to also be set if the third bitScan bit is set.
+ hb |= hb & (bitScan << (2 * heapBitsShift)) >> 1
+
+ // For h.shift > 1 heap bits cross a byte boundary and need to be written part
+ // to h.bitp and part to the next h.bitp.
+ switch h.shift {
+ case 0:
+ *h.bitp &^= mask3 << 0
+ *h.bitp |= hb << 0
+ case 1:
+ *h.bitp &^= mask3 << 1
+ *h.bitp |= hb << 1
+ case 2:
+ *h.bitp &^= mask2 << 2
+ *h.bitp |= (hb & mask2) << 2
+ // Two words written to the first byte.
+ // Advance two words to get to the next byte.
+ h = h.next().next()
+ *h.bitp &^= mask1
+ *h.bitp |= (hb >> 2) & mask1
+ case 3:
+ *h.bitp &^= mask1 << 3
+ *h.bitp |= (hb & mask1) << 3
+ // One word written to the first byte.
+ // Advance one word to get to the next byte.
+ h = h.next()
+ *h.bitp &^= mask2
+ *h.bitp |= (hb >> 1) & mask2
+ }
+ return
}
// Copy from 1-bit ptrmask into 2-bit bitmap.
@@ -1079,7 +1149,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// word must be set to scan since there are pointers
// somewhere in the object.
// In all following words, we set the scan/dead
- // appropriately to indicate that the object contains
+ // appropriately to indicate that the object continues
// to the next 2-bit entry in the bitmap.
//
// We set four bits at a time here, but if the object
@@ -1095,12 +1165,22 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
b >>= 4
nb -= 4
- case sys.PtrSize == 8 && h.shift == 2:
+ case h.shift == 2:
// Ptrmask and heap bitmap are misaligned.
+ //
+ // On 32 bit architectures only the 6-word object that corresponds
+ // to a 24 bytes size class can start with h.shift of 2 here since
+ // all other non 16 byte aligned size classes have been handled by
+ // special code paths at the beginning of heapBitsSetType on 32 bit.
+ //
+ // Many size classes are only 16 byte aligned. On 64 bit architectures
+ // this results in a heap bitmap position starting with a h.shift of 2.
+ //
// The bits for the first two words are in a byte shared
// with another object, so we must be careful with the bits
// already there.
- // We took care of 1-word and 2-word objects above,
+ //
+ // We took care of 1-word, 2-word, and 3-word objects above,
// so this is at least a 6-word object.
hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
hb |= bitScan << (2 * heapBitsShift)
@@ -1113,7 +1193,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
*hbitp |= uint8(hb)
hbitp = add1(hbitp)
if w += 2; w >= nw {
- // We know that there is more data, because we handled 2-word objects above.
+ // We know that there is more data, because we handled 2-word and 3-word objects above.
// This must be at least a 6-word object. If we're out of pointer words,
// mark no scan in next bitmap byte and finish.
hb = 0
@@ -1248,12 +1328,12 @@ Phase4:
// Handle the first byte specially if it's shared. See
// Phase 1 for why this is the only special case we need.
if doubleCheck {
- if !(h.shift == 0 || (sys.PtrSize == 8 && h.shift == 2)) {
+ if !(h.shift == 0 || h.shift == 2) {
print("x=", x, " size=", size, " cnw=", h.shift, "\n")
throw("bad start shift")
}
}
- if sys.PtrSize == 8 && h.shift == 2 {
+ if h.shift == 2 {
*h.bitp = *h.bitp&^((bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift)<<(2*heapBitsShift)) | *src
h = h.next().next()
cnw -= 2