diff options
| author | Keith Randall <khr@golang.org> | 2022-05-06 18:40:17 -0700 |
|---|---|---|
| committer | Keith Randall <khr@google.com> | 2022-08-08 16:57:48 +0000 |
| commit | c3833a55433f4b2981253f64444fe5c3d1bc910a (patch) | |
| tree | 3b2b04ae9c99f415f5626877acb50e14980fc618 /src/runtime | |
| parent | b589208c8cc6e08239868f47e12c1449cd797bac (diff) | |
| download | go-c3833a55433f4b2981253f64444fe5c3d1bc910a.tar.xz | |
runtime: process ptr bitmaps one word at a time
Read the bitmaps one uintptr at a time instead of one byte at a time.
Performance so far:
Allocation heavy, no retention: ~30% faster in heapBitsSetType
Scan heavy, ~no allocation: ~even in scanobject
Change-Id: I40d492b50d7f89d1b4261c2de58f6d255fa5e93e
Reviewed-on: https://go-review.googlesource.com/c/go/+/407036
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Diffstat (limited to 'src/runtime')
| -rw-r--r-- | src/runtime/mbitmap.go | 83 |
1 file changed, 71 insertions, 12 deletions
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 1c7ae8a68e..d454949926 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -893,6 +893,19 @@ func (h writeHeapBits) flush(addr, size uintptr) { } } +// Read the bytes starting at the aligned pointer p into a uintptr. +// Read is little-endian. +func readUintptr(p *byte) uintptr { + x := *(*uintptr)(unsafe.Pointer(p)) + if goarch.BigEndian { + if goarch.PtrSize == 8 { + return uintptr(sys.Bswap64(uint64(x))) + } + return uintptr(sys.Bswap32(uint32(x))) + } + return x +} + // heapBitsSetType records that the new allocation [x, x+size) // holds in [x, x+dataSize) one or more values of type typ. // (The number of values is given by dataSize / typ.size.) @@ -917,7 +930,7 @@ func (h writeHeapBits) flush(addr, size uintptr) { // machines, callers must execute a store/store (publication) barrier // between calling this function and making the object reachable. func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { - const doubleCheck = true // slow but helpful; enable to test modifications to this code + const doubleCheck = false // slow but helpful; enable to test modifications to this code if doubleCheck && dataSize%typ.size != 0 { throw("heapBitsSetType: dataSize not a multiple of typ.size") @@ -995,19 +1008,65 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // objects with scalar tails, all but the last tail does have to // be initialized, because there is no way to say "skip forward". 
- for i := uintptr(0); true; i += typ.size { - p := typ.gcdata - var j uintptr - for j = 0; j+8*goarch.PtrSize < typ.ptrdata; j += 8 * goarch.PtrSize { - h = h.write(uintptr(*p), 8) - p = add1(p) + ptrs := typ.ptrdata / goarch.PtrSize + if typ.size == dataSize { // Single element + if ptrs <= ptrBits { // Single small element + m := readUintptr(typ.gcdata) + h = h.write(m, ptrs) + } else { // Single large element + p := typ.gcdata + for { + h = h.write(readUintptr(p), ptrBits) + p = addb(p, ptrBits/8) + ptrs -= ptrBits + if ptrs <= ptrBits { + break + } + } + m := readUintptr(p) + h = h.write(m, ptrs) } - h = h.write(uintptr(*p), (typ.ptrdata-j)/goarch.PtrSize) - if i+typ.size == dataSize { - break // don't need the trailing nonptr bits on the last element. + } else { // Repeated element + words := typ.size / goarch.PtrSize // total words, including scalar tail + if words <= ptrBits { // Repeated small element + n := dataSize / typ.size + m := readUintptr(typ.gcdata) + // Make larger unit to repeat + for words <= ptrBits/2 { + if n&1 != 0 { + h = h.write(m, words) + } + n /= 2 + m |= m << words + ptrs += words + words *= 2 + if n == 1 { + break + } + } + for n > 1 { + h = h.write(m, words) + n-- + } + h = h.write(m, ptrs) + } else { // Repeated large element + for i := uintptr(0); true; i += typ.size { + p := typ.gcdata + j := ptrs + for j > ptrBits { + h = h.write(readUintptr(p), ptrBits) + p = addb(p, ptrBits/8) + j -= ptrBits + } + m := readUintptr(p) + h = h.write(m, j) + if i+typ.size == dataSize { + break // don't need the trailing nonptr bits on the last element. + } + // Pad with zeros to the start of the next element. + h = h.pad(typ.size - typ.ptrdata) + } } - // Pad with zeros to the start of the next element. - h = h.pad(typ.size - typ.ptrdata) } h.flush(x, size) |
