From 6d8a147bef8ee28eb647db21ea91ecb823fa2480 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 28 Apr 2015 00:28:47 -0400 Subject: runtime: use 1-bit pointer bitmaps in type representation The type information in reflect.Type and the GC programs is now 1 bit per word, down from 2 bits. The in-memory unrolled type bitmap representation are now 1 bit per word, down from 4 bits. The conversion from the unrolled (now 1-bit) bitmap to the heap bitmap (still 4-bit) is not optimized. A followup CL will work on that, after the heap bitmap has been converted to 2-bit. The typeDead optimization, in which a special value denotes that there are no more pointers anywhere in the object, is lost in this CL. A followup CL will bring it back in the final form of heapBitsSetType. Change-Id: If61e67950c16a293b0b516a6fd9a1c755b6d5549 Reviewed-on: https://go-review.googlesource.com/9702 Reviewed-by: Austin Clements --- src/runtime/export_test.go | 9 +-- src/runtime/gcinfo_test.go | 14 ++--- src/runtime/mbarrier.go | 86 ++++++++++++++----------- src/runtime/mbitmap.go | 154 +++++++++++++++++++-------------------------- 4 files changed, 122 insertions(+), 141 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 1efe24c61a..817622abd0 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -76,15 +76,8 @@ func ParForIters(desc *ParFor, tid uint32) (uint32, uint32) { } func GCMask(x interface{}) (ret []byte) { - e := (*eface)(unsafe.Pointer(&x)) - s := (*slice)(unsafe.Pointer(&ret)) systemstack(func() { - var len uintptr - var a *byte - getgcmask(e.data, e._type, &a, &len) - s.array = unsafe.Pointer(a) - s.len = int(len) - s.cap = s.len + ret = getgcmask(x) }) return } diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go index 66b0353f08..b4ab9134aa 100644 --- a/src/runtime/gcinfo_test.go +++ b/src/runtime/gcinfo_test.go @@ -28,13 +28,13 @@ func TestGCInfo(t *testing.T) { verifyGCInfo(t, "data eface", &dataEface, infoEface) verifyGCInfo(t, "data iface", &dataIface, infoIface) - verifyGCInfo(t, "stack ScalarPtr", new(ScalarPtr), infoScalarPtr) - verifyGCInfo(t, "stack PtrScalar", new(PtrScalar), infoPtrScalar) - verifyGCInfo(t, "stack BigStruct", new(BigStruct), infoBigStruct()) - verifyGCInfo(t, "stack string", new(string), infoString) - verifyGCInfo(t, "stack slice", new([]string), infoSlice) - verifyGCInfo(t, "stack eface", new(interface{}), infoEface) - verifyGCInfo(t, "stack iface", new(Iface), infoIface) + verifyGCInfo(t, "stack ScalarPtr", new(ScalarPtr), nonStackInfo(infoScalarPtr)) + verifyGCInfo(t, "stack PtrScalar", new(PtrScalar), nonStackInfo(infoPtrScalar)) + verifyGCInfo(t, "stack BigStruct", new(BigStruct), nonStackInfo(infoBigStruct())) + verifyGCInfo(t, "stack string", new(string), nonStackInfo(infoString)) + verifyGCInfo(t, "stack slice", new([]string), nonStackInfo(infoSlice)) + verifyGCInfo(t, "stack eface", new(interface{}), nonStackInfo(infoEface)) + verifyGCInfo(t, "stack iface", new(Iface), nonStackInfo(infoIface)) for i := 0; i < 10; i++ { verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), infoScalarPtr) diff --git a/src/runtime/mbarrier.go b/src/runtime/mbarrier.go index eb5881707b..4162483ade 100644 --- a/src/runtime/mbarrier.go +++ b/src/runtime/mbarrier.go @@ -223,29 +223,25 @@ func typedmemmove(typ *_type, dst, src unsafe.Pointer) { } systemstack(func() { - mask := typeBitmapInHeapBitmapFormat(typ) + dst := dst // make local copies + src := src nptr := typ.size / ptrSize - for i := uintptr(0); i < nptr; i += 2 { - bits := mask[i/2] - if (bits>>2)&typeMask == typePointer { - writebarrierptr((*uintptr)(dst), *(*uintptr)(src)) - } else { - *(*uintptr)(dst) = *(*uintptr)(src) - } - // TODO(rsc): The noescape calls should be unnecessary. - dst = add(noescape(dst), ptrSize) - src = add(noescape(src), ptrSize) - if i+1 == nptr { - break - } - bits >>= 4 - if (bits>>2)&typeMask == typePointer { - writebarrierptr((*uintptr)(dst), *(*uintptr)(src)) - } else { - *(*uintptr)(dst) = *(*uintptr)(src) + i := uintptr(0) + Copy: + for _, bits := range ptrBitmapForType(typ) { + for j := 0; j < 8; j++ { + if bits&1 != 0 { + writebarrierptr((*uintptr)(dst), *(*uintptr)(src)) + } else { + *(*uintptr)(dst) = *(*uintptr)(src) + } + if i++; i >= nptr { + break Copy + } + dst = add(dst, ptrSize) + src = add(src, ptrSize) + bits >>= 1 } - dst = add(noescape(dst), ptrSize) - src = add(noescape(src), ptrSize) } }) } @@ -274,18 +270,25 @@ func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size off += frag } - mask := typeBitmapInHeapBitmapFormat(typ) + mask := ptrBitmapForType(typ) nptr := (off + size) / ptrSize - for i := uintptr(off / ptrSize); i < nptr; i++ { - bits := mask[i/2] >> ((i & 1) << 2) - if (bits>>2)&typeMask == typePointer { - writebarrierptr((*uintptr)(dst), *(*uintptr)(src)) - } else { - *(*uintptr)(dst) = *(*uintptr)(src) + i := uintptr(off / ptrSize) +Copy: + for { + bits := mask[i/8] >> (i % 8) + for j := i % 8; j < 8; j++ { + if bits&1 != 0 { + writebarrierptr((*uintptr)(dst), *(*uintptr)(src)) + } else { + *(*uintptr)(dst) = *(*uintptr)(src) + } + if i++; i >= nptr { + break Copy + } + dst = add(dst, ptrSize) + src = add(src, ptrSize) + bits >>= 1 } - // TODO(rsc): The noescape calls should be unnecessary. - dst = add(noescape(dst), ptrSize) - src = add(noescape(src), ptrSize) } size &= ptrSize - 1 if size > 0 { @@ -307,18 +310,25 @@ func callwritebarrier(typ *_type, frame unsafe.Pointer, framesize, retoffset uin } systemstack(func() { - mask := typeBitmapInHeapBitmapFormat(typ) + mask := ptrBitmapForType(typ) // retoffset is known to be pointer-aligned (at least). // TODO(rsc): The noescape call should be unnecessary. dst := add(noescape(frame), retoffset) nptr := framesize / ptrSize - for i := uintptr(retoffset / ptrSize); i < nptr; i++ { - bits := mask[i/2] >> ((i & 1) << 2) - if (bits>>2)&typeMask == typePointer { - writebarrierptr_nostore((*uintptr)(dst), *(*uintptr)(dst)) + i := uintptr(retoffset / ptrSize) + Copy: + for { + bits := mask[i/8] >> (i % 8) + for j := i % 8; j < 8; j++ { + if bits&1 != 0 { + writebarrierptr_nostore((*uintptr)(dst), *(*uintptr)(dst)) + } + if i++; i >= nptr { + break Copy + } + dst = add(dst, ptrSize) + bits >>= 1 } - // TODO(rsc): The noescape call should be unnecessary. - dst = add(noescape(dst), ptrSize) } }) } diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index f0c7520e38..cfdd259371 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -446,25 +446,23 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // and storing type info in the GC bitmap. h := heapBitsForAddr(x) - var ti, te uintptr var ptrmask *uint8 if size == ptrSize { // It's one word and it has pointers, it must be a pointer. // The bitmap byte is shared with the one-word object // next to it, and concurrent GC might be marking that // object, so we must use an atomic update. + // TODO(rsc): It may make sense to set all the pointer bits + // when initializing the span, and then the atomicor8 here + // goes away - heapBitsSetType would be a no-op + // in that case. atomicor8(h.bitp, typePointer<<(typeShift+h.shift)) return } if typ.kind&kindGCProg != 0 { nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize - masksize := nptr - if masksize%2 != 0 { - masksize *= 2 // repeated - } - const typeBitsPerByte = 8 / typeBitsWidth - masksize = masksize * typeBitsPerByte / 8 // 4 bits per word - masksize++ // unroll flag in the beginning + masksize := (nptr + 7) / 8 + masksize++ // unroll flag in the beginning if masksize > maxGCMask && typ.gc[1] != 0 { // write barriers have not been updated to deal with this case yet. throw("maxGCMask too small for now") @@ -490,64 +488,55 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { } else { ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask } - if size == 2*ptrSize { - // h.shift is 0 for all sizes > ptrSize. - *h.bitp = *ptrmask - return - } - te = uintptr(typ.size) / ptrSize - // If the type occupies odd number of words, its mask is repeated. - if te%2 == 0 { - te /= 2 - } - // Copy pointer bitmask into the bitmap. - // TODO(rlh): add comment addressing the following concerns: - // If size > 2*ptrSize, is x guaranteed to be at least 2*ptrSize-aligned? - // And if type occupies and odd number of words, why are we only going through half - // of ptrmask and why don't we have to shift everything by 4 on odd iterations? - - for i := uintptr(0); i < dataSize; i += 2 * ptrSize { - v := *(*uint8)(add(unsafe.Pointer(ptrmask), ti)) - ti++ - if ti == te { - ti = 0 - } - if i+ptrSize == dataSize { - v &^= typeMask << (4 + typeShift) + + // Copy from 1-bit ptrmask into 4-bit bitmap. + elemSize := typ.size + var v uint32 // pending byte of 4-bit bitmap; uint32 for better code gen + nv := 0 // number of bits added to v + for i := uintptr(0); i < dataSize; i += elemSize { + // At each word, b holds the pending bits from the 1-bit bitmap, + // with a sentinel 1 bit above all the actual bits. + // When b == 1, that means it is out of bits and needs to be refreshed. + // *(p+1) is the next byte to read. + p := ptrmask + b := uint32(*p) | 0x100 + for j := uintptr(0); j < elemSize; j += ptrSize { + if b == 1 { + p = addb(p, 1) + b = uint32(*p) | 0x100 + } + // b&1 is 1 for pointer, 0 for scalar. + // We want typePointer (2) or typeScalar (1), so add 1. + v |= ((b & 1) + 1) << (uint(nv) + typeShift) + b >>= 1 + if nv += heapBitsWidth; nv == 8 { + *h.bitp = uint8(v) + h.bitp = subtractb(h.bitp, 1) + v = 0 + nv = 0 + } } + } - *h.bitp = v + // Finish final byte of bitmap and mark next word (if any) with typeDead (0) + if nv != 0 { + *h.bitp = uint8(v) h.bitp = subtractb(h.bitp, 1) - } - if dataSize%(2*ptrSize) == 0 && dataSize < size { - // Mark the word after last object's word as typeDead. + } else if dataSize < size { *h.bitp = 0 } } -// typeBitmapInHeapBitmapFormat returns a bitmap holding -// the type bits for the type typ, but expanded into heap bitmap format -// to make it easier to copy them into the heap bitmap. -// TODO(rsc): Change clients to use the type bitmap format instead, -// which can be stored more densely (especially if we drop to 1 bit per pointer). -// -// To make it easier to replicate the bits when filling out the heap -// bitmap for an array of typ, if typ holds an odd number of words -// (meaning the heap bitmap would stop halfway through a byte), -// typeBitmapInHeapBitmapFormat returns the bitmap for two instances -// of typ in a row. -// TODO(rsc): Remove doubling. -func typeBitmapInHeapBitmapFormat(typ *_type) []uint8 { +// ptrBitmapForType returns a bitmap indicating where pointers are +// in the memory representation of the type typ. +// The bit x[i/8]&(1<<(i%8)) is 1 if the i'th word in a value of type typ +// is a pointer. +func ptrBitmapForType(typ *_type) []uint8 { var ptrmask *uint8 nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize if typ.kind&kindGCProg != 0 { - masksize := nptr - if masksize%2 != 0 { - masksize *= 2 // repeated - } - const typeBitsPerByte = 8 / typeBitsWidth - masksize = masksize * typeBitsPerByte / 8 // 4 bits per word - masksize++ // unroll flag in the beginning + masksize := (nptr + 7) / 8 + masksize++ // unroll flag in the beginning if masksize > maxGCMask && typ.gc[1] != 0 { // write barriers have not been updated to deal with this case yet. throw("maxGCMask too small for now") @@ -565,7 +554,7 @@ func typeBitmapInHeapBitmapFormat(typ *_type) []uint8 { } else { ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask } - return (*[1 << 30]byte)(unsafe.Pointer(ptrmask))[:(nptr+1)/2] + return (*[1 << 30]byte)(unsafe.Pointer(ptrmask))[:(nptr+7)/8] } // GC type info programs @@ -625,10 +614,7 @@ func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) prog = addb(prog, 1) p := (*[1 << 30]byte)(unsafe.Pointer(prog)) for i := 0; i < siz; i++ { - const typeBitsPerByte = 8 / typeBitsWidth - v := p[i/typeBitsPerByte] - v >>= (uint(i) % typeBitsPerByte) * typeBitsWidth - v &= typeMask + v := p[i/8] >> (uint(i) % 8) & 1 if inplace { // Store directly into GC bitmap. h := heapBitsForAddr(uintptr(unsafe.Pointer(&mask[pos]))) @@ -639,18 +625,18 @@ func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) } pos += ptrSize } else if sparse { + throw("sparse") // 4-bits per word, type bits in high bits v <<= (pos % 8) + typeShift mask[pos/8] |= v pos += heapBitsWidth } else { // 1 bit per word, for data/bss bitmap - v >>= 1 // convert typePointer to 1, others to 0 mask[pos/8] |= v << (pos % 8) pos++ } } - prog = addb(prog, round(uintptr(siz)*typeBitsWidth, 8)/8) + prog = addb(prog, (uintptr(siz)+7)/8) case insArray: prog = (*byte)(add(unsafe.Pointer(prog), 1)) @@ -675,7 +661,7 @@ func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) } } -// Unrolls GC program prog for data/bss, returns dense GC mask. +// Unrolls GC program prog for data/bss, returns 1-bit GC mask. func unrollglobgcprog(prog *byte, size uintptr) bitvector { masksize := round(round(size, ptrSize)/ptrSize, 8) / 8 mask := (*[1 << 30]byte)(persistentalloc(masksize+1, 0, &memstats.gc_sys)) @@ -721,16 +707,10 @@ func unrollgcprog_m(typ *_type) { if *mask == 0 { pos := uintptr(8) // skip the unroll flag prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1]))) - prog = unrollgcprog1(mask, prog, &pos, false, true) + prog = unrollgcprog1(mask, prog, &pos, false, false) if *prog != insEnd { throw("unrollgcprog: program does not end with insEnd") } - if typ.size/ptrSize%2 != 0 { - // repeat the program - prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1]))) - unrollgcprog1(mask, prog, &pos, false, true) - } - // atomic way to say mask[0] = 1 atomicor8(mask, 1) } @@ -749,21 +729,21 @@ func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool { } // Returns GC type info for object p for testing. -func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) { - *mask = nil - *len = 0 - - // data +func getgcmask(ep interface{}) (mask []byte) { + e := *(*eface)(unsafe.Pointer(&ep)) + p := e.data + t := e._type + // data or bss for datap := &firstmoduledata; datap != nil; datap = datap.next { + // data if datap.data <= uintptr(p) && uintptr(p) < datap.edata { n := (*ptrtype)(unsafe.Pointer(t)).elem.size - *len = n / ptrSize - *mask = &make([]byte, *len)[0] + mask = make([]byte, n/ptrSize) for i := uintptr(0); i < n; i += ptrSize { off := (uintptr(p) + i - datap.data) / ptrSize bits := (*addb(datap.gcdatamask.bytedata, off/8) >> (off % 8)) & 1 bits += 1 // convert 1-bit to 2-bit - *addb(*mask, i/ptrSize) = bits + mask[i/ptrSize] = bits } return } @@ -771,13 +751,12 @@ func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) { // bss if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss { n := (*ptrtype)(unsafe.Pointer(t)).elem.size - *len = n / ptrSize - *mask = &make([]byte, *len)[0] + mask = make([]byte, n/ptrSize) for i := uintptr(0); i < n; i += ptrSize { off := (uintptr(p) + i - datap.bss) / ptrSize bits := (*addb(datap.gcbssmask.bytedata, off/8) >> (off % 8)) & 1 bits += 1 // convert 1-bit to 2-bit - *addb(*mask, i/ptrSize) = bits + mask[i/ptrSize] = bits } return } @@ -787,11 +766,10 @@ func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) { var n uintptr var base uintptr if mlookup(uintptr(p), &base, &n, nil) != 0 { - *len = n / ptrSize - *mask = &make([]byte, *len)[0] + mask = make([]byte, n/ptrSize) for i := uintptr(0); i < n; i += ptrSize { bits := heapBitsForAddr(base + i).typeBits() - *addb(*mask, i/ptrSize) = bits + mask[i/ptrSize] = bits } return } @@ -821,13 +799,13 @@ func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) { bv := stackmapdata(stkmap, pcdata) size := uintptr(bv.n) * ptrSize n := (*ptrtype)(unsafe.Pointer(t)).elem.size - *len = n / ptrSize - *mask = &make([]byte, *len)[0] + mask = make([]byte, n/ptrSize) for i := uintptr(0); i < n; i += ptrSize { off := (uintptr(p) + i - frame.varp + size) / ptrSize bits := (*addb(bv.bytedata, off/8) >> (off % 8)) & 1 bits += 1 // convert 1-bit to 2-bit - *addb(*mask, i/ptrSize) = bits + mask[i/ptrSize] = bits } } + return } -- cgit v1.3