Diffstat (limited to 'src')

 src/runtime/heapdump.go |  10
 src/runtime/malloc.go   |  36
 src/runtime/mbitmap.go  | 306
 src/runtime/mcache.go   |  13
 src/runtime/mcentral.go |  35
 src/runtime/mgcmark.go  |  10
 src/runtime/mgcsweep.go |  72
 src/runtime/mheap.go    |  16
 src/runtime/stack.go    |   1

 9 files changed, 281 insertions(+), 218 deletions(-)
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index e6a41f7f97..96dd6ff867 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -472,9 +472,13 @@ func dumpobjs() {
 		if n > uintptr(len(freemark)) {
 			throw("freemark array doesn't have enough entries")
 		}
-		for l := s.freelist; l.ptr() != nil; l = l.ptr().next {
-			freemark[(uintptr(l)-p)/size] = true
+
+		for freeIndex := s.freeindex; freeIndex < s.nelems; freeIndex++ {
+			if s.isFree(freeIndex) {
+				freemark[freeIndex] = true
+			}
 		}
+
 		for j := uintptr(0); j < n; j, p = j+1, p+size {
 			if freemark[j] {
 				freemark[j] = false
@@ -709,7 +713,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
 	i := uintptr(0)
 	hbits := heapBitsForAddr(p)
 	for ; i < nptr; i++ {
-		if i >= 2 && !hbits.isMarked() {
+		if i >= 2 && !hbits.morePointers() {
 			break // end of object
 		}
 		if hbits.isPointer() {
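The dumper no longer walks a freelist; it derives free slots from the span's allocation bitmap. s.isFree is used above but not defined in this diff, so the helper below is only a sketch of the shape it plausibly has, inferred from how the sweep code reads allocBits (a clear bit at or above freeindex means the slot is free):

// Sketch (not from this CL): isFree reports whether the index'th slot in s
// is unallocated. Valid only for index >= s.freeindex, which is how the dump
// loop above calls it; below freeindex a clear allocBits bit can also mean
// "allocated since the last sweep".
func (s *mspan) isFree(index uintptr) bool {
	whichByte := index / 8
	whichBit := index % 8
	return s.allocBits[whichByte]&(1<<whichBit) == 0
}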
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 528a5b73ba..e635682cae 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -502,23 +502,34 @@ const (
 // weight allocation. If it is a heavy weight allocation the caller must
 // determine whether a new GC cycle needs to be started or if the GC is active
 // whether this goroutine needs to assist the GC.
-// https://golang.org/cl/5350 motivates why this routine should preform a
-// prefetch.
 func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, shouldhelpgc bool) {
 	s := c.alloc[sizeclass]
-	v = s.freelist
-	if v.ptr() == nil {
+	shouldhelpgc = false
+	freeIndex := s.nextFreeIndex(s.freeindex)
+
+	if freeIndex == s.nelems {
+		// The span is full.
+		if uintptr(s.ref) != s.nelems {
+			throw("s.ref != s.nelems && freeIndex == s.nelems")
+		}
 		systemstack(func() {
 			c.refill(int32(sizeclass))
 		})
 		shouldhelpgc = true
 		s = c.alloc[sizeclass]
-		v = s.freelist
+		freeIndex = s.nextFreeIndex(s.freeindex)
+	}
+	if freeIndex >= s.nelems {
+		throw("freeIndex is not valid")
 	}
-	s.freelist = v.ptr().next
+
+	v = gclinkptr(freeIndex*s.elemsize + s.base())
+	// Advance the freeIndex.
+	s.freeindex = freeIndex + 1
 	s.ref++
-	// prefetchnta offers best performance, see change list message.
-	prefetchnta(uintptr(v.ptr().next))
+	if uintptr(s.ref) > s.nelems {
+		throw("s.ref > s.nelems")
+	}
 	return
 }
@@ -655,10 +666,8 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
 			v, shouldhelpgc = c.nextFree(sizeclass)
 			x = unsafe.Pointer(v)
 			if flags&flagNoZero == 0 {
-				v.ptr().next = 0
-				if size > 2*sys.PtrSize && ((*[2]uintptr)(x))[1] != 0 {
-					memclr(unsafe.Pointer(v), size)
-				}
+				memclr(unsafe.Pointer(v), size)
+				// TODO:(rlh) Only clear if object is not known to be zeroed.
 			}
 		}
 	} else {
@@ -667,12 +676,13 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
 		systemstack(func() {
 			s = largeAlloc(size, flags)
 		})
+		s.freeindex = 1
 		x = unsafe.Pointer(uintptr(s.start << pageShift))
 		size = s.elemsize
 	}
 
 	if flags&flagNoScan != 0 {
-		// All objects are pre-marked as noscan. Nothing to do.
+		heapBitsSetTypeNoScan(uintptr(x), size)
 	} else {
 		// If allocating a defer+arg block, now that we've picked a malloc size
 		// large enough to hold everything, cut the "asked for" size down to
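nextFree leans on s.nextFreeIndex, which this diff calls but does not define. A minimal sketch of such a scan, assuming the allocBits encoding the sweep code reads (a clear bit means the slot was free at the end of the last sweep):

// Sketch only, not the CL's implementation: scan allocBits for the next
// clear bit at or after index; return s.nelems when the span is full.
func (s *mspan) nextFreeIndex(index uintptr) uintptr {
	for ; index < s.nelems; index++ {
		if s.allocBits[index/8]&(1<<(index%8)) == 0 {
			return index
		}
	}
	return s.nelems // no free slot remains
}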
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index a78efdc034..10446fee42 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -24,7 +24,7 @@
 // In each 2-bit entry, the lower bit holds the same information as in the 1-bit
 // bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
 // The meaning of the high bit depends on the position of the word being described
-// in its allocated object. In the first word, the high bit is the GC ``marked'' bit.
+// in its allocated object. In the first word, the high bit is unused.
 // In the second word, the high bit is the GC ``checkmarked'' bit (see below).
 // In the third and later words, the high bit indicates that the object is still
 // being described. In these words, if a bit pair with a high bit 0 is encountered,
@@ -33,12 +33,13 @@
 // in the object are uninteresting to the garbage collector.
 //
 // The 2-bit entries are split when written into the byte, so that the top half
-// of the byte contains 4 mark bits and the bottom half contains 4 pointer bits.
+// of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
+// bits.
 // This form allows a copy from the 1-bit to the 4-bit form to keep the
 // pointer bits contiguous, instead of having to space them out.
 //
 // The code makes use of the fact that the zero value for a heap bitmap
-// has no live pointer bit set and is (depending on position), not marked,
+// has no live pointer bit set and is (depending on position), not used,
 // not checkmarked, and is the dead encoding.
 // These properties must be preserved when modifying the encoding.
 //
@@ -63,6 +64,7 @@
 // It is still used in general, except in checkmark the type bit is repurposed
 // as the checkmark bit and then reinitialized (to 1) as the type bit when
 // finished.
+//
 
 package runtime
@@ -254,16 +256,20 @@ func markBitsForAddr(p uintptr) markBits {
 
 func (s *mspan) markBitsForAddr(p uintptr) markBits {
 	byteOffset := p - s.base()
-	markBitIndex := byteOffset / s.elemsize // TODO if hot spot use fancy divide....
-	return s.markBitsForIndex(markBitIndex)
-}
-
-func (s *mspan) markBitsForIndex(markBitIndex uintptr) markBits {
+	markBitIndex := uintptr(0)
+	if byteOffset != 0 {
+		// markBitIndex := (p - s.base()) / s.elemsize, using division by multiplication
+		markBitIndex = uintptr(uint64(byteOffset) >> s.divShift * uint64(s.divMul) >> s.divShift2)
+	}
 	whichByte := markBitIndex / 8
 	whichBit := markBitIndex % 8
 	return markBits{&s.gcmarkBits[whichByte], uint8(1 << whichBit), markBitIndex}
 }
 
+func (s *mspan) markBitsForBase() markBits {
+	return markBits{&s.gcmarkBits[0], uint8(1), 0}
+}
+
 // isMarked reports whether mark bit m is set.
 func (m markBits) isMarked() bool {
 	return *m.bytep&m.mask != 0
@@ -307,6 +313,17 @@ func markBitsForSpan(base uintptr) (mbits markBits) {
 	return mbits
 }
 
+// advance advances the markBits to the next object in the span.
+func (m *markBits) advance() {
+	if m.mask == 1<<7 {
+		m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
+		m.mask = 1
+	} else {
+		m.mask = m.mask << 1
+	}
+	m.index++
+}
+
 // heapBitsForAddr returns the heapBits for the address addr.
 // The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
 //
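The division-by-multiplication in markBitsForAddr replaces a hardware divide with a shift, a multiply, and another shift. The runtime derives per-sizeclass magic constants; the standalone program below demonstrates the trick with constants chosen here for a 48-byte class (the specific values are this example's assumption, not the runtime's table):

package main

import "fmt"

// Checks that ((off >> divShift) * divMul) >> divShift2 equals off/elemsize
// for every object offset in an 8 KB span of 48-byte objects.
func main() {
	const (
		elemsize  = 48
		divShift  = 4   // 48 = 16*3: shifting by 4 divides by 16 exactly
		divMul    = 342 // ceil(2^10 / 3): multiply-then-shift divides by 3
		divShift2 = 10
	)
	for off := uintptr(0); off < 8192; off += elemsize {
		got := uintptr(uint64(off) >> divShift * divMul >> divShift2)
		if got != off/elemsize {
			fmt.Println("mismatch at offset", off)
			return
		}
	}
	fmt.Println("magic division matches plain division for the 48-byte class")
}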
@@ -440,28 +457,13 @@ func (h heapBits) bits() uint32 {
 	return uint32(*h.bitp) >> (h.shift & 31)
 }
 
-// isMarked reports whether the heap bits have the marked bit set.
-// h must describe the initial word of the object.
-func (h heapBits) isMarked() bool {
+// morePointers returns true if this word and the words that follow it may
+// still contain pointers; false means this and all remaining words are scalars.
+// h must not describe the first or second word of the object.
+func (h heapBits) morePointers() bool {
 	return *h.bitp&(bitMarked<<h.shift) != 0
 }
 
-// setMarked sets the marked bit in the heap bits, atomically.
-// h must describe the initial word of the object.
-func (h heapBits) setMarked() {
-	// Each byte of GC bitmap holds info for four words.
-	// Might be racing with other updates, so use atomic update always.
-	// We used to be clever here and use a non-atomic update in certain
-	// cases, but it's not worth the risk.
-	atomic.Or8(h.bitp, bitMarked<<h.shift)
-}
-
-// setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically.
-// h must describe the initial word of the object.
-func (h heapBits) setMarkedNonAtomic() {
-	*h.bitp |= bitMarked << h.shift
-}
-
 // isPointer reports whether the heap bits describe a pointer word.
 // h must describe the initial word of the object.
 //
@@ -733,106 +735,134 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
 	}
 }
 
-// heapBitsSweepSpan coordinates the sweeping of a span by reading
-// and updating the corresponding heap bitmap entries.
-// For each free object in the span, heapBitsSweepSpan sets the type
-// bits for the first four words (less for smaller objects) to scalar/dead
-// and then calls f(p), where p is the object's base address.
-// f is expected to add the object to a free list.
-// For non-free objects, heapBitsSweepSpan turns off the marked bit.
-func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
+// heapBitsSweepSpan coordinates the sweeping of a span and inspects
+// each freed object. If objects are being traced or if msan is enabled
+// then heapBitsSweepSpan calls f(p), where p is the object's base address.
+// When not tracing and msan is not enabled, heapBitsSweepSpan is lightweight.
+// heapBitsSweepSpan never alters the pointer/scalar heap bitmaps; bitmap
+// maintenance is the responsibility of the allocation routines.
+// TODO:(rlh) Deal with the checkmark bits by moving them
+// out of the heap bitmap, thus enabling bulk clearing.
+func heapBitsSweepSpan(s *mspan, f func(uintptr)) (nfree int) {
+	base := s.base()
+	size := s.elemsize
+	n := s.nelems
+	cl := s.sizeclass
+	doCall := debug.allocfreetrace != 0 || msanenabled || cl == 0
+
 	h := heapBitsForSpan(base)
 	switch {
 	default:
 		throw("heapBitsSweepSpan")
 	case sys.PtrSize == 8 && size == sys.PtrSize:
-		// Consider mark bits in all four 2-bit entries of each bitmap byte.
-		bitp := h.bitp
-		for i := uintptr(0); i < n; i += 4 {
-			x := uint32(*bitp)
-			// Note that unlike the other size cases, we leave the pointer bits set here.
-			// These are initialized during initSpan when the span is created and left
-			// in place the whole time the span is used for pointer-sized objects.
-			// That lets heapBitsSetType avoid an atomic update to set the pointer bit
-			// during allocation.
-			if x&bitMarked != 0 {
-				x &^= bitMarked
-			} else {
+		nfree = heapBitsSweep8BitPtrs(h, s, base, n, cl, doCall, f)
+	case size%(4*sys.PtrSize) == 0:
+		nfree = heapBitsSweepMap(h, s, base, size, n, cl, doCall, f)
+	case size%(4*sys.PtrSize) == 2*sys.PtrSize:
+		nfree = heapBitsSweepMap(h, s, base, size, n, cl, doCall, f)
+	}
+	return
+}
+
+func heapBitsSweep8BitPtrs(h heapBits, s *mspan, base, n uintptr, cl uint8, doCall bool, f func(uintptr)) (nfree int) {
+	mbits := s.markBitsForBase()
+	for i := uintptr(0); i < n; i += 4 {
+		// Note that unlike the other size cases, we leave the pointer bits set here.
+		// These are initialized during initSpan when the span is created and left
+		// in place the whole time the span is used for pointer-sized objects.
+		// That lets heapBitsSetType avoid an atomic update to set the pointer bit
+		// during allocation.
+		if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
+			if doCall {
 				f(base + i*sys.PtrSize)
 			}
-			if x&(bitMarked<<heapBitsShift) != 0 {
-				x &^= bitMarked << heapBitsShift
-			} else {
+			if cl != 0 {
+				nfree++
+			}
+		}
+		mbits.advance()
+		if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
+			if doCall {
 				f(base + (i+1)*sys.PtrSize)
 			}
-			if x&(bitMarked<<(2*heapBitsShift)) != 0 {
-				x &^= bitMarked << (2 * heapBitsShift)
-			} else {
+			if cl != 0 {
+				nfree++
+			}
+		}
+		mbits.advance()
+		if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
+			if doCall {
 				f(base + (i+2)*sys.PtrSize)
 			}
-			if x&(bitMarked<<(3*heapBitsShift)) != 0 {
-				x &^= bitMarked << (3 * heapBitsShift)
-			} else {
+			if cl != 0 {
+				nfree++
+			}
+		}
+		mbits.advance()
+		if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
+			if doCall {
 				f(base + (i+3)*sys.PtrSize)
 			}
-			*bitp = uint8(x)
-			bitp = subtract1(bitp)
+			if cl != 0 {
+				nfree++
+			}
 		}
+		mbits.advance()
+	}
+	return
+}
 
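The four unrolled steps above all apply the same test. Restated as a helper for readability (not part of the CL; it is the same predicate by De Morgan's law): an object is freed by this sweep iff it is unmarked and was actually allocated, i.e. its index is below s.freeindex or its allocBits bit is set.

// Equivalent to !(mbits.isMarked() || mbits.index >= s.freeindex &&
// s.allocBits[mbits.index/8]&mbits.mask == 0) from the loop above.
func freedByThisSweep(s *mspan, mbits markBits) bool {
	wasFree := mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0
	return !mbits.isMarked() && !wasFree
}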
-	case size%(4*sys.PtrSize) == 0:
-		// Mark bit is in first word of each object.
-		// Each object starts at bit 0 of a heap bitmap byte.
-		bitp := h.bitp
-		step := size / heapBitmapScale
-		for i := uintptr(0); i < n; i++ {
-			x := uint32(*bitp)
-			if x&bitMarked != 0 {
-				x &^= bitMarked
-			} else {
-				x = 0
-				f(base + i*size)
+func (m *markBits) nextFreed(maxIndex uintptr, s *mspan) bool {
+	mByte := *m.bytep
+	for {
+		for mByte == 0xff {
+			if m.index >= maxIndex {
+				return false
 			}
-			*bitp = uint8(x)
-			bitp = subtractb(bitp, step)
+			m.index = (m.index + 8) &^ (8 - 1)
+			m.mask = 1
+			m.bytep = add1(m.bytep)
+			mByte = *m.bytep
 		}
-
-	case size%(4*sys.PtrSize) == 2*sys.PtrSize:
-		// Mark bit is in first word of each object,
-		// but every other object starts halfway through a heap bitmap byte.
-		// Unroll loop 2x to handle alternating shift count and step size.
-		bitp := h.bitp
-		step := size / heapBitmapScale
-		var i uintptr
-		for i = uintptr(0); i < n; i += 2 {
-			x := uint32(*bitp)
-			if x&bitMarked != 0 {
-				x &^= bitMarked
-			} else {
-				x &^= bitMarked | bitPointer | (bitMarked|bitPointer)<<heapBitsShift
-				f(base + i*size)
-				if size > 2*sys.PtrSize {
-					x = 0
+		if m.index >= maxIndex {
+			return false
+		}
+		for m.index < maxIndex {
+			if m.mask&mByte == 0 {
+				if m.index < s.freeindex {
+					return true
+				}
+				if s.allocBits[m.index/8]&m.mask != 0 {
+					return true
 				}
 			}
-			*bitp = uint8(x)
-			if i+1 >= n {
+			if m.mask == 1<<7 {
+				m.mask = 1
+				m.bytep = add1(m.bytep)
+				mByte = *m.bytep
+				m.index++
 				break
-			}
-			bitp = subtractb(bitp, step)
-			x = uint32(*bitp)
-			if x&(bitMarked<<(2*heapBitsShift)) != 0 {
-				x &^= bitMarked << (2 * heapBitsShift)
 			} else {
-				x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift)
-				f(base + (i+1)*size)
-				if size > 2*sys.PtrSize {
-					*subtract1(bitp) = 0
-				}
+				m.mask = m.mask << 1
+				m.index++
 			}
-			*bitp = uint8(x)
-			bitp = subtractb(bitp, step+1)
 		}
 	}
+	return false
+}
+
+func heapBitsSweepMap(h heapBits, s *mspan, base, size, n uintptr, cl uint8, doCall bool, f func(uintptr)) (nfree int) {
+	twobits := s.markBitsForBase()
+	for twobits.nextFreed(n, s) {
+		if doCall {
+			f(base + twobits.index*size)
+		}
+		if cl != 0 {
+			nfree++
+		}
+		twobits.advance()
+	}
+	return
 }
 
 // heapBitsSetType records that the new allocation [x, x+size)
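One detail of nextFreed worth calling out: a mark-bitmap byte equal to 0xff means all eight objects it covers are marked, so the whole byte is skipped. The index arithmetic doing the skip is just a round-up past the current bitmap byte; restated as a hypothetical standalone helper (not in the CL):

// (i + 8) &^ (8 - 1) advances an object index past its current bitmap byte.
// Examples: 0 -> 8, 5 -> 8, 8 -> 16.
func nextByteBoundary(i uintptr) uintptr {
	return (i + 8) &^ (8 - 1)
}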
@@ -862,7 +892,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 	// size is sizeof(_defer{}) (at least 6 words) and dataSize may be
 	// arbitrarily larger.
 	//
-	// The checks for size == ptrSize and size == 2*ptrSize can therefore
+	// The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
 	// assume that dataSize == size without checking it explicitly.
 
 	if sys.PtrSize == 8 && size == sys.PtrSize {
@@ -902,10 +932,13 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 	// (In general the number of instances of typ being allocated is
 	// dataSize/typ.size.)
 	if sys.PtrSize == 4 && dataSize == sys.PtrSize {
-		// 1 pointer.
+		// 1 pointer object. On 32-bit machines clear the bit for the
+		// unused second word.
 		if gcphase == _GCoff {
+			*h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
 			*h.bitp |= bitPointer << h.shift
 		} else {
+			atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
 			atomic.Or8(h.bitp, bitPointer<<h.shift)
 		}
 	} else {
@@ -918,7 +951,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 		}
 		return
 	}
-	// Otherwise typ.size must be 2*ptrSize, and typ.kind&kindGCProg == 0.
+	// Otherwise typ.size must be 2*sys.PtrSize,
+	// and typ.kind&kindGCProg == 0.
 	if doubleCheck {
 		if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
 			print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
@@ -928,8 +962,19 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 	b := uint32(*ptrmask)
 	hb := b & 3
 	if gcphase == _GCoff {
+		// bitPointer == 1, bitMarked is 1 << 4, heapBitsShift is 1.
+		// 110011 is shifted by h.shift and complemented.
+		// This clears out the bits that are about to be
+		// or'ed into *h.bitp in the next instructions.
+		*h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
 		*h.bitp |= uint8(hb << h.shift)
 	} else {
+		// TODO:(rlh) since the GC is not concurrently setting the
+		// mark bits in the heap map anymore, and malloc
+		// owns the span we are allocating in, why does this have
+		// to be atomic?
+
+		atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
 		atomic.Or8(h.bitp, uint8(hb<<h.shift))
 	}
 	return
@@ -1043,8 +1088,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 			// Replicate ptrmask to fill entire pbits uintptr.
 			// Doubling and truncating is fewer steps than
 			// iterating by nb each time. (nb could be 1.)
-			// Since we loaded typ.ptrdata/ptrSize bits
-			// but are pretending to have typ.size/ptrSize,
+			// Since we loaded typ.ptrdata/sys.PtrSize bits
+			// but are pretending to have typ.size/sys.PtrSize,
 			// there might be no replication necessary/possible.
 			pbits = b
 			endnb = nb
@@ -1135,13 +1180,15 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 		// not with its mark bit. Since there is only one allocation
 		// from a given span at a time, we should be able to set
 		// these bits non-atomically. Not worth the risk right now.
-		hb = (b & 3) << (2 * heapBitsShift)
+		hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
 		b >>= 2
 		nb -= 2
 		// Note: no bitMarker in hb because the first two words don't get markers from us.
 		if gcphase == _GCoff {
+			*hbitp &^= uint8((bitPointer | (bitPointer << heapBitsShift)) << (2 * heapBitsShift))
 			*hbitp |= uint8(hb)
 		} else {
+			atomic.And8(hbitp, ^(uint8(bitPointer|bitPointer<<heapBitsShift) << (2 * heapBitsShift)))
 			atomic.Or8(hbitp, uint8(hb))
 		}
 		hbitp = subtract1(hbitp)
@@ -1331,6 +1378,41 @@ Phase4:
 	}
 }
 
+// heapBitsSetTypeNoScan marks x as noscan. For objects of 1 or 2
+// words it sets their bitPointer bits to off (0).
+// For all other objects it sets the first 3 bitPointer bits to
+// off (0) and also sets the scan bit in the third word
+// to off (0).
+func heapBitsSetTypeNoScan(x, size uintptr) {
+	h := heapBitsForAddr(uintptr(x))
+	bitp := h.bitp
+
+	if sys.PtrSize == 8 && size == sys.PtrSize {
+		// If this is truly noScan the tinyAlloc logic should have noticed
+		// and combined such objects.
+		throw("noscan object is too small")
+	} else if size%(4*sys.PtrSize) == 0 {
+		*bitp &^= bitPointer | bitPointer<<heapBitsShift | (bitMarked|bitPointer)<<(2*heapBitsShift)
+	} else if size%(4*sys.PtrSize) == 2*sys.PtrSize {
+		if h.shift == 0 {
+			*bitp &^= (bitPointer | bitPointer<<heapBitsShift)
+			if size > 2*sys.PtrSize {
+				*bitp &^= (bitPointer | bitMarked) << (2 * heapBitsShift)
+			}
+		} else if h.shift == 2 {
+			*bitp &^= bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
+			if size > 2*sys.PtrSize {
+				bitp = subtract1(bitp)
+				*bitp &^= bitPointer | bitMarked
+			}
+		} else {
+			throw("Type has unrecognized size")
+		}
+	} else {
+		throw("Type has unrecognized size")
+	}
+}
+
 var debugPtrmask struct {
 	lock mutex
 	data *byte
@@ -1424,7 +1506,7 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u
 
 // progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
 // size is the size of the region described by prog, in bytes.
-// The resulting bitvector will have no more than size/ptrSize bits.
+// The resulting bitvector will have no more than size/sys.PtrSize bits.
 func progToPointerMask(prog *byte, size uintptr) bitvector {
 	n := (size/sys.PtrSize + 7) / 8
 	x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
@@ -1560,7 +1642,7 @@ Run:
 	// into a register and use that register for the entire loop
 	// instead of repeatedly reading from memory.
 	// Handling fewer than 8 bits here makes the general loop simpler.
-	// The cutoff is ptrSize*8 - 7 to guarantee that when we add
+	// The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
 	// the pattern to a bit buffer holding at most 7 bits (a partial byte)
 	// it will not overflow.
 	src := dst
@@ -1855,7 +1937,7 @@ func getgcmask(ep interface{}) (mask []byte) {
 		if hbits.isPointer() {
 			mask[i/sys.PtrSize] = 1
 		}
-		if i >= 2*sys.PtrSize && !hbits.isMarked() {
+		if i >= 2*sys.PtrSize && !hbits.morePointers() {
 			mask = mask[:i/sys.PtrSize]
 			break
 		}
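The "110011" comment in heapBitsSetType can be checked directly. With the bit values the comment gives (bitPointer == 1, bitMarked == 1<<4, heapBitsShift == 1), the clear-mask for the pointer and high bits of two adjacent words works out to binary 110011:

package main

import "fmt"

const (
	bitPointer    = 1 << 0
	bitMarked     = 1 << 4
	heapBitsShift = 1
)

func main() {
	mask := uint8(bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift))
	fmt.Printf("%06b\n", mask) // prints 110011
}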
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index 2230c5c200..424fa0efac 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -108,9 +108,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
 	_g_.m.locks++
 	// Return the current cached span to the central lists.
 	s := c.alloc[sizeclass]
-	if s.freelist.ptr() != nil {
-		throw("refill on a nonempty span")
+
+	if uintptr(s.ref) != s.nelems {
+		throw("refill of span with free space remaining")
 	}
+
 	if s != &emptymspan {
 		s.incache = false
 	}
@@ -120,10 +122,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
 	if s == nil {
 		throw("out of memory")
 	}
-	if s.freelist.ptr() == nil {
-		println(s.ref, (s.npages<<_PageShift)/s.elemsize)
-		throw("empty span")
+
+	if uintptr(s.ref) == s.nelems {
+		throw("span has no free space")
 	}
+
 	c.alloc[sizeclass] = s
 	_g_.m.locks--
 	return s
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go
index baca157db9..47d3ae2f81 100644
--- a/src/runtime/mcentral.go
+++ b/src/runtime/mcentral.go
@@ -18,7 +18,7 @@ import "runtime/internal/atomic"
 type mcentral struct {
 	lock      mutex
 	sizeclass int32
-	nonempty  mSpanList // list of spans with a free object
+	nonempty  mSpanList // list of spans with a free object, i.e., a nonempty free list
 	empty     mSpanList // list of spans with no free objects (or cached in an mcache)
 }
@@ -67,7 +67,9 @@ retry:
 			c.empty.insertBack(s)
 			unlock(&c.lock)
 			s.sweep(true)
-			if s.freelist.ptr() != nil {
+			freeIndex := s.nextFreeIndex(0)
+			if freeIndex != s.nelems {
+				s.freeindex = freeIndex
 				goto havespan
 			}
 			lock(&c.lock)
@@ -115,9 +117,6 @@ havespan:
 		// heap_live changed.
 		gcController.revise()
 	}
-	if s.freelist.ptr() == nil {
-		throw("freelist empty")
-	}
 	s.incache = true
 	return s
 }
@@ -150,15 +149,11 @@ func (c *mcentral) uncacheSpan(s *mspan) {
 // the latest generation.
 // If preserve=true, don't return the span to heap nor relink in MCentral lists;
 // caller takes care of it.
-func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool) bool {
+func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool, wasempty bool) bool {
 	if s.incache {
-		throw("freespan into cached span")
+		throw("freeSpan given cached span")
 	}
-
-	// Add the objects back to s's free list.
-	wasempty := s.freelist.ptr() == nil
-	end.ptr().next = s.freelist
-	s.freelist = start
 	s.ref -= uint16(n)
 
 	if preserve {
@@ -190,16 +185,14 @@ func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, p
 		return false
 	}
 
-	// s is completely freed, return it to the heap.
 	c.nonempty.remove(s)
 	s.needzero = 1
-	s.freelist = 0
 	unlock(&c.lock)
 	mheap_.freeSpan(s, 0)
 	return true
 }
 
-// Fetch a new span from the heap and carve into objects for the free list.
+// grow allocates a new empty span from the heap and initializes it for c's size class.
 func (c *mcentral) grow() *mspan {
 	npages := uintptr(class_to_allocnpages[c.sizeclass])
 	size := uintptr(class_to_size[c.sizeclass])
@@ -212,19 +205,7 @@ func (c *mcentral) grow() *mspan {
 	p := uintptr(s.start << _PageShift)
 	s.limit = p + size*n
 
-	head := gclinkptr(p)
-	tail := gclinkptr(p)
-	// i==0 iteration already done
-	for i := uintptr(1); i < n; i++ {
-		p += size
-		tail.ptr().next = gclinkptr(p)
-		tail = gclinkptr(p)
-	}
-	if s.freelist.ptr() != nil {
-		throw("freelist not empty")
-	}
-	tail.ptr().next = 0
-	s.freelist = head
+	heapBitsForSpan(s.base()).initSpan(s)
 	return s
 }
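With no freelist to inspect, mcentral can no longer tell on its own whether a span had any free objects before sweeping, so freeSpan now takes that fact as the wasempty argument. The sweeper in this CL computes it exactly as sketched below: a span "was empty" when the scan for the next free index runs off the end of the span.

// How mgcsweep.go derives the wasempty flag freeSpan now expects.
func spanWasEmpty(s *mspan) bool {
	return s.nextFreeIndex(s.freeindex) == s.nelems
}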
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 66d61bae1e..fe8a56460b 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -1044,9 +1044,9 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
 	if obj&(sys.PtrSize-1) != 0 {
 		throw("greyobject: obj not pointer-aligned")
 	}
-
+	mbits := span.markBitsForAddr(obj)
 	if useCheckmark {
-		if !hbits.isMarked() {
+		if !mbits.isMarked() {
 			printlock()
 			print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
 			print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
@@ -1068,10 +1068,10 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
 		}
 	} else {
 		// If marked we have nothing to do.
-		if hbits.isMarked() {
+		if mbits.isMarked() {
 			return
 		}
-		hbits.setMarked()
+		mbits.setMarked()
 
 		// If this is a noscan object, fast-track it to black
 		// instead of greying it.
@@ -1138,7 +1138,7 @@ func gcmarknewobject_m(obj, size uintptr) {
 	if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
 		throw("gcmarknewobject called while doing checkmark")
 	}
-	heapBitsForAddr(obj).setMarked()
+	markBitsForAddr(obj).setMarked()
 
 	atomic.Xadd64(&work.bytesMarked, int64(size))
 }
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index 31d1a80183..7a1a76cbad 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -192,16 +192,13 @@ func (s *mspan) sweep(preserve bool) bool {
 	c := _g_.m.mcache
 	freeToHeap := false
 
-	// Mark any free objects in this span so we don't collect them.
-	sstart := uintptr(s.start << _PageShift)
-	for link := s.freelist; link.ptr() != nil; link = link.ptr().next {
-		if uintptr(link) < sstart || s.limit <= uintptr(link) {
-			// Free list is corrupted.
-			dumpFreeList(s)
-			throw("free list corrupted")
-		}
-		heapBitsForAddr(uintptr(link)).setMarkedNonAtomic()
-	}
+	// The allocBits indicate which unmarked objects don't need to be
+	// processed since they were free at the end of the last GC cycle
+	// and were not allocated since then.
+	// If the allocBits index is >= s.freeindex and the bit
+	// is not set then the object remains unallocated
+	// since the last GC.
+	// This situation is analogous to being on a freelist.
 
 	// Unlink & free special records for any objects we're about to free.
 	// Two complications here:
@@ -216,8 +213,8 @@ func (s *mspan) sweep(preserve bool) bool {
 	for special != nil {
 		// A finalizer can be set for an inner byte of an object, find object beginning.
 		p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
-		hbits := heapBitsForAddr(p)
-		if !hbits.isMarked() {
+		mbits := s.markBitsForAddr(p)
+		if !mbits.isMarked() {
 			// This object is not marked and has at least one special record.
 			// Pass 1: see if it has at least one finalizer.
 			hasFin := false
@@ -225,7 +222,7 @@ func (s *mspan) sweep(preserve bool) bool {
 			for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
 				if tmp.kind == _KindSpecialFinalizer {
 					// Stop freeing of object if it has a finalizer.
-					hbits.setMarkedNonAtomic()
+					mbits.setMarkedNonAtomic()
 					hasFin = true
 					break
 				}
@@ -259,8 +256,7 @@ func (s *mspan) sweep(preserve bool) bool {
 
 	// This thread owns the span now, so it can manipulate
 	// the block bitmap without atomic operations.
-	size, n, _ := s.layout()
-	heapBitsSweepSpan(s.base(), size, n, func(p uintptr) {
+	nfree = heapBitsSweepSpan(s, func(p uintptr) {
 		// At this point we know that we are looking at garbage object
 		// that needs to be collected.
 		if debug.allocfreetrace != 0 {
@@ -288,17 +284,18 @@ func (s *mspan) sweep(preserve bool) bool {
 		} else if size > sys.PtrSize {
 			*(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = 0
 		}
-		if head.ptr() == nil {
-			head = gclinkptr(p)
-		} else {
-			end.ptr().next = gclinkptr(p)
-		}
-		end = gclinkptr(p)
-		end.ptr().next = gclinkptr(0x0bade5)
-		nfree++
 		}
 	})
 
+	wasempty := s.nextFreeIndex(s.freeindex) == s.nelems
+
+	s.freeindex = 0 // reset allocation index to start of span.
+
+	// Swap role of allocBits with gcmarkBits
+	// Clear gcmarkBits in preparation for next GC
+	s.allocBits, s.gcmarkBits = s.gcmarkBits, s.allocBits
+	s.clearGCMarkBits() // prepare for next GC
+
 	// We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
 	// because of the potential for a concurrent free/SetFinalizer.
 	// But we need to set it before we make the span available for allocation
@@ -311,11 +308,14 @@ func (s *mspan) sweep(preserve bool) bool {
 		print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
 		throw("MSpan_Sweep: bad span state after sweep")
 	}
+	// Serialization point.
+	// At this point the mark bits are cleared and allocation is ready
+	// to go, so release the span.
 	atomic.Store(&s.sweepgen, sweepgen)
 	}
 
 	if nfree > 0 {
 		c.local_nsmallfree[cl] += uintptr(nfree)
-		res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve)
+		res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve, wasempty)
 		// MCentral_FreeSpan updates sweepgen
 	} else if freeToHeap {
 		// Free large span to heap
@@ -399,27 +399,3 @@ func reimburseSweepCredit(unusableBytes uintptr) {
 		throw("spanBytesAlloc underflow")
 	}
 }
-
-func dumpFreeList(s *mspan) {
-	printlock()
-	print("runtime: free list of span ", s, ":\n")
-	sstart := uintptr(s.start << _PageShift)
-	link := s.freelist
-	for i := 0; i < int(s.npages*_PageSize/s.elemsize); i++ {
-		if i != 0 {
-			print(" -> ")
-		}
-		print(hex(link))
-		if link.ptr() == nil {
-			break
-		}
-		if uintptr(link) < sstart || s.limit <= uintptr(link) {
-			// Bad link. Stop walking before we crash.
-			print(" (BAD)")
-			break
-		}
-		link = link.ptr().next
-	}
-	print("\n")
-	printunlock()
-}
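The sweep now swaps allocBits and gcmarkBits and then clears the (newly designated) mark bitmap. s.clearGCMarkBits is called but not shown in this diff; given the swap just performed, a plausible minimal form is the assumption below, not the CL's actual code:

// Sketch: zero the span's mark bitmap in preparation for the next GC cycle.
func (s *mspan) clearGCMarkBits() {
	for i := range s.gcmarkBits {
		s.gcmarkBits[i] = 0
	}
}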
- print(" (BAD)") - break - } - link = link.ptr().next - } - print("\n") - printunlock() -} diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index a3d34a360e..d5dde5e72e 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -119,8 +119,7 @@ type mspan struct { start pageID // starting page number npages uintptr // number of pages in span - freelist gclinkptr // list of free objects for _MSpanInUse - stackfreelist gclinkptr // list of free stacks, avoids overloading freelist for _MSpanStack + stackfreelist gclinkptr // list of free stacks, avoids overloading freelist // freeindex is the slot index between 0 and nelems at which to begin scanning // for the next free object in this span. @@ -472,7 +471,6 @@ func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan { // able to map interior pointer to containing span. atomic.Store(&s.sweepgen, h.sweepgen) s.state = _MSpanInUse - s.freelist = 0 s.ref = 0 s.sizeclass = uint8(sizeclass) if sizeclass == 0 { @@ -914,7 +912,6 @@ func (span *mspan) init(start pageID, npages uintptr) { span.list = nil span.start = start span.npages = npages - span.freelist = 0 span.ref = 0 span.sizeclass = 0 span.incache = false @@ -925,6 +922,17 @@ func (span *mspan) init(start pageID, npages uintptr) { span.speciallock.key = 0 span.specials = nil span.needzero = 0 + span.freeindex = 0 + span.allocBits = &span.markbits1 + span.gcmarkBits = &span.markbits2 + // determine if this is actually needed. It is once / span so it + // isn't expensive. This is to be replaced by an arena + // based system where things can be cleared all at once so + // don't worry about optimizing this. + for i := 0; i < len(span.markbits1); i++ { + span.allocBits[i] = 0 + span.gcmarkBits[i] = 0 + } } func (span *mspan) inList() bool { diff --git a/src/runtime/stack.go b/src/runtime/stack.go index 5e373f1b94..8fd7ef2bcf 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -1137,7 +1137,6 @@ func freeStackSpans() { next := s.next if s.ref == 0 { list.remove(s) - s.freelist = 0 s.stackfreelist = 0 mheap_.freeStack(s) } |
