diff options
Diffstat (limited to 'src/runtime')
54 files changed, 681 insertions, 1672 deletions
diff --git a/src/runtime/asm.s b/src/runtime/asm.s index 95a3424de2..27d8df9e06 100644 --- a/src/runtime/asm.s +++ b/src/runtime/asm.s @@ -11,24 +11,3 @@ DATA runtime·no_pointers_stackmap+0x00(SB)/4, $2 DATA runtime·no_pointers_stackmap+0x04(SB)/4, $0 GLOBL runtime·no_pointers_stackmap(SB),RODATA, $8 - -// NaCl requires that these skips be verifiable machine code. -#ifdef GOARCH_amd64 -#define SKIP4 BYTE $0x90; BYTE $0x90; BYTE $0x90; BYTE $0x90 -#endif -#ifdef GOARCH_386 -#define SKIP4 BYTE $0x90; BYTE $0x90; BYTE $0x90; BYTE $0x90 -#endif -#ifdef GOARCH_wasm -#define SKIP4 UNDEF; UNDEF; UNDEF; UNDEF -#endif -#ifndef SKIP4 -#define SKIP4 WORD $0 -#endif - -#define SKIP16 SKIP4; SKIP4; SKIP4; SKIP4 -#define SKIP64 SKIP16; SKIP16; SKIP16; SKIP16 - -// This function must be sizeofSkipFunction bytes. -TEXT runtime·skipPleaseUseCallersFrames(SB),NOSPLIT,$0-0 - SKIP64; SKIP64; SKIP64; SKIP64 diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s index 11d2f2f51a..23387a2165 100644 --- a/src/runtime/asm_ppc64x.s +++ b/src/runtime/asm_ppc64x.s @@ -916,23 +916,23 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1 // - R20 is the destination of the write // - R21 is the value being written at R20. // It clobbers condition codes. -// It does not clobber R0 through R15, +// It does not clobber R0 through R17 (except special registers), // but may clobber any other register, *including* R31. TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$112 // The standard prologue clobbers R31. - // We use R16 and R17 as scratch registers. - MOVD g_m(g), R16 - MOVD m_p(R16), R16 - MOVD (p_wbBuf+wbBuf_next)(R16), R17 + // We use R18 and R19 as scratch registers. + MOVD g_m(g), R18 + MOVD m_p(R18), R18 + MOVD (p_wbBuf+wbBuf_next)(R18), R19 // Increment wbBuf.next position. - ADD $16, R17 - MOVD R17, (p_wbBuf+wbBuf_next)(R16) - MOVD (p_wbBuf+wbBuf_end)(R16), R16 - CMP R16, R17 + ADD $16, R19 + MOVD R19, (p_wbBuf+wbBuf_next)(R18) + MOVD (p_wbBuf+wbBuf_end)(R18), R18 + CMP R18, R19 // Record the write. 
- MOVD R21, -16(R17) // Record value - MOVD (R20), R16 // TODO: This turns bad writes into bad reads. - MOVD R16, -8(R17) // Record *slot + MOVD R21, -16(R19) // Record value + MOVD (R20), R18 // TODO: This turns bad writes into bad reads. + MOVD R18, -8(R19) // Record *slot // Is the buffer full? (flags set in CMP above) BEQ flush ret: @@ -956,11 +956,12 @@ flush: MOVD R8, (FIXED_FRAME+56)(R1) MOVD R9, (FIXED_FRAME+64)(R1) MOVD R10, (FIXED_FRAME+72)(R1) - MOVD R11, (FIXED_FRAME+80)(R1) - MOVD R12, (FIXED_FRAME+88)(R1) + // R11, R12 may be clobbered by external-linker-inserted trampoline // R13 is REGTLS - MOVD R14, (FIXED_FRAME+96)(R1) - MOVD R15, (FIXED_FRAME+104)(R1) + MOVD R14, (FIXED_FRAME+80)(R1) + MOVD R15, (FIXED_FRAME+88)(R1) + MOVD R16, (FIXED_FRAME+96)(R1) + MOVD R17, (FIXED_FRAME+104)(R1) // This takes arguments R20 and R21. CALL runtime·wbBufFlush(SB) @@ -975,10 +976,10 @@ flush: MOVD (FIXED_FRAME+56)(R1), R8 MOVD (FIXED_FRAME+64)(R1), R9 MOVD (FIXED_FRAME+72)(R1), R10 - MOVD (FIXED_FRAME+80)(R1), R11 - MOVD (FIXED_FRAME+88)(R1), R12 - MOVD (FIXED_FRAME+96)(R1), R14 - MOVD (FIXED_FRAME+104)(R1), R15 + MOVD (FIXED_FRAME+80)(R1), R14 + MOVD (FIXED_FRAME+88)(R1), R15 + MOVD (FIXED_FRAME+96)(R1), R16 + MOVD (FIXED_FRAME+104)(R1), R17 JMP ret // Note: these functions use a special calling convention to save generated code space. 
diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s index d7c45a183d..8f6c8773eb 100644 --- a/src/runtime/asm_riscv64.s +++ b/src/runtime/asm_riscv64.s @@ -79,7 +79,7 @@ TEXT setg_gcc<>(SB),NOSPLIT,$0-0 // func cputicks() int64 TEXT runtime·cputicks(SB),NOSPLIT,$0-8 - WORD $0xc0102573 // rdtime a0 + RDTIME A0 MOV A0, ret+0(FP) RET diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index a4e64b00cc..099aa540e0 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -605,7 +605,7 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) { hbits := heapBitsForAddr(base) n := span.elemsize for i = uintptr(0); i < n; i += sys.PtrSize { - if i != 1*sys.PtrSize && !hbits.morePointers() { + if !hbits.morePointers() { // No more possible pointers. break } diff --git a/src/runtime/chan.go b/src/runtime/chan.go index f6f4ffd02e..0afe5d962b 100644 --- a/src/runtime/chan.go +++ b/src/runtime/chan.go @@ -263,18 +263,19 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { } gp.waiting = nil gp.activeStackChans = false - if gp.param == nil { - if c.closed == 0 { - throw("chansend: spurious wakeup") - } - panic(plainError("send on closed channel")) - } + closed := !mysg.success gp.param = nil if mysg.releasetime > 0 { blockevent(mysg.releasetime-t0, 2) } mysg.c = nil releaseSudog(mysg) + if closed { + if c.closed == 0 { + throw("chansend: spurious wakeup") + } + panic(plainError("send on closed channel")) + } return true } @@ -311,6 +312,7 @@ func send(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) { gp := sg.g unlockf() gp.param = unsafe.Pointer(sg) + sg.success = true if sg.releasetime != 0 { sg.releasetime = cputicks() } @@ -384,7 +386,8 @@ func closechan(c *hchan) { sg.releasetime = cputicks() } gp := sg.g - gp.param = nil + gp.param = unsafe.Pointer(sg) + sg.success = false if raceenabled { raceacquireg(gp, c.raceaddr()) } @@ -402,7 +405,8 @@ func closechan(c *hchan) { 
sg.releasetime = cputicks() } gp := sg.g - gp.param = nil + gp.param = unsafe.Pointer(sg) + sg.success = false if raceenabled { raceacquireg(gp, c.raceaddr()) } @@ -575,11 +579,11 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) if mysg.releasetime > 0 { blockevent(mysg.releasetime-t0, 2) } - closed := gp.param == nil + success := mysg.success gp.param = nil mysg.c = nil releaseSudog(mysg) - return true, !closed + return true, success } // recv processes a receive operation on a full channel c. @@ -632,6 +636,7 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) { gp := sg.g unlockf() gp.param = unsafe.Pointer(sg) + sg.success = true if sg.releasetime != 0 { sg.releasetime = cputicks() } diff --git a/src/runtime/checkptr_test.go b/src/runtime/checkptr_test.go index 8ab8a4937c..194cc1243a 100644 --- a/src/runtime/checkptr_test.go +++ b/src/runtime/checkptr_test.go @@ -27,6 +27,7 @@ func TestCheckPtr(t *testing.T) { {"CheckPtrAlignmentPtr", "fatal error: checkptr: misaligned pointer conversion\n"}, {"CheckPtrAlignmentNoPtr", ""}, {"CheckPtrArithmetic", "fatal error: checkptr: pointer arithmetic result points to invalid allocation\n"}, + {"CheckPtrArithmetic2", "fatal error: checkptr: pointer arithmetic result points to invalid allocation\n"}, {"CheckPtrSize", "fatal error: checkptr: converted pointer straddles multiple allocations\n"}, {"CheckPtrSmall", "fatal error: checkptr: pointer arithmetic computed bad pointer value\n"}, } diff --git a/src/runtime/closure_test.go b/src/runtime/closure_test.go index ea65fbd5f5..741c932eab 100644 --- a/src/runtime/closure_test.go +++ b/src/runtime/closure_test.go @@ -1,6 +1,7 @@ // Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+ package runtime_test import "testing" diff --git a/src/runtime/defs_linux_386.go b/src/runtime/defs_linux_386.go index f4db8cf927..64a0fbcaaa 100644 --- a/src/runtime/defs_linux_386.go +++ b/src/runtime/defs_linux_386.go @@ -226,14 +226,3 @@ type sockaddr_un struct { family uint16 path [108]byte } - -const __NEW_UTS_LEN = 64 - -type new_utsname struct { - sysname [__NEW_UTS_LEN + 1]byte - nodename [__NEW_UTS_LEN + 1]byte - release [__NEW_UTS_LEN + 1]byte - version [__NEW_UTS_LEN + 1]byte - machine [__NEW_UTS_LEN + 1]byte - domainname [__NEW_UTS_LEN + 1]byte -} diff --git a/src/runtime/defs_linux_amd64.go b/src/runtime/defs_linux_amd64.go index 8480d85219..1ae18a309b 100644 --- a/src/runtime/defs_linux_amd64.go +++ b/src/runtime/defs_linux_amd64.go @@ -262,14 +262,3 @@ type sockaddr_un struct { family uint16 path [108]byte } - -const __NEW_UTS_LEN = 64 - -type new_utsname struct { - sysname [__NEW_UTS_LEN + 1]byte - nodename [__NEW_UTS_LEN + 1]byte - release [__NEW_UTS_LEN + 1]byte - version [__NEW_UTS_LEN + 1]byte - machine [__NEW_UTS_LEN + 1]byte - domainname [__NEW_UTS_LEN + 1]byte -} diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 5ab03f3f99..d591fdc4e9 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -43,8 +43,6 @@ var PhysHugePageSize = physHugePageSize var NetpollGenericInit = netpollGenericInit -var ParseRelease = parseRelease - var Memmove = memmove var MemclrNoHeapPointers = memclrNoHeapPointers diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go index ec1ba90c2e..0808b416f0 100644 --- a/src/runtime/gcinfo_test.go +++ b/src/runtime/gcinfo_test.go @@ -77,7 +77,7 @@ func TestGCInfo(t *testing.T) { } for i := 0; i < 10; i++ { - verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(padDead(infoPtr))) + verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(infoPtr)) verifyGCInfo(t, "heap PtrSlice", escape(&make([]*byte, 10)[0]), trimDead(infoPtr10)) verifyGCInfo(t, "heap ScalarPtr", 
escape(new(ScalarPtr)), trimDead(infoScalarPtr)) verifyGCInfo(t, "heap ScalarPtrSlice", escape(&make([]ScalarPtr, 4)[0]), trimDead(infoScalarPtr4)) @@ -97,25 +97,10 @@ func verifyGCInfo(t *testing.T, name string, p interface{}, mask0 []byte) { } } -func padDead(mask []byte) []byte { - // Because the dead bit isn't encoded in the second word, - // and because on 32-bit systems a one-word allocation - // uses a two-word block, the pointer info for a one-word - // object needs to be expanded to include an extra scalar - // on 32-bit systems to match the heap bitmap. - if runtime.PtrSize == 4 && len(mask) == 1 { - return []byte{mask[0], 0} - } - return mask -} - func trimDead(mask []byte) []byte { - for len(mask) > 2 && mask[len(mask)-1] == typeScalar { + for len(mask) > 0 && mask[len(mask)-1] == typeScalar { mask = mask[:len(mask)-1] } - if len(mask) == 2 && mask[0] == typeScalar && mask[1] == typeScalar { - mask = mask[:0] - } return mask } diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index cfd5c251b4..4c35309211 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -713,7 +713,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector { i := uintptr(0) hbits := heapBitsForAddr(p) for ; i < nptr; i++ { - if i != 1 && !hbits.morePointers() { + if !hbits.morePointers() { break // end of object } if hbits.isPointer() { diff --git a/src/runtime/lockrank.go b/src/runtime/lockrank.go index 000193585d..b23cf767be 100644 --- a/src/runtime/lockrank.go +++ b/src/runtime/lockrank.go @@ -67,8 +67,6 @@ const ( lockRankRwmutexW lockRankRwmutexR - lockRankMcentral // For !go115NewMCentralImpl - lockRankSpine // For !go115NewMCentralImpl lockRankSpanSetSpine lockRankGscan lockRankStackpool @@ -149,8 +147,6 @@ var lockNames = []string{ lockRankRwmutexW: "rwmutexW", lockRankRwmutexR: "rwmutexR", - lockRankMcentral: "mcentral", - lockRankSpine: "spine", lockRankSpanSetSpine: "spanSetSpine", lockRankGscan: "gscan", lockRankStackpool: "stackpool", @@ 
-228,18 +224,16 @@ var lockPartialOrder [][]lockRank = [][]lockRank{ lockRankRwmutexW: {}, lockRankRwmutexR: {lockRankRwmutexW}, - lockRankMcentral: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, - lockRankSpine: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, lockRankSpanSetSpine: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, - lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankNotifyList, lockRankProf, lockRankGcBitsArenas, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankMcentral, lockRankSpine, lockRankSpanSetSpine}, - lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankMcentral, lockRankSpine, lockRankSpanSetSpine, lockRankGscan}, - lockRankStackLarge: {lockRankSysmon, lockRankAssistQueue, 
lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral, lockRankSpanSetSpine, lockRankGscan}, + lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankNotifyList, lockRankProf, lockRankGcBitsArenas, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankSpanSetSpine}, + lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankSpanSetSpine, lockRankGscan}, + lockRankStackLarge: {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan}, lockRankDefer: {}, lockRankSudog: {lockRankNotifyList, lockRankHchan}, lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankRoot, lockRankGscan, lockRankDefer, lockRankSudog}, - lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan, 
lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans, lockRankSpanSetSpine}, + lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans, lockRankSpanSetSpine}, lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, - lockRankGlobalAlloc: {lockRankProf, lockRankSpine, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial}, + lockRankGlobalAlloc: {lockRankProf, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial}, lockRankGFree: {lockRankSched}, lockRankHchanLeaf: {lockRankGscan, lockRankHchanLeaf}, diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index b3fac3de24..e46327f9ce 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1178,11 +1178,9 @@ func largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { if s == nil { throw("out of memory") } - if go115NewMCentralImpl { - // Put the large span in the mcentral swept list so that it's - // visible to the background sweeper. - mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s) - } + // Put the large span in the mcentral swept list so that it's + // visible to the background sweeper. 
+ mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s) s.limit = s.base() + size heapBitsForAddr(s.base()).initSpan(s) return s diff --git a/src/runtime/map_benchmark_test.go b/src/runtime/map_benchmark_test.go index 893cb6c5b6..d0becc9ddb 100644 --- a/src/runtime/map_benchmark_test.go +++ b/src/runtime/map_benchmark_test.go @@ -1,6 +1,7 @@ // Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + package runtime_test import ( diff --git a/src/runtime/map_fast32.go b/src/runtime/map_fast32.go index 534454f3ad..d035ed0386 100644 --- a/src/runtime/map_fast32.go +++ b/src/runtime/map_fast32.go @@ -299,8 +299,12 @@ search: continue } // Only clear key if there are pointers in it. - if t.key.ptrdata != 0 { - memclrHasPointers(k, t.key.size) + // This can only happen if pointers are 32 bit + // wide as 64 bit pointers do not fit into a 32 bit key. + if sys.PtrSize == 4 && t.key.ptrdata != 0 { + // The key must be a pointer as we checked pointers are + // 32 bits wide and the key is 32 bits wide also. + *(*unsafe.Pointer)(k) = nil } e := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.elemsize)) if t.elem.ptrdata != 0 { diff --git a/src/runtime/map_fast64.go b/src/runtime/map_fast64.go index 1669c7cfe9..f1f3927598 100644 --- a/src/runtime/map_fast64.go +++ b/src/runtime/map_fast64.go @@ -300,7 +300,13 @@ search: } // Only clear key if there are pointers in it. if t.key.ptrdata != 0 { - memclrHasPointers(k, t.key.size) + if sys.PtrSize == 8 { + *(*unsafe.Pointer)(k) = nil + } else { + // There are three ways to squeeze at one ore more 32 bit pointers into 64 bits. + // Just call memclrHasPointers instead of trying to handle all cases here. 
+ memclrHasPointers(k, 8) + } } e := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.elemsize)) if t.elem.ptrdata != 0 { diff --git a/src/runtime/map_test.go b/src/runtime/map_test.go index 1b7ccad6ed..302b3c23c1 100644 --- a/src/runtime/map_test.go +++ b/src/runtime/map_test.go @@ -993,6 +993,27 @@ func benchmarkMapDeleteStr(b *testing.B, n int) { } } +func benchmarkMapDeletePointer(b *testing.B, n int) { + i2p := make([]*int, n) + for i := 0; i < n; i++ { + i2p[i] = new(int) + } + a := make(map[*int]int, n) + b.ResetTimer() + k := 0 + for i := 0; i < b.N; i++ { + if len(a) == 0 { + b.StopTimer() + for j := 0; j < n; j++ { + a[i2p[j]] = j + } + k = i + b.StartTimer() + } + delete(a, i2p[i-k]) + } +} + func runWith(f func(*testing.B, int), v ...int) func(*testing.B) { return func(b *testing.B) { for _, n := range v { @@ -1023,6 +1044,7 @@ func BenchmarkMapDelete(b *testing.B) { b.Run("Int32", runWith(benchmarkMapDeleteInt32, 100, 1000, 10000)) b.Run("Int64", runWith(benchmarkMapDeleteInt64, 100, 1000, 10000)) b.Run("Str", runWith(benchmarkMapDeleteStr, 100, 1000, 10000)) + b.Run("Pointer", runWith(benchmarkMapDeletePointer, 100, 1000, 10000)) } func TestDeferDeleteSlow(t *testing.T) { diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 35332c91c4..8de44c14b9 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -6,10 +6,11 @@ // // Stack, data, and bss bitmaps // -// Stack frames and global variables in the data and bss sections are described -// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer -// to be visited during GC. The bits in each byte are consumed starting with -// the low bit: 1<<0, 1<<1, and so on. +// Stack frames and global variables in the data and bss sections are +// described by bitmaps with 1 bit per pointer-sized word. A "1" bit +// means the word is a live pointer to be visited by the GC (referred to +// as "pointer"). 
A "0" bit means the word should be ignored by GC +// (referred to as "scalar", though it could be a dead pointer value). // // Heap bitmap // @@ -20,18 +21,13 @@ // through start+3*ptrSize, ha.bitmap[1] holds the entries for // start+4*ptrSize through start+7*ptrSize, and so on. // -// In each 2-bit entry, the lower bit holds the same information as in the 1-bit -// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. -// The meaning of the high bit depends on the position of the word being described -// in its allocated object. In all words *except* the second word, the -// high bit indicates that the object is still being described. In -// these words, if a bit pair with a high bit 0 is encountered, the -// low bit can also be assumed to be 0, and the object description is -// over. This 00 is called the ``dead'' encoding: it signals that the -// rest of the words in the object are uninteresting to the garbage -// collector. -// -// In the second word, the high bit is the GC ``checkmarked'' bit (see below). +// In each 2-bit entry, the lower bit is a pointer/scalar bit, just +// like in the stack/data bitmaps described above. The upper bit +// indicates scan/dead: a "1" value ("scan") indicates that there may +// be pointers in later words of the allocation, and a "0" value +// ("dead") indicates there are no more pointers in the allocation. If +// the upper bit is 0, the lower bit must also be 0, and this +// indicates scanning can ignore the rest of the allocation. // // The 2-bit entries are split when written into the byte, so that the top half // of the byte contains 4 high bits and the bottom half contains 4 low (pointer) @@ -39,38 +35,14 @@ // This form allows a copy from the 1-bit to the 4-bit form to keep the // pointer bits contiguous, instead of having to space them out. 
// -// The code makes use of the fact that the zero value for a heap bitmap -// has no live pointer bit set and is (depending on position), not used, -// not checkmarked, and is the dead encoding. -// These properties must be preserved when modifying the encoding. +// The code makes use of the fact that the zero value for a heap +// bitmap means scalar/dead. This property must be preserved when +// modifying the encoding. // // The bitmap for noscan spans is not maintained. Code must ensure // that an object is scannable before consulting its bitmap by // checking either the noscan bit in the span or by consulting its // type's information. -// -// Checkmarks -// -// In a concurrent garbage collector, one worries about failing to mark -// a live object due to mutations without write barriers or bugs in the -// collector implementation. As a sanity check, the GC has a 'checkmark' -// mode that retraverses the object graph with the world stopped, to make -// sure that everything that should be marked is marked. -// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry -// for the second word of the object holds the checkmark bit. -// When not in checkmark mode, this bit is set to 1. -// -// The smallest possible allocation is 8 bytes. On a 32-bit machine, that -// means every allocated object has two words, so there is room for the -// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is -// just one word, so the second bit pair is not available for encoding the -// checkmark. However, because non-pointer allocations are combined -// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation -// must be a pointer, so the type bit in the first word is not actually needed. -// It is still used in general, except in checkmark the type bit is repurposed -// as the checkmark bit and then reinitialized (to 1) as the type bit when -// finished. 
-// package runtime @@ -551,33 +523,6 @@ func (h heapBits) isPointer() bool { return h.bits()&bitPointer != 0 } -// isCheckmarked reports whether the heap bits have the checkmarked bit set. -// It must be told how large the object at h is, because the encoding of the -// checkmark bit varies by size. -// h must describe the initial word of the object. -func (h heapBits) isCheckmarked(size uintptr) bool { - if size == sys.PtrSize { - return (*h.bitp>>h.shift)&bitPointer != 0 - } - // All multiword objects are 2-word aligned, - // so we know that the initial word's 2-bit pair - // and the second word's 2-bit pair are in the - // same heap bitmap byte, *h.bitp. - return (*h.bitp>>(heapBitsShift+h.shift))&bitScan != 0 -} - -// setCheckmarked sets the checkmarked bit. -// It must be told how large the object at h is, because the encoding of the -// checkmark bit varies by size. -// h must describe the initial word of the object. -func (h heapBits) setCheckmarked(size uintptr) { - if size == sys.PtrSize { - atomic.Or8(h.bitp, bitPointer<<h.shift) - return - } - atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift)) -} - // bulkBarrierPreWrite executes a write barrier // for every pointer slot in the memory range [src, src+size), // using pointer/scalar information from [dst, dst+size). @@ -795,7 +740,6 @@ func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) { // TODO(rsc): Perhaps introduce a different heapBitsSpan type. // initSpan initializes the heap bitmap for a span. -// It clears all checkmark bits. // If this is a span of pointer-sized objects, it initializes all // words to pointer/scan. // Otherwise, it initializes all words to scalar/dead. @@ -826,45 +770,6 @@ func (h heapBits) initSpan(s *mspan) { } } -// initCheckmarkSpan initializes a span for being checkmarked. -// It clears the checkmark bits, which are set to 1 in normal operation. 
-func (h heapBits) initCheckmarkSpan(size, n, total uintptr) { - // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely. - if sys.PtrSize == 8 && size == sys.PtrSize { - // Checkmark bit is type bit, bottom bit of every 2-bit entry. - // Only possible on 64-bit system, since minimum size is 8. - // Must clear type bit (checkmark bit) of every word. - // The type bit is the lower of every two-bit pair. - for i := uintptr(0); i < n; i += wordsPerBitmapByte { - *h.bitp &^= bitPointerAll - h = h.forward(wordsPerBitmapByte) - } - return - } - for i := uintptr(0); i < n; i++ { - *h.bitp &^= bitScan << (heapBitsShift + h.shift) - h = h.forward(size / sys.PtrSize) - } -} - -// clearCheckmarkSpan undoes all the checkmarking in a span. -// The actual checkmark bits are ignored, so the only work to do -// is to fix the pointer bits. (Pointer bits are ignored by scanobject -// but consulted by typedmemmove.) -func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { - // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely. - if sys.PtrSize == 8 && size == sys.PtrSize { - // Checkmark bit is type bit, bottom bit of every 2-bit entry. - // Only possible on 64-bit system, since minimum size is 8. - // Must clear type bit (checkmark bit) of every word. - // The type bit is the lower of every two-bit pair. - for i := uintptr(0); i < n; i += wordsPerBitmapByte { - *h.bitp |= bitPointerAll - h = h.forward(wordsPerBitmapByte) - } - } -} - // countAlloc returns the number of objects allocated in span s by // scanning the allocation bitmap. func (s *mspan) countAlloc() int { @@ -957,11 +862,11 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { if sys.PtrSize == 4 && dataSize == sys.PtrSize { // 1 pointer object. On 32-bit machines clear the bit for the // unused second word. 
- *h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift + *h.bitp &^= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift *h.bitp |= (bitPointer | bitScan) << h.shift } else { // 2-element slice of pointer. - *h.bitp |= (bitPointer | bitScan | bitPointer<<heapBitsShift) << h.shift + *h.bitp |= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift } return } @@ -974,11 +879,10 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { } } b := uint32(*ptrmask) - hb := (b & 3) | bitScan - // bitPointer == 1, bitScan is 1 << 4, heapBitsShift is 1. - // 110011 is shifted h.shift and complemented. - // This clears out the bits that are about to be - // ored into *h.hbitp in the next instructions. + hb := b & 3 + hb |= bitScanAll & ((bitScan << (typ.ptrdata / sys.PtrSize)) - 1) + // Clear the bits for this object so we can set the + // appropriate ones. *h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift *h.bitp |= uint8(hb << h.shift) return @@ -1155,11 +1059,6 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { throw("heapBitsSetType: called with non-pointer type") return } - if nw < 2 { - // Must write at least 2 words, because the "no scan" - // encoding doesn't take effect until the third word. - nw = 2 - } // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2). // The leading byte is special because it contains the bits for word 1, @@ -1172,21 +1071,22 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { case h.shift == 0: // Ptrmask and heap bitmap are aligned. - // Handle first byte of bitmap specially. + // + // This is a fast path for small objects. // // The first byte we write out covers the first four // words of the object. The scan/dead bit on the first // word must be set to scan since there are pointers - // somewhere in the object. 
The scan/dead bit on the - // second word is the checkmark, so we don't set it. + // somewhere in the object. // In all following words, we set the scan/dead // appropriately to indicate that the object contains // to the next 2-bit entry in the bitmap. // - // TODO: It doesn't matter if we set the checkmark, so - // maybe this case isn't needed any more. + // We set four bits at a time here, but if the object + // is fewer than four words, phase 3 will clear + // unnecessary bits. hb = b & bitPointerAll - hb |= bitScan | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift) + hb |= bitScanAll if w += 4; w >= nw { goto Phase3 } @@ -1203,14 +1103,13 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // We took care of 1-word and 2-word objects above, // so this is at least a 6-word object. hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift) - // This is not noscan, so set the scan bit in the - // first word. hb |= bitScan << (2 * heapBitsShift) + if nw > 1 { + hb |= bitScan << (3 * heapBitsShift) + } b >>= 2 nb -= 2 - // Note: no bitScan for second word because that's - // the checkmark. - *hbitp &^= uint8((bitPointer | bitScan | (bitPointer << heapBitsShift)) << (2 * heapBitsShift)) + *hbitp &^= uint8((bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << (2 * heapBitsShift)) *hbitp |= uint8(hb) hbitp = add1(hbitp) if w += 2; w >= nw { @@ -1403,17 +1302,20 @@ Phase4: // Double check the whole bitmap. if doubleCheck { // x+size may not point to the heap, so back up one - // word and then call next(). - end := heapBitsForAddr(x + size - sys.PtrSize).next() - endAI := arenaIdx(end.arena) - if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0])) { - // The unrolling code above walks hbitp just - // past the bitmap without moving to the next - // arena. Synthesize this for end.bitp. 
- end.arena-- - endAI = arenaIdx(end.arena) - end.bitp = addb(&mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0], heapArenaBitmapBytes) - end.last = nil + // word and then advance it the way we do above. + end := heapBitsForAddr(x + size - sys.PtrSize) + if outOfPlace { + // In out-of-place copying, we just advance + // using next. + end = end.next() + } else { + // Don't use next because that may advance to + // the next arena and the in-place logic + // doesn't do that. + end.shift += heapBitsShift + if end.shift == 4*heapBitsShift { + end.bitp, end.shift = add1(end.bitp), 0 + } } if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) { println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size) @@ -1437,19 +1339,16 @@ Phase4: var have, want uint8 have = (*h.bitp >> h.shift) & (bitPointer | bitScan) if i >= totalptr { - want = 0 // deadmarker if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 { + // heapBitsSetTypeGCProg always fills + // in full nibbles of bitScan. 
want = bitScan } } else { if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 { want |= bitPointer } - if i != 1 { - want |= bitScan - } else { - have &^= bitScan - } + want |= bitScan } if have != want { println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size) @@ -2009,7 +1908,7 @@ func getgcmask(ep interface{}) (mask []byte) { if hbits.isPointer() { mask[i/sys.PtrSize] = 1 } - if i != 1*sys.PtrSize && !hbits.morePointers() { + if !hbits.morePointers() { mask = mask[:i/sys.PtrSize] break } diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index 5bceb51ac9..7a7d33ccae 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -131,11 +131,7 @@ func (c *mcache) refill(spc spanClass) { if s.sweepgen != mheap_.sweepgen+3 { throw("bad sweepgen in refill") } - if go115NewMCentralImpl { - mheap_.central[spc].mcentral.uncacheSpan(s) - } else { - atomic.Store(&s.sweepgen, mheap_.sweepgen) - } + mheap_.central[spc].mcentral.uncacheSpan(s) } // Get a new cached span from the central lists. diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go index ed49d86d0c..ed49e01677 100644 --- a/src/runtime/mcentral.go +++ b/src/runtime/mcentral.go @@ -18,7 +18,6 @@ import "runtime/internal/atomic" // //go:notinheap type mcentral struct { - lock mutex spanclass spanClass // For !go115NewMCentralImpl. @@ -55,16 +54,10 @@ type mcentral struct { // Initialize a single central free list. 
func (c *mcentral) init(spc spanClass) { c.spanclass = spc - if go115NewMCentralImpl { - lockInit(&c.partial[0].spineLock, lockRankSpanSetSpine) - lockInit(&c.partial[1].spineLock, lockRankSpanSetSpine) - lockInit(&c.full[0].spineLock, lockRankSpanSetSpine) - lockInit(&c.full[1].spineLock, lockRankSpanSetSpine) - } else { - c.nonempty.init() - c.empty.init() - lockInit(&c.lock, lockRankMcentral) - } + lockInit(&c.partial[0].spineLock, lockRankSpanSetSpine) + lockInit(&c.partial[1].spineLock, lockRankSpanSetSpine) + lockInit(&c.full[0].spineLock, lockRankSpanSetSpine) + lockInit(&c.full[1].spineLock, lockRankSpanSetSpine) } // partialUnswept returns the spanSet which holds partially-filled @@ -93,9 +86,6 @@ func (c *mcentral) fullSwept(sweepgen uint32) *spanSet { // Allocate a span to use in an mcache. func (c *mcentral) cacheSpan() *mspan { - if !go115NewMCentralImpl { - return c.oldCacheSpan() - } // Deduct credit for this span allocation and sweep if necessary. spanBytes := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) * _PageSize deductSweepCredit(spanBytes, 0) @@ -213,127 +203,11 @@ havespan: return s } -// Allocate a span to use in an mcache. -// -// For !go115NewMCentralImpl. -func (c *mcentral) oldCacheSpan() *mspan { - // Deduct credit for this span allocation and sweep if necessary. 
- spanBytes := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) * _PageSize - deductSweepCredit(spanBytes, 0) - - lock(&c.lock) - traceDone := false - if trace.enabled { - traceGCSweepStart() - } - sg := mheap_.sweepgen -retry: - var s *mspan - for s = c.nonempty.first; s != nil; s = s.next { - if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) { - c.nonempty.remove(s) - c.empty.insertBack(s) - unlock(&c.lock) - s.sweep(true) - goto havespan - } - if s.sweepgen == sg-1 { - // the span is being swept by background sweeper, skip - continue - } - // we have a nonempty span that does not require sweeping, allocate from it - c.nonempty.remove(s) - c.empty.insertBack(s) - unlock(&c.lock) - goto havespan - } - - for s = c.empty.first; s != nil; s = s.next { - if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) { - // we have an empty span that requires sweeping, - // sweep it and see if we can free some space in it - c.empty.remove(s) - // swept spans are at the end of the list - c.empty.insertBack(s) - unlock(&c.lock) - s.sweep(true) - freeIndex := s.nextFreeIndex() - if freeIndex != s.nelems { - s.freeindex = freeIndex - goto havespan - } - lock(&c.lock) - // the span is still empty after sweep - // it is already in the empty list, so just retry - goto retry - } - if s.sweepgen == sg-1 { - // the span is being swept by background sweeper, skip - continue - } - // already swept empty span, - // all subsequent ones must also be either swept or in process of sweeping - break - } - if trace.enabled { - traceGCSweepDone() - traceDone = true - } - unlock(&c.lock) - - // Replenish central list if empty. - s = c.grow() - if s == nil { - return nil - } - lock(&c.lock) - c.empty.insertBack(s) - unlock(&c.lock) - - // At this point s is a non-empty span, queued at the end of the empty list, - // c is unlocked. 
-havespan: - if trace.enabled && !traceDone { - traceGCSweepDone() - } - n := int(s.nelems) - int(s.allocCount) - if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems { - throw("span has no free objects") - } - // Assume all objects from this span will be allocated in the - // mcache. If it gets uncached, we'll adjust this. - atomic.Xadd64(&c.nmalloc, int64(n)) - usedBytes := uintptr(s.allocCount) * s.elemsize - atomic.Xadd64(&memstats.heap_live, int64(spanBytes)-int64(usedBytes)) - if trace.enabled { - // heap_live changed. - traceHeapAlloc() - } - if gcBlackenEnabled != 0 { - // heap_live changed. - gcController.revise() - } - freeByteBase := s.freeindex &^ (64 - 1) - whichByte := freeByteBase / 8 - // Init alloc bits cache. - s.refillAllocCache(whichByte) - - // Adjust the allocCache so that s.freeindex corresponds to the low bit in - // s.allocCache. - s.allocCache >>= s.freeindex % 64 - - return s -} - // Return span from an mcache. // // s must have a span class corresponding to this // mcentral and it must not be empty. func (c *mcentral) uncacheSpan(s *mspan) { - if !go115NewMCentralImpl { - c.oldUncacheSpan(s) - return - } if s.allocCount == 0 { throw("uncaching span but s.allocCount == 0") } @@ -393,111 +267,6 @@ func (c *mcentral) uncacheSpan(s *mspan) { } } -// Return span from an mcache. -// -// For !go115NewMCentralImpl. -func (c *mcentral) oldUncacheSpan(s *mspan) { - if s.allocCount == 0 { - throw("uncaching span but s.allocCount == 0") - } - - sg := mheap_.sweepgen - stale := s.sweepgen == sg+1 - if stale { - // Span was cached before sweep began. It's our - // responsibility to sweep it. - // - // Set sweepgen to indicate it's not cached but needs - // sweeping and can't be allocated from. sweep will - // set s.sweepgen to indicate s is swept. - atomic.Store(&s.sweepgen, sg-1) - } else { - // Indicate that s is no longer cached. 
- atomic.Store(&s.sweepgen, sg) - } - - n := int(s.nelems) - int(s.allocCount) - if n > 0 { - // cacheSpan updated alloc assuming all objects on s - // were going to be allocated. Adjust for any that - // weren't. We must do this before potentially - // sweeping the span. - atomic.Xadd64(&c.nmalloc, -int64(n)) - - lock(&c.lock) - c.empty.remove(s) - c.nonempty.insert(s) - if !stale { - // mCentral_CacheSpan conservatively counted - // unallocated slots in heap_live. Undo this. - // - // If this span was cached before sweep, then - // heap_live was totally recomputed since - // caching this span, so we don't do this for - // stale spans. - atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize)) - } - unlock(&c.lock) - } - - if stale { - // Now that s is in the right mcentral list, we can - // sweep it. - s.sweep(false) - } -} - -// freeSpan updates c and s after sweeping s. -// It sets s's sweepgen to the latest generation, -// and, based on the number of free objects in s, -// moves s to the appropriate list of c or returns it -// to the heap. -// freeSpan reports whether s was returned to the heap. -// If preserve=true, it does not move s (the caller -// must take care of it). -// -// For !go115NewMCentralImpl. -func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool { - if sg := mheap_.sweepgen; s.sweepgen == sg+1 || s.sweepgen == sg+3 { - throw("freeSpan given cached span") - } - s.needzero = 1 - - if preserve { - // preserve is set only when called from (un)cacheSpan above, - // the span must be in the empty list. - if !s.inList() { - throw("can't preserve unlinked span") - } - atomic.Store(&s.sweepgen, mheap_.sweepgen) - return false - } - - lock(&c.lock) - - // Move to nonempty if necessary. - if wasempty { - c.empty.remove(s) - c.nonempty.insert(s) - } - - // delay updating sweepgen until here. 
This is the signal that - // the span may be used in an mcache, so it must come after the - // linked list operations above (actually, just after the - // lock of c above.) - atomic.Store(&s.sweepgen, mheap_.sweepgen) - - if s.allocCount != 0 { - unlock(&c.lock) - return false - } - - c.nonempty.remove(s) - unlock(&c.lock) - mheap_.freeSpan(s) - return true -} - // grow allocates a new empty span from the heap and initializes it for c's size class. func (c *mcentral) grow() *mspan { npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) diff --git a/src/runtime/mcheckmark.go b/src/runtime/mcheckmark.go new file mode 100644 index 0000000000..1fd8e4e78f --- /dev/null +++ b/src/runtime/mcheckmark.go @@ -0,0 +1,100 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// GC checkmarks +// +// In a concurrent garbage collector, one worries about failing to mark +// a live object due to mutations without write barriers or bugs in the +// collector implementation. As a sanity check, the GC has a 'checkmark' +// mode that retraverses the object graph with the world stopped, to make +// sure that everything that should be marked is marked. + +package runtime + +import ( + "runtime/internal/atomic" + "runtime/internal/sys" + "unsafe" +) + +// A checkmarksMap stores the GC marks in "checkmarks" mode. It is a +// per-arena bitmap with a bit for every word in the arena. The mark +// is stored on the bit corresponding to the first word of the marked +// allocation. +// +//go:notinheap +type checkmarksMap [heapArenaBytes / sys.PtrSize / 8]uint8 + +// If useCheckmark is true, marking of an object uses the checkmark +// bits instead of the standard mark bits. +var useCheckmark = false + +// startCheckmarks prepares for the checkmarks phase. +// +// The world must be stopped. +func startCheckmarks() { + // Clear all checkmarks. 
+ for _, ai := range mheap_.allArenas { + arena := mheap_.arenas[ai.l1()][ai.l2()] + bitmap := arena.checkmarks + + if bitmap == nil { + // Allocate bitmap on first use. + bitmap = (*checkmarksMap)(persistentalloc(unsafe.Sizeof(*bitmap), 0, &memstats.gc_sys)) + if bitmap == nil { + throw("out of memory allocating checkmarks bitmap") + } + arena.checkmarks = bitmap + } else { + // Otherwise clear the existing bitmap. + for i := range bitmap { + bitmap[i] = 0 + } + } + } + // Enable checkmarking. + useCheckmark = true +} + +// endCheckmarks ends the checkmarks phase. +func endCheckmarks() { + if gcMarkWorkAvailable(nil) { + throw("GC work not flushed") + } + useCheckmark = false +} + +// setCheckmark throws if marking object is a checkmarks violation, +// and otherwise sets obj's checkmark. It returns true if obj was +// already checkmarked. +func setCheckmark(obj, base, off uintptr, mbits markBits) bool { + if !mbits.isMarked() { + printlock() + print("runtime: checkmarks found unexpected unmarked object obj=", hex(obj), "\n") + print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n") + + // Dump the source (base) object + gcDumpObject("base", base, off) + + // Dump the object + gcDumpObject("obj", obj, ^uintptr(0)) + + getg().m.traceback = 2 + throw("checkmark found unmarked object") + } + + ai := arenaIndex(obj) + arena := mheap_.arenas[ai.l1()][ai.l2()] + arenaWord := (obj / heapArenaBytes / 8) % uintptr(len(arena.checkmarks)) + mask := byte(1 << ((obj / heapArenaBytes) % 8)) + bytep := &arena.checkmarks[arenaWord] + + if atomic.Load8(bytep)&mask != 0 { + // Already checkmarked. 
+ return true + } + + atomic.Or8(bytep, mask) + return false +} diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index b3499516f6..bd87144355 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1670,13 +1670,13 @@ func gcMarkTermination(nextTriggerRatio float64) { // mark using checkmark bits, to check that we // didn't forget to mark anything during the // concurrent mark process. + startCheckmarks() gcResetMarkState() - initCheckmarks() gcw := &getg().m.p.ptr().gcw gcDrain(gcw, 0) wbBufFlush1(getg().m.p.ptr()) gcw.dispose() - clearCheckmarks() + endCheckmarks() } // marking is complete so we can turn the write barrier off @@ -2149,21 +2149,13 @@ func gcSweep(mode gcMode) { lock(&mheap_.lock) mheap_.sweepgen += 2 mheap_.sweepdone = 0 - if !go115NewMCentralImpl && mheap_.sweepSpans[mheap_.sweepgen/2%2].index != 0 { - // We should have drained this list during the last - // sweep phase. We certainly need to start this phase - // with an empty swept list. - throw("non-empty swept list") - } mheap_.pagesSwept = 0 mheap_.sweepArenas = mheap_.allArenas mheap_.reclaimIndex = 0 mheap_.reclaimCredit = 0 unlock(&mheap_.lock) - if go115NewMCentralImpl { - sweep.centralIndex.clear() - } + sweep.centralIndex.clear() if !_ConcurrentSweep || mode == gcForceBlockMode { // Special case synchronous sweep. diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index fe988c46d9..2b84945471 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -47,10 +47,6 @@ const ( // Must be a multiple of the pageInUse bitmap element size and // must also evenly divide pagesPerArena. pagesPerSpanRoot = 512 - - // go115NewMarkrootSpans is a feature flag that indicates whether - // to use the new bitmap-based markrootSpans implementation. 
- go115NewMarkrootSpans = true ) // gcMarkRootPrepare queues root scanning jobs (stacks, globals, and @@ -87,24 +83,16 @@ func gcMarkRootPrepare() { // // We depend on addfinalizer to mark objects that get // finalizers after root marking. - if go115NewMarkrootSpans { - // We're going to scan the whole heap (that was available at the time the - // mark phase started, i.e. markArenas) for in-use spans which have specials. - // - // Break up the work into arenas, and further into chunks. - // - // Snapshot allArenas as markArenas. This snapshot is safe because allArenas - // is append-only. - mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)] - work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot) - } else { - // We're only interested in scanning the in-use spans, - // which will all be swept at this point. More spans - // may be added to this list during concurrent GC, but - // we only care about spans that were allocated before - // this mark phase. - work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks() - } + // + // We're going to scan the whole heap (that was available at the time the + // mark phase started, i.e. markArenas) for in-use spans which have specials. + // + // Break up the work into arenas, and further into chunks. + // + // Snapshot allArenas as markArenas. This snapshot is safe because allArenas + // is append-only. + mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)] + work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot) // Scan stacks. // @@ -316,10 +304,6 @@ func markrootFreeGStacks() { // //go:nowritebarrier func markrootSpans(gcw *gcWork, shard int) { - if !go115NewMarkrootSpans { - oldMarkrootSpans(gcw, shard) - return - } // Objects with finalizers have two GC-related invariants: // // 1) Everything reachable from the object must be marked. 
@@ -396,90 +380,6 @@ func markrootSpans(gcw *gcWork, shard int) { } } -// oldMarkrootSpans marks roots for one shard of work.spans. -// -// For go115NewMarkrootSpans = false. -// -//go:nowritebarrier -func oldMarkrootSpans(gcw *gcWork, shard int) { - // Objects with finalizers have two GC-related invariants: - // - // 1) Everything reachable from the object must be marked. - // This ensures that when we pass the object to its finalizer, - // everything the finalizer can reach will be retained. - // - // 2) Finalizer specials (which are not in the garbage - // collected heap) are roots. In practice, this means the fn - // field must be scanned. - // - // TODO(austin): There are several ideas for making this more - // efficient in issue #11485. - - sg := mheap_.sweepgen - spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard) - // Note that work.spans may not include spans that were - // allocated between entering the scan phase and now. We may - // also race with spans being added into sweepSpans when they're - // just created, and as a result we may see nil pointers in the - // spans slice. This is okay because any objects with finalizers - // in those spans must have been allocated and given finalizers - // after we entered the scan phase, so addfinalizer will have - // ensured the above invariants for them. - for i := 0; i < len(spans); i++ { - // sweepBuf.block requires that we read pointers from the block atomically. - // It also requires that we ignore nil pointers. - s := (*mspan)(atomic.Loadp(unsafe.Pointer(&spans[i]))) - - // This is racing with spans being initialized, so - // check the state carefully. - if s == nil || s.state.get() != mSpanInUse { - continue - } - // Check that this span was swept (it may be cached or uncached). 
- if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) { - // sweepgen was updated (+2) during non-checkmark GC pass - print("sweep ", s.sweepgen, " ", sg, "\n") - throw("gc: unswept span") - } - - // Speculatively check if there are any specials - // without acquiring the span lock. This may race with - // adding the first special to a span, but in that - // case addfinalizer will observe that the GC is - // active (which is globally synchronized) and ensure - // the above invariants. We may also ensure the - // invariants, but it's okay to scan an object twice. - if s.specials == nil { - continue - } - - // Lock the specials to prevent a special from being - // removed from the list while we're traversing it. - lock(&s.speciallock) - - for sp := s.specials; sp != nil; sp = sp.next { - if sp.kind != _KindSpecialFinalizer { - continue - } - // don't mark finalized object, but scan it so we - // retain everything it points to. - spf := (*specialfinalizer)(unsafe.Pointer(sp)) - // A finalizer can be set for an inner byte of an object, find object beginning. - p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize - - // Mark everything that can be reached from - // the object (but *not* the object itself or - // we'll never collect it). - scanobject(p, gcw) - - // The special itself is a root. - scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil) - } - - unlock(&s.speciallock) - } -} - // gcAssistAlloc performs GC work to make gp's assist debt positive. // gp must be the calling user gorountine. // @@ -1354,11 +1254,7 @@ func scanobject(b uintptr, gcw *gcWork) { } // Load bits once. See CL 22712 and issue 16973 for discussion. bits := hbits.bits() - // During checkmarking, 1-word objects store the checkmark - // in the type bit for the one word. The only one-word objects - // are pointers, or else they'd be merged with other non-pointer - // data into larger allocations. 
- if i != 1*sys.PtrSize && bits&bitScan == 0 { + if bits&bitScan == 0 { break // no more pointers in this object } if bits&bitPointer == 0 { @@ -1511,28 +1407,10 @@ func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintp mbits := span.markBitsForIndex(objIndex) if useCheckmark { - if !mbits.isMarked() { - printlock() - print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n") - print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n") - - // Dump the source (base) object - gcDumpObject("base", base, off) - - // Dump the object - gcDumpObject("obj", obj, ^uintptr(0)) - - getg().m.traceback = 2 - throw("checkmark found unmarked object") - } - hbits := heapBitsForAddr(obj) - if hbits.isCheckmarked(span.elemsize) { + if setCheckmark(obj, base, off, mbits) { + // Already marked. return } - hbits.setCheckmarked(span.elemsize) - if !hbits.isCheckmarked(span.elemsize) { - throw("setCheckmarked and isCheckmarked disagree") - } } else { if debug.gccheckmark > 0 && span.isFree(objIndex) { print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n") @@ -1661,45 +1539,3 @@ func gcMarkTinyAllocs() { greyobject(c.tiny, 0, 0, span, gcw, objIndex) } } - -// Checkmarking - -// To help debug the concurrent GC we remark with the world -// stopped ensuring that any object encountered has their normal -// mark bit set. To do this we use an orthogonal bit -// pattern to indicate the object is marked. The following pattern -// uses the upper two bits in the object's boundary nibble. -// 01: scalar not marked -// 10: pointer not marked -// 11: pointer marked -// 00: scalar marked -// Xoring with 01 will flip the pattern from marked to unmarked and vica versa. -// The higher bit is 1 for pointers and 0 for scalars, whether the object -// is marked or not. -// The first nibble no longer holds the typeDead pattern indicating that the -// there are no more pointers in the object. 
This information is held -// in the second nibble. - -// If useCheckmark is true, marking of an object uses the -// checkmark bits (encoding above) instead of the standard -// mark bits. -var useCheckmark = false - -//go:nowritebarrier -func initCheckmarks() { - useCheckmark = true - for _, s := range mheap_.allspans { - if s.state.get() == mSpanInUse { - heapBitsForAddr(s.base()).initCheckmarkSpan(s.layout()) - } - } -} - -func clearCheckmarks() { - useCheckmark = false - for _, s := range mheap_.allspans { - if s.state.get() == mSpanInUse { - heapBitsForAddr(s.base()).clearCheckmarkSpan(s.layout()) - } - } -} diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 3aa3afc028..6b8c56ce35 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -132,17 +132,15 @@ func finishsweep_m() { sweep.npausesweep++ } - if go115NewMCentralImpl { - // Reset all the unswept buffers, which should be empty. - // Do this in sweep termination as opposed to mark termination - // so that we can catch unswept spans and reclaim blocks as - // soon as possible. - sg := mheap_.sweepgen - for i := range mheap_.central { - c := &mheap_.central[i].mcentral - c.partialUnswept(sg).reset() - c.fullUnswept(sg).reset() - } + // Reset all the unswept buffers, which should be empty. + // Do this in sweep termination as opposed to mark termination + // so that we can catch unswept spans and reclaim blocks as + // soon as possible. 
+ sg := mheap_.sweepgen + for i := range mheap_.central { + c := &mheap_.central[i].mcentral + c.partialUnswept(sg).reset() + c.fullUnswept(sg).reset() } // Sweeping is done, so if the scavenger isn't already awake, @@ -202,11 +200,7 @@ func sweepone() uintptr { var s *mspan sg := mheap_.sweepgen for { - if go115NewMCentralImpl { - s = mheap_.nextSpanForSweep() - } else { - s = mheap_.sweepSpans[1-sg/2%2].pop() - } + s = mheap_.nextSpanForSweep() if s == nil { atomic.Store(&mheap_.sweepdone, 1) break @@ -322,9 +316,6 @@ func (s *mspan) ensureSwept() { // If preserve=true, don't return it to heap nor relink in mcentral lists; // caller takes care of it. func (s *mspan) sweep(preserve bool) bool { - if !go115NewMCentralImpl { - return s.oldSweep(preserve) - } // It's critical that we enter this function with preemption disabled, // GC must not start while we are in the middle of this function. _g_ := getg() @@ -568,214 +559,6 @@ func (s *mspan) sweep(preserve bool) bool { return false } -// Sweep frees or collects finalizers for blocks not marked in the mark phase. -// It clears the mark bits in preparation for the next GC round. -// Returns true if the span was returned to heap. -// If preserve=true, don't return it to heap nor relink in mcentral lists; -// caller takes care of it. -// -// For !go115NewMCentralImpl. -func (s *mspan) oldSweep(preserve bool) bool { - // It's critical that we enter this function with preemption disabled, - // GC must not start while we are in the middle of this function. 
- _g_ := getg() - if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 { - throw("mspan.sweep: m is not locked") - } - sweepgen := mheap_.sweepgen - if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 { - print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") - throw("mspan.sweep: bad span state") - } - - if trace.enabled { - traceGCSweepSpan(s.npages * _PageSize) - } - - atomic.Xadd64(&mheap_.pagesSwept, int64(s.npages)) - - spc := s.spanclass - size := s.elemsize - res := false - - c := _g_.m.p.ptr().mcache - freeToHeap := false - - // The allocBits indicate which unmarked objects don't need to be - // processed since they were free at the end of the last GC cycle - // and were not allocated since then. - // If the allocBits index is >= s.freeindex and the bit - // is not marked then the object remains unallocated - // since the last GC. - // This situation is analogous to being on a freelist. - - // Unlink & free special records for any objects we're about to free. - // Two complications here: - // 1. An object can have both finalizer and profile special records. - // In such case we need to queue finalizer for execution, - // mark the object as live and preserve the profile special. - // 2. A tiny object can have several finalizers setup for different offsets. - // If such object is not marked, we need to queue all finalizers at once. - // Both 1 and 2 are possible at the same time. - hadSpecials := s.specials != nil - specialp := &s.specials - special := *specialp - for special != nil { - // A finalizer can be set for an inner byte of an object, find object beginning. - objIndex := uintptr(special.offset) / size - p := s.base() + objIndex*size - mbits := s.markBitsForIndex(objIndex) - if !mbits.isMarked() { - // This object is not marked and has at least one special record. - // Pass 1: see if it has at least one finalizer. 
- hasFin := false - endOffset := p - s.base() + size - for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next { - if tmp.kind == _KindSpecialFinalizer { - // Stop freeing of object if it has a finalizer. - mbits.setMarkedNonAtomic() - hasFin = true - break - } - } - // Pass 2: queue all finalizers _or_ handle profile record. - for special != nil && uintptr(special.offset) < endOffset { - // Find the exact byte for which the special was setup - // (as opposed to object beginning). - p := s.base() + uintptr(special.offset) - if special.kind == _KindSpecialFinalizer || !hasFin { - // Splice out special record. - y := special - special = special.next - *specialp = special - freespecial(y, unsafe.Pointer(p), size) - } else { - // This is profile record, but the object has finalizers (so kept alive). - // Keep special record. - specialp = &special.next - special = *specialp - } - } - } else { - // object is still live: keep special record - specialp = &special.next - special = *specialp - } - } - if go115NewMarkrootSpans && hadSpecials && s.specials == nil { - spanHasNoSpecials(s) - } - - if debug.allocfreetrace != 0 || debug.clobberfree != 0 || raceenabled || msanenabled { - // Find all newly freed objects. This doesn't have to - // efficient; allocfreetrace has massive overhead. - mbits := s.markBitsForBase() - abits := s.allocBitsForIndex(0) - for i := uintptr(0); i < s.nelems; i++ { - if !mbits.isMarked() && (abits.index < s.freeindex || abits.isMarked()) { - x := s.base() + i*s.elemsize - if debug.allocfreetrace != 0 { - tracefree(unsafe.Pointer(x), size) - } - if debug.clobberfree != 0 { - clobberfree(unsafe.Pointer(x), size) - } - if raceenabled { - racefree(unsafe.Pointer(x), size) - } - if msanenabled { - msanfree(unsafe.Pointer(x), size) - } - } - mbits.advance() - abits.advance() - } - } - - // Count the number of free objects in this span. 
- nalloc := uint16(s.countAlloc()) - if spc.sizeclass() == 0 && nalloc == 0 { - s.needzero = 1 - freeToHeap = true - } - nfreed := s.allocCount - nalloc - if nalloc > s.allocCount { - print("runtime: nelems=", s.nelems, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n") - throw("sweep increased allocation count") - } - - s.allocCount = nalloc - wasempty := s.nextFreeIndex() == s.nelems - s.freeindex = 0 // reset allocation index to start of span. - if trace.enabled { - getg().m.p.ptr().traceReclaimed += uintptr(nfreed) * s.elemsize - } - - // gcmarkBits becomes the allocBits. - // get a fresh cleared gcmarkBits in preparation for next GC - s.allocBits = s.gcmarkBits - s.gcmarkBits = newMarkBits(s.nelems) - - // Initialize alloc bits cache. - s.refillAllocCache(0) - - // We need to set s.sweepgen = h.sweepgen only when all blocks are swept, - // because of the potential for a concurrent free/SetFinalizer. - // But we need to set it before we make the span available for allocation - // (return it to heap or mcentral), because allocation code assumes that a - // span is already swept if available for allocation. - if freeToHeap || nfreed == 0 { - // The span must be in our exclusive ownership until we update sweepgen, - // check for potential races. - if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 { - print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") - throw("mspan.sweep: bad span state after sweep") - } - // Serialization point. - // At this point the mark bits are cleared and allocation ready - // to go so release the span. 
- atomic.Store(&s.sweepgen, sweepgen) - } - - if nfreed > 0 && spc.sizeclass() != 0 { - c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed) - res = mheap_.central[spc].mcentral.freeSpan(s, preserve, wasempty) - // mcentral.freeSpan updates sweepgen - } else if freeToHeap { - // Free large span to heap - - // NOTE(rsc,dvyukov): The original implementation of efence - // in CL 22060046 used sysFree instead of sysFault, so that - // the operating system would eventually give the memory - // back to us again, so that an efence program could run - // longer without running out of memory. Unfortunately, - // calling sysFree here without any kind of adjustment of the - // heap data structures means that when the memory does - // come back to us, we have the wrong metadata for it, either in - // the mspan structures or in the garbage collection bitmap. - // Using sysFault here means that the program will run out of - // memory fairly quickly in efence mode, but at least it won't - // have mysterious crashes due to confused memory reuse. - // It should be possible to switch back to sysFree if we also - // implement and then call some kind of mheap.deleteSpan. - if debug.efence > 0 { - s.limit = 0 // prevent mlookup from finding this span - sysFault(unsafe.Pointer(s.base()), size) - } else { - mheap_.freeSpan(s) - } - c.local_nlargefree++ - c.local_largefree += size - res = true - } - if !res { - // The span has been swept and is still in-use, so put - // it on the swept in-use list. - mheap_.sweepSpans[sweepgen/2%2].push(s) - } - return res -} - // reportZombies reports any marked but free objects in s and throws. // // This generally means one of the following: diff --git a/src/runtime/mgcsweepbuf.go b/src/runtime/mgcsweepbuf.go deleted file mode 100644 index 1f722c3d58..0000000000 --- a/src/runtime/mgcsweepbuf.go +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package runtime - -import ( - "internal/cpu" - "runtime/internal/atomic" - "runtime/internal/sys" - "unsafe" -) - -// A gcSweepBuf is a set of *mspans. -// -// gcSweepBuf is safe for concurrent push operations *or* concurrent -// pop operations, but not both simultaneously. -type gcSweepBuf struct { - // A gcSweepBuf is a two-level data structure consisting of a - // growable spine that points to fixed-sized blocks. The spine - // can be accessed without locks, but adding a block or - // growing it requires taking the spine lock. - // - // Because each mspan covers at least 8K of heap and takes at - // most 8 bytes in the gcSweepBuf, the growth of the spine is - // quite limited. - // - // The spine and all blocks are allocated off-heap, which - // allows this to be used in the memory manager and avoids the - // need for write barriers on all of these. We never release - // this memory because there could be concurrent lock-free - // access and we're likely to reuse it anyway. (In principle, - // we could do this during STW.) - - spineLock mutex - spine unsafe.Pointer // *[N]*gcSweepBlock, accessed atomically - spineLen uintptr // Spine array length, accessed atomically - spineCap uintptr // Spine array cap, accessed under lock - - // index is the first unused slot in the logical concatenation - // of all blocks. It is accessed atomically. - index uint32 -} - -const ( - gcSweepBlockEntries = 512 // 4KB on 64-bit - gcSweepBufInitSpineCap = 256 // Enough for 1GB heap on 64-bit -) - -type gcSweepBlock struct { - spans [gcSweepBlockEntries]*mspan -} - -// push adds span s to buffer b. push is safe to call concurrently -// with other push operations, but NOT to call concurrently with pop. -func (b *gcSweepBuf) push(s *mspan) { - // Obtain our slot. 
- cursor := uintptr(atomic.Xadd(&b.index, +1) - 1) - top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries - - // Do we need to add a block? - spineLen := atomic.Loaduintptr(&b.spineLen) - var block *gcSweepBlock -retry: - if top < spineLen { - spine := atomic.Loadp(unsafe.Pointer(&b.spine)) - blockp := add(spine, sys.PtrSize*top) - block = (*gcSweepBlock)(atomic.Loadp(blockp)) - } else { - // Add a new block to the spine, potentially growing - // the spine. - lock(&b.spineLock) - // spineLen cannot change until we release the lock, - // but may have changed while we were waiting. - spineLen = atomic.Loaduintptr(&b.spineLen) - if top < spineLen { - unlock(&b.spineLock) - goto retry - } - - if spineLen == b.spineCap { - // Grow the spine. - newCap := b.spineCap * 2 - if newCap == 0 { - newCap = gcSweepBufInitSpineCap - } - newSpine := persistentalloc(newCap*sys.PtrSize, cpu.CacheLineSize, &memstats.gc_sys) - if b.spineCap != 0 { - // Blocks are allocated off-heap, so - // no write barriers. - memmove(newSpine, b.spine, b.spineCap*sys.PtrSize) - } - // Spine is allocated off-heap, so no write barrier. - atomic.StorepNoWB(unsafe.Pointer(&b.spine), newSpine) - b.spineCap = newCap - // We can't immediately free the old spine - // since a concurrent push with a lower index - // could still be reading from it. We let it - // leak because even a 1TB heap would waste - // less than 2MB of memory on old spines. If - // this is a problem, we could free old spines - // during STW. - } - - // Allocate a new block and add it to the spine. - block = (*gcSweepBlock)(persistentalloc(unsafe.Sizeof(gcSweepBlock{}), cpu.CacheLineSize, &memstats.gc_sys)) - blockp := add(b.spine, sys.PtrSize*top) - // Blocks are allocated off-heap, so no write barrier. - atomic.StorepNoWB(blockp, unsafe.Pointer(block)) - atomic.Storeuintptr(&b.spineLen, spineLen+1) - unlock(&b.spineLock) - } - - // We have a block. 
Insert the span atomically, since there may be - // concurrent readers via the block API. - atomic.StorepNoWB(unsafe.Pointer(&block.spans[bottom]), unsafe.Pointer(s)) -} - -// pop removes and returns a span from buffer b, or nil if b is empty. -// pop is safe to call concurrently with other pop operations, but NOT -// to call concurrently with push. -func (b *gcSweepBuf) pop() *mspan { - cursor := atomic.Xadd(&b.index, -1) - if int32(cursor) < 0 { - atomic.Xadd(&b.index, +1) - return nil - } - - // There are no concurrent spine or block modifications during - // pop, so we can omit the atomics. - top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries - blockp := (**gcSweepBlock)(add(b.spine, sys.PtrSize*uintptr(top))) - block := *blockp - s := block.spans[bottom] - // Clear the pointer for block(i). - block.spans[bottom] = nil - return s -} - -// numBlocks returns the number of blocks in buffer b. numBlocks is -// safe to call concurrently with any other operation. Spans that have -// been pushed prior to the call to numBlocks are guaranteed to appear -// in some block in the range [0, numBlocks()), assuming there are no -// intervening pops. Spans that are pushed after the call may also -// appear in these blocks. -func (b *gcSweepBuf) numBlocks() int { - return int(divRoundUp(uintptr(atomic.Load(&b.index)), gcSweepBlockEntries)) -} - -// block returns the spans in the i'th block of buffer b. block is -// safe to call concurrently with push. The block may contain nil -// pointers that must be ignored, and each entry in the block must be -// loaded atomically. -func (b *gcSweepBuf) block(i int) []*mspan { - // Perform bounds check before loading spine address since - // push ensures the allocated length is at least spineLen. - if i < 0 || uintptr(i) >= atomic.Loaduintptr(&b.spineLen) { - throw("block index out of range") - } - - // Get block i. 
- spine := atomic.Loadp(unsafe.Pointer(&b.spine)) - blockp := add(spine, sys.PtrSize*uintptr(i)) - block := (*gcSweepBlock)(atomic.Loadp(blockp)) - - // Slice the block if necessary. - cursor := uintptr(atomic.Load(&b.index)) - top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries - var spans []*mspan - if uintptr(i) < top { - spans = block.spans[:] - } else { - spans = block.spans[:bottom] - } - return spans -} diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 2c7bfd8a59..1a57bcd66e 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -42,17 +42,8 @@ const ( // roughly 100µs. // // Must be a multiple of the pageInUse bitmap element size and - // must also evenly divid pagesPerArena. + // must also evenly divide pagesPerArena. pagesPerReclaimerChunk = 512 - - // go115NewMCentralImpl is a feature flag for the new mcentral implementation. - // - // This flag depends on go115NewMarkrootSpans because the new mcentral - // implementation requires that markroot spans no longer rely on mgcsweepbufs. - // The definition of this flag helps ensure that if there's a problem with - // the new markroot spans implementation and it gets turned off, that the new - // mcentral implementation also gets turned off so the runtime isn't broken. - go115NewMCentralImpl = true && go115NewMarkrootSpans ) // Main malloc heap. @@ -85,19 +76,6 @@ type mheap struct { // access (since that may free the backing store). allspans []*mspan // all spans out there - // sweepSpans contains two mspan stacks: one of swept in-use - // spans, and one of unswept in-use spans. These two trade - // roles on each GC cycle. Since the sweepgen increases by 2 - // on each cycle, this means the swept spans are in - // sweepSpans[sweepgen/2%2] and the unswept spans are in - // sweepSpans[1-sweepgen/2%2]. Sweeping pops spans from the - // unswept stack and pushes spans that are still in-use on the - // swept stack. 
Likewise, allocating an in-use span pushes it - // on the swept stack. - // - // For !go115NewMCentralImpl. - sweepSpans [2]gcSweepBuf - _ uint32 // align uint64 fields on 32-bit for atomics // Proportional sweep @@ -220,7 +198,7 @@ type mheap struct { base, end uintptr } - // _ uint32 // ensure 64-bit alignment of central + _ uint32 // ensure 64-bit alignment of central // central free lists for small size classes. // the padding makes sure that the mcentrals are @@ -300,6 +278,10 @@ type heapArena struct { // during marking. pageSpecials [pagesPerArena / 8]uint8 + // checkmarks stores the debug.gccheckmark state. It is only + // used if debug.gccheckmark > 0. + checkmarks *checkmarksMap + // zeroedBase marks the first byte of the first page in this // arena which hasn't been used yet and is therefore already // zero. zeroedBase is relative to the arena base. @@ -715,8 +697,6 @@ func pageIndexOf(p uintptr) (arena *heapArena, pageIdx uintptr, pageMask uint8) // Initialize the heap. func (h *mheap) init() { lockInit(&h.lock, lockRankMheap) - lockInit(&h.sweepSpans[0].spineLock, lockRankSpine) - lockInit(&h.sweepSpans[1].spineLock, lockRankSpine) lockInit(&h.speciallock, lockRankMheapSpecial) h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys) @@ -1290,16 +1270,6 @@ HaveSpan: h.setSpans(s.base(), npages, s) if !manual { - if !go115NewMCentralImpl { - // Add to swept in-use list. - // - // This publishes the span to root marking. - // - // h.sweepgen is guaranteed to only change during STW, - // and preemption is disabled in the page allocator. - h.sweepSpans[h.sweepgen/2%2].push(s) - } - // Mark in-use span in arena page bitmap. 
// // This publishes the span to the page sweeper, so @@ -1701,9 +1671,7 @@ func addspecial(p unsafe.Pointer, s *special) bool { s.offset = uint16(offset) s.next = *t *t = s - if go115NewMarkrootSpans { - spanHasSpecials(span) - } + spanHasSpecials(span) unlock(&span.speciallock) releasem(mp) @@ -1744,7 +1712,7 @@ func removespecial(p unsafe.Pointer, kind uint8) *special { } t = &s.next } - if go115NewMarkrootSpans && span.specials == nil { + if span.specials == nil { spanHasNoSpecials(span) } unlock(&span.speciallock) diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index 1fe77663b9..44dea22ef3 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -131,7 +131,7 @@ func header(arch string) { func p(f string, args ...interface{}) { fmted := fmt.Sprintf(f, args...) - fmt.Fprintf(out, "\t%s\n", strings.Replace(fmted, "\n", "\n\t", -1)) + fmt.Fprintf(out, "\t%s\n", strings.ReplaceAll(fmted, "\n", "\n\t")) } func label(l string) { diff --git a/src/runtime/mpallocbits.go b/src/runtime/mpallocbits.go index a8011341bc..ff112300c3 100644 --- a/src/runtime/mpallocbits.go +++ b/src/runtime/mpallocbits.go @@ -120,84 +120,105 @@ func (b *pageBits) popcntRange(i, n uint) (s uint) { // sake of documentation, 0s are free pages and 1s are allocated pages. type pallocBits pageBits -// consec8tab is a table containing the number of consecutive -// zero bits for any uint8 value. -// -// The table is generated by calling consec8(i) for each -// possible uint8 value, which is defined as: -// -// // consec8 counts the maximum number of consecutive 0 bits -// // in a uint8. 
-// func consec8(n uint8) int { -// n = ^n -// i := 0 -// for n != 0 { -// n &= (n << 1) -// i++ -// } -// return i -// } -var consec8tab = [256]uint{ - 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, - 4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, - 6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, - 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, - 7, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, - 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, - 6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, - 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 0, -} - // summarize returns a packed summary of the bitmap in pallocBits. func (b *pallocBits) summarize() pallocSum { - // TODO(mknyszek): There may be something more clever to be done - // here to make the summarize operation more efficient. For example, - // we can compute start and end with 64-bit wide operations easily, - // but max is a bit more complex. Perhaps there exists some way to - // leverage the 64-bit start and end to our advantage? - var start, max, end uint + var start, max, cur uint + const notSetYet = ^uint(0) // sentinel for start value + start = notSetYet for i := 0; i < len(b); i++ { - a := b[i] - for j := 0; j < 64; j += 8 { - k := uint8(a >> j) + x := b[i] + if x == 0 { + cur += 64 + continue + } + t := uint(sys.TrailingZeros64(x)) + l := uint(sys.LeadingZeros64(x)) - // Compute start. 
- si := uint(sys.TrailingZeros8(k)) - if start == uint(i*64+j) { - start += si - } + // Finish any region spanning the uint64s + cur += t + if start == notSetYet { + start = cur + } + if cur > max { + max = cur + } + // Final region that might span to next uint64 + cur = l + } + if start == notSetYet { + // Made it all the way through without finding a single 1 bit. + const n = uint(64 * len(b)) + return packPallocSum(n, n, n) + } + if cur > max { + max = cur + } + if max >= 64-2 { + // There is no way an internal run of zeros could beat max. + return packPallocSum(start, max, cur) + } + // Now look inside each uint64 for runs of zeros. + // All uint64s must be nonzero, or we would have aborted above. +outer: + for i := 0; i < len(b); i++ { + x := b[i] - // Compute max. - if end+si > max { - max = end + si - } - if mi := consec8tab[k]; mi > max { - max = mi + // Look inside this uint64. We have a pattern like + // 000000 1xxxxx1 000000 + // We need to look inside the 1xxxxx1 for any contiguous + // region of zeros. + + // We already know the trailing zeros are no larger than max. Remove them. + x >>= sys.TrailingZeros64(x) & 63 + if x&(x+1) == 0 { // no more zeros (except at the top). + continue + } + + // Strategy: shrink all runs of zeros by max. If any runs of zero + // remain, then we've identified a larger maximum zero run. + p := max // number of zeros we still need to shrink by. + k := uint(1) // current minimum length of runs of ones in x. + for { + // Shrink all runs of zeros by p places (except the top zeros). + for p > 0 { + if p <= k { + // Shift p ones down into the top of each run of zeros. + x |= x >> (p & 63) + if x&(x+1) == 0 { // no more zeros (except at the top). + continue outer + } + break + } + // Shift k ones down into the top of each run of zeros. + x |= x >> (k & 63) + if x&(x+1) == 0 { // no more zeros (except at the top). + continue outer + } + p -= k + // We've just doubled the minimum length of 1-runs. 
+ // This allows us to shift farther in the next iteration. + k *= 2 } - // Compute end. - if k == 0 { - end += 8 - } else { - end = uint(sys.LeadingZeros8(k)) + // The length of the lowest-order zero run is an increment to our maximum. + j := uint(sys.TrailingZeros64(^x)) // count contiguous trailing ones + x >>= j & 63 // remove trailing ones + j = uint(sys.TrailingZeros64(x)) // count contiguous trailing zeros + x >>= j & 63 // remove zeros + max += j // we have a new maximum! + if x&(x+1) == 0 { // no more zeros (except at the top). + continue outer } + p = j // remove j more zeros from each zero run. } } - return packPallocSum(start, max, end) + return packPallocSum(start, max, cur) } // find searches for npages contiguous free pages in pallocBits and returns // the index where that run starts, as well as the index of the first free page // it found in the search. searchIdx represents the first known free page and -// where to begin the search from. +// where to begin the next search from. // // If find fails to find any free space, it returns an index of ^uint(0) and // the new searchIdx should be ignored. @@ -218,9 +239,10 @@ func (b *pallocBits) find(npages uintptr, searchIdx uint) (uint, uint) { // // See find for an explanation of the searchIdx parameter. func (b *pallocBits) find1(searchIdx uint) uint { + _ = b[0] // lift nil check out of loop for i := searchIdx / 64; i < uint(len(b)); i++ { x := b[i] - if x == ^uint64(0) { + if ^x == 0 { continue } return i*64 + uint(sys.TrailingZeros64(^x)) @@ -242,18 +264,18 @@ func (b *pallocBits) findSmallN(npages uintptr, searchIdx uint) (uint, uint) { end, newSearchIdx := uint(0), ^uint(0) for i := searchIdx / 64; i < uint(len(b)); i++ { bi := b[i] - if bi == ^uint64(0) { + if ^bi == 0 { end = 0 continue } // First see if we can pack our allocation in the trailing // zeros plus the end of the last 64 bits. 
- start := uint(sys.TrailingZeros64(bi)) if newSearchIdx == ^uint(0) { // The new searchIdx is going to be at these 64 bits after any // 1s we file, so count trailing 1s. newSearchIdx = i*64 + uint(sys.TrailingZeros64(^bi)) } + start := uint(sys.TrailingZeros64(bi)) if end+start >= uint(npages) { return i*64 - end, newSearchIdx } @@ -348,15 +370,33 @@ func (b *pallocBits) pages64(i uint) uint64 { // findBitRange64 returns the bit index of the first set of // n consecutive 1 bits. If no consecutive set of 1 bits of // size n may be found in c, then it returns an integer >= 64. +// n must be > 0. func findBitRange64(c uint64, n uint) uint { - i := uint(0) - cont := uint(sys.TrailingZeros64(^c)) - for cont < n && i < 64 { - i += cont - i += uint(sys.TrailingZeros64(c >> i)) - cont = uint(sys.TrailingZeros64(^(c >> i))) + // This implementation is based on shrinking the length of + // runs of contiguous 1 bits. We remove the top n-1 1 bits + // from each run of 1s, then look for the first remaining 1 bit. + p := n - 1 // number of 1s we want to remove. + k := uint(1) // current minimum width of runs of 0 in c. + for p > 0 { + if p <= k { + // Shift p 0s down into the top of each run of 1s. + c &= c >> (p & 63) + break + } + // Shift k 0s down into the top of each run of 1s. + c &= c >> (k & 63) + if c == 0 { + return 64 + } + p -= k + // We've just doubled the minimum length of 0-runs. + // This allows us to shift farther in the next iteration. + k *= 2 } - return i + // Find first remaining 1. + // Since we shrunk from the top down, the first 1 is in + // its correct original position. 
+ return uint(sys.TrailingZeros64(c)) } // pallocData encapsulates pallocBits and a bitmap for diff --git a/src/runtime/mpallocbits_test.go b/src/runtime/mpallocbits_test.go index 71a29f3b3a..5095e24220 100644 --- a/src/runtime/mpallocbits_test.go +++ b/src/runtime/mpallocbits_test.go @@ -101,7 +101,7 @@ func invertPallocBits(b *PallocBits) { // Ensures two packed summaries are identical, and reports a detailed description // of the difference if they're not. -func checkPallocSum(t *testing.T, got, want PallocSum) { +func checkPallocSum(t testing.TB, got, want PallocSum) { if got.Start() != want.Start() { t.Errorf("inconsistent start: got %d, want %d", got.Start(), want.Start()) } @@ -297,17 +297,29 @@ func TestPallocBitsSummarize(t *testing.T) { // Benchmarks how quickly we can summarize a PallocBits. func BenchmarkPallocBitsSummarize(b *testing.B) { - buf0 := new(PallocBits) - buf1 := new(PallocBits) - for i := 0; i < len(buf1); i++ { - buf1[i] = ^uint64(0) + patterns := []uint64{ + 0, + ^uint64(0), + 0xaa, + 0xaaaaaaaaaaaaaaaa, + 0x80000000aaaaaaaa, + 0xaaaaaaaa00000001, + 0xbbbbbbbbbbbbbbbb, + 0x80000000bbbbbbbb, + 0xbbbbbbbb00000001, + 0xcccccccccccccccc, + 0x4444444444444444, + 0x4040404040404040, + 0x4000400040004000, + 0x1000404044ccaaff, } - bufa := new(PallocBits) - for i := 0; i < len(bufa); i++ { - bufa[i] = 0xaa - } - for _, buf := range []*PallocBits{buf0, buf1, bufa} { - b.Run(fmt.Sprintf("Unpacked%02X", buf[0]), func(b *testing.B) { + for _, p := range patterns { + buf := new(PallocBits) + for i := 0; i < len(buf); i++ { + buf[i] = p + } + b.Run(fmt.Sprintf("Unpacked%02X", p), func(b *testing.B) { + checkPallocSum(b, buf.Summarize(), SummarizeSlow(buf)) for i := 0; i < b.N; i++ { buf.Summarize() } @@ -492,10 +504,9 @@ func TestFindBitRange64(t *testing.T) { t.Errorf("case (%016x, %d): got %d, want %d", x, n, i, result) } } - for i := uint(0); i <= 64; i++ { + for i := uint(1); i <= 64; i++ { check(^uint64(0), i, 0) } - check(0, 0, 0) for i := 
uint(1); i <= 64; i++ { check(0, i, ^uint(0)) } @@ -508,3 +519,33 @@ func TestFindBitRange64(t *testing.T) { check(0xffff03ff0107ffff, 16, 0) check(0x0fff03ff01079fff, 16, ^uint(0)) } + +func BenchmarkFindBitRange64(b *testing.B) { + patterns := []uint64{ + 0, + ^uint64(0), + 0xaa, + 0xaaaaaaaaaaaaaaaa, + 0x80000000aaaaaaaa, + 0xaaaaaaaa00000001, + 0xbbbbbbbbbbbbbbbb, + 0x80000000bbbbbbbb, + 0xbbbbbbbb00000001, + 0xcccccccccccccccc, + 0x4444444444444444, + 0x4040404040404040, + 0x4000400040004000, + } + sizes := []uint{ + 2, 8, 32, + } + for _, pattern := range patterns { + for _, size := range sizes { + b.Run(fmt.Sprintf("Pattern%02XSize%d", pattern, size), func(b *testing.B) { + for i := 0; i < b.N; i++ { + FindBitRange64(pattern, size) + } + }) + } + } +} diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go index 7b95ff2428..9702920bcf 100644 --- a/src/runtime/os_linux.go +++ b/src/runtime/os_linux.go @@ -249,6 +249,10 @@ func sysargs(argc int32, argv **byte) { sysauxv(buf[:]) } +// startupRandomData holds random bytes initialized at startup. These come from +// the ELF AT_RANDOM auxiliary vector. +var startupRandomData []byte + func sysauxv(auxv []uintptr) int { var i int for ; auxv[i] != _AT_NULL; i += 2 { @@ -328,20 +332,11 @@ func libpreinit() { initsig(true) } -// gsignalInitQuirk, if non-nil, is called for every allocated gsignal G. -// -// TODO(austin): Remove this after Go 1.15 when we remove the -// mlockGsignal workaround. -var gsignalInitQuirk func(gsignal *g) - // Called to initialize a new m (including the bootstrap m). // Called on the parent thread (main thread in case of bootstrap), can allocate memory. 
func mpreinit(mp *m) { mp.gsignal = malg(32 * 1024) // Linux wants >= 2K mp.gsignal.m = mp - if gsignalInitQuirk != nil { - gsignalInitQuirk(mp.gsignal) - } } func gettid() uint32 diff --git a/src/runtime/os_linux_x86.go b/src/runtime/os_linux_x86.go index 97f870707d..d91fa1a0d1 100644 --- a/src/runtime/os_linux_x86.go +++ b/src/runtime/os_linux_x86.go @@ -7,120 +7,4 @@ package runtime -import ( - "runtime/internal/atomic" - "unsafe" -) - -//go:noescape -func uname(utsname *new_utsname) int - -func mlock(addr, len uintptr) int - -func osArchInit() { - // Linux 5.2 introduced a bug that can corrupt vector - // registers on return from a signal if the signal stack isn't - // faulted in: - // https://bugzilla.kernel.org/show_bug.cgi?id=205663 - // - // It was fixed in 5.3.15, 5.4.2, and all 5.5 and later - // kernels. - // - // If we're on an affected kernel, work around this issue by - // mlocking the top page of every signal stack. This doesn't - // help for signal stacks created in C, but there's not much - // we can do about that. - // - // TODO(austin): Remove this in Go 1.15, at which point it - // will be unlikely to encounter any of the affected kernels - // in the wild. - - var uts new_utsname - if uname(&uts) < 0 { - throw("uname failed") - } - // Check for null terminator to ensure gostringnocopy doesn't - // walk off the end of the release string. - found := false - for _, b := range uts.release { - if b == 0 { - found = true - break - } - } - if !found { - return - } - rel := gostringnocopy(&uts.release[0]) - - major, minor, patch, ok := parseRelease(rel) - if !ok { - return - } - - if major == 5 && minor == 4 && patch < 2 { - // All 5.4 versions of Ubuntu are patched. 
- procVersion := []byte("/proc/version\000") - f := open(&procVersion[0], _O_RDONLY, 0) - if f >= 0 { - var buf [512]byte - p := noescape(unsafe.Pointer(&buf[0])) - n := read(f, p, int32(len(buf))) - closefd(f) - - needle := []byte("Ubuntu") - contains: - for i, c := range buf[:n] { - if c != needle[0] { - continue - } - if int(n)-i < len(needle) { - break - } - for j, c2 := range needle { - if c2 != buf[i+j] { - continue contains - } - } - // This is an Ubuntu system. - return - } - } - } - - if major == 5 && (minor == 2 || minor == 3 && patch < 15 || minor == 4 && patch < 2) { - gsignalInitQuirk = mlockGsignal - if m0.gsignal != nil { - throw("gsignal quirk too late") - } - throwReportQuirk = throwBadKernel - } -} - -func mlockGsignal(gsignal *g) { - if atomic.Load(&touchStackBeforeSignal) != 0 { - // mlock has already failed, don't try again. - return - } - - // This mlock call may fail, but we don't report the failure. - // Instead, if something goes badly wrong, we rely on prepareSignalM - // and throwBadKernel to do further mitigation and to report a problem - // to the user if mitigation fails. This is because many - // systems have a limit on the total mlock size, and many kernels - // that appear to have bad versions are actually patched to avoid the - // bug described above. We want Go 1.14 to run on those systems. - // See #37436. - if errno := mlock(gsignal.stack.hi-physPageSize, physPageSize); errno < 0 { - atomic.Store(&touchStackBeforeSignal, uint32(-errno)) - } -} - -// throwBadKernel is called, via throwReportQuirk, by throw. 
-func throwBadKernel() { - if errno := atomic.Load(&touchStackBeforeSignal); errno != 0 { - println("runtime: note: your Linux kernel may be buggy") - println("runtime: note: see https://golang.org/wiki/LinuxKernelSignalVectorBug") - println("runtime: note: mlock workaround for kernel bug failed with errno", errno) - } -} +func osArchInit() {} diff --git a/src/runtime/os_netbsd.go b/src/runtime/os_netbsd.go index 97106c7b9d..f7f90cedc1 100644 --- a/src/runtime/os_netbsd.go +++ b/src/runtime/os_netbsd.go @@ -95,18 +95,28 @@ var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0) // From NetBSD's <sys/sysctl.h> const ( - _CTL_HW = 6 - _HW_NCPU = 3 - _HW_PAGESIZE = 7 + _CTL_HW = 6 + _HW_NCPU = 3 + _HW_PAGESIZE = 7 + _HW_NCPUONLINE = 16 ) -func getncpu() int32 { - mib := [2]uint32{_CTL_HW, _HW_NCPU} - out := uint32(0) +func sysctlInt(mib []uint32) (int32, bool) { + var out int32 nout := unsafe.Sizeof(out) - ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0) - if ret >= 0 { - return int32(out) + ret := sysctl(&mib[0], uint32(len(mib)), (*byte)(unsafe.Pointer(&out)), &nout, nil, 0) + if ret < 0 { + return 0, false + } + return out, true +} + +func getncpu() int32 { + if n, ok := sysctlInt([]uint32{_CTL_HW, _HW_NCPUONLINE}); ok { + return int32(n) + } + if n, ok := sysctlInt([]uint32{_CTL_HW, _HW_NCPU}); ok { + return int32(n) } return 1 } diff --git a/src/runtime/os_plan9.go b/src/runtime/os_plan9.go index 9e187d2220..128c30adeb 100644 --- a/src/runtime/os_plan9.go +++ b/src/runtime/os_plan9.go @@ -82,10 +82,10 @@ func sigpanic() { note := gostringnocopy((*byte)(unsafe.Pointer(g.m.notesig))) switch g.sig { case _SIGRFAULT, _SIGWFAULT: - i := index(note, "addr=") + i := indexNoFloat(note, "addr=") if i >= 0 { i += 5 - } else if i = index(note, "va="); i >= 0 { + } else if i = indexNoFloat(note, "va="); i >= 0 { i += 3 } else { panicmem() @@ -111,6 +111,20 @@ func sigpanic() { } } +// indexNoFloat is bytealg.IndexString but 
safe to use in a note +// handler. +func indexNoFloat(s, t string) int { + if len(t) == 0 { + return 0 + } + for i := 0; i < len(s); i++ { + if s[i] == t[0] && hasPrefix(s[i:], t) { + return i + } + } + return -1 +} + func atolwhex(p string) int64 { for hasPrefix(p, " ") || hasPrefix(p, "\t") { p = p[1:] diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go index a584ada702..a62e941229 100644 --- a/src/runtime/os_windows.go +++ b/src/runtime/os_windows.go @@ -1010,11 +1010,6 @@ func ctrlhandler1(_type uint32) uint32 { if sigsend(s) { return 1 } - if !islibrary && !isarchive { - // Only exit the program if we don't have a DLL. - // See https://golang.org/issues/35965. - exit(2) // SIGINT, SIGTERM, etc - } return 0 } diff --git a/src/runtime/panic.go b/src/runtime/panic.go index 615249f33c..127843b081 100644 --- a/src/runtime/panic.go +++ b/src/runtime/panic.go @@ -1283,12 +1283,6 @@ func startpanic_m() bool { } } -// throwReportQuirk, if non-nil, is called by throw after dumping the stacks. -// -// TODO(austin): Remove this after Go 1.15 when we remove the -// mlockGsignal workaround. 
-var throwReportQuirk func() - var didothers bool var deadlock mutex @@ -1335,10 +1329,6 @@ func dopanic_m(gp *g, pc, sp uintptr) bool { printDebugLog() - if throwReportQuirk != nil { - throwReportQuirk() - } - return docrash } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 035822216d..5e38b3194c 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/bytealg" "internal/cpu" "runtime/internal/atomic" "runtime/internal/sys" @@ -557,7 +558,6 @@ func schedinit() { sched.maxmcount = 10000 - tracebackinit() moduledataverify() stackinit() mallocinit() @@ -2575,15 +2575,20 @@ func injectglist(glist *gList) { return } - lock(&sched.lock) - npidle := int(sched.npidle) + npidle := int(atomic.Load(&sched.npidle)) + var globq gQueue var n int for n = 0; n < npidle && !q.empty(); n++ { - globrunqput(q.pop()) + g := q.pop() + globq.pushBack(g) + } + if n > 0 { + lock(&sched.lock) + globrunqputbatch(&globq, int32(n)) + unlock(&sched.lock) + startIdle(n) + qsize -= n } - unlock(&sched.lock) - startIdle(n) - qsize -= n if !q.empty() { runqputbatch(pp, &q, qsize) @@ -5460,7 +5465,7 @@ func haveexperiment(name string) bool { x := sys.Goexperiment for x != "" { xname := "" - i := index(x, ",") + i := bytealg.IndexByteString(x, ',') if i < 0 { xname, x = x, "" } else { diff --git a/src/runtime/rt0_freebsd_arm64.s b/src/runtime/rt0_freebsd_arm64.s index 3a348c33e2..a938d98262 100644 --- a/src/runtime/rt0_freebsd_arm64.s +++ b/src/runtime/rt0_freebsd_arm64.s @@ -45,8 +45,7 @@ TEXT _rt0_arm64_freebsd_lib(SB),NOSPLIT,$184 // Create a new thread to do the runtime initialization and return. MOVD _cgo_sys_thread_create(SB), R4 - CMP $0, R4 - BEQ nocgo + CBZ R4, nocgo MOVD $_rt0_arm64_freebsd_lib_go(SB), R0 MOVD $0, R1 SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved. 
diff --git a/src/runtime/rt0_netbsd_arm64.s b/src/runtime/rt0_netbsd_arm64.s index 75ecbe5176..2f3b5a5a87 100644 --- a/src/runtime/rt0_netbsd_arm64.s +++ b/src/runtime/rt0_netbsd_arm64.s @@ -44,8 +44,7 @@ TEXT _rt0_arm64_netbsd_lib(SB),NOSPLIT,$184 // Create a new thread to do the runtime initialization and return. MOVD _cgo_sys_thread_create(SB), R4 - CMP $0, R4 - BEQ nocgo + CBZ R4, nocgo MOVD $_rt0_arm64_netbsd_lib_go(SB), R0 MOVD $0, R1 SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved. diff --git a/src/runtime/rt0_openbsd_arm64.s b/src/runtime/rt0_openbsd_arm64.s index 12408f2eec..722fab6129 100644 --- a/src/runtime/rt0_openbsd_arm64.s +++ b/src/runtime/rt0_openbsd_arm64.s @@ -50,8 +50,7 @@ TEXT _rt0_arm64_openbsd_lib(SB),NOSPLIT,$184 // Create a new thread to do the runtime initialization and return. MOVD _cgo_sys_thread_create(SB), R4 - CMP $0, R4 - BEQ nocgo + CBZ R4, nocgo MOVD $_rt0_arm64_openbsd_lib_go(SB), R0 MOVD $0, R1 SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved. diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go index c65a534ef6..7c893aa25c 100644 --- a/src/runtime/runtime1.go +++ b/src/runtime/runtime1.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/bytealg" "runtime/internal/atomic" "runtime/internal/sys" "unsafe" @@ -347,13 +348,13 @@ func parsedebugvars() { for p := gogetenv("GODEBUG"); p != ""; { field := "" - i := index(p, ",") + i := bytealg.IndexByteString(p, ',') if i < 0 { field, p = p, "" } else { field, p = p[:i], p[i+1:] } - i = index(field, "=") + i = bytealg.IndexByteString(field, '=') if i < 0 { continue } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 755c409078..eba68da624 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -366,6 +366,12 @@ type sudog struct { // g.selectDone must be CAS'd to win the wake-up race. isSelect bool + // success indicates whether communication over channel c + // succeeded. 
It is true if the goroutine was awoken because a + // value was delivered over channel c, and false if awoken + // because c was closed. + success bool + parent *sudog // semaRoot binary tree waitlink *sudog // g.waiting list or semaRoot waittail *sudog // semaRoot @@ -846,10 +852,6 @@ type forcegcstate struct { idle uint32 } -// startup_random_data holds random bytes initialized at startup. These come from -// the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.go or os_linux_386.go). -var startupRandomData []byte - // extendRandom extends the random numbers in r[:n] to the whole slice r. // Treats n<0 as n==0. func extendRandom(r []byte, n int) { diff --git a/src/runtime/select.go b/src/runtime/select.go index a069e3e050..80768b285b 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -12,25 +12,12 @@ import ( const debugSelect = false -// scase.kind values. -// Known to compiler. -// Changes here must also be made in src/cmd/compile/internal/gc/select.go's walkselectcases. -const ( - caseNil = iota - caseRecv - caseSend - caseDefault -) - // Select case descriptor. // Known to compiler. // Changes here must also be made in src/cmd/internal/gc/select.go's scasetype. type scase struct { - c *hchan // chan - elem unsafe.Pointer // data element - kind uint16 - pc uintptr // race pc (for race detector / msan) - releasetime int64 + c *hchan // chan + elem unsafe.Pointer // data element } var ( @@ -38,15 +25,15 @@ var ( chanrecvpc = funcPC(chanrecv) ) -func selectsetpc(cas *scase) { - cas.pc = getcallerpc() +func selectsetpc(pc *uintptr) { + *pc = getcallerpc() } func sellock(scases []scase, lockorder []uint16) { var c *hchan for _, o := range lockorder { c0 := scases[o].c - if c0 != nil && c0 != c { + if c0 != c { c = c0 lock(&c.lock) } @@ -62,11 +49,8 @@ func selunlock(scases []scase, lockorder []uint16) { // the G that calls select runnable again and schedules it for execution. // When the G runs on another M, it locks all the locks and frees sel. 
// Now if the first M touches sel, it will access freed memory. - for i := len(scases) - 1; i >= 0; i-- { + for i := len(lockorder) - 1; i >= 0; i-- { c := scases[lockorder[i]].c - if c == nil { - break - } if i > 0 && c == scases[lockorder[i-1]].c { continue // will unlock it on the next iteration } @@ -112,11 +96,15 @@ func block() { // Both reside on the goroutine's stack (regardless of any escaping in // selectgo). // +// For race detector builds, pc0 points to an array of type +// [ncases]uintptr (also on the stack); for other builds, it's set to +// nil. +// // selectgo returns the index of the chosen scase, which matches the // ordinal position of its respective select{recv,send,default} call. // Also, if the chosen scase was a receive operation, it reports whether // a value was received. -func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { +func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, nsends, nrecvs int, block bool) (int, bool) { if debugSelect { print("select: cas0=", cas0, "\n") } @@ -126,25 +114,29 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { cas1 := (*[1 << 16]scase)(unsafe.Pointer(cas0)) order1 := (*[1 << 17]uint16)(unsafe.Pointer(order0)) + ncases := nsends + nrecvs scases := cas1[:ncases:ncases] pollorder := order1[:ncases:ncases] lockorder := order1[ncases:][:ncases:ncases] - // Replace send/receive cases involving nil channels with - // caseNil so logic below can assume non-nil channel. - for i := range scases { - cas := &scases[i] - if cas.c == nil && cas.kind != caseDefault { - *cas = scase{} + // Even when raceenabled is true, there might be select + // statements in packages compiled without -race (e.g., + // ensureSigM in runtime/signal_unix.go). 
+ var pcs []uintptr + if raceenabled && pc0 != nil { + pc1 := (*[1 << 16]uintptr)(unsafe.Pointer(pc0)) + pcs = pc1[:ncases:ncases] + } + casePC := func(casi int) uintptr { + if pcs == nil { + return 0 } + return pcs[casi] } var t0 int64 if blockprofilerate > 0 { t0 = cputicks() - for i := 0; i < ncases; i++ { - scases[i].releasetime = -1 - } } // The compiler rewrites selects that statically have @@ -156,15 +148,27 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { // optimizing (and needing to test). // generate permuted order - for i := 1; i < ncases; i++ { - j := fastrandn(uint32(i + 1)) - pollorder[i] = pollorder[j] + norder := 0 + for i := range scases { + cas := &scases[i] + + // Omit cases without channels from the poll and lock orders. + if cas.c == nil { + cas.elem = nil // allow GC + continue + } + + j := fastrandn(uint32(norder + 1)) + pollorder[norder] = pollorder[j] pollorder[j] = uint16(i) + norder++ } + pollorder = pollorder[:norder] + lockorder = lockorder[:norder] // sort the cases by Hchan address to get the locking order. // simple heap sort, to guarantee n log n time and constant stack footprint. - for i := 0; i < ncases; i++ { + for i := range lockorder { j := i // Start with the pollorder to permute cases on the same channel. 
c := scases[pollorder[i]].c @@ -175,7 +179,7 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { } lockorder[j] = pollorder[i] } - for i := ncases - 1; i >= 0; i-- { + for i := len(lockorder) - 1; i >= 0; i-- { o := lockorder[i] c := scases[o].c lockorder[i] = lockorder[0] @@ -199,7 +203,7 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { } if debugSelect { - for i := 0; i+1 < ncases; i++ { + for i := 0; i+1 < len(lockorder); i++ { if scases[lockorder[i]].c.sortkey() > scases[lockorder[i+1]].c.sortkey() { print("i=", i, " x=", lockorder[i], " y=", lockorder[i+1], "\n") throw("select: broken sort") @@ -221,23 +225,18 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { nextp **sudog ) -loop: // pass 1 - look for something already waiting - var dfli int - var dfl *scase var casi int var cas *scase + var caseSuccess bool + var caseReleaseTime int64 = -1 var recvOK bool - for i := 0; i < ncases; i++ { - casi = int(pollorder[i]) + for _, casei := range pollorder { + casi = int(casei) cas = &scases[casi] c = cas.c - switch cas.kind { - case caseNil: - continue - - case caseRecv: + if casi >= nsends { sg = c.sendq.dequeue() if sg != nil { goto recv @@ -248,10 +247,9 @@ loop: if c.closed != 0 { goto rclose } - - case caseSend: + } else { if raceenabled { - racereadpc(c.raceaddr(), cas.pc, chansendpc) + racereadpc(c.raceaddr(), casePC(casi), chansendpc) } if c.closed != 0 { goto sclose @@ -263,17 +261,12 @@ loop: if c.qcount < c.dataqsiz { goto bufsend } - - case caseDefault: - dfli = casi - dfl = cas } } - if dfl != nil { + if !block { selunlock(scases, lockorder) - casi = dfli - cas = dfl + casi = -1 goto retc } @@ -286,9 +279,6 @@ loop: for _, casei := range lockorder { casi = int(casei) cas = &scases[casi] - if cas.kind == caseNil { - continue - } c = cas.c sg := acquireSudog() sg.g = gp @@ -305,12 +295,10 @@ loop: *nextp = sg nextp = &sg.waitlink - switch cas.kind { - case caseRecv: - c.recvq.enqueue(sg) - - 
case caseSend: + if casi < nsends { c.sendq.enqueue(sg) + } else { + c.recvq.enqueue(sg) } } @@ -331,6 +319,7 @@ loop: // We singly-linked up the SudoGs in lock order. casi = -1 cas = nil + caseSuccess = false sglist = gp.waiting // Clear all elem before unlinking from gp.waiting. for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink { @@ -342,19 +331,17 @@ loop: for _, casei := range lockorder { k = &scases[casei] - if k.kind == caseNil { - continue - } - if sglist.releasetime > 0 { - k.releasetime = sglist.releasetime - } if sg == sglist { // sg has already been dequeued by the G that woke us up. casi = int(casei) cas = k + caseSuccess = sglist.success + if sglist.releasetime > 0 { + caseReleaseTime = sglist.releasetime + } } else { c = k.c - if k.kind == caseSend { + if int(casei) < nsends { c.sendq.dequeueSudoG(sglist) } else { c.recvq.dequeueSudoG(sglist) @@ -367,40 +354,35 @@ loop: } if cas == nil { - // We can wake up with gp.param == nil (so cas == nil) - // when a channel involved in the select has been closed. - // It is easiest to loop and re-run the operation; - // we'll see that it's now closed. - // Maybe some day we can signal the close explicitly, - // but we'd have to distinguish close-on-reader from close-on-writer. - // It's easiest not to duplicate the code and just recheck above. - // We know that something closed, and things never un-close, - // so we won't block again. 
- goto loop + throw("selectgo: bad wakeup") } c = cas.c if debugSelect { - print("wait-return: cas0=", cas0, " c=", c, " cas=", cas, " kind=", cas.kind, "\n") + print("wait-return: cas0=", cas0, " c=", c, " cas=", cas, " send=", casi < nsends, "\n") } - if cas.kind == caseRecv { - recvOK = true + if casi < nsends { + if !caseSuccess { + goto sclose + } + } else { + recvOK = caseSuccess } if raceenabled { - if cas.kind == caseRecv && cas.elem != nil { - raceWriteObjectPC(c.elemtype, cas.elem, cas.pc, chanrecvpc) - } else if cas.kind == caseSend { - raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc) + if casi < nsends { + raceReadObjectPC(c.elemtype, cas.elem, casePC(casi), chansendpc) + } else if cas.elem != nil { + raceWriteObjectPC(c.elemtype, cas.elem, casePC(casi), chanrecvpc) } } if msanenabled { - if cas.kind == caseRecv && cas.elem != nil { - msanwrite(cas.elem, c.elemtype.size) - } else if cas.kind == caseSend { + if casi < nsends { msanread(cas.elem, c.elemtype.size) + } else if cas.elem != nil { + msanwrite(cas.elem, c.elemtype.size) } } @@ -411,7 +393,7 @@ bufrecv: // can receive from buffer if raceenabled { if cas.elem != nil { - raceWriteObjectPC(c.elemtype, cas.elem, cas.pc, chanrecvpc) + raceWriteObjectPC(c.elemtype, cas.elem, casePC(casi), chanrecvpc) } raceacquire(chanbuf(c, c.recvx)) racerelease(chanbuf(c, c.recvx)) @@ -438,7 +420,7 @@ bufsend: if raceenabled { raceacquire(chanbuf(c, c.sendx)) racerelease(chanbuf(c, c.sendx)) - raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc) + raceReadObjectPC(c.elemtype, cas.elem, casePC(casi), chansendpc) } if msanenabled { msanread(cas.elem, c.elemtype.size) @@ -476,7 +458,7 @@ rclose: send: // can send to a sleeping receiver (sg) if raceenabled { - raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc) + raceReadObjectPC(c.elemtype, cas.elem, casePC(casi), chansendpc) } if msanenabled { msanread(cas.elem, c.elemtype.size) @@ -488,8 +470,8 @@ send: goto retc retc: - if cas.releasetime > 0 
{ - blockevent(cas.releasetime-t0, 1) + if caseReleaseTime > 0 { + blockevent(caseReleaseTime-t0, 1) } return casi, recvOK @@ -528,23 +510,57 @@ func reflect_rselect(cases []runtimeSelect) (int, bool) { block() } sel := make([]scase, len(cases)) - order := make([]uint16, 2*len(cases)) - for i := range cases { - rc := &cases[i] + orig := make([]int, len(cases)) + nsends, nrecvs := 0, 0 + dflt := -1 + for i, rc := range cases { + var j int switch rc.dir { case selectDefault: - sel[i] = scase{kind: caseDefault} + dflt = i + continue case selectSend: - sel[i] = scase{kind: caseSend, c: rc.ch, elem: rc.val} + j = nsends + nsends++ case selectRecv: - sel[i] = scase{kind: caseRecv, c: rc.ch, elem: rc.val} + nrecvs++ + j = len(cases) - nrecvs } - if raceenabled || msanenabled { - selectsetpc(&sel[i]) + + sel[j] = scase{c: rc.ch, elem: rc.val} + orig[j] = i + } + + // Only a default case. + if nsends+nrecvs == 0 { + return dflt, false + } + + // Compact sel and orig if necessary. + if nsends+nrecvs < len(cases) { + copy(sel[nsends:], sel[len(cases)-nrecvs:]) + copy(orig[nsends:], orig[len(cases)-nrecvs:]) + } + + order := make([]uint16, 2*(nsends+nrecvs)) + var pc0 *uintptr + if raceenabled { + pcs := make([]uintptr, nsends+nrecvs) + for i := range pcs { + selectsetpc(&pcs[i]) } + pc0 = &pcs[0] } - return selectgo(&sel[0], &order[0], len(cases)) + chosen, recvOK := selectgo(&sel[0], &order[0], pc0, nsends, nrecvs, dflt == -1) + + // Translate chosen back to caller's ordering. 
+ if chosen < 0 { + chosen = dflt + } else { + chosen = orig[chosen] + } + return chosen, recvOK } func (q *waitq) dequeueSudoG(sgp *sudog) { diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go index dd6d79f8ec..064a0ea100 100644 --- a/src/runtime/signal_unix.go +++ b/src/runtime/signal_unix.go @@ -272,6 +272,12 @@ func setProcessCPUProfiler(hz int32) { atomic.Storeuintptr(&fwdSig[_SIGPROF], getsig(_SIGPROF)) setsig(_SIGPROF, funcPC(sighandler)) } + + var it itimerval + it.it_interval.tv_sec = 0 + it.it_interval.set_usec(1000000 / hz) + it.it_value = it.it_interval + setitimer(_ITIMER_PROF, &it, nil) } else { // If the Go signal handler should be disabled by default, // switch back to the signal handler that was installed @@ -296,23 +302,16 @@ func setProcessCPUProfiler(hz int32) { setsig(_SIGPROF, h) } } + + setitimer(_ITIMER_PROF, &itimerval{}, nil) } } // setThreadCPUProfiler makes any thread-specific changes required to // implement profiling at a rate of hz. +// No changes required on Unix systems. func setThreadCPUProfiler(hz int32) { - var it itimerval - if hz == 0 { - setitimer(_ITIMER_PROF, &it, nil) - } else { - it.it_interval.tv_sec = 0 - it.it_interval.set_usec(1000000 / hz) - it.it_value = it.it_interval - setitimer(_ITIMER_PROF, &it, nil) - } - _g_ := getg() - _g_.m.profilehz = hz + getg().m.profilehz = hz } func sigpipe() { @@ -616,7 +615,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { print("signal arrived during cgo execution\n") gp = _g_.m.lockedg.ptr() } - if sig == _SIGILL { + if sig == _SIGILL || sig == _SIGFPE { // It would be nice to know how long the instruction is. // Unfortunately, that's complicated to do in general (mostly for x86 // and s930x, but other archs have non-standard instruction lengths also). 
diff --git a/src/runtime/slice_test.go b/src/runtime/slice_test.go index e963a43dd3..cd2bc26d1e 100644 --- a/src/runtime/slice_test.go +++ b/src/runtime/slice_test.go @@ -1,6 +1,7 @@ // Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + package runtime_test import ( diff --git a/src/runtime/string.go b/src/runtime/string.go index 0515b56573..9a601f0094 100644 --- a/src/runtime/string.go +++ b/src/runtime/string.go @@ -335,22 +335,6 @@ func gostringn(p *byte, l int) string { return s } -func index(s, t string) int { - if len(t) == 0 { - return 0 - } - for i := 0; i < len(s); i++ { - if s[i] == t[0] && hasPrefix(s[i:], t) { - return i - } - } - return -1 -} - -func contains(s, t string) bool { - return index(s, t) >= 0 -} - func hasPrefix(s, prefix string) bool { return len(s) >= len(prefix) && s[:len(prefix)] == prefix } @@ -499,37 +483,3 @@ func gostringw(strw *uint16) string { b[n2] = 0 // for luck return s[:n2] } - -// parseRelease parses a dot-separated version number. It follows the -// semver syntax, but allows the minor and patch versions to be -// elided. -func parseRelease(rel string) (major, minor, patch int, ok bool) { - // Strip anything after a dash or plus. - for i := 0; i < len(rel); i++ { - if rel[i] == '-' || rel[i] == '+' { - rel = rel[:i] - break - } - } - - next := func() (int, bool) { - for i := 0; i < len(rel); i++ { - if rel[i] == '.' 
{ - ver, ok := atoi(rel[:i]) - rel = rel[i+1:] - return ver, ok - } - } - ver, ok := atoi(rel) - rel = "" - return ver, ok - } - if major, ok = next(); !ok || rel == "" { - return - } - if minor, ok = next(); !ok || rel == "" { - return - } - patch, ok = next() - return -} diff --git a/src/runtime/string_test.go b/src/runtime/string_test.go index b9ac667533..4eda12c35d 100644 --- a/src/runtime/string_test.go +++ b/src/runtime/string_test.go @@ -454,34 +454,3 @@ func TestAtoi32(t *testing.T) { } } } - -type parseReleaseTest struct { - in string - major, minor, patch int -} - -var parseReleaseTests = []parseReleaseTest{ - {"", -1, -1, -1}, - {"x", -1, -1, -1}, - {"5", 5, 0, 0}, - {"5.12", 5, 12, 0}, - {"5.12-x", 5, 12, 0}, - {"5.12.1", 5, 12, 1}, - {"5.12.1-x", 5, 12, 1}, - {"5.12.1.0", 5, 12, 1}, - {"5.20496382327982653440", -1, -1, -1}, -} - -func TestParseRelease(t *testing.T) { - for _, test := range parseReleaseTests { - major, minor, patch, ok := runtime.ParseRelease(test.in) - if !ok { - major, minor, patch = -1, -1, -1 - } - if test.major != major || test.minor != minor || test.patch != patch { - t.Errorf("parseRelease(%q) = (%v, %v, %v) want (%v, %v, %v)", - test.in, major, minor, patch, - test.major, test.minor, test.patch) - } - } -} diff --git a/src/runtime/sys_darwin.go b/src/runtime/sys_darwin.go index 06474434c9..e4f19bbf41 100644 --- a/src/runtime/sys_darwin.go +++ b/src/runtime/sys_darwin.go @@ -489,9 +489,3 @@ func setNonblock(fd int32) { //go:cgo_import_dynamic libc_pthread_cond_wait pthread_cond_wait "/usr/lib/libSystem.B.dylib" //go:cgo_import_dynamic libc_pthread_cond_timedwait_relative_np pthread_cond_timedwait_relative_np "/usr/lib/libSystem.B.dylib" //go:cgo_import_dynamic libc_pthread_cond_signal pthread_cond_signal "/usr/lib/libSystem.B.dylib" - -// Magic incantation to get libSystem and friends actually dynamically linked. -// TODO: Why does the code require this? 
See cmd/link/internal/ld/go.go -//go:cgo_import_dynamic _ _ "/usr/lib/libSystem.B.dylib" -//go:cgo_import_dynamic _ _ "/System/Library/Frameworks/Security.framework/Versions/A/Security" -//go:cgo_import_dynamic _ _ "/System/Library/Frameworks/CoreFoundation.framework/Versions/A/CoreFoundation" diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s index 5b9b638ad7..1e3a834812 100644 --- a/src/runtime/sys_linux_386.s +++ b/src/runtime/sys_linux_386.s @@ -39,8 +39,6 @@ #define SYS_socketcall 102 #define SYS_setittimer 104 #define SYS_clone 120 -#define SYS_uname 122 -#define SYS_mlock 150 #define SYS_sched_yield 158 #define SYS_nanosleep 162 #define SYS_rt_sigreturn 173 @@ -808,20 +806,3 @@ TEXT runtime·sbrk0(SB),NOSPLIT,$0-4 INVOKE_SYSCALL MOVL AX, ret+0(FP) RET - -// func uname(utsname *new_utsname) int -TEXT ·uname(SB),NOSPLIT,$0-8 - MOVL $SYS_uname, AX - MOVL utsname+0(FP), BX - INVOKE_SYSCALL - MOVL AX, ret+4(FP) - RET - -// func mlock(addr, len uintptr) int -TEXT ·mlock(SB),NOSPLIT,$0-12 - MOVL $SYS_mlock, AX - MOVL addr+0(FP), BX - MOVL len+4(FP), CX - INVOKE_SYSCALL - MOVL AX, ret+8(FP) - RET diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index fe9c6bce85..8d90813589 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -33,10 +33,8 @@ #define SYS_clone 56 #define SYS_exit 60 #define SYS_kill 62 -#define SYS_uname 63 #define SYS_fcntl 72 #define SYS_sigaltstack 131 -#define SYS_mlock 149 #define SYS_arch_prctl 158 #define SYS_gettid 186 #define SYS_futex 202 @@ -214,7 +212,7 @@ TEXT runtime·walltime1(SB),NOSPLIT,$16-12 // due to stack probes inserted to avoid stack/heap collisions. // See issue #20427. - MOVQ SP, BP // Save old SP; BP unchanged by C code. + MOVQ SP, R12 // Save old SP; R12 unchanged by C code. 
get_tls(CX) MOVQ g(CX), AX @@ -252,7 +250,7 @@ noswitch: MOVQ 0(SP), AX // sec MOVQ 8(SP), DX // nsec ret: - MOVQ BP, SP // Restore real SP + MOVQ R12, SP // Restore real SP // Restore vdsoPC, vdsoSP // We don't worry about being signaled between the two stores. // If we are not in a signal handler, we'll restore vdsoSP to 0, @@ -279,7 +277,7 @@ fallback: TEXT runtime·nanotime1(SB),NOSPLIT,$16-8 // Switch to g0 stack. See comment above in runtime·walltime. - MOVQ SP, BP // Save old SP; BP unchanged by C code. + MOVQ SP, R12 // Save old SP; R12 unchanged by C code. get_tls(CX) MOVQ g(CX), AX @@ -317,7 +315,7 @@ noswitch: MOVQ 0(SP), AX // sec MOVQ 8(SP), DX // nsec ret: - MOVQ BP, SP // Restore real SP + MOVQ R12, SP // Restore real SP // Restore vdsoPC, vdsoSP // We don't worry about being signaled between the two stores. // If we are not in a signal handler, we'll restore vdsoSP to 0, @@ -594,13 +592,25 @@ TEXT runtime·clone(SB),NOSPLIT,$0 MOVQ stk+8(FP), SI MOVQ $0, DX MOVQ $0, R10 - + MOVQ $0, R8 // Copy mp, gp, fn off parent stack for use by child. // Careful: Linux system call clobbers CX and R11. - MOVQ mp+16(FP), R8 + MOVQ mp+16(FP), R13 MOVQ gp+24(FP), R9 MOVQ fn+32(FP), R12 - + CMPQ R13, $0 // m + JEQ nog1 + CMPQ R9, $0 // g + JEQ nog1 + LEAQ m_tls(R13), R8 +#ifdef GOOS_android + // Android stores the TLS offset in runtime·tls_g. + SUBQ runtime·tls_g(SB), R8 +#else + ADDQ $8, R8 // ELF wants to use -8(FS) +#endif + ORQ $0x00080000, DI //add flag CLONE_SETTLS(0x00080000) to call clone +nog1: MOVL $SYS_clone, AX SYSCALL @@ -614,27 +624,23 @@ TEXT runtime·clone(SB),NOSPLIT,$0 MOVQ SI, SP // If g or m are nil, skip Go-related setup. - CMPQ R8, $0 // m - JEQ nog + CMPQ R13, $0 // m + JEQ nog2 CMPQ R9, $0 // g - JEQ nog + JEQ nog2 // Initialize m->procid to Linux tid MOVL $SYS_gettid, AX SYSCALL - MOVQ AX, m_procid(R8) - - // Set FS to point at m->tls. 
- LEAQ m_tls(R8), DI - CALL runtime·settls(SB) + MOVQ AX, m_procid(R13) // In child, set up new stack get_tls(CX) - MOVQ R8, g_m(R9) + MOVQ R13, g_m(R9) MOVQ R9, g(CX) CALL runtime·stackcheck(SB) -nog: +nog2: // Call fn CALL R12 @@ -789,20 +795,3 @@ TEXT runtime·sbrk0(SB),NOSPLIT,$0-8 SYSCALL MOVQ AX, ret+0(FP) RET - -// func uname(utsname *new_utsname) int -TEXT ·uname(SB),NOSPLIT,$0-16 - MOVQ utsname+0(FP), DI - MOVL $SYS_uname, AX - SYSCALL - MOVQ AX, ret+8(FP) - RET - -// func mlock(addr, len uintptr) int -TEXT ·mlock(SB),NOSPLIT,$0-24 - MOVQ addr+0(FP), DI - MOVQ len+8(FP), SI - MOVL $SYS_mlock, AX - SYSCALL - MOVQ AX, ret+16(FP) - RET diff --git a/src/runtime/testdata/testprog/checkptr.go b/src/runtime/testdata/testprog/checkptr.go index 45e6fb1aa5..e0a2794f4c 100644 --- a/src/runtime/testdata/testprog/checkptr.go +++ b/src/runtime/testdata/testprog/checkptr.go @@ -10,6 +10,7 @@ func init() { register("CheckPtrAlignmentNoPtr", CheckPtrAlignmentNoPtr) register("CheckPtrAlignmentPtr", CheckPtrAlignmentPtr) register("CheckPtrArithmetic", CheckPtrArithmetic) + register("CheckPtrArithmetic2", CheckPtrArithmetic2) register("CheckPtrSize", CheckPtrSize) register("CheckPtrSmall", CheckPtrSmall) } @@ -32,6 +33,13 @@ func CheckPtrArithmetic() { sink2 = (*int)(unsafe.Pointer(i)) } +func CheckPtrArithmetic2() { + var x [2]int64 + p := unsafe.Pointer(&x[1]) + var one uintptr = 1 + sink2 = unsafe.Pointer(uintptr(p) & ^one) +} + func CheckPtrSize() { p := new(int64) sink2 = p diff --git a/src/runtime/time.go b/src/runtime/time.go index fdb5066b24..f895bf8443 100644 --- a/src/runtime/time.go +++ b/src/runtime/time.go @@ -403,7 +403,7 @@ func dodeltimer0(pp *p) { } // modtimer modifies an existing timer. -// This is called by the netpoll code or time.Ticker.Reset. +// This is called by the netpoll code or time.Ticker.Reset or time.Timer.Reset. // Reports whether the timer was modified before it was run. 
func modtimer(t *timer, when, period int64, f func(interface{}, uintptr), arg interface{}, seq uintptr) bool { if when < 0 { diff --git a/src/runtime/trace/trace_stack_test.go b/src/runtime/trace/trace_stack_test.go index cfc0419b72..be3adc9801 100644 --- a/src/runtime/trace/trace_stack_test.go +++ b/src/runtime/trace/trace_stack_test.go @@ -252,7 +252,7 @@ func TestTraceSymbolize(t *testing.T) { {trace.EvGoSysCall, []frame{ {"syscall.read", 0}, {"syscall.Read", 0}, - {"internal/poll.ignoringEINTR", 0}, + {"internal/poll.ignoringEINTRIO", 0}, {"internal/poll.(*FD).Read", 0}, {"os.(*File).read", 0}, {"os.(*File).Read", 0}, diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 944c8473d2..7850eceafa 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/bytealg" "runtime/internal/atomic" "runtime/internal/sys" "unsafe" @@ -35,16 +36,6 @@ import ( const usesLR = sys.MinFrameSize > 0 -var skipPC uintptr - -func tracebackinit() { - // Go variable initialization happens late during runtime startup. - // Instead of initializing the variables above in the declarations, - // schedinit calls this function so that the variables are - // initialized and available earlier in the startup sequence. - skipPC = funcPC(skipPleaseUseCallersFrames) -} - // Traceback over the deferred function calls. // Report them like calls that have been invoked but not started executing yet. func tracebackdefers(gp *g, callback func(*stkframe, unsafe.Pointer) bool, v unsafe.Pointer) { @@ -82,9 +73,6 @@ func tracebackdefers(gp *g, callback func(*stkframe, unsafe.Pointer) bool, v uns const sizeofSkipFunction = 256 -// This function is defined in asm.s to be sizeofSkipFunction bytes long. -func skipPleaseUseCallersFrames() - // Generic traceback. Handles runtime stack prints (pcbuf == nil), // the runtime.Callers function (pcbuf != nil), as well as the garbage // collector (callback != nil). 
A little clunky to merge these, but avoids @@ -848,7 +836,7 @@ func showfuncinfo(f funcInfo, firstFrame bool, funcID, childID funcID) bool { return true } - return contains(name, ".") && (!hasPrefix(name, "runtime.") || isExportedRuntime(name)) + return bytealg.IndexByteString(name, '.') >= 0 && (!hasPrefix(name, "runtime.") || isExportedRuntime(name)) } // isExportedRuntime reports whether name is an exported runtime function. |
