| | | |
|---|---|---|
| author | Russ Cox <rsc@golang.org> | 2017-12-06 00:35:28 -0500 |
| committer | Russ Cox <rsc@golang.org> | 2017-12-06 01:03:36 -0500 |
| commit | 185e6094fd968b35b80e56aad1286c66bb2cc261 (patch) | |
| tree | 411babe570d6faa1e99251a9167123afd07407d2 /src/runtime | |
| parent | c36033a379a4907fb75309416ffcf2904e613ab9 (diff) | |
| parent | a032f74bf0b40a94669159e7d7e96722eb76199b (diff) | |
| download | go-185e6094fd968b35b80e56aad1286c66bb2cc261.tar.xz | |
[dev.boringcrypto] all: merge master (nearly Go 1.10 beta 1) into dev.boringcrypto
This is a git merge of master into dev.boringcrypto.
The branch was previously based on release-branch.go1.9,
so there are a handful of spurious conflicts that would
also arise if trying to merge master into release-branch.go1.9
(which we never do). Those have all been resolved by taking
the original file from master, discarding any Go 1.9-specific
edits.
all.bash passes on darwin/amd64, which is to say without
actually using BoringCrypto.
Go 1.10-related fixes to BoringCrypto itself will be in a followup CL.
This CL is just the merge.
Change-Id: I4c97711fec0fb86761913dcde28d25c001246c35
Diffstat (limited to 'src/runtime')
303 files changed, 9047 insertions, 4778 deletions
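Among the non-BoringCrypto changes carried in by this merge, the src/runtime/append_test.go hunk below folds the flat BenchmarkGrowSlice* functions into nested sub-benchmarks. The following is a minimal, standalone sketch of that testing.B.Run pattern, not the CL's exact code; the package name is illustrative and only two of the CL's element types are shown:

```go
package append_bench_test // illustrative name, not part of the CL

import "testing"

// BenchmarkGrow groups related slice-growth cases as sub-benchmarks.
// Each b.Run invocation runs its closure as an independent benchmark
// with its own b.N and its own timer, so results are reported as
// BenchmarkGrow/Byte, BenchmarkGrow/Int, and so on.
func BenchmarkGrow(b *testing.B) {
	b.Run("Byte", func(b *testing.B) {
		x := make([]byte, 9) // one-time setup; negligible next to b.N iterations
		for i := 0; i < b.N; i++ {
			_ = append([]byte(nil), x...)
		}
	})
	b.Run("Int", func(b *testing.B) {
		x := make([]int, 9)
		for i := 0; i < b.N; i++ {
			_ = append([]int(nil), x...)
		}
	})
}
```

Sub-benchmarks like these can also be selected individually, e.g. `go test -bench=Grow/Byte -run='^$'`.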
diff --git a/src/runtime/alg.go b/src/runtime/alg.go index 8d388da5a2..89125f48ba 100644 --- a/src/runtime/alg.go +++ b/src/runtime/alg.go @@ -47,26 +47,25 @@ type typeAlg struct { func memhash0(p unsafe.Pointer, h uintptr) uintptr { return h } + func memhash8(p unsafe.Pointer, h uintptr) uintptr { return memhash(p, h, 1) } + func memhash16(p unsafe.Pointer, h uintptr) uintptr { return memhash(p, h, 2) } -func memhash32(p unsafe.Pointer, h uintptr) uintptr { - return memhash(p, h, 4) -} -func memhash64(p unsafe.Pointer, h uintptr) uintptr { - return memhash(p, h, 8) -} + func memhash128(p unsafe.Pointer, h uintptr) uintptr { return memhash(p, h, 16) } -// memhash_varlen is defined in assembly because it needs access -// to the closure. It appears here to provide an argument -// signature for the assembly routine. -func memhash_varlen(p unsafe.Pointer, h uintptr) uintptr +//go:nosplit +func memhash_varlen(p unsafe.Pointer, h uintptr) uintptr { + ptr := getclosureptr() + size := *(*uintptr)(unsafe.Pointer(ptr + unsafe.Sizeof(h))) + return memhash(p, h, size) +} var algarray = [alg_max]typeAlg{ alg_NOEQ: {nil, nil}, diff --git a/src/runtime/append_test.go b/src/runtime/append_test.go index 6bd8f3bd95..ef1e812c0d 100644 --- a/src/runtime/append_test.go +++ b/src/runtime/append_test.go @@ -18,42 +18,52 @@ func BenchmarkMakeSlice(b *testing.B) { } } -func BenchmarkGrowSliceBytes(b *testing.B) { - b.StopTimer() - var x = make([]byte, 9) - b.StartTimer() - for i := 0; i < b.N; i++ { - _ = append([]byte(nil), x...) - } -} - -func BenchmarkGrowSliceInts(b *testing.B) { - b.StopTimer() - var x = make([]int, 9) - b.StartTimer() - for i := 0; i < b.N; i++ { - _ = append([]int(nil), x...) - } -} - -func BenchmarkGrowSlicePtr(b *testing.B) { - b.StopTimer() - var x = make([]*byte, 9) - b.StartTimer() - for i := 0; i < b.N; i++ { - _ = append([]*byte(nil), x...) - } -} +type ( + struct24 struct{ a, b, c int64 } + struct32 struct{ a, b, c, d int64 } + struct40 struct{ a, b, c, d, e int64 } +) -type struct24 struct{ a, b, c int64 } +func BenchmarkGrowSlice(b *testing.B) { + b.Run("Byte", func(b *testing.B) { + x := make([]byte, 9) + for i := 0; i < b.N; i++ { + _ = append([]byte(nil), x...) + } + }) + b.Run("Int", func(b *testing.B) { + x := make([]int, 9) + for i := 0; i < b.N; i++ { + _ = append([]int(nil), x...) + } + }) + b.Run("Ptr", func(b *testing.B) { + x := make([]*byte, 9) + for i := 0; i < b.N; i++ { + _ = append([]*byte(nil), x...) + } + }) + b.Run("Struct", func(b *testing.B) { + b.Run("24", func(b *testing.B) { + x := make([]struct24, 9) + for i := 0; i < b.N; i++ { + _ = append([]struct24(nil), x...) + } + }) + b.Run("32", func(b *testing.B) { + x := make([]struct32, 9) + for i := 0; i < b.N; i++ { + _ = append([]struct32(nil), x...) + } + }) + b.Run("40", func(b *testing.B) { + x := make([]struct40, 9) + for i := 0; i < b.N; i++ { + _ = append([]struct40(nil), x...) + } + }) -func BenchmarkGrowSliceStruct24Bytes(b *testing.B) { - b.StopTimer() - var x = make([]struct24, 9) - b.StartTimer() - for i := 0; i < b.N; i++ { - _ = append([]struct24(nil), x...) - } + }) } func BenchmarkAppend(b *testing.B) { diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s index 5bbf2866f3..80a145187c 100644 --- a/src/runtime/asm_386.s +++ b/src/runtime/asm_386.s @@ -7,10 +7,93 @@ #include "funcdata.h" #include "textflag.h" +// _rt0_386 is common startup code for most 386 systems when using +// internal linking. 
This is the entry point for the program from the +// kernel for an ordinary -buildmode=exe program. The stack holds the +// number of arguments and the C-style argv. +TEXT _rt0_386(SB),NOSPLIT,$8 + MOVL 8(SP), AX // argc + LEAL 12(SP), BX // argv + MOVL AX, 0(SP) + MOVL BX, 4(SP) + JMP runtime·rt0_go(SB) + +// _rt0_386_lib is common startup code for most 386 systems when +// using -buildmode=c-archive or -buildmode=c-shared. The linker will +// arrange to invoke this function as a global constructor (for +// c-archive) or when the shared library is loaded (for c-shared). +// We expect argc and argv to be passed on the stack following the +// usual C ABI. +TEXT _rt0_386_lib(SB),NOSPLIT,$0 + PUSHL BP + MOVL SP, BP + PUSHL BX + PUSHL SI + PUSHL DI + + MOVL 8(BP), AX + MOVL AX, _rt0_386_lib_argc<>(SB) + MOVL 12(BP), AX + MOVL AX, _rt0_386_lib_argv<>(SB) + + // Synchronous initialization. + CALL runtime·libpreinit(SB) + + SUBL $8, SP + + // Create a new thread to do the runtime initialization. + MOVL _cgo_sys_thread_create(SB), AX + TESTL AX, AX + JZ nocgo + + // Align stack to call C function. + // We moved SP to BP above, but BP was clobbered by the libpreinit call. + MOVL SP, BP + ANDL $~15, SP + + MOVL $_rt0_386_lib_go(SB), BX + MOVL BX, 0(SP) + MOVL $0, 4(SP) + + CALL AX + + MOVL BP, SP + + JMP restore + +nocgo: + MOVL $0x800000, 0(SP) // stacksize = 8192KB + MOVL $_rt0_386_lib_go(SB), AX + MOVL AX, 4(SP) // fn + CALL runtime·newosproc0(SB) + +restore: + ADDL $8, SP + POPL DI + POPL SI + POPL BX + POPL BP + RET + +// _rt0_386_lib_go initializes the Go runtime. +// This is started in a separate thread by _rt0_386_lib. +TEXT _rt0_386_lib_go(SB),NOSPLIT,$8 + MOVL _rt0_386_lib_argc<>(SB), AX + MOVL AX, 0(SP) + MOVL _rt0_386_lib_argv<>(SB), AX + MOVL AX, 4(SP) + JMP runtime·rt0_go(SB) + +DATA _rt0_386_lib_argc<>(SB)/4, $0 +GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4 +DATA _rt0_386_lib_argv<>(SB)/4, $0 +GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4 + TEXT runtime·rt0_go(SB),NOSPLIT,$0 - // copy arguments forward on an even stack - MOVL argc+0(FP), AX - MOVL argv+4(FP), BX + // Copy arguments forward on an even stack. + // Users of this function jump to it, they don't call it. + MOVL 0(SP), AX + MOVL 4(SP), BX SUBL $128, SP // plenty of scratch ANDL $~15, SP MOVL AX, 120(SP) // save argc, argv away @@ -279,18 +362,6 @@ TEXT runtime·gosave(SB), NOSPLIT, $0-4 // restore state from Gobuf; longjmp TEXT runtime·gogo(SB), NOSPLIT, $8-4 MOVL buf+0(FP), BX // gobuf - - // If ctxt is not nil, invoke deletion barrier before overwriting. - MOVL gobuf_ctxt(BX), DX - TESTL DX, DX - JZ nilctxt - LEAL gobuf_ctxt(BX), AX - MOVL AX, 0(SP) - MOVL $0, 4(SP) - CALL runtime·writebarrierptr_prewrite(SB) - MOVL buf+0(FP), BX - -nilctxt: MOVL gobuf_g(BX), DX MOVL 0(DX), CX // make sure g != nil get_tls(CX) @@ -403,11 +474,12 @@ switch: RET noswitch: - // already on system stack, just call directly + // already on system stack; tail call the function + // Using a tail call here cleans up tracebacks since we won't stop + // at an intermediate systemstack. MOVL DI, DX MOVL 0(DI), DI - CALL DI - RET + JMP DI /* * support for morestack @@ -453,7 +525,7 @@ TEXT runtime·morestack(SB),NOSPLIT,$0-0 MOVL SI, (g_sched+gobuf_g)(SI) LEAL 4(SP), AX // f's SP MOVL AX, (g_sched+gobuf_sp)(SI) - // newstack will fill gobuf.ctxt. + MOVL DX, (g_sched+gobuf_ctxt)(SI) // Call newstack on m->g0's stack. 
MOVL m_g0(BX), BP @@ -461,10 +533,8 @@ TEXT runtime·morestack(SB),NOSPLIT,$0-0 MOVL (g_sched+gobuf_sp)(BP), AX MOVL -4(AX), BX // fault if CALL would, before smashing SP MOVL AX, SP - PUSHL DX // ctxt argument CALL runtime·newstack(SB) MOVL $0, 0x1003 // crash if newstack returns - POPL DX // keep balance check happy RET TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0 @@ -849,12 +919,6 @@ TEXT runtime·stackcheck(SB), NOSPLIT, $0-0 INT $3 RET -TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8 - MOVL argp+0(FP),AX // addr of first arg - MOVL -4(AX),AX // get calling pc - MOVL AX, ret+4(FP) - RET - // func cputicks() int64 TEXT runtime·cputicks(SB),NOSPLIT,$0-8 CMPB runtime·support_sse2(SB), $1 @@ -885,23 +949,6 @@ TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0 TEXT runtime·emptyfunc(SB),0,$0-0 RET -// memhash_varlen(p unsafe.Pointer, h seed) uintptr -// redirects to memhash(p, h, size) using the size -// stored in the closure. -TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12 - GO_ARGS - NO_LOCAL_POINTERS - MOVL p+0(FP), AX - MOVL h+4(FP), BX - MOVL 4(DX), CX - MOVL AX, 0(SP) - MOVL BX, 4(SP) - MOVL CX, 8(SP) - CALL runtime·memhash(SB) - MOVL 12(SP), AX - MOVL AX, ret+8(FP) - RET - // hash function using AES hardware instructions TEXT runtime·aeshash(SB),NOSPLIT,$0-16 MOVL p+0(FP), AX // ptr to data @@ -1323,23 +1370,6 @@ eq: MOVB $1, ret+8(FP) RET -// eqstring tests whether two strings are equal. -// The compiler guarantees that strings passed -// to eqstring have equal length. -// See runtime_test.go:eqstring_generic for -// equivalent Go code. -TEXT runtime·eqstring(SB),NOSPLIT,$0-17 - MOVL s1_base+0(FP), SI - MOVL s2_base+8(FP), DI - CMPL SI, DI - JEQ same - MOVL s1_len+4(FP), BX - LEAL ret+16(FP), AX - JMP runtime·memeqbody(SB) -same: - MOVB $1, ret+16(FP) - RET - TEXT bytes·Equal(SB),NOSPLIT,$0-25 MOVL a_len+4(FP), BX MOVL b_len+16(FP), CX @@ -1637,19 +1667,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$0-0 // traceback from goexit1 must hit code range of goexit BYTE $0x90 // NOP -// Prefetching doesn't seem to help. -TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4 - RET - -TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4 - RET - -TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4 - RET - -TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4 - RET - // Add a module's moduledata to the linked list of moduledata objects. This // is called from .init_array by a function generated in the linker and so // follows the platform ABI wrt register preservation -- it only touches AX, diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 6405be92de..576a61ca6c 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -7,6 +7,83 @@ #include "funcdata.h" #include "textflag.h" +// _rt0_amd64 is common startup code for most amd64 systems when using +// internal linking. This is the entry point for the program from the +// kernel for an ordinary -buildmode=exe program. The stack holds the +// number of arguments and the C-style argv. +TEXT _rt0_amd64(SB),NOSPLIT,$-8 + MOVQ 0(SP), DI // argc + LEAQ 8(SP), SI // argv + JMP runtime·rt0_go(SB) + +// main is common startup code for most amd64 systems when using +// external linking. The C startup code will call the symbol "main" +// passing argc and argv in the usual C ABI registers DI and SI. +TEXT main(SB),NOSPLIT,$-8 + JMP runtime·rt0_go(SB) + +// _rt0_amd64_lib is common startup code for most amd64 systems when +// using -buildmode=c-archive or -buildmode=c-shared. 
The linker will +// arrange to invoke this function as a global constructor (for +// c-archive) or when the shared library is loaded (for c-shared). +// We expect argc and argv to be passed in the usual C ABI registers +// DI and SI. +TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50 + // Align stack per ELF ABI requirements. + MOVQ SP, AX + ANDQ $~15, SP + // Save C ABI callee-saved registers, as caller may need them. + MOVQ BX, 0x10(SP) + MOVQ BP, 0x18(SP) + MOVQ R12, 0x20(SP) + MOVQ R13, 0x28(SP) + MOVQ R14, 0x30(SP) + MOVQ R15, 0x38(SP) + MOVQ AX, 0x40(SP) + + MOVQ DI, _rt0_amd64_lib_argc<>(SB) + MOVQ SI, _rt0_amd64_lib_argv<>(SB) + + // Synchronous initialization. + CALL runtime·libpreinit(SB) + + // Create a new thread to finish Go runtime initialization. + MOVQ _cgo_sys_thread_create(SB), AX + TESTQ AX, AX + JZ nocgo + MOVQ $_rt0_amd64_lib_go(SB), DI + MOVQ $0, SI + CALL AX + JMP restore + +nocgo: + MOVQ $0x800000, 0(SP) // stacksize + MOVQ $_rt0_amd64_lib_go(SB), AX + MOVQ AX, 8(SP) // fn + CALL runtime·newosproc0(SB) + +restore: + MOVQ 0x10(SP), BX + MOVQ 0x18(SP), BP + MOVQ 0x20(SP), R12 + MOVQ 0x28(SP), R13 + MOVQ 0x30(SP), R14 + MOVQ 0x38(SP), R15 + MOVQ 0x40(SP), SP + RET + +// _rt0_amd64_lib_go initializes the Go runtime. +// This is started in a separate thread by _rt0_amd64_lib. +TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0 + MOVQ _rt0_amd64_lib_argc<>(SB), DI + MOVQ _rt0_amd64_lib_argv<>(SB), SI + JMP runtime·rt0_go(SB) + +DATA _rt0_amd64_lib_argc<>(SB)/8, $0 +GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8 +DATA _rt0_amd64_lib_argv<>(SB)/8, $0 +GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8 + TEXT runtime·rt0_go(SB),NOSPLIT,$0 // copy arguments forward on an even stack MOVQ DI, AX // argc @@ -227,18 +304,6 @@ TEXT runtime·gosave(SB), NOSPLIT, $0-8 // restore state from Gobuf; longjmp TEXT runtime·gogo(SB), NOSPLIT, $16-8 MOVQ buf+0(FP), BX // gobuf - - // If ctxt is not nil, invoke deletion barrier before overwriting. - MOVQ gobuf_ctxt(BX), AX - TESTQ AX, AX - JZ nilctxt - LEAQ gobuf_ctxt(BX), AX - MOVQ AX, 0(SP) - MOVQ $0, 8(SP) - CALL runtime·writebarrierptr_prewrite(SB) - MOVQ buf+0(FP), BX - -nilctxt: MOVQ gobuf_g(BX), DX MOVQ 0(DX), CX // make sure g != nil get_tls(CX) @@ -354,11 +419,12 @@ switch: RET noswitch: - // already on m stack, just call directly + // already on m stack; tail call the function + // Using a tail call here cleans up tracebacks since we won't stop + // at an intermediate systemstack. MOVQ DI, DX MOVQ 0(DI), DI - CALL DI - RET + JMP DI /* * support for morestack @@ -405,16 +471,14 @@ TEXT runtime·morestack(SB),NOSPLIT,$0-0 LEAQ 8(SP), AX // f's SP MOVQ AX, (g_sched+gobuf_sp)(SI) MOVQ BP, (g_sched+gobuf_bp)(SI) - // newstack will fill gobuf.ctxt. + MOVQ DX, (g_sched+gobuf_ctxt)(SI) // Call newstack on m->g0's stack. MOVQ m_g0(BX), BX MOVQ BX, g(CX) MOVQ (g_sched+gobuf_sp)(BX), SP - PUSHQ DX // ctxt argument CALL runtime·newstack(SB) MOVQ $0, 0x1003 // crash if newstack returns - POPQ DX // keep balance check happy RET // morestack but not preserving ctxt. @@ -833,12 +897,6 @@ TEXT runtime·stackcheck(SB), NOSPLIT, $0-0 INT $3 RET -TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16 - MOVQ argp+0(FP),AX // addr of first arg - MOVQ -8(AX),AX // get calling pc - MOVQ AX, ret+8(FP) - RET - // func cputicks() int64 TEXT runtime·cputicks(SB),NOSPLIT,$0-0 CMPB runtime·lfenceBeforeRdtsc(SB), $1 @@ -854,23 +912,6 @@ done: MOVQ AX, ret+0(FP) RET -// memhash_varlen(p unsafe.Pointer, h seed) uintptr -// redirects to memhash(p, h, size) using the size -// stored in the closure. 
-TEXT runtime·memhash_varlen(SB),NOSPLIT,$32-24 - GO_ARGS - NO_LOCAL_POINTERS - MOVQ p+0(FP), AX - MOVQ h+8(FP), BX - MOVQ 8(DX), CX - MOVQ AX, 0(SP) - MOVQ BX, 8(SP) - MOVQ CX, 16(SP) - CALL runtime·memhash(SB) - MOVQ 24(SP), AX - MOVQ AX, ret+16(FP) - RET - // hash function using AES hardware instructions TEXT runtime·aeshash(SB),NOSPLIT,$0-32 MOVQ p+0(FP), AX // ptr to data @@ -1343,23 +1384,6 @@ eq: MOVB $1, ret+16(FP) RET -// eqstring tests whether two strings are equal. -// The compiler guarantees that strings passed -// to eqstring have equal length. -// See runtime_test.go:eqstring_generic for -// equivalent Go code. -TEXT runtime·eqstring(SB),NOSPLIT,$0-33 - MOVQ s1_base+0(FP), SI - MOVQ s2_base+16(FP), DI - CMPQ SI, DI - JEQ eq - MOVQ s1_len+8(FP), BX - LEAQ ret+32(FP), AX - JMP runtime·memeqbody(SB) -eq: - MOVB $1, ret+32(FP) - RET - // a in SI // b in DI // count in BX @@ -2339,26 +2363,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$0-0 // traceback from goexit1 must hit code range of goexit BYTE $0x90 // NOP -TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8 - MOVQ addr+0(FP), AX - PREFETCHT0 (AX) - RET - -TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8 - MOVQ addr+0(FP), AX - PREFETCHT1 (AX) - RET - -TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8 - MOVQ addr+0(FP), AX - PREFETCHT2 (AX) - RET - -TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8 - MOVQ addr+0(FP), AX - PREFETCHNTA (AX) - RET - // This is called from .init_array and follows the platform, not Go, ABI. TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0 PUSHQ R15 // The access to global variables below implicitly uses R15, which is callee-save @@ -2367,3 +2371,87 @@ TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0 MOVQ DI, runtime·lastmoduledatap(SB) POPQ R15 RET + +// gcWriteBarrier performs a heap pointer write and informs the GC. +// +// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments: +// - DI is the destination of the write +// - AX is the value being written at DI +// It clobbers FLAGS. It does not clobber any general-purpose registers, +// but may clobber others (e.g., SSE registers). +TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120 + // Save the registers clobbered by the fast path. This is slightly + // faster than having the caller spill these. + MOVQ R14, 104(SP) + MOVQ R13, 112(SP) + // TODO: Consider passing g.m.p in as an argument so they can be shared + // across a sequence of write barriers. + get_tls(R13) + MOVQ g(R13), R13 + MOVQ g_m(R13), R13 + MOVQ m_p(R13), R13 + MOVQ (p_wbBuf+wbBuf_next)(R13), R14 + // Increment wbBuf.next position. + LEAQ 16(R14), R14 + MOVQ R14, (p_wbBuf+wbBuf_next)(R13) + CMPQ R14, (p_wbBuf+wbBuf_end)(R13) + // Record the write. + MOVQ AX, -16(R14) // Record value + MOVQ (DI), R13 // TODO: This turns bad writes into bad reads. + MOVQ R13, -8(R14) // Record *slot + // Is the buffer full? (flags set in CMPQ above) + JEQ flush +ret: + MOVQ 104(SP), R14 + MOVQ 112(SP), R13 + // Do the write. + MOVQ AX, (DI) + RET + +flush: + // Save all general purpose registers since these could be + // clobbered by wbBufFlush and were not saved by the caller. + // It is possible for wbBufFlush to clobber other registers + // (e.g., SSE registers), but the compiler takes care of saving + // those in the caller if necessary. This strikes a balance + // with registers that are likely to be used. + // + // We don't have type information for these, but all code under + // here is NOSPLIT, so nothing will observe these. 
+ // + // TODO: We could strike a different balance; e.g., saving X0 + // and not saving GP registers that are less likely to be used. + MOVQ DI, 0(SP) // Also first argument to wbBufFlush + MOVQ AX, 8(SP) // Also second argument to wbBufFlush + MOVQ BX, 16(SP) + MOVQ CX, 24(SP) + MOVQ DX, 32(SP) + // DI already saved + MOVQ SI, 40(SP) + MOVQ BP, 48(SP) + MOVQ R8, 56(SP) + MOVQ R9, 64(SP) + MOVQ R10, 72(SP) + MOVQ R11, 80(SP) + MOVQ R12, 88(SP) + // R13 already saved + // R14 already saved + MOVQ R15, 96(SP) + + // This takes arguments DI and AX + CALL runtime·wbBufFlush(SB) + + MOVQ 0(SP), DI + MOVQ 8(SP), AX + MOVQ 16(SP), BX + MOVQ 24(SP), CX + MOVQ 32(SP), DX + MOVQ 40(SP), SI + MOVQ 48(SP), BP + MOVQ 56(SP), R8 + MOVQ 64(SP), R9 + MOVQ 72(SP), R10 + MOVQ 80(SP), R11 + MOVQ 88(SP), R12 + MOVQ 96(SP), R15 + JMP ret diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s index 6367b3fef4..7fee79aefb 100644 --- a/src/runtime/asm_amd64p32.s +++ b/src/runtime/asm_amd64p32.s @@ -198,18 +198,6 @@ TEXT runtime·gosave(SB), NOSPLIT, $0-4 // restore state from Gobuf; longjmp TEXT runtime·gogo(SB), NOSPLIT, $8-4 MOVL buf+0(FP), BX // gobuf - - // If ctxt is not nil, invoke deletion barrier before overwriting. - MOVL gobuf_ctxt(BX), DX - TESTL DX, DX - JZ nilctxt - LEAL gobuf_ctxt(BX), AX - MOVL AX, 0(SP) - MOVL $0, 4(SP) - CALL runtime·writebarrierptr_prewrite(SB) - MOVL buf+0(FP), BX - -nilctxt: MOVL gobuf_g(BX), DX MOVL 0(DX), CX // make sure g != nil get_tls(CX) @@ -318,10 +306,11 @@ switch: noswitch: // already on m stack, just call directly + // Using a tail call here cleans up tracebacks since we won't stop + // at an intermediate systemstack. MOVL DI, DX MOVL 0(DI), DI - CALL DI - RET + JMP DI /* * support for morestack @@ -368,16 +357,14 @@ TEXT runtime·morestack(SB),NOSPLIT,$0-0 MOVL SI, (g_sched+gobuf_g)(SI) LEAL 8(SP), AX // f's SP MOVL AX, (g_sched+gobuf_sp)(SI) - // newstack will fill gobuf.ctxt. + MOVL DX, (g_sched+gobuf_ctxt)(SI) // Call newstack on m->g0's stack. MOVL m_g0(BX), BX MOVL BX, g(CX) MOVL (g_sched+gobuf_sp)(BX), SP - PUSHQ DX // ctxt argument CALL runtime·newstack(SB) MOVL $0, 0x1003 // crash if newstack returns - POPQ DX // keep balance check happy RET // morestack trampolines @@ -559,30 +546,6 @@ TEXT runtime·stackcheck(SB), NOSPLIT, $0-0 MOVL 0, AX RET -TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-8 - MOVL ptr+0(FP), DI - MOVL n+4(FP), CX - MOVQ CX, BX - ANDQ $3, BX - SHRQ $2, CX - MOVQ $0, AX - CLD - REP - STOSL - MOVQ BX, CX - REP - STOSB - // Note: we zero only 4 bytes at a time so that the tail is at most - // 3 bytes. That guarantees that we aren't zeroing pointers with STOSB. - // See issue 13160. - RET - -TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12 - MOVL argp+0(FP),AX // addr of first arg - MOVL -8(AX),AX // get calling pc - MOVL AX, ret+8(FP) - RET - // int64 runtime·cputicks(void) TEXT runtime·cputicks(SB),NOSPLIT,$0-0 RDTSC @@ -591,23 +554,6 @@ TEXT runtime·cputicks(SB),NOSPLIT,$0-0 MOVQ AX, ret+0(FP) RET -// memhash_varlen(p unsafe.Pointer, h seed) uintptr -// redirects to memhash(p, h, size) using the size -// stored in the closure. 
-TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12 - GO_ARGS - NO_LOCAL_POINTERS - MOVL p+0(FP), AX - MOVL h+4(FP), BX - MOVL 4(DX), CX - MOVL AX, 0(SP) - MOVL BX, 4(SP) - MOVL CX, 8(SP) - CALL runtime·memhash(SB) - MOVL 16(SP), AX - MOVL AX, ret+8(FP) - RET - // hash function using AES hardware instructions // For now, our one amd64p32 system (NaCl) does not // support using AES instructions, so have not bothered to @@ -658,24 +604,6 @@ eq: MOVB $1, ret+8(FP) RET -// eqstring tests whether two strings are equal. -// The compiler guarantees that strings passed -// to eqstring have equal length. -// See runtime_test.go:eqstring_generic for -// equivalent Go code. -TEXT runtime·eqstring(SB),NOSPLIT,$0-17 - MOVL s1_base+0(FP), SI - MOVL s2_base+8(FP), DI - CMPL SI, DI - JEQ same - MOVL s1_len+4(FP), BX - CALL runtime·memeqbody(SB) - MOVB AX, ret+16(FP) - RET -same: - MOVB $1, ret+16(FP) - RET - // a in SI // b in DI // count in BX @@ -1042,27 +970,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$0-0 // traceback from goexit1 must hit code range of goexit BYTE $0x90 // NOP -TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4 - MOVL addr+0(FP), AX - PREFETCHT0 (AX) - RET - -TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4 - MOVL addr+0(FP), AX - PREFETCHT1 (AX) - RET - - -TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4 - MOVL addr+0(FP), AX - PREFETCHT2 (AX) - RET - -TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4 - MOVL addr+0(FP), AX - PREFETCHNTA (AX) - RET - TEXT ·checkASM(SB),NOSPLIT,$0-1 MOVB $1, ret+0(FP) RET diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s index 09b6759749..306984e8f7 100644 --- a/src/runtime/asm_arm.s +++ b/src/runtime/asm_arm.s @@ -7,14 +7,112 @@ #include "funcdata.h" #include "textflag.h" +// _rt0_arm is common startup code for most ARM systems when using +// internal linking. This is the entry point for the program from the +// kernel for an ordinary -buildmode=exe program. The stack holds the +// number of arguments and the C-style argv. +TEXT _rt0_arm(SB),NOSPLIT,$-4 + MOVW (R13), R0 // argc + MOVW $4(R13), R1 // argv + B runtime·rt0_go(SB) + +// main is common startup code for most ARM systems when using +// external linking. The C startup code will call the symbol "main" +// passing argc and argv in the usual C ABI registers R0 and R1. +TEXT main(SB),NOSPLIT,$-4 + B runtime·rt0_go(SB) + +// _rt0_arm_lib is common startup code for most ARM systems when +// using -buildmode=c-archive or -buildmode=c-shared. The linker will +// arrange to invoke this function as a global constructor (for +// c-archive) or when the shared library is loaded (for c-shared). +// We expect argc and argv to be passed in the usual C ABI registers +// R0 and R1. +TEXT _rt0_arm_lib(SB),NOSPLIT,$104 + // Preserve callee-save registers. Raspberry Pi's dlopen(), for example, + // actually cares that R11 is preserved. + MOVW R4, 12(R13) + MOVW R5, 16(R13) + MOVW R6, 20(R13) + MOVW R7, 24(R13) + MOVW R8, 28(R13) + MOVW R11, 32(R13) + + // Skip floating point registers on GOARM < 6. + MOVB runtime·goarm(SB), R11 + CMP $6, R11 + BLT skipfpsave + MOVD F8, (32+8*1)(R13) + MOVD F9, (32+8*2)(R13) + MOVD F10, (32+8*3)(R13) + MOVD F11, (32+8*4)(R13) + MOVD F12, (32+8*5)(R13) + MOVD F13, (32+8*6)(R13) + MOVD F14, (32+8*7)(R13) + MOVD F15, (32+8*8)(R13) +skipfpsave: + // Save argc/argv. + MOVW R0, _rt0_arm_lib_argc<>(SB) + MOVW R1, _rt0_arm_lib_argv<>(SB) + + // Synchronous initialization. + CALL runtime·libpreinit(SB) + + // Create a new thread to do the runtime initialization. 
+ MOVW _cgo_sys_thread_create(SB), R2 + CMP $0, R2 + BEQ nocgo + MOVW $_rt0_arm_lib_go<>(SB), R0 + MOVW $0, R1 + BL (R2) + B rr +nocgo: + MOVW $0x800000, R0 // stacksize = 8192KB + MOVW $_rt0_arm_lib_go<>(SB), R1 // fn + MOVW R0, 4(R13) + MOVW R1, 8(R13) + BL runtime·newosproc0(SB) +rr: + // Restore callee-save registers and return. + MOVB runtime·goarm(SB), R11 + CMP $6, R11 + BLT skipfprest + MOVD (32+8*1)(R13), F8 + MOVD (32+8*2)(R13), F9 + MOVD (32+8*3)(R13), F10 + MOVD (32+8*4)(R13), F11 + MOVD (32+8*5)(R13), F12 + MOVD (32+8*6)(R13), F13 + MOVD (32+8*7)(R13), F14 + MOVD (32+8*8)(R13), F15 +skipfprest: + MOVW 12(R13), R4 + MOVW 16(R13), R5 + MOVW 20(R13), R6 + MOVW 24(R13), R7 + MOVW 28(R13), R8 + MOVW 32(R13), R11 + RET + +// _rt0_arm_lib_go initializes the Go runtime. +// This is started in a separate thread by _rt0_arm_lib. +TEXT _rt0_arm_lib_go<>(SB),NOSPLIT,$8 + MOVW _rt0_arm_lib_argc<>(SB), R0 + MOVW _rt0_arm_lib_argv<>(SB), R1 + B runtime·rt0_go(SB) + +DATA _rt0_arm_lib_argc<>(SB)/4,$0 +GLOBL _rt0_arm_lib_argc<>(SB),NOPTR,$4 +DATA _rt0_arm_lib_argv<>(SB)/4,$0 +GLOBL _rt0_arm_lib_argv<>(SB),NOPTR,$4 + // using frame size $-4 means do not save LR on stack. +// argc is in R0, argv is in R1. TEXT runtime·rt0_go(SB),NOSPLIT,$-4 MOVW $0xcafebabe, R12 // copy arguments forward on an even stack // use R13 instead of SP to avoid linker rewriting the offsets - MOVW 0(R13), R0 // argc - MOVW 4(R13), R1 // argv SUB $64, R13 // plenty of scratch AND $~7, R13 MOVW R0, 60(R13) // save argc, argv away @@ -129,19 +227,6 @@ TEXT runtime·gosave(SB),NOSPLIT,$-4-4 // restore state from Gobuf; longjmp TEXT runtime·gogo(SB),NOSPLIT,$8-4 MOVW buf+0(FP), R1 - - // If ctxt is not nil, invoke deletion barrier before overwriting. - MOVW gobuf_ctxt(R1), R0 - CMP $0, R0 - B.EQ nilctxt - MOVW $gobuf_ctxt(R1), R0 - MOVW R0, 4(R13) - MOVW $0, R0 - MOVW R0, 8(R13) - BL runtime·writebarrierptr_prewrite(SB) - MOVW buf+0(FP), R1 - -nilctxt: MOVW gobuf_g(R1), R0 BL setg<>(SB) @@ -273,10 +358,12 @@ switch: RET noswitch: + // Using a tail call here cleans up tracebacks since we won't stop + // at an intermediate systemstack. MOVW R0, R7 MOVW 0(R0), R0 - BL (R0) - RET + MOVW.P 4(R13), R14 // restore LR + B (R0) /* * support for morestack @@ -314,7 +401,7 @@ TEXT runtime·morestack(SB),NOSPLIT,$-4-0 MOVW R13, (g_sched+gobuf_sp)(g) MOVW LR, (g_sched+gobuf_pc)(g) MOVW R3, (g_sched+gobuf_lr)(g) - // newstack will fill gobuf.ctxt. + MOVW R7, (g_sched+gobuf_ctxt)(g) // Called from f. // Set m->morebuf to f's caller. @@ -328,8 +415,7 @@ TEXT runtime·morestack(SB),NOSPLIT,$-4-0 BL setg<>(SB) MOVW (g_sched+gobuf_sp)(g), R13 MOVW $0, R0 - MOVW.W R0, -8(R13) // create a call frame on g0 - MOVW R7, 4(R13) // ctxt argument + MOVW.W R0, -4(R13) // create a call frame on g0 (saved LR) BL runtime·newstack(SB) // Not reached, but make sure the return PC from the call to newstack @@ -677,9 +763,9 @@ TEXT setg<>(SB),NOSPLIT,$-4-0 MOVW g, R0 RET -TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8 - MOVW 8(R13), R0 // LR saved by caller - MOVW R0, ret+4(FP) +TEXT runtime·getcallerpc(SB),NOSPLIT,$-4-4 + MOVW 0(R13), R0 // LR saved by caller + MOVW R0, ret+0(FP) RET TEXT runtime·emptyfunc(SB),0,$0-0 @@ -719,23 +805,6 @@ TEXT runtime·aeshashstr(SB),NOSPLIT,$-4-0 MOVW $0, R0 MOVW (R0), R1 -// memhash_varlen(p unsafe.Pointer, h seed) uintptr -// redirects to memhash(p, h, size) using the size -// stored in the closure. 
-TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12 - GO_ARGS - NO_LOCAL_POINTERS - MOVW p+0(FP), R0 - MOVW h+4(FP), R1 - MOVW 4(R7), R2 - MOVW R0, 4(R13) - MOVW R1, 8(R13) - MOVW R2, 12(R13) - BL runtime·memhash(SB) - MOVW 16(R13), R0 - MOVW R0, ret+8(FP) - RET - // memequal(p, q unsafe.Pointer, size uintptr) bool TEXT runtime·memequal(SB),NOSPLIT,$-4-13 MOVW a+0(FP), R1 @@ -830,31 +899,6 @@ samebytes: MOVW R0, (R7) RET -// eqstring tests whether two strings are equal. -// The compiler guarantees that strings passed -// to eqstring have equal length. -// See runtime_test.go:eqstring_generic for -// equivalent Go code. -TEXT runtime·eqstring(SB),NOSPLIT,$-4-17 - MOVW s1_base+0(FP), R2 - MOVW s2_base+8(FP), R3 - MOVW $1, R8 - MOVB R8, ret+16(FP) - CMP R2, R3 - RET.EQ - MOVW s1_len+4(FP), R0 - ADD R2, R0, R6 -loop: - CMP R2, R6 - RET.EQ - MOVBU.P 1(R2), R4 - MOVBU.P 1(R3), R5 - CMP R4, R5 - BEQ loop - MOVW $0, R8 - MOVB R8, ret+16(FP) - RET - // TODO: share code with memequal? TEXT bytes·Equal(SB),NOSPLIT,$0-25 MOVW a_len+4(FP), R1 @@ -973,18 +1017,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$-4-0 // traceback from goexit1 must hit code range of goexit MOVW R0, R0 // NOP -TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4 - RET - -TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4 - RET - -TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4 - RET - -TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4 - RET - // x -> x/1000000, x%1000000, called from Go with args, results on stack. TEXT runtime·usplit(SB),NOSPLIT,$0-12 MOVW x+0(FP), R0 diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s index 30ecec7675..9bf0646c8d 100644 --- a/src/runtime/asm_arm64.s +++ b/src/runtime/asm_arm64.s @@ -122,18 +122,6 @@ TEXT runtime·gosave(SB), NOSPLIT, $-8-8 // restore state from Gobuf; longjmp TEXT runtime·gogo(SB), NOSPLIT, $24-8 MOVD buf+0(FP), R5 - - // If ctxt is not nil, invoke deletion barrier before overwriting. - MOVD gobuf_ctxt(R5), R0 - CMP $0, R0 - BEQ nilctxt - MOVD $gobuf_ctxt(R5), R0 - MOVD R0, 8(RSP) - MOVD ZR, 16(RSP) - BL runtime·writebarrierptr_prewrite(SB) - MOVD buf+0(FP), R5 - -nilctxt: MOVD gobuf_g(R5), g BL runtime·save_g(SB) @@ -251,9 +239,11 @@ switch: noswitch: // already on m stack, just call directly + // Using a tail call here cleans up tracebacks since we won't stop + // at an intermediate systemstack. MOVD 0(R26), R3 // code pointer - BL (R3) - RET + MOVD.P 16(RSP), R30 // restore LR + B (R3) /* * support for morestack @@ -289,7 +279,7 @@ TEXT runtime·morestack(SB),NOSPLIT,$-8-0 MOVD R0, (g_sched+gobuf_sp)(g) MOVD LR, (g_sched+gobuf_pc)(g) MOVD R3, (g_sched+gobuf_lr)(g) - // newstack will fill gobuf.ctxt. + MOVD R26, (g_sched+gobuf_ctxt)(g) // Called from f. // Set m->morebuf to f's callers. 
@@ -303,8 +293,7 @@ TEXT runtime·morestack(SB),NOSPLIT,$-8-0 BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R0 MOVD R0, RSP - MOVD.W $0, -16(RSP) // create a call frame on g0 - MOVD R26, 8(RSP) // ctxt argument + MOVD.W $0, -16(RSP) // create a call frame on g0 (saved LR; keep 16-aligned) BL runtime·newstack(SB) // Not reached, but make sure the return PC from the call to newstack @@ -368,16 +357,26 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ NO_LOCAL_POINTERS; \ /* copy arguments to stack */ \ MOVD arg+16(FP), R3; \ - MOVWU argsize+24(FP), R4; \ - MOVD RSP, R5; \ - ADD $(8-1), R5; \ - SUB $1, R3; \ - ADD R5, R4; \ - CMP R5, R4; \ - BEQ 4(PC); \ - MOVBU.W 1(R3), R6; \ - MOVBU.W R6, 1(R5); \ - B -4(PC); \ + MOVWU argsize+24(FP), R4; \ + ADD $8, RSP, R5; \ + BIC $0xf, R4, R6; \ + CBZ R6, 6(PC); \ + /* if R6=(argsize&~15) != 0 */ \ + ADD R6, R5, R6; \ + /* copy 16 bytes a time */ \ + LDP.P 16(R3), (R7, R8); \ + STP.P (R7, R8), 16(R5); \ + CMP R5, R6; \ + BNE -3(PC); \ + AND $0xf, R4, R6; \ + CBZ R6, 6(PC); \ + /* if R6=(argsize&15) != 0 */ \ + ADD R6, R5, R6; \ + /* copy 1 byte a time for the rest */ \ + MOVBU.P 1(R3), R7; \ + MOVBU.P R7, 1(R5); \ + CMP R5, R6; \ + BNE -3(PC); \ /* call function */ \ MOVD f+8(FP), R26; \ MOVD (R26), R0; \ @@ -704,52 +703,27 @@ TEXT setg_gcc<>(SB),NOSPLIT,$8 MOVD savedR27-8(SP), R27 RET -TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16 - MOVD 16(RSP), R0 // LR saved by caller - MOVD R0, ret+8(FP) +TEXT runtime·getcallerpc(SB),NOSPLIT,$-8-8 + MOVD 0(RSP), R0 // LR saved by caller + MOVD R0, ret+0(FP) RET TEXT runtime·abort(SB),NOSPLIT,$-8-0 B (ZR) UNDEF -// memhash_varlen(p unsafe.Pointer, h seed) uintptr -// redirects to memhash(p, h, size) using the size -// stored in the closure. -TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24 - GO_ARGS - NO_LOCAL_POINTERS - MOVD p+0(FP), R3 - MOVD h+8(FP), R4 - MOVD 8(R26), R5 - MOVD R3, 8(RSP) - MOVD R4, 16(RSP) - MOVD R5, 24(RSP) - BL runtime·memhash(SB) - MOVD 32(RSP), R3 - MOVD R3, ret+16(FP) - RET - -// memequal(p, q unsafe.Pointer, size uintptr) bool +// memequal(a, b unsafe.Pointer, size uintptr) bool TEXT runtime·memequal(SB),NOSPLIT,$-8-25 - MOVD a+0(FP), R1 + MOVD size+16(FP), R1 + // short path to handle 0-byte case + CBZ R1, equal + MOVD a+0(FP), R0 MOVD b+8(FP), R2 - MOVD size+16(FP), R3 - ADD R1, R3, R6 + MOVD $ret+24(FP), R8 + B runtime·memeqbody<>(SB) +equal: MOVD $1, R0 MOVB R0, ret+24(FP) - CMP R1, R2 - BEQ done -loop: - CMP R1, R6 - BEQ done - MOVBU.P 1(R1), R4 - MOVBU.P 1(R2), R5 - CMP R4, R5 - BEQ loop - - MOVB $0, ret+24(FP) -done: RET // memequal_varlen(a, b unsafe.Pointer) bool @@ -823,102 +797,234 @@ samebytes: MOVD R4, (R7) RET -// eqstring tests whether two strings are equal. -// The compiler guarantees that strings passed -// to eqstring have equal length. -// See runtime_test.go:eqstring_generic for -// equivalent Go code. 
-TEXT runtime·eqstring(SB),NOSPLIT,$0-33 - MOVD s1_base+0(FP), R0 - MOVD s1_len+8(FP), R1 - MOVD s2_base+16(FP), R2 - ADD R0, R1 // end -loop: - CMP R0, R1 - BEQ equal // reaches the end - MOVBU.P 1(R0), R4 - MOVBU.P 1(R2), R5 - CMP R4, R5 - BEQ loop -notequal: - MOVB ZR, ret+32(FP) - RET -equal: - MOVD $1, R0 - MOVB R0, ret+32(FP) - RET - // // functions for other packages // TEXT bytes·IndexByte(SB),NOSPLIT,$0-40 MOVD b+0(FP), R0 - MOVD b_len+8(FP), R1 - MOVBU c+24(FP), R2 // byte to find - MOVD R0, R4 // store base for later - ADD R0, R1 // end -loop: - CMP R0, R1 - BEQ notfound - MOVBU.P 1(R0), R3 - CMP R2, R3 - BNE loop - - SUB $1, R0 // R0 will be one beyond the position we want - SUB R4, R0 // remove base - MOVD R0, ret+32(FP) - RET - -notfound: - MOVD $-1, R0 - MOVD R0, ret+32(FP) - RET + MOVD b_len+8(FP), R2 + MOVBU c+24(FP), R1 + MOVD $ret+32(FP), R8 + B runtime·indexbytebody<>(SB) TEXT strings·IndexByte(SB),NOSPLIT,$0-32 MOVD s+0(FP), R0 - MOVD s_len+8(FP), R1 - MOVBU c+16(FP), R2 // byte to find - MOVD R0, R4 // store base for later - ADD R0, R1 // end + MOVD s_len+8(FP), R2 + MOVBU c+16(FP), R1 + MOVD $ret+24(FP), R8 + B runtime·indexbytebody<>(SB) + +// input: +// R0: data +// R1: byte to search +// R2: data len +// R8: address to put result +TEXT runtime·indexbytebody<>(SB),NOSPLIT,$0 + // Core algorithm: + // For each 32-byte chunk we calculate a 64-bit syndrome value, + // with two bits per byte. For each tuple, bit 0 is set if the + // relevant byte matched the requested character and bit 1 is + // not used (faster than using a 32bit syndrome). Since the bits + // in the syndrome reflect exactly the order in which things occur + // in the original string, counting trailing zeros allows to + // identify exactly which byte has matched. + + CBZ R2, fail + MOVD R0, R11 + // Magic constant 0x40100401 allows us to identify + // which lane matches the requested byte. + // 0x40100401 = ((1<<0) + (4<<8) + (16<<16) + (64<<24)) + // Different bytes have different bit masks (i.e: 1, 4, 16, 64) + MOVD $0x40100401, R5 + VMOV R1, V0.B16 + // Work with aligned 32-byte chunks + BIC $0x1f, R0, R3 + VMOV R5, V5.S4 + ANDS $0x1f, R0, R9 + AND $0x1f, R2, R10 + BEQ loop + + // Input string is not 32-byte aligned. We calculate the + // syndrome value for the aligned 32 bytes block containing + // the first bytes and mask off the irrelevant part. + VLD1.P (R3), [V1.B16, V2.B16] + SUB $0x20, R9, R4 + ADDS R4, R2, R2 + VCMEQ V0.B16, V1.B16, V3.B16 + VCMEQ V0.B16, V2.B16, V4.B16 + VAND V5.B16, V3.B16, V3.B16 + VAND V5.B16, V4.B16, V4.B16 + VADDP V4.B16, V3.B16, V6.B16 // 256->128 + VADDP V6.B16, V6.B16, V6.B16 // 128->64 + VMOV V6.D[0], R6 + // Clear the irrelevant lower bits + LSL $1, R9, R4 + LSR R4, R6, R6 + LSL R4, R6, R6 + // The first block can also be the last + BLS masklast + // Have we found something already? 
+ CBNZ R6, tail + loop: - CMP R0, R1 - BEQ notfound - MOVBU.P 1(R0), R3 - CMP R2, R3 - BNE loop + VLD1.P (R3), [V1.B16, V2.B16] + SUBS $0x20, R2, R2 + VCMEQ V0.B16, V1.B16, V3.B16 + VCMEQ V0.B16, V2.B16, V4.B16 + // If we're out of data we finish regardless of the result + BLS end + // Use a fast check for the termination condition + VORR V4.B16, V3.B16, V6.B16 + VADDP V6.D2, V6.D2, V6.D2 + VMOV V6.D[0], R6 + // We're not out of data, loop if we haven't found the character + CBZ R6, loop + +end: + // Termination condition found, let's calculate the syndrome value + VAND V5.B16, V3.B16, V3.B16 + VAND V5.B16, V4.B16, V4.B16 + VADDP V4.B16, V3.B16, V6.B16 + VADDP V6.B16, V6.B16, V6.B16 + VMOV V6.D[0], R6 + // Only do the clear for the last possible block with less than 32 bytes + // Condition flags come from SUBS in the loop + BHS tail + +masklast: + // Clear the irrelevant upper bits + ADD R9, R10, R4 + AND $0x1f, R4, R4 + SUB $0x20, R4, R4 + NEG R4<<1, R4 + LSL R4, R6, R6 + LSR R4, R6, R6 - SUB $1, R0 // R0 will be one beyond the position we want - SUB R4, R0 // remove base - MOVD R0, ret+24(FP) +tail: + // Check that we have found a character + CBZ R6, fail + // Count the trailing zeros using bit reversing + RBIT R6, R6 + // Compensate the last post-increment + SUB $0x20, R3, R3 + // And count the leading zeros + CLZ R6, R6 + // R6 is twice the offset into the fragment + ADD R6>>1, R3, R0 + // Compute the offset result + SUB R11, R0, R0 + MOVD R0, (R8) RET -notfound: +fail: MOVD $-1, R0 - MOVD R0, ret+24(FP) + MOVD R0, (R8) RET -// TODO: share code with memequal? +// Equal(a, b []byte) bool TEXT bytes·Equal(SB),NOSPLIT,$0-49 MOVD a_len+8(FP), R1 MOVD b_len+32(FP), R3 - CMP R1, R3 // unequal lengths are not equal - BNE notequal + CMP R1, R3 + // unequal lengths are not equal + BNE not_equal + // short path to handle 0-byte case + CBZ R1, equal MOVD a+0(FP), R0 MOVD b+24(FP), R2 - ADD R0, R1 // end -loop: - CMP R0, R1 - BEQ equal // reaches the end - MOVBU.P 1(R0), R4 - MOVBU.P 1(R2), R5 - CMP R4, R5 - BEQ loop -notequal: + MOVD $ret+48(FP), R8 + B runtime·memeqbody<>(SB) +equal: + MOVD $1, R0 + MOVB R0, ret+48(FP) + RET +not_equal: MOVB ZR, ret+48(FP) RET + +// input: +// R0: pointer a +// R1: data len +// R2: pointer b +// R8: address to put result +TEXT runtime·memeqbody<>(SB),NOSPLIT,$0 + CMP $1, R1 + // handle 1-byte special case for better performance + BEQ one + CMP $16, R1 + // handle specially if length < 16 + BLO tail + BIC $0x3f, R1, R3 + CBZ R3, chunk16 + // work with 64-byte chunks + ADD R3, R0, R6 // end of chunks +chunk64_loop: + VLD1.P (R0), [V0.D2, V1.D2, V2.D2, V3.D2] + VLD1.P (R2), [V4.D2, V5.D2, V6.D2, V7.D2] + VCMEQ V0.D2, V4.D2, V8.D2 + VCMEQ V1.D2, V5.D2, V9.D2 + VCMEQ V2.D2, V6.D2, V10.D2 + VCMEQ V3.D2, V7.D2, V11.D2 + VAND V8.B16, V9.B16, V8.B16 + VAND V8.B16, V10.B16, V8.B16 + VAND V8.B16, V11.B16, V8.B16 + CMP R0, R6 + VMOV V8.D[0], R4 + VMOV V8.D[1], R5 + CBZ R4, not_equal + CBZ R5, not_equal + BNE chunk64_loop + AND $0x3f, R1, R1 + CBZ R1, equal +chunk16: + // work with 16-byte chunks + BIC $0xf, R1, R3 + CBZ R3, tail + ADD R3, R0, R6 // end of chunks +chunk16_loop: + VLD1.P (R0), [V0.D2] + VLD1.P (R2), [V1.D2] + VCMEQ V0.D2, V1.D2, V2.D2 + CMP R0, R6 + VMOV V2.D[0], R4 + VMOV V2.D[1], R5 + CBZ R4, not_equal + CBZ R5, not_equal + BNE chunk16_loop + AND $0xf, R1, R1 + CBZ R1, equal +tail: + // special compare of tail with length < 16 + TBZ $3, R1, lt_8 + MOVD.P 8(R0), R4 + MOVD.P 8(R2), R5 + CMP R4, R5 + BNE not_equal +lt_8: + TBZ $2, R1, lt_4 + MOVWU.P 4(R0), R4 
+ MOVWU.P 4(R2), R5 + CMP R4, R5 + BNE not_equal +lt_4: + TBZ $1, R1, lt_2 + MOVHU.P 2(R0), R4 + MOVHU.P 2(R2), R5 + CMP R4, R5 + BNE not_equal +lt_2: + TBZ $0, R1, equal +one: + MOVBU (R0), R4 + MOVBU (R2), R5 + CMP R4, R5 + BNE not_equal equal: MOVD $1, R0 - MOVB R0, ret+48(FP) + MOVB R0, (R8) + RET +not_equal: + MOVB ZR, (R8) RET TEXT runtime·return0(SB), NOSPLIT, $0 @@ -931,19 +1037,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$-8-0 MOVD R0, R0 // NOP BL runtime·goexit1(SB) // does not return -// TODO(aram): use PRFM here. -TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8 - RET - -TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8 - RET - -TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8 - RET - -TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8 - RET - TEXT runtime·sigreturn(SB),NOSPLIT,$0-0 RET diff --git a/src/runtime/asm_mips64x.s b/src/runtime/asm_mips64x.s index 57d45785f1..12cea00adc 100644 --- a/src/runtime/asm_mips64x.s +++ b/src/runtime/asm_mips64x.s @@ -108,17 +108,6 @@ TEXT runtime·gosave(SB), NOSPLIT, $-8-8 // restore state from Gobuf; longjmp TEXT runtime·gogo(SB), NOSPLIT, $16-8 MOVV buf+0(FP), R3 - - // If ctxt is not nil, invoke deletion barrier before overwriting. - MOVV gobuf_ctxt(R3), R1 - BEQ R1, nilctxt - MOVV $gobuf_ctxt(R3), R1 - MOVV R1, 8(R29) - MOVV R0, 16(R29) - JAL runtime·writebarrierptr_prewrite(SB) - MOVV buf+0(FP), R3 - -nilctxt: MOVV gobuf_g(R3), g // make sure g is not nil JAL runtime·save_g(SB) @@ -225,9 +214,12 @@ switch: noswitch: // already on m stack, just call directly + // Using a tail call here cleans up tracebacks since we won't stop + // at an intermediate systemstack. MOVV 0(REGCTXT), R4 // code pointer - JAL (R4) - RET + MOVV 0(R29), R31 // restore LR + ADDV $8, R29 + JMP (R4) /* * support for morestack @@ -260,7 +252,7 @@ TEXT runtime·morestack(SB),NOSPLIT,$-8-0 MOVV R29, (g_sched+gobuf_sp)(g) MOVV R31, (g_sched+gobuf_pc)(g) MOVV R3, (g_sched+gobuf_lr)(g) - // newstack will fill gobuf.ctxt. + MOVV REGCTXT, (g_sched+gobuf_ctxt)(g) // Called from f. // Set m->morebuf to f's caller. @@ -273,9 +265,8 @@ TEXT runtime·morestack(SB),NOSPLIT,$-8-0 JAL runtime·save_g(SB) MOVV (g_sched+gobuf_sp)(g), R29 // Create a stack frame on g0 to call newstack. - MOVV R0, -16(R29) // Zero saved LR in frame - ADDV $-16, R29 - MOVV REGCTXT, 8(R29) // ctxt argument + MOVV R0, -8(R29) // Zero saved LR in frame + ADDV $-8, R29 JAL runtime·newstack(SB) // Not reached, but make sure the return PC from the call to newstack @@ -616,32 +607,15 @@ TEXT setg_gcc<>(SB),NOSPLIT,$0-0 JAL runtime·save_g(SB) RET -TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16 - MOVV 16(R29), R1 // LR saved by caller - MOVV R1, ret+8(FP) +TEXT runtime·getcallerpc(SB),NOSPLIT,$-8-8 + MOVV 0(R29), R1 // LR saved by caller + MOVV R1, ret+0(FP) RET TEXT runtime·abort(SB),NOSPLIT,$-8-0 MOVW (R0), R0 UNDEF -// memhash_varlen(p unsafe.Pointer, h seed) uintptr -// redirects to memhash(p, h, size) using the size -// stored in the closure. -TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24 - GO_ARGS - NO_LOCAL_POINTERS - MOVV p+0(FP), R1 - MOVV h+8(FP), R2 - MOVV 8(REGCTXT), R3 - MOVV R1, 8(R29) - MOVV R2, 16(R29) - MOVV R3, 24(R29) - JAL runtime·memhash(SB) - MOVV 32(R29), R1 - MOVV R1, ret+16(FP) - RET - // AES hashing not implemented for mips64 TEXT runtime·aeshash(SB),NOSPLIT,$-8-0 MOVW (R0), R1 @@ -696,31 +670,6 @@ eq: MOVB R1, ret+16(FP) RET -// eqstring tests whether two strings are equal. -// The compiler guarantees that strings passed -// to eqstring have equal length. -// See runtime_test.go:eqstring_generic for -// equivalent Go code. 
-TEXT runtime·eqstring(SB),NOSPLIT,$0-33 - MOVV s1_base+0(FP), R1 - MOVV s2_base+16(FP), R2 - MOVV $1, R3 - MOVB R3, ret+32(FP) - BNE R1, R2, 2(PC) - RET - MOVV s1_len+8(FP), R3 - ADDV R1, R3, R4 -loop: - BNE R1, R4, 2(PC) - RET - MOVBU (R1), R6 - ADDV $1, R1 - MOVBU (R2), R7 - ADDV $1, R2 - BEQ R6, R7, loop - MOVB R0, ret+32(FP) - RET - // TODO: share code with memequal? TEXT bytes·Equal(SB),NOSPLIT,$0-49 MOVV a_len+8(FP), R3 @@ -823,18 +772,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$-8-0 // traceback from goexit1 must hit code range of goexit NOR R0, R0 // NOP -TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8 - RET - -TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8 - RET - -TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8 - RET - -TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8 - RET - TEXT ·checkASM(SB),NOSPLIT,$0-1 MOVW $1, R1 MOVB R1, ret+0(FP) diff --git a/src/runtime/asm_mipsx.s b/src/runtime/asm_mipsx.s index 536c3156b5..bba6a9501d 100644 --- a/src/runtime/asm_mipsx.s +++ b/src/runtime/asm_mipsx.s @@ -109,17 +109,6 @@ TEXT runtime·gosave(SB),NOSPLIT,$-4-4 // restore state from Gobuf; longjmp TEXT runtime·gogo(SB),NOSPLIT,$8-4 MOVW buf+0(FP), R3 - - // If ctxt is not nil, invoke deletion barrier before overwriting. - MOVW gobuf_ctxt(R3), R1 - BEQ R1, nilctxt - MOVW $gobuf_ctxt(R3), R1 - MOVW R1, 4(R29) - MOVW R0, 8(R29) - JAL runtime·writebarrierptr_prewrite(SB) - MOVW buf+0(FP), R3 - -nilctxt: MOVW gobuf_g(R3), g // make sure g is not nil JAL runtime·save_g(SB) @@ -226,9 +215,12 @@ switch: noswitch: // already on m stack, just call directly + // Using a tail call here cleans up tracebacks since we won't stop + // at an intermediate systemstack. MOVW 0(REGCTXT), R4 // code pointer - JAL (R4) - RET + MOVW 0(R29), R31 // restore LR + ADD $4, R29 + JMP (R4) /* * support for morestack @@ -261,7 +253,7 @@ TEXT runtime·morestack(SB),NOSPLIT,$-4-0 MOVW R29, (g_sched+gobuf_sp)(g) MOVW R31, (g_sched+gobuf_pc)(g) MOVW R3, (g_sched+gobuf_lr)(g) - // newstack will fill gobuf.ctxt. + MOVW REGCTXT, (g_sched+gobuf_ctxt)(g) // Called from f. // Set m->morebuf to f's caller. @@ -274,9 +266,8 @@ TEXT runtime·morestack(SB),NOSPLIT,$-4-0 JAL runtime·save_g(SB) MOVW (g_sched+gobuf_sp)(g), R29 // Create a stack frame on g0 to call newstack. - MOVW R0, -8(R29) // Zero saved LR in frame - ADDU $-8, R29 - MOVW REGCTXT, 4(R29) // ctxt argument + MOVW R0, -4(R29) // Zero saved LR in frame + ADDU $-4, R29 JAL runtime·newstack(SB) // Not reached, but make sure the return PC from the call to newstack @@ -619,31 +610,14 @@ TEXT setg_gcc<>(SB),NOSPLIT,$0 JAL runtime·save_g(SB) RET -TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8 - MOVW 8(R29), R1 // LR saved by caller - MOVW R1, ret+4(FP) +TEXT runtime·getcallerpc(SB),NOSPLIT,$-4-4 + MOVW 0(R29), R1 // LR saved by caller + MOVW R1, ret+0(FP) RET TEXT runtime·abort(SB),NOSPLIT,$0-0 UNDEF -// memhash_varlen(p unsafe.Pointer, h seed) uintptr -// redirects to memhash(p, h, size) using the size -// stored in the closure. -TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12 - GO_ARGS - NO_LOCAL_POINTERS - MOVW p+0(FP), R1 - MOVW h+4(FP), R2 - MOVW 4(REGCTXT), R3 - MOVW R1, 4(R29) - MOVW R2, 8(R29) - MOVW R3, 12(R29) - JAL runtime·memhash(SB) - MOVW 16(R29), R1 - MOVW R1, ret+8(FP) - RET - // Not implemented. TEXT runtime·aeshash(SB),NOSPLIT,$0 UNDEF @@ -712,31 +686,6 @@ eq: MOVB R1, ret+8(FP) RET -// eqstring tests whether two strings are equal. -// The compiler guarantees that strings passed -// to eqstring have equal length. -// See runtime_test.go:eqstring_generic for -// equivalent Go code. 
-TEXT runtime·eqstring(SB),NOSPLIT,$0-17 - MOVW s1_base+0(FP), R1 - MOVW s2_base+8(FP), R2 - MOVW $1, R3 - MOVBU R3, ret+16(FP) - BNE R1, R2, 2(PC) - RET - MOVW s1_len+4(FP), R3 - ADDU R1, R3, R4 -loop: - BNE R1, R4, 2(PC) - RET - MOVBU (R1), R6 - ADDU $1, R1 - MOVBU (R2), R7 - ADDU $1, R2 - BEQ R6, R7, loop - MOVB R0, ret+16(FP) - RET - TEXT bytes·Equal(SB),NOSPLIT,$0-25 MOVW a_len+4(FP), R3 MOVW b_len+16(FP), R4 @@ -903,18 +852,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$-4-0 // traceback from goexit1 must hit code range of goexit NOR R0, R0 // NOP -TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4 - RET - -TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4 - RET - -TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4 - RET - -TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4 - RET - TEXT ·checkASM(SB),NOSPLIT,$0-1 MOVW $1, R1 MOVB R1, ret+0(FP) diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s index 616861ea7d..e02ca16907 100644 --- a/src/runtime/asm_ppc64x.s +++ b/src/runtime/asm_ppc64x.s @@ -133,18 +133,6 @@ TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8 // restore state from Gobuf; longjmp TEXT runtime·gogo(SB), NOSPLIT, $16-8 MOVD buf+0(FP), R5 - - // If ctxt is not nil, invoke deletion barrier before overwriting. - MOVD gobuf_ctxt(R5), R3 - CMP R0, R3 - BEQ nilctxt - MOVD $gobuf_ctxt(R5), R3 - MOVD R3, FIXED_FRAME+0(R1) - MOVD R0, FIXED_FRAME+8(R1) - BL runtime·writebarrierptr_prewrite(SB) - MOVD buf+0(FP), R5 - -nilctxt: MOVD gobuf_g(R5), g // make sure g is not nil BL runtime·save_g(SB) @@ -277,6 +265,9 @@ switch: noswitch: // already on m stack, just call directly + // On other arches we do a tail call here, but it appears to be + // impossible to tail call a function pointer in shared mode on + // ppc64 because the caller is responsible for restoring the TOC. MOVD 0(R11), R12 // code pointer MOVD R12, CTR BL (CTR) @@ -317,7 +308,7 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0 MOVD LR, R8 MOVD R8, (g_sched+gobuf_pc)(g) MOVD R5, (g_sched+gobuf_lr)(g) - // newstack will fill gobuf.ctxt. + MOVD R11, (g_sched+gobuf_ctxt)(g) // Called from f. // Set m->morebuf to f's caller. @@ -329,8 +320,7 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0 MOVD m_g0(R7), g BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R1 - MOVDU R0, -(FIXED_FRAME+8)(R1) // create a call frame on g0 - MOVD R11, FIXED_FRAME+0(R1) // ctxt argument + MOVDU R0, -(FIXED_FRAME+0)(R1) // create a call frame on g0 BL runtime·newstack(SB) // Not reached, but make sure the return PC from the call to newstack @@ -714,9 +704,9 @@ TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0 MOVD R4, LR RET -TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16 - MOVD FIXED_FRAME+8(R1), R3 // LR saved by caller - MOVD R3, ret+8(FP) +TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8 + MOVD 0(R1), R3 // LR saved by caller + MOVD R3, ret+0(FP) RET TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0 @@ -738,23 +728,6 @@ TEXT runtime·cputicks(SB),NOSPLIT,$0-8 MOVD R3, ret+0(FP) RET -// memhash_varlen(p unsafe.Pointer, h seed) uintptr -// redirects to memhash(p, h, size) using the size -// stored in the closure. 
-TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24 - GO_ARGS - NO_LOCAL_POINTERS - MOVD p+0(FP), R3 - MOVD h+8(FP), R4 - MOVD 8(R11), R5 - MOVD R3, FIXED_FRAME+0(R1) - MOVD R4, FIXED_FRAME+8(R1) - MOVD R5, FIXED_FRAME+16(R1) - BL runtime·memhash(SB) - MOVD FIXED_FRAME+24(R1), R3 - MOVD R3, ret+16(FP) - RET - // AES hashing not implemented for ppc64 TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0 MOVW (R0), R1 @@ -1074,24 +1047,6 @@ equal: MOVD $1, R9 RET -// eqstring tests whether two strings are equal. -// The compiler guarantees that strings passed -// to eqstring have equal length. -// See runtime_test.go:eqstring_generic for -// equivalent Go code. -TEXT runtime·eqstring(SB),NOSPLIT,$0-33 - MOVD s1_base+0(FP), R3 - MOVD s2_base+16(FP), R4 - MOVD $1, R5 - MOVB R5, ret+32(FP) - CMP R3, R4 - BNE 2(PC) - RET - MOVD s1_len+8(FP), R5 - BL runtime·memeqbody(SB) - MOVB R9, ret+32(FP) - RET - TEXT bytes·Equal(SB),NOSPLIT,$0-49 MOVD a_len+8(FP), R4 MOVD b_len+32(FP), R5 @@ -1129,24 +1084,17 @@ TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32 TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0 DCBT (R3) // Prepare cache line. - MOVD R3,R10 // Save base address for calculating the index later. + MOVD R3,R17 // Save base address for calculating the index later. RLDICR $0,R3,$60,R8 // Align address to doubleword boundary in R8. RLDIMI $8,R5,$48,R5 // Replicating the byte across the register. - - // Calculate last acceptable address and check for possible overflow - // using a saturated add. - // Overflows set last acceptable address to 0xffffffffffffffff. - ADD R4,R3,R7 - SUBC R3,R7,R6 - SUBE R0,R0,R9 - MOVW R9,R6 - OR R6,R7,R7 + ADD R4,R3,R7 // Last acceptable address in R7. RLDIMI $16,R5,$32,R5 CMPU R4,$32 // Check if it's a small string (<32 bytes). Those will be processed differently. MOVD $-1,R9 - WORD $0x54661EB8 // Calculate padding in R6 (rlwinm r6,r3,3,26,28). + WORD $0x54661EB8 // Calculate padding in R6 (rlwinm r6,r3,3,26,28). RLDIMI $32,R5,$0,R5 + MOVD R7,R10 // Save last acceptable address in R10 for later. ADD $-1,R7,R7 #ifdef GOARCH_ppc64le SLD R6,R9,R9 // Prepare mask for Little Endian @@ -1155,56 +1103,142 @@ TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0 #endif BLE small_string // Jump to the small string case if it's <32 bytes. - // Case for length >32 bytes + // If we are 64-byte aligned, branch to qw_align just to get the auxiliary values + // in V0, V1 and V10, then branch to the preloop. + ANDCC $63,R3,R11 + BEQ CR0,qw_align + RLDICL $0,R3,$61,R11 + MOVD 0(R8),R12 // Load one doubleword from the aligned address in R8. CMPB R12,R5,R3 // Check for a match. AND R9,R3,R3 // Mask bytes below s_base - RLDICL $0,R7,$61,R4 // length-1 + RLDICL $0,R7,$61,R6 // length-1 RLDICR $0,R7,$60,R7 // Last doubleword in R7 CMPU R3,$0,CR7 // If we have a match, jump to the final computation BNE CR7,done + ADD $8,R8,R8 + ADD $-8,R4,R4 + ADD R4,R11,R4 - // Check for doubleword alignment and jump to the loop setup if aligned. - MOVFL R8,CR7 - BC 12,28,loop_setup + // Check for quadword alignment + ANDCC $15,R8,R11 + BEQ CR0,qw_align - // Not aligned, so handle the second doubleword - MOVDU 8(R8),R12 + // Not aligned, so handle the next doubleword + MOVD 0(R8),R12 CMPB R12,R5,R3 CMPU R3,$0,CR7 BNE CR7,done + ADD $8,R8,R8 + ADD $-8,R4,R4 -loop_setup: - // We are now aligned to a 16-byte boundary. We will load two doublewords - // per loop iteration. The last doubleword is in R7, so our loop counter - // starts at (R7-R8)/16. 
- SUB R8,R7,R6 - SRD $4,R6,R6 - MOVD R6,CTR + // Either quadword aligned or 64-byte at this point. We can use LVX. +qw_align: + + // Set up auxiliary data for the vectorized algorithm. + VSPLTISB $0,V0 // Replicate 0 across V0 + VSPLTISB $3,V10 // Use V10 as control for VBPERMQ + MTVRD R5,V1 + LVSL (R0+R0),V11 + VSLB V11,V10,V10 + VSPLTB $7,V1,V1 // Replicate byte across V1 + CMPU R4, $64 // If len <= 64, don't use the vectorized loop + BLE tail + + // We will load 4 quardwords per iteration in the loop, so check for + // 64-byte alignment. If 64-byte aligned, then branch to the preloop. + ANDCC $63,R8,R11 + BEQ CR0,preloop + + // Not 64-byte aligned. Load one quadword at a time until aligned. + LVX (R8+R0),V4 + VCMPEQUBCC V1,V4,V6 // Check for byte in V4 + BNE CR6,found_qw_align + ADD $16,R8,R8 + ADD $-16,R4,R4 + + ANDCC $63,R8,R11 + BEQ CR0,preloop + LVX (R8+R0),V4 + VCMPEQUBCC V1,V4,V6 // Check for byte in V4 + BNE CR6,found_qw_align + ADD $16,R8,R8 + ADD $-16,R4,R4 + + ANDCC $63,R8,R11 + BEQ CR0,preloop + LVX (R8+R0),V4 + VCMPEQUBCC V1,V4,V6 // Check for byte in V4 + BNE CR6,found_qw_align + ADD $-16,R4,R4 + ADD $16,R8,R8 - // Note: when we have an align directive, align this loop to 32 bytes so - // it fits in a single icache sector. + // 64-byte aligned. Prepare for the main loop. +preloop: + CMPU R4,$64 + BLE tail // If len <= 64, don't use the vectorized loop + + // We are now aligned to a 64-byte boundary. We will load 4 quadwords + // per loop iteration. The last doubleword is in R10, so our loop counter + // starts at (R10-R8)/64. + SUB R8,R10,R6 + SRD $6,R6,R9 // Loop counter in R9 + MOVD R9,CTR + + MOVD $16,R11 // Load offsets for the vector loads + MOVD $32,R9 + MOVD $48,R7 + + // Main loop we will load 64 bytes per iteration loop: - // Load two doublewords, then compare and merge in a single register. We - // will check two doublewords per iteration, then find out which of them - // contains the byte later. This speeds up the search. - MOVD 8(R8),R12 - MOVDU 16(R8),R11 - CMPB R12,R5,R3 - CMPB R11,R5,R9 - OR R3,R9,R6 - CMPU R6,$0,CR7 - BNE CR7,found - BC 16,0,loop + LVX (R8+R0),V2 // Load 4 16-byte vectors + LVX (R11+R8),V3 + LVX (R9+R8),V4 + LVX (R7+R8),V5 + VCMPEQUB V1,V2,V6 // Look for byte in each vector + VCMPEQUB V1,V3,V7 + VCMPEQUB V1,V4,V8 + VCMPEQUB V1,V5,V9 + VOR V6,V7,V11 // Compress the result in a single vector + VOR V8,V9,V12 + VOR V11,V12,V11 + VCMPEQUBCC V0,V11,V11 // Check for byte + BGE CR6,found + ADD $64,R8,R8 + BC 16,0,loop // bdnz loop - // Counter zeroed, but we may have another doubleword to read - CMPU R8,R7 - BEQ notfound + // Handle the tailing bytes or R4 <= 64 + RLDICL $0,R6,$58,R4 +tail: + CMPU R4,$0 + BEQ notfound + LVX (R8+R0),V4 + VCMPEQUBCC V1,V4,V6 + BNE CR6,found_qw_align + ADD $16,R8,R8 + CMPU R4,$16,CR6 + BLE CR6,notfound + ADD $-16,R4,R4 - MOVDU 8(R8),R12 - CMPB R12,R5,R3 - CMPU R3,$0,CR6 - BNE CR6,done + LVX (R8+R0),V4 + VCMPEQUBCC V1,V4,V6 + BNE CR6,found_qw_align + ADD $16,R8,R8 + CMPU R4,$16,CR6 + BLE CR6,notfound + ADD $-16,R4,R4 + + LVX (R8+R0),V4 + VCMPEQUBCC V1,V4,V6 + BNE CR6,found_qw_align + ADD $16,R8,R8 + CMPU R4,$16,CR6 + BLE CR6,notfound + ADD $-16,R4,R4 + + LVX (R8+R0),V4 + VCMPEQUBCC V1,V4,V6 + BNE CR6,found_qw_align notfound: MOVD $-1,R3 @@ -1212,15 +1246,68 @@ notfound: RET found: - // One of the doublewords from the loop contains the byte we are looking - // for. Check the first doubleword and adjust the address if found. 
- CMPU R3,$0,CR6 - ADD $-8,R8,R8 - BNE CR6,done + // We will now compress the results into a single doubleword, + // so it can be moved to a GPR for the final index calculation. - // Not found, so it must be in the second doubleword of the merged pair. - MOVD R9,R3 - ADD $8,R8,R8 + // The bytes in V6-V9 are either 0x00 or 0xFF. So, permute the + // first bit of each byte into bits 48-63. + VBPERMQ V6,V10,V6 + VBPERMQ V7,V10,V7 + VBPERMQ V8,V10,V8 + VBPERMQ V9,V10,V9 + + // Shift each 16-bit component into its correct position for + // merging into a single doubleword. +#ifdef GOARCH_ppc64le + VSLDOI $2,V7,V7,V7 + VSLDOI $4,V8,V8,V8 + VSLDOI $6,V9,V9,V9 +#else + VSLDOI $6,V6,V6,V6 + VSLDOI $4,V7,V7,V7 + VSLDOI $2,V8,V8,V8 +#endif + + // Merge V6-V9 into a single doubleword and move to a GPR. + VOR V6,V7,V11 + VOR V8,V9,V4 + VOR V4,V11,V4 + MFVRD V4,R3 + +#ifdef GOARCH_ppc64le + ADD $-1,R3,R11 + ANDN R3,R11,R11 + POPCNTD R11,R11 // Count trailing zeros (Little Endian). +#else + CNTLZD R3,R11 // Count leading zeros (Big Endian). +#endif + ADD R8,R11,R3 // Calculate byte address + +return: + SUB R17,R3 + MOVD R3,(R14) + RET + +found_qw_align: + // Use the same algorithm as above. Compress the result into + // a single doubleword and move it to a GPR for the final + // calculation. + VBPERMQ V6,V10,V6 + +#ifdef GOARCH_ppc64le + MFVRD V6,R3 + ADD $-1,R3,R11 + ANDN R3,R11,R11 + POPCNTD R11,R11 +#else + VSLDOI $6,V6,V6,V6 + MFVRD V6,R3 + CNTLZD R3,R11 +#endif + ADD R8,R11,R3 + CMPU R11,R4 + BLT return + BR notfound done: // At this point, R3 has 0xFF in the same position as the byte we are @@ -1236,17 +1323,10 @@ done: CMPU R8,R7 // Check if we are at the last doubleword. SRD $3,R11 // Convert trailing zeros to bytes. ADD R11,R8,R3 - CMPU R11,R4,CR7 // If at the last doubleword, check the byte offset. + CMPU R11,R6,CR7 // If at the last doubleword, check the byte offset. BNE return BLE CR7,return - MOVD $-1,R3 - MOVD R3,(R14) - RET - -return: - SUB R10,R3 // Calculate index. - MOVD R3,(R14) - RET + BR notfound small_string: // We unroll this loop for better performance. @@ -1257,9 +1337,9 @@ small_string: CMPB R12,R5,R3 // Check for a match. AND R9,R3,R3 // Mask bytes below s_base. CMPU R3,$0,CR7 // If we have a match, jump to the final computation. - RLDICL $0,R7,$61,R4 // length-1 + RLDICL $0,R7,$61,R6 // length-1 RLDICR $0,R7,$60,R7 // Last doubleword in R7. - CMPU R8,R7 + CMPU R8,R7 BNE CR7,done BEQ notfound // Hit length. 
@@ -1287,34 +1367,70 @@ small_string: MOVDU 8(R8),R12 CMPB R12,R5,R3 CMPU R3,$0,CR6 - CMPU R8,R7 BNE CR6,done BR notfound TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 MOVD s1_base+0(FP), R5 - MOVD s1_len+8(FP), R3 MOVD s2_base+16(FP), R6 + MOVD s1_len+8(FP), R3 + CMP R5,R6,CR7 MOVD s2_len+24(FP), R4 MOVD $ret+32(FP), R7 + CMP R3,R4,CR6 + BEQ CR7,equal + +notequal: #ifdef GOARCH_ppc64le BR cmpbodyLE<>(SB) #else BR cmpbodyBE<>(SB) #endif +equal: + BEQ CR6,done + MOVD $1, R8 + BGT CR6,greater + NEG R8 + +greater: + MOVD R8, (R7) + RET + +done: + MOVD $0, (R7) + RET + TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56 MOVD s1+0(FP), R5 - MOVD s1+8(FP), R3 MOVD s2+24(FP), R6 + MOVD s1+8(FP), R3 + CMP R5,R6,CR7 MOVD s2+32(FP), R4 MOVD $ret+48(FP), R7 + CMP R3,R4,CR6 + BEQ CR7,equal + #ifdef GOARCH_ppc64le BR cmpbodyLE<>(SB) #else BR cmpbodyBE<>(SB) #endif +equal: + BEQ CR6,done + MOVD $1, R8 + BGT CR6,greater + NEG R8 + +greater: + MOVD R8, (R7) + RET + +done: + MOVD $0, (R7) + RET + TEXT runtime·return0(SB), NOSPLIT, $0 MOVW $0, R3 RET @@ -1353,18 +1469,6 @@ TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0 // traceback from goexit1 must hit code range of goexit MOVD R0, R0 // NOP -TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8 - RET - -TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8 - RET - -TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8 - RET - -TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8 - RET - TEXT runtime·sigreturn(SB),NOSPLIT,$0-0 RET diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s index 20e740b927..6b71830557 100644 --- a/src/runtime/asm_s390x.s +++ b/src/runtime/asm_s390x.s @@ -7,6 +7,83 @@ #include "funcdata.h" #include "textflag.h" +// _rt0_s390x_lib is common startup code for s390x systems when +// using -buildmode=c-archive or -buildmode=c-shared. The linker will +// arrange to invoke this function as a global constructor (for +// c-archive) or when the shared library is loaded (for c-shared). +// We expect argc and argv to be passed in the usual C ABI registers +// R2 and R3. +TEXT _rt0_s390x_lib(SB), NOSPLIT|NOFRAME, $0 + STMG R6, R15, 48(R15) + MOVD R2, _rt0_s390x_lib_argc<>(SB) + MOVD R3, _rt0_s390x_lib_argv<>(SB) + + // Save R6-R15 in the register save area of the calling function. + STMG R6, R15, 48(R15) + + // Allocate 80 bytes on the stack. + MOVD $-80(R15), R15 + + // Save F8-F15 in our stack frame. + FMOVD F8, 16(R15) + FMOVD F9, 24(R15) + FMOVD F10, 32(R15) + FMOVD F11, 40(R15) + FMOVD F12, 48(R15) + FMOVD F13, 56(R15) + FMOVD F14, 64(R15) + FMOVD F15, 72(R15) + + // Synchronous initialization. + MOVD $runtime·libpreinit(SB), R1 + BL R1 + + // Create a new thread to finish Go runtime initialization. + MOVD _cgo_sys_thread_create(SB), R1 + CMP R1, $0 + BEQ nocgo + MOVD $_rt0_s390x_lib_go(SB), R2 + MOVD $0, R3 + BL R1 + BR restore + +nocgo: + MOVD $0x800000, R1 // stacksize + MOVD R1, 0(R15) + MOVD $_rt0_s390x_lib_go(SB), R1 + MOVD R1, 8(R15) // fn + MOVD $runtime·newosproc(SB), R1 + BL R1 + +restore: + // Restore F8-F15 from our stack frame. + FMOVD 16(R15), F8 + FMOVD 24(R15), F9 + FMOVD 32(R15), F10 + FMOVD 40(R15), F11 + FMOVD 48(R15), F12 + FMOVD 56(R15), F13 + FMOVD 64(R15), F14 + FMOVD 72(R15), F15 + MOVD $80(R15), R15 + + // Restore R6-R15. + LMG 48(R15), R6, R15 + RET + +// _rt0_s390x_lib_go initializes the Go runtime. +// This is started in a separate thread by _rt0_s390x_lib. 
+TEXT _rt0_s390x_lib_go(SB), NOSPLIT|NOFRAME, $0 + MOVD _rt0_s390x_lib_argc<>(SB), R2 + MOVD _rt0_s390x_lib_argv<>(SB), R3 + MOVD $runtime·rt0_go(SB), R1 + BR R1 + +DATA _rt0_s390x_lib_argc<>(SB)/8, $0 +GLOBL _rt0_s390x_lib_argc<>(SB), NOPTR, $8 +DATA _rt0_s90x_lib_argv<>(SB)/8, $0 +GLOBL _rt0_s390x_lib_argv<>(SB), NOPTR, $8 + TEXT runtime·rt0_go(SB),NOSPLIT,$0 // R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer // C TLS base pointer in AR0:AR1 @@ -116,17 +193,6 @@ TEXT runtime·gosave(SB), NOSPLIT, $-8-8 // restore state from Gobuf; longjmp TEXT runtime·gogo(SB), NOSPLIT, $16-8 MOVD buf+0(FP), R5 - - // If ctxt is not nil, invoke deletion barrier before overwriting. - MOVD gobuf_ctxt(R5), R1 - CMPBEQ R1, $0, nilctxt - MOVD $gobuf_ctxt(R5), R1 - MOVD R1, 8(R15) - MOVD R0, 16(R15) - BL runtime·writebarrierptr_prewrite(SB) - MOVD buf+0(FP), R5 - -nilctxt: MOVD gobuf_g(R5), g // make sure g is not nil BL runtime·save_g(SB) @@ -235,9 +301,12 @@ switch: noswitch: // already on m stack, just call directly + // Using a tail call here cleans up tracebacks since we won't stop + // at an intermediate systemstack. MOVD 0(R12), R3 // code pointer - BL (R3) - RET + MOVD 0(R15), LR // restore LR + ADD $8, R15 + BR (R3) /* * support for morestack @@ -272,7 +341,7 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0 MOVD LR, R8 MOVD R8, (g_sched+gobuf_pc)(g) MOVD R5, (g_sched+gobuf_lr)(g) - // newstack will fill gobuf.ctxt. + MOVD R12, (g_sched+gobuf_ctxt)(g) // Called from f. // Set m->morebuf to f's caller. @@ -285,9 +354,8 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0 BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R15 // Create a stack frame on g0 to call newstack. - MOVD $0, -16(R15) // Zero saved LR in frame - SUB $16, R15 - MOVD R12, 8(R15) // ctxt argument + MOVD $0, -8(R15) // Zero saved LR in frame + SUB $8, R15 BL runtime·newstack(SB) // Not reached, but make sure the return PC from the call to newstack @@ -656,9 +724,9 @@ TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0 MOVD R1, LR RET -TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16 - MOVD 16(R15), R3 // LR saved by caller - MOVD R3, ret+8(FP) +TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8 + MOVD 0(R15), R3 // LR saved by caller + MOVD R3, ret+0(FP) RET TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0 @@ -678,23 +746,6 @@ TEXT runtime·cputicks(SB),NOSPLIT,$0-8 MOVD R3, ret+0(FP) RET -// memhash_varlen(p unsafe.Pointer, h seed) uintptr -// redirects to memhash(p, h, size) using the size -// stored in the closure. -TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24 - GO_ARGS - NO_LOCAL_POINTERS - MOVD p+0(FP), R3 - MOVD h+8(FP), R4 - MOVD 8(R12), R5 - MOVD R3, 8(R15) - MOVD R4, 16(R15) - MOVD R5, 24(R15) - BL runtime·memhash(SB) - MOVD 32(R15), R3 - MOVD R3, ret+16(FP) - RET - // AES hashing not implemented for s390x TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0 MOVW (R0), R15 @@ -721,18 +772,6 @@ TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 LA ret+16(FP), R7 BR runtime·memeqbody(SB) -// eqstring tests whether two strings are equal. -// The compiler guarantees that strings passed -// to eqstring have equal length. -// See runtime_test.go:eqstring_generic for -// equivalent Go code. 
-TEXT runtime·eqstring(SB),NOSPLIT|NOFRAME,$0-33 - MOVD s1_base+0(FP), R3 - MOVD s1_len+8(FP), R6 - MOVD s2_base+16(FP), R5 - LA ret+32(FP), R7 - BR runtime·memeqbody(SB) - TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49 MOVD a_len+8(FP), R2 MOVD b_len+32(FP), R6 @@ -949,23 +988,12 @@ TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0 // traceback from goexit1 must hit code range of goexit BYTE $0x07; BYTE $0x00; // 2-byte nop -TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8 - RET - -TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8 - RET - -TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8 - RET - -TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8 - RET - TEXT runtime·sigreturn(SB),NOSPLIT,$0-0 RET TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0 - SYNC + // Stores are already ordered on s390x, so this is just a + // compile barrier. RET TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 diff --git a/src/runtime/cgo/asm_386.s b/src/runtime/cgo/asm_386.s index dc8897d353..7293c20bf8 100644 --- a/src/runtime/cgo/asm_386.s +++ b/src/runtime/cgo/asm_386.s @@ -7,26 +7,23 @@ // Called by C code generated by cmd/cgo. // func crosscall2(fn func(a unsafe.Pointer, n int32, ctxt uintptr), a unsafe.Pointer, n int32, ctxt uintptr) // Saves C callee-saved registers and calls fn with three arguments. -TEXT crosscall2(SB),NOSPLIT,$0 - PUSHL BP - MOVL SP, BP - PUSHL BX - PUSHL SI - PUSHL DI - - SUBL $12, SP - MOVL 20(BP), AX +TEXT crosscall2(SB),NOSPLIT,$28-16 + MOVL BP, 24(SP) + MOVL BX, 20(SP) + MOVL SI, 16(SP) + MOVL DI, 12(SP) + + MOVL ctxt+12(FP), AX MOVL AX, 8(SP) - MOVL 16(BP), AX + MOVL n+8(FP), AX MOVL AX, 4(SP) - MOVL 12(BP), AX + MOVL a+4(FP), AX MOVL AX, 0(SP) - MOVL 8(BP), AX + MOVL fn+0(FP), AX CALL AX - ADDL $12, SP - - POPL DI - POPL SI - POPL BX - POPL BP + + MOVL 12(SP), DI + MOVL 16(SP), SI + MOVL 20(SP), BX + MOVL 24(SP), BP RET diff --git a/src/runtime/cgo/asm_amd64.s b/src/runtime/cgo/asm_amd64.s index 541bd9ea01..0e33fc4796 100644 --- a/src/runtime/cgo/asm_amd64.s +++ b/src/runtime/cgo/asm_amd64.s @@ -7,14 +7,12 @@ // Called by C code generated by cmd/cgo. // func crosscall2(fn func(a unsafe.Pointer, n int32, ctxt uintptr), a unsafe.Pointer, n int32, ctxt uintptr) // Saves C callee-saved registers and calls fn with three arguments. -TEXT crosscall2(SB),NOSPLIT,$0 #ifndef GOOS_windows - SUBQ $0x58, SP /* keeps stack pointer 32-byte aligned */ +TEXT crosscall2(SB),NOSPLIT,$0x50-0 /* keeps stack pointer 32-byte aligned */ #else - SUBQ $0x118, SP /* also need to save xmm6 - xmm15 */ +TEXT crosscall2(SB),NOSPLIT,$0x110-0 /* also need to save xmm6 - xmm15 */ #endif MOVQ BX, 0x18(SP) - MOVQ BP, 0x20(SP) MOVQ R12, 0x28(SP) MOVQ R13, 0x30(SP) MOVQ R14, 0x38(SP) @@ -62,15 +60,9 @@ TEXT crosscall2(SB),NOSPLIT,$0 #endif MOVQ 0x18(SP), BX - MOVQ 0x20(SP), BP MOVQ 0x28(SP), R12 MOVQ 0x30(SP), R13 MOVQ 0x38(SP), R14 MOVQ 0x40(SP), R15 -#ifndef GOOS_windows - ADDQ $0x58, SP -#else - ADDQ $0x118, SP -#endif RET diff --git a/src/runtime/cgo/asm_mipsx.s b/src/runtime/cgo/asm_mipsx.s index dd16af6fbe..2483bdd7d4 100644 --- a/src/runtime/cgo/asm_mipsx.s +++ b/src/runtime/cgo/asm_mipsx.s @@ -20,7 +20,11 @@ TEXT crosscall2(SB),NOSPLIT,$-4 // Space for 9 caller-saved GPR + LR + 6 caller-saved FPR. // O32 ABI allows us to smash 16 bytes argument area of caller frame. +#ifndef GOMIPS_softfloat SUBU $(4*14+8*6-16), R29 +#else + SUBU $(4*14-16), R29 // For soft-float, no FPR. 
+#endif MOVW R5, (4*1)(R29) MOVW R6, (4*2)(R29) MOVW R7, (4*3)(R29) @@ -34,14 +38,14 @@ TEXT crosscall2(SB),NOSPLIT,$-4 MOVW R23, (4*11)(R29) MOVW g, (4*12)(R29) MOVW R31, (4*13)(R29) - +#ifndef GOMIPS_softfloat MOVD F20, (4*14)(R29) MOVD F22, (4*14+8*1)(R29) MOVD F24, (4*14+8*2)(R29) MOVD F26, (4*14+8*3)(R29) MOVD F28, (4*14+8*4)(R29) MOVD F30, (4*14+8*5)(R29) - +#endif JAL runtime·load_g(SB) JAL (R4) @@ -55,7 +59,7 @@ TEXT crosscall2(SB),NOSPLIT,$-4 MOVW (4*11)(R29), R23 MOVW (4*12)(R29), g MOVW (4*13)(R29), R31 - +#ifndef GOMIPS_softfloat MOVD (4*14)(R29), F20 MOVD (4*14+8*1)(R29), F22 MOVD (4*14+8*2)(R29), F24 @@ -64,4 +68,7 @@ TEXT crosscall2(SB),NOSPLIT,$-4 MOVD (4*14+8*5)(R29), F30 ADDU $(4*14+8*6-16), R29 +#else + ADDU $(4*14-16), R29 +#endif RET diff --git a/src/runtime/cgo/gcc_android_386.c b/src/runtime/cgo/gcc_android_386.c index 23a15f1c87..28f553c446 100644 --- a/src/runtime/cgo/gcc_android_386.c +++ b/src/runtime/cgo/gcc_android_386.c @@ -36,7 +36,7 @@ inittls(void) */ ntofree = 0; for(;;) { - if(pthread_key_create(&k, nil) < 0) { + if(pthread_key_create(&k, nil) != 0) { fprintf(stderr, "runtime/cgo: pthread_key_create failed\n"); abort(); } @@ -77,7 +77,10 @@ threadentry(void *v) ts = *(ThreadStart*)v; free(v); - pthread_setspecific(k1, (void*)ts.g); + if (pthread_setspecific(k1, (void*)ts.g) != 0) { + fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n"); + abort(); + } crosscall_386(ts.fn); return nil; diff --git a/src/runtime/cgo/gcc_android_amd64.c b/src/runtime/cgo/gcc_android_amd64.c index e006c49bcf..6f92d90dd4 100644 --- a/src/runtime/cgo/gcc_android_amd64.c +++ b/src/runtime/cgo/gcc_android_amd64.c @@ -41,7 +41,7 @@ inittls(void) */ ntofree = 0; for(;;) { - if(pthread_key_create(&k, nil) < 0) { + if(pthread_key_create(&k, nil) != 0) { fprintf(stderr, "runtime/cgo: pthread_key_create failed\n"); abort(); } @@ -82,7 +82,10 @@ threadentry(void *v) ts = *(ThreadStart*)v; free(v); - pthread_setspecific(k1, (void*)ts.g); + if (pthread_setspecific(k1, (void*)ts.g) != 0) { + fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n"); + abort(); + } crosscall_amd64(ts.fn); return nil; diff --git a/src/runtime/cgo/gcc_darwin_386.c b/src/runtime/cgo/gcc_darwin_386.c index 4ab3267d64..7d3c55cd56 100644 --- a/src/runtime/cgo/gcc_darwin_386.c +++ b/src/runtime/cgo/gcc_darwin_386.c @@ -39,8 +39,8 @@ inittls(void) * * The linker and runtime hard-code this constant offset * from %gs where we expect to find g. - * Known to ../../../liblink/sym.c:/468 - * and to ../sys_darwin_386.s:/468 + * Known to src/cmd/link/internal/ld/sym.go:/0x468 + * and to src/runtime/sys_darwin_386.s:/0x468 * * This is truly disgusting and a bit fragile, but taking care * of it here protects the rest of the system from damage. 
@@ -64,7 +64,7 @@ inittls(void) */ ntofree = 0; for(;;) { - if(pthread_key_create(&k, nil) < 0) { + if(pthread_key_create(&k, nil) != 0) { fprintf(stderr, "runtime/cgo: pthread_key_create failed\n"); abort(); } @@ -142,7 +142,10 @@ threadentry(void *v) ts = *(ThreadStart*)v; free(v); - pthread_setspecific(k1, (void*)ts.g); + if (pthread_setspecific(k1, (void*)ts.g) != 0) { + fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n"); + abort(); + } crosscall_386(ts.fn); return nil; diff --git a/src/runtime/cgo/gcc_darwin_amd64.c b/src/runtime/cgo/gcc_darwin_amd64.c index 181d0ab490..c57608c675 100644 --- a/src/runtime/cgo/gcc_darwin_amd64.c +++ b/src/runtime/cgo/gcc_darwin_amd64.c @@ -28,14 +28,14 @@ inittls(void) * * The linker and runtime hard-code this constant offset * from %gs where we expect to find g. - * Known to ../../../liblink/sym.c:/8a0 - * and to ../sys_darwin_amd64.s:/8a0 + * Known to src/cmd/link/internal/ld/sym.go:/0x8a0 + * and to src/runtime/sys_darwin_amd64.s:/0x8a0 * * As disgusting as on the 386; same justification. */ ntofree = 0; for(;;) { - if(pthread_key_create(&k, nil) < 0) { + if(pthread_key_create(&k, nil) != 0) { fprintf(stderr, "runtime/cgo: pthread_key_create failed\n"); abort(); } @@ -113,7 +113,10 @@ threadentry(void *v) ts = *(ThreadStart*)v; free(v); - pthread_setspecific(k1, (void*)ts.g); + if (pthread_setspecific(k1, (void*)ts.g) != 0) { + fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n"); + abort(); + } crosscall_amd64(ts.fn); return nil; diff --git a/src/runtime/cgo/gcc_libinit.c b/src/runtime/cgo/gcc_libinit.c index 31594addce..3dc5bde4cc 100644 --- a/src/runtime/cgo/gcc_libinit.c +++ b/src/runtime/cgo/gcc_libinit.c @@ -98,6 +98,10 @@ _cgo_try_pthread_create(pthread_t* thread, const pthread_attr_t* attr, void* (*p for (tries = 0; tries < 20; tries++) { err = pthread_create(thread, attr, pfn, arg); + if (err == 0) { + pthread_detach(*thread); + return 0; + } if (err != EAGAIN) { return err; } diff --git a/src/runtime/cgo/gcc_mipsx.S b/src/runtime/cgo/gcc_mipsx.S index c51c36a9b7..54f4b8201a 100644 --- a/src/runtime/cgo/gcc_mipsx.S +++ b/src/runtime/cgo/gcc_mipsx.S @@ -14,8 +14,11 @@ .globl crosscall1 .set noat crosscall1: +#ifndef __mips_soft_float addiu $29, $29, -88 - +#else + addiu $29, $29, -40 // For soft-float, no need to make room for FP registers +#endif sw $31, 0($29) sw $16, 4($29) sw $17, 8($29) @@ -27,14 +30,14 @@ crosscall1: sw $23, 32($29) sw $30, 36($29) +#ifndef __mips_soft_float sdc1 $f20, 40($29) sdc1 $f22, 48($29) sdc1 $f24, 56($29) sdc1 $f26, 64($29) sdc1 $f28, 72($29) sdc1 $f30, 80($29) - - +#endif move $20, $4 // save R4 move $4, $6 jalr $5 // call setg_gcc @@ -49,16 +52,20 @@ crosscall1: lw $22, 28($29) lw $23, 32($29) lw $30, 36($29) +#ifndef __mips_soft_float ldc1 $f20, 40($29) ldc1 $f22, 48($29) ldc1 $f24, 56($29) ldc1 $f26, 64($29) ldc1 $f28, 72($29) ldc1 $f30, 80($29) - +#endif lw $31, 0($29) - +#ifndef __mips_soft_float addiu $29, $29, 88 +#else + addiu $29, $29, 40 +#endif jr $31 .set at diff --git a/src/runtime/cgo/gcc_mmap.c b/src/runtime/cgo/gcc_mmap.c index 29acd3c185..5cf6bdf8cf 100644 --- a/src/runtime/cgo/gcc_mmap.c +++ b/src/runtime/cgo/gcc_mmap.c @@ -11,7 +11,7 @@ #include "libcgo.h" -void * +uintptr_t x_cgo_mmap(void *addr, uintptr_t length, int32_t prot, int32_t flags, int32_t fd, uint32_t offset) { void *p; @@ -20,9 +20,9 @@ x_cgo_mmap(void *addr, uintptr_t length, int32_t prot, int32_t flags, int32_t fd _cgo_tsan_release(); if (p == MAP_FAILED) { /* This is what the Go code expects on failure. 
*/ - p = (void *) (uintptr_t) errno; + return (uintptr_t)errno; } - return p; + return (uintptr_t)p; } void diff --git a/src/runtime/cgo/gcc_signal2_darwin_armx.c b/src/runtime/cgo/gcc_signal2_darwin_armx.c new file mode 100644 index 0000000000..54b7e32658 --- /dev/null +++ b/src/runtime/cgo/gcc_signal2_darwin_armx.c @@ -0,0 +1,13 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build lldb +// +build darwin +// +build arm arm64 + +// Used by gcc_signal_darwin_armx.c when doing the test build during cgo. +// We hope that for real binaries the definition provided by Go will take precedence +// and the linker will drop this .o file altogether, which is why this definition +// is all by itself in its own file. +void __attribute__((weak)) xx_cgo_panicmem(void) {} diff --git a/src/runtime/cgo/gcc_signal_darwin_armx.c b/src/runtime/cgo/gcc_signal_darwin_armx.c index a2d520bce8..3ab1d8b0d6 100644 --- a/src/runtime/cgo/gcc_signal_darwin_armx.c +++ b/src/runtime/cgo/gcc_signal_darwin_armx.c @@ -39,7 +39,8 @@ #include "libcgo.h" #include "libcgo_unix.h" -uintptr_t x_cgo_panicmem; +void xx_cgo_panicmem(void); +uintptr_t x_cgo_panicmem = (uintptr_t)xx_cgo_panicmem; static pthread_mutex_t mach_exception_handler_port_set_mu; static mach_port_t mach_exception_handler_port_set = MACH_PORT_NULL; diff --git a/src/runtime/cgo/gcc_signal_darwin_lldb.c b/src/runtime/cgo/gcc_signal_darwin_lldb.c index 12cc388400..54d91f6390 100644 --- a/src/runtime/cgo/gcc_signal_darwin_lldb.c +++ b/src/runtime/cgo/gcc_signal_darwin_lldb.c @@ -8,7 +8,5 @@ #include <stdint.h> -uintptr_t x_cgo_panicmem; - void darwin_arm_init_thread_exception_port() {} void darwin_arm_init_mach_exception_handler() {} diff --git a/src/runtime/cgo/gcc_util.c b/src/runtime/cgo/gcc_util.c index 2d5382a8f0..3fcb48cc8d 100644 --- a/src/runtime/cgo/gcc_util.c +++ b/src/runtime/cgo/gcc_util.c @@ -29,6 +29,10 @@ void(* const _cgo_yield)() = NULL; #include <string.h> +char x_cgo_yield_strncpy_src = 0; +char x_cgo_yield_strncpy_dst = 0; +size_t x_cgo_yield_strncpy_n = 0; + /* Stub for allowing libc interceptors to execute. @@ -50,9 +54,14 @@ x_cgo_yield() So we choose strncpy(_, _, 0): it requires an extra header, but it's standard and should be very efficient. + + GCC 7 has an unfortunate habit of optimizing out strncpy calls (see + https://golang.org/issue/21196), so the arguments here need to be global + variables with external linkage in order to ensure that the call traps all the + way down into libc. */ - char nothing = 0; - strncpy(¬hing, ¬hing, 0); + strncpy(&x_cgo_yield_strncpy_dst, &x_cgo_yield_strncpy_src, + x_cgo_yield_strncpy_n); } void(* const _cgo_yield)() = &x_cgo_yield; diff --git a/src/runtime/cgo/signal_darwin_arm.s b/src/runtime/cgo/signal_darwin_arm.s index ee5c3d3476..f886e4bc06 100644 --- a/src/runtime/cgo/signal_darwin_arm.s +++ b/src/runtime/cgo/signal_darwin_arm.s @@ -4,13 +4,13 @@ #include "textflag.h" -// panicmem is the entrypoint for SIGSEGV as intercepted via a +// xx_cgo_panicmem is the entrypoint for SIGSEGV as intercepted via a // mach thread port as EXC_BAD_ACCESS. As the segfault may have happened -// in C code, we first need to load_g then call panicmem. +// in C code, we first need to load_g then call xx_cgo_panicmem. 
// // R1 - LR at moment of fault // R2 - PC at moment of fault -TEXT ·panicmem(SB),NOSPLIT,$-4 +TEXT xx_cgo_panicmem(SB),NOSPLIT,$-4 // If in external C code, we need to load the g register. BL runtime·load_g(SB) CMP $0, g diff --git a/src/runtime/cgo/signal_darwin_arm64.s b/src/runtime/cgo/signal_darwin_arm64.s index 75aefd4b95..17781cf496 100644 --- a/src/runtime/cgo/signal_darwin_arm64.s +++ b/src/runtime/cgo/signal_darwin_arm64.s @@ -4,13 +4,13 @@ #include "textflag.h" -// panicmem is the entrypoint for SIGSEGV as intercepted via a +// xx_cgo_panicmem is the entrypoint for SIGSEGV as intercepted via a // mach thread port as EXC_BAD_ACCESS. As the segfault may have happened -// in C code, we first need to load_g then call panicmem. +// in C code, we first need to load_g then call xx_cgo_panicmem. // // R1 - LR at moment of fault // R2 - PC at moment of fault -TEXT ·panicmem(SB),NOSPLIT,$-8 +TEXT xx_cgo_panicmem(SB),NOSPLIT,$-8 // If in external C code, we need to load the g register. BL runtime·load_g(SB) CMP $0, g diff --git a/src/runtime/cgo/signal_darwin_armx.go b/src/runtime/cgo/signal_darwin_armx.go index 9f6741eb08..9f4b462415 100644 --- a/src/runtime/cgo/signal_darwin_armx.go +++ b/src/runtime/cgo/signal_darwin_armx.go @@ -7,29 +7,7 @@ package cgo -import "unsafe" +import _ "unsafe" -//go:cgo_import_static x_cgo_panicmem -//go:linkname x_cgo_panicmem x_cgo_panicmem -var x_cgo_panicmem uintptr - -// use a pointer to avoid relocation of external symbol in __TEXT -// make linker happy -var _cgo_panicmem = &x_cgo_panicmem - -// TODO(crawshaw): move this into x_cgo_init, it will not run until -// runtime has finished loading, which may be after its use. -func init() { - *_cgo_panicmem = funcPC(panicmem) -} - -func funcPC(f interface{}) uintptr { - var ptrSize = unsafe.Sizeof(uintptr(0)) - return **(**uintptr)(add(unsafe.Pointer(&f), ptrSize)) -} - -func add(p unsafe.Pointer, x uintptr) unsafe.Pointer { - return unsafe.Pointer(uintptr(p) + x) -} - -func panicmem() +//go:cgo_export_static xx_cgo_panicmem xx_cgo_panicmem +func xx_cgo_panicmem() diff --git a/src/runtime/cgo_mmap.go b/src/runtime/cgo_mmap.go index aa531b9020..b7c70c6fff 100644 --- a/src/runtime/cgo_mmap.go +++ b/src/runtime/cgo_mmap.go @@ -20,19 +20,21 @@ var _cgo_mmap unsafe.Pointer //go:linkname _cgo_munmap _cgo_munmap var _cgo_munmap unsafe.Pointer -func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer { +func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (unsafe.Pointer, int) { if _cgo_mmap != nil { // Make ret a uintptr so that writing to it in the // function literal does not trigger a write barrier. // A write barrier here could break because of the way // that mmap uses the same value both as a pointer and // an errno value. - // TODO: Fix mmap to return two values. var ret uintptr systemstack(func() { ret = callCgoMmap(addr, n, prot, flags, fd, off) }) - return unsafe.Pointer(ret) + if ret < 4096 { + return nil, int(ret) + } + return unsafe.Pointer(ret), 0 } return sysMmap(addr, n, prot, flags, fd, off) } @@ -46,7 +48,7 @@ func munmap(addr unsafe.Pointer, n uintptr) { } // sysMmap calls the mmap system call. It is implemented in assembly. -func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer +func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int) // callCgoMmap calls the mmap function in the runtime/cgo package // using the GCC calling convention. 
It is implemented in assembly. diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index 755269ebd2..02c4cb3622 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -8,9 +8,9 @@ // runtime.cgocall(_cgo_Cfunc_f, frame), where _cgo_Cfunc_f is a // gcc-compiled function written by cgo. // -// runtime.cgocall (below) locks g to m, calls entersyscall -// so as not to block other goroutines or the garbage collector, -// and then calls runtime.asmcgocall(_cgo_Cfunc_f, frame). +// runtime.cgocall (below) calls entersyscall so as not to block +// other goroutines or the garbage collector, and then calls +// runtime.asmcgocall(_cgo_Cfunc_f, frame). // // runtime.asmcgocall (in asm_$GOARCH.s) switches to the m->g0 stack // (assumed to be an operating system-allocated stack, so safe to run @@ -104,13 +104,9 @@ func cgocall(fn, arg unsafe.Pointer) int32 { racereleasemerge(unsafe.Pointer(&racecgosync)) } - // Lock g to m to ensure we stay on the same stack if we do a - // cgo callback. In case of panic, unwindm calls endcgo. - lockOSThread() mp := getg().m mp.ncgocall++ mp.ncgo++ - mp.incgo = true // Reset traceback. mp.cgoCallers[0] = 0 @@ -130,7 +126,14 @@ func cgocall(fn, arg unsafe.Pointer) int32 { // and then re-enter the "system call" reusing the PC and SP // saved by entersyscall here. entersyscall(0) + + mp.incgo = true errno := asmcgocall(fn, arg) + + // Call endcgo before exitsyscall because exitsyscall may + // reschedule us on to a different M. + endcgo(mp) + exitsyscall(0) // From the garbage collector's perspective, time can move @@ -145,8 +148,8 @@ func cgocall(fn, arg unsafe.Pointer) int32 { // GC by forcing them to stay live across this time warp. KeepAlive(fn) KeepAlive(arg) + KeepAlive(mp) - endcgo(mp) return errno } @@ -158,8 +161,6 @@ func endcgo(mp *m) { if raceenabled { raceacquire(unsafe.Pointer(&racecgosync)) } - - unlockOSThread() // invalidates mp } // Call from C back to Go. @@ -171,6 +172,12 @@ func cgocallbackg(ctxt uintptr) { exit(2) } + // The call from C is on gp.m's g0 stack, so we must ensure + // that we stay on that M. We have to do this before calling + // exitsyscall, since it would otherwise be free to move us to + // a different M. The call to unlockOSThread is in unwindm. + lockOSThread() + // Save current syscall parameters, so m.syscall can be // used again if callback decide to make syscall. syscall := gp.m.syscall @@ -186,6 +193,10 @@ func cgocallbackg(ctxt uintptr) { cgocallbackg1(ctxt) + // At this point unlockOSThread has been called. + // The following code must not change to a different m. + // This is enforced by checking incgo in the schedule function. + gp.m.incgo = true // going back to cgo call reentersyscall(savedpc, uintptr(savedsp)) @@ -321,32 +332,35 @@ func cgocallbackg1(ctxt uintptr) { } func unwindm(restore *bool) { - if !*restore { - return - } - // Restore sp saved by cgocallback during - // unwind of g's stack (see comment at top of file). - mp := acquirem() - sched := &mp.g0.sched - switch GOARCH { - default: - throw("unwindm not implemented") - case "386", "amd64", "arm", "ppc64", "ppc64le", "mips64", "mips64le", "s390x", "mips", "mipsle": - sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + sys.MinFrameSize)) - case "arm64": - sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + 16)) - } + if *restore { + // Restore sp saved by cgocallback during + // unwind of g's stack (see comment at top of file). 
+ mp := acquirem() + sched := &mp.g0.sched + switch GOARCH { + default: + throw("unwindm not implemented") + case "386", "amd64", "arm", "ppc64", "ppc64le", "mips64", "mips64le", "s390x", "mips", "mipsle": + sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + sys.MinFrameSize)) + case "arm64": + sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + 16)) + } - // Call endcgo to do the accounting that cgocall will not have a - // chance to do during an unwind. - // - // In the case where a a Go call originates from C, ncgo is 0 - // and there is no matching cgocall to end. - if mp.ncgo > 0 { - endcgo(mp) + // Call endcgo to do the accounting that cgocall will not have a + // chance to do during an unwind. + // + // In the case where a Go call originates from C, ncgo is 0 + // and there is no matching cgocall to end. + if mp.ncgo > 0 { + endcgo(mp) + } + + releasem(mp) } - releasem(mp) + // Undo the call to lockOSThread in cgocallbackg. + // We must still stay on the same m. + unlockOSThread() } // called from assembly @@ -580,10 +594,8 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) { // No more possible pointers. break } - if hbits.isPointer() { - if cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(base + i))) { - panic(errorString(msg)) - } + if hbits.isPointer() && cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(base + i))) { + panic(errorString(msg)) } hbits = hbits.next() } diff --git a/src/runtime/cgocheck.go b/src/runtime/cgocheck.go index 61aaa0a8f7..ea1ab974c3 100644 --- a/src/runtime/cgocheck.go +++ b/src/runtime/cgocheck.go @@ -16,6 +16,10 @@ const cgoWriteBarrierFail = "Go pointer stored into non-Go memory" // cgoCheckWriteBarrier is called whenever a pointer is stored into memory. // It throws if the program is storing a Go pointer into non-Go memory. +// +// This is called from the write barrier, so its entire call tree must +// be nosplit. +// //go:nosplit //go:nowritebarrier func cgoCheckWriteBarrier(dst *uintptr, src uintptr) { diff --git a/src/runtime/chan.go b/src/runtime/chan.go index 6294678d4a..41ae803574 100644 --- a/src/runtime/chan.go +++ b/src/runtime/chan.go @@ -55,11 +55,19 @@ type waitq struct { } //go:linkname reflect_makechan reflect.makechan -func reflect_makechan(t *chantype, size int64) *hchan { +func reflect_makechan(t *chantype, size int) *hchan { return makechan(t, size) } -func makechan(t *chantype, size int64) *hchan { +func makechan64(t *chantype, size int64) *hchan { + if int64(int(size)) != size { + panic(plainError("makechan: size out of range")) + } + + return makechan(t, int(size)) +} + +func makechan(t *chantype, size int) *hchan { elem := t.elem // compiler checks this but be safe. @@ -69,29 +77,33 @@ func makechan(t *chantype, size int64) *hchan { if hchanSize%maxAlign != 0 || elem.align > maxAlign { throw("makechan: bad alignment") } - if size < 0 || int64(uintptr(size)) != size || (elem.size > 0 && uintptr(size) > (_MaxMem-hchanSize)/elem.size) { + + if size < 0 || uintptr(size) > maxSliceCap(elem.size) || uintptr(size)*elem.size > _MaxMem-hchanSize { panic(plainError("makechan: size out of range")) } + // Hchan does not contain pointers interesting for GC when elements stored in buf do not contain pointers. + // buf points into the same allocation, elemtype is persistent. + // SudoG's are referenced from their owning thread so they can't be collected. + // TODO(dvyukov,rlh): Rethink when collector can move allocated objects. 
var c *hchan - if elem.kind&kindNoPointers != 0 || size == 0 { - // Allocate memory in one call. - // Hchan does not contain pointers interesting for GC in this case: - // buf points into the same allocation, elemtype is persistent. - // SudoG's are referenced from their owning thread so they can't be collected. - // TODO(dvyukov,rlh): Rethink when collector can move allocated objects. + switch { + case size == 0 || elem.size == 0: + // Queue or element size is zero. + c = (*hchan)(mallocgc(hchanSize, nil, true)) + // Race detector uses this location for synchronization. + c.buf = unsafe.Pointer(c) + case elem.kind&kindNoPointers != 0: + // Elements do not contain pointers. + // Allocate hchan and buf in one call. c = (*hchan)(mallocgc(hchanSize+uintptr(size)*elem.size, nil, true)) - if size > 0 && elem.size != 0 { - c.buf = add(unsafe.Pointer(c), hchanSize) - } else { - // race detector uses this location for synchronization - // Also prevents us from pointing beyond the allocation (see issue 9401). - c.buf = unsafe.Pointer(c) - } - } else { + c.buf = add(unsafe.Pointer(c), hchanSize) + default: + // Elements contain pointers. c = new(hchan) - c.buf = newarray(elem, int(size)) + c.buf = mallocgc(uintptr(size)*elem.size, elem, true) } + c.elemsize = uint16(elem.size) c.elemtype = elem c.dataqsiz = uint(size) @@ -110,7 +122,7 @@ func chanbuf(c *hchan, i uint) unsafe.Pointer { // entry point for c <- x from compiled code //go:nosplit func chansend1(c *hchan, elem unsafe.Pointer) { - chansend(c, elem, true, getcallerpc(unsafe.Pointer(&c))) + chansend(c, elem, true, getcallerpc()) } /* @@ -214,7 +226,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { mysg.elem = ep mysg.waitlink = nil mysg.g = gp - mysg.selectdone = nil + mysg.isSelect = false mysg.c = c gp.waiting = mysg gp.param = nil @@ -322,7 +334,7 @@ func closechan(c *hchan) { } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&c)) + callerpc := getcallerpc() racewritepc(unsafe.Pointer(c), callerpc, funcPC(closechan)) racerelease(unsafe.Pointer(c)) } @@ -499,7 +511,7 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) mysg.waitlink = nil gp.waiting = mysg mysg.g = gp - mysg.selectdone = nil + mysg.isSelect = false mysg.c = c gp.param = nil c.recvq.enqueue(mysg) @@ -594,7 +606,7 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) { // } // func selectnbsend(c *hchan, elem unsafe.Pointer) (selected bool) { - return chansend(c, elem, false, getcallerpc(unsafe.Pointer(&c))) + return chansend(c, elem, false, getcallerpc()) } // compiler implements @@ -644,7 +656,7 @@ func selectnbrecv2(elem unsafe.Pointer, received *bool, c *hchan) (selected bool //go:linkname reflect_chansend reflect.chansend func reflect_chansend(c *hchan, elem unsafe.Pointer, nb bool) (selected bool) { - return chansend(c, elem, !nb, getcallerpc(unsafe.Pointer(&c))) + return chansend(c, elem, !nb, getcallerpc()) } //go:linkname reflect_chanrecv reflect.chanrecv @@ -703,10 +715,16 @@ func (q *waitq) dequeue() *sudog { sgp.next = nil // mark as removed (see dequeueSudog) } - // if sgp participates in a select and is already signaled, ignore it - if sgp.selectdone != nil { - // claim the right to signal - if *sgp.selectdone != 0 || !atomic.Cas(sgp.selectdone, 0, 1) { + // if a goroutine was put on this queue because of a + // select, there is a small window between the goroutine + // being woken up by a different case and it grabbing the + // channel locks. 
Once it has the lock + // it removes itself from the queue, so we won't see it after that. + // We use a flag in the G struct to tell us when someone + // else has won the race to signal this goroutine but the goroutine + // hasn't removed itself from the queue yet. + if sgp.isSelect { + if !atomic.Cas(&sgp.g.selectDone, 0, 1) { continue } } diff --git a/src/runtime/chan_test.go b/src/runtime/chan_test.go index 0c94cf1a63..b6188f5e87 100644 --- a/src/runtime/chan_test.go +++ b/src/runtime/chan_test.go @@ -5,6 +5,7 @@ package runtime_test import ( + "internal/testenv" "math" "runtime" "sync" @@ -433,6 +434,9 @@ func TestSelectStress(t *testing.T) { func TestSelectFairness(t *testing.T) { const trials = 10000 + if runtime.GOOS == "linux" && runtime.GOARCH == "ppc64le" { + testenv.SkipFlaky(t, 22047) + } c1 := make(chan byte, trials+1) c2 := make(chan byte, trials+1) for i := 0; i < trials+1; i++ { @@ -726,6 +730,55 @@ done: <-ready2 } +type struct0 struct{} + +func BenchmarkMakeChan(b *testing.B) { + b.Run("Byte", func(b *testing.B) { + var x chan byte + for i := 0; i < b.N; i++ { + x = make(chan byte, 8) + } + close(x) + }) + b.Run("Int", func(b *testing.B) { + var x chan int + for i := 0; i < b.N; i++ { + x = make(chan int, 8) + } + close(x) + }) + b.Run("Ptr", func(b *testing.B) { + var x chan *byte + for i := 0; i < b.N; i++ { + x = make(chan *byte, 8) + } + close(x) + }) + b.Run("Struct", func(b *testing.B) { + b.Run("0", func(b *testing.B) { + var x chan struct0 + for i := 0; i < b.N; i++ { + x = make(chan struct0, 8) + } + close(x) + }) + b.Run("32", func(b *testing.B) { + var x chan struct32 + for i := 0; i < b.N; i++ { + x = make(chan struct32, 8) + } + close(x) + }) + b.Run("40", func(b *testing.B) { + var x chan struct40 + for i := 0; i < b.N; i++ { + x = make(chan struct40, 8) + } + close(x) + }) + }) +} + func BenchmarkChanNonblocking(b *testing.B) { myc := make(chan int) b.RunParallel(func(pb *testing.PB) { diff --git a/src/runtime/cputicks.go b/src/runtime/cputicks.go index ccc3947bb2..de97d5b6fa 100644 --- a/src/runtime/cputicks.go +++ b/src/runtime/cputicks.go @@ -11,6 +11,6 @@ package runtime -// careful: cputicks is not guaranteed to be monotonic! In particular, we have +// careful: cputicks is not guaranteed to be monotonic! In particular, we have // noticed drift between cpus on certain os/arch combinations. See issue 8976. 
func cputicks() int64 diff --git a/src/runtime/crash_cgo_test.go b/src/runtime/crash_cgo_test.go index a5cbbad69b..50b634dda4 100644 --- a/src/runtime/crash_cgo_test.go +++ b/src/runtime/crash_cgo_test.go @@ -13,6 +13,7 @@ import ( "os" "os/exec" "runtime" + "strconv" "strings" "testing" "time" @@ -113,7 +114,7 @@ func TestCgoExternalThreadSIGPROF(t *testing.T) { t.Fatal(err) } - got, err := testEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput() if err != nil { t.Fatalf("exit status: %v\n%s", err, got) } @@ -136,7 +137,7 @@ func TestCgoExternalThreadSignal(t *testing.T) { t.Fatal(err) } - got, err := testEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput() if err != nil { t.Fatalf("exit status: %v\n%s", err, got) } @@ -203,14 +204,14 @@ func TestCgoCheckBytes(t *testing.T) { const tries = 10 var tot1, tot2 time.Duration for i := 0; i < tries; i++ { - cmd := testEnv(exec.Command(exe, "CgoCheckBytes")) + cmd := testenv.CleanCmdEnv(exec.Command(exe, "CgoCheckBytes")) cmd.Env = append(cmd.Env, "GODEBUG=cgocheck=0", fmt.Sprintf("GO_CGOCHECKBYTES_TRY=%d", i)) start := time.Now() cmd.Run() d1 := time.Since(start) - cmd = testEnv(exec.Command(exe, "CgoCheckBytes")) + cmd = testenv.CleanCmdEnv(exec.Command(exe, "CgoCheckBytes")) cmd.Env = append(cmd.Env, fmt.Sprintf("GO_CGOCHECKBYTES_TRY=%d", i)) start = time.Now() @@ -251,7 +252,7 @@ func TestCgoCCodeSIGPROF(t *testing.T) { func TestCgoCrashTraceback(t *testing.T) { t.Parallel() - if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { + if runtime.GOOS != "linux" || (runtime.GOARCH != "amd64" && runtime.GOARCH != "ppc64le") { t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) } got := runTestProg(t, "testprogcgo", "CrashTraceback") @@ -273,7 +274,7 @@ func TestCgoTracebackContext(t *testing.T) { func testCgoPprof(t *testing.T, buildArg, runArg string) { t.Parallel() - if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { + if runtime.GOOS != "linux" || (runtime.GOARCH != "amd64" && runtime.GOARCH != "ppc64le") { t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) } testenv.MustHaveGoRun(t) @@ -283,7 +284,7 @@ func testCgoPprof(t *testing.T, buildArg, runArg string) { t.Fatal(err) } - got, err := testEnv(exec.Command(exe, runArg)).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, runArg)).CombinedOutput() if err != nil { if testenv.Builder() == "linux-amd64-alpine" { // See Issue 18243 and Issue 19938. @@ -295,7 +296,7 @@ func testCgoPprof(t *testing.T, buildArg, runArg string) { defer os.Remove(fn) for try := 0; try < 2; try++ { - cmd := testEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1")) + cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1")) // Check that pprof works both with and without explicit executable on command line. 
if try == 0 { cmd.Args = append(cmd.Args, exe, fn) @@ -330,7 +331,7 @@ func TestCgoPprof(t *testing.T) { } func TestCgoPprofPIE(t *testing.T) { - testCgoPprof(t, "-ldflags=-extldflags=-pie", "CgoPprof") + testCgoPprof(t, "-buildmode=pie", "CgoPprof") } func TestCgoPprofThread(t *testing.T) { @@ -359,7 +360,7 @@ func TestRaceProf(t *testing.T) { t.Fatal(err) } - got, err := testEnv(exec.Command(exe, "CgoRaceprof")).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoRaceprof")).CombinedOutput() if err != nil { t.Fatal(err) } @@ -388,7 +389,7 @@ func TestRaceSignal(t *testing.T) { t.Fatal(err) } - got, err := testEnv(exec.Command(exe, "CgoRaceSignal")).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoRaceSignal")).CombinedOutput() if err != nil { t.Logf("%s\n", got) t.Fatal(err) @@ -411,3 +412,72 @@ func TestCgoNumGoroutine(t *testing.T) { t.Errorf("expected %q got %v", want, got) } } + +func TestCatchPanic(t *testing.T) { + t.Parallel() + switch runtime.GOOS { + case "plan9", "windows": + t.Skipf("no signals on %s", runtime.GOOS) + case "darwin": + if runtime.GOARCH == "amd64" { + t.Skipf("crash() on darwin/amd64 doesn't raise SIGABRT") + } + } + + testenv.MustHaveGoRun(t) + + exe, err := buildTestProg(t, "testprogcgo") + if err != nil { + t.Fatal(err) + } + + for _, early := range []bool{true, false} { + cmd := testenv.CleanCmdEnv(exec.Command(exe, "CgoCatchPanic")) + // Make sure a panic results in a crash. + cmd.Env = append(cmd.Env, "GOTRACEBACK=crash") + if early { + // Tell testprogcgo to install an early signal handler for SIGABRT + cmd.Env = append(cmd.Env, "CGOCATCHPANIC_EARLY_HANDLER=1") + } + if out, err := cmd.CombinedOutput(); err != nil { + t.Errorf("testprogcgo CgoCatchPanic failed: %v\n%s", err, out) + } + } +} + +func TestCgoLockOSThreadExit(t *testing.T) { + switch runtime.GOOS { + case "plan9", "windows": + t.Skipf("no pthreads on %s", runtime.GOOS) + } + t.Parallel() + testLockOSThreadExit(t, "testprogcgo") +} + +func TestWindowsStackMemoryCgo(t *testing.T) { + if runtime.GOOS != "windows" { + t.Skip("skipping windows specific test") + } + testenv.SkipFlaky(t, 22575) + o := runTestProg(t, "testprogcgo", "StackMemory") + stackUsage, err := strconv.Atoi(o) + if err != nil { + t.Fatalf("Failed to read stack usage: %v", err) + } + if expected, got := 100<<10, stackUsage; got > expected { + t.Fatalf("expected < %d bytes of memory per thread, got %d", expected, got) + } +} + +func TestSigStackSwapping(t *testing.T) { + switch runtime.GOOS { + case "plan9", "windows": + t.Skip("no sigaltstack on %s", runtime.GOOS) + } + t.Parallel() + got := runTestProg(t, "testprogcgo", "SigStack") + want := "OK\n" + if got != want { + t.Errorf("expected %q got %v", want, got) + } +} diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go index 7753809d45..9588ddd4de 100644 --- a/src/runtime/crash_test.go +++ b/src/runtime/crash_test.go @@ -32,25 +32,6 @@ func TestMain(m *testing.M) { os.Exit(status) } -func testEnv(cmd *exec.Cmd) *exec.Cmd { - if cmd.Env != nil { - panic("environment already set") - } - for _, env := range os.Environ() { - // Exclude GODEBUG from the environment to prevent its output - // from breaking tests that are trying to parse other command output. - if strings.HasPrefix(env, "GODEBUG=") { - continue - } - // Exclude GOTRACEBACK for the same reason. 
- if strings.HasPrefix(env, "GOTRACEBACK=") { - continue - } - cmd.Env = append(cmd.Env, env) - } - return cmd -} - var testprog struct { sync.Mutex dir string @@ -62,7 +43,11 @@ type buildexe struct { err error } -func runTestProg(t *testing.T, binary, name string) string { +func runTestProg(t *testing.T, binary, name string, env ...string) string { + if *flagQuick { + t.Skip("-quick") + } + testenv.MustHaveGoBuild(t) exe, err := buildTestProg(t, binary) @@ -70,7 +55,11 @@ func runTestProg(t *testing.T, binary, name string) string { t.Fatal(err) } - cmd := testEnv(exec.Command(exe, name)) + cmd := testenv.CleanCmdEnv(exec.Command(exe, name)) + cmd.Env = append(cmd.Env, env...) + if testing.Short() { + cmd.Env = append(cmd.Env, "RUNTIME_TEST_SHORT=1") + } var b bytes.Buffer cmd.Stdout = &b cmd.Stderr = &b @@ -111,6 +100,10 @@ func runTestProg(t *testing.T, binary, name string) string { } func buildTestProg(t *testing.T, binary string, flags ...string) (string, error) { + if *flagQuick { + t.Skip("-quick") + } + checkStaleRuntime(t) testprog.Lock() @@ -139,7 +132,7 @@ func buildTestProg(t *testing.T, binary string, flags ...string) (string, error) exe := filepath.Join(testprog.dir, name+".exe") cmd := exec.Command(testenv.GoToolPath(t), append([]string{"build", "-o", exe}, flags...)...) cmd.Dir = "testdata/" + binary - out, err := testEnv(cmd).CombinedOutput() + out, err := testenv.CleanCmdEnv(cmd).CombinedOutput() if err != nil { target.err = fmt.Errorf("building %s %v: %v\n%s", binary, flags, err, out) testprog.target[name] = target @@ -158,14 +151,14 @@ var ( func checkStaleRuntime(t *testing.T) { staleRuntimeOnce.Do(func() { // 'go run' uses the installed copy of runtime.a, which may be out of date. - out, err := testEnv(exec.Command(testenv.GoToolPath(t), "list", "-f", "{{.Stale}}", "runtime")).CombinedOutput() + out, err := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "list", "-gcflags=all="+os.Getenv("GO_GCFLAGS"), "-f", "{{.Stale}}", "runtime")).CombinedOutput() if err != nil { staleRuntimeErr = fmt.Errorf("failed to execute 'go list': %v\n%v", err, string(out)) return } if string(out) != "false\n" { t.Logf("go list -f {{.Stale}} runtime:\n%s", out) - out, err := testEnv(exec.Command(testenv.GoToolPath(t), "list", "-f", "{{.StaleReason}}", "runtime")).CombinedOutput() + out, err := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "list", "-gcflags=all="+os.Getenv("GO_GCFLAGS"), "-f", "{{.StaleReason}}", "runtime")).CombinedOutput() if err != nil { t.Logf("go list -f {{.StaleReason}} failed: %v", err) } @@ -468,7 +461,7 @@ func TestMemPprof(t *testing.T) { t.Fatal(err) } - got, err := testEnv(exec.Command(exe, "MemProf")).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, "MemProf")).CombinedOutput() if err != nil { t.Fatal(err) } @@ -476,7 +469,7 @@ func TestMemPprof(t *testing.T) { defer os.Remove(fn) for try := 0; try < 2; try++ { - cmd := testEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-alloc_space", "-top")) + cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-alloc_space", "-top")) // Check that pprof works both with and without explicit executable on command line. 
if try == 0 { cmd.Args = append(cmd.Args, exe, fn) @@ -586,7 +579,7 @@ func TestPanicRace(t *testing.T) { const tries = 10 retry: for i := 0; i < tries; i++ { - got, err := testEnv(exec.Command(exe, "PanicRace")).CombinedOutput() + got, err := testenv.CleanCmdEnv(exec.Command(exe, "PanicRace")).CombinedOutput() if err == nil { t.Logf("try %d: program exited successfully, should have failed", i+1) continue diff --git a/src/runtime/crash_unix_test.go b/src/runtime/crash_unix_test.go index cbaa1f65fe..af9e6430da 100644 --- a/src/runtime/crash_unix_test.go +++ b/src/runtime/crash_unix_test.go @@ -65,13 +65,13 @@ func TestCrashDumpsAllThreads(t *testing.T) { cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe") cmd.Dir = dir - out, err := testEnv(cmd).CombinedOutput() + out, err := testenv.CleanCmdEnv(cmd).CombinedOutput() if err != nil { t.Fatalf("building source: %v\n%s", err, out) } cmd = exec.Command(filepath.Join(dir, "a.exe")) - cmd = testEnv(cmd) + cmd = testenv.CleanCmdEnv(cmd) cmd.Env = append(cmd.Env, "GOTRACEBACK=crash") // Set GOGC=off. Because of golang.org/issue/10958, the tight @@ -184,7 +184,7 @@ func TestPanicSystemstack(t *testing.T) { t.Parallel() cmd := exec.Command(os.Args[0], "testPanicSystemstackInternal") - cmd = testEnv(cmd) + cmd = testenv.CleanCmdEnv(cmd) cmd.Env = append(cmd.Env, "GOTRACEBACK=crash") pr, pw, err := os.Pipe() if err != nil { @@ -249,7 +249,7 @@ func TestSignalExitStatus(t *testing.T) { if err != nil { t.Fatal(err) } - err = testEnv(exec.Command(exe, "SignalExitStatus")).Run() + err = testenv.CleanCmdEnv(exec.Command(exe, "SignalExitStatus")).Run() if err == nil { t.Error("test program succeeded unexpectedly") } else if ee, ok := err.(*exec.ExitError); !ok { diff --git a/src/runtime/debug.go b/src/runtime/debug.go index 0e798fc6f5..feacfb6026 100644 --- a/src/runtime/debug.go +++ b/src/runtime/debug.go @@ -15,9 +15,6 @@ import ( // The number of logical CPUs on the local machine can be queried with NumCPU. // This call will go away when the scheduler improves. 
func GOMAXPROCS(n int) int { - if n > _MaxGomaxprocs { - n = _MaxGomaxprocs - } lock(&sched.lock) ret := int(gomaxprocs) unlock(&sched.lock) diff --git a/src/runtime/defs1_netbsd_386.go b/src/runtime/defs1_netbsd_386.go index 66f07ce5a5..c26f417a02 100644 --- a/src/runtime/defs1_netbsd_386.go +++ b/src/runtime/defs1_netbsd_386.go @@ -79,6 +79,7 @@ const ( _EV_CLEAR = 0x20 _EV_RECEIPT = 0 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = 0x0 _EVFILT_WRITE = 0x1 ) diff --git a/src/runtime/defs1_netbsd_amd64.go b/src/runtime/defs1_netbsd_amd64.go index 9e314718f3..0704cd4fb3 100644 --- a/src/runtime/defs1_netbsd_amd64.go +++ b/src/runtime/defs1_netbsd_amd64.go @@ -79,6 +79,7 @@ const ( _EV_CLEAR = 0x20 _EV_RECEIPT = 0 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = 0x0 _EVFILT_WRITE = 0x1 ) diff --git a/src/runtime/defs1_netbsd_arm.go b/src/runtime/defs1_netbsd_arm.go index db8e4c63fc..d2a13ad4b0 100644 --- a/src/runtime/defs1_netbsd_arm.go +++ b/src/runtime/defs1_netbsd_arm.go @@ -79,6 +79,7 @@ const ( _EV_CLEAR = 0x20 _EV_RECEIPT = 0 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = 0x0 _EVFILT_WRITE = 0x1 ) diff --git a/src/runtime/defs_darwin.go b/src/runtime/defs_darwin.go index 78df4e7ac8..f7d65e700d 100644 --- a/src/runtime/defs_darwin.go +++ b/src/runtime/defs_darwin.go @@ -139,6 +139,7 @@ const ( EV_CLEAR = C.EV_CLEAR EV_RECEIPT = C.EV_RECEIPT EV_ERROR = C.EV_ERROR + EV_EOF = C.EV_EOF EVFILT_READ = C.EVFILT_READ EVFILT_WRITE = C.EVFILT_WRITE ) diff --git a/src/runtime/defs_darwin_386.go b/src/runtime/defs_darwin_386.go index 1a5967b24b..f6dbcc519c 100644 --- a/src/runtime/defs_darwin_386.go +++ b/src/runtime/defs_darwin_386.go @@ -118,6 +118,7 @@ const ( _EV_CLEAR = 0x20 _EV_RECEIPT = 0x40 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = -0x1 _EVFILT_WRITE = -0x2 ) diff --git a/src/runtime/defs_darwin_amd64.go b/src/runtime/defs_darwin_amd64.go index a4ab090d51..245fe158c7 100644 --- a/src/runtime/defs_darwin_amd64.go +++ b/src/runtime/defs_darwin_amd64.go @@ -118,6 +118,7 @@ const ( _EV_CLEAR = 0x20 _EV_RECEIPT = 0x40 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = -0x1 _EVFILT_WRITE = -0x2 ) diff --git a/src/runtime/defs_darwin_arm.go b/src/runtime/defs_darwin_arm.go index 3f8dbbf254..f89aee6775 100644 --- a/src/runtime/defs_darwin_arm.go +++ b/src/runtime/defs_darwin_arm.go @@ -120,6 +120,7 @@ const ( _EV_CLEAR = 0x20 _EV_RECEIPT = 0x40 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = -0x1 _EVFILT_WRITE = -0x2 ) diff --git a/src/runtime/defs_darwin_arm64.go b/src/runtime/defs_darwin_arm64.go index c25a41b749..a0ca7f1703 100644 --- a/src/runtime/defs_darwin_arm64.go +++ b/src/runtime/defs_darwin_arm64.go @@ -118,6 +118,7 @@ const ( _EV_CLEAR = 0x20 _EV_RECEIPT = 0x40 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = -0x1 _EVFILT_WRITE = -0x2 ) diff --git a/src/runtime/defs_dragonfly.go b/src/runtime/defs_dragonfly.go index ed00be0f44..95014fe6e7 100644 --- a/src/runtime/defs_dragonfly.go +++ b/src/runtime/defs_dragonfly.go @@ -103,6 +103,7 @@ const ( EV_DELETE = C.EV_DELETE EV_CLEAR = C.EV_CLEAR EV_ERROR = C.EV_ERROR + EV_EOF = C.EV_EOF EVFILT_READ = C.EVFILT_READ EVFILT_WRITE = C.EVFILT_WRITE ) diff --git a/src/runtime/defs_dragonfly_amd64.go b/src/runtime/defs_dragonfly_amd64.go index fc70103286..c30da805cc 100644 --- a/src/runtime/defs_dragonfly_amd64.go +++ b/src/runtime/defs_dragonfly_amd64.go @@ -82,6 +82,7 @@ const ( _EV_DELETE = 0x2 _EV_CLEAR = 0x20 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = -0x1 _EVFILT_WRITE = -0x2 ) diff --git 
a/src/runtime/defs_freebsd.go b/src/runtime/defs_freebsd.go index 0a11d09db2..9d55111786 100644 --- a/src/runtime/defs_freebsd.go +++ b/src/runtime/defs_freebsd.go @@ -125,6 +125,7 @@ const ( EV_CLEAR = C.EV_CLEAR EV_RECEIPT = C.EV_RECEIPT EV_ERROR = C.EV_ERROR + EV_EOF = C.EV_EOF EVFILT_READ = C.EVFILT_READ EVFILT_WRITE = C.EVFILT_WRITE ) diff --git a/src/runtime/defs_freebsd_386.go b/src/runtime/defs_freebsd_386.go index 92b05503a3..49bcbb12a2 100644 --- a/src/runtime/defs_freebsd_386.go +++ b/src/runtime/defs_freebsd_386.go @@ -95,6 +95,7 @@ const ( _EV_CLEAR = 0x20 _EV_RECEIPT = 0x40 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = -0x1 _EVFILT_WRITE = -0x2 ) diff --git a/src/runtime/defs_freebsd_amd64.go b/src/runtime/defs_freebsd_amd64.go index 645e2053f2..0e1c6752d6 100644 --- a/src/runtime/defs_freebsd_amd64.go +++ b/src/runtime/defs_freebsd_amd64.go @@ -95,6 +95,7 @@ const ( _EV_CLEAR = 0x20 _EV_RECEIPT = 0x40 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = -0x1 _EVFILT_WRITE = -0x2 ) diff --git a/src/runtime/defs_freebsd_arm.go b/src/runtime/defs_freebsd_arm.go index c8a198fb4a..71684fe9f8 100644 --- a/src/runtime/defs_freebsd_arm.go +++ b/src/runtime/defs_freebsd_arm.go @@ -95,6 +95,7 @@ const ( _EV_CLEAR = 0x20 _EV_RECEIPT = 0x40 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = -0x1 _EVFILT_WRITE = -0x2 ) diff --git a/src/runtime/defs_netbsd.go b/src/runtime/defs_netbsd.go index 56db1f0159..41aa07af98 100644 --- a/src/runtime/defs_netbsd.go +++ b/src/runtime/defs_netbsd.go @@ -105,6 +105,7 @@ const ( EV_CLEAR = C.EV_CLEAR EV_RECEIPT = 0 EV_ERROR = C.EV_ERROR + EV_EOF = C.EV_EOF EVFILT_READ = C.EVFILT_READ EVFILT_WRITE = C.EVFILT_WRITE ) diff --git a/src/runtime/defs_openbsd.go b/src/runtime/defs_openbsd.go index 7e721504e6..9ff13dfcbf 100644 --- a/src/runtime/defs_openbsd.go +++ b/src/runtime/defs_openbsd.go @@ -100,6 +100,7 @@ const ( EV_DELETE = C.EV_DELETE EV_CLEAR = C.EV_CLEAR EV_ERROR = C.EV_ERROR + EV_EOF = C.EV_EOF EVFILT_READ = C.EVFILT_READ EVFILT_WRITE = C.EVFILT_WRITE ) diff --git a/src/runtime/defs_openbsd_386.go b/src/runtime/defs_openbsd_386.go index ce08111dea..1185530964 100644 --- a/src/runtime/defs_openbsd_386.go +++ b/src/runtime/defs_openbsd_386.go @@ -80,6 +80,7 @@ const ( _EV_DELETE = 0x2 _EV_CLEAR = 0x20 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = -0x1 _EVFILT_WRITE = -0x2 ) diff --git a/src/runtime/defs_openbsd_amd64.go b/src/runtime/defs_openbsd_amd64.go index ea0709809a..4bb8eac08f 100644 --- a/src/runtime/defs_openbsd_amd64.go +++ b/src/runtime/defs_openbsd_amd64.go @@ -80,6 +80,7 @@ const ( _EV_DELETE = 0x2 _EV_CLEAR = 0x20 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = -0x1 _EVFILT_WRITE = -0x2 ) diff --git a/src/runtime/defs_openbsd_arm.go b/src/runtime/defs_openbsd_arm.go index b0fb639c72..38b77c92d0 100644 --- a/src/runtime/defs_openbsd_arm.go +++ b/src/runtime/defs_openbsd_arm.go @@ -80,6 +80,7 @@ const ( _EV_DELETE = 0x2 _EV_CLEAR = 0x20 _EV_ERROR = 0x4000 + _EV_EOF = 0x8000 _EVFILT_READ = -0x1 _EVFILT_WRITE = -0x2 ) diff --git a/src/runtime/duff_amd64.s b/src/runtime/duff_amd64.s index a1112a4b59..44dc75d297 100644 --- a/src/runtime/duff_amd64.s +++ b/src/runtime/duff_amd64.s @@ -9,97 +9,97 @@ TEXT runtime·duffzero(SB), NOSPLIT, $0-0 MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS 
X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) MOVUPS X0,48(DI) - ADDQ $64,DI + LEAQ 64(DI),DI RET diff --git a/src/runtime/duff_arm64.s b/src/runtime/duff_arm64.s index 60a0e26cd3..21619ff910 100644 --- a/src/runtime/duff_arm64.s +++ b/src/runtime/duff_arm64.s @@ -5,134 +5,70 @@ #include "textflag.h" TEXT runtime·duffzero(SB), NOSPLIT, $-8-0 - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W 
ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) - MOVD.W ZR, 8(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP.P (ZR, ZR), 16(R16) + STP (ZR, ZR), (R16) RET TEXT runtime·duffcopy(SB), NOSPLIT, $0-0 diff --git a/src/runtime/error.go b/src/runtime/error.go index eafcc9b173..16f3e53a47 100644 --- a/src/runtime/error.go +++ b/src/runtime/error.go @@ -126,34 +126,31 @@ func printany(i interface{}) { //go:linkname stringsIndexByte strings.IndexByte func stringsIndexByte(s string, c byte) int -// called from generated code +// panicwrap generates a panic for a call to a wrapped value method +// with a nil pointer receiver. +// +// It is called from the generated wrapper code. func panicwrap() { - pc := make([]uintptr, 1) - n := Callers(2, pc) - if n == 0 { - throw("panicwrap: Callers failed") - } - frames := CallersFrames(pc) - frame, _ := frames.Next() - name := frame.Function + pc := getcallerpc() + name := funcname(findfunc(pc)) // name is something like "main.(*T).F". // We want to extract pkg ("main"), typ ("T"), and meth ("F"). // Do it by finding the parens. 
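The rewritten panicwrap below recovers the wrapper's name from the caller PC and splits it at the parentheses rather than walking the stack with CallersFrames. As a standalone sketch of that splitting, with a purely illustrative name and using the exported strings.IndexByte that the runtime linknames as stringsIndexByte:

package main

import (
	"fmt"
	"strings"
)

func main() {
	name := "main.(*T).F" // hypothetical wrapper name
	i := strings.IndexByte(name, '(')
	pkg := name[:i-1] // "main"
	name = name[i+2:] // "T).F"
	i = strings.IndexByte(name, ')')
	typ, meth := name[:i], name[i+2:] // "T", "F"
	fmt.Println(pkg, typ, meth)       // main T F
}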
i := stringsIndexByte(name, '(') if i < 0 { - throw("panicwrap: no ( in " + frame.Function) + throw("panicwrap: no ( in " + name) } pkg := name[:i-1] if i+2 >= len(name) || name[i-1:i+2] != ".(*" { - throw("panicwrap: unexpected string after package name: " + frame.Function) + throw("panicwrap: unexpected string after package name: " + name) } name = name[i+2:] i = stringsIndexByte(name, ')') if i < 0 { - throw("panicwrap: no ) in " + frame.Function) + throw("panicwrap: no ) in " + name) } if i+2 >= len(name) || name[i:i+2] != ")." { - throw("panicwrap: unexpected string after type name: " + frame.Function) + throw("panicwrap: unexpected string after type name: " + name) } typ := name[:i] meth := name[i+2:] diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index c929bd4618..385c569ed8 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -152,12 +152,19 @@ func RunSchedLocalQueueEmptyTest(iters int) { } } -var StringHash = stringHash -var BytesHash = bytesHash -var Int32Hash = int32Hash -var Int64Hash = int64Hash -var EfaceHash = efaceHash -var IfaceHash = ifaceHash +var ( + StringHash = stringHash + BytesHash = bytesHash + Int32Hash = int32Hash + Int64Hash = int64Hash + MemHash = memhash + MemHash32 = memhash32 + MemHash64 = memhash64 + EfaceHash = efaceHash + IfaceHash = ifaceHash +) + +var UseAeshash = &useAeshash func MemclrBytes(b []byte) { s := (*slice)(unsafe.Pointer(&b)) @@ -369,3 +376,40 @@ func (rw *RWMutex) Lock() { func (rw *RWMutex) Unlock() { rw.rw.unlock() } + +func MapBucketsCount(m map[int]int) int { + h := *(**hmap)(unsafe.Pointer(&m)) + return 1 << h.B +} + +func MapBucketsPointerIsNil(m map[int]int) bool { + h := *(**hmap)(unsafe.Pointer(&m)) + return h.buckets == nil +} + +func LockOSCounts() (external, internal uint32) { + g := getg() + if g.m.lockedExt+g.m.lockedInt == 0 { + if g.lockedm != 0 { + panic("lockedm on non-locked goroutine") + } + } else { + if g.lockedm == 0 { + panic("nil lockedm on locked goroutine") + } + } + return g.m.lockedExt, g.m.lockedInt +} + +//go:noinline +func TracebackSystemstack(stk []uintptr, i int) int { + if i == 0 { + pc, sp := getcallerpc(), getcallersp(unsafe.Pointer(&stk)) + return gentraceback(pc, sp, 0, getg(), 0, &stk[0], len(stk), nil, nil, _TraceJumpStack) + } + n := 0 + systemstack(func() { + n = TracebackSystemstack(stk, i-1) + }) + return n +} diff --git a/src/runtime/extern.go b/src/runtime/extern.go index 6e6c674d96..2c20e0d8af 100644 --- a/src/runtime/extern.go +++ b/src/runtime/extern.go @@ -178,11 +178,11 @@ func Caller(skip int) (pc uintptr, file string, line int, ok bool) { // We asked for one extra, so skip that one. If this is sigpanic, // stepping over this frame will set up state in Frames so the // next frame is correct. - callers, _, ok = stackExpander.next(callers) + callers, _, ok = stackExpander.next(callers, true) if !ok { return } - _, frame, _ := stackExpander.next(callers) + _, frame, _ := stackExpander.next(callers, true) pc = frame.PC file = frame.File line = frame.Line @@ -212,8 +212,8 @@ func Callers(skip int, pc []uintptr) int { return callers(skip, pc) } -// GOROOT returns the root of the Go tree. -// It uses the GOROOT environment variable, if set, +// GOROOT returns the root of the Go tree. It uses the +// GOROOT environment variable, if set at process start, // or else the root used during the Go build. 
func GOROOT() string { s := gogetenv("GOROOT") diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 0620f2d61e..1b1db25b17 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -10,6 +10,7 @@ import ( "reflect" "runtime" "runtime/debug" + "sync/atomic" "testing" "time" "unsafe" @@ -515,3 +516,126 @@ func TestUserForcedGC(t *testing.T) { t.Fatalf("runtime.GC() was not accounted in NumForcedGC") } } + +func writeBarrierBenchmark(b *testing.B, f func()) { + runtime.GC() + var ms runtime.MemStats + runtime.ReadMemStats(&ms) + //b.Logf("heap size: %d MB", ms.HeapAlloc>>20) + + // Keep GC running continuously during the benchmark, which in + // turn keeps the write barrier on continuously. + var stop uint32 + done := make(chan bool) + go func() { + for atomic.LoadUint32(&stop) == 0 { + runtime.GC() + } + close(done) + }() + defer func() { + atomic.StoreUint32(&stop, 1) + <-done + }() + + b.ResetTimer() + f() + b.StopTimer() +} + +func BenchmarkWriteBarrier(b *testing.B) { + if runtime.GOMAXPROCS(-1) < 2 { + // We don't want GC to take our time. + b.Skip("need GOMAXPROCS >= 2") + } + + // Construct a large tree both so the GC runs for a while and + // so we have a data structure to manipulate the pointers of. + type node struct { + l, r *node + } + var wbRoots []*node + var mkTree func(level int) *node + mkTree = func(level int) *node { + if level == 0 { + return nil + } + n := &node{mkTree(level - 1), mkTree(level - 1)} + if level == 10 { + // Seed GC with enough early pointers so it + // doesn't accidentally switch to mark 2 when + // it only has the top of the tree. + wbRoots = append(wbRoots, n) + } + return n + } + const depth = 22 // 64 MB + root := mkTree(22) + + writeBarrierBenchmark(b, func() { + var stack [depth]*node + tos := -1 + + // There are two write barriers per iteration, so i+=2. + for i := 0; i < b.N; i += 2 { + if tos == -1 { + stack[0] = root + tos = 0 + } + + // Perform one step of reversing the tree. + n := stack[tos] + if n.l == nil { + tos-- + } else { + n.l, n.r = n.r, n.l + stack[tos] = n.l + stack[tos+1] = n.r + tos++ + } + + if i%(1<<12) == 0 { + // Avoid non-preemptible loops (see issue #10958). + runtime.Gosched() + } + } + }) + + runtime.KeepAlive(wbRoots) +} + +func BenchmarkBulkWriteBarrier(b *testing.B) { + if runtime.GOMAXPROCS(-1) < 2 { + // We don't want GC to take our time. + b.Skip("need GOMAXPROCS >= 2") + } + + // Construct a large set of objects we can copy around. + const heapSize = 64 << 20 + type obj [16]*byte + ptrs := make([]*obj, heapSize/unsafe.Sizeof(obj{})) + for i := range ptrs { + ptrs[i] = new(obj) + } + + writeBarrierBenchmark(b, func() { + const blockSize = 1024 + var pos int + for i := 0; i < b.N; i += blockSize { + // Rotate block. 
+ block := ptrs[pos : pos+blockSize] + first := block[0] + copy(block, block[1:]) + block[blockSize-1] = first + + pos += blockSize + if pos+blockSize > len(ptrs) { + pos = 0 + } + + runtime.Gosched() + } + }) + + runtime.KeepAlive(ptrs) +} diff --git a/src/runtime/hash32.go b/src/runtime/hash32.go index be59076635..5574923911 100644 --- a/src/runtime/hash32.go +++ b/src/runtime/hash32.go @@ -81,6 +81,32 @@ tail: return uintptr(h) } +func memhash32(p unsafe.Pointer, seed uintptr) uintptr { + h := uint32(seed + 4*hashkey[0]) + h ^= readUnaligned32(p) + h = rotl_15(h*m1) * m2 + h ^= h >> 17 + h *= m3 + h ^= h >> 13 + h *= m4 + h ^= h >> 16 + return uintptr(h) +} + +func memhash64(p unsafe.Pointer, seed uintptr) uintptr { + h := uint32(seed + 8*hashkey[0]) + h ^= readUnaligned32(p) + h = rotl_15(h*m1) * m2 + h ^= readUnaligned32(add(p, 4)) + h = rotl_15(h*m1) * m2 + h ^= h >> 17 + h *= m3 + h ^= h >> 13 + h *= m4 + h ^= h >> 16 + return uintptr(h) +} + // Note: in order to get the compiler to issue rotl instructions, we // need to constant fold the shift amount by hand. // TODO: convince the compiler to issue rotl instructions after inlining. diff --git a/src/runtime/hash64.go b/src/runtime/hash64.go index d61f114475..3cf3f4629b 100644 --- a/src/runtime/hash64.go +++ b/src/runtime/hash64.go @@ -81,6 +81,28 @@ tail: return uintptr(h) } +func memhash32(p unsafe.Pointer, seed uintptr) uintptr { + h := uint64(seed + 4*hashkey[0]) + v := uint64(readUnaligned32(p)) + h ^= v + h ^= v << 32 + h = rotl_31(h*m1) * m2 + h ^= h >> 29 + h *= m3 + h ^= h >> 32 + return uintptr(h) +} + +func memhash64(p unsafe.Pointer, seed uintptr) uintptr { + h := uint64(seed + 8*hashkey[0]) + h ^= uint64(readUnaligned32(p)) | uint64(readUnaligned32(add(p, 4)))<<32 + h = rotl_31(h*m1) * m2 + h ^= h >> 29 + h *= m3 + h ^= h >> 32 + return uintptr(h) +} + // Note: in order to get the compiler to issue rotl instructions, we // need to constant fold the shift amount by hand. // TODO: convince the compiler to issue rotl instructions after inlining. diff --git a/src/runtime/hash_test.go b/src/runtime/hash_test.go index a6f3cdbdbe..1400579cda 100644 --- a/src/runtime/hash_test.go +++ b/src/runtime/hash_test.go @@ -14,6 +14,40 @@ import ( "unsafe" ) +func TestMemHash32Equality(t *testing.T) { + if *UseAeshash { + t.Skip("skipping since AES hash implementation is used") + } + var b [4]byte + r := rand.New(rand.NewSource(1234)) + seed := uintptr(r.Uint64()) + for i := 0; i < 100; i++ { + randBytes(r, b[:]) + got := MemHash32(unsafe.Pointer(&b), seed) + want := MemHash(unsafe.Pointer(&b), seed, 4) + if got != want { + t.Errorf("MemHash32(%x, %v) = %v; want %v", b, seed, got, want) + } + } +} + +func TestMemHash64Equality(t *testing.T) { + if *UseAeshash { + t.Skip("skipping since AES hash implementation is used") + } + var b [8]byte + r := rand.New(rand.NewSource(1234)) + seed := uintptr(r.Uint64()) + for i := 0; i < 100; i++ { + randBytes(r, b[:]) + got := MemHash64(unsafe.Pointer(&b), seed) + want := MemHash(unsafe.Pointer(&b), seed, 8) + if got != want { + t.Errorf("MemHash64(%x, %v) = %v; want %v", b, seed, got, want) + } + } +} + // Smhasher is a torture test for hash functions. // https://code.google.com/p/smhasher/ // This code is a port of some of the Smhasher tests to Go. 
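Both the 32-bit (hash32.go) and 64-bit (hash64.go) fallback hashers gain memhash32 and memhash64 above, and the two equality tests added to hash_test.go check that they agree with the generic memhash whenever the AES hash is not in use. Those tests can be run on their own with, for example:

	go test -run 'TestMemHash32Equality|TestMemHash64Equality' runtime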
diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go index 11ce0cbc4b..dee5dd5816 100644 --- a/src/runtime/hashmap.go +++ b/src/runtime/hashmap.go @@ -64,8 +64,10 @@ const ( bucketCntBits = 3 bucketCnt = 1 << bucketCntBits - // Maximum average load of a bucket that triggers growth. - loadFactor = 6.5 + // Maximum average load of a bucket that triggers growth is 6.5. + // Represent as loadFactorNum/loadFactDen, to allow integer math. + loadFactorNum = 13 + loadFactorDen = 2 // Maximum key or value size to keep inline (instead of mallocing per element). // Must fit in a uint8. @@ -124,12 +126,13 @@ type mapextra struct { // If both key and value do not contain pointers and are inline, then we mark bucket // type as containing no pointers. This avoids scanning such maps. // However, bmap.overflow is a pointer. In order to keep overflow buckets - // alive, we store pointers to all overflow buckets in hmap.overflow. - // Overflow is used only if key and value do not contain pointers. - // overflow[0] contains overflow buckets for hmap.buckets. - // overflow[1] contains overflow buckets for hmap.oldbuckets. + // alive, we store pointers to all overflow buckets in hmap.overflow and h.map.oldoverflow. + // overflow and oldoverflow are only used if key and value do not contain pointers. + // overflow contains overflow buckets for hmap.buckets. + // oldoverflow contains overflow buckets for hmap.oldbuckets. // The indirection allows to store a pointer to the slice in hiter. - overflow [2]*[]*bmap + overflow *[]*bmap + oldoverflow *[]*bmap // nextOverflow holds a pointer to a free overflow bucket. nextOverflow *bmap @@ -158,7 +161,8 @@ type hiter struct { h *hmap buckets unsafe.Pointer // bucket ptr at hash_iter initialization time bptr *bmap // current bucket - overflow [2]*[]*bmap // keeps overflow buckets alive + overflow *[]*bmap // keeps overflow buckets of hmap.buckets alive + oldoverflow *[]*bmap // keeps overflow buckets of hmap.oldbuckets alive startBucket uintptr // bucket iteration started at offset uint8 // intra-bucket offset to start from during iteration (should be big enough to hold bucketCnt-1) wrapped bool // already wrapped around from end of bucket array to beginning @@ -168,6 +172,28 @@ type hiter struct { checkBucket uintptr } +// bucketShift returns 1<<b, optimized for code generation. +func bucketShift(b uint8) uintptr { + if sys.GoarchAmd64|sys.GoarchAmd64p32|sys.Goarch386 != 0 { + b &= sys.PtrSize*8 - 1 // help x86 archs remove shift overflow checks + } + return uintptr(1) << b +} + +// bucketMask returns 1<<b - 1, optimized for code generation. +func bucketMask(b uint8) uintptr { + return bucketShift(b) - 1 +} + +// tophash calculates the tophash value for hash. +func tophash(hash uintptr) uint8 { + top := uint8(hash >> (sys.PtrSize*8 - 8)) + if top < minTopHash { + top += minTopHash + } + return top +} + func evacuated(b *bmap) bool { h := b.tophash[0] return h > empty && h < minTopHash @@ -181,6 +207,10 @@ func (b *bmap) setoverflow(t *maptype, ovf *bmap) { *(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-sys.PtrSize)) = ovf } +func (b *bmap) keys() unsafe.Pointer { + return add(unsafe.Pointer(b), dataOffset) +} + // incrnoverflow increments h.noverflow. // noverflow counts the number of overflow buckets. // This is used to trigger same-size map growth. 
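The bucketShift, bucketMask, and tophash helpers above, together with the loadFactorNum/loadFactorDen pair, replace repeated shift expressions and the old floating-point 6.5 load-factor comparison. A self-contained sketch of the arithmetic with an illustrative bucket exponent (not runtime code):

package main

import "fmt"

func bucketShift(b uint8) uintptr { return uintptr(1) << b }
func bucketMask(b uint8) uintptr  { return bucketShift(b) - 1 }

// overLoad mirrors the integer form of the test: 13/2 == 6.5, so
// count > 13*((1<<B)/2) means the average bucket load would exceed 6.5.
func overLoad(count int, B uint8) bool {
	return count > 8 && uintptr(count) > 13*(bucketShift(B)/2)
}

func main() {
	const B = 5                   // 32 buckets, purely illustrative
	fmt.Println(bucketMask(B))    // 31
	fmt.Println(overLoad(208, B)) // false: 208 == 6.5*32 exactly
	fmt.Println(overLoad(209, B)) // true: growth would be triggered
}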
@@ -229,7 +259,7 @@ func (h *hmap) newoverflow(t *maptype, b *bmap) *bmap { h.incrnoverflow() if t.bucket.kind&kindNoPointers != 0 { h.createOverflow() - *h.extra.overflow[0] = append(*h.extra.overflow[0], ovf) + *h.extra.overflow = append(*h.extra.overflow, ovf) } b.setoverflow(t, ovf) return ovf @@ -239,97 +269,69 @@ func (h *hmap) createOverflow() { if h.extra == nil { h.extra = new(mapextra) } - if h.extra.overflow[0] == nil { - h.extra.overflow[0] = new([]*bmap) + if h.extra.overflow == nil { + h.extra.overflow = new([]*bmap) } } -// makemap implements a Go map creation make(map[k]v, hint) +func makemap64(t *maptype, hint int64, h *hmap) *hmap { + if int64(int(hint)) != hint { + hint = 0 + } + return makemap(t, int(hint), h) +} + +// makehmap_small implements Go map creation for make(map[k]v) and +// make(map[k]v, hint) when hint is known to be at most bucketCnt +// at compile time and the map needs to be allocated on the heap. +func makemap_small() *hmap { + h := new(hmap) + h.hash0 = fastrand() + return h +} + +// makemap implements Go map creation for make(map[k]v, hint). // If the compiler has determined that the map or the first bucket // can be created on the stack, h and/or bucket may be non-nil. // If h != nil, the map can be created directly in h. -// If bucket != nil, bucket can be used as the first bucket. -func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap { - if sz := unsafe.Sizeof(hmap{}); sz > 48 || sz != t.hmap.size { +// If h.buckets != nil, bucket pointed to can be used as the first bucket. +func makemap(t *maptype, hint int, h *hmap) *hmap { + // The size of hmap should be 48 bytes on 64 bit + // and 28 bytes on 32 bit platforms. + if sz := unsafe.Sizeof(hmap{}); sz != 8+5*sys.PtrSize { println("runtime: sizeof(hmap) =", sz, ", t.hmap.size =", t.hmap.size) throw("bad hmap size") } - if hint < 0 || hint > int64(maxSliceCap(t.bucket.size)) { + if hint < 0 || hint > int(maxSliceCap(t.bucket.size)) { hint = 0 } - if !ismapkey(t.key) { - throw("runtime.makemap: unsupported map key type") - } - - // check compiler's and reflect's math - if t.key.size > maxKeySize && (!t.indirectkey || t.keysize != uint8(sys.PtrSize)) || - t.key.size <= maxKeySize && (t.indirectkey || t.keysize != uint8(t.key.size)) { - throw("key size wrong") - } - if t.elem.size > maxValueSize && (!t.indirectvalue || t.valuesize != uint8(sys.PtrSize)) || - t.elem.size <= maxValueSize && (t.indirectvalue || t.valuesize != uint8(t.elem.size)) { - throw("value size wrong") - } - - // invariants we depend on. We should probably check these at compile time - // somewhere, but for now we'll do it here. 
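makemap_small and makemap64 above split map construction by hint: makemap64 simply treats a hint that does not fit in int as 0, and makemap_small covers maps whose hint is known at compile time to be at most bucketCnt. Roughly which entry point a given make call reaches (a hedged illustration of compiler behavior, not part of the patch):

package main

func main() {
	m1 := make(map[string]int)       // no hint; if heap-allocated, makemap_small is enough
	m2 := make(map[string]int, 4)    // hint known to be <= bucketCnt (8): also a makemap_small candidate
	m3 := make(map[string]int, 1000) // larger hint: the full makemap path with an int hint
	_, _, _ = m1, m2, m3
}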
- if t.key.align > bucketCnt { - throw("key align too big") - } - if t.elem.align > bucketCnt { - throw("value align too big") - } - if t.key.size%uintptr(t.key.align) != 0 { - throw("key size not a multiple of key align") - } - if t.elem.size%uintptr(t.elem.align) != 0 { - throw("value size not a multiple of value align") - } - if bucketCnt < 8 { - throw("bucketsize too small for proper alignment") - } - if dataOffset%uintptr(t.key.align) != 0 { - throw("need padding in bucket (key)") - } - if dataOffset%uintptr(t.elem.align) != 0 { - throw("need padding in bucket (value)") + // initialize Hmap + if h == nil { + h = (*hmap)(newobject(t.hmap)) } + h.hash0 = fastrand() // find size parameter which will hold the requested # of elements B := uint8(0) - for ; overLoadFactor(hint, B); B++ { + for overLoadFactor(hint, B) { + B++ } + h.B = B // allocate initial hash table // if B == 0, the buckets field is allocated lazily later (in mapassign) // If hint is large zeroing this memory could take a while. - buckets := bucket - var extra *mapextra - if B != 0 { + if h.B != 0 { var nextOverflow *bmap - buckets, nextOverflow = makeBucketArray(t, B) + h.buckets, nextOverflow = makeBucketArray(t, h.B) if nextOverflow != nil { - extra = new(mapextra) - extra.nextOverflow = nextOverflow + h.extra = new(mapextra) + h.extra.nextOverflow = nextOverflow } } - // initialize Hmap - if h == nil { - h = (*hmap)(newobject(t.hmap)) - } - h.count = 0 - h.B = B - h.extra = extra - h.flags = 0 - h.hash0 = fastrand() - h.buckets = buckets - h.oldbuckets = nil - h.nevacuate = 0 - h.noverflow = 0 - return h } @@ -340,7 +342,7 @@ func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap { // hold onto it for very long. func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() pc := funcPC(mapaccess1) racereadpc(unsafe.Pointer(h), callerpc, pc) raceReadObjectPC(t.key, key, callerpc, pc) @@ -356,7 +358,7 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { } alg := t.key.alg hash := alg.hash(key, uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -368,11 +370,8 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { b = oldb } } - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - for { + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { continue @@ -389,16 +388,13 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { return v } } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]) - } } + return unsafe.Pointer(&zeroVal[0]) } func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() pc := funcPC(mapaccess2) racereadpc(unsafe.Pointer(h), callerpc, pc) raceReadObjectPC(t.key, key, callerpc, pc) @@ -414,7 +410,7 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) } alg := t.key.alg hash := alg.hash(key, uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() 
{ @@ -426,11 +422,8 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) b = oldb } } - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - for { + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { continue @@ -447,11 +440,8 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) return v, true } } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]), false - } } + return unsafe.Pointer(&zeroVal[0]), false } // returns both key and value. Used by map iterator @@ -461,7 +451,7 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe } alg := t.key.alg hash := alg.hash(key, uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -473,11 +463,8 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe b = oldb } } - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - for { + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { continue @@ -494,11 +481,8 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe return k, v } } - b = b.overflow(t) - if b == nil { - return nil, nil - } } + return nil, nil } func mapaccess1_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) unsafe.Pointer { @@ -523,7 +507,7 @@ func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { panic(plainError("assignment to entry in nil map")) } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() pc := funcPC(mapassign) racewritepc(unsafe.Pointer(h), callerpc, pc) raceReadObjectPC(t.key, key, callerpc, pc) @@ -542,19 +526,16 @@ func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { h.flags |= hashWriting if h.buckets == nil { - h.buckets = newarray(t.bucket, 1) + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) } again: - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { growWork(t, h, bucket) } b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } + top := tophash(hash) var inserti *uint8 var insertk unsafe.Pointer @@ -594,7 +575,7 @@ again: // If we hit the max load factor or we have too many overflow buckets, // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { hashGrow(t, h) goto again // Growing the table invalidates everything, so try again } @@ -634,7 +615,7 @@ done: func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() pc := funcPC(mapdelete) racewritepc(unsafe.Pointer(h), callerpc, pc) raceReadObjectPC(t.key, key, callerpc, pc) @@ -656,16 +637,14 @@ func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { // in which case we have not actually done a write (delete). 
h.flags |= hashWriting - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { growWork(t, h, bucket) } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - for { + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) + top := tophash(hash) +search: + for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { continue @@ -678,53 +657,44 @@ func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { if !alg.equal(key, k2) { continue } + // Only clear key if there are pointers in it. if t.indirectkey { *(*unsafe.Pointer)(k) = nil - } else { - typedmemclr(t.key, k) + } else if t.key.kind&kindNoPointers == 0 { + memclrHasPointers(k, t.key.size) } - v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*uintptr(t.keysize) + i*uintptr(t.valuesize)) - if t.indirectvalue { - *(*unsafe.Pointer)(v) = nil - } else { - typedmemclr(t.elem, v) + // Only clear value if there are pointers in it. + if t.indirectvalue || t.elem.kind&kindNoPointers == 0 { + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + if t.indirectvalue { + *(*unsafe.Pointer)(v) = nil + } else { + memclrHasPointers(v, t.elem.size) + } } b.tophash[i] = empty h.count-- - goto done - } - b = b.overflow(t) - if b == nil { - goto done + break search } } -done: if h.flags&hashWriting == 0 { throw("concurrent map writes") } h.flags &^= hashWriting } +// mapiterinit initializes the hiter struct used for ranging over maps. +// The hiter struct pointed to by 'it' is allocated on the stack +// by the compilers order pass or on the heap by reflect_mapiterinit. +// Both need to have zeroed hiter since the struct contains pointers. func mapiterinit(t *maptype, h *hmap, it *hiter) { - // Clear pointer fields so garbage collector does not complain. - it.key = nil - it.value = nil - it.t = nil - it.h = nil - it.buckets = nil - it.bptr = nil - it.overflow[0] = nil - it.overflow[1] = nil - if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiterinit)) } if h == nil || h.count == 0 { - it.key = nil - it.value = nil return } @@ -744,6 +714,7 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) { // while we are iterating. h.createOverflow() it.overflow = h.extra.overflow + it.oldoverflow = h.extra.oldoverflow } // decide where to start @@ -751,16 +722,14 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) { if h.B > 31-bucketCntBits { r += uintptr(fastrand()) << 31 } - it.startBucket = r & (uintptr(1)<<h.B - 1) + it.startBucket = r & bucketMask(h.B) it.offset = uint8(r >> h.B & (bucketCnt - 1)) // iterator state it.bucket = it.startBucket - it.wrapped = false - it.bptr = nil // Remember we have an iterator. - // Can run concurrently with another hash_iter_init(). + // Can run concurrently with another mapiterinit(). 
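mapiterinit above seeds the walk with fastrand(), choosing a random startBucket and intra-bucket offset, which is why range order over a map varies from run to run. A tiny demonstration (the printed order is intentionally unspecified):

package main

import "fmt"

func main() {
	m := map[string]int{"a": 1, "b": 2, "c": 3}
	for k, v := range m { // the starting bucket and offset are randomized
		fmt.Println(k, v)
	}
}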
if old := h.flags; old&(iterator|oldIterator) != iterator|oldIterator { atomic.Or8(&h.flags, iterator|oldIterator) } @@ -771,7 +740,7 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) { func mapiternext(it *hiter) { h := it.h if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&it)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiternext)) } if h.flags&hashWriting != 0 { @@ -810,7 +779,7 @@ next: checkBucket = noCheck } bucket++ - if bucket == uintptr(1)<<it.B { + if bucket == bucketShift(it.B) { bucket = 0 it.wrapped = true } @@ -818,90 +787,75 @@ next: } for ; i < bucketCnt; i++ { offi := (i + it.offset) & (bucketCnt - 1) + if b.tophash[offi] == empty || b.tophash[offi] == evacuatedEmpty { + continue + } k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize)) + if t.indirectkey { + k = *((*unsafe.Pointer)(k)) + } v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.valuesize)) - if b.tophash[offi] != empty && b.tophash[offi] != evacuatedEmpty { - if checkBucket != noCheck && !h.sameSizeGrow() { - // Special case: iterator was started during a grow to a larger size - // and the grow is not done yet. We're working on a bucket whose - // oldbucket has not been evacuated yet. Or at least, it wasn't - // evacuated when we started the bucket. So we're iterating - // through the oldbucket, skipping any keys that will go - // to the other new bucket (each oldbucket expands to two - // buckets during a grow). - k2 := k - if t.indirectkey { - k2 = *((*unsafe.Pointer)(k2)) - } - if t.reflexivekey || alg.equal(k2, k2) { - // If the item in the oldbucket is not destined for - // the current new bucket in the iteration, skip it. - hash := alg.hash(k2, uintptr(h.hash0)) - if hash&(uintptr(1)<<it.B-1) != checkBucket { - continue - } - } else { - // Hash isn't repeatable if k != k (NaNs). We need a - // repeatable and randomish choice of which direction - // to send NaNs during evacuation. We'll use the low - // bit of tophash to decide which way NaNs go. - // NOTE: this case is why we need two evacuate tophash - // values, evacuatedX and evacuatedY, that differ in - // their low bit. - if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) { - continue - } - } - } - if b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY { - // this is the golden data, we can return it. - if t.indirectkey { - k = *((*unsafe.Pointer)(k)) - } - it.key = k - if t.indirectvalue { - v = *((*unsafe.Pointer)(v)) + if checkBucket != noCheck && !h.sameSizeGrow() { + // Special case: iterator was started during a grow to a larger size + // and the grow is not done yet. We're working on a bucket whose + // oldbucket has not been evacuated yet. Or at least, it wasn't + // evacuated when we started the bucket. So we're iterating + // through the oldbucket, skipping any keys that will go + // to the other new bucket (each oldbucket expands to two + // buckets during a grow). + if t.reflexivekey || alg.equal(k, k) { + // If the item in the oldbucket is not destined for + // the current new bucket in the iteration, skip it. + hash := alg.hash(k, uintptr(h.hash0)) + if hash&bucketMask(it.B) != checkBucket { + continue } - it.value = v } else { - // The hash table has grown since the iterator was started. - // The golden data for this key is now somewhere else. - k2 := k - if t.indirectkey { - k2 = *((*unsafe.Pointer)(k2)) - } - if t.reflexivekey || alg.equal(k2, k2) { - // Check the current hash table for the data. 
- // This code handles the case where the key - // has been deleted, updated, or deleted and reinserted. - // NOTE: we need to regrab the key as it has potentially been - // updated to an equal() but not identical key (e.g. +0.0 vs -0.0). - rk, rv := mapaccessK(t, h, k2) - if rk == nil { - continue // key has been deleted - } - it.key = rk - it.value = rv - } else { - // if key!=key then the entry can't be deleted or - // updated, so we can just return it. That's lucky for - // us because when key!=key we can't look it up - // successfully in the current table. - it.key = k2 - if t.indirectvalue { - v = *((*unsafe.Pointer)(v)) - } - it.value = v + // Hash isn't repeatable if k != k (NaNs). We need a + // repeatable and randomish choice of which direction + // to send NaNs during evacuation. We'll use the low + // bit of tophash to decide which way NaNs go. + // NOTE: this case is why we need two evacuate tophash + // values, evacuatedX and evacuatedY, that differ in + // their low bit. + if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) { + continue } } - it.bucket = bucket - if it.bptr != b { // avoid unnecessary write barrier; see issue 14921 - it.bptr = b + } + if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) || + !(t.reflexivekey || alg.equal(k, k)) { + // This is the golden data, we can return it. + // OR + // key!=key, so the entry can't be deleted or updated, so we can just return it. + // That's lucky for us because when key!=key we can't look it up successfully. + it.key = k + if t.indirectvalue { + v = *((*unsafe.Pointer)(v)) } - it.i = i + 1 - it.checkBucket = checkBucket - return + it.value = v + } else { + // The hash table has grown since the iterator was started. + // The golden data for this key is now somewhere else. + // Check the current hash table for the data. + // This code handles the case where the key + // has been deleted, updated, or deleted and reinserted. + // NOTE: we need to regrab the key as it has potentially been + // updated to an equal() but not identical key (e.g. +0.0 vs -0.0). + rk, rv := mapaccessK(t, h, k) + if rk == nil { + continue // key has been deleted + } + it.key = rk + it.value = rv + } + it.bucket = bucket + if it.bptr != b { // avoid unnecessary write barrier; see issue 14921 + it.bptr = b } + it.i = i + 1 + it.checkBucket = checkBucket + return } b = b.overflow(t) i = 0 @@ -909,7 +863,7 @@ next: } func makeBucketArray(t *maptype, b uint8) (buckets unsafe.Pointer, nextOverflow *bmap) { - base := uintptr(1 << b) + base := bucketShift(b) nbuckets := base // For small b, overflow buckets are unlikely. // Avoid the overhead of the calculation. @@ -917,7 +871,7 @@ func makeBucketArray(t *maptype, b uint8) (buckets unsafe.Pointer, nextOverflow // Add on the estimated number of overflow buckets // required to insert the median number of elements // used with this value of b. - nbuckets += 1 << (b - 4) + nbuckets += bucketShift(b - 4) sz := t.bucket.size * nbuckets up := roundupsize(sz) if up != sz { @@ -943,7 +897,7 @@ func hashGrow(t *maptype, h *hmap) { // Otherwise, there are too many overflow buckets, // so keep the same number of buckets and "grow" laterally. 
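For a concrete feel of the overflow preallocation in makeBucketArray above: with b = 8 the base array holds 256 buckets and bucketShift(b-4) adds another 16, i.e. roughly one preallocated overflow bucket per 16 regular buckets, before roundupsize may enlarge the allocation further.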
bigger := uint8(1) - if !overLoadFactor(int64(h.count), h.B) { + if !overLoadFactor(h.count+1, h.B) { bigger = 0 h.flags |= sameSizeGrow } @@ -962,13 +916,13 @@ func hashGrow(t *maptype, h *hmap) { h.nevacuate = 0 h.noverflow = 0 - if h.extra != nil && h.extra.overflow[0] != nil { + if h.extra != nil && h.extra.overflow != nil { // Promote current overflow buckets to the old generation. - if h.extra.overflow[1] != nil { - throw("overflow is not nil") + if h.extra.oldoverflow != nil { + throw("oldoverflow is not nil") } - h.extra.overflow[1] = h.extra.overflow[0] - h.extra.overflow[0] = nil + h.extra.oldoverflow = h.extra.overflow + h.extra.overflow = nil } if nextOverflow != nil { if h.extra == nil { @@ -982,9 +936,8 @@ func hashGrow(t *maptype, h *hmap) { } // overLoadFactor reports whether count items placed in 1<<B buckets is over loadFactor. -func overLoadFactor(count int64, B uint8) bool { - // TODO: rewrite to use integer math and comparison? - return count >= bucketCnt && float32(count) >= loadFactor*float32((uint64(1)<<B)) +func overLoadFactor(count int, B uint8) bool { + return count > bucketCnt && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen) } // tooManyOverflowBuckets reports whether noverflow buckets is too many for a map with 1<<B buckets. @@ -995,10 +948,11 @@ func tooManyOverflowBuckets(noverflow uint16, B uint8) bool { // If the threshold is too high, maps that grow and shrink can hold on to lots of unused memory. // "too many" means (approximately) as many overflow buckets as regular buckets. // See incrnoverflow for more details. - if B < 16 { - return noverflow >= uint16(1)<<B + if B > 15 { + B = 15 } - return noverflow >= 1<<15 + // The compiler doesn't see here that B < 16; mask B to generate shorter shift code. + return noverflow >= uint16(1)<<(B&15) } // growing reports whether h is growing. The growth may be to the same size or bigger. @@ -1017,7 +971,7 @@ func (h *hmap) noldbuckets() uintptr { if !h.sameSizeGrow() { oldB-- } - return uintptr(1) << oldB + return bucketShift(oldB) } // oldbucketmask provides a mask that can be applied to calculate n % noldbuckets(). @@ -1041,32 +995,37 @@ func bucketEvacuated(t *maptype, h *hmap, bucket uintptr) bool { return evacuated(b) } +// evacDst is an evacuation destination. +type evacDst struct { + b *bmap // current destination bucket + i int // key/val index into b + k unsafe.Pointer // pointer to current key storage + v unsafe.Pointer // pointer to current value storage +} + func evacuate(t *maptype, h *hmap, oldbucket uintptr) { b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) newbit := h.noldbuckets() - alg := t.key.alg if !evacuated(b) { // TODO: reuse overflow buckets instead of using new ones, if there // is no iterator using the old buckets. (If !oldIterator.) - var ( - x, y *bmap // current low/high buckets in new map - xi, yi int // key/val indices into x and y - xk, yk unsafe.Pointer // pointers to current x and y key storage - xv, yv unsafe.Pointer // pointers to current x and y value storage - ) - x = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) - xi = 0 - xk = add(unsafe.Pointer(x), dataOffset) - xv = add(xk, bucketCnt*uintptr(t.keysize)) + // xy contains the x and y (low and high) evacuation destinations. 
+ var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*uintptr(t.keysize)) + if !h.sameSizeGrow() { // Only calculate y pointers if we're growing bigger. // Otherwise GC can see bad pointers. - y = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) - yi = 0 - yk = add(unsafe.Pointer(y), dataOffset) - yv = add(yk, bucketCnt*uintptr(t.keysize)) + y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*uintptr(t.keysize)) } + for ; b != nil; b = b.overflow(t) { k := add(unsafe.Pointer(b), dataOffset) v := add(k, bucketCnt*uintptr(t.keysize)) @@ -1083,122 +1042,102 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) { if t.indirectkey { k2 = *((*unsafe.Pointer)(k2)) } - useX := true + var useY uint8 if !h.sameSizeGrow() { // Compute hash to make our evacuation decision (whether we need // to send this key/value to bucket x or bucket y). - hash := alg.hash(k2, uintptr(h.hash0)) - if h.flags&iterator != 0 { - if !t.reflexivekey && !alg.equal(k2, k2) { - // If key != key (NaNs), then the hash could be (and probably - // will be) entirely different from the old hash. Moreover, - // it isn't reproducible. Reproducibility is required in the - // presence of iterators, as our evacuation decision must - // match whatever decision the iterator made. - // Fortunately, we have the freedom to send these keys either - // way. Also, tophash is meaningless for these kinds of keys. - // We let the low bit of tophash drive the evacuation decision. - // We recompute a new random tophash for the next level so - // these keys will get evenly distributed across all buckets - // after multiple grows. - if top&1 != 0 { - hash |= newbit - } else { - hash &^= newbit - } - top = uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } + hash := t.key.alg.hash(k2, uintptr(h.hash0)) + if h.flags&iterator != 0 && !t.reflexivekey && !t.key.alg.equal(k2, k2) { + // If key != key (NaNs), then the hash could be (and probably + // will be) entirely different from the old hash. Moreover, + // it isn't reproducible. Reproducibility is required in the + // presence of iterators, as our evacuation decision must + // match whatever decision the iterator made. + // Fortunately, we have the freedom to send these keys either + // way. Also, tophash is meaningless for these kinds of keys. + // We let the low bit of tophash drive the evacuation decision. + // We recompute a new random tophash for the next level so + // these keys will get evenly distributed across all buckets + // after multiple grows. 
+ useY = top & 1 + top = tophash(hash) + } else { + if hash&newbit != 0 { + useY = 1 } } - useX = hash&newbit == 0 } - if useX { - b.tophash[i] = evacuatedX - if xi == bucketCnt { - newx := h.newoverflow(t, x) - x = newx - xi = 0 - xk = add(unsafe.Pointer(x), dataOffset) - xv = add(xk, bucketCnt*uintptr(t.keysize)) - } - x.tophash[xi] = top - if t.indirectkey { - *(*unsafe.Pointer)(xk) = k2 // copy pointer - } else { - typedmemmove(t.key, xk, k) // copy value - } - if t.indirectvalue { - *(*unsafe.Pointer)(xv) = *(*unsafe.Pointer)(v) - } else { - typedmemmove(t.elem, xv, v) - } - xi++ - xk = add(xk, uintptr(t.keysize)) - xv = add(xv, uintptr(t.valuesize)) + + if evacuatedX+1 != evacuatedY { + throw("bad evacuatedN") + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*uintptr(t.keysize)) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + if t.indirectkey { + *(*unsafe.Pointer)(dst.k) = k2 // copy pointer } else { - b.tophash[i] = evacuatedY - if yi == bucketCnt { - newy := h.newoverflow(t, y) - y = newy - yi = 0 - yk = add(unsafe.Pointer(y), dataOffset) - yv = add(yk, bucketCnt*uintptr(t.keysize)) - } - y.tophash[yi] = top - if t.indirectkey { - *(*unsafe.Pointer)(yk) = k2 - } else { - typedmemmove(t.key, yk, k) - } - if t.indirectvalue { - *(*unsafe.Pointer)(yv) = *(*unsafe.Pointer)(v) - } else { - typedmemmove(t.elem, yv, v) - } - yi++ - yk = add(yk, uintptr(t.keysize)) - yv = add(yv, uintptr(t.valuesize)) + typedmemmove(t.key, dst.k, k) // copy value } + if t.indirectvalue { + *(*unsafe.Pointer)(dst.v) = *(*unsafe.Pointer)(v) + } else { + typedmemmove(t.elem, dst.v, v) + } + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, uintptr(t.keysize)) + dst.v = add(dst.v, uintptr(t.valuesize)) } } // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 { - b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) // Preserve b.tophash because the evacuation // state is maintained there. - if t.bucket.kind&kindNoPointers == 0 { - memclrHasPointers(add(unsafe.Pointer(b), dataOffset), uintptr(t.bucketsize)-dataOffset) - } else { - memclrNoHeapPointers(add(unsafe.Pointer(b), dataOffset), uintptr(t.bucketsize)-dataOffset) - } + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) } } - // Advance evacuation mark if oldbucket == h.nevacuate { - h.nevacuate = oldbucket + 1 - // Experiments suggest that 1024 is overkill by at least an order of magnitude. - // Put it in there as a safeguard anyway, to ensure O(1) behavior. - stop := h.nevacuate + 1024 - if stop > newbit { - stop = newbit - } - for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) { - h.nevacuate++ - } - if h.nevacuate == newbit { // newbit == # of oldbuckets - // Growing is all done. Free old main bucket array. - h.oldbuckets = nil - // Can discard old overflow buckets as well. 
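The evacuate rewrite above funnels both destinations through the evacDst pair xy and encodes the choice as useY (0 or 1), relying on evacuatedX+1 == evacuatedY so the old slot can be marked with evacuatedX + useY. A standalone toy of the split decision itself, showing which half of the doubled table an old entry lands in (all values illustrative):

package main

import "fmt"

func main() {
	const oldB = 3               // 8 old buckets, illustrative
	newbit := uintptr(1) << oldB // the hash bit exposed by doubling
	for _, hash := range []uintptr{0x05, 0x0d, 0x15} {
		oldBucket := hash & (newbit - 1) // bucket in the old table
		useY := uintptr(0)
		if hash&newbit != 0 { // the same test evacuate applies
			useY = 1
		}
		newBucket := oldBucket + useY*newbit // x keeps the index, y adds newbit
		fmt.Println(oldBucket, "->", newBucket)
	}
}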
- // If they are still referenced by an iterator, - // then the iterator holds a pointers to the slice. - if h.extra != nil { - h.extra.overflow[1] = nil - } - h.flags &^= sameSizeGrow + advanceEvacuationMark(h, t, newbit) + } +} + +func advanceEvacuationMark(h *hmap, t *maptype, newbit uintptr) { + h.nevacuate++ + // Experiments suggest that 1024 is overkill by at least an order of magnitude. + // Put it in there as a safeguard anyway, to ensure O(1) behavior. + stop := h.nevacuate + 1024 + if stop > newbit { + stop = newbit + } + for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) { + h.nevacuate++ + } + if h.nevacuate == newbit { // newbit == # of oldbuckets + // Growing is all done. Free old main bucket array. + h.oldbuckets = nil + // Can discard old overflow buckets as well. + // If they are still referenced by an iterator, + // then the iterator holds a pointers to the slice. + if h.extra != nil { + h.extra.oldoverflow = nil } + h.flags &^= sameSizeGrow } } @@ -1210,7 +1149,45 @@ func ismapkey(t *_type) bool { //go:linkname reflect_makemap reflect.makemap func reflect_makemap(t *maptype, cap int) *hmap { - return makemap(t, int64(cap), nil, nil) + // Check invariants and reflects math. + if sz := unsafe.Sizeof(hmap{}); sz != t.hmap.size { + println("runtime: sizeof(hmap) =", sz, ", t.hmap.size =", t.hmap.size) + throw("bad hmap size") + } + if !ismapkey(t.key) { + throw("runtime.reflect_makemap: unsupported map key type") + } + if t.key.size > maxKeySize && (!t.indirectkey || t.keysize != uint8(sys.PtrSize)) || + t.key.size <= maxKeySize && (t.indirectkey || t.keysize != uint8(t.key.size)) { + throw("key size wrong") + } + if t.elem.size > maxValueSize && (!t.indirectvalue || t.valuesize != uint8(sys.PtrSize)) || + t.elem.size <= maxValueSize && (t.indirectvalue || t.valuesize != uint8(t.elem.size)) { + throw("value size wrong") + } + if t.key.align > bucketCnt { + throw("key align too big") + } + if t.elem.align > bucketCnt { + throw("value align too big") + } + if t.key.size%uintptr(t.key.align) != 0 { + throw("key size not a multiple of key align") + } + if t.elem.size%uintptr(t.elem.align) != 0 { + throw("value size not a multiple of value align") + } + if bucketCnt < 8 { + throw("bucketsize too small for proper alignment") + } + if dataOffset%uintptr(t.key.align) != 0 { + throw("need padding in bucket (key)") + } + if dataOffset%uintptr(t.elem.align) != 0 { + throw("need padding in bucket (value)") + } + + return makemap(t, cap, nil) } //go:linkname reflect_mapaccess reflect.mapaccess @@ -1257,7 +1234,7 @@ func reflect_maplen(h *hmap) int { return 0 } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&h)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(reflect_maplen)) } return h.count diff --git a/src/runtime/hashmap_fast.go b/src/runtime/hashmap_fast.go index 67b9787909..2de381412b 100644 --- a/src/runtime/hashmap_fast.go +++ b/src/runtime/hashmap_fast.go @@ -11,7 +11,7 @@ import ( func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast32)) } if h == nil || h.count == 0 { @@ -26,7 +26,7 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { b = (*bmap)(h.buckets) } else { hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b = (*bmap)(add(h.buckets, 
(hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -39,28 +39,19 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { } } } - for { - for i := uintptr(0); i < bucketCnt; i++ { - k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) - if k != key { - continue + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if *(*uint32)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) } - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue - } - return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) - } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]) } } + return unsafe.Pointer(&zeroVal[0]) } func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast32)) } if h == nil || h.count == 0 { @@ -75,7 +66,7 @@ func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { b = (*bmap)(h.buckets) } else { hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -88,28 +79,19 @@ func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { } } } - for { - for i := uintptr(0); i < bucketCnt; i++ { - k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) - if k != key { - continue - } - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if *(*uint32)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true } - return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true - } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]), false } } + return unsafe.Pointer(&zeroVal[0]), false } func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast64)) } if h == nil || h.count == 0 { @@ -124,7 +106,7 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { b = (*bmap)(h.buckets) } else { hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -137,28 +119,19 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { } } } - for { - for i := uintptr(0); i < bucketCnt; i++ { - k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8))) - if k != key { - continue + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if *(*uint64)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) } - x := *((*uint8)(add(unsafe.Pointer(b), 
i))) // b.tophash[i] without the bounds check - if x == empty { - continue - } - return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) - } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]) } } + return unsafe.Pointer(&zeroVal[0]) } func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast64)) } if h == nil || h.count == 0 { @@ -173,7 +146,7 @@ func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { b = (*bmap)(h.buckets) } else { hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -186,28 +159,19 @@ func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { } } } - for { - for i := uintptr(0); i < bucketCnt; i++ { - k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8))) - if k != key { - continue - } - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if *(*uint64)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true } - return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true - } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]), false } } + return unsafe.Pointer(&zeroVal[0]), false } func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr)) } if h == nil || h.count == 0 { @@ -222,13 +186,9 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { b := (*bmap)(h.buckets) if key.len < 32 { // short key, doing lots of comparisons is ok - for i := uintptr(0); i < bucketCnt; i++ { - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { continue } if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { @@ -239,13 +199,9 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { } // long key, try not to do more comparisons than necessary keymaybe := uintptr(bucketCnt) - for i := uintptr(0); i < bucketCnt; i++ { - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { continue } if k.str == key.str { @@ -275,7 +231,7 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { } dohash: hash := 
t.key.alg.hash(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -287,34 +243,24 @@ dohash: b = oldb } } - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - for { - for i := uintptr(0); i < bucketCnt; i++ { - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x != top { - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { continue } if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) } } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]) - } } + return unsafe.Pointer(&zeroVal[0]) } func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr)) } if h == nil || h.count == 0 { @@ -329,13 +275,9 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { b := (*bmap)(h.buckets) if key.len < 32 { // short key, doing lots of comparisons is ok - for i := uintptr(0); i < bucketCnt; i++ { - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { continue } if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { @@ -346,13 +288,9 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { } // long key, try not to do more comparisons than necessary keymaybe := uintptr(bucketCnt) - for i := uintptr(0); i < bucketCnt; i++ { - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x == empty { - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { continue } if k.str == key.str { @@ -382,7 +320,7 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { } dohash: hash := t.key.alg.hash(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) - m := uintptr(1)<<h.B - 1 + m := bucketMask(h.B) b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { @@ -394,37 +332,113 @@ dohash: b = oldb } } - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { + continue + } + if k.str 
== key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + } + } + } + return unsafe.Pointer(&zeroVal[0]), false +} + +func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast32(t, h, bucket) } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + for { for i := uintptr(0); i < bucketCnt; i++ { - x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check - if x != top { + if b.tophash[i] == empty { + if insertb == nil { + inserti = i + insertb = b + } continue } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) + if k != key { continue } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true - } + inserti = i + insertb = b + goto done } - b = b.overflow(t) - if b == nil { - return unsafe.Pointer(&zeroVal[0]), false + ovf := b.overflow(t) + if ovf == nil { + break } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. 
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) + // store new key at insert position + *(*uint32)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val } -func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { +func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { if h == nil { panic(plainError("assignment to entry in nil map")) } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) } if h.flags&hashWriting != 0 { @@ -436,38 +450,35 @@ func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { h.flags |= hashWriting if h.buckets == nil { - h.buckets = newarray(t.bucket, 1) + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) } again: - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { - growWork(t, h, bucket) + growWork_fast32(t, h, bucket) } b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - var inserti *uint8 + var insertb *bmap + var inserti uintptr var insertk unsafe.Pointer - var val unsafe.Pointer + for { for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - if b.tophash[i] == empty && inserti == nil { - inserti = &b.tophash[i] - insertk = add(unsafe.Pointer(b), dataOffset+i*4) - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) + if b.tophash[i] == empty { + if insertb == nil { + inserti = i + insertb = b } continue } - k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) + k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*4))) if k != key { continue } - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) + inserti = i + insertb = b goto done } ovf := b.overflow(t) @@ -481,25 +492,26 @@ again: // If we hit the max load factor or we have too many overflow buckets, // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { hashGrow(t, h) goto again // Growing the table invalidates everything, so try again } - if inserti == nil { + if insertb == nil { // all current buckets are full, allocate a new one. 
- newb := h.newoverflow(t, b) - inserti = &newb.tophash[0] - insertk = add(unsafe.Pointer(newb), dataOffset) - val = add(insertk, bucketCnt*4) + insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) + // store new key at insert position + *(*unsafe.Pointer)(insertk) = key - // store new key/value at insert position - typedmemmove(t.key, insertk, unsafe.Pointer(&key)) - *inserti = top h.count++ done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) if h.flags&hashWriting == 0 { throw("concurrent map writes") } @@ -512,7 +524,7 @@ func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { panic(plainError("assignment to entry in nil map")) } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) } if h.flags&hashWriting != 0 { @@ -524,30 +536,26 @@ func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { h.flags |= hashWriting if h.buckets == nil { - h.buckets = newarray(t.bucket, 1) + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) } again: - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { - growWork(t, h, bucket) + growWork_fast64(t, h, bucket) } b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - var inserti *uint8 + var insertb *bmap + var inserti uintptr var insertk unsafe.Pointer - var val unsafe.Pointer + for { for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - if b.tophash[i] == empty && inserti == nil { - inserti = &b.tophash[i] - insertk = add(unsafe.Pointer(b), dataOffset+i*8) - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) + if b.tophash[i] == empty { + if insertb == nil { + insertb = b + inserti = i } continue } @@ -555,7 +563,8 @@ again: if k != key { continue } - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) + insertb = b + inserti = i goto done } ovf := b.overflow(t) @@ -569,25 +578,26 @@ again: // If we hit the max load factor or we have too many overflow buckets, // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { hashGrow(t, h) goto again // Growing the table invalidates everything, so try again } - if inserti == nil { + if insertb == nil { // all current buckets are full, allocate a new one. 
- newb := h.newoverflow(t, b) - inserti = &newb.tophash[0] - insertk = add(unsafe.Pointer(newb), dataOffset) - val = add(insertk, bucketCnt*8) + insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) + // store new key at insert position + *(*uint64)(insertk) = key - // store new key/value at insert position - typedmemmove(t.key, insertk, unsafe.Pointer(&key)) - *inserti = top h.count++ done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) if h.flags&hashWriting == 0 { throw("concurrent map writes") } @@ -595,48 +605,131 @@ done: return val } -func mapassign_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { +func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { if h == nil { panic(plainError("assignment to entry in nil map")) } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&t)) - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr)) + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) } if h.flags&hashWriting != 0 { throw("concurrent map writes") } - key := stringStructOf(&ky) - hash := t.key.alg.hash(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) + hash := t.key.alg.hash(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) // Set hashWriting after calling alg.hash for consistency with mapassign. h.flags |= hashWriting if h.buckets == nil { - h.buckets = newarray(t.bucket, 1) + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) } again: - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { - growWork(t, h, bucket) + growWork_fast64(t, h, bucket) } b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + insertb = b + inserti = i + } + continue + } + k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*8))) + if k != key { + continue + } + insertb = b + inserti = i + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf } - var inserti *uint8 + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. 
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) + // store new key at insert position + *(*unsafe.Pointer)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + key := stringStructOf(&s) + hash := t.key.alg.hash(noescape(unsafe.Pointer(&s)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_faststr(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + top := tophash(hash) + + var insertb *bmap + var inserti uintptr var insertk unsafe.Pointer - var val unsafe.Pointer + for { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { - if b.tophash[i] == empty && inserti == nil { - inserti = &b.tophash[i] - insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + if b.tophash[i] == empty && insertb == nil { + insertb = b + inserti = i } continue } @@ -648,7 +741,8 @@ again: continue } // already have a mapping for key. Update it. - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + inserti = i + insertb = b goto done } ovf := b.overflow(t) @@ -662,25 +756,25 @@ again: // If we hit the max load factor or we have too many overflow buckets, // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { hashGrow(t, h) goto again // Growing the table invalidates everything, so try again } - if inserti == nil { + if insertb == nil { // all current buckets are full, allocate a new one. 
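The assign paths above index the tophash array as insertb.tophash[inserti&(bucketCnt-1)], with a comment that the mask exists to avoid bounds checks. A small illustration of that idiom, under the assumption (true for the runtime's maps) that bucketCnt is a power of two:

// Sketch only: with a power-of-two array length, idx & (len-1) is provably in
// range, so the compiler can drop the bounds check; the map code above leans
// on this with inserti&(bucketCnt-1) and dst.i&(bucketCnt-1).
package main

import "fmt"

const bucketCnt = 8 // 8 cells per bucket in the runtime's map implementation

func main() {
	var tophash [bucketCnt]uint8
	i := uintptr(13)                // any value, possibly >= bucketCnt
	tophash[i&(bucketCnt-1)] = 0xab // masked index, no bounds check needed
	fmt.Println(tophash)
}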
- newb := h.newoverflow(t, b) - inserti = &newb.tophash[0] - insertk = add(unsafe.Pointer(newb), dataOffset) - val = add(insertk, bucketCnt*2*sys.PtrSize) + insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti } + insertb.tophash[inserti&(bucketCnt-1)] = top // mask inserti to avoid bounds checks - // store new key/value at insert position + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*sys.PtrSize) + // store new key at insert position *((*stringStruct)(insertk)) = *key - *inserti = top h.count++ done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.valuesize)) if h.flags&hashWriting == 0 { throw("concurrent map writes") } @@ -690,7 +784,7 @@ done: func mapdelete_fast32(t *maptype, h *hmap, key uint32) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast32)) } if h == nil || h.count == 0 { @@ -705,38 +799,32 @@ func mapdelete_fast32(t *maptype, h *hmap, key uint32) { // Set hashWriting after calling alg.hash for consistency with mapdelete h.flags |= hashWriting - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { - growWork(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash + growWork_fast32(t, h, bucket) } - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) +search: + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if key != *(*uint32)(k) || b.tophash[i] == empty { continue } - k := (*uint32)(add(unsafe.Pointer(b), dataOffset+i*4)) - if key != *k { - continue + // Only clear key if there are pointers in it. + if t.key.kind&kindNoPointers == 0 { + memclrHasPointers(k, t.key.size) + } + // Only clear value if there are pointers in it. 
+ if t.elem.kind&kindNoPointers == 0 { + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) + memclrHasPointers(v, t.elem.size) } - typedmemclr(t.key, unsafe.Pointer(k)) - v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*4 + i*uintptr(t.valuesize)) - typedmemclr(t.elem, v) b.tophash[i] = empty h.count-- - goto done - } - b = b.overflow(t) - if b == nil { - goto done + break search } } -done: if h.flags&hashWriting == 0 { throw("concurrent map writes") } @@ -745,7 +833,7 @@ done: func mapdelete_fast64(t *maptype, h *hmap, key uint64) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast64)) } if h == nil || h.count == 0 { @@ -760,38 +848,32 @@ func mapdelete_fast64(t *maptype, h *hmap, key uint64) { // Set hashWriting after calling alg.hash for consistency with mapdelete h.flags |= hashWriting - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { - growWork(t, h, bucket) + growWork_fast64(t, h, bucket) } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) +search: + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if key != *(*uint64)(k) || b.tophash[i] == empty { continue } - k := (*uint64)(add(unsafe.Pointer(b), dataOffset+i*8)) - if key != *k { - continue + // Only clear key if there are pointers in it. + if t.key.kind&kindNoPointers == 0 { + memclrHasPointers(k, t.key.size) + } + // Only clear value if there are pointers in it. 
+ if t.elem.kind&kindNoPointers == 0 { + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) + memclrHasPointers(v, t.elem.size) } - typedmemclr(t.key, unsafe.Pointer(k)) - v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*8 + i*uintptr(t.valuesize)) - typedmemclr(t.elem, v) b.tophash[i] = empty h.count-- - goto done - } - b = b.overflow(t) - if b == nil { - goto done + break search } } -done: if h.flags&hashWriting == 0 { throw("concurrent map writes") } @@ -800,7 +882,7 @@ done: func mapdelete_faststr(t *maptype, h *hmap, ky string) { if raceenabled && h != nil { - callerpc := getcallerpc(unsafe.Pointer(&t)) + callerpc := getcallerpc() racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_faststr)) } if h == nil || h.count == 0 { @@ -816,43 +898,340 @@ func mapdelete_faststr(t *maptype, h *hmap, ky string) { // Set hashWriting after calling alg.hash for consistency with mapdelete h.flags |= hashWriting - bucket := hash & (uintptr(1)<<h.B - 1) + bucket := hash & bucketMask(h.B) if h.growing() { - growWork(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash + growWork_faststr(t, h, bucket) } - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) + top := tophash(hash) +search: + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { continue } if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { continue } - typedmemclr(t.key, unsafe.Pointer(k)) - v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*2*sys.PtrSize + i*uintptr(t.valuesize)) - typedmemclr(t.elem, v) + // Clear key's pointer. + k.str = nil + // Only clear value if there are pointers in it. + if t.elem.kind&kindNoPointers == 0 { + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + memclrHasPointers(v, t.elem.size) + } b.tophash[i] = empty h.count-- - goto done - } - b = b.overflow(t) - if b == nil { - goto done + break search } } -done: if h.flags&hashWriting == 0 { throw("concurrent map writes") } h.flags &^= hashWriting } + +func growWork_fast32(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_fast32(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_fast32(t, h, h.nevacuate) + } +} + +func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*4) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. 
+ y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*4) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*4) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 4), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.alg.hash(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*4) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + if sys.PtrSize == 4 && t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { + writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k)) + } else { + *(*uint32)(dst.k) = *(*uint32)(k) + } + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 4) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} + +func growWork_fast64(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_fast64(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_fast64(t, h, h.nevacuate) + } +} + +func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*8) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. 
+ y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*8) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*8) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 8), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.alg.hash(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*8) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + if t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { + if sys.PtrSize == 8 { + writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k)) + } else { + // There are three ways to squeeze at least one 32 bit pointer into 64 bits. + // Give up and call typedmemmove. + typedmemmove(t.key, dst.k, k) + } + } else { + *(*uint64)(dst.k) = *(*uint64)(k) + } + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 8) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} + +func growWork_faststr(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_faststr(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_faststr(t, h, h.nevacuate) + } +} + +func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*2*sys.PtrSize) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. 
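The evacuate_fast32/fast64/faststr specializations above pick between the x and y destinations with hash&newbit, where newbit is the old bucket count. A standalone illustration of that decision, with hypothetical hash values:

// Sketch only: when a map doubles, a key in old bucket i lands in new bucket i
// ("x") or i+oldBucketCount ("y") according to the newly significant hash bit,
// which is what hash&newbit tests in the evacuate_* functions above.
package main

import "fmt"

func main() {
	const oldB = 3               // the old table had 2^3 = 8 buckets
	newbit := uintptr(1) << oldB // bit that becomes part of the mask after growing
	for _, hash := range []uintptr{0x12, 0x1a, 0x3f, 0x07} {
		oldBucket := hash & (newbit - 1)
		newBucket := oldBucket
		if hash&newbit != 0 { // "useY" in the runtime code
			newBucket += newbit
		}
		fmt.Printf("hash %#02x: old bucket %d -> new bucket %d\n", hash, oldBucket, newBucket)
	}
}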
+ y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*2*sys.PtrSize) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*2*sys.PtrSize) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.alg.hash(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*2*sys.PtrSize) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + *(*string)(dst.k) = *(*string)(k) + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 2*sys.PtrSize) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index 35f6124643..2b51758ae1 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -200,7 +200,6 @@ func dumptype(t *_type) { // dump an object func dumpobj(obj unsafe.Pointer, size uintptr, bv bitvector) { - dumpbvtypes(&bv, obj) dumpint(tagObject) dumpint(uint64(uintptr(obj))) dumpmemrange(obj, size) @@ -261,14 +260,9 @@ func dumpframe(s *stkframe, arg unsafe.Pointer) bool { } stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps)) - // Dump any types we will need to resolve Efaces. 
- if child.args.n >= 0 { - dumpbvtypes(&child.args, unsafe.Pointer(s.sp+child.argoff)) - } var bv bitvector if stkmap != nil && stkmap.n > 0 { bv = stackmapdata(stkmap, pcdata) - dumpbvtypes(&bv, unsafe.Pointer(s.varp-uintptr(bv.n*sys.PtrSize))) } else { bv.n = -1 } @@ -423,14 +417,12 @@ func finq_callback(fn *funcval, obj unsafe.Pointer, nret uintptr, fint *_type, o func dumproots() { // TODO(mwhudson): dump datamask etc from all objects // data segment - dumpbvtypes(&firstmoduledata.gcdatamask, unsafe.Pointer(firstmoduledata.data)) dumpint(tagData) dumpint(uint64(firstmoduledata.data)) dumpmemrange(unsafe.Pointer(firstmoduledata.data), firstmoduledata.edata-firstmoduledata.data) dumpfields(firstmoduledata.gcdatamask) // bss segment - dumpbvtypes(&firstmoduledata.gcbssmask, unsafe.Pointer(firstmoduledata.bss)) dumpint(tagBSS) dumpint(uint64(firstmoduledata.bss)) dumpmemrange(unsafe.Pointer(firstmoduledata.bss), firstmoduledata.ebss-firstmoduledata.bss) @@ -677,16 +669,6 @@ func dumpfields(bv bitvector) { dumpint(fieldKindEol) } -// The heap dump reader needs to be able to disambiguate -// Eface entries. So it needs to know every type that might -// appear in such an entry. The following routine accomplishes that. -// TODO(rsc, khr): Delete - no longer possible. - -// Dump all the types that appear in the type field of -// any Eface described by this bit vector. -func dumpbvtypes(bv *bitvector, base unsafe.Pointer) { -} - func makeheapobjbv(p uintptr, size uintptr) bitvector { // Extend the temp buffer if necessary. nptr := size / sys.PtrSize diff --git a/src/runtime/iface.go b/src/runtime/iface.go index 58ed61e3aa..7c5d3a05b2 100644 --- a/src/runtime/iface.go +++ b/src/runtime/iface.go @@ -10,21 +10,24 @@ import ( "unsafe" ) -const ( - hashSize = 1009 -) +const itabInitSize = 512 var ( - ifaceLock mutex // lock for accessing hash - hash [hashSize]*itab + itabLock mutex // lock for accessing itab table + itabTable = &itabTableInit // pointer to current table + itabTableInit = itabTableType{size: itabInitSize} // starter table ) -func itabhash(inter *interfacetype, typ *_type) uint32 { +//Note: change the formula in the mallocgc call in itabAdd if you change these fields. +type itabTableType struct { + size uintptr // length of entries array. Always a power of 2. + count uintptr // current number of filled entries. + entries [itabInitSize]*itab // really [size] large +} + +func itabHashFunc(inter *interfacetype, typ *_type) uintptr { // compiler has provided some good hash codes for us. - h := inter.typ.hash - h += 17 * typ.hash - // TODO(rsc): h += 23 * x.mhash ? - return h % hashSize + return uintptr(inter.typ.hash ^ typ.hash) } func getitab(inter *interfacetype, typ *_type, canfail bool) *itab { @@ -41,50 +44,137 @@ func getitab(inter *interfacetype, typ *_type, canfail bool) *itab { panic(&TypeAssertionError{"", typ.string(), inter.typ.string(), name.name()}) } - h := itabhash(inter, typ) - - // look twice - once without lock, once with. - // common case will be no lock contention. var m *itab - var locked int - for locked = 0; locked < 2; locked++ { - if locked != 0 { - lock(&ifaceLock) - } - for m = (*itab)(atomic.Loadp(unsafe.Pointer(&hash[h]))); m != nil; m = m.link { - if m.inter == inter && m._type == typ { - if m.bad { - if !canfail { - // this can only happen if the conversion - // was already done once using the , ok form - // and we have a cached negative result. 
- // the cached result doesn't record which - // interface function was missing, so try - // adding the itab again, which will throw an error. - additab(m, locked != 0, false) - } - m = nil - } - if locked != 0 { - unlock(&ifaceLock) - } - return m - } - } + + // First, look in the existing table to see if we can find the itab we need. + // This is by far the most common case, so do it without locks. + // Use atomic to ensure we see any previous writes done by the thread + // that updates the itabTable field (with atomic.Storep in itabAdd). + t := (*itabTableType)(atomic.Loadp(unsafe.Pointer(&itabTable))) + if m = t.find(inter, typ); m != nil { + goto finish } + // Not found. Grab the lock and try again. + lock(&itabLock) + if m = itabTable.find(inter, typ); m != nil { + unlock(&itabLock) + goto finish + } + + // Entry doesn't exist yet. Make a new entry & add it. m = (*itab)(persistentalloc(unsafe.Sizeof(itab{})+uintptr(len(inter.mhdr)-1)*sys.PtrSize, 0, &memstats.other_sys)) m.inter = inter m._type = typ - additab(m, true, canfail) - unlock(&ifaceLock) - if m.bad { + m.init() + itabAdd(m) + unlock(&itabLock) +finish: + if m.fun[0] != 0 { + return m + } + if canfail { return nil } - return m + // this can only happen if the conversion + // was already done once using the , ok form + // and we have a cached negative result. + // The cached result doesn't record which + // interface function was missing, so initialize + // the itab again to get the missing function name. + panic(&TypeAssertionError{concreteString: typ.string(), assertedString: inter.typ.string(), missingMethod: m.init()}) +} + +// find finds the given interface/type pair in t. +// Returns nil if the given interface/type pair isn't present. +func (t *itabTableType) find(inter *interfacetype, typ *_type) *itab { + // Implemented using quadratic probing. + // Probe sequence is h(i) = h0 + i*(i+1)/2 mod 2^k. + // We're guaranteed to hit all table entries using this probe sequence. + mask := t.size - 1 + h := itabHashFunc(inter, typ) & mask + for i := uintptr(1); ; i++ { + p := (**itab)(add(unsafe.Pointer(&t.entries), h*sys.PtrSize)) + // Use atomic read here so if we see m != nil, we also see + // the initializations of the fields of m. + // m := *p + m := (*itab)(atomic.Loadp(unsafe.Pointer(p))) + if m == nil { + return nil + } + if m.inter == inter && m._type == typ { + return m + } + h += i + h &= mask + } +} + +// itabAdd adds the given itab to the itab hash table. +// itabLock must be held. +func itabAdd(m *itab) { + t := itabTable + if t.count >= 3*(t.size/4) { // 75% load factor + // Grow hash table. + // t2 = new(itabTableType) + some additional entries + // We lie and tell malloc we want pointer-free memory because + // all the pointed-to values are not in the heap. + t2 := (*itabTableType)(mallocgc((2+2*t.size)*sys.PtrSize, nil, true)) + t2.size = t.size * 2 + + // Copy over entries. + // Note: while copying, other threads may look for an itab and + // fail to find it. That's ok, they will then try to get the itab lock + // and as a consequence wait until this copying is complete. + iterate_itabs(t2.add) + if t2.count != t.count { + throw("mismatched count during itab table copy") + } + // Publish new hash table. Use an atomic write: see comment in getitab. + atomicstorep(unsafe.Pointer(&itabTable), unsafe.Pointer(t2)) + // Adopt the new table as our own. + t = itabTable + // Note: the old table can be GC'ed here. 
+ } + t.add(m) } -func additab(m *itab, locked, canfail bool) { +// add adds the given itab to itab table t. +// itabLock must be held. +func (t *itabTableType) add(m *itab) { + // See comment in find about the probe sequence. + // Insert new itab in the first empty spot in the probe sequence. + mask := t.size - 1 + h := itabHashFunc(m.inter, m._type) & mask + for i := uintptr(1); ; i++ { + p := (**itab)(add(unsafe.Pointer(&t.entries), h*sys.PtrSize)) + m2 := *p + if m2 == m { + // A given itab may be used in more than one module + // and thanks to the way global symbol resolution works, the + // pointed-to itab may already have been inserted into the + // global 'hash'. + return + } + if m2 == nil { + // Use atomic write here so if a reader sees m, it also + // sees the correctly initialized fields of m. + // NoWB is ok because m is not in heap memory. + // *p = m + atomic.StorepNoWB(unsafe.Pointer(p), unsafe.Pointer(m)) + t.count++ + return + } + h += i + h &= mask + } +} + +// init fills in the m.fun array with all the code pointers for +// the m.inter/m._type pair. If the type does not implement the interface, +// it sets m.fun[0] to 0 and returns the name of an interface function that is missing. +// It is ok to call this multiple times on the same m, even concurrently. +func (m *itab) init() string { inter := m.inter typ := m._type x := typ.uncommon() @@ -97,6 +187,7 @@ func additab(m *itab, locked, canfail bool) { nt := int(x.mcount) xmhdr := (*[1 << 16]method)(add(unsafe.Pointer(x), uintptr(x.moff)))[:nt:nt] j := 0 +imethods: for k := 0; k < ni; k++ { i := &inter.mhdr[k] itype := inter.typ.typeOff(i.ityp) @@ -119,45 +210,26 @@ func additab(m *itab, locked, canfail bool) { ifn := typ.textOff(t.ifn) *(*unsafe.Pointer)(add(unsafe.Pointer(&m.fun[0]), uintptr(k)*sys.PtrSize)) = ifn } - goto nextimethod + continue imethods } } } // didn't find method - if !canfail { - if locked { - unlock(&ifaceLock) - } - panic(&TypeAssertionError{"", typ.string(), inter.typ.string(), iname}) - } - m.bad = true - break - nextimethod: + m.fun[0] = 0 + return iname } - if !locked { - throw("invalid itab locking") - } - h := itabhash(inter, typ) - m.link = hash[h] - m.inhash = true - atomicstorep(unsafe.Pointer(&hash[h]), unsafe.Pointer(m)) + m.hash = typ.hash + return "" } func itabsinit() { - lock(&ifaceLock) + lock(&itabLock) for _, md := range activeModules() { for _, i := range md.itablinks { - // itablinks is a slice of pointers to the itabs used in this - // module. A given itab may be used in more than one module - // and thanks to the way global symbol resolution works, the - // pointed-to itab may already have been inserted into the - // global 'hash'. - if !i.inhash { - additab(i, true, false) - } + itabAdd(i) } } - unlock(&ifaceLock) + unlock(&itabLock) } // panicdottypeE is called when doing an e.(T) conversion and the conversion fails. 
@@ -200,7 +272,7 @@ func panicnildottype(want *_type) { func convT2E(t *_type, elem unsafe.Pointer) (e eface) { if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2E)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2E)) } if msanenabled { msanread(elem, t.size) @@ -216,7 +288,7 @@ func convT2E(t *_type, elem unsafe.Pointer) (e eface) { func convT2E16(t *_type, elem unsafe.Pointer) (e eface) { if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2E16)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2E16)) } if msanenabled { msanread(elem, t.size) @@ -235,7 +307,7 @@ func convT2E16(t *_type, elem unsafe.Pointer) (e eface) { func convT2E32(t *_type, elem unsafe.Pointer) (e eface) { if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2E32)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2E32)) } if msanenabled { msanread(elem, t.size) @@ -254,7 +326,7 @@ func convT2E32(t *_type, elem unsafe.Pointer) (e eface) { func convT2E64(t *_type, elem unsafe.Pointer) (e eface) { if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2E64)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2E64)) } if msanenabled { msanread(elem, t.size) @@ -273,7 +345,7 @@ func convT2E64(t *_type, elem unsafe.Pointer) (e eface) { func convT2Estring(t *_type, elem unsafe.Pointer) (e eface) { if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2Estring)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2Estring)) } if msanenabled { msanread(elem, t.size) @@ -292,7 +364,7 @@ func convT2Estring(t *_type, elem unsafe.Pointer) (e eface) { func convT2Eslice(t *_type, elem unsafe.Pointer) (e eface) { if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2Eslice)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2Eslice)) } if msanenabled { msanread(elem, t.size) @@ -311,7 +383,7 @@ func convT2Eslice(t *_type, elem unsafe.Pointer) (e eface) { func convT2Enoptr(t *_type, elem unsafe.Pointer) (e eface) { if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2Enoptr)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2Enoptr)) } if msanenabled { msanread(elem, t.size) @@ -326,7 +398,7 @@ func convT2Enoptr(t *_type, elem unsafe.Pointer) (e eface) { func convT2I(tab *itab, elem unsafe.Pointer) (i iface) { t := tab._type if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2I)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2I)) } if msanenabled { msanread(elem, t.size) @@ -341,7 +413,7 @@ func convT2I(tab *itab, elem unsafe.Pointer) (i iface) { func convT2I16(tab *itab, elem unsafe.Pointer) (i iface) { t := tab._type if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2I16)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2I16)) } if msanenabled { msanread(elem, t.size) @@ -361,7 +433,7 @@ func convT2I16(tab *itab, elem unsafe.Pointer) (i iface) { func convT2I32(tab *itab, elem unsafe.Pointer) (i iface) { t := tab._type if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2I32)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2I32)) } if msanenabled { msanread(elem, t.size) @@ -381,7 +453,7 @@ func convT2I32(tab *itab, elem unsafe.Pointer) (i iface) { func convT2I64(tab *itab, elem unsafe.Pointer) 
(i iface) { t := tab._type if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2I64)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2I64)) } if msanenabled { msanread(elem, t.size) @@ -401,7 +473,7 @@ func convT2I64(tab *itab, elem unsafe.Pointer) (i iface) { func convT2Istring(tab *itab, elem unsafe.Pointer) (i iface) { t := tab._type if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2Istring)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2Istring)) } if msanenabled { msanread(elem, t.size) @@ -421,7 +493,7 @@ func convT2Istring(tab *itab, elem unsafe.Pointer) (i iface) { func convT2Islice(tab *itab, elem unsafe.Pointer) (i iface) { t := tab._type if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2Islice)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2Islice)) } if msanenabled { msanread(elem, t.size) @@ -441,7 +513,7 @@ func convT2Islice(tab *itab, elem unsafe.Pointer) (i iface) { func convT2Inoptr(tab *itab, elem unsafe.Pointer) (i iface) { t := tab._type if raceenabled { - raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2Inoptr)) + raceReadObjectPC(t, elem, getcallerpc(), funcPC(convT2Inoptr)) } if msanenabled { msanread(elem, t.size) @@ -533,9 +605,13 @@ func reflect_ifaceE2I(inter *interfacetype, e eface, dst *iface) { } func iterate_itabs(fn func(*itab)) { - for _, h := range &hash { - for ; h != nil; h = h.link { - fn(h) + // Note: only runs during stop the world or with itabLock held, + // so no other locks/atomics needed. + t := itabTable + for i := uintptr(0); i < t.size; i++ { + m := *(**itab)(add(unsafe.Pointer(&t.entries), i*sys.PtrSize)) + if m != nil { + fn(m) } } } diff --git a/src/runtime/internal/atomic/atomic_test.go b/src/runtime/internal/atomic/atomic_test.go index 879a82f9c8..b697aa8bd3 100644 --- a/src/runtime/internal/atomic/atomic_test.go +++ b/src/runtime/internal/atomic/atomic_test.go @@ -52,7 +52,7 @@ func TestXadduintptr(t *testing.T) { // Tests that xadduintptr correctly updates 64-bit values. The place where // we actually do so is mstats.go, functions mSysStat{Inc,Dec}. func TestXadduintptrOnUint64(t *testing.T) { - if sys.BigEndian != 0 { + if sys.BigEndian { // On big endian architectures, we never use xadduintptr to update // 64-bit values and hence we skip the test. 
(Note that functions // mSysStat{Inc,Dec} in mstats.go have explicit checks for diff --git a/src/runtime/internal/sys/arch_386.go b/src/runtime/internal/sys/arch_386.go index 61d6722cca..5fb1fba02b 100644 --- a/src/runtime/internal/sys/arch_386.go +++ b/src/runtime/internal/sys/arch_386.go @@ -6,7 +6,7 @@ package sys const ( ArchFamily = I386 - BigEndian = 0 + BigEndian = false CacheLineSize = 64 DefaultPhysPageSize = GoosNacl*65536 + (1-GoosNacl)*4096 // 4k normally; 64k on NaCl PCQuantum = 1 diff --git a/src/runtime/internal/sys/arch_amd64.go b/src/runtime/internal/sys/arch_amd64.go index 1f2114a736..2f32bc469f 100644 --- a/src/runtime/internal/sys/arch_amd64.go +++ b/src/runtime/internal/sys/arch_amd64.go @@ -6,7 +6,7 @@ package sys const ( ArchFamily = AMD64 - BigEndian = 0 + BigEndian = false CacheLineSize = 64 DefaultPhysPageSize = 4096 PCQuantum = 1 diff --git a/src/runtime/internal/sys/arch_amd64p32.go b/src/runtime/internal/sys/arch_amd64p32.go index 07798557de..c560907c67 100644 --- a/src/runtime/internal/sys/arch_amd64p32.go +++ b/src/runtime/internal/sys/arch_amd64p32.go @@ -6,7 +6,7 @@ package sys const ( ArchFamily = AMD64 - BigEndian = 0 + BigEndian = false CacheLineSize = 64 DefaultPhysPageSize = 65536*GoosNacl + 4096*(1-GoosNacl) PCQuantum = 1 diff --git a/src/runtime/internal/sys/arch_arm.go b/src/runtime/internal/sys/arch_arm.go index 899010bfa1..f383d82027 100644 --- a/src/runtime/internal/sys/arch_arm.go +++ b/src/runtime/internal/sys/arch_arm.go @@ -6,7 +6,7 @@ package sys const ( ArchFamily = ARM - BigEndian = 0 + BigEndian = false CacheLineSize = 32 DefaultPhysPageSize = 65536 PCQuantum = 4 diff --git a/src/runtime/internal/sys/arch_arm64.go b/src/runtime/internal/sys/arch_arm64.go index 2d57ddae19..cb83ecc445 100644 --- a/src/runtime/internal/sys/arch_arm64.go +++ b/src/runtime/internal/sys/arch_arm64.go @@ -6,8 +6,8 @@ package sys const ( ArchFamily = ARM64 - BigEndian = 0 - CacheLineSize = 32 + BigEndian = false + CacheLineSize = 64 DefaultPhysPageSize = 65536 PCQuantum = 4 Int64Align = 8 diff --git a/src/runtime/internal/sys/arch_mips.go b/src/runtime/internal/sys/arch_mips.go index 65fc4f8a60..e12f32d0ee 100644 --- a/src/runtime/internal/sys/arch_mips.go +++ b/src/runtime/internal/sys/arch_mips.go @@ -6,7 +6,7 @@ package sys const ( ArchFamily = MIPS - BigEndian = 1 + BigEndian = true CacheLineSize = 32 DefaultPhysPageSize = 65536 PCQuantum = 4 diff --git a/src/runtime/internal/sys/arch_mips64.go b/src/runtime/internal/sys/arch_mips64.go index 0f6de74e6f..973ec10e17 100644 --- a/src/runtime/internal/sys/arch_mips64.go +++ b/src/runtime/internal/sys/arch_mips64.go @@ -6,7 +6,7 @@ package sys const ( ArchFamily = MIPS64 - BigEndian = 1 + BigEndian = true CacheLineSize = 32 DefaultPhysPageSize = 16384 PCQuantum = 4 diff --git a/src/runtime/internal/sys/arch_mips64le.go b/src/runtime/internal/sys/arch_mips64le.go index 4ced35bfde..e96d962f36 100644 --- a/src/runtime/internal/sys/arch_mips64le.go +++ b/src/runtime/internal/sys/arch_mips64le.go @@ -6,7 +6,7 @@ package sys const ( ArchFamily = MIPS64 - BigEndian = 0 + BigEndian = false CacheLineSize = 32 DefaultPhysPageSize = 16384 PCQuantum = 4 diff --git a/src/runtime/internal/sys/arch_mipsle.go b/src/runtime/internal/sys/arch_mipsle.go index 33e9764037..25742ae9d3 100644 --- a/src/runtime/internal/sys/arch_mipsle.go +++ b/src/runtime/internal/sys/arch_mipsle.go @@ -6,7 +6,7 @@ package sys const ( ArchFamily = MIPS - BigEndian = 0 + BigEndian = false CacheLineSize = 32 DefaultPhysPageSize = 65536 PCQuantum = 4 diff 
--git a/src/runtime/internal/sys/arch_ppc64.go b/src/runtime/internal/sys/arch_ppc64.go index 80595ee195..a538bbdec0 100644 --- a/src/runtime/internal/sys/arch_ppc64.go +++ b/src/runtime/internal/sys/arch_ppc64.go @@ -6,7 +6,7 @@ package sys const ( ArchFamily = PPC64 - BigEndian = 1 + BigEndian = true CacheLineSize = 128 DefaultPhysPageSize = 65536 PCQuantum = 4 diff --git a/src/runtime/internal/sys/arch_ppc64le.go b/src/runtime/internal/sys/arch_ppc64le.go index f68e777055..aa50689181 100644 --- a/src/runtime/internal/sys/arch_ppc64le.go +++ b/src/runtime/internal/sys/arch_ppc64le.go @@ -6,7 +6,7 @@ package sys const ( ArchFamily = PPC64 - BigEndian = 0 + BigEndian = false CacheLineSize = 128 DefaultPhysPageSize = 65536 PCQuantum = 4 diff --git a/src/runtime/internal/sys/arch_s390x.go b/src/runtime/internal/sys/arch_s390x.go index 4ec4bf8fec..e42c420a54 100644 --- a/src/runtime/internal/sys/arch_s390x.go +++ b/src/runtime/internal/sys/arch_s390x.go @@ -6,7 +6,7 @@ package sys const ( ArchFamily = S390X - BigEndian = 1 + BigEndian = true CacheLineSize = 256 DefaultPhysPageSize = 4096 PCQuantum = 2 diff --git a/src/runtime/internal/sys/gengoos.go b/src/runtime/internal/sys/gengoos.go index 4c45c0af02..a9f86256bf 100644 --- a/src/runtime/internal/sys/gengoos.go +++ b/src/runtime/internal/sys/gengoos.go @@ -30,7 +30,7 @@ func main() { if strings.HasPrefix(line, goosPrefix) { text, err := strconv.Unquote(strings.TrimPrefix(line, goosPrefix)) if err != nil { - log.Fatalf("parsing goosList %#q: %v", strings.TrimPrefix(line, goosPrefix), err) + log.Fatalf("parsing goosList: %v", err) } gooses = strings.Fields(text) } @@ -45,10 +45,11 @@ func main() { for _, target := range gooses { var buf bytes.Buffer - fmt.Fprintf(&buf, "// generated by gengoos.go using 'go generate'\n\n") + fmt.Fprintf(&buf, "// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.\n\n") if target == "linux" { - fmt.Fprintf(&buf, "// +build !android\n\n") // must explicitly exclude android for linux + fmt.Fprintf(&buf, "// +build !android\n") // must explicitly exclude android for linux } + fmt.Fprintf(&buf, "// +build %s\n\n", target) // must explicitly include target for bootstrapping purposes fmt.Fprintf(&buf, "package sys\n\n") fmt.Fprintf(&buf, "const GOOS = `%s`\n\n", target) for _, goos := range gooses { @@ -66,7 +67,8 @@ func main() { for _, target := range goarches { var buf bytes.Buffer - fmt.Fprintf(&buf, "// generated by gengoos.go using 'go generate'\n\n") + fmt.Fprintf(&buf, "// Code generated by gengoos.go using 'go generate'. DO NOT EDIT.\n\n") + fmt.Fprintf(&buf, "// +build %s\n\n", target) // must explicitly include target for bootstrapping purposes fmt.Fprintf(&buf, "package sys\n\n") fmt.Fprintf(&buf, "const GOARCH = `%s`\n\n", target) for _, goarch := range goarches { diff --git a/src/runtime/internal/sys/sys.go b/src/runtime/internal/sys/sys.go index 586a763717..9d9ac4507f 100644 --- a/src/runtime/internal/sys/sys.go +++ b/src/runtime/internal/sys/sys.go @@ -6,9 +6,9 @@ // constants used by the runtime. package sys -// The next line makes 'go generate' write the zgen_*.go files with +// The next line makes 'go generate' write the zgo*.go files with // per-OS and per-arch information, including constants -// named goos_$GOOS and goarch_$GOARCH for every +// named Goos$GOOS and Goarch$GOARCH for every // known GOOS and GOARCH. The constant is 1 on the // current system, 0 otherwise; multiplying by them is // useful for defining GOOS- or GOARCH-specific constants. 
diff --git a/src/runtime/internal/sys/zgoarch_386.go b/src/runtime/internal/sys/zgoarch_386.go index 3bcf83b8e3..b07abbedc6 100644 --- a/src/runtime/internal/sys/zgoarch_386.go +++ b/src/runtime/internal/sys/zgoarch_386.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build 386 package sys diff --git a/src/runtime/internal/sys/zgoarch_amd64.go b/src/runtime/internal/sys/zgoarch_amd64.go index 699f191fba..bfdcb00bd9 100644 --- a/src/runtime/internal/sys/zgoarch_amd64.go +++ b/src/runtime/internal/sys/zgoarch_amd64.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build amd64 package sys diff --git a/src/runtime/internal/sys/zgoarch_amd64p32.go b/src/runtime/internal/sys/zgoarch_amd64p32.go index cc2d658406..b61617d4d9 100644 --- a/src/runtime/internal/sys/zgoarch_amd64p32.go +++ b/src/runtime/internal/sys/zgoarch_amd64p32.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build amd64p32 package sys diff --git a/src/runtime/internal/sys/zgoarch_arm.go b/src/runtime/internal/sys/zgoarch_arm.go index a5fd789f13..79595d545a 100644 --- a/src/runtime/internal/sys/zgoarch_arm.go +++ b/src/runtime/internal/sys/zgoarch_arm.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build arm package sys diff --git a/src/runtime/internal/sys/zgoarch_arm64.go b/src/runtime/internal/sys/zgoarch_arm64.go index 084d2c7330..c839b8fc03 100644 --- a/src/runtime/internal/sys/zgoarch_arm64.go +++ b/src/runtime/internal/sys/zgoarch_arm64.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build arm64 package sys diff --git a/src/runtime/internal/sys/zgoarch_arm64be.go b/src/runtime/internal/sys/zgoarch_arm64be.go new file mode 100644 index 0000000000..58b4ef198b --- /dev/null +++ b/src/runtime/internal/sys/zgoarch_arm64be.go @@ -0,0 +1,28 @@ +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build arm64be + +package sys + +const GOARCH = `arm64be` + +const Goarch386 = 0 +const GoarchAmd64 = 0 +const GoarchAmd64p32 = 0 +const GoarchArm = 0 +const GoarchArmbe = 0 +const GoarchArm64 = 0 +const GoarchArm64be = 1 +const GoarchPpc64 = 0 +const GoarchPpc64le = 0 +const GoarchMips = 0 +const GoarchMipsle = 0 +const GoarchMips64 = 0 +const GoarchMips64le = 0 +const GoarchMips64p32 = 0 +const GoarchMips64p32le = 0 +const GoarchPpc = 0 +const GoarchS390 = 0 +const GoarchS390x = 0 +const GoarchSparc = 0 +const GoarchSparc64 = 0 diff --git a/src/runtime/internal/sys/zgoarch_armbe.go b/src/runtime/internal/sys/zgoarch_armbe.go new file mode 100644 index 0000000000..e9e2c314d8 --- /dev/null +++ b/src/runtime/internal/sys/zgoarch_armbe.go @@ -0,0 +1,28 @@ +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. 
+ +// +build armbe + +package sys + +const GOARCH = `armbe` + +const Goarch386 = 0 +const GoarchAmd64 = 0 +const GoarchAmd64p32 = 0 +const GoarchArm = 0 +const GoarchArmbe = 1 +const GoarchArm64 = 0 +const GoarchArm64be = 0 +const GoarchPpc64 = 0 +const GoarchPpc64le = 0 +const GoarchMips = 0 +const GoarchMipsle = 0 +const GoarchMips64 = 0 +const GoarchMips64le = 0 +const GoarchMips64p32 = 0 +const GoarchMips64p32le = 0 +const GoarchPpc = 0 +const GoarchS390 = 0 +const GoarchS390x = 0 +const GoarchSparc = 0 +const GoarchSparc64 = 0 diff --git a/src/runtime/internal/sys/zgoarch_mips.go b/src/runtime/internal/sys/zgoarch_mips.go index 2f733d2788..b0bf4ffec3 100644 --- a/src/runtime/internal/sys/zgoarch_mips.go +++ b/src/runtime/internal/sys/zgoarch_mips.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build mips package sys diff --git a/src/runtime/internal/sys/zgoarch_mips64.go b/src/runtime/internal/sys/zgoarch_mips64.go index 2ad62bd68c..093e88ceaa 100644 --- a/src/runtime/internal/sys/zgoarch_mips64.go +++ b/src/runtime/internal/sys/zgoarch_mips64.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build mips64 package sys diff --git a/src/runtime/internal/sys/zgoarch_mips64le.go b/src/runtime/internal/sys/zgoarch_mips64le.go index 047c8b425a..3bad7cfd38 100644 --- a/src/runtime/internal/sys/zgoarch_mips64le.go +++ b/src/runtime/internal/sys/zgoarch_mips64le.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build mips64le package sys diff --git a/src/runtime/internal/sys/zgoarch_mips64p32.go b/src/runtime/internal/sys/zgoarch_mips64p32.go new file mode 100644 index 0000000000..c5f69fc687 --- /dev/null +++ b/src/runtime/internal/sys/zgoarch_mips64p32.go @@ -0,0 +1,28 @@ +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build mips64p32 + +package sys + +const GOARCH = `mips64p32` + +const Goarch386 = 0 +const GoarchAmd64 = 0 +const GoarchAmd64p32 = 0 +const GoarchArm = 0 +const GoarchArmbe = 0 +const GoarchArm64 = 0 +const GoarchArm64be = 0 +const GoarchPpc64 = 0 +const GoarchPpc64le = 0 +const GoarchMips = 0 +const GoarchMipsle = 0 +const GoarchMips64 = 0 +const GoarchMips64le = 0 +const GoarchMips64p32 = 1 +const GoarchMips64p32le = 0 +const GoarchPpc = 0 +const GoarchS390 = 0 +const GoarchS390x = 0 +const GoarchSparc = 0 +const GoarchSparc64 = 0 diff --git a/src/runtime/internal/sys/zgoarch_mips64p32le.go b/src/runtime/internal/sys/zgoarch_mips64p32le.go new file mode 100644 index 0000000000..014ef84ce8 --- /dev/null +++ b/src/runtime/internal/sys/zgoarch_mips64p32le.go @@ -0,0 +1,28 @@ +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. 
+ +// +build mips64p32le + +package sys + +const GOARCH = `mips64p32le` + +const Goarch386 = 0 +const GoarchAmd64 = 0 +const GoarchAmd64p32 = 0 +const GoarchArm = 0 +const GoarchArmbe = 0 +const GoarchArm64 = 0 +const GoarchArm64be = 0 +const GoarchPpc64 = 0 +const GoarchPpc64le = 0 +const GoarchMips = 0 +const GoarchMipsle = 0 +const GoarchMips64 = 0 +const GoarchMips64le = 0 +const GoarchMips64p32 = 0 +const GoarchMips64p32le = 1 +const GoarchPpc = 0 +const GoarchS390 = 0 +const GoarchS390x = 0 +const GoarchSparc = 0 +const GoarchSparc64 = 0 diff --git a/src/runtime/internal/sys/zgoarch_mipsle.go b/src/runtime/internal/sys/zgoarch_mipsle.go index 95f3d5aab9..75814be787 100644 --- a/src/runtime/internal/sys/zgoarch_mipsle.go +++ b/src/runtime/internal/sys/zgoarch_mipsle.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build mipsle package sys diff --git a/src/runtime/internal/sys/zgoarch_ppc.go b/src/runtime/internal/sys/zgoarch_ppc.go new file mode 100644 index 0000000000..2a891b8477 --- /dev/null +++ b/src/runtime/internal/sys/zgoarch_ppc.go @@ -0,0 +1,28 @@ +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build ppc + +package sys + +const GOARCH = `ppc` + +const Goarch386 = 0 +const GoarchAmd64 = 0 +const GoarchAmd64p32 = 0 +const GoarchArm = 0 +const GoarchArmbe = 0 +const GoarchArm64 = 0 +const GoarchArm64be = 0 +const GoarchPpc64 = 0 +const GoarchPpc64le = 0 +const GoarchMips = 0 +const GoarchMipsle = 0 +const GoarchMips64 = 0 +const GoarchMips64le = 0 +const GoarchMips64p32 = 0 +const GoarchMips64p32le = 0 +const GoarchPpc = 1 +const GoarchS390 = 0 +const GoarchS390x = 0 +const GoarchSparc = 0 +const GoarchSparc64 = 0 diff --git a/src/runtime/internal/sys/zgoarch_ppc64.go b/src/runtime/internal/sys/zgoarch_ppc64.go index 748b5b562c..847db4bdb2 100644 --- a/src/runtime/internal/sys/zgoarch_ppc64.go +++ b/src/runtime/internal/sys/zgoarch_ppc64.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build ppc64 package sys diff --git a/src/runtime/internal/sys/zgoarch_ppc64le.go b/src/runtime/internal/sys/zgoarch_ppc64le.go index d3dcba467d..5195797b29 100644 --- a/src/runtime/internal/sys/zgoarch_ppc64le.go +++ b/src/runtime/internal/sys/zgoarch_ppc64le.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build ppc64le package sys diff --git a/src/runtime/internal/sys/zgoarch_s390.go b/src/runtime/internal/sys/zgoarch_s390.go new file mode 100644 index 0000000000..cd215da577 --- /dev/null +++ b/src/runtime/internal/sys/zgoarch_s390.go @@ -0,0 +1,28 @@ +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. 
+ +// +build s390 + +package sys + +const GOARCH = `s390` + +const Goarch386 = 0 +const GoarchAmd64 = 0 +const GoarchAmd64p32 = 0 +const GoarchArm = 0 +const GoarchArmbe = 0 +const GoarchArm64 = 0 +const GoarchArm64be = 0 +const GoarchPpc64 = 0 +const GoarchPpc64le = 0 +const GoarchMips = 0 +const GoarchMipsle = 0 +const GoarchMips64 = 0 +const GoarchMips64le = 0 +const GoarchMips64p32 = 0 +const GoarchMips64p32le = 0 +const GoarchPpc = 0 +const GoarchS390 = 1 +const GoarchS390x = 0 +const GoarchSparc = 0 +const GoarchSparc64 = 0 diff --git a/src/runtime/internal/sys/zgoarch_s390x.go b/src/runtime/internal/sys/zgoarch_s390x.go index 1ead5d573c..b9368ff4e3 100644 --- a/src/runtime/internal/sys/zgoarch_s390x.go +++ b/src/runtime/internal/sys/zgoarch_s390x.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build s390x package sys diff --git a/src/runtime/internal/sys/zgoarch_sparc.go b/src/runtime/internal/sys/zgoarch_sparc.go new file mode 100644 index 0000000000..e9afe0131b --- /dev/null +++ b/src/runtime/internal/sys/zgoarch_sparc.go @@ -0,0 +1,28 @@ +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build sparc + +package sys + +const GOARCH = `sparc` + +const Goarch386 = 0 +const GoarchAmd64 = 0 +const GoarchAmd64p32 = 0 +const GoarchArm = 0 +const GoarchArmbe = 0 +const GoarchArm64 = 0 +const GoarchArm64be = 0 +const GoarchPpc64 = 0 +const GoarchPpc64le = 0 +const GoarchMips = 0 +const GoarchMipsle = 0 +const GoarchMips64 = 0 +const GoarchMips64le = 0 +const GoarchMips64p32 = 0 +const GoarchMips64p32le = 0 +const GoarchPpc = 0 +const GoarchS390 = 0 +const GoarchS390x = 0 +const GoarchSparc = 1 +const GoarchSparc64 = 0 diff --git a/src/runtime/internal/sys/zgoarch_sparc64.go b/src/runtime/internal/sys/zgoarch_sparc64.go new file mode 100644 index 0000000000..b6004efe1c --- /dev/null +++ b/src/runtime/internal/sys/zgoarch_sparc64.go @@ -0,0 +1,28 @@ +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build sparc64 + +package sys + +const GOARCH = `sparc64` + +const Goarch386 = 0 +const GoarchAmd64 = 0 +const GoarchAmd64p32 = 0 +const GoarchArm = 0 +const GoarchArmbe = 0 +const GoarchArm64 = 0 +const GoarchArm64be = 0 +const GoarchPpc64 = 0 +const GoarchPpc64le = 0 +const GoarchMips = 0 +const GoarchMipsle = 0 +const GoarchMips64 = 0 +const GoarchMips64le = 0 +const GoarchMips64p32 = 0 +const GoarchMips64p32le = 0 +const GoarchPpc = 0 +const GoarchS390 = 0 +const GoarchS390x = 0 +const GoarchSparc = 0 +const GoarchSparc64 = 1 diff --git a/src/runtime/internal/sys/zgoos_android.go b/src/runtime/internal/sys/zgoos_android.go index 6503b15246..01ebe753c5 100644 --- a/src/runtime/internal/sys/zgoos_android.go +++ b/src/runtime/internal/sys/zgoos_android.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build android package sys @@ -15,3 +17,4 @@ const GoosOpenbsd = 0 const GoosPlan9 = 0 const GoosSolaris = 0 const GoosWindows = 0 +const GoosZos = 0 diff --git a/src/runtime/internal/sys/zgoos_darwin.go b/src/runtime/internal/sys/zgoos_darwin.go index 6a285984bd..1303d71ee9 100644 --- a/src/runtime/internal/sys/zgoos_darwin.go +++ b/src/runtime/internal/sys/zgoos_darwin.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. 
+ +// +build darwin package sys @@ -15,3 +17,4 @@ const GoosOpenbsd = 0 const GoosPlan9 = 0 const GoosSolaris = 0 const GoosWindows = 0 +const GoosZos = 0 diff --git a/src/runtime/internal/sys/zgoos_dragonfly.go b/src/runtime/internal/sys/zgoos_dragonfly.go index 886ac2698f..64325c7a42 100644 --- a/src/runtime/internal/sys/zgoos_dragonfly.go +++ b/src/runtime/internal/sys/zgoos_dragonfly.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build dragonfly package sys @@ -15,3 +17,4 @@ const GoosOpenbsd = 0 const GoosPlan9 = 0 const GoosSolaris = 0 const GoosWindows = 0 +const GoosZos = 0 diff --git a/src/runtime/internal/sys/zgoos_freebsd.go b/src/runtime/internal/sys/zgoos_freebsd.go index 0bf2403eab..37449713f9 100644 --- a/src/runtime/internal/sys/zgoos_freebsd.go +++ b/src/runtime/internal/sys/zgoos_freebsd.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build freebsd package sys @@ -15,3 +17,4 @@ const GoosOpenbsd = 0 const GoosPlan9 = 0 const GoosSolaris = 0 const GoosWindows = 0 +const GoosZos = 0 diff --git a/src/runtime/internal/sys/zgoos_linux.go b/src/runtime/internal/sys/zgoos_linux.go index c8664db15d..c726465350 100644 --- a/src/runtime/internal/sys/zgoos_linux.go +++ b/src/runtime/internal/sys/zgoos_linux.go @@ -1,6 +1,7 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. // +build !android +// +build linux package sys @@ -17,3 +18,4 @@ const GoosOpenbsd = 0 const GoosPlan9 = 0 const GoosSolaris = 0 const GoosWindows = 0 +const GoosZos = 0 diff --git a/src/runtime/internal/sys/zgoos_nacl.go b/src/runtime/internal/sys/zgoos_nacl.go index 054122638a..53b394c631 100644 --- a/src/runtime/internal/sys/zgoos_nacl.go +++ b/src/runtime/internal/sys/zgoos_nacl.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build nacl package sys @@ -15,3 +17,4 @@ const GoosOpenbsd = 0 const GoosPlan9 = 0 const GoosSolaris = 0 const GoosWindows = 0 +const GoosZos = 0 diff --git a/src/runtime/internal/sys/zgoos_netbsd.go b/src/runtime/internal/sys/zgoos_netbsd.go index 5c509a1250..8bfdf45d4a 100644 --- a/src/runtime/internal/sys/zgoos_netbsd.go +++ b/src/runtime/internal/sys/zgoos_netbsd.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build netbsd package sys @@ -15,3 +17,4 @@ const GoosOpenbsd = 0 const GoosPlan9 = 0 const GoosSolaris = 0 const GoosWindows = 0 +const GoosZos = 0 diff --git a/src/runtime/internal/sys/zgoos_openbsd.go b/src/runtime/internal/sys/zgoos_openbsd.go index dc43157d49..fc6acb761c 100644 --- a/src/runtime/internal/sys/zgoos_openbsd.go +++ b/src/runtime/internal/sys/zgoos_openbsd.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. 
+ +// +build openbsd package sys @@ -15,3 +17,4 @@ const GoosOpenbsd = 1 const GoosPlan9 = 0 const GoosSolaris = 0 const GoosWindows = 0 +const GoosZos = 0 diff --git a/src/runtime/internal/sys/zgoos_plan9.go b/src/runtime/internal/sys/zgoos_plan9.go index 4b0934f77a..75baeb34f6 100644 --- a/src/runtime/internal/sys/zgoos_plan9.go +++ b/src/runtime/internal/sys/zgoos_plan9.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build plan9 package sys @@ -15,3 +17,4 @@ const GoosOpenbsd = 0 const GoosPlan9 = 1 const GoosSolaris = 0 const GoosWindows = 0 +const GoosZos = 0 diff --git a/src/runtime/internal/sys/zgoos_solaris.go b/src/runtime/internal/sys/zgoos_solaris.go index 42511a36ad..c18f34f398 100644 --- a/src/runtime/internal/sys/zgoos_solaris.go +++ b/src/runtime/internal/sys/zgoos_solaris.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build solaris package sys @@ -15,3 +17,4 @@ const GoosOpenbsd = 0 const GoosPlan9 = 0 const GoosSolaris = 1 const GoosWindows = 0 +const GoosZos = 0 diff --git a/src/runtime/internal/sys/zgoos_windows.go b/src/runtime/internal/sys/zgoos_windows.go index d77f62c396..b9f0d4e584 100644 --- a/src/runtime/internal/sys/zgoos_windows.go +++ b/src/runtime/internal/sys/zgoos_windows.go @@ -1,4 +1,6 @@ -// generated by gengoos.go using 'go generate' +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build windows package sys @@ -15,3 +17,4 @@ const GoosOpenbsd = 0 const GoosPlan9 = 0 const GoosSolaris = 0 const GoosWindows = 1 +const GoosZos = 0 diff --git a/src/runtime/internal/sys/zgoos_zos.go b/src/runtime/internal/sys/zgoos_zos.go new file mode 100644 index 0000000000..2563ebea23 --- /dev/null +++ b/src/runtime/internal/sys/zgoos_zos.go @@ -0,0 +1,20 @@ +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build zos + +package sys + +const GOOS = `zos` + +const GoosAndroid = 0 +const GoosDarwin = 0 +const GoosDragonfly = 0 +const GoosFreebsd = 0 +const GoosLinux = 0 +const GoosNacl = 0 +const GoosNetbsd = 0 +const GoosOpenbsd = 0 +const GoosPlan9 = 0 +const GoosSolaris = 0 +const GoosWindows = 0 +const GoosZos = 1 diff --git a/src/runtime/lock_sema.go b/src/runtime/lock_sema.go index 5b0169d572..b41f805cee 100644 --- a/src/runtime/lock_sema.go +++ b/src/runtime/lock_sema.go @@ -71,7 +71,7 @@ Loop: // for this lock, chained through m->nextwaitm. // Queue this M. for { - gp.m.nextwaitm = v &^ locked + gp.m.nextwaitm = muintptr(v &^ locked) if atomic.Casuintptr(&l.key, v, uintptr(unsafe.Pointer(gp.m))|locked) { break } @@ -103,8 +103,8 @@ func unlock(l *mutex) { } else { // Other M's are waiting for the lock. // Dequeue an M. - mp = (*m)(unsafe.Pointer(v &^ locked)) - if atomic.Casuintptr(&l.key, v, mp.nextwaitm) { + mp = muintptr(v &^ locked).ptr() + if atomic.Casuintptr(&l.key, v, uintptr(mp.nextwaitm)) { // Dequeued an M. Wake it. semawakeup(mp) break @@ -140,7 +140,7 @@ func notewakeup(n *note) { case v == 0: // Nothing was waiting. Done. case v == locked: - // Two notewakeups! Not allowed. + // Two notewakeups! Not allowed. throw("notewakeup - double wakeup") default: // Must be the waiting m. Wake it up. 
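The lock_sema.go hunk above switches m.nextwaitm from a raw uintptr to the muintptr type while keeping the same word-packing idea: the low bit of the mutex key carries the `locked` flag, and `v &^ locked` recovers the pointer to the waiting M. The following self-contained sketch shows that low-bit tagging idiom with a hypothetical waiter type; it is not the runtime's implementation, and the uintptr round-trip is only safe here because the object stays reachable through a regular pointer.

```go
// Sketch of packing a flag into the low bit of a pointer-sized word, as the
// mutex code above does with its `locked` bit. The waiter type and constant
// are hypothetical stand-ins for the runtime's m/muintptr.
package main

import (
	"fmt"
	"unsafe"
)

const locked uintptr = 1 // low tag bit; free because waiter is word-aligned

type waiter struct {
	name string
}

// pack stores the pointer and the flag in one word.
func pack(w *waiter, flag bool) uintptr {
	v := uintptr(unsafe.Pointer(w))
	if flag {
		v |= locked
	}
	return v
}

// unpack recovers the pointer (v &^ locked) and the flag (v & locked).
func unpack(v uintptr) (*waiter, bool) {
	return (*waiter)(unsafe.Pointer(v &^ locked)), v&locked != 0
}

func main() {
	w := &waiter{name: "m1"} // w keeps the object alive across the round trip
	v := pack(w, true)
	p, isLocked := unpack(v)
	fmt.Println(p.name, isLocked) // m1 true
}
```

Outside the runtime, converting a uintptr back to a pointer like this is generally unsafe (the GC does not trace uintptrs); the runtime can do it for m structures because they are not heap-allocated, which is also why the diff introduces the dedicated muintptr type rather than reusing unsafe.Pointer.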
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 0ebd2c0ab2..72b8f40b96 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -529,9 +529,8 @@ func nextFreeFast(s *mspan) gclinkptr { } s.allocCache >>= uint(theBit + 1) s.freeindex = freeidx - v := gclinkptr(result*s.elemsize + s.base()) s.allocCount++ - return v + return gclinkptr(result*s.elemsize + s.base()) } } return 0 @@ -847,6 +846,9 @@ func reflect_unsafe_New(typ *_type) unsafe.Pointer { // newarray allocates an array of n elements of type typ. func newarray(typ *_type, n int) unsafe.Pointer { + if n == 1 { + return mallocgc(typ.size, typ, true) + } if n < 0 || uintptr(n) > maxSliceCap(typ.size) { panic(plainError("runtime: allocation size out of range")) } @@ -863,11 +865,13 @@ func profilealloc(mp *m, x unsafe.Pointer, size uintptr) { mProf_Malloc(x, size) } -// nextSample returns the next sampling point for heap profiling. -// It produces a random variable with a geometric distribution and -// mean MemProfileRate. This is done by generating a uniformly -// distributed random number and applying the cumulative distribution -// function for an exponential. +// nextSample returns the next sampling point for heap profiling. The goal is +// to sample allocations on average every MemProfileRate bytes, but with a +// completely random distribution over the allocation timeline; this +// corresponds to a Poisson process with parameter MemProfileRate. In Poisson +// processes, the distance between two samples follows the exponential +// distribution (exp(MemProfileRate)), so the best return value is a random +// number taken from an exponential distribution whose mean is MemProfileRate. func nextSample() int32 { if GOOS == "plan9" { // Plan 9 doesn't support floating point in note handler. @@ -876,25 +880,29 @@ func nextSample() int32 { } } - period := MemProfileRate + return fastexprand(MemProfileRate) +} - // make nextSample not overflow. Maximum possible step is - // -ln(1/(1<<kRandomBitCount)) * period, approximately 20 * period. +// fastexprand returns a random number from an exponential distribution with +// the specified mean. +func fastexprand(mean int) int32 { + // Avoid overflow. Maximum possible step is + // -ln(1/(1<<randomBitCount)) * mean, approximately 20 * mean. switch { - case period > 0x7000000: - period = 0x7000000 - case period == 0: + case mean > 0x7000000: + mean = 0x7000000 + case mean == 0: return 0 } - // Let m be the sample rate, - // the probability distribution function is m*exp(-mx), so the CDF is - // p = 1 - exp(-mx), so - // q = 1 - p == exp(-mx) - // log_e(q) = -mx - // -log_e(q)/m = x - // x = -log_e(q) * period - // x = log_2(q) * (-log_e(2)) * period ; Using log_2 for efficiency + // Take a random sample of the exponential distribution exp(-mean*x). 
+ // The probability distribution function is mean*exp(-mean*x), so the CDF is + // p = 1 - exp(-mean*x), so + // q = 1 - p == exp(-mean*x) + // log_e(q) = -mean*x + // -log_e(q)/mean = x + // x = -log_e(q) * mean + // x = log_2(q) * (-log_e(2)) * mean ; Using log_2 for efficiency const randomBitCount = 26 q := fastrand()%(1<<randomBitCount) + 1 qlog := fastlog2(float64(q)) - randomBitCount @@ -902,7 +910,7 @@ func nextSample() int32 { qlog = 0 } const minusLog2 = -0.6931471805599453 // -ln(2) - return int32(qlog*(minusLog2*float64(period))) + 1 + return int32(qlog*(minusLog2*float64(mean))) + 1 } // nextSampleNoFP is similar to nextSample, but uses older, @@ -920,7 +928,7 @@ func nextSampleNoFP() int32 { } type persistentAlloc struct { - base unsafe.Pointer + base *notInHeap off uintptr } @@ -937,17 +945,17 @@ var globalAlloc struct { // // Consider marking persistentalloc'd types go:notinheap. func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { - var p unsafe.Pointer + var p *notInHeap systemstack(func() { p = persistentalloc1(size, align, sysStat) }) - return p + return unsafe.Pointer(p) } // Must run on system stack because stack growth can (re)invoke it. // See issue 9174. //go:systemstack -func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer { +func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { const ( chunk = 256 << 10 maxBlock = 64 << 10 // VM reservation granularity is 64K on windows @@ -968,7 +976,7 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer { } if size >= maxBlock { - return sysAlloc(size, sysStat) + return (*notInHeap)(sysAlloc(size, sysStat)) } mp := acquirem() @@ -981,7 +989,7 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer { } persistent.off = round(persistent.off, align) if persistent.off+size > chunk || persistent.base == nil { - persistent.base = sysAlloc(chunk, &memstats.other_sys) + persistent.base = (*notInHeap)(sysAlloc(chunk, &memstats.other_sys)) if persistent.base == nil { if persistent == &globalAlloc.persistentAlloc { unlock(&globalAlloc.mutex) @@ -990,7 +998,7 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer { } persistent.off = 0 } - p := add(persistent.base, persistent.off) + p := persistent.base.add(persistent.off) persistent.off += size releasem(mp) if persistent == &globalAlloc.persistentAlloc { @@ -1003,3 +1011,19 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer { } return p } + +// notInHeap is off-heap memory allocated by a lower-level allocator +// like sysAlloc or persistentAlloc. +// +// In general, it's better to use real types marked as go:notinheap, +// but this serves as a generic type for situations where that isn't +// possible (like in the allocators). +// +// TODO: Use this as the return type of sysAlloc, persistentAlloc, etc? +// +//go:notinheap +type notInHeap struct{} + +func (p *notInHeap) add(bytes uintptr) *notInHeap { + return (*notInHeap)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + bytes)) +} diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go index d9487eed3a..93aa56dbd5 100644 --- a/src/runtime/malloc_test.go +++ b/src/runtime/malloc_test.go @@ -46,9 +46,6 @@ func TestMemStats(t *testing.T) { } // Of the uint fields, HeapReleased, HeapIdle can be 0. // PauseTotalNs can be 0 if timer resolution is poor. - // - // TODO: Test that GCCPUFraction is <= 0.99. This currently - // fails on windows/386. 
(Issue #19319) fields := map[string][]func(interface{}) error{ "Alloc": {nz, le(1e10)}, "TotalAlloc": {nz, le(1e11)}, "Sys": {nz, le(1e10)}, "Lookups": {nz, le(1e10)}, "Mallocs": {nz, le(1e10)}, "Frees": {nz, le(1e10)}, @@ -61,7 +58,7 @@ func TestMemStats(t *testing.T) { "NextGC": {nz, le(1e10)}, "LastGC": {nz}, "PauseTotalNs": {le(1e11)}, "PauseNs": nil, "PauseEnd": nil, "NumGC": {nz, le(1e9)}, "NumForcedGC": {nz, le(1e9)}, - "GCCPUFraction": nil, "EnableGC": {eq(true)}, "DebugGC": {eq(false)}, + "GCCPUFraction": {le(0.99)}, "EnableGC": {eq(true)}, "DebugGC": {eq(false)}, "BySize": nil, } diff --git a/src/runtime/map_test.go b/src/runtime/map_test.go index 81f05a0613..6ed655de0a 100644 --- a/src/runtime/map_test.go +++ b/src/runtime/map_test.go @@ -244,7 +244,7 @@ func testConcurrentReadsAfterGrowth(t *testing.T, useReflect bool) { numGrowStep := 250 numReader := 16 if testing.Short() { - numLoop, numGrowStep = 2, 500 + numLoop, numGrowStep = 2, 100 } for i := 0; i < numLoop; i++ { m := make(map[int]int, 0) @@ -596,6 +596,134 @@ func TestIgnoreBogusMapHint(t *testing.T) { } } +var mapSink map[int]int + +var mapBucketTests = [...]struct { + n int // n is the number of map elements + noescape int // number of expected buckets for non-escaping map + escape int // number of expected buckets for escaping map +}{ + {-(1 << 30), 1, 1}, + {-1, 1, 1}, + {0, 1, 1}, + {1, 1, 1}, + {8, 1, 1}, + {9, 2, 2}, + {13, 2, 2}, + {14, 4, 4}, + {26, 4, 4}, +} + +func TestMapBuckets(t *testing.T) { + // Test that maps of different sizes have the right number of buckets. + // Non-escaping maps with small buckets (like map[int]int) never + // have a nil bucket pointer due to starting with preallocated buckets + // on the stack. Escaping maps start with a non-nil bucket pointer if + // hint size is above bucketCnt and thereby have more than one bucket. + // These tests depend on bucketCnt and loadFactor* in hashmap.go. 
+ t.Run("mapliteral", func(t *testing.T) { + for _, tt := range mapBucketTests { + localMap := map[int]int{} + if runtime.MapBucketsPointerIsNil(localMap) { + t.Errorf("no escape: buckets pointer is nil for non-escaping map") + } + for i := 0; i < tt.n; i++ { + localMap[i] = i + } + if got := runtime.MapBucketsCount(localMap); got != tt.noescape { + t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got) + } + escapingMap := map[int]int{} + if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) { + t.Errorf("escape: buckets pointer is nil for n=%d buckets", count) + } + for i := 0; i < tt.n; i++ { + escapingMap[i] = i + } + if got := runtime.MapBucketsCount(escapingMap); got != tt.escape { + t.Errorf("escape n=%d want %d buckets, got %d", tt.n, tt.escape, got) + } + mapSink = escapingMap + } + }) + t.Run("nohint", func(t *testing.T) { + for _, tt := range mapBucketTests { + localMap := make(map[int]int) + if runtime.MapBucketsPointerIsNil(localMap) { + t.Errorf("no escape: buckets pointer is nil for non-escaping map") + } + for i := 0; i < tt.n; i++ { + localMap[i] = i + } + if got := runtime.MapBucketsCount(localMap); got != tt.noescape { + t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got) + } + escapingMap := make(map[int]int) + if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) { + t.Errorf("escape: buckets pointer is nil for n=%d buckets", count) + } + for i := 0; i < tt.n; i++ { + escapingMap[i] = i + } + if got := runtime.MapBucketsCount(escapingMap); got != tt.escape { + t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got) + } + mapSink = escapingMap + } + }) + t.Run("makemap", func(t *testing.T) { + for _, tt := range mapBucketTests { + localMap := make(map[int]int, tt.n) + if runtime.MapBucketsPointerIsNil(localMap) { + t.Errorf("no escape: buckets pointer is nil for non-escaping map") + } + for i := 0; i < tt.n; i++ { + localMap[i] = i + } + if got := runtime.MapBucketsCount(localMap); got != tt.noescape { + t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got) + } + escapingMap := make(map[int]int, tt.n) + if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) { + t.Errorf("escape: buckets pointer is nil for n=%d buckets", count) + } + for i := 0; i < tt.n; i++ { + escapingMap[i] = i + } + if got := runtime.MapBucketsCount(escapingMap); got != tt.escape { + t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got) + } + mapSink = escapingMap + } + }) + t.Run("makemap64", func(t *testing.T) { + for _, tt := range mapBucketTests { + localMap := make(map[int]int, int64(tt.n)) + if runtime.MapBucketsPointerIsNil(localMap) { + t.Errorf("no escape: buckets pointer is nil for non-escaping map") + } + for i := 0; i < tt.n; i++ { + localMap[i] = i + } + if got := runtime.MapBucketsCount(localMap); got != tt.noescape { + t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got) + } + escapingMap := make(map[int]int, tt.n) + if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) { + t.Errorf("escape: buckets pointer is nil for n=%d buckets", count) + } + for i := 0; i < tt.n; i++ { + escapingMap[i] = i + } + if got := runtime.MapBucketsCount(escapingMap); got != tt.escape { + t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got) + } + mapSink = 
escapingMap + } + }) + +} + func benchmarkMapPop(b *testing.B, n int) { m := map[int]int{} for i := 0; i < b.N; i++ { @@ -617,14 +745,38 @@ func BenchmarkMapPop100(b *testing.B) { benchmarkMapPop(b, 100) } func BenchmarkMapPop1000(b *testing.B) { benchmarkMapPop(b, 1000) } func BenchmarkMapPop10000(b *testing.B) { benchmarkMapPop(b, 10000) } +var testNonEscapingMapVariable int = 8 + func TestNonEscapingMap(t *testing.T) { n := testing.AllocsPerRun(1000, func() { + m := map[int]int{} + m[0] = 0 + }) + if n != 0 { + t.Fatalf("mapliteral: want 0 allocs, got %v", n) + } + n = testing.AllocsPerRun(1000, func() { m := make(map[int]int) m[0] = 0 }) if n != 0 { - t.Fatalf("want 0 allocs, got %v", n) + t.Fatalf("no hint: want 0 allocs, got %v", n) + } + n = testing.AllocsPerRun(1000, func() { + m := make(map[int]int, 8) + m[0] = 0 + }) + if n != 0 { + t.Fatalf("with small hint: want 0 allocs, got %v", n) + } + n = testing.AllocsPerRun(1000, func() { + m := make(map[int]int, testNonEscapingMapVariable) + m[0] = 0 + }) + if n != 0 { + t.Fatalf("with variable hint: want 0 allocs, got %v", n) } + } func benchmarkMapAssignInt32(b *testing.B, n int) { @@ -635,12 +787,16 @@ func benchmarkMapAssignInt32(b *testing.B, n int) { } func benchmarkMapDeleteInt32(b *testing.B, n int) { - a := make(map[int32]int) - for i := 0; i < n*b.N; i++ { - a[int32(i)] = i - } + a := make(map[int32]int, n) b.ResetTimer() - for i := 0; i < n*b.N; i = i + n { + for i := 0; i < b.N; i++ { + if len(a) == 0 { + b.StopTimer() + for j := i; j < i+n; j++ { + a[int32(j)] = j + } + b.StartTimer() + } delete(a, int32(i)) } } @@ -653,12 +809,16 @@ func benchmarkMapAssignInt64(b *testing.B, n int) { } func benchmarkMapDeleteInt64(b *testing.B, n int) { - a := make(map[int64]int) - for i := 0; i < n*b.N; i++ { - a[int64(i)] = i - } + a := make(map[int64]int, n) b.ResetTimer() - for i := 0; i < n*b.N; i = i + n { + for i := 0; i < b.N; i++ { + if len(a) == 0 { + b.StopTimer() + for j := i; j < i+n; j++ { + a[int64(j)] = j + } + b.StartTimer() + } delete(a, int64(i)) } } @@ -676,17 +836,23 @@ func benchmarkMapAssignStr(b *testing.B, n int) { } func benchmarkMapDeleteStr(b *testing.B, n int) { - k := make([]string, n*b.N) - for i := 0; i < n*b.N; i++ { - k[i] = strconv.Itoa(i) - } - a := make(map[string]int) - for i := 0; i < n*b.N; i++ { - a[k[i]] = i + i2s := make([]string, n) + for i := 0; i < n; i++ { + i2s[i] = strconv.Itoa(i) } + a := make(map[string]int, n) b.ResetTimer() - for i := 0; i < n*b.N; i = i + n { - delete(a, k[i]) + k := 0 + for i := 0; i < b.N; i++ { + if len(a) == 0 { + b.StopTimer() + for j := 0; j < n; j++ { + a[i2s[j]] = j + } + k = i + b.StartTimer() + } + delete(a, i2s[i-k]) } } @@ -705,7 +871,7 @@ func BenchmarkMapAssign(b *testing.B) { } func BenchmarkMapDelete(b *testing.B) { - b.Run("Int32", runWith(benchmarkMapDeleteInt32, 1, 2, 4)) - b.Run("Int64", runWith(benchmarkMapDeleteInt64, 1, 2, 4)) - b.Run("Str", runWith(benchmarkMapDeleteStr, 1, 2, 4)) + b.Run("Int32", runWith(benchmarkMapDeleteInt32, 100, 1000, 10000)) + b.Run("Int64", runWith(benchmarkMapDeleteInt64, 100, 1000, 10000)) + b.Run("Str", runWith(benchmarkMapDeleteStr, 100, 1000, 10000)) } diff --git a/src/runtime/mbarrier.go b/src/runtime/mbarrier.go index 3713c50c39..e28bdb8b8d 100644 --- a/src/runtime/mbarrier.go +++ b/src/runtime/mbarrier.go @@ -182,6 +182,8 @@ func gcmarkwb_m(slot *uintptr, ptr uintptr) { func writebarrierptr_prewrite1(dst *uintptr, src uintptr) { mp := acquirem() if mp.inwb || mp.dying > 0 { + // We explicitly allow write barriers 
in startpanic_m, + // since we're going down anyway. Ignore them here. releasem(mp) return } @@ -237,6 +239,10 @@ func writebarrierptr_prewrite(dst *uintptr, src uintptr) { // typedmemmove copies a value of type t to dst from src. // Must be nosplit, see #16026. +// +// TODO: Perfect for go:nosplitrec since we can't have a safe point +// anywhere in the bulk barrier or memmove. +// //go:nosplit func typedmemmove(typ *_type, dst, src unsafe.Pointer) { if typ.kind&kindNoPointers == 0 { @@ -258,8 +264,8 @@ func typedmemmove(typ *_type, dst, src unsafe.Pointer) { //go:linkname reflect_typedmemmove reflect.typedmemmove func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) { if raceenabled { - raceWriteObjectPC(typ, dst, getcallerpc(unsafe.Pointer(&typ)), funcPC(reflect_typedmemmove)) - raceReadObjectPC(typ, src, getcallerpc(unsafe.Pointer(&typ)), funcPC(reflect_typedmemmove)) + raceWriteObjectPC(typ, dst, getcallerpc(), funcPC(reflect_typedmemmove)) + raceReadObjectPC(typ, src, getcallerpc(), funcPC(reflect_typedmemmove)) } if msanenabled { msanwrite(dst, typ.size) @@ -320,8 +326,12 @@ func typedslicecopy(typ *_type, dst, src slice) int { dstp := dst.array srcp := src.array + // The compiler emits calls to typedslicecopy before + // instrumentation runs, so unlike the other copying and + // assignment operations, it's not instrumented in the calling + // code and needs its own instrumentation. if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&typ)) + callerpc := getcallerpc() pc := funcPC(slicecopy) racewriterangepc(dstp, uintptr(n)*typ.size, callerpc, pc) racereadrangepc(srcp, uintptr(n)*typ.size, callerpc, pc) @@ -339,41 +349,13 @@ func typedslicecopy(typ *_type, dst, src slice) int { // compiler only emits calls to typedslicecopy for types with pointers, // and growslice and reflect_typedslicecopy check for pointers // before calling typedslicecopy. - if !writeBarrier.needed { - memmove(dstp, srcp, uintptr(n)*typ.size) - return n + size := uintptr(n) * typ.size + if writeBarrier.needed { + bulkBarrierPreWrite(uintptr(dstp), uintptr(srcp), size) } - - systemstack(func() { - if uintptr(srcp) < uintptr(dstp) && uintptr(srcp)+uintptr(n)*typ.size > uintptr(dstp) { - // Overlap with src before dst. - // Copy backward, being careful not to move dstp/srcp - // out of the array they point into. - dstp = add(dstp, uintptr(n-1)*typ.size) - srcp = add(srcp, uintptr(n-1)*typ.size) - i := 0 - for { - typedmemmove(typ, dstp, srcp) - if i++; i >= n { - break - } - dstp = add(dstp, -typ.size) - srcp = add(srcp, -typ.size) - } - } else { - // Copy forward, being careful not to move dstp/srcp - // out of the array they point into. - i := 0 - for { - typedmemmove(typ, dstp, srcp) - if i++; i >= n { - break - } - dstp = add(dstp, typ.size) - srcp = add(srcp, typ.size) - } - } - }) + // See typedmemmove for a discussion of the race between the + // barrier and memmove. 
+ memmove(dstp, srcp, size) return n } @@ -390,7 +372,7 @@ func reflect_typedslicecopy(elemType *_type, dst, src slice) int { size := uintptr(n) * elemType.size if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&elemType)) + callerpc := getcallerpc() pc := funcPC(reflect_typedslicecopy) racewriterangepc(dst.array, size, callerpc, pc) racereadrangepc(src.array, size, callerpc, pc) diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 2a9f1b83e5..6e2f12db15 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -449,11 +449,6 @@ func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits return } -// prefetch the bits. -func (h heapBits) prefetch() { - prefetchnta(uintptr(unsafe.Pointer((h.bitp)))) -} - // next returns the heapBits describing the next pointer-sized word in memory. // That is, if h describes address p, h.next() describes p+ptrSize. // Note that next does not modify h. The caller must record the result. @@ -528,12 +523,13 @@ func (h heapBits) setCheckmarked(size uintptr) { atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift)) } -// bulkBarrierPreWrite executes writebarrierptr_prewrite1 +// bulkBarrierPreWrite executes a write barrier // for every pointer slot in the memory range [src, src+size), // using pointer/scalar information from [dst, dst+size). // This executes the write barriers necessary before a memmove. // src, dst, and size must be pointer-aligned. // The range [dst, dst+size) must lie within a single object. +// It does not perform the actual writes. // // As a special case, src == 0 indicates that this is being used for a // memclr. bulkBarrierPreWrite will pass 0 for the src of each write @@ -583,12 +579,15 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { return } + buf := &getg().m.p.ptr().wbBuf h := heapBitsForAddr(dst) if src == 0 { for i := uintptr(0); i < size; i += sys.PtrSize { if h.isPointer() { dstx := (*uintptr)(unsafe.Pointer(dst + i)) - writebarrierptr_prewrite1(dstx, 0) + if !buf.putFast(*dstx, 0) { + wbBufFlush(nil, 0) + } } h = h.next() } @@ -597,7 +596,9 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { if h.isPointer() { dstx := (*uintptr)(unsafe.Pointer(dst + i)) srcx := (*uintptr)(unsafe.Pointer(src + i)) - writebarrierptr_prewrite1(dstx, *srcx) + if !buf.putFast(*dstx, *srcx) { + wbBufFlush(nil, 0) + } } h = h.next() } @@ -617,6 +618,7 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) { bits = addb(bits, word/8) mask := uint8(1) << (word % 8) + buf := &getg().m.p.ptr().wbBuf for i := uintptr(0); i < size; i += sys.PtrSize { if mask == 0 { bits = addb(bits, 1) @@ -630,10 +632,14 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) { if *bits&mask != 0 { dstx := (*uintptr)(unsafe.Pointer(dst + i)) if src == 0 { - writebarrierptr_prewrite1(dstx, 0) + if !buf.putFast(*dstx, 0) { + wbBufFlush(nil, 0) + } } else { srcx := (*uintptr)(unsafe.Pointer(src + i)) - writebarrierptr_prewrite1(dstx, *srcx) + if !buf.putFast(*dstx, *srcx) { + wbBufFlush(nil, 0) + } } } mask <<= 1 diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index 96fb273337..6c24650dac 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -104,7 +104,7 @@ func freemcache(c *mcache) { // Gets a span that has a free object in it and assigns it // to be the cached span for the given sizeclass. Returns this span. 
-func (c *mcache) refill(spc spanClass) *mspan { +func (c *mcache) refill(spc spanClass) { _g_ := getg() _g_.m.locks++ @@ -131,7 +131,6 @@ func (c *mcache) refill(spc spanClass) *mspan { c.alloc[spc] = s _g_.m.locks-- - return s } func (c *mcache) releaseAll() { diff --git a/src/runtime/mem_bsd.go b/src/runtime/mem_bsd.go index e0d234715f..23872b9a63 100644 --- a/src/runtime/mem_bsd.go +++ b/src/runtime/mem_bsd.go @@ -15,8 +15,8 @@ import ( // which prevents us from allocating more stack. //go:nosplit func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { - v := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) - if uintptr(v) < 4096 { + v, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) + if err != 0 { return nil } mSysStatInc(sysStat, n) @@ -51,8 +51,8 @@ func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer { return v } - p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0) - if uintptr(p) < 4096 { + p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0) + if err != 0 { return nil } *reserved = true @@ -76,22 +76,22 @@ func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) { // to do this - we do not on other platforms. flags |= _MAP_FIXED } - p := mmap(v, n, _PROT_READ|_PROT_WRITE, flags, -1, 0) - if uintptr(p) == _ENOMEM || (GOOS == "solaris" && uintptr(p) == _sunosEAGAIN) { + p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, flags, -1, 0) + if err == _ENOMEM || (GOOS == "solaris" && err == _sunosEAGAIN) { throw("runtime: out of memory") } - if p != v { - print("runtime: address space conflict: map(", v, ") = ", p, "\n") + if p != v || err != 0 { + print("runtime: address space conflict: map(", v, ") = ", p, "(err ", err, ")\n") throw("runtime: address space conflict") } return } - p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) - if uintptr(p) == _ENOMEM || (GOOS == "solaris" && uintptr(p) == _sunosEAGAIN) { + p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) + if err == _ENOMEM || (GOOS == "solaris" && err == _sunosEAGAIN) { throw("runtime: out of memory") } - if p != v { + if p != v || err != 0 { throw("runtime: cannot map pages in arena address space") } } diff --git a/src/runtime/mem_darwin.go b/src/runtime/mem_darwin.go index 3f1c4d76f3..e41452a2c0 100644 --- a/src/runtime/mem_darwin.go +++ b/src/runtime/mem_darwin.go @@ -10,8 +10,8 @@ import "unsafe" // which prevents us from allocating more stack. 
//go:nosplit func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { - v := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) - if uintptr(v) < 4096 { + v, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) + if err != 0 { return nil } mSysStatInc(sysStat, n) @@ -40,8 +40,8 @@ func sysFault(v unsafe.Pointer, n uintptr) { func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer { *reserved = true - p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0) - if uintptr(p) < 4096 { + p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0) + if err != 0 { return nil } return p @@ -53,11 +53,11 @@ const ( func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) { mSysStatInc(sysStat, n) - p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) - if uintptr(p) == _ENOMEM { + p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) + if err == _ENOMEM { throw("runtime: out of memory") } - if p != v { + if p != v || err != 0 { throw("runtime: cannot map pages in arena address space") } } diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go index 094658de51..16f44439f1 100644 --- a/src/runtime/mem_linux.go +++ b/src/runtime/mem_linux.go @@ -41,30 +41,30 @@ func addrspace_free(v unsafe.Pointer, n uintptr) bool { return true } -func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) unsafe.Pointer { - p := mmap(v, n, prot, flags, fd, offset) +func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) (unsafe.Pointer, int) { + p, err := mmap(v, n, prot, flags, fd, offset) // On some systems, mmap ignores v without // MAP_FIXED, so retry if the address space is free. if p != v && addrspace_free(v, n) { - if uintptr(p) > 4096 { + if err == 0 { munmap(p, n) } - p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset) + p, err = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset) } - return p + return p, err } // Don't split the stack as this method may be invoked without a valid G, which // prevents us from allocating more stack. //go:nosplit func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { - p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) - if uintptr(p) < 4096 { - if uintptr(p) == _EACCES { + p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) + if err != 0 { + if err == _EACCES { print("runtime: mmap: access denied\n") exit(2) } - if uintptr(p) == _EAGAIN { + if err == _EAGAIN { print("runtime: mmap: too much locked memory (check 'ulimit -l').\n") exit(2) } @@ -186,9 +186,9 @@ func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer { // if we can reserve at least 64K and check the assumption in SysMap. // Only user-mode Linux (UML) rejects these requests. 
if sys.PtrSize == 8 && uint64(n) > 1<<32 { - p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0) - if p != v { - if uintptr(p) >= 4096 { + p, err := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0) + if p != v || err != 0 { + if err == 0 { munmap(p, 64<<10) } return nil @@ -198,8 +198,8 @@ func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer { return v } - p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0) - if uintptr(p) < 4096 { + p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0) + if err != 0 { return nil } *reserved = true @@ -211,22 +211,22 @@ func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) { // On 64-bit, we don't actually have v reserved, so tread carefully. if !reserved { - p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) - if uintptr(p) == _ENOMEM { + p, err := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) + if err == _ENOMEM { throw("runtime: out of memory") } - if p != v { - print("runtime: address space conflict: map(", v, ") = ", p, "\n") + if p != v || err != 0 { + print("runtime: address space conflict: map(", v, ") = ", p, " (err ", err, ")\n") throw("runtime: address space conflict") } return } - p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) - if uintptr(p) == _ENOMEM { + p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) + if err == _ENOMEM { throw("runtime: out of memory") } - if p != v { + if p != v || err != 0 { throw("runtime: cannot map pages in arena address space") } } diff --git a/src/runtime/memclr_amd64p32.s b/src/runtime/memclr_amd64p32.s new file mode 100644 index 0000000000..26171bfd4a --- /dev/null +++ b/src/runtime/memclr_amd64p32.s @@ -0,0 +1,23 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-8 + MOVL ptr+0(FP), DI + MOVL n+4(FP), CX + MOVQ CX, BX + ANDQ $3, BX + SHRQ $2, CX + MOVQ $0, AX + CLD + REP + STOSL + MOVQ BX, CX + REP + STOSB + // Note: we zero only 4 bytes at a time so that the tail is at most + // 3 bytes. That guarantees that we aren't zeroing pointers with STOSB. + // See issue 13160. + RET diff --git a/src/runtime/memclr_arm64.s b/src/runtime/memclr_arm64.s index 9d756bcf6d..bf954e047f 100644 --- a/src/runtime/memclr_arm64.s +++ b/src/runtime/memclr_arm64.s @@ -6,32 +6,54 @@ // void runtime·memclrNoHeapPointers(void*, uintptr) TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-16 - MOVD ptr+0(FP), R3 - MOVD n+8(FP), R4 - // TODO(mwhudson): this is written this way to avoid tickling - // warnings from addpool when written as AND $7, R4, R6 (see - // https://golang.org/issue/12708) - AND $~7, R4, R5 // R5 is N&~7 - SUB R5, R4, R6 // R6 is N&7 + MOVD ptr+0(FP), R0 + MOVD n+8(FP), R1 + // If size is less than 16 bytes, use tail_zero to zero what remains + CMP $16, R1 + BLT tail_zero + // Get buffer offset into 16 byte aligned address for better performance + ANDS $15, R0, ZR + BNE unaligned_to_16 +aligned_to_16: + LSR $4, R1, R2 +zero_by_16: + STP.P (ZR, ZR), 16(R0) + SUBS $1, R2, R2 + BNE zero_by_16 - CMP $0, R5 - BEQ nowords + ANDS $15, R1, R1 + BEQ ending - ADD R3, R5, R5 + // Zero buffer with size=R1 < 16 +tail_zero: + TBZ $3, R1, tail_zero_4 + MOVD.P ZR, 8(R0) -wordloop: // TODO: Optimize for unaligned ptr. 
- MOVD.P $0, 8(R3) - CMP R3, R5 - BNE wordloop -nowords: - CMP $0, R6 - BEQ done +tail_zero_4: + TBZ $2, R1, tail_zero_2 + MOVW.P ZR, 4(R0) - ADD R3, R6, R6 +tail_zero_2: + TBZ $1, R1, tail_zero_1 + MOVH.P ZR, 2(R0) -byteloop: - MOVBU.P $0, 1(R3) - CMP R3, R6 - BNE byteloop -done: +tail_zero_1: + TBZ $0, R1, ending + MOVB ZR, (R0) + +ending: RET + +unaligned_to_16: + MOVD R0, R2 +head_loop: + MOVBU.P ZR, 1(R0) + ANDS $15, R0, ZR + BNE head_loop + // Adjust length for what remains + SUB R2, R0, R3 + SUB R3, R1 + // If size is less than 16 bytes, use tail_zero to zero what remains + CMP $16, R1 + BLT tail_zero + B aligned_to_16 diff --git a/src/runtime/memmove_nacl_amd64p32.s b/src/runtime/memmove_amd64p32.s index 13907a90b2..13907a90b2 100644 --- a/src/runtime/memmove_nacl_amd64p32.s +++ b/src/runtime/memmove_amd64p32.s diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go index 74b8753b5f..62de604e69 100644 --- a/src/runtime/memmove_test.go +++ b/src/runtime/memmove_test.go @@ -9,6 +9,7 @@ import ( "encoding/binary" "fmt" "internal/race" + "internal/testenv" . "runtime" "testing" ) @@ -88,6 +89,10 @@ func TestMemmoveAlias(t *testing.T) { } func TestMemmoveLarge0x180000(t *testing.T) { + if testing.Short() && testenv.Builder() == "" { + t.Skip("-short") + } + t.Parallel() if race.Enabled { t.Skip("skipping large memmove test under race detector") @@ -96,6 +101,10 @@ func TestMemmoveLarge0x180000(t *testing.T) { } func TestMemmoveOverlapLarge0x120000(t *testing.T) { + if testing.Short() && testenv.Builder() == "" { + t.Skip("-short") + } + t.Parallel() if race.Enabled { t.Skip("skipping large memmove test under race detector") diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go index a8729b1aa4..c11a6f15a4 100644 --- a/src/runtime/mfinal.go +++ b/src/runtime/mfinal.go @@ -461,11 +461,7 @@ func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) { return } -// Mark KeepAlive as noinline so that the current compiler will ensure -// that the argument is alive at the point of the function call. -// If it were inlined, it would disappear, and there would be nothing -// keeping the argument alive. Perhaps a future compiler will recognize -// runtime.KeepAlive specially and do something more efficient. +// Mark KeepAlive as noinline so that it is easily detectable as an intrinsic. //go:noinline // KeepAlive marks its argument as currently reachable. @@ -487,4 +483,11 @@ func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) { // Without the KeepAlive call, the finalizer could run at the start of // syscall.Read, closing the file descriptor before syscall.Read makes // the actual system call. -func KeepAlive(interface{}) {} +func KeepAlive(x interface{}) { + // Introduce a use of x that the compiler can't eliminate. + // This makes sure x is alive on entry. We need x to be alive + // on entry for "defer runtime.KeepAlive(x)"; see issue 21402. + if cgoAlwaysFalse { + println(x) + } +} diff --git a/src/runtime/mfinal_test.go b/src/runtime/mfinal_test.go index e9e3601de6..3ca8d31c60 100644 --- a/src/runtime/mfinal_test.go +++ b/src/runtime/mfinal_test.go @@ -241,3 +241,24 @@ var ( Foo2 = &Object2{} Foo1 = &Object1{} ) + +func TestDeferKeepAlive(t *testing.T) { + if *flagQuick { + t.Skip("-quick") + } + + // See issue 21402. + t.Parallel() + type T *int // needs to be a pointer base type to avoid tinyalloc and its never-finalized behavior. 
+ x := new(T) + finRun := false + runtime.SetFinalizer(x, func(x *T) { + finRun = true + }) + defer runtime.KeepAlive(x) + runtime.GC() + time.Sleep(time.Second) + if finRun { + t.Errorf("finalizer ran prematurely") + } +} diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index b708720322..ab90c289a5 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -230,6 +230,24 @@ func setGCPercent(in int32) (out int32) { // Update pacing in response to gcpercent change. gcSetTriggerRatio(memstats.triggerRatio) unlock(&mheap_.lock) + + // If we just disabled GC, wait for any concurrent GC to + // finish so we always return with no GC running. + if in < 0 { + // Disable phase transitions. + lock(&work.sweepWaiters.lock) + if gcphase == _GCmark { + // GC is active. Wait until we reach sweeping. + gp := getg() + gp.schedlink = work.sweepWaiters.head + work.sweepWaiters.head.set(gp) + goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) + } else { + // GC isn't active. + unlock(&work.sweepWaiters.lock) + } + } + return out } @@ -299,10 +317,10 @@ const ( // gcMarkWorkerFractionalMode indicates that a P is currently // running the "fractional" mark worker. The fractional worker - // is necessary when GOMAXPROCS*gcGoalUtilization is not an - // integer. The fractional worker should run until it is + // is necessary when GOMAXPROCS*gcBackgroundUtilization is not + // an integer. The fractional worker should run until it is // preempted and will be scheduled to pick up the fractional - // part of GOMAXPROCS*gcGoalUtilization. + // part of GOMAXPROCS*gcBackgroundUtilization. gcMarkWorkerFractionalMode // gcMarkWorkerIdleMode indicates that a P is running the mark @@ -396,23 +414,18 @@ type gcControllerState struct { assistBytesPerWork float64 // fractionalUtilizationGoal is the fraction of wall clock - // time that should be spent in the fractional mark worker. - // For example, if the overall mark utilization goal is 25% - // and GOMAXPROCS is 6, one P will be a dedicated mark worker - // and this will be set to 0.5 so that 50% of the time some P - // is in a fractional mark worker. This is computed at the - // beginning of each cycle. + // time that should be spent in the fractional mark worker on + // each P that isn't running a dedicated worker. + // + // For example, if the utilization goal is 25% and there are + // no dedicated workers, this will be 0.25. If there goal is + // 25%, there is one dedicated worker, and GOMAXPROCS is 5, + // this will be 0.05 to make up the missing 5%. + // + // If this is zero, no fractional workers are needed. fractionalUtilizationGoal float64 _ [sys.CacheLineSize]byte - - // fractionalMarkWorkersNeeded is the number of fractional - // mark workers that need to be started. This is either 0 or - // 1. This is potentially updated atomically at every - // scheduling point (hence it gets its own cache line). - fractionalMarkWorkersNeeded int64 - - _ [sys.CacheLineSize]byte } // startCycle resets the GC controller's state and computes estimates @@ -453,23 +466,33 @@ func (c *gcControllerState) startCycle() { memstats.next_gc = memstats.heap_live + 1024*1024 } - // Compute the total mark utilization goal and divide it among - // dedicated and fractional workers. 
- totalUtilizationGoal := float64(gomaxprocs) * gcGoalUtilization - c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal) - c.fractionalUtilizationGoal = totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded) - if c.fractionalUtilizationGoal > 0 { - c.fractionalMarkWorkersNeeded = 1 + // Compute the background mark utilization goal. In general, + // this may not come out exactly. We round the number of + // dedicated workers so that the utilization is closest to + // 25%. For small GOMAXPROCS, this would introduce too much + // error, so we add fractional workers in that case. + totalUtilizationGoal := float64(gomaxprocs) * gcBackgroundUtilization + c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal + 0.5) + utilError := float64(c.dedicatedMarkWorkersNeeded)/totalUtilizationGoal - 1 + const maxUtilError = 0.3 + if utilError < -maxUtilError || utilError > maxUtilError { + // Rounding put us more than 30% off our goal. With + // gcBackgroundUtilization of 25%, this happens for + // GOMAXPROCS<=3 or GOMAXPROCS=6. Enable fractional + // workers to compensate. + if float64(c.dedicatedMarkWorkersNeeded) > totalUtilizationGoal { + // Too many dedicated workers. + c.dedicatedMarkWorkersNeeded-- + } + c.fractionalUtilizationGoal = (totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)) / float64(gomaxprocs) } else { - c.fractionalMarkWorkersNeeded = 0 + c.fractionalUtilizationGoal = 0 } // Clear per-P state - for _, p := range &allp { - if p == nil { - break - } + for _, p := range allp { p.gcAssistTime = 0 + p.gcFractionalMarkTime = 0 } // Compute initial values for controls that are updated @@ -482,7 +505,7 @@ func (c *gcControllerState) startCycle() { work.initialHeapLive>>20, "->", memstats.next_gc>>20, " MB)", " workers=", c.dedicatedMarkWorkersNeeded, - "+", c.fractionalMarkWorkersNeeded, "\n") + "+", c.fractionalUtilizationGoal, "\n") } } @@ -495,47 +518,73 @@ func (c *gcControllerState) startCycle() { // is when assists are enabled and the necessary statistics are // available). func (c *gcControllerState) revise() { - // Compute the expected scan work remaining. + gcpercent := gcpercent + if gcpercent < 0 { + // If GC is disabled but we're running a forced GC, + // act like GOGC is huge for the below calculations. + gcpercent = 100000 + } + live := atomic.Load64(&memstats.heap_live) + + var heapGoal, scanWorkExpected int64 + if live <= memstats.next_gc { + // We're under the soft goal. Pace GC to complete at + // next_gc assuming the heap is in steady-state. + heapGoal = int64(memstats.next_gc) + + // Compute the expected scan work remaining. + // + // This is estimated based on the expected + // steady-state scannable heap. For example, with + // GOGC=100, only half of the scannable heap is + // expected to be live, so that's what we target. + // + // (This is a float calculation to avoid overflowing on + // 100*heap_scan.) + scanWorkExpected = int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent)) + } else { + // We're past the soft goal. Pace GC so that in the + // worst case it will complete by the hard goal. + const maxOvershoot = 1.1 + heapGoal = int64(float64(memstats.next_gc) * maxOvershoot) + + // Compute the upper bound on the scan work remaining. + scanWorkExpected = int64(memstats.heap_scan) + } + + // Compute the remaining scan work estimate. // // Note that we currently count allocations during GC as both // scannable heap (heap_scan) and scan work completed - // (scanWork), so this difference won't be changed by - // allocations during GC. 
- // - // This particular estimate is a strict upper bound on the - // possible remaining scan work for the current heap. - // You might consider dividing this by 2 (or by - // (100+GOGC)/100) to counter this over-estimation, but - // benchmarks show that this has almost no effect on mean - // mutator utilization, heap size, or assist time and it - // introduces the danger of under-estimating and letting the - // mutator outpace the garbage collector. - scanWorkExpected := int64(memstats.heap_scan) - c.scanWork - if scanWorkExpected < 1000 { + // (scanWork), so allocation will change this difference will + // slowly in the soft regime and not at all in the hard + // regime. + scanWorkRemaining := scanWorkExpected - c.scanWork + if scanWorkRemaining < 1000 { // We set a somewhat arbitrary lower bound on // remaining scan work since if we aim a little high, // we can miss by a little. // // We *do* need to enforce that this is at least 1, // since marking is racy and double-scanning objects - // may legitimately make the expected scan work - // negative. - scanWorkExpected = 1000 + // may legitimately make the remaining scan work + // negative, even in the hard goal regime. + scanWorkRemaining = 1000 } // Compute the heap distance remaining. - heapDistance := int64(memstats.next_gc) - int64(atomic.Load64(&memstats.heap_live)) - if heapDistance <= 0 { + heapRemaining := heapGoal - int64(live) + if heapRemaining <= 0 { // This shouldn't happen, but if it does, avoid // dividing by zero or setting the assist negative. - heapDistance = 1 + heapRemaining = 1 } // Compute the mutator assist ratio so by the time the mutator // allocates the remaining heap bytes up to next_gc, it will // have done (or stolen) the remaining amount of scan work. - c.assistWorkPerByte = float64(scanWorkExpected) / float64(heapDistance) - c.assistBytesPerWork = float64(heapDistance) / float64(scanWorkExpected) + c.assistWorkPerByte = float64(scanWorkRemaining) / float64(heapRemaining) + c.assistBytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining) } // endCycle computes the trigger ratio for the next cycle. @@ -569,7 +618,7 @@ func (c *gcControllerState) endCycle() float64 { assistDuration := nanotime() - c.markStartTime // Assume background mark hit its utilization goal. - utilization := gcGoalUtilization + utilization := gcBackgroundUtilization // Add assist utilization; avoid divide by zero. if assistDuration > 0 { utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs)) @@ -688,51 +737,20 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { // This P is now dedicated to marking until the end of // the concurrent mark phase. _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode + } else if c.fractionalUtilizationGoal == 0 { + // No need for fractional workers. + return nil } else { - if !decIfPositive(&c.fractionalMarkWorkersNeeded) { - // No more workers are need right now. - return nil - } - - // This P has picked the token for the fractional worker. - // Is the GC currently under or at the utilization goal? - // If so, do more work. - // - // We used to check whether doing one time slice of work - // would remain under the utilization goal, but that has the - // effect of delaying work until the mutator has run for - // enough time slices to pay for the work. During those time - // slices, write barriers are enabled, so the mutator is running slower. 
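The soft/hard goal split in revise above boils down to two inputs: how much scan work is believed to remain and how much heap headroom is left before the goal. The following standalone sketch (paceAssists is an invented name; the figures in main are hypothetical) walks one example of each regime.

package main

import "fmt"

// paceAssists is an illustrative reduction of the new revise logic:
// under the soft goal (next_gc) we target the steady-state live heap;
// past it we allow at most 10% overshoot and assume all remaining
// scannable heap must be scanned.
func paceAssists(live, nextGC, heapScan, scanDone, gcpercent int64) float64 {
	var heapGoal, scanWorkExpected int64
	if live <= nextGC {
		heapGoal = nextGC
		scanWorkExpected = int64(float64(heapScan) * 100 / float64(100+gcpercent))
	} else {
		heapGoal = int64(float64(nextGC) * 1.1)
		scanWorkExpected = heapScan
	}
	scanWorkRemaining := scanWorkExpected - scanDone
	if scanWorkRemaining < 1000 {
		scanWorkRemaining = 1000
	}
	heapRemaining := heapGoal - live
	if heapRemaining <= 0 {
		heapRemaining = 1
	}
	return float64(scanWorkRemaining) / float64(heapRemaining)
}

func main() {
	// Hypothetical numbers: 90 MB live, 100 MB goal, 60 MB scannable, 10 MB scanned.
	fmt.Println("under soft goal:", paceAssists(90<<20, 100<<20, 60<<20, 10<<20, 100))
	// Past the soft goal: 105 MB live against the same 100 MB next_gc.
	fmt.Println("past soft goal: ", paceAssists(105<<20, 100<<20, 60<<20, 10<<20, 100))
}

Past the soft goal the headroom shrinks while the expected remaining scan work grows, so the assist ratio rises sharply; that is the intended backpressure.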
- // Now instead we do the work whenever we're under or at the - // utilization work and pay for it by letting the mutator run later. - // This doesn't change the overall utilization averages, but it - // front loads the GC work so that the GC finishes earlier and - // write barriers can be turned off sooner, effectively giving - // the mutator a faster machine. + // Is this P behind on the fractional utilization + // goal? // - // The old, slower behavior can be restored by setting - // gcForcePreemptNS = forcePreemptNS. - const gcForcePreemptNS = 0 - - // TODO(austin): We could fast path this and basically - // eliminate contention on c.fractionalMarkWorkersNeeded by - // precomputing the minimum time at which it's worth - // next scheduling the fractional worker. Then Ps - // don't have to fight in the window where we've - // passed that deadline and no one has started the - // worker yet. - // - // TODO(austin): Shorter preemption interval for mark - // worker to improve fairness and give this - // finer-grained control over schedule? - now := nanotime() - gcController.markStartTime - then := now + gcForcePreemptNS - timeUsed := c.fractionalMarkTime + gcForcePreemptNS - if then > 0 && float64(timeUsed)/float64(then) > c.fractionalUtilizationGoal { - // Nope, we'd overshoot the utilization goal - atomic.Xaddint64(&c.fractionalMarkWorkersNeeded, +1) + // This should be kept in sync with pollFractionalWorkerExit. + delta := nanotime() - gcController.markStartTime + if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal { + // Nope. No need to run a fractional worker. return nil } + // Run a fractional worker. _p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode } @@ -745,6 +763,24 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { return gp } +// pollFractionalWorkerExit returns true if a fractional mark worker +// should self-preempt. It assumes it is called from the fractional +// worker. +func pollFractionalWorkerExit() bool { + // This should be kept in sync with the fractional worker + // scheduler logic in findRunnableGCWorker. + now := nanotime() + delta := now - gcController.markStartTime + if delta <= 0 { + return true + } + p := getg().m.p.ptr() + selfTime := p.gcFractionalMarkTime + (now - p.gcMarkWorkerStartTime) + // Add some slack to the utilization goal so that the + // fractional worker isn't behind again the instant it exits. + return float64(selfTime)/float64(delta) > 1.2*gcController.fractionalUtilizationGoal +} + // gcSetTriggerRatio sets the trigger ratio and updates everything // derived from it: the absolute trigger, the heap goal, mark pacing, // and sweep pacing. @@ -859,9 +895,22 @@ func gcSetTriggerRatio(triggerRatio float64) { } } -// gcGoalUtilization is the goal CPU utilization for background +// gcGoalUtilization is the goal CPU utilization for // marking as a fraction of GOMAXPROCS. -const gcGoalUtilization = 0.25 +const gcGoalUtilization = 0.30 + +// gcBackgroundUtilization is the fixed CPU utilization for background +// marking. It must be <= gcGoalUtilization. The difference between +// gcGoalUtilization and gcBackgroundUtilization will be made up by +// mark assists. The scheduler will aim to use within 50% of this +// goal. +// +// Setting this to < gcGoalUtilization avoids saturating the trigger +// feedback controller when there are no assists, which allows it to +// better control CPU and heap growth. 
However, the larger the gap, +// the more mutator assists are expected to happen, which impact +// mutator latency. +const gcBackgroundUtilization = 0.25 // gcCreditSlack is the amount of scan work credit that can can // accumulate locally before updating gcController.scanWork and, @@ -1238,7 +1287,7 @@ func gcStart(mode gcMode, trigger gcTrigger) { } } - // Ok, we're doing it! Stop everybody else + // Ok, we're doing it! Stop everybody else semacquire(&worldsema) if trace.enabled { @@ -1251,7 +1300,12 @@ func gcStart(mode gcMode, trigger gcTrigger) { gcResetMarkState() - work.stwprocs, work.maxprocs = gcprocs(), gomaxprocs + work.stwprocs, work.maxprocs = gomaxprocs, gomaxprocs + if work.stwprocs > ncpu { + // This is used to compute CPU time of the STW phases, + // so it can't be more than ncpu, even if GOMAXPROCS is. + work.stwprocs = ncpu + } work.heap0 = atomic.Load64(&memstats.heap_live) work.pauseNS = 0 work.mode = mode @@ -1259,6 +1313,9 @@ func gcStart(mode gcMode, trigger gcTrigger) { now := nanotime() work.tSweepTerm = now work.pauseStart = now + if trace.enabled { + traceGCSTWStart(1) + } systemstack(stopTheWorldWithSema) // Finish sweep before we start concurrent scan. systemstack(func() { @@ -1311,11 +1368,17 @@ func gcStart(mode gcMode, trigger gcTrigger) { gcController.markStartTime = now // Concurrent mark. - systemstack(startTheWorldWithSema) - now = nanotime() + systemstack(func() { + now = startTheWorldWithSema(trace.enabled) + }) work.pauseNS += now - work.pauseStart work.tMark = now } else { + if trace.enabled { + // Switch to mark termination STW. + traceGCSTWDone() + traceGCSTWStart(0) + } t := nanotime() work.tMark, work.tMarkTerm = t, t work.heapGoal = work.heap0 @@ -1358,7 +1421,8 @@ top: // TODO(austin): Should dedicated workers keep an eye on this // and exit gcDrain promptly? atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff) - atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, -0xffffffff) + prevFractionalGoal := gcController.fractionalUtilizationGoal + gcController.fractionalUtilizationGoal = 0 if !gcBlackenPromptly { // Transition from mark 1 to mark 2. @@ -1385,6 +1449,7 @@ top: // workers have exited their loop so we can // start new mark 2 workers. forEachP(func(_p_ *p) { + wbBufFlush1(_p_) _p_.gcw.dispose() }) }) @@ -1401,7 +1466,7 @@ top: // Now we can start up mark 2 workers. atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff) - atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 0xffffffff) + gcController.fractionalUtilizationGoal = prevFractionalGoal incnwait := atomic.Xadd(&work.nwait, +1) if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { @@ -1416,6 +1481,9 @@ top: work.tMarkTerm = now work.pauseStart = now getg().m.preemptoff = "gcing" + if trace.enabled { + traceGCSTWStart(0) + } systemstack(stopTheWorldWithSema) // The gcphase is _GCmark, it will transition to _GCmarktermination // below. The important thing is that the wb remains active until @@ -1576,7 +1644,7 @@ func gcMarkTermination(nextTriggerRatio float64) { // so events don't leak into the wrong cycle. mProf_NextCycle() - systemstack(startTheWorldWithSema) + systemstack(func() { startTheWorldWithSema(true) }) // Flush the heap profile so we can start a new cycle next GC. // This is relatively expensive, so we don't do it with the @@ -1650,10 +1718,7 @@ func gcMarkTermination(nextTriggerRatio float64) { func gcBgMarkStartWorkers() { // Background marking is performed by per-P G's. Ensure that // each P has a background GC G. 
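The new split between gcGoalUtilization (0.30) and gcBackgroundUtilization (0.25) leaves roughly 5% of CPU to be claimed by assists, and the fractional worker introduced above self-preempts on a per-P time budget rather than a global token. A reduced sketch of that exit check follows (shouldExitFractionalWorker is an invented name; the goal value is only an example).

package main

import (
	"fmt"
	"time"
)

// Assumed illustrative value; the real goal lives in gcController and
// depends on GOMAXPROCS (see the computation earlier in this diff).
const fractionalUtilizationGoal = 0.05

// shouldExitFractionalWorker mirrors pollFractionalWorkerExit above: a
// fractional worker preempts itself once the fraction of wall-clock time
// this P has spent in fractional marking exceeds the goal, with 20%
// slack so it isn't behind again the instant it exits.
func shouldExitFractionalWorker(markStart, workerStart time.Time, prevFractionalMark time.Duration) bool {
	delta := time.Since(markStart)
	if delta <= 0 {
		return true
	}
	selfTime := prevFractionalMark + time.Since(workerStart)
	return float64(selfTime)/float64(delta) > 1.2*fractionalUtilizationGoal
}

func main() {
	markStart := time.Now().Add(-100 * time.Millisecond)
	workerStart := time.Now().Add(-2 * time.Millisecond)
	fmt.Println(shouldExitFractionalWorker(markStart, workerStart, 5*time.Millisecond))
}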
- for _, p := range &allp { - if p == nil || p.status == _Pdead { - break - } + for _, p := range allp { if p.gcBgMarkWorker == 0 { go gcBgMarkWorker(p) notetsleepg(&work.bgMarkReady, -1) @@ -1753,6 +1818,7 @@ func gcBgMarkWorker(_p_ *p) { } startTime := nanotime() + _p_.gcMarkWorkerStartTime = startTime decnwait := atomic.Xadd(&work.nwait, -1) if decnwait == work.nproc { @@ -1794,7 +1860,7 @@ func gcBgMarkWorker(_p_ *p) { // without preemption. gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit) case gcMarkWorkerFractionalMode: - gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) + gcDrain(&_p_.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) case gcMarkWorkerIdleMode: gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit) } @@ -1819,7 +1885,7 @@ func gcBgMarkWorker(_p_ *p) { atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1) case gcMarkWorkerFractionalMode: atomic.Xaddint64(&gcController.fractionalMarkTime, duration) - atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 1) + atomic.Xaddint64(&_p_.gcFractionalMarkTime, duration) case gcMarkWorkerIdleMode: atomic.Xaddint64(&gcController.idleMarkTime, duration) } @@ -1917,10 +1983,6 @@ func gcMark(start_time int64) { work.helperDrainBlock = true } - if trace.enabled { - traceGCScanStart() - } - if work.nproc > 1 { noteclear(&work.alldone) helpgc(int32(work.nproc)) @@ -1954,8 +2016,8 @@ func gcMark(start_time int64) { // Double-check that all gcWork caches are empty. This should // be ensured by mark 2 before we enter mark termination. - for i := 0; i < int(gomaxprocs); i++ { - gcw := &allp[i].gcw + for _, p := range allp { + gcw := &p.gcw if !gcw.empty() { throw("P has cached GC work at end of mark termination") } @@ -1964,10 +2026,6 @@ func gcMark(start_time int64) { } } - if trace.enabled { - traceGCScanDone() - } - cachestats() // Update the marked heap stat. @@ -2097,18 +2155,19 @@ func clearpools() { unlock(&sched.deferlock) } -// Timing - -//go:nowritebarrier +// gchelper runs mark termination tasks on Ps other than the P +// coordinating mark termination. +// +// The caller is responsible for ensuring that this has a P to run on, +// even though it's running during STW. Because of this, it's allowed +// to have write barriers. +// +//go:yeswritebarrierrec func gchelper() { _g_ := getg() _g_.m.traceback = 2 gchelperstart() - if trace.enabled { - traceGCScanStart() - } - // Parallel mark over GC roots and heap if gcphase == _GCmarktermination { gcw := &_g_.m.p.ptr().gcw @@ -2120,10 +2179,6 @@ func gchelper() { gcw.dispose() } - if trace.enabled { - traceGCScanDone() - } - nproc := atomic.Load(&work.nproc) // work.nproc can change right after we increment work.ndone if atomic.Xadd(&work.ndone, +1) == nproc-1 { notewakeup(&work.alldone) @@ -2142,6 +2197,8 @@ func gchelperstart() { } } +// Timing + // itoaDiv formats val/(10**dec) into buf. func itoaDiv(buf []byte, val uint64, dec int) []byte { i := len(buf) - 1 diff --git a/src/runtime/mgclarge.go b/src/runtime/mgclarge.go index 757e88d1d9..fe437bf5e8 100644 --- a/src/runtime/mgclarge.go +++ b/src/runtime/mgclarge.go @@ -164,11 +164,10 @@ func (root *mTreap) insert(span *mspan) { } } -func (root *mTreap) removeNode(t *treapNode) *mspan { +func (root *mTreap) removeNode(t *treapNode) { if t.spanKey.npages != t.npagesKey { throw("span and treap node npages do not match") } - result := t.spanKey // Rotate t down to be leaf of tree for removal, respecting priorities. 
for t.right != nil || t.left != nil { @@ -192,7 +191,6 @@ func (root *mTreap) removeNode(t *treapNode) *mspan { t.spanKey = nil t.npagesKey = 0 mheap_.treapalloc.free(unsafe.Pointer(t)) - return result } // remove searches for, finds, removes from the treap, and returns the smallest diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 9029d19d43..5664390eae 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -34,13 +34,13 @@ const ( // span base. maxObletBytes = 128 << 10 - // idleCheckThreshold specifies how many units of work to do - // between run queue checks in an idle worker. Assuming a scan + // drainCheckThreshold specifies how many units of work to do + // between self-preemption checks in gcDrain. Assuming a scan // rate of 1 MB/ms, this is ~100 µs. Lower values have higher // overhead in the scan loop (the scheduler check may perform // a syscall, so its overhead is nontrivial). Higher values // make the system less responsive to incoming work. - idleCheckThreshold = 100000 + drainCheckThreshold = 100000 ) // gcMarkRootPrepare queues root scanning jobs (stacks, globals, and @@ -770,6 +770,13 @@ func scanstack(gp *g, gcw *gcWork) { shrinkstack(gp) } + // Scan the saved context register. This is effectively a live + // register that gets moved back and forth between the + // register and sched.ctxt without a write barrier. + if gp.sched.ctxt != nil { + scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw) + } + // Scan the stack. var cache pcvalueCache scanframe := func(frame *stkframe, unused unsafe.Pointer) bool { @@ -861,6 +868,7 @@ const ( gcDrainNoBlock gcDrainFlushBgCredit gcDrainIdle + gcDrainFractional // gcDrainBlock means neither gcDrainUntilPreempt or // gcDrainNoBlock. It is the default, but callers should use @@ -877,6 +885,10 @@ const ( // If flags&gcDrainIdle != 0, gcDrain returns when there is other work // to do. This implies gcDrainNoBlock. // +// If flags&gcDrainFractional != 0, gcDrain self-preempts when +// pollFractionalWorkerExit() returns true. This implies +// gcDrainNoBlock. +// // If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is // unable to get more work. Otherwise, it will block until all // blocking calls are blocked in gcDrain. @@ -893,14 +905,24 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { gp := getg().m.curg preemptible := flags&gcDrainUntilPreempt != 0 - blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainNoBlock) == 0 + blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainFractional|gcDrainNoBlock) == 0 flushBgCredit := flags&gcDrainFlushBgCredit != 0 idle := flags&gcDrainIdle != 0 initScanWork := gcw.scanWork - // idleCheck is the scan work at which to perform the next - // idle check with the scheduler. - idleCheck := initScanWork + idleCheckThreshold + + // checkWork is the scan work before performing the next + // self-preempt check. + checkWork := int64(1<<63 - 1) + var check func() bool + if flags&(gcDrainIdle|gcDrainFractional) != 0 { + checkWork = initScanWork + drainCheckThreshold + if idle { + check = pollWork + } else if flags&gcDrainFractional != 0 { + check = pollFractionalWorkerExit + } + } // Drain root marking jobs. 
if work.markrootNext < work.markrootJobs { @@ -910,7 +932,7 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { break } markroot(gcw, job) - if idle && pollWork() { + if check != nil && check() { goto done } } @@ -951,12 +973,12 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { gcFlushBgCredit(gcw.scanWork - initScanWork) initScanWork = 0 } - idleCheck -= gcw.scanWork + checkWork -= gcw.scanWork gcw.scanWork = 0 - if idle && idleCheck <= 0 { - idleCheck += idleCheckThreshold - if pollWork() { + if checkWork <= 0 { + checkWork += drainCheckThreshold + if check != nil && check() { break } } @@ -1212,6 +1234,9 @@ func shade(b uintptr) { // obj is the start of an object with mark mbits. // If it isn't already marked, mark it and enqueue into gcw. // base and off are for debugging only and could be removed. +// +// See also wbBufFlush1, which partially duplicates this logic. +// //go:nowritebarrierrec func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork, objIndex uintptr) { // obj should be start of allocation, and so must be at least pointer-aligned. @@ -1356,10 +1381,7 @@ func gcmarknewobject(obj, size, scanSize uintptr) { // // The world must be stopped. func gcMarkTinyAllocs() { - for _, p := range &allp { - if p == nil || p.status == _Pdead { - break - } + for _, p := range allp { c := p.mcache if c == nil || c.tiny == 0 { continue diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go index 461679b934..c6634fc78c 100644 --- a/src/runtime/mgcwork.go +++ b/src/runtime/mgcwork.go @@ -85,6 +85,13 @@ type gcWork struct { scanWork int64 } +// Most of the methods of gcWork are go:nowritebarrierrec because the +// write barrier itself can invoke gcWork methods but the methods are +// not generally re-entrant. Hence, if a gcWork method invoked the +// write barrier while the gcWork was in an inconsistent state, and +// the write barrier in turn invoked a gcWork method, it could +// permanently corrupt the gcWork. + func (w *gcWork) init() { w.wbuf1 = getempty() wbuf2 := trygetfull() @@ -96,7 +103,7 @@ func (w *gcWork) init() { // put enqueues a pointer for the garbage collector to trace. // obj must point to the beginning of a heap object or an oblet. -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) put(obj uintptr) { flushed := false wbuf := w.wbuf1 @@ -129,7 +136,7 @@ func (w *gcWork) put(obj uintptr) { // putFast does a put and returns true if it can be done quickly // otherwise it returns false and the caller needs to call put. -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) putFast(obj uintptr) bool { wbuf := w.wbuf1 if wbuf == nil { @@ -143,12 +150,45 @@ func (w *gcWork) putFast(obj uintptr) bool { return true } +// putBatch performs a put on every pointer in obj. See put for +// constraints on these pointers. +// +//go:nowritebarrierrec +func (w *gcWork) putBatch(obj []uintptr) { + if len(obj) == 0 { + return + } + + flushed := false + wbuf := w.wbuf1 + if wbuf == nil { + w.init() + wbuf = w.wbuf1 + } + + for len(obj) > 0 { + for wbuf.nobj == len(wbuf.obj) { + putfull(wbuf) + w.wbuf1, w.wbuf2 = w.wbuf2, getempty() + wbuf = w.wbuf1 + flushed = true + } + n := copy(wbuf.obj[wbuf.nobj:], obj) + wbuf.nobj += n + obj = obj[n:] + } + + if flushed && gcphase == _GCmark { + gcController.enlistWorker() + } +} + // tryGet dequeues a pointer for the garbage collector to trace. // // If there are no pointers remaining in this gcWork or in the global // queue, tryGet returns 0. 
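gcDrain now shares one checkWork counter between the idle and fractional self-preemption checks, as the hunk above shows. A toy model of that accounting (drain and the 30000-unit jobs are illustrative; only drainCheckThreshold matches a runtime value):

package main

import "fmt"

const drainCheckThreshold = 100000

// drain models the checkWork bookkeeping in gcDrain: every
// drainCheckThreshold units of scan work it calls check(), and stops
// early if check reports that the worker should yield.
func drain(jobs []int64, check func() bool) (done int64) {
	checkWork := int64(drainCheckThreshold)
	for _, w := range jobs {
		done += w
		checkWork -= w
		if checkWork <= 0 {
			checkWork += drainCheckThreshold
			if check != nil && check() {
				break
			}
		}
	}
	return done
}

func main() {
	jobs := make([]int64, 10)
	for i := range jobs {
		jobs[i] = 30000 // pretend each object is 30k units of scan work
	}
	calls := 0
	done := drain(jobs, func() bool {
		calls++
		return calls >= 2 // pretend the second poll asks us to stop
	})
	fmt.Println("scan work done:", done, "polls:", calls)
}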
Note that there may still be pointers in // other gcWork instances or other caches. -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) tryGet() uintptr { wbuf := w.wbuf1 if wbuf == nil { @@ -177,7 +217,7 @@ func (w *gcWork) tryGet() uintptr { // tryGetFast dequeues a pointer for the garbage collector to trace // if one is readily available. Otherwise it returns 0 and // the caller is expected to call tryGet(). -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) tryGetFast() uintptr { wbuf := w.wbuf1 if wbuf == nil { @@ -194,7 +234,7 @@ func (w *gcWork) tryGetFast() uintptr { // get dequeues a pointer for the garbage collector to trace, blocking // if necessary to ensure all pointers from all queues and caches have // been retrieved. get returns 0 if there are no pointers remaining. -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) get() uintptr { wbuf := w.wbuf1 if wbuf == nil { @@ -228,7 +268,7 @@ func (w *gcWork) get() uintptr { // GC can inspect them. This helps reduce the mutator's // ability to hide pointers during the concurrent mark phase. // -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) dispose() { if wbuf := w.wbuf1; wbuf != nil { if wbuf.nobj == 0 { @@ -262,7 +302,7 @@ func (w *gcWork) dispose() { // balance moves some work that's cached in this gcWork back on the // global queue. -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) balance() { if w.wbuf1 == nil { return @@ -282,7 +322,7 @@ func (w *gcWork) balance() { } // empty returns true if w has no mark work available. -//go:nowritebarrier +//go:nowritebarrierrec func (w *gcWork) empty() bool { return w.wbuf1 == nil || (w.wbuf1.nobj == 0 && w.wbuf2.nobj == 0) } diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 893587e5d2..12cf29a01d 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -56,6 +56,12 @@ type mheap struct { // Internal pages map to an arbitrary span. // For pages that have never been allocated, spans entries are nil. // + // Modifications are protected by mheap.lock. Reads can be + // performed without locking, but ONLY from indexes that are + // known to contain in-use or stack spans. This means there + // must not be a safe-point between establishing that an + // address is live and looking it up in the spans array. + // // This is backed by a reserved region of the address space so // it can grow without moving. The memory up to len(spans) is // mapped. cap(spans) indicates the total reserved memory. @@ -154,6 +160,8 @@ type mheap struct { specialfinalizeralloc fixalloc // allocator for specialfinalizer* specialprofilealloc fixalloc // allocator for specialprofile* speciallock mutex // lock for special record allocators. + + unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF } var mheap_ mheap @@ -311,6 +319,17 @@ func (s *mspan) layout() (size, n, total uintptr) { return } +// recordspan adds a newly allocated span to h.allspans. +// +// This only happens the first time a span is allocated from +// mheap.spanalloc (it is not called when a span is reused). +// +// Write barriers are disallowed here because it can be called from +// gcWork when allocating new workbufs. However, because it's an +// indirect call from the fixalloc initializer, the compiler can't see +// this. 
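putBatch above is essentially the put loop unrolled over a slice: fill the current workbuf, hand full buffers off, and keep copying. A self-contained toy version (workbuf here is a 4-slot stand-in, not the runtime type, and "flushing" just appends to a slice):

package main

import "fmt"

const bufLen = 4

// workbuf is a toy fixed-size buffer standing in for the runtime's
// workbuf type.
type workbuf struct {
	obj  [bufLen]uintptr
	nobj int
}

type toyGCWork struct {
	cur     *workbuf
	flushed []*workbuf
}

// putBatch mirrors the copy loop added to gcWork.putBatch: whenever the
// current buffer is full, hand it off and start a fresh one, then copy
// as much of the batch as fits.
func (w *toyGCWork) putBatch(obj []uintptr) {
	if w.cur == nil {
		w.cur = new(workbuf)
	}
	for len(obj) > 0 {
		for w.cur.nobj == len(w.cur.obj) {
			w.flushed = append(w.flushed, w.cur)
			w.cur = new(workbuf)
		}
		n := copy(w.cur.obj[w.cur.nobj:], obj)
		w.cur.nobj += n
		obj = obj[n:]
	}
}

func main() {
	var w toyGCWork
	ptrs := make([]uintptr, 10)
	for i := range ptrs {
		ptrs[i] = uintptr(0x1000 + i)
	}
	w.putBatch(ptrs)
	fmt.Println("full buffers handed off:", len(w.flushed), "left in current:", w.cur.nobj)
}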
+// +//go:nowritebarrierrec func recordspan(vh unsafe.Pointer, p unsafe.Pointer) { h := (*mheap)(vh) s := (*mspan)(p) @@ -331,12 +350,13 @@ func recordspan(vh unsafe.Pointer, p unsafe.Pointer) { copy(new, h.allspans) } oldAllspans := h.allspans - h.allspans = new + *(*notInHeapSlice)(unsafe.Pointer(&h.allspans)) = *(*notInHeapSlice)(unsafe.Pointer(&new)) if len(oldAllspans) != 0 { sysFree(unsafe.Pointer(&oldAllspans[0]), uintptr(cap(oldAllspans))*unsafe.Sizeof(oldAllspans[0]), &memstats.other_sys) } } - h.allspans = append(h.allspans, s) + h.allspans = h.allspans[:len(h.allspans)+1] + h.allspans[len(h.allspans)-1] = s } // A spanClass represents the size class and noscan-ness of a span. @@ -857,7 +877,7 @@ HaveSpan: // Large spans have a minimum size of 1MByte. The maximum number of large spans to support // 1TBytes is 1 million, experimentation using random sizes indicates that the depth of // the tree is less that 2x that of a perfectly balanced tree. For 1TByte can be referenced -// by a perfectly balanced tree with a a depth of 20. Twice that is an acceptable 40. +// by a perfectly balanced tree with a depth of 20. Twice that is an acceptable 40. func (h *mheap) isLargeSpan(npages uintptr) bool { return npages >= uintptr(len(h.free)) } @@ -1123,34 +1143,35 @@ func scavengelist(list *mSpanList, now, limit uint64) uintptr { var sumreleased uintptr for s := list.first; s != nil; s = s.next { - if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages { - start := s.base() - end := start + s.npages<<_PageShift - if physPageSize > _PageSize { - // We can only release pages in - // physPageSize blocks, so round start - // and end in. (Otherwise, madvise - // will round them *out* and release - // more memory than we want.) - start = (start + physPageSize - 1) &^ (physPageSize - 1) - end &^= physPageSize - 1 - if end <= start { - // start and end don't span a - // whole physical page. - continue - } - } - len := end - start - - released := len - (s.npreleased << _PageShift) - if physPageSize > _PageSize && released == 0 { + if (now-uint64(s.unusedsince)) <= limit || s.npreleased == s.npages { + continue + } + start := s.base() + end := start + s.npages<<_PageShift + if physPageSize > _PageSize { + // We can only release pages in + // physPageSize blocks, so round start + // and end in. (Otherwise, madvise + // will round them *out* and release + // more memory than we want.) + start = (start + physPageSize - 1) &^ (physPageSize - 1) + end &^= physPageSize - 1 + if end <= start { + // start and end don't span a + // whole physical page. 
continue } - memstats.heap_released += uint64(released) - sumreleased += released - s.npreleased = len >> _PageShift - sysUnused(unsafe.Pointer(start), len) } + len := end - start + + released := len - (s.npreleased << _PageShift) + if physPageSize > _PageSize && released == 0 { + continue + } + memstats.heap_released += uint64(released) + sumreleased += released + s.npreleased = len >> _PageShift + sysUnused(unsafe.Pointer(start), len) } return sumreleased } diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go index d15f1f7346..fb7cbc28fd 100644 --- a/src/runtime/mkduff.go +++ b/src/runtime/mkduff.go @@ -70,7 +70,7 @@ func zeroAMD64(w io.Writer) { fmt.Fprintln(w, "\tMOVUPS\tX0,16(DI)") fmt.Fprintln(w, "\tMOVUPS\tX0,32(DI)") fmt.Fprintln(w, "\tMOVUPS\tX0,48(DI)") - fmt.Fprintln(w, "\tADDQ\t$64,DI") + fmt.Fprintln(w, "\tLEAQ\t64(DI),DI") // We use lea instead of add, to avoid clobbering flags fmt.Fprintln(w) } fmt.Fprintln(w, "\tRET") @@ -151,12 +151,13 @@ func copyARM(w io.Writer) { func zeroARM64(w io.Writer) { // ZR: always zero - // R16 (aka REGRT1): ptr to memory to be zeroed - 8 + // R16 (aka REGRT1): ptr to memory to be zeroed // On return, R16 points to the last zeroed dword. fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $-8-0") - for i := 0; i < 128; i++ { - fmt.Fprintln(w, "\tMOVD.W\tZR, 8(R16)") + for i := 0; i < 63; i++ { + fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R16)") } + fmt.Fprintln(w, "\tSTP\t(ZR, ZR), (R16)") fmt.Fprintln(w, "\tRET") } diff --git a/src/runtime/mksizeclasses.go b/src/runtime/mksizeclasses.go index 0cb2b33a8c..b146dbcd6c 100644 --- a/src/runtime/mksizeclasses.go +++ b/src/runtime/mksizeclasses.go @@ -24,8 +24,8 @@ // In practice, only one of the wastes comes into play for a // given size (sizes < 512 waste mainly on the round-up, // sizes > 512 waste mainly on the page chopping). -// -// TODO(rsc): Compute max waste for any given size. +// For really small sizes, alignment constraints force the +// overhead higher. package main @@ -242,15 +242,18 @@ nextk: } func printComment(w io.Writer, classes []class) { - fmt.Fprintf(w, "// %-5s %-9s %-10s %-7s %-11s\n", "class", "bytes/obj", "bytes/span", "objects", "waste bytes") + fmt.Fprintf(w, "// %-5s %-9s %-10s %-7s %-10s %-9s\n", "class", "bytes/obj", "bytes/span", "objects", "tail waste", "max waste") + prevSize := 0 for i, c := range classes { if i == 0 { continue } spanSize := c.npages * pageSize objects := spanSize / c.size - waste := spanSize - c.size*(spanSize/c.size) - fmt.Fprintf(w, "// %5d %9d %10d %7d %11d\n", i, c.size, spanSize, objects, waste) + tailWaste := spanSize - c.size*(spanSize/c.size) + maxWaste := float64((c.size-prevSize-1)*objects+tailWaste) / float64(spanSize) + prevSize = c.size + fmt.Fprintf(w, "// %5d %9d %10d %7d %10d %8.2f%%\n", i, c.size, spanSize, objects, tailWaste, 100*maxWaste) } fmt.Fprintf(w, "\n") } diff --git a/src/runtime/mmap.go b/src/runtime/mmap.go index 62f3780db8..e1333c62fe 100644 --- a/src/runtime/mmap.go +++ b/src/runtime/mmap.go @@ -16,7 +16,8 @@ import "unsafe" // We only pass the lower 32 bits of file offset to the // assembly routine; the higher bits (if required), should be provided // by the assembly routine as 0. -func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer +// The err result is an OS error code such as ENOMEM. +func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int) // munmap calls the munmap system call. It is implemented in assembly. 
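The scavenging hunk above keeps the old physical-page rounding but flattens the control flow with early continues. The rounding itself, extracted into a standalone helper (roundToPhysPages is an invented name; the sizes in main are hypothetical):

package main

import "fmt"

// roundToPhysPages mirrors the scavengelist rounding: madvise can only
// release whole physical pages, so round start up and end down, and skip
// the span if that leaves nothing to release.
func roundToPhysPages(start, end, physPageSize uintptr) (uintptr, uintptr, bool) {
	start = (start + physPageSize - 1) &^ (physPageSize - 1)
	end &^= physPageSize - 1
	if end <= start {
		return 0, 0, false // start and end don't span a whole physical page
	}
	return start, end, true
}

func main() {
	// Hypothetical 8 KB runtime page on a system with 16 KB physical pages.
	s, e, ok := roundToPhysPages(0x10000+8192, 0x10000+8192+8192, 16384)
	fmt.Println(s, e, ok) // a single 8 KB page can't be released: ok=false
}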
func munmap(addr unsafe.Pointer, n uintptr) diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go index 2bd09b6a26..259473c9ce 100644 --- a/src/runtime/mprof.go +++ b/src/runtime/mprof.go @@ -596,7 +596,7 @@ func record(r *MemProfileRecord, b *bucket) { r.AllocObjects = int64(mp.active.allocs) r.FreeObjects = int64(mp.active.frees) if raceenabled { - racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(unsafe.Pointer(&r)), funcPC(MemProfile)) + racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), funcPC(MemProfile)) } if msanenabled { msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0)) @@ -644,7 +644,7 @@ func BlockProfile(p []BlockProfileRecord) (n int, ok bool) { r.Count = bp.count r.Cycles = bp.cycles if raceenabled { - racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(unsafe.Pointer(&p)), funcPC(BlockProfile)) + racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), funcPC(BlockProfile)) } if msanenabled { msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0)) @@ -741,7 +741,7 @@ func GoroutineProfile(p []StackRecord) (n int, ok bool) { // Save current goroutine. sp := getcallersp(unsafe.Pointer(&p)) - pc := getcallerpc(unsafe.Pointer(&p)) + pc := getcallerpc() systemstack(func() { saveg(pc, sp, gp, &r[0]) }) @@ -786,7 +786,7 @@ func Stack(buf []byte, all bool) int { if len(buf) > 0 { gp := getg() sp := getcallersp(unsafe.Pointer(&buf)) - pc := getcallerpc(unsafe.Pointer(&buf)) + pc := getcallerpc() systemstack(func() { g0 := getg() // Force traceback=1 to override GOTRACEBACK setting, @@ -826,7 +826,7 @@ func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) { } if gp.m.curg == nil || gp == gp.m.curg { goroutineheader(gp) - pc := getcallerpc(unsafe.Pointer(&p)) + pc := getcallerpc() sp := getcallersp(unsafe.Pointer(&p)) systemstack(func() { traceback(pc, sp, 0, gp) @@ -846,7 +846,7 @@ func tracefree(p unsafe.Pointer, size uintptr) { gp.m.traceback = 2 print("tracefree(", p, ", ", hex(size), ")\n") goroutineheader(gp) - pc := getcallerpc(unsafe.Pointer(&p)) + pc := getcallerpc() sp := getcallersp(unsafe.Pointer(&p)) systemstack(func() { traceback(pc, sp, 0, gp) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 1cb44a15dd..53caefc0fe 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -589,12 +589,13 @@ func updatememstats() { memstats.heap_objects = memstats.nmalloc - memstats.nfree } +// cachestats flushes all mcache stats. +// +// The world must be stopped. +// //go:nowritebarrier func cachestats() { - for _, p := range &allp { - if p == nil { - break - } + for _, p := range allp { c := p.mcache if c == nil { continue @@ -610,9 +611,6 @@ func cachestats() { //go:nowritebarrier func flushmcache(i int) { p := allp[i] - if p == nil { - return - } c := p.mcache if c == nil { return @@ -666,7 +664,7 @@ func purgecachedstats(c *mcache) { // overflow errors. //go:nosplit func mSysStatInc(sysStat *uint64, n uintptr) { - if sys.BigEndian != 0 { + if sys.BigEndian { atomic.Xadd64(sysStat, int64(n)) return } @@ -680,7 +678,7 @@ func mSysStatInc(sysStat *uint64, n uintptr) { // mSysStatInc apply. 
//go:nosplit func mSysStatDec(sysStat *uint64, n uintptr) { - if sys.BigEndian != 0 { + if sys.BigEndian { atomic.Xadd64(sysStat, -int64(n)) return } diff --git a/src/runtime/mwbbuf.go b/src/runtime/mwbbuf.go new file mode 100644 index 0000000000..2c06996210 --- /dev/null +++ b/src/runtime/mwbbuf.go @@ -0,0 +1,248 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This implements the write barrier buffer. The write barrier itself +// is gcWriteBarrier and is implemented in assembly. +// +// The write barrier has a fast path and a slow path. The fast path +// simply enqueues to a per-P write barrier buffer. It's written in +// assembly and doesn't clobber any general purpose registers, so it +// doesn't have the usual overheads of a Go call. +// +// When the buffer fills up, the write barrier invokes the slow path +// (wbBufFlush) to flush the buffer to the GC work queues. In this +// path, since the compiler didn't spill registers, we spill *all* +// registers and disallow any GC safe points that could observe the +// stack frame (since we don't know the types of the spilled +// registers). + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +// testSmallBuf forces a small write barrier buffer to stress write +// barrier flushing. +const testSmallBuf = false + +// wbBuf is a per-P buffer of pointers queued by the write barrier. +// This buffer is flushed to the GC workbufs when it fills up and on +// various GC transitions. +// +// This is closely related to a "sequential store buffer" (SSB), +// except that SSBs are usually used for maintaining remembered sets, +// while this is used for marking. +type wbBuf struct { + // next points to the next slot in buf. It must not be a + // pointer type because it can point past the end of buf and + // must be updated without write barriers. + // + // This is a pointer rather than an index to optimize the + // write barrier assembly. + next uintptr + + // end points to just past the end of buf. It must not be a + // pointer type because it points past the end of buf and must + // be updated without write barriers. + end uintptr + + // buf stores a series of pointers to execute write barriers + // on. This must be a multiple of wbBufEntryPointers because + // the write barrier only checks for overflow once per entry. + buf [wbBufEntryPointers * wbBufEntries]uintptr +} + +const ( + // wbBufEntries is the number of write barriers between + // flushes of the write barrier buffer. + // + // This trades latency for throughput amortization. Higher + // values amortize flushing overhead more, but increase the + // latency of flushing. Higher values also increase the cache + // footprint of the buffer. + // + // TODO: What is the latency cost of this? Tune this value. + wbBufEntries = 256 + + // wbBufEntryPointers is the number of pointers added to the + // buffer by each write barrier. + wbBufEntryPointers = 2 +) + +// reset empties b by resetting its next and end pointers. +func (b *wbBuf) reset() { + start := uintptr(unsafe.Pointer(&b.buf[0])) + b.next = start + if gcBlackenPromptly || writeBarrier.cgo { + // Effectively disable the buffer by forcing a flush + // on every barrier. + b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers])) + } else if testSmallBuf { + // For testing, allow two barriers in the buffer. If + // we only did one, then barriers of non-heap pointers + // would be no-ops. 
This lets us combine a buffered + // barrier with a flush at a later time. + b.end = uintptr(unsafe.Pointer(&b.buf[2*wbBufEntryPointers])) + } else { + b.end = start + uintptr(len(b.buf))*unsafe.Sizeof(b.buf[0]) + } + + if (b.end-b.next)%(wbBufEntryPointers*unsafe.Sizeof(b.buf[0])) != 0 { + throw("bad write barrier buffer bounds") + } +} + +// putFast adds old and new to the write barrier buffer and returns +// false if a flush is necessary. Callers should use this as: +// +// buf := &getg().m.p.ptr().wbBuf +// if !buf.putFast(old, new) { +// wbBufFlush(...) +// } +// +// The arguments to wbBufFlush depend on whether the caller is doing +// its own cgo pointer checks. If it is, then this can be +// wbBufFlush(nil, 0). Otherwise, it must pass the slot address and +// new. +// +// Since buf is a per-P resource, the caller must ensure there are no +// preemption points while buf is in use. +// +// It must be nowritebarrierrec to because write barriers here would +// corrupt the write barrier buffer. It (and everything it calls, if +// it called anything) has to be nosplit to avoid scheduling on to a +// different P and a different buffer. +// +//go:nowritebarrierrec +//go:nosplit +func (b *wbBuf) putFast(old, new uintptr) bool { + p := (*[2]uintptr)(unsafe.Pointer(b.next)) + p[0] = old + p[1] = new + b.next += 2 * sys.PtrSize + return b.next != b.end +} + +// wbBufFlush flushes the current P's write barrier buffer to the GC +// workbufs. It is passed the slot and value of the write barrier that +// caused the flush so that it can implement cgocheck. +// +// This must not have write barriers because it is part of the write +// barrier implementation. +// +// This and everything it calls must be nosplit because 1) the stack +// contains untyped slots from gcWriteBarrier and 2) there must not be +// a GC safe point between the write barrier test in the caller and +// flushing the buffer. +// +// TODO: A "go:nosplitrec" annotation would be perfect for this. +// +//go:nowritebarrierrec +//go:nosplit +func wbBufFlush(dst *uintptr, src uintptr) { + if getg().m.dying > 0 { + // We're going down. Not much point in write barriers + // and this way we can allow write barriers in the + // panic path. + return + } + + if writeBarrier.cgo && dst != nil { + // This must be called from the stack that did the + // write. It's nosplit all the way down. + cgoCheckWriteBarrier(dst, src) + if !writeBarrier.needed { + // We were only called for cgocheck. + b := &getg().m.p.ptr().wbBuf + b.next = uintptr(unsafe.Pointer(&b.buf[0])) + return + } + } + + // Switch to the system stack so we don't have to worry about + // the untyped stack slots or safe points. + systemstack(func() { + wbBufFlush1(getg().m.p.ptr()) + }) +} + +// wbBufFlush1 flushes p's write barrier buffer to the GC work queue. +// +// This must not have write barriers because it is part of the write +// barrier implementation, so this may lead to infinite loops or +// buffer corruption. +// +// This must be non-preemptible because it uses the P's workbuf. +// +//go:nowritebarrierrec +//go:systemstack +func wbBufFlush1(_p_ *p) { + // Get the buffered pointers. + start := uintptr(unsafe.Pointer(&_p_.wbBuf.buf[0])) + n := (_p_.wbBuf.next - start) / unsafe.Sizeof(_p_.wbBuf.buf[0]) + ptrs := _p_.wbBuf.buf[:n] + + // Reset the buffer. + _p_.wbBuf.reset() + + if useCheckmark { + // Slow path for checkmark mode. + for _, ptr := range ptrs { + shade(ptr) + } + return + } + + // Mark all of the pointers in the buffer and record only the + // pointers we greyed. 
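The putFast contract documented above (enqueue into the per-P buffer, flush when it reports false) can be modelled without unsafe pointer arithmetic. The index-based sketch below keeps the same fill/flush behaviour; the real buffer tracks raw next/end addresses instead of an index so the assembly fast path in gcWriteBarrier stays a few instructions.

package main

import "fmt"

const (
	wbBufEntryPointers = 2
	wbBufEntries       = 4 // the real buffer uses 256
)

// toyWBBuf is an index-based stand-in for the per-P wbBuf.
type toyWBBuf struct {
	next int
	buf  [wbBufEntryPointers * wbBufEntries]uintptr
}

// putFast queues an (old, new) pointer pair and reports false when the
// buffer has just filled and the caller must flush (wbBufFlush in the
// runtime) before issuing another barrier.
func (b *toyWBBuf) putFast(old, new uintptr) bool {
	b.buf[b.next] = old
	b.buf[b.next+1] = new
	b.next += wbBufEntryPointers
	return b.next != len(b.buf)
}

func (b *toyWBBuf) flush() {
	fmt.Println("flushing", b.next/wbBufEntryPointers, "buffered barriers to the GC work queue")
	b.next = 0
}

func main() {
	var b toyWBBuf
	for i := 0; i < 10; i++ {
		if !b.putFast(uintptr(i), uintptr(100+i)) {
			b.flush()
		}
	}
}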
We use the buffer itself to temporarily + // record greyed pointers. + // + // TODO: Should scanobject/scanblock just stuff pointers into + // the wbBuf? Then this would become the sole greying path. + gcw := &_p_.gcw + pos := 0 + arenaStart := mheap_.arena_start + for _, ptr := range ptrs { + if ptr < arenaStart { + // nil pointers are very common, especially + // for the "old" values. Filter out these and + // other "obvious" non-heap pointers ASAP. + // + // TODO: Should we filter out nils in the fast + // path to reduce the rate of flushes? + continue + } + // TODO: This doesn't use hbits, so calling + // heapBitsForObject seems a little silly. We could + // easily separate this out since heapBitsForObject + // just calls heapBitsForAddr(obj) to get hbits. + obj, _, span, objIndex := heapBitsForObject(ptr, 0, 0) + if obj == 0 { + continue + } + // TODO: Consider making two passes where the first + // just prefetches the mark bits. + mbits := span.markBitsForIndex(objIndex) + if mbits.isMarked() { + continue + } + mbits.setMarked() + if span.spanclass.noscan() { + gcw.bytesMarked += uint64(span.elemsize) + continue + } + ptrs[pos] = obj + pos++ + } + + // Enqueue the greyed objects. + gcw.putBatch(ptrs[:pos]) + if gcphase == _GCmarktermination || gcBlackenPromptly { + // Ps aren't allowed to cache work during mark + // termination. + gcw.dispose() + } +} diff --git a/src/runtime/netpoll_kqueue.go b/src/runtime/netpoll_kqueue.go index 71de98bcd6..4d5d1a4ea8 100644 --- a/src/runtime/netpoll_kqueue.go +++ b/src/runtime/netpoll_kqueue.go @@ -88,10 +88,23 @@ retry: for i := 0; i < int(n); i++ { ev := &events[i] var mode int32 - if ev.filter == _EVFILT_READ { + switch ev.filter { + case _EVFILT_READ: mode += 'r' - } - if ev.filter == _EVFILT_WRITE { + + // On some systems when the read end of a pipe + // is closed the write end will not get a + // _EVFILT_WRITE event, but will get a + // _EVFILT_READ event with EV_EOF set. + // Note that setting 'w' here just means that we + // will wake up a goroutine waiting to write; + // that goroutine will try the write again, + // and the appropriate thing will happen based + // on what that write returns (success, EPIPE, EAGAIN). + if ev.flags&_EV_EOF != 0 { + mode += 'w' + } + case _EVFILT_WRITE: mode += 'w' } if mode != 0 { diff --git a/src/runtime/netpoll_windows.go b/src/runtime/netpoll_windows.go index 79dafb0279..134071f5e3 100644 --- a/src/runtime/netpoll_windows.go +++ b/src/runtime/netpoll_windows.go @@ -47,7 +47,7 @@ func netpolldescriptor() uintptr { func netpollopen(fd uintptr, pd *pollDesc) int32 { if stdcall4(_CreateIoCompletionPort, fd, iocphandle, 0, 0) == 0 { - return -int32(getlasterror()) + return int32(getlasterror()) } return 0 } diff --git a/src/runtime/os3_plan9.go b/src/runtime/os3_plan9.go index 5d4b5a6698..3b65a2c9ba 100644 --- a/src/runtime/os3_plan9.go +++ b/src/runtime/os3_plan9.go @@ -153,3 +153,6 @@ func setThreadCPUProfiler(hz int32) { // TODO: Enable profiling interrupts. getg().m.profilehz = hz } + +// gsignalStack is unused on Plan 9. +type gsignalStack struct{} diff --git a/src/runtime/os3_solaris.go b/src/runtime/os3_solaris.go index 067fb3bb0a..c53f6132ee 100644 --- a/src/runtime/os3_solaris.go +++ b/src/runtime/os3_solaris.go @@ -181,6 +181,12 @@ func newosproc(mp *m, _ unsafe.Pointer) { } } +func exitThread(wait *uint32) { + // We should never reach exitThread on Solaris because we let + // libc clean up threads. 
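wbBufFlush1 above filters the buffered pointers in place and reuses the surviving prefix for a single putBatch. A minimal sketch of that compaction step (compactGrey and needsGrey are invented names; the real code consults heapBitsForObject and the span mark bits instead of a callback):

package main

import "fmt"

// compactGrey walks the buffered pointers, drops the ones that don't
// need greying, and reuses the same slice to collect the survivors for
// one batched enqueue.
func compactGrey(ptrs []uintptr, needsGrey func(uintptr) bool) []uintptr {
	pos := 0
	for _, ptr := range ptrs {
		if ptr == 0 || !needsGrey(ptr) {
			continue // nil and already-marked pointers are skipped
		}
		ptrs[pos] = ptr
		pos++
	}
	return ptrs[:pos]
}

func main() {
	marked := map[uintptr]bool{0x2000: true}
	ptrs := []uintptr{0, 0x1000, 0x2000, 0x3000}
	grey := compactGrey(ptrs, func(p uintptr) bool { return !marked[p] })
	fmt.Println(grey) // [4096 12288]
}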
+ throw("exitThread") +} + var urandom_dev = []byte("/dev/urandom\x00") //go:nosplit @@ -396,12 +402,12 @@ func madvise(addr unsafe.Pointer, n uintptr, flags int32) { } //go:nosplit -func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer { +func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (unsafe.Pointer, int) { p, err := doMmap(uintptr(addr), n, uintptr(prot), uintptr(flags), uintptr(fd), uintptr(off)) if p == ^uintptr(0) { - return unsafe.Pointer(err) + return nil, int(err) } - return unsafe.Pointer(p) + return unsafe.Pointer(p), 0 } //go:nosplit diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go index 15281674ae..4ab5a76373 100644 --- a/src/runtime/os_darwin.go +++ b/src/runtime/os_darwin.go @@ -135,7 +135,7 @@ func newosproc(mp *m, stk unsafe.Pointer) { // not safe to use after initialization as it does not pass an M as fnarg. // //go:nosplit -func newosproc0(stacksize uintptr, fn unsafe.Pointer, fnarg uintptr) { +func newosproc0(stacksize uintptr, fn uintptr) { stack := sysAlloc(stacksize, &memstats.stacks_sys) if stack == nil { write(2, unsafe.Pointer(&failallocatestack[0]), int32(len(failallocatestack))) @@ -145,7 +145,7 @@ func newosproc0(stacksize uintptr, fn unsafe.Pointer, fnarg uintptr) { var oset sigset sigprocmask(_SIG_SETMASK, &sigset_all, &oset) - errno := bsdthread_create(stk, fn, fnarg) + errno := bsdthread_create(stk, nil, fn) sigprocmask(_SIG_SETMASK, &oset, nil) if errno < 0 { @@ -188,7 +188,11 @@ func minit() { // Called from dropm to undo the effect of an minit. //go:nosplit func unminit() { - unminitSignals() + // The alternate signal stack is buggy on arm and arm64. + // See minit. + if GOARCH != "arm" && GOARCH != "arm64" { + unminitSignals() + } } // Mach IPC, to get at semaphores diff --git a/src/runtime/os_darwin_arm64.go b/src/runtime/os_darwin_arm64.go index 01285afa19..8de132d8e2 100644 --- a/src/runtime/os_darwin_arm64.go +++ b/src/runtime/os_darwin_arm64.go @@ -4,8 +4,6 @@ package runtime -var supportCRC32 = false - //go:nosplit func cputicks() int64 { // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand(). diff --git a/src/runtime/os_freebsd.go b/src/runtime/os_freebsd.go index 7c989de109..31708e2454 100644 --- a/src/runtime/os_freebsd.go +++ b/src/runtime/os_freebsd.go @@ -69,15 +69,19 @@ func sysctlnametomib(name []byte, mib *[_CTL_MAXNAME]uint32) uint32 { } const ( - _CPU_SETSIZE_MAX = 32 // Limited by _MaxGomaxprocs(256) in runtime2.go. _CPU_CURRENT_PID = -1 // Current process ID. ) //go:noescape func cpuset_getaffinity(level int, which int, id int64, size int, mask *byte) int32 +//go:systemstack func getncpu() int32 { - var mask [_CPU_SETSIZE_MAX]byte + // Use a large buffer for the CPU mask. We're on the system + // stack, so this is fine, and we can't allocate memory for a + // dynamically-sized buffer at this point. 
+ const maxCPUs = 64 * 1024 + var mask [maxCPUs / 8]byte var mib [_CTL_MAXNAME]uint32 // According to FreeBSD's /usr/src/sys/kern/kern_cpuset.c, @@ -99,21 +103,20 @@ func getncpu() int32 { return 1 } - size := maxcpus / _NBBY - ptrsize := uint32(unsafe.Sizeof(uintptr(0))) - if size < ptrsize { - size = ptrsize + maskSize := int(maxcpus+7) / 8 + if maskSize < sys.PtrSize { + maskSize = sys.PtrSize } - if size > _CPU_SETSIZE_MAX { - return 1 + if maskSize > len(mask) { + maskSize = len(mask) } if cpuset_getaffinity(_CPU_LEVEL_WHICH, _CPU_WHICH_PID, _CPU_CURRENT_PID, - int(size), (*byte)(unsafe.Pointer(&mask[0]))) != 0 { + maskSize, (*byte)(unsafe.Pointer(&mask[0]))) != 0 { return 1 } n := int32(0) - for _, v := range mask[:size] { + for _, v := range mask[:maskSize] { for v != 0 { n += int32(v & 1) v >>= 1 diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go index 78899737b6..98e7f52b9e 100644 --- a/src/runtime/os_linux.go +++ b/src/runtime/os_linux.go @@ -89,13 +89,13 @@ func getproccount() int32 { // buffers, but we don't have a dynamic memory allocator at the // moment, so that's a bit tricky and seems like overkill. const maxCPUs = 64 * 1024 - var buf [maxCPUs / (sys.PtrSize * 8)]uintptr + var buf [maxCPUs / 8]byte r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0]) if r < 0 { return 1 } n := int32(0) - for _, v := range buf[:r/sys.PtrSize] { + for _, v := range buf[:r] { for v != 0 { n += int32(v & 1) v >>= 1 @@ -193,6 +193,8 @@ const ( var procAuxv = []byte("/proc/self/auxv\x00") +func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32 + func sysargs(argc int32, argv **byte) { n := argc + 1 @@ -206,45 +208,46 @@ func sysargs(argc int32, argv **byte) { // now argv+n is auxv auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize)) - if sysauxv(auxv[:]) == 0 { - // In some situations we don't get a loader-provided - // auxv, such as when loaded as a library on Android. - // Fall back to /proc/self/auxv. - fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0) - if fd < 0 { - // On Android, /proc/self/auxv might be unreadable (issue 9229), so we fallback to - // try using mincore to detect the physical page size. - // mincore should return EINVAL when address is not a multiple of system page size. - const size = 256 << 10 // size of memory region to allocate - p := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) - if uintptr(p) < 4096 { - return - } - var n uintptr - for n = 4 << 10; n < size; n <<= 1 { - err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0]) - if err == 0 { - physPageSize = n - break - } - } - if physPageSize == 0 { - physPageSize = size - } - munmap(p, size) + if sysauxv(auxv[:]) != 0 { + return + } + // In some situations we don't get a loader-provided + // auxv, such as when loaded as a library on Android. + // Fall back to /proc/self/auxv. + fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0) + if fd < 0 { + // On Android, /proc/self/auxv might be unreadable (issue 9229), so we fallback to + // try using mincore to detect the physical page size. + // mincore should return EINVAL when address is not a multiple of system page size. 
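Both the Linux getproccount change and the FreeBSD getncpu change above end with the same popcount over a byte mask filled in by the kernel. In isolation (countCPUs is an invented name; the mask in main is hypothetical):

package main

import "fmt"

// countCPUs counts the set bits in an affinity mask, one bit per
// allowed CPU, exactly as the loops in getproccount/getncpu do.
func countCPUs(mask []byte) int32 {
	n := int32(0)
	for _, v := range mask {
		for v != 0 {
			n += int32(v & 1)
			v >>= 1
		}
	}
	return n
}

func main() {
	// Hypothetical mask: CPUs 0-5 and 8 allowed.
	fmt.Println(countCPUs([]byte{0x3f, 0x01})) // 7
}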
+ const size = 256 << 10 // size of memory region to allocate + p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) + if err != 0 { return } - var buf [128]uintptr - n := read(fd, noescape(unsafe.Pointer(&buf[0])), int32(unsafe.Sizeof(buf))) - closefd(fd) - if n < 0 { - return + var n uintptr + for n = 4 << 10; n < size; n <<= 1 { + err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0]) + if err == 0 { + physPageSize = n + break + } + } + if physPageSize == 0 { + physPageSize = size } - // Make sure buf is terminated, even if we didn't read - // the whole file. - buf[len(buf)-2] = _AT_NULL - sysauxv(buf[:]) + munmap(p, size) + return + } + var buf [128]uintptr + n = read(fd, noescape(unsafe.Pointer(&buf[0])), int32(unsafe.Sizeof(buf))) + closefd(fd) + if n < 0 { + return } + // Make sure buf is terminated, even if we didn't read + // the whole file. + buf[len(buf)-2] = _AT_NULL + sysauxv(buf[:]) } func sysauxv(auxv []uintptr) int { @@ -382,7 +385,7 @@ func raise(sig uint32) func raiseproc(sig uint32) //go:noescape -func sched_getaffinity(pid, len uintptr, buf *uintptr) int32 +func sched_getaffinity(pid, len uintptr, buf *byte) int32 func osyield() //go:nosplit diff --git a/src/runtime/os_linux_arm64.go b/src/runtime/os_linux_arm64.go index 986a34135e..96827e7c9f 100644 --- a/src/runtime/os_linux_arm64.go +++ b/src/runtime/os_linux_arm64.go @@ -2,14 +2,22 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// +build arm64 + package runtime -const ( - _ARM64_FEATURE_HAS_CRC32 = 0x80 -) +// For go:linkname +import _ "unsafe" var randomNumber uint32 -var supportCRC32 bool + +// arm64 doesn't have a 'cpuid' instruction equivalent and relies on +// HWCAP/HWCAP2 bits for hardware capabilities. + +//go:linkname cpu_hwcap internal/cpu.arm64_hwcap +//go:linkname cpu_hwcap2 internal/cpu.arm64_hwcap2 +var cpu_hwcap uint +var cpu_hwcap2 uint func archauxv(tag, val uintptr) { switch tag { @@ -20,7 +28,9 @@ func archauxv(tag, val uintptr) { randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24 case _AT_HWCAP: - supportCRC32 = val&_ARM64_FEATURE_HAS_CRC32 != 0 + cpu_hwcap = uint(val) + case _AT_HWCAP2: + cpu_hwcap2 = uint(val) } } diff --git a/src/runtime/os_linux_noauxv.go b/src/runtime/os_linux_noauxv.go index 5e9f03120d..db6e5a0530 100644 --- a/src/runtime/os_linux_noauxv.go +++ b/src/runtime/os_linux_noauxv.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64,!arm,!arm64,!mips,!mipsle,!mips64,!mips64le,!s390x,!ppc64,!ppc64le +// +build !386,!amd64,!arm,!arm64,!mips,!mipsle,!mips64,!mips64le,!s390x,!ppc64,!ppc64le package runtime diff --git a/src/runtime/os_linux_ppc64x.go b/src/runtime/os_linux_ppc64x.go index b0da98b0bd..e37bfc453a 100644 --- a/src/runtime/os_linux_ppc64x.go +++ b/src/runtime/os_linux_ppc64x.go @@ -6,55 +6,22 @@ package runtime -import ( - "runtime/internal/sys" -) +// For go:linkname +import _ "unsafe" -const ( - // ISA level - // Go currently requires POWER5 as a minimum for ppc64, so we need - // to check for ISA 2.03 and beyond. 
- _PPC_FEATURE_POWER5_PLUS = 0x00020000 // ISA 2.03 (POWER5+) - _PPC_FEATURE_ARCH_2_05 = 0x00001000 // ISA 2.05 (POWER6) - _PPC_FEATURE_POWER6_EXT = 0x00000200 // mffgpr/mftgpr extension (POWER6x) - _PPC_FEATURE_ARCH_2_06 = 0x00000100 // ISA 2.06 (POWER7) - _PPC_FEATURE2_ARCH_2_07 = 0x80000000 // ISA 2.07 (POWER8) +// ppc64x doesn't have a 'cpuid' instruction equivalent and relies on +// HWCAP/HWCAP2 bits for hardware capabilities. - // Standalone capabilities - _PPC_FEATURE_HAS_ALTIVEC = 0x10000000 // SIMD/Vector unit - _PPC_FEATURE_HAS_VSX = 0x00000080 // Vector scalar unit -) - -type facilities struct { - _ [sys.CacheLineSize]byte - isPOWER5x bool // ISA 2.03 - isPOWER6 bool // ISA 2.05 - isPOWER6x bool // ISA 2.05 + mffgpr/mftgpr extension - isPOWER7 bool // ISA 2.06 - isPOWER8 bool // ISA 2.07 - hasVMX bool // Vector unit - hasVSX bool // Vector scalar unit - _ [sys.CacheLineSize]byte -} - -// cpu can be tested at runtime in go assembler code to check for -// a certain ISA level or hardware capability, for example: -// ·cpu+facilities_hasVSX(SB) for checking the availability of VSX -// or -// ·cpu+facilities_isPOWER7(SB) for checking if the processor implements -// ISA 2.06 instructions. -var cpu facilities +//go:linkname cpu_hwcap internal/cpu.ppc64x_hwcap +//go:linkname cpu_hwcap2 internal/cpu.ppc64x_hwcap2 +var cpu_hwcap uint +var cpu_hwcap2 uint func archauxv(tag, val uintptr) { switch tag { case _AT_HWCAP: - cpu.isPOWER5x = val&_PPC_FEATURE_POWER5_PLUS != 0 - cpu.isPOWER6 = val&_PPC_FEATURE_ARCH_2_05 != 0 - cpu.isPOWER6x = val&_PPC_FEATURE_POWER6_EXT != 0 - cpu.isPOWER7 = val&_PPC_FEATURE_ARCH_2_06 != 0 - cpu.hasVMX = val&_PPC_FEATURE_HAS_ALTIVEC != 0 - cpu.hasVSX = val&_PPC_FEATURE_HAS_VSX != 0 + cpu_hwcap = uint(val) case _AT_HWCAP2: - cpu.isPOWER8 = val&_PPC_FEATURE2_ARCH_2_07 != 0 + cpu_hwcap2 = uint(val) } } diff --git a/src/runtime/os_nacl.go b/src/runtime/os_nacl.go index 18e6ce6232..6830da4c4f 100644 --- a/src/runtime/os_nacl.go +++ b/src/runtime/os_nacl.go @@ -33,7 +33,7 @@ func nacl_thread_create(fn uintptr, stk, tls, xx unsafe.Pointer) int32 //go:noescape func nacl_nanosleep(ts, extra *timespec) int32 func nanotime() int64 -func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer +func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int) func exit(code int32) func osyield() @@ -168,6 +168,9 @@ func newosproc(mp *m, stk unsafe.Pointer) { } } +//go:noescape +func exitThread(wait *uint32) + //go:nosplit func semacreate(mp *m) { if mp.waitsema != 0 { @@ -285,6 +288,9 @@ func sigenable(uint32) {} func sigignore(uint32) {} func closeonexec(int32) {} +// gsignalStack is unused on nacl. 
+type gsignalStack struct{} + var writelock uint32 // test-and-set spin lock for write /* diff --git a/src/runtime/os_netbsd.go b/src/runtime/os_netbsd.go index c26c3c9550..3778969318 100644 --- a/src/runtime/os_netbsd.go +++ b/src/runtime/os_netbsd.go @@ -21,6 +21,9 @@ const ( _UC_SIGMASK = 0x01 _UC_CPU = 0x04 + // From <sys/lwp.h> + _LWP_DETACHED = 0x00000040 + _EAGAIN = 35 ) @@ -55,7 +58,7 @@ func getcontext(ctxt unsafe.Pointer) func lwp_create(ctxt unsafe.Pointer, flags uintptr, lwpid unsafe.Pointer) int32 //go:noescape -func lwp_park(abstime *timespec, unpark int32, hint, unparkhint unsafe.Pointer) int32 +func lwp_park(clockid, flags int32, ts *timespec, unpark int32, hint, unparkhint unsafe.Pointer) int32 //go:noescape func lwp_unpark(lwp int32, hint unsafe.Pointer) int32 @@ -73,6 +76,9 @@ const ( _CLOCK_VIRTUAL = 1 _CLOCK_PROF = 2 _CLOCK_MONOTONIC = 3 + + _TIMER_RELTIME = 0 + _TIMER_ABSTIME = 1 ) var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}} @@ -116,10 +122,9 @@ func semasleep(ns int64) int32 { // Compute sleep deadline. var tsp *timespec + var ts timespec if ns >= 0 { - var ts timespec var nsec int32 - ns += nanotime() ts.set_sec(timediv(ns, 1000000000, &nsec)) ts.set_nsec(nsec) tsp = &ts @@ -135,9 +140,18 @@ func semasleep(ns int64) int32 { } // Sleep until unparked by semawakeup or timeout. - ret := lwp_park(tsp, 0, unsafe.Pointer(&_g_.m.waitsemacount), nil) + ret := lwp_park(_CLOCK_MONOTONIC, _TIMER_RELTIME, tsp, 0, unsafe.Pointer(&_g_.m.waitsemacount), nil) if ret == _ETIMEDOUT { return -1 + } else if ret == _EINTR && ns >= 0 { + // Avoid sleeping forever if we keep getting + // interrupted (for example by the profiling + // timer). It would be if tsp upon return had the + // remaining time to sleep, but this is good enough. + var nsec int32 + ns /= 2 + ts.set_sec(timediv(ns, 1000000000, &nsec)) + ts.set_nsec(nsec) } } } @@ -182,7 +196,7 @@ func newosproc(mp *m, stk unsafe.Pointer) { lwp_mcontext_init(&uc.uc_mcontext, stk, mp, mp.g0, funcPC(netbsdMstart)) - ret := lwp_create(unsafe.Pointer(&uc), 0, unsafe.Pointer(&mp.procid)) + ret := lwp_create(unsafe.Pointer(&uc), _LWP_DETACHED, unsafe.Pointer(&mp.procid)) sigprocmask(_SIG_SETMASK, &oset, nil) if ret < 0 { print("runtime: failed to create new OS thread (have ", mcount()-1, " already; errno=", -ret, ")\n") diff --git a/src/runtime/os_plan9.go b/src/runtime/os_plan9.go index 45e881aa41..32fdabb29f 100644 --- a/src/runtime/os_plan9.go +++ b/src/runtime/os_plan9.go @@ -393,7 +393,7 @@ func postnote(pid uint64, msg []byte) int { } //go:nosplit -func exit(e int) { +func exit(e int32) { var status []byte if e == 0 { status = emptystatus @@ -421,6 +421,12 @@ func newosproc(mp *m, stk unsafe.Pointer) { } } +func exitThread(wait *uint32) { + // We should never reach exitThread on Plan 9 because we let + // the OS clean up threads. + throw("exitThread") +} + //go:nosplit func semacreate(mp *m) { } diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go index 233cc165aa..025ff50a08 100644 --- a/src/runtime/os_windows.go +++ b/src/runtime/os_windows.go @@ -640,6 +640,9 @@ func newosproc(mp *m, stk unsafe.Pointer) { print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", getlasterror(), ")\n") throw("runtime.newosproc") } + + // Close thandle to avoid leaking the thread object if it exits. + stdcall1(_CloseHandle, thandle) } // Used by the C library build mode. 
On Linux this function would allocate a @@ -651,6 +654,12 @@ func newosproc0(mp *m, stk unsafe.Pointer) { newosproc(mp, stk) } +func exitThread(wait *uint32) { + // We should never reach exitThread on Windows because we let + // the OS clean up threads. + throw("exitThread") +} + // Called to initialize a new m (including the bootstrap m). // Called on the parent thread (main thread in case of bootstrap), can allocate memory. func mpreinit(mp *m) { @@ -701,7 +710,7 @@ func stdcall(fn stdFunction) uintptr { if mp.profilehz != 0 { // leave pc/sp for cpu profiler mp.libcallg.set(gp) - mp.libcallpc = getcallerpc(unsafe.Pointer(&fn)) + mp.libcallpc = getcallerpc() // sp must be the last, because once async cpu profiler finds // all three values to be non-zero, it will use them mp.libcallsp = getcallersp(unsafe.Pointer(&fn)) diff --git a/src/runtime/panic.go b/src/runtime/panic.go index 43bfdd7a1e..6fa99d6493 100644 --- a/src/runtime/panic.go +++ b/src/runtime/panic.go @@ -83,7 +83,7 @@ func deferproc(siz int32, fn *funcval) { // arguments of fn follow fn // Until the copy completes, we can only call nosplit routines. sp := getcallersp(unsafe.Pointer(&siz)) argp := uintptr(unsafe.Pointer(&fn)) + unsafe.Sizeof(fn) - callerpc := getcallerpc(unsafe.Pointer(&siz)) + callerpc := getcallerpc() d := newdefer(siz) if d._panic != nil { @@ -244,36 +244,47 @@ func freedefer(d *_defer) { freedeferfn() } sc := deferclass(uintptr(d.siz)) - if sc < uintptr(len(p{}.deferpool)) { - pp := getg().m.p.ptr() - if len(pp.deferpool[sc]) == cap(pp.deferpool[sc]) { - // Transfer half of local cache to the central cache. - // - // Take this slow path on the system stack so - // we don't grow freedefer's stack. - systemstack(func() { - var first, last *_defer - for len(pp.deferpool[sc]) > cap(pp.deferpool[sc])/2 { - n := len(pp.deferpool[sc]) - d := pp.deferpool[sc][n-1] - pp.deferpool[sc][n-1] = nil - pp.deferpool[sc] = pp.deferpool[sc][:n-1] - if first == nil { - first = d - } else { - last.link = d - } - last = d + if sc >= uintptr(len(p{}.deferpool)) { + return + } + pp := getg().m.p.ptr() + if len(pp.deferpool[sc]) == cap(pp.deferpool[sc]) { + // Transfer half of local cache to the central cache. + // + // Take this slow path on the system stack so + // we don't grow freedefer's stack. + systemstack(func() { + var first, last *_defer + for len(pp.deferpool[sc]) > cap(pp.deferpool[sc])/2 { + n := len(pp.deferpool[sc]) + d := pp.deferpool[sc][n-1] + pp.deferpool[sc][n-1] = nil + pp.deferpool[sc] = pp.deferpool[sc][:n-1] + if first == nil { + first = d + } else { + last.link = d } - lock(&sched.deferlock) - last.link = sched.deferpool[sc] - sched.deferpool[sc] = first - unlock(&sched.deferlock) - }) - } - *d = _defer{} - pp.deferpool[sc] = append(pp.deferpool[sc], d) + last = d + } + lock(&sched.deferlock) + last.link = sched.deferpool[sc] + sched.deferpool[sc] = first + unlock(&sched.deferlock) + }) } + + // These lines used to be simply `*d = _defer{}` but that + // started causing a nosplit stack overflow via typedmemmove. + d.siz = 0 + d.started = false + d.sp = 0 + d.pc = 0 + d.fn = nil + d._panic = nil + d.link = nil + + pp.deferpool[sc] = append(pp.deferpool[sc], d) } // Separate function so that it can split stack. @@ -336,7 +347,7 @@ func deferreturn(arg0 uintptr) { // Goexit terminates the goroutine that calls it. No other goroutine is affected. // Goexit runs all deferred calls before terminating the goroutine. 
Because Goexit -// is not panic, however, any recover calls in those deferred functions will return nil. +// is not a panic, any recover calls in those deferred functions will return nil. // // Calling Goexit from the main goroutine terminates that goroutine // without func main returning. Since func main has not returned, @@ -580,7 +591,7 @@ func startpanic() { //go:nosplit func dopanic(unused int) { - pc := getcallerpc(unsafe.Pointer(&unused)) + pc := getcallerpc() sp := getcallersp(unsafe.Pointer(&unused)) gp := getg() systemstack(func() { @@ -643,6 +654,12 @@ func recovery(gp *g) { gogo(&gp.sched) } +// startpanic_m implements unrecoverable panic. +// +// It can have write barriers because the write barrier explicitly +// ignores writes once dying > 0. +// +//go:yeswritebarrierrec func startpanic_m() { _g_ := getg() if mheap_.cachealloc.size == 0 { // very early @@ -679,7 +696,7 @@ func startpanic_m() { exit(4) fallthrough default: - // Can't even print! Just exit. + // Can't even print! Just exit. exit(5) } } diff --git a/src/runtime/plugin.go b/src/runtime/plugin.go index 682caacb21..5e05be71ec 100644 --- a/src/runtime/plugin.go +++ b/src/runtime/plugin.go @@ -7,22 +7,29 @@ package runtime import "unsafe" //go:linkname plugin_lastmoduleinit plugin.lastmoduleinit -func plugin_lastmoduleinit() (path string, syms map[string]interface{}, mismatchpkg string) { - md := firstmoduledata.next +func plugin_lastmoduleinit() (path string, syms map[string]interface{}, errstr string) { + var md *moduledata + for pmd := firstmoduledata.next; pmd != nil; pmd = pmd.next { + if pmd.bad { + md = nil // we only want the last module + continue + } + md = pmd + } if md == nil { throw("runtime: no plugin module data") } - for md.next != nil { - md = md.next + if md.pluginpath == "" { + throw("runtime: plugin has empty pluginpath") } if md.typemap != nil { - throw("runtime: plugin already initialized") + return "", nil, "plugin already loaded" } for _, pmd := range activeModules() { if pmd.pluginpath == md.pluginpath { - println("plugin: plugin", md.pluginpath, "already loaded") - throw("plugin: plugin already loaded") + md.bad = true + return "", nil, "plugin already loaded" } if inRange(pmd.text, pmd.etext, md.text, md.etext) || @@ -43,7 +50,8 @@ func plugin_lastmoduleinit() (path string, syms map[string]interface{}, mismatch } for _, pkghash := range md.pkghashes { if pkghash.linktimehash != *pkghash.runtimehash { - return "", nil, pkghash.modulename + md.bad = true + return "", nil, "plugin was built with a different version of package " + pkghash.modulename } } @@ -54,13 +62,11 @@ func plugin_lastmoduleinit() (path string, syms map[string]interface{}, mismatch pluginftabverify(md) moduledataverify1(md) - lock(&ifaceLock) + lock(&itabLock) for _, i := range md.itablinks { - if !i.inhash { - additab(i, true, false) - } + itabAdd(i) } - unlock(&ifaceLock) + unlock(&itabLock) // Build a map of symbol names to symbols. Here in the runtime // we fill out the first word of the interface, the type. 
We diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go index 21ea25ce36..d3382a5589 100644 --- a/src/runtime/pprof/pprof.go +++ b/src/runtime/pprof/pprof.go @@ -18,7 +18,7 @@ // To add equivalent profiling support to a standalone program, add // code like the following to your main function: // -// var cpuprofile = flag.String("cpuprofile", "", "write cpu profile `file`") +// var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") // var memprofile = flag.String("memprofile", "", "write memory profile to `file`") // // func main() { @@ -319,7 +319,15 @@ func (p *Profile) WriteTo(w io.Writer, debug int) error { p.mu.Unlock() // Map order is non-deterministic; make output deterministic. - sort.Sort(stackProfile(all)) + sort.Slice(all, func(i, j int) bool { + t, u := all[i], all[j] + for k := 0; k < len(t) && k < len(u); k++ { + if t[k] != u[k] { + return t[k] < u[k] + } + } + return len(t) < len(u) + }) return printCountProfile(w, debug, p.name, stackProfile(all)) } @@ -328,16 +336,6 @@ type stackProfile [][]uintptr func (x stackProfile) Len() int { return len(x) } func (x stackProfile) Stack(i int) []uintptr { return x[i] } -func (x stackProfile) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (x stackProfile) Less(i, j int) bool { - t, u := x[i], x[j] - for k := 0; k < len(t) && k < len(u); k++ { - if t[k] != u[k] { - return t[k] < u[k] - } - } - return len(t) < len(u) -} // A countProfile is a set of stack traces to be printed as counts // grouped by stack trace. There are multiple implementations: @@ -348,6 +346,41 @@ type countProfile interface { Stack(i int) []uintptr } +// printCountCycleProfile outputs block profile records (for block or mutex profiles) +// as the pprof-proto format output. Translations from cycle count to time duration +// are done because The proto expects count and time (nanoseconds) instead of count +// and the number of cycles for block, contention profiles. +func printCountCycleProfile(w io.Writer, countName, cycleName string, records []runtime.BlockProfileRecord) error { + // Output profile in protobuf form. + b := newProfileBuilder(w) + b.pbValueType(tagProfile_PeriodType, countName, "count") + b.pb.int64Opt(tagProfile_Period, 1) + b.pbValueType(tagProfile_SampleType, countName, "count") + b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds") + + cpuGHz := float64(runtime_cyclesPerSecond()) / 1e9 + + values := []int64{0, 0} + var locs []uint64 + for _, r := range records { + values[0] = int64(r.Count) + values[1] = int64(float64(r.Cycles) / cpuGHz) // to nanoseconds + locs = locs[:0] + for _, addr := range r.Stack() { + // For count profiles, all stack addresses are + // return PCs, which is what locForPC expects. + l := b.locForPC(addr) + if l == 0 { // runtime.goexit + continue + } + locs = append(locs, l) + } + b.pbSample(values, locs, nil) + } + b.build() + return nil +} + // printCountProfile prints a countProfile at the specified debug level. // The profile will be in compressed proto format unless debug is nonzero. func printCountProfile(w io.Writer, debug int, name string, p countProfile) error { @@ -476,6 +509,14 @@ func countHeap() int { // writeHeap writes the current runtime heap profile to w. func writeHeap(w io.Writer, debug int) error { + var memStats *runtime.MemStats + if debug != 0 { + // Read mem stats first, so that our other allocations + // do not appear in the statistics. 
+ memStats = new(runtime.MemStats) + runtime.ReadMemStats(memStats) + } + // Find out how many records there are (MemProfile(nil, true)), // allocate that many records, and get the data. // There's a race—more records might be added between @@ -538,8 +579,7 @@ func writeHeap(w io.Writer, debug int) error { // Print memstats information too. // Pprof will ignore, but useful for people - s := new(runtime.MemStats) - runtime.ReadMemStats(s) + s := memStats fmt.Fprintf(w, "\n# runtime.MemStats\n") fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc) fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc) @@ -765,14 +805,14 @@ func writeBlock(w io.Writer, debug int) error { sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles }) - b := bufio.NewWriter(w) - var tw *tabwriter.Writer - w = b - if debug > 0 { - tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) - w = tw + if debug <= 0 { + return printCountCycleProfile(w, "contentions", "delay", p) } + b := bufio.NewWriter(w) + tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) + w = tw + fmt.Fprintf(w, "--- contention:\n") fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond()) for i := range p { @@ -809,14 +849,14 @@ func writeMutex(w io.Writer, debug int) error { sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles }) - b := bufio.NewWriter(w) - var tw *tabwriter.Writer - w = b - if debug > 0 { - tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) - w = tw + if debug <= 0 { + return printCountCycleProfile(w, "contentions", "delay", p) } + b := bufio.NewWriter(w) + tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) + w = tw + fmt.Fprintf(w, "--- mutex:\n") fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond()) fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1)) diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go index 955964c721..96fcfc9703 100644 --- a/src/runtime/pprof/pprof_test.go +++ b/src/runtime/pprof/pprof_test.go @@ -26,16 +26,18 @@ import ( "time" ) -func cpuHogger(f func() int, dur time.Duration) { +func cpuHogger(f func(x int) int, y *int, dur time.Duration) { // We only need to get one 100 Hz clock tick, so we've got // a large safety buffer. // But do at least 500 iterations (which should take about 100ms), // otherwise TestCPUProfileMultithreaded can fail if only one // thread is scheduled during the testing period. t0 := time.Now() + accum := *y for i := 0; i < 500 || time.Since(t0) < dur; i++ { - f() + accum = f(accum) } + *y = accum } var ( @@ -46,8 +48,8 @@ var ( // The actual CPU hogging function. // Must not call other functions nor access heap/globals in the loop, // otherwise under race detector the samples will be in the race runtime. 
-func cpuHog1() int { - foo := salt1 +func cpuHog1(x int) int { + foo := x for i := 0; i < 1e5; i++ { if foo > 0 { foo *= foo @@ -58,8 +60,8 @@ func cpuHog1() int { return foo } -func cpuHog2() int { - foo := salt2 +func cpuHog2(x int) int { + foo := x for i := 0; i < 1e5; i++ { if foo > 0 { foo *= foo @@ -72,7 +74,7 @@ func cpuHog2() int { func TestCPUProfile(t *testing.T) { testCPUProfile(t, []string{"runtime/pprof.cpuHog1"}, func(dur time.Duration) { - cpuHogger(cpuHog1, dur) + cpuHogger(cpuHog1, &salt1, dur) }) } @@ -81,29 +83,29 @@ func TestCPUProfileMultithreaded(t *testing.T) { testCPUProfile(t, []string{"runtime/pprof.cpuHog1", "runtime/pprof.cpuHog2"}, func(dur time.Duration) { c := make(chan int) go func() { - cpuHogger(cpuHog1, dur) + cpuHogger(cpuHog1, &salt1, dur) c <- 1 }() - cpuHogger(cpuHog2, dur) + cpuHogger(cpuHog2, &salt2, dur) <-c }) } func TestCPUProfileInlining(t *testing.T) { testCPUProfile(t, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, func(dur time.Duration) { - cpuHogger(inlinedCaller, dur) + cpuHogger(inlinedCaller, &salt1, dur) }) } -func inlinedCaller() int { - inlinedCallee() - return 0 +func inlinedCaller(x int) int { + x = inlinedCallee(x) + return x } -func inlinedCallee() { +func inlinedCallee(x int) int { // We could just use cpuHog1, but for loops prevent inlining // right now. :( - foo := salt1 + foo := x i := 0 loop: if foo > 0 { @@ -114,7 +116,7 @@ loop: if i++; i < 1e5 { goto loop } - salt1 = foo + return foo } func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) { @@ -177,9 +179,9 @@ func testCPUProfile(t *testing.T, need []string, f func(dur time.Duration)) { } } - if badOS[runtime.GOOS] { + switch runtime.GOOS { + case "darwin", "dragonfly", "netbsd", "solaris": t.Skipf("ignoring failure on %s; see golang.org/issue/13841", runtime.GOOS) - return } // Ignore the failure if the tests are running in a QEMU-based emulator, // QEMU is not perfect at emulating everything. @@ -187,7 +189,6 @@ func testCPUProfile(t *testing.T, need []string, f func(dur time.Duration)) { // IN_QEMU=1 indicates that the tests are running in QEMU. See issue 9605. if os.Getenv("IN_QEMU") == "1" { t.Skip("ignore the failure in QEMU; see golang.org/issue/9605") - return } t.FailNow() } @@ -394,59 +395,107 @@ func TestMathBigDivide(t *testing.T) { }) } -// Operating systems that are expected to fail the tests. See issue 13841. 
-var badOS = map[string]bool{ - "darwin": true, - "netbsd": true, - "plan9": true, - "dragonfly": true, - "solaris": true, -} - func TestBlockProfile(t *testing.T) { type TestCase struct { name string f func() + stk []string re string } tests := [...]TestCase{ - {"chan recv", blockChanRecv, ` + { + name: "chan recv", + f: blockChanRecv, + stk: []string{ + "runtime.chanrecv1", + "runtime/pprof.blockChanRecv", + "runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockChanRecv\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ `}, - {"chan send", blockChanSend, ` + { + name: "chan send", + f: blockChanSend, + stk: []string{ + "runtime.chansend1", + "runtime/pprof.blockChanSend", + "runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ runtime\.chansend1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockChanSend\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ `}, - {"chan close", blockChanClose, ` + { + name: "chan close", + f: blockChanClose, + stk: []string{ + "runtime.chanrecv1", + "runtime/pprof.blockChanClose", + "runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockChanClose\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ `}, - {"select recv async", blockSelectRecvAsync, ` + { + name: "select recv async", + f: blockSelectRecvAsync, + stk: []string{ + "runtime.selectgo", + "runtime/pprof.blockSelectRecvAsync", + "runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*/src/runtime/select.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockSelectRecvAsync\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ `}, - {"select send sync", blockSelectSendSync, ` + { + name: "select send sync", + f: blockSelectSendSync, + stk: []string{ + "runtime.selectgo", + "runtime/pprof.blockSelectSendSync", + "runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*/src/runtime/select.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockSelectSendSync\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ `}, - {"mutex", blockMutex, ` + { + name: "mutex", + f: blockMutex, + stk: []string{ + "sync.(*Mutex).Lock", + "runtime/pprof.blockMutex", + "runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ sync\.\(\*Mutex\)\.Lock\+0x[0-9a-f]+ .*/src/sync/mutex\.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockMutex\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ `}, - {"cond", blockCond, ` + { + name: "cond", + f: blockCond, + stk: []string{ + "sync.(*Cond).Wait", + "runtime/pprof.blockCond", + 
"runtime/pprof.TestBlockProfile", + }, + re: ` [0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ # 0x[0-9a-f]+ sync\.\(\*Cond\)\.Wait\+0x[0-9a-f]+ .*/src/sync/cond\.go:[0-9]+ # 0x[0-9a-f]+ runtime/pprof\.blockCond\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ @@ -454,28 +503,84 @@ func TestBlockProfile(t *testing.T) { `}, } + // Generate block profile runtime.SetBlockProfileRate(1) defer runtime.SetBlockProfileRate(0) for _, test := range tests { test.f() } - var w bytes.Buffer - Lookup("block").WriteTo(&w, 1) - prof := w.String() - if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") { - t.Fatalf("Bad profile header:\n%v", prof) - } + t.Run("debug=1", func(t *testing.T) { + var w bytes.Buffer + Lookup("block").WriteTo(&w, 1) + prof := w.String() - if strings.HasSuffix(prof, "#\t0x0\n\n") { - t.Errorf("Useless 0 suffix:\n%v", prof) + if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") { + t.Fatalf("Bad profile header:\n%v", prof) + } + + if strings.HasSuffix(prof, "#\t0x0\n\n") { + t.Errorf("Useless 0 suffix:\n%v", prof) + } + + for _, test := range tests { + if !regexp.MustCompile(strings.Replace(test.re, "\t", "\t+", -1)).MatchString(prof) { + t.Errorf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof) + } + } + }) + + t.Run("proto", func(t *testing.T) { + // proto format + var w bytes.Buffer + Lookup("block").WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Fatalf("failed to parse profile: %v", err) + } + t.Logf("parsed proto: %s", p) + if err := p.CheckValid(); err != nil { + t.Fatalf("invalid profile: %v", err) + } + + stks := stacks(p) + for _, test := range tests { + if !containsStack(stks, test.stk) { + t.Errorf("No matching stack entry for %v, want %+v", test.name, test.stk) + } + } + }) + +} + +func stacks(p *profile.Profile) (res [][]string) { + for _, s := range p.Sample { + var stk []string + for _, l := range s.Location { + for _, line := range l.Line { + stk = append(stk, line.Function.Name) + } + } + res = append(res, stk) } + return res +} - for _, test := range tests { - if !regexp.MustCompile(strings.Replace(test.re, "\t", "\t+", -1)).MatchString(prof) { - t.Fatalf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof) +func containsStack(got [][]string, want []string) bool { + for _, stk := range got { + if len(stk) < len(want) { + continue + } + for i, f := range want { + if f != stk[i] { + break + } + if i == len(want)-1 { + return true + } } } + return false } const blockDelay = 10 * time.Millisecond @@ -567,6 +672,8 @@ func blockCond() { } func TestMutexProfile(t *testing.T) { + // Generate mutex profile + old := runtime.SetMutexProfileFraction(1) defer runtime.SetMutexProfileFraction(old) if old != 0 { @@ -575,31 +682,57 @@ func TestMutexProfile(t *testing.T) { blockMutex() - var w bytes.Buffer - Lookup("mutex").WriteTo(&w, 1) - prof := w.String() + t.Run("debug=1", func(t *testing.T) { + var w bytes.Buffer + Lookup("mutex").WriteTo(&w, 1) + prof := w.String() + t.Logf("received profile: %v", prof) - if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") { - t.Errorf("Bad profile header:\n%v", prof) - } - prof = strings.Trim(prof, "\n") - lines := strings.Split(prof, "\n") - if len(lines) != 6 { - t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof) - } - if len(lines) < 6 { - return - } - // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931" - r2 := `^\d+ 1 @(?: 0x[[:xdigit:]]+)+` - //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$" - if ok, err := regexp.MatchString(r2, lines[3]); 
err != nil || !ok { - t.Errorf("%q didn't match %q", lines[3], r2) - } - r3 := "^#.*runtime/pprof.blockMutex.*$" - if ok, err := regexp.MatchString(r3, lines[5]); err != nil || !ok { - t.Errorf("%q didn't match %q", lines[5], r3) - } + if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") { + t.Errorf("Bad profile header:\n%v", prof) + } + prof = strings.Trim(prof, "\n") + lines := strings.Split(prof, "\n") + if len(lines) != 6 { + t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof) + } + if len(lines) < 6 { + return + } + // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931" + r2 := `^\d+ 1 @(?: 0x[[:xdigit:]]+)+` + //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$" + if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok { + t.Errorf("%q didn't match %q", lines[3], r2) + } + r3 := "^#.*runtime/pprof.blockMutex.*$" + if ok, err := regexp.MatchString(r3, lines[5]); err != nil || !ok { + t.Errorf("%q didn't match %q", lines[5], r3) + } + t.Logf(prof) + }) + t.Run("proto", func(t *testing.T) { + // proto format + var w bytes.Buffer + Lookup("mutex").WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Fatalf("failed to parse profile: %v", err) + } + t.Logf("parsed proto: %s", p) + if err := p.CheckValid(); err != nil { + t.Fatalf("invalid profile: %v", err) + } + + stks := stacks(p) + for _, want := range [][]string{ + {"sync.(*Mutex).Unlock", "runtime/pprof.blockMutex.func1"}, + } { + if !containsStack(stks, want) { + t.Errorf("No matching stack entry for %+v", want) + } + } + }) } func func1(c chan int) { <-c } @@ -712,7 +845,7 @@ func TestEmptyCallStack(t *testing.T) { func TestCPUProfileLabel(t *testing.T) { testCPUProfile(t, []string{"runtime/pprof.cpuHogger;key=value"}, func(dur time.Duration) { Do(context.Background(), Labels("key", "value"), func(context.Context) { - cpuHogger(cpuHog1, dur) + cpuHogger(cpuHog1, &salt1, dur) }) }) } @@ -725,14 +858,15 @@ func TestLabelRace(t *testing.T) { start := time.Now() var wg sync.WaitGroup for time.Since(start) < dur { + var salts [10]int for i := 0; i < 10; i++ { wg.Add(1) - go func() { + go func(j int) { Do(context.Background(), Labels("key", "value"), func(context.Context) { - cpuHogger(cpuHog1, time.Millisecond) + cpuHogger(cpuHog1, &salts[j], time.Millisecond) }) wg.Done() - }() + }(i) } wg.Wait() } diff --git a/src/runtime/print.go b/src/runtime/print.go index 8fa3d39905..a698fcb0e0 100644 --- a/src/runtime/print.go +++ b/src/runtime/print.go @@ -56,7 +56,7 @@ var debuglock mutex // The compiler emits calls to printlock and printunlock around // the multiple calls that implement a single Go print or println -// statement. Some of the print helpers (printsp, for example) +// statement. Some of the print helpers (printslice, for example) // call print recursively. There is also the problem of a crash // happening during the print routines and needing to acquire // the print lock to print information about the crash. 
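The `proto` subtests added to pprof_test.go above parse the binary (debug=0) profile output and look for expected call stacks via the new `stacks` and `containsStack` helpers. The sketch below performs the equivalent check outside the runtime tree; the `github.com/google/pprof/profile` import path and the choice of the goroutine profile are assumptions for illustration only, since the tests themselves use an in-tree copy of the profile package.

```go
package main

import (
	"bytes"
	"fmt"
	"runtime/pprof"

	"github.com/google/pprof/profile"
)

func main() {
	var buf bytes.Buffer
	// debug=0 selects the compressed proto format, the same mode the
	// "proto" subtests above exercise.
	if err := pprof.Lookup("goroutine").WriteTo(&buf, 0); err != nil {
		panic(err)
	}
	p, err := profile.Parse(&buf)
	if err != nil {
		panic(err)
	}
	if err := p.CheckValid(); err != nil {
		panic(err)
	}
	// Flatten each sample into a list of function names, the same shape
	// the stacks() helper above produces, so a containsStack-style
	// prefix match could be run over it.
	for _, s := range p.Sample {
		var stk []string
		for _, loc := range s.Location {
			for _, line := range loc.Line {
				stk = append(stk, line.Function.Name)
			}
		}
		fmt.Println(stk)
	}
}
```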
@@ -98,31 +98,31 @@ func gwrite(b []byte) { } func printsp() { - print(" ") + printstring(" ") } func printnl() { - print("\n") + printstring("\n") } func printbool(v bool) { if v { - print("true") + printstring("true") } else { - print("false") + printstring("false") } } func printfloat(v float64) { switch { case v != v: - print("NaN") + printstring("NaN") return case v+v == v && v > 0: - print("+Inf") + printstring("+Inf") return case v+v == v && v < 0: - print("-Inf") + printstring("-Inf") return } @@ -204,7 +204,7 @@ func printuint(v uint64) { func printint(v int64) { if v < 0 { - print("-") + printstring("-") v = -v } printuint(uint64(v)) diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 5787991f07..ff441badde 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -176,6 +176,9 @@ func main() { if _cgo_notify_runtime_init_done == nil { throw("_cgo_notify_runtime_init_done missing") } + // Start the template thread in case we enter Go from + // a C-created thread and need to create a new thread. + startTemplateThread() cgocall(_cgo_notify_runtime_init_done, nil) } @@ -252,9 +255,10 @@ func forcegchelper() { } } +//go:nosplit + // Gosched yields the processor, allowing other goroutines to run. It does not // suspend the current goroutine, so execution resumes automatically. -//go:nosplit func Gosched() { mcall(gosched_m) } @@ -342,8 +346,8 @@ func releaseSudog(s *sudog) { if s.elem != nil { throw("runtime: sudog with non-nil elem") } - if s.selectdone != nil { - throw("runtime: sudog with non-nil selectdone") + if s.isSelect { + throw("runtime: sudog with non-false isSelect") } if s.next != nil { throw("runtime: sudog with non-nil next") @@ -432,7 +436,7 @@ func badctxt() { func lockedOSThread() bool { gp := getg() - return gp.lockedm != nil && gp.m.lockedg != nil + return gp.lockedm != 0 && gp.m.lockedg != 0 } var ( @@ -498,13 +502,21 @@ func schedinit() { if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 { procs = n } - if procs > _MaxGomaxprocs { - procs = _MaxGomaxprocs - } if procresize(procs) != nil { throw("unknown runnable goroutine during bootstrap") } + // For cgocheck > 1, we turn on the write barrier at all times + // and check all pointer writes. We can't do this until after + // procresize because the write barrier needs a P. + if debug.cgocheck > 1 { + writeBarrier.cgo = true + writeBarrier.enabled = true + for _, p := range allp { + p.wbBuf.reset() + } + } + if buildVersion == "" { // Condition should never trigger. This code just serves // to ensure runtime·buildVersion is kept in the resulting binary. 
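schedinit above no longer clamps the requested GOMAXPROCS to `_MaxGomaxprocs`; any positive value is accepted, and (as the procresize changes later in this diff show) `allp` now grows on demand. A minimal illustration, assuming nothing beyond the public `runtime.GOMAXPROCS` API:

```go
package main

import (
	"fmt"
	"runtime"
)

func main() {
	// Before this change, values above _MaxGomaxprocs were silently clamped.
	prev := runtime.GOMAXPROCS(2048)
	fmt.Println("previous GOMAXPROCS:", prev)
	fmt.Println("current GOMAXPROCS:", runtime.GOMAXPROCS(0)) // 0 only queries
}
```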
@@ -520,7 +532,7 @@ func dumpgstatus(gp *g) { func checkmcount() { // sched lock is held - if sched.mcount > sched.maxmcount { + if mcount() > sched.maxmcount { print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n") throw("thread exhaustion") } @@ -534,15 +546,20 @@ func mcommoninit(mp *m) { callers(1, mp.createstack[:]) } - mp.fastrand = 0x49f6428a + uint32(mp.id) + uint32(cputicks()) - if mp.fastrand == 0 { - mp.fastrand = 0x49f6428a - } - lock(&sched.lock) - mp.id = sched.mcount - sched.mcount++ + if sched.mnext+1 < sched.mnext { + throw("runtime: thread ID overflow") + } + mp.id = sched.mnext + sched.mnext++ checkmcount() + + mp.fastrand[0] = 1597334677 * uint32(mp.id) + mp.fastrand[1] = uint32(cputicks()) + if mp.fastrand[0]|mp.fastrand[1] == 0 { + mp.fastrand[1] = 1 + } + mpreinit(mp) if mp.gsignal != nil { mp.gsignal.stackguard1 = mp.gsignal.stack.lo + _StackGuard @@ -765,8 +782,10 @@ func casgstatus(gp *g, oldval, newval uint32) { // _Grunning or _Grunning|_Gscan; either way, // we own gp.gcscanvalid, so it's safe to read. // gp.gcscanvalid must not be true when we are running. - print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n") - throw("casgstatus") + systemstack(func() { + print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n") + throw("casgstatus") + }) } // See http://golang.org/cl/21503 for justification of the yield delay. @@ -951,7 +970,7 @@ func stopTheWorld(reason string) { // startTheWorld undoes the effects of stopTheWorld. func startTheWorld() { - systemstack(startTheWorldWithSema) + systemstack(func() { startTheWorldWithSema(false) }) // worldsema must be held over startTheWorldWithSema to ensure // gomaxprocs cannot change while worldsema is held. semrelease(&worldsema) @@ -1001,8 +1020,7 @@ func stopTheWorldWithSema() { _g_.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic. sched.stopwait-- // try to retake all P's in Psyscall status - for i := 0; i < int(gomaxprocs); i++ { - p := allp[i] + for _, p := range allp { s := p.status if s == _Psyscall && atomic.Cas(&p.status, s, _Pgcstop) { if trace.enabled { @@ -1042,8 +1060,7 @@ func stopTheWorldWithSema() { if sched.stopwait != 0 { bad = "stopTheWorld: not stopped (stopwait != 0)" } else { - for i := 0; i < int(gomaxprocs); i++ { - p := allp[i] + for _, p := range allp { if p.status != _Pgcstop { bad = "stopTheWorld: not stopped (status != _Pgcstop)" } @@ -1067,12 +1084,14 @@ func mhelpgc() { _g_.m.helpgc = -1 } -func startTheWorldWithSema() { +func startTheWorldWithSema(emitTraceEvent bool) int64 { _g_ := getg() - _g_.m.locks++ // disable preemption because it can be holding p in a local var - gp := netpoll(false) // non-blocking - injectglist(gp) + _g_.m.locks++ // disable preemption because it can be holding p in a local var + if netpollinited() { + gp := netpoll(false) // non-blocking + injectglist(gp) + } add := needaddgcproc() lock(&sched.lock) @@ -1107,6 +1126,12 @@ func startTheWorldWithSema() { } } + // Capture start-the-world time before doing clean-up tasks. + startTime := nanotime() + if emitTraceEvent { + traceGCSTWDone() + } + // Wakeup an additional proc in case we have excessive runnable goroutines // in local queues or in the global queue. If we don't, the proc will park itself. // If we have lots of excessive work, resetspinning will unpark additional procs as necessary. 
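mcommoninit above now seeds a two-word `fastrand` state per M and forces that state to be non-zero. The sketch below shows why the guard matters for an xorshift-style generator: the step function used here is one common variant chosen for illustration (the runtime's own step is not part of this diff), but any such generator maps the all-zero state to itself and would return 0 forever.

```go
package main

import "fmt"

type rng struct{ s0, s1 uint32 }

func newRNG(id, ticks uint32) *rng {
	// Mirrors the seeding added to mcommoninit above.
	r := &rng{s0: 1597334677 * id, s1: ticks}
	if r.s0|r.s1 == 0 {
		r.s1 = 1 // never start in the absorbing all-zero state
	}
	return r
}

func (r *rng) next() uint32 {
	// Illustrative xorshift step: a zero state stays zero, every other
	// state keeps producing non-trivial output.
	s1, s0 := r.s0, r.s1
	s1 ^= s1 << 17
	s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16
	r.s0, r.s1 = s0, s1
	return s0 + s1
}

func main() {
	r := newRNG(0, 0) // both inputs zero: the guard kicks in
	fmt.Println(r.next(), r.next(), r.next())
}
```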
@@ -1128,14 +1153,25 @@ func startTheWorldWithSema() { if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack _g_.stackguard0 = stackPreempt } + + return startTime } // Called to start an M. +// +// This must not split the stack because we may not even have stack +// bounds set up yet. +// +// May run during STW (because it doesn't have a P yet), so write +// barriers are not allowed. +// //go:nosplit +//go:nowritebarrierrec func mstart() { _g_ := getg() - if _g_.stack.lo == 0 { + osStack := _g_.stack.lo == 0 + if osStack { // Initialize stack bounds from system stack. // Cgo may have left stack size in stack.hi. size := _g_.stack.hi @@ -1149,33 +1185,37 @@ func mstart() { // both Go and C functions with stack growth prologues. _g_.stackguard0 = _g_.stack.lo + _StackGuard _g_.stackguard1 = _g_.stackguard0 - mstart1() + mstart1(0) + + // Exit this thread. + if GOOS == "windows" || GOOS == "solaris" || GOOS == "plan9" { + // Window, Solaris and Plan 9 always system-allocate + // the stack, but put it in _g_.stack before mstart, + // so the logic above hasn't set osStack yet. + osStack = true + } + mexit(osStack) } -func mstart1() { +func mstart1(dummy int32) { _g_ := getg() if _g_ != _g_.m.g0 { throw("bad runtime·mstart") } - // Record top of stack for use by mcall. - // Once we call schedule we're never coming back, - // so other calls can reuse this stack space. - gosave(&_g_.m.g0.sched) - _g_.m.g0.sched.pc = ^uintptr(0) // make sure it is never used + // Record the caller for use as the top of stack in mcall and + // for terminating the thread. + // We're never coming back to mstart1 after we call schedule, + // so other calls can reuse the current frame. + save(getcallerpc(), getcallersp(unsafe.Pointer(&dummy))) asminit() minit() // Install signal handlers; after minit so that minit can // prepare the thread to be able to handle the signals. if _g_.m == &m0 { - // Create an extra M for callbacks on threads not created by Go. - if iscgo && !cgoHasExtraM { - cgoHasExtraM = true - newextram() - } - initsig(false) + mstartm0() } if fn := _g_.m.mstartfn; fn != nil { @@ -1192,6 +1232,114 @@ func mstart1() { schedule() } +// mstartm0 implements part of mstart1 that only runs on the m0. +// +// Write barriers are allowed here because we know the GC can't be +// running yet, so they'll be no-ops. +// +//go:yeswritebarrierrec +func mstartm0() { + // Create an extra M for callbacks on threads not created by Go. + if iscgo && !cgoHasExtraM { + cgoHasExtraM = true + newextram() + } + initsig(false) +} + +// mexit tears down and exits the current thread. +// +// Don't call this directly to exit the thread, since it must run at +// the top of the thread stack. Instead, use gogo(&_g_.m.g0.sched) to +// unwind the stack to the point that exits the thread. +// +// It is entered with m.p != nil, so write barriers are allowed. It +// will release the P before exiting. +// +//go:yeswritebarrierrec +func mexit(osStack bool) { + g := getg() + m := g.m + + if m == &m0 { + // This is the main thread. Just wedge it. + // + // On Linux, exiting the main thread puts the process + // into a non-waitable zombie state. On Plan 9, + // exiting the main thread unblocks wait even though + // other threads are still running. On Solaris we can + // neither exitThread nor return from mstart. Other + // bad things probably happen on other platforms. + // + // We could try to clean up this M more before wedging + // it, but that complicates signal handling. 
+ handoffp(releasep()) + lock(&sched.lock) + sched.nmfreed++ + checkdead() + unlock(&sched.lock) + notesleep(&m.park) + throw("locked m0 woke up") + } + + sigblock() + unminit() + + // Free the gsignal stack. + if m.gsignal != nil { + stackfree(m.gsignal.stack) + } + + // Remove m from allm. + lock(&sched.lock) + for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink { + if *pprev == m { + *pprev = m.alllink + goto found + } + } + throw("m not found in allm") +found: + if !osStack { + // Delay reaping m until it's done with the stack. + // + // If this is using an OS stack, the OS will free it + // so there's no need for reaping. + atomic.Store(&m.freeWait, 1) + // Put m on the free list, though it will not be reaped until + // freeWait is 0. Note that the free list must not be linked + // through alllink because some functions walk allm without + // locking, so may be using alllink. + m.freelink = sched.freem + sched.freem = m + } + unlock(&sched.lock) + + // Release the P. + handoffp(releasep()) + // After this point we must not have write barriers. + + // Invoke the deadlock detector. This must happen after + // handoffp because it may have started a new M to take our + // P's work. + lock(&sched.lock) + sched.nmfreed++ + checkdead() + unlock(&sched.lock) + + if osStack { + // Return from mstart and let the system thread + // library free the g0 stack and terminate the thread. + return + } + + // mstart is the thread's entry point, so there's nothing to + // return to. Exit the thread directly. exitThread will clear + // m.freeWait when it's done with the stack and the m can be + // reaped. + exitThread(&m.freeWait) +} + // forEachP calls fn(p) for every P p when p reaches a GC safe point. // If a P is currently executing code, this will bring the P to a GC // safe point and execute fn on that P. If the P is not executing code @@ -1215,7 +1363,7 @@ func forEachP(fn func(*p)) { sched.safePointFn = fn // Ask all Ps to run the safe point function. - for _, p := range allp[:gomaxprocs] { + for _, p := range allp { if p != _p_ { atomic.Store(&p.runSafePointFn, 1) } @@ -1243,8 +1391,7 @@ func forEachP(fn func(*p)) { // Force Ps currently in _Psyscall into _Pidle and hand them // off to induce safe point function execution. - for i := 0; i < int(gomaxprocs); i++ { - p := allp[i] + for _, p := range allp { s := p.status if s == _Psyscall && p.runSafePointFn == 1 && atomic.Cas(&p.status, s, _Pidle) { if trace.enabled { @@ -1273,8 +1420,7 @@ func forEachP(fn func(*p)) { if sched.safePointWait != 0 { throw("forEachP: not done") } - for i := 0; i < int(gomaxprocs); i++ { - p := allp[i] + for _, p := range allp { if p.runSafePointFn != 0 { throw("forEachP: P did not run fn") } @@ -1339,6 +1485,27 @@ func allocm(_p_ *p, fn func()) *m { if _g_.m.p == 0 { acquirep(_p_) // temporarily borrow p for mallocs in this function } + + // Release the free M list. We need to do this somewhere and + // this may free up a stack we can use. 
+ if sched.freem != nil { + lock(&sched.lock) + var newList *m + for freem := sched.freem; freem != nil; { + if freem.freeWait != 0 { + next := freem.freelink + freem.freelink = newList + newList = freem + freem = next + continue + } + stackfree(freem.g0.stack) + freem = freem.freelink + } + sched.freem = newList + unlock(&sched.lock) + } + mp := new(m) mp.mstartfn = fn mcommoninit(mp) @@ -1498,9 +1665,9 @@ func oneNewExtraM() { casgstatus(gp, _Gidle, _Gdead) gp.m = mp mp.curg = gp - mp.locked = _LockInternal - mp.lockedg = gp - gp.lockedm = mp + mp.lockedInt++ + mp.lockedg.set(gp) + gp.lockedm.set(mp) gp.goid = int64(atomic.Xadd64(&sched.goidgen, 1)) if raceenabled { gp.racectx = racegostart(funcPC(newextram) + sys.PCQuantum) @@ -1629,6 +1796,27 @@ func unlockextra(mp *m) { // around exec'ing while creating/destroying threads. See issue #19546. var execLock rwmutex +// newmHandoff contains a list of m structures that need new OS threads. +// This is used by newm in situations where newm itself can't safely +// start an OS thread. +var newmHandoff struct { + lock mutex + + // newm points to a list of M structures that need new OS + // threads. The list is linked through m.schedlink. + newm muintptr + + // waiting indicates that wake needs to be notified when an m + // is put on the list. + waiting bool + wake note + + // haveTemplateThread indicates that the templateThread has + // been started. This is not protected by lock. Use cas to set + // to 1. + haveTemplateThread uint32 +} + // Create a new m. It will start off with a call to fn, or else the scheduler. // fn needs to be static and not a heap allocated closure. // May run with m.p==nil, so write barriers are not allowed. @@ -1637,6 +1825,35 @@ func newm(fn func(), _p_ *p) { mp := allocm(_p_, fn) mp.nextp.set(_p_) mp.sigmask = initSigmask + if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" { + // We're on a locked M or a thread that may have been + // started by C. The kernel state of this thread may + // be strange (the user may have locked it for that + // purpose). We don't want to clone that into another + // thread. Instead, ask a known-good thread to create + // the thread for us. + // + // This is disabled on Plan 9. See golang.org/issue/22227. + // + // TODO: This may be unnecessary on Windows, which + // doesn't model thread creation off fork. + lock(&newmHandoff.lock) + if newmHandoff.haveTemplateThread == 0 { + throw("on a locked thread with no template thread") + } + mp.schedlink = newmHandoff.newm + newmHandoff.newm.set(mp) + if newmHandoff.waiting { + newmHandoff.waiting = false + notewakeup(&newmHandoff.wake) + } + unlock(&newmHandoff.lock) + return + } + newm1(mp) +} + +func newm1(mp *m) { if iscgo { var ts cgothreadstart if _cgo_thread_start == nil { @@ -1658,6 +1875,56 @@ func newm(fn func(), _p_ *p) { execLock.runlock() } +// startTemplateThread starts the template thread if it is not already +// running. +// +// The calling thread must itself be in a known-good state. +func startTemplateThread() { + if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) { + return + } + newm(templateThread, nil) +} + +// tmeplateThread is a thread in a known-good state that exists solely +// to start new threads in known-good states when the calling thread +// may not be a a good state. +// +// Many programs never need this, so templateThread is started lazily +// when we first enter a state that might lead to running on a thread +// in an unknown state. 
+// +// templateThread runs on an M without a P, so it must not have write +// barriers. +// +//go:nowritebarrierrec +func templateThread() { + lock(&sched.lock) + sched.nmsys++ + checkdead() + unlock(&sched.lock) + + for { + lock(&newmHandoff.lock) + for newmHandoff.newm != 0 { + newm := newmHandoff.newm.ptr() + newmHandoff.newm = 0 + unlock(&newmHandoff.lock) + for newm != nil { + next := newm.schedlink.ptr() + newm.schedlink = 0 + newm1(newm) + newm = next + } + lock(&newmHandoff.lock) + } + newmHandoff.waiting = true + noteclear(&newmHandoff.wake) + unlock(&newmHandoff.lock) + notesleep(&newmHandoff.wake) + } +} + // Stops execution of the current m until new work is available. // Returns with acquired P. func stopm() { @@ -1680,7 +1947,9 @@ retry: notesleep(&_g_.m.park) noteclear(&_g_.m.park) if _g_.m.helpgc != 0 { + // helpgc() set _g_.m.p and _g_.m.mcache, so we have a P. gchelper() + // Undo the effects of helpgc(). _g_.m.helpgc = 0 _g_.m.mcache = nil _g_.m.p = 0 @@ -1814,7 +2083,7 @@ func wakep() { func stoplockedm() { _g_ := getg() - if _g_.m.lockedg == nil || _g_.m.lockedg.lockedm != _g_.m { + if _g_.m.lockedg == 0 || _g_.m.lockedg.ptr().lockedm.ptr() != _g_.m { throw("stoplockedm: inconsistent locking") } if _g_.m.p != 0 { @@ -1826,7 +2095,7 @@ func stoplockedm() { // Wait until another thread schedules lockedg again. notesleep(&_g_.m.park) noteclear(&_g_.m.park) - status := readgstatus(_g_.m.lockedg) + status := readgstatus(_g_.m.lockedg.ptr()) if status&^_Gscan != _Grunnable { print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n") dumpgstatus(_g_) @@ -1842,7 +2111,7 @@ func stoplockedm() { func startlockedm(gp *g) { _g_ := getg() - mp := gp.lockedm + mp := gp.lockedm.ptr() if mp == _g_.m { throw("startlockedm: locked to me") } @@ -1968,11 +2237,12 @@ top: // Poll network. // This netpoll is only an optimization before we resort to stealing. - // We can safely skip it if there a thread blocked in netpoll already. - // If there is any kind of logical race with that blocked thread - // (e.g. it has already returned from netpoll, but does not set lastpoll yet), - // this thread will do blocking netpoll below anyway. - if netpollinited() && sched.lastpoll != 0 { + // We can safely skip it if there are no waiters or a thread is blocked + // in netpoll already. If there is any kind of logical race with that + // blocked thread (e.g. it has already returned from netpoll, but does + // not set lastpoll yet), this thread will do blocking netpoll below + // anyway. + if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Load64(&sched.lastpoll) != 0 { if gp := netpoll(false); gp != nil { // non-blocking // netpoll returns list of goroutines linked by schedlink. injectglist(gp.schedlink.ptr()) @@ -2068,9 +2338,8 @@ stop: } // check all runqueues once again - for i := 0; i < int(gomaxprocs); i++ { - _p_ := allp[i] - if _p_ != nil && !runqempty(_p_) { + for _, _p_ := range allp { + if !runqempty(_p_) { lock(&sched.lock) _p_ = pidleget() unlock(&sched.lock) @@ -2209,9 +2478,15 @@ func schedule() { throw("schedule: holding locks") } - if _g_.m.lockedg != nil { + if _g_.m.lockedg != 0 { stoplockedm() - execute(_g_.m.lockedg, false) // Never returns. + execute(_g_.m.lockedg.ptr(), false) // Never returns. + } + + // We should not schedule away from a g that is executing a cgo call, + // since the cgo call is using the m's g0 stack. 
+ if _g_.m.incgo { + throw("schedule: in cgo") } top: @@ -2262,7 +2537,7 @@ top: resetspinning() } - if gp.lockedm != nil { + if gp.lockedm != 0 { // Hands off own p to the locked m, // then blocks waiting for a new p. startlockedm(gp) @@ -2381,8 +2656,9 @@ func goexit0(gp *g) { atomic.Xadd(&sched.ngsys, -1) } gp.m = nil - gp.lockedm = nil - _g_.m.lockedg = nil + locked := gp.lockedm != 0 + gp.lockedm = 0 + _g_.m.lockedg = 0 gp.paniconfault = false gp._defer = nil // should be true already but just in case. gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data. @@ -2392,17 +2668,37 @@ func goexit0(gp *g) { gp.labels = nil gp.timer = nil + if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 { + // Flush assist credit to the global pool. This gives + // better information to pacing if the application is + // rapidly creating an exiting goroutines. + scanCredit := int64(gcController.assistWorkPerByte * float64(gp.gcAssistBytes)) + atomic.Xaddint64(&gcController.bgScanCredit, scanCredit) + gp.gcAssistBytes = 0 + } + // Note that gp's stack scan is now "valid" because it has no // stack. gp.gcscanvalid = true dropg() - if _g_.m.locked&^_LockExternal != 0 { - print("invalid m->locked = ", _g_.m.locked, "\n") + if _g_.m.lockedInt != 0 { + print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n") throw("internal lockOSThread error") } - _g_.m.locked = 0 + _g_.m.lockedExt = 0 gfput(_g_.m.p.ptr(), gp) + if locked { + // The goroutine may have locked this thread because + // it put it in an unusual kernel state. Kill it + // rather than returning it to the thread pool. + + // Return to mstart, which will release the P and exit + // the thread. + if GOOS != "plan9" { // See golang.org/issue/22227. + gogo(&_g_.m.g0.sched) + } + } schedule() } @@ -2532,7 +2828,7 @@ func reentersyscall(pc, sp uintptr) { // Standard syscall entry used by the go syscall library and normal cgo calls. //go:nosplit func entersyscall(dummy int32) { - reentersyscall(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy))) + reentersyscall(getcallerpc(), getcallersp(unsafe.Pointer(&dummy))) } func entersyscall_sysmon() { @@ -2575,7 +2871,7 @@ func entersyscallblock(dummy int32) { _g_.m.p.ptr().syscalltick++ // Leave SP around for GC and traceback. - pc := getcallerpc(unsafe.Pointer(&dummy)) + pc := getcallerpc() sp := getcallersp(unsafe.Pointer(&dummy)) save(pc, sp) _g_.syscallsp = _g_.sched.sp @@ -2600,7 +2896,7 @@ func entersyscallblock(dummy int32) { systemstack(entersyscallblock_handoff) // Resave for traceback during blocked call. - save(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy))) + save(getcallerpc(), getcallersp(unsafe.Pointer(&dummy))) _g_.m.locks-- } @@ -2639,7 +2935,9 @@ func exitsyscall(dummy int32) { oldp := _g_.m.p.ptr() if exitsyscallfast() { if _g_.m.mcache == nil { - throw("lost mcache") + systemstack(func() { + throw("lost mcache") + }) } if trace.enabled { if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick { @@ -2686,7 +2984,9 @@ func exitsyscall(dummy int32) { mcall(exitsyscall0) if _g_.m.mcache == nil { - throw("lost mcache") + systemstack(func() { + throw("lost mcache") + }) } // Scheduler returned, so we're allowed to run now. @@ -2810,7 +3110,7 @@ func exitsyscall0(gp *g) { acquirep(_p_) execute(gp, false) // Never returns. } - if _g_.m.lockedg != nil { + if _g_.m.lockedg != 0 { // Wait until another thread schedules gp and so m again. stoplockedm() execute(gp, false) // Never returns. 
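The `locked` handling added to goexit0 above means a goroutine that exits while still wired to its OS thread sends the thread back through mstart to mexit rather than returning it to the pool. A user-level illustration of the situation this protects against follows; the "unusual kernel state" is only described in a comment, not actually created.

```go
package main

import (
	"fmt"
	"runtime"
)

func main() {
	done := make(chan struct{})
	go func() {
		defer close(done)
		runtime.LockOSThread()
		// ... put the thread into a state unsuitable for other goroutines,
		// e.g. alter its signal or namespace state via syscalls ...
		// Returning without UnlockOSThread: with this change, the runtime
		// discards the thread instead of letting another goroutine
		// inherit its modified state.
	}()
	<-done
	fmt.Println("locked goroutine exited; its thread will not be reused")
}
```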
@@ -2928,17 +3228,16 @@ func malg(stacksize int32) *g { //go:nosplit func newproc(siz int32, fn *funcval) { argp := add(unsafe.Pointer(&fn), sys.PtrSize) - pc := getcallerpc(unsafe.Pointer(&siz)) + pc := getcallerpc() systemstack(func() { - newproc1(fn, (*uint8)(argp), siz, 0, pc) + newproc1(fn, (*uint8)(argp), siz, pc) }) } // Create a new g running fn with narg bytes of arguments starting -// at argp and returning nret bytes of results. callerpc is the -// address of the go statement that created this. The new g is put -// on the queue of g's waiting to run. -func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr) *g { +// at argp. callerpc is the address of the go statement that created +// this. The new g is put on the queue of g's waiting to run. +func newproc1(fn *funcval, argp *uint8, narg int32, callerpc uintptr) { _g_ := getg() if fn == nil { @@ -2946,7 +3245,7 @@ func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr throw("go of nil func value") } _g_.m.locks++ // disable preemption because it can be holding p in a local var - siz := narg + nret + siz := narg siz = (siz + 7) &^ 7 // We could allocate a larger initial stack if necessary. @@ -3041,7 +3340,6 @@ func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack _g_.stackguard0 = stackPreempt } - return newg } // Put on gfree list. @@ -3160,23 +3458,41 @@ func Breakpoint() { //go:nosplit func dolockOSThread() { _g_ := getg() - _g_.m.lockedg = _g_ - _g_.lockedm = _g_.m + _g_.m.lockedg.set(_g_) + _g_.lockedm.set(_g_.m) } //go:nosplit // LockOSThread wires the calling goroutine to its current operating system thread. -// Until the calling goroutine exits or calls UnlockOSThread, it will always -// execute in that thread, and no other goroutine can. +// The calling goroutine will always execute in that thread, +// and no other goroutine will execute in it, +// until the calling goroutine has made as many calls to +// UnlockOSThread as to LockOSThread. +// If the calling goroutine exits without unlocking the thread, +// the thread will be terminated. +// +// A goroutine should call LockOSThread before calling OS services or +// non-Go library functions that depend on per-thread state. func LockOSThread() { - getg().m.locked |= _LockExternal + if atomic.Load(&newmHandoff.haveTemplateThread) == 0 && GOOS != "plan9" { + // If we need to start a new thread from the locked + // thread, we need the template thread. Start it now + // while we're in a known-good state. + startTemplateThread() + } + _g_ := getg() + _g_.m.lockedExt++ + if _g_.m.lockedExt == 0 { + _g_.m.lockedExt-- + panic("LockOSThread nesting overflow") + } dolockOSThread() } //go:nosplit func lockOSThread() { - getg().m.locked += _LockInternal + getg().m.lockedInt++ dolockOSThread() } @@ -3186,29 +3502,43 @@ func lockOSThread() { //go:nosplit func dounlockOSThread() { _g_ := getg() - if _g_.m.locked != 0 { + if _g_.m.lockedInt != 0 || _g_.m.lockedExt != 0 { return } - _g_.m.lockedg = nil - _g_.lockedm = nil + _g_.m.lockedg = 0 + _g_.lockedm = 0 } //go:nosplit -// UnlockOSThread unwires the calling goroutine from its fixed operating system thread. -// If the calling goroutine has not called LockOSThread, UnlockOSThread is a no-op. +// UnlockOSThread undoes an earlier call to LockOSThread. 
+// If this drops the number of active LockOSThread calls on the +// calling goroutine to zero, it unwires the calling goroutine from +// its fixed operating system thread. +// If there are no active LockOSThread calls, this is a no-op. +// +// Before calling UnlockOSThread, the caller must ensure that the OS +// thread is suitable for running other goroutines. If the caller made +// any permanent changes to the state of the thread that would affect +// other goroutines, it should not call this function and thus leave +// the goroutine locked to the OS thread until the goroutine (and +// hence the thread) exits. func UnlockOSThread() { - getg().m.locked &^= _LockExternal + _g_ := getg() + if _g_.m.lockedExt == 0 { + return + } + _g_.m.lockedExt-- dounlockOSThread() } //go:nosplit func unlockOSThread() { _g_ := getg() - if _g_.m.locked < _LockInternal { + if _g_.m.lockedInt == 0 { systemstack(badunlockosthread) } - _g_.m.locked -= _LockInternal + _g_.m.lockedInt-- dounlockOSThread() } @@ -3218,10 +3548,7 @@ func badunlockosthread() { func gcount() int32 { n := int32(allglen) - sched.ngfree - int32(atomic.Load(&sched.ngsys)) - for _, _p_ := range &allp { - if _p_ == nil { - break - } + for _, _p_ := range allp { n -= _p_.gfreecnt } @@ -3234,7 +3561,7 @@ func gcount() int32 { } func mcount() int32 { - return sched.mcount + return int32(sched.mnext - sched.nmfreed) } var prof struct { @@ -3516,7 +3843,7 @@ func setcpuprofilerate(hz int32) { // Returns list of Ps with local work, they need to be scheduled by the caller. func procresize(nprocs int32) *p { old := gomaxprocs - if old < 0 || old > _MaxGomaxprocs || nprocs <= 0 || nprocs > _MaxGomaxprocs { + if old < 0 || nprocs <= 0 { throw("procresize: invalid arg") } if trace.enabled { @@ -3530,6 +3857,23 @@ func procresize(nprocs int32) *p { } sched.procresizetime = now + // Grow allp if necessary. + if nprocs > int32(len(allp)) { + // Synchronize with retake, which could be running + // concurrently since it doesn't run on a P. + lock(&allpLock) + if nprocs <= int32(cap(allp)) { + allp = allp[:nprocs] + } else { + nallp := make([]*p, nprocs) + // Copy everything up to allp's cap so we + // never lose old allocated Ps. + copy(nallp, allp[:cap(allp)]) + allp = nallp + } + unlock(&allpLock) + } + // initialize new P's for i := int32(0); i < nprocs; i++ { pp := allp[i] @@ -3541,6 +3885,7 @@ func procresize(nprocs int32) *p { for i := range pp.deferpool { pp.deferpool[i] = pp.deferpoolbuf[i][:0] } + pp.wbBuf.reset() atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp)) } if pp.mcache == nil { @@ -3566,13 +3911,11 @@ func procresize(nprocs int32) *p { // free unused P's for i := nprocs; i < old; i++ { p := allp[i] - if trace.enabled { - if p == getg().m.p.ptr() { - // moving to p[0], pretend that we were descheduled - // and then scheduled again to keep the trace sane. - traceGoSched() - traceProcStop(p) - } + if trace.enabled && p == getg().m.p.ptr() { + // moving to p[0], pretend that we were descheduled + // and then scheduled again to keep the trace sane. + traceGoSched() + traceProcStop(p) } // move all runnable goroutines to the global queue for p.runqhead != p.runqtail { @@ -3598,6 +3941,11 @@ func procresize(nprocs int32) *p { // world is stopped. p.gcBgMarkWorker.set(nil) } + // Flush p's write barrier buffer. 
+ if gcphase != _GCoff { + wbBufFlush1(p) + p.gcw.dispose() + } for i := range p.sudogbuf { p.sudogbuf[i] = nil } @@ -3616,10 +3964,18 @@ func procresize(nprocs int32) *p { raceprocdestroy(p.racectx) p.racectx = 0 } + p.gcAssistTime = 0 p.status = _Pdead // can't free P itself because it can be referenced by an M in syscall } + // Trim allp. + if int32(len(allp)) != nprocs { + lock(&allpLock) + allp = allp[:nprocs] + unlock(&allpLock) + } + _g_ := getg() if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs { // continue to use the current P @@ -3691,7 +4047,7 @@ func acquirep1(_p_ *p) { throw("acquirep: already in go") } if _p_.m != 0 || _p_.status != _Pidle { - id := int32(0) + id := int64(0) if _p_.m != 0 { id = _p_.m.ptr().id } @@ -3736,6 +4092,7 @@ func incidlelocked(v int32) { // Check for deadlock situation. // The check is based on number of running M's, if 0 -> deadlock. +// sched.lock must be held. func checkdead() { // For -buildmode=c-shared or -buildmode=c-archive it's OK if // there are no running goroutines. The calling program is @@ -3752,13 +4109,12 @@ func checkdead() { return } - // -1 for sysmon - run := sched.mcount - sched.nmidle - sched.nmidlelocked - 1 + run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys if run > 0 { return } if run < 0 { - print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", sched.mcount, "\n") + print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", mcount(), " nmsys=", sched.nmsys, "\n") throw("checkdead: inconsistent counts") } @@ -3821,6 +4177,11 @@ var forcegcperiod int64 = 2 * 60 * 1e9 // //go:nowritebarrierrec func sysmon() { + lock(&sched.lock) + sched.nmsys++ + checkdead() + unlock(&sched.lock) + // If a heap span goes unused for 5 minutes after a garbage collection, // we hand it back to the operating system. scavengelimit := int64(5 * 60 * 1e9) @@ -3860,15 +4221,11 @@ func sysmon() { } shouldRelax := true if osRelaxMinNS > 0 { - lock(&timers.lock) - if timers.sleeping { - now := nanotime() - next := timers.sleepUntil - if next-now < osRelaxMinNS { - shouldRelax = false - } + next := timeSleepUntil() + now := nanotime() + if next-now < osRelaxMinNS { + shouldRelax = false } - unlock(&timers.lock) } if shouldRelax { osRelax(true) @@ -3892,7 +4249,7 @@ func sysmon() { // poll network if not polled for more than 10ms lastpoll := int64(atomic.Load64(&sched.lastpoll)) now := nanotime() - if lastpoll != 0 && lastpoll+10*1000*1000 < now { + if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now { atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now)) gp := netpoll(false) // non-blocking - returns list of goroutines if gp != nil { @@ -3949,9 +4306,17 @@ const forcePreemptNS = 10 * 1000 * 1000 // 10ms func retake(now int64) uint32 { n := 0 - for i := int32(0); i < gomaxprocs; i++ { + // Prevent allp slice changes. This lock will be completely + // uncontended unless we're already stopping the world. + lock(&allpLock) + // We can't use a range loop over allp because we may + // temporarily drop the allpLock. Hence, we need to re-fetch + // allp each time around the loop. + for i := 0; i < len(allp); i++ { _p_ := allp[i] if _p_ == nil { + // This can happen if procresize has grown + // allp but not yet created new Ps. 
continue } pd := &_p_.sysmontick @@ -3970,6 +4335,8 @@ func retake(now int64) uint32 { if runqempty(_p_) && atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now { continue } + // Drop allpLock so we can take sched.lock. + unlock(&allpLock) // Need to decrement number of idle locked M's // (pretending that one more is running) before the CAS. // Otherwise the M from which we retake can exit the syscall, @@ -3985,6 +4352,7 @@ func retake(now int64) uint32 { handoffp(_p_) } incidlelocked(1) + lock(&allpLock) } else if s == _Prunning { // Preempt G if it's running for too long. t := int64(_p_.schedtick) @@ -3999,6 +4367,7 @@ func retake(now int64) uint32 { preemptone(_p_) } } + unlock(&allpLock) return uint32(n) } @@ -4009,9 +4378,8 @@ func retake(now int64) uint32 { // Returns true if preemption request was issued to at least one goroutine. func preemptall() bool { res := false - for i := int32(0); i < gomaxprocs; i++ { - _p_ := allp[i] - if _p_ == nil || _p_.status != _Prunning { + for _, _p_ := range allp { + if _p_.status != _Prunning { continue } if preemptone(_p_) { @@ -4060,23 +4428,19 @@ func schedtrace(detailed bool) { } lock(&sched.lock) - print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", sched.mcount, " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize) + print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", mcount(), " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize) if detailed { print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n") } // We must be careful while reading data from P's, M's and G's. // Even if we hold schedlock, most data can be changed concurrently. // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil. - for i := int32(0); i < gomaxprocs; i++ { - _p_ := allp[i] - if _p_ == nil { - continue - } + for i, _p_ := range allp { mp := _p_.m.ptr() h := atomic.Load(&_p_.runqhead) t := atomic.Load(&_p_.runqtail) if detailed { - id := int32(-1) + id := int64(-1) if mp != nil { id = mp.id } @@ -4089,7 +4453,7 @@ func schedtrace(detailed bool) { print("[") } print(t - h) - if i == gomaxprocs-1 { + if i == len(allp)-1 { print("]\n") } } @@ -4103,7 +4467,7 @@ func schedtrace(detailed bool) { for mp := allm; mp != nil; mp = mp.alllink { _p_ := mp.p.ptr() gp := mp.curg - lockedg := mp.lockedg + lockedg := mp.lockedg.ptr() id1 := int32(-1) if _p_ != nil { id1 = _p_.id @@ -4123,12 +4487,12 @@ func schedtrace(detailed bool) { for gi := 0; gi < len(allgs); gi++ { gp := allgs[gi] mp := gp.m - lockedm := gp.lockedm - id1 := int32(-1) + lockedm := gp.lockedm.ptr() + id1 := int64(-1) if mp != nil { id1 = mp.id } - id2 := int32(-1) + id2 := int64(-1) if lockedm != nil { id2 = lockedm.id } @@ -4410,22 +4774,25 @@ func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool if stealRunNextG { // Try to steal from _p_.runnext. if next := _p_.runnext; next != 0 { - // Sleep to ensure that _p_ isn't about to run the g we - // are about to steal. - // The important use case here is when the g running on _p_ - // ready()s another g and then almost immediately blocks. - // Instead of stealing runnext in this window, back off - // to give _p_ a chance to schedule runnext. 
This will avoid - // thrashing gs between different Ps. - // A sync chan send/recv takes ~50ns as of time of writing, - // so 3us gives ~50x overshoot. - if GOOS != "windows" { - usleep(3) - } else { - // On windows system timer granularity is 1-15ms, - // which is way too much for this optimization. - // So just yield. - osyield() + if _p_.status == _Prunning { + // Sleep to ensure that _p_ isn't about to run the g + // we are about to steal. + // The important use case here is when the g running + // on _p_ ready()s another g and then almost + // immediately blocks. Instead of stealing runnext + // in this window, back off to give _p_ a chance to + // schedule runnext. This will avoid thrashing gs + // between different Ps. + // A sync chan send/recv takes ~50ns as of time of + // writing, so 3us gives ~50x overshoot. + if GOOS != "windows" { + usleep(3) + } else { + // On windows system timer granularity is + // 1-15ms, which is way too much for this + // optimization. So just yield. + osyield() + } } if !_p_.runnext.cas(next, 0) { continue diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go index 90a6cab874..2ece829071 100644 --- a/src/runtime/proc_test.go +++ b/src/runtime/proc_test.go @@ -655,6 +655,116 @@ func BenchmarkClosureCall(b *testing.B) { _ = sum } +func benchmarkWakeupParallel(b *testing.B, spin func(time.Duration)) { + if runtime.GOMAXPROCS(0) == 1 { + b.Skip("skipping: GOMAXPROCS=1") + } + + wakeDelay := 5 * time.Microsecond + for _, delay := range []time.Duration{ + 0, + 1 * time.Microsecond, + 2 * time.Microsecond, + 5 * time.Microsecond, + 10 * time.Microsecond, + 20 * time.Microsecond, + 50 * time.Microsecond, + 100 * time.Microsecond, + } { + b.Run(delay.String(), func(b *testing.B) { + if b.N == 0 { + return + } + // Start two goroutines, which alternate between being + // sender and receiver in the following protocol: + // + // - The receiver spins for `delay` and then does a + // blocking receive on a channel. + // + // - The sender spins for `delay+wakeDelay` and then + // sends to the same channel. (The addition of + // `wakeDelay` improves the probability that the + // receiver will be blocking when the send occurs when + // the goroutines execute in parallel.) + // + // In each iteration of the benchmark, each goroutine + // acts once as sender and once as receiver, so each + // goroutine spins for delay twice. + // + // BenchmarkWakeupParallel is used to estimate how + // efficiently the scheduler parallelizes goroutines in + // the presence of blocking: + // + // - If both goroutines are executed on the same core, + // an increase in delay by N will increase the time per + // iteration by 4*N, because all 4 delays are + // serialized. + // + // - Otherwise, an increase in delay by N will increase + // the time per iteration by 2*N, and the time per + // iteration is 2 * (runtime overhead + chan + // send/receive pair + delay + wakeDelay). This allows + // the runtime overhead, including the time it takes + // for the unblocked goroutine to be scheduled, to be + // estimated. 
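To make the arithmetic in that comment concrete, here is a small, purely illustrative Go program that evaluates the two per-iteration cost formulas it describes. Runtime overhead and the channel operations themselves are ignored, and the numbers are not measurements; the benchmark body that implements the actual protocol follows immediately below.

package main

import "fmt"

// perIteration returns the spin cost of one benchmark iteration, in the
// same units as delay, under the two scenarios the comment describes.
func perIteration(delay, wakeDelay float64) (sameCore, parallel float64) {
	// Same core: all four spin segments are serialized:
	// two receiver spins (delay) plus two sender spins (delay+wakeDelay).
	sameCore = 2*delay + 2*(delay+wakeDelay)
	// Separate cores: sender and receiver overlap, so the critical path
	// per iteration is roughly 2 * (delay + wakeDelay).
	parallel = 2 * (delay + wakeDelay)
	return
}

func main() {
	const wakeDelay = 5.0 // microseconds, as in the benchmark
	for _, d := range []float64{10, 20} {
		s, p := perIteration(d, wakeDelay)
		fmt.Printf("delay=%gus: same core ~%gus, parallel ~%gus\n", d, s, p)
	}
}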
+ ping, pong := make(chan struct{}), make(chan struct{}) + start := make(chan struct{}) + done := make(chan struct{}) + go func() { + <-start + for i := 0; i < b.N; i++ { + // sender + spin(delay + wakeDelay) + ping <- struct{}{} + // receiver + spin(delay) + <-pong + } + done <- struct{}{} + }() + go func() { + for i := 0; i < b.N; i++ { + // receiver + spin(delay) + <-ping + // sender + spin(delay + wakeDelay) + pong <- struct{}{} + } + done <- struct{}{} + }() + b.ResetTimer() + start <- struct{}{} + <-done + <-done + }) + } +} + +func BenchmarkWakeupParallelSpinning(b *testing.B) { + benchmarkWakeupParallel(b, func(d time.Duration) { + end := time.Now().Add(d) + for time.Now().Before(end) { + // do nothing + } + }) +} + +// sysNanosleep is defined by OS-specific files (such as runtime_linux_test.go) +// to sleep for the given duration. If nil, dependent tests are skipped. +// The implementation should invoke a blocking system call and not +// call time.Sleep, which would deschedule the goroutine. +var sysNanosleep func(d time.Duration) + +func BenchmarkWakeupParallelSyscall(b *testing.B) { + if sysNanosleep == nil { + b.Skipf("skipping on %v; sysNanosleep not defined", runtime.GOOS) + } + benchmarkWakeupParallel(b, func(d time.Duration) { + sysNanosleep(d) + }) +} + type Matrix [][]float64 func BenchmarkMatmult(b *testing.B) { @@ -722,3 +832,44 @@ func matmult(done chan<- struct{}, A, B, C Matrix, i0, i1, j0, j1, k0, k1, thres func TestStealOrder(t *testing.T) { runtime.RunStealOrderTest() } + +func TestLockOSThreadNesting(t *testing.T) { + go func() { + e, i := runtime.LockOSCounts() + if e != 0 || i != 0 { + t.Errorf("want locked counts 0, 0; got %d, %d", e, i) + return + } + runtime.LockOSThread() + runtime.LockOSThread() + runtime.UnlockOSThread() + e, i = runtime.LockOSCounts() + if e != 1 || i != 0 { + t.Errorf("want locked counts 1, 0; got %d, %d", e, i) + return + } + runtime.UnlockOSThread() + e, i = runtime.LockOSCounts() + if e != 0 || i != 0 { + t.Errorf("want locked counts 0, 0; got %d, %d", e, i) + return + } + }() +} + +func TestLockOSThreadExit(t *testing.T) { + testLockOSThreadExit(t, "testprog") +} + +func testLockOSThreadExit(t *testing.T, prog string) { + output := runTestProg(t, prog, "LockOSThreadMain", "GOMAXPROCS=1") + want := "OK\n" + if output != want { + t.Errorf("want %s, got %s\n", want, output) + } + + output = runTestProg(t, prog, "LockOSThreadAlt") + if output != want { + t.Errorf("want %s, got %s\n", want, output) + } +} diff --git a/src/runtime/race.go b/src/runtime/race.go index 49495cc783..2f5713d30e 100644 --- a/src/runtime/race.go +++ b/src/runtime/race.go @@ -4,14 +4,14 @@ // +build race -// Public race detection API, present iff build with -race. - package runtime import ( "unsafe" ) +// Public race detection API, present iff build with -race. + func RaceRead(addr unsafe.Pointer) func RaceWrite(addr unsafe.Pointer) func RaceReadRange(addr unsafe.Pointer, len int) @@ -23,7 +23,69 @@ func RaceErrors() int { return int(n) } -// private interface for the runtime +//go:nosplit + +// RaceAcquire/RaceRelease/RaceReleaseMerge establish happens-before relations +// between goroutines. These inform the race detector about actual synchronization +// that it can't see for some reason (e.g. synchronization within RaceDisable/RaceEnable +// sections of code). +// RaceAcquire establishes a happens-before relation with the preceding +// RaceReleaseMerge on addr up to and including the last RaceRelease on addr. 
+// In terms of the C memory model (C11 §5.1.2.4, §7.17.3), +// RaceAcquire is equivalent to atomic_load(memory_order_acquire). +func RaceAcquire(addr unsafe.Pointer) { + raceacquire(addr) +} + +//go:nosplit + +// RaceRelease performs a release operation on addr that +// can synchronize with a later RaceAcquire on addr. +// +// In terms of the C memory model, RaceRelease is equivalent to +// atomic_store(memory_order_release). +func RaceRelease(addr unsafe.Pointer) { + racerelease(addr) +} + +//go:nosplit + +// RaceReleaseMerge is like RaceRelease, but also establishes a happens-before +// relation with the preceding RaceRelease or RaceReleaseMerge on addr. +// +// In terms of the C memory model, RaceReleaseMerge is equivalent to +// atomic_exchange(memory_order_release). +func RaceReleaseMerge(addr unsafe.Pointer) { + racereleasemerge(addr) +} + +//go:nosplit + +// RaceDisable disables handling of race synchronization events in the current goroutine. +// Handling is re-enabled with RaceEnable. RaceDisable/RaceEnable can be nested. +// Non-synchronization events (memory accesses, function entry/exit) still affect +// the race detector. +func RaceDisable() { + _g_ := getg() + if _g_.raceignore == 0 { + racecall(&__tsan_go_ignore_sync_begin, _g_.racectx, 0, 0, 0) + } + _g_.raceignore++ +} + +//go:nosplit + +// RaceEnable re-enables handling of race events in the current goroutine. +func RaceEnable() { + _g_ := getg() + _g_.raceignore-- + if _g_.raceignore == 0 { + racecall(&__tsan_go_ignore_sync_end, _g_.racectx, 0, 0, 0) + } +} + +// Private interface for the runtime. + const raceenabled = true // For all functions accepting callerpc and pc, @@ -433,43 +495,3 @@ func racereleasemergeg(gp *g, addr unsafe.Pointer) { func racefingo() { racecall(&__tsan_finalizer_goroutine, getg().racectx, 0, 0, 0) } - -//go:nosplit - -func RaceAcquire(addr unsafe.Pointer) { - raceacquire(addr) -} - -//go:nosplit - -func RaceRelease(addr unsafe.Pointer) { - racerelease(addr) -} - -//go:nosplit - -func RaceReleaseMerge(addr unsafe.Pointer) { - racereleasemerge(addr) -} - -//go:nosplit - -// RaceDisable disables handling of race events in the current goroutine. -func RaceDisable() { - _g_ := getg() - if _g_.raceignore == 0 { - racecall(&__tsan_go_ignore_sync_begin, _g_.racectx, 0, 0, 0) - } - _g_.raceignore++ -} - -//go:nosplit - -// RaceEnable re-enables handling of race events in the current goroutine. -func RaceEnable() { - _g_ := getg() - _g_.raceignore-- - if _g_.raceignore == 0 { - racecall(&__tsan_go_ignore_sync_end, _g_.racectx, 0, 0, 0) - } -} diff --git a/src/runtime/race/output_test.go b/src/runtime/race/output_test.go index 13dfc33b47..adf9ce8851 100644 --- a/src/runtime/race/output_test.go +++ b/src/runtime/race/output_test.go @@ -19,6 +19,16 @@ import ( ) func TestOutput(t *testing.T) { + pkgdir, err := ioutil.TempDir("", "go-build-race-output") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(pkgdir) + out, err := exec.Command(testenv.GoToolPath(t), "install", "-race", "-pkgdir="+pkgdir, "-gcflags=all=-l", "testing").CombinedOutput() + if err != nil { + t.Fatalf("go install -race: %v\n%s", err, out) + } + for _, test := range tests { if test.goos != "" && test.goos != runtime.GOOS { t.Logf("test %v runs only on %v, skipping: ", test.name, test.goos) @@ -47,7 +57,7 @@ func TestOutput(t *testing.T) { t.Fatalf("failed to close file: %v", err) } // Pass -l to the compiler to test stack traces. 
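The doc comments moved to the top of race.go above spell out when a program would call these annotations: real synchronization that the detector cannot observe on its own. The following is a contrived, hypothetical sketch of that usage, not a recommended pattern from the library. All names are illustrative, the file would need to be limited to -race builds (these functions only exist under the race build tag), and the channel merely stands in for whatever out-of-band mechanism a real program would synchronize through so that the sketch compiles. The output_test.go hunk resumes below.

// +build race

// Package racedemo is a contrived sketch of the runtime race annotations.
package racedemo

import (
	"runtime"
	"unsafe"
)

var (
	data int
	sig  = make(chan struct{}, 1) // stand-in for sync the detector can't see
)

// publish stores data and then signals a consumer. In real use the signal
// would travel through something the race detector cannot model (another
// process, a device, a RaceDisable'd section); the channel is only here to
// keep the sketch runnable.
func publish(v int) {
	data = v
	// Everything written before this point happens before a matching
	// RaceAcquire on &data — like atomic_store(memory_order_release).
	runtime.RaceRelease(unsafe.Pointer(&data))
	sig <- struct{}{}
}

// consume waits for the signal and reads data. RaceAcquire pairs with the
// RaceRelease in publish — like atomic_load(memory_order_acquire) — so the
// detector does not report the read of data as a race.
func consume() int {
	<-sig
	runtime.RaceAcquire(unsafe.Pointer(&data))
	return data
}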
- cmd := exec.Command(testenv.GoToolPath(t), test.run, "-race", "-gcflags=-l", src) + cmd := exec.Command(testenv.GoToolPath(t), test.run, "-race", "-pkgdir="+pkgdir, "-gcflags=all=-l", src) // GODEBUG spoils program output, GOMAXPROCS makes it flaky. for _, env := range os.Environ() { if strings.HasPrefix(env, "GODEBUG=") || @@ -175,6 +185,7 @@ import "testing" func TestFail(t *testing.T) { done := make(chan bool) x := 0 + _ = x go func() { x = 42 done <- true @@ -186,7 +197,7 @@ func TestFail(t *testing.T) { `, ` ================== --- FAIL: TestFail \(0...s\) -.*main_test.go:13: true +.*main_test.go:14: true .*testing.go:.*: race detected during execution of test FAIL`}, @@ -253,7 +264,7 @@ Previous write at 0x[0-9,a-f]+ by goroutine [0-9]: main\.goCallback\(\) .*/main\.go:27 \+0x[0-9,a-f]+ main._cgoexpwrap_[0-9a-z]+_goCallback\(\) - .*/_cgo_gotypes\.go:[0-9]+ \+0x[0-9,a-f]+ + .*_cgo_gotypes\.go:[0-9]+ \+0x[0-9,a-f]+ Goroutine [0-9] \(running\) created at: runtime\.newextram\(\) @@ -265,6 +276,7 @@ import "testing" func TestFail(t *testing.T) { done := make(chan bool) x := 0 + _ = x go func() { x = 42 done <- true diff --git a/src/runtime/race/testdata/atomic_test.go b/src/runtime/race/testdata/atomic_test.go index 232744b3dd..769c8d7398 100644 --- a/src/runtime/race/testdata/atomic_test.go +++ b/src/runtime/race/testdata/atomic_test.go @@ -14,6 +14,7 @@ import ( func TestNoRaceAtomicAddInt64(t *testing.T) { var x1, x2 int8 + _ = x1 + x2 var s int64 ch := make(chan bool, 2) go func() { @@ -36,6 +37,7 @@ func TestNoRaceAtomicAddInt64(t *testing.T) { func TestRaceAtomicAddInt64(t *testing.T) { var x1, x2 int8 + _ = x1 + x2 var s int64 ch := make(chan bool, 2) go func() { @@ -58,6 +60,7 @@ func TestRaceAtomicAddInt64(t *testing.T) { func TestNoRaceAtomicAddInt32(t *testing.T) { var x1, x2 int8 + _ = x1 + x2 var s int32 ch := make(chan bool, 2) go func() { @@ -80,6 +83,7 @@ func TestNoRaceAtomicAddInt32(t *testing.T) { func TestNoRaceAtomicLoadAddInt32(t *testing.T) { var x int64 + _ = x var s int32 go func() { x = 2 @@ -93,6 +97,7 @@ func TestNoRaceAtomicLoadAddInt32(t *testing.T) { func TestNoRaceAtomicLoadStoreInt32(t *testing.T) { var x int64 + _ = x var s int32 go func() { x = 2 @@ -106,6 +111,7 @@ func TestNoRaceAtomicLoadStoreInt32(t *testing.T) { func TestNoRaceAtomicStoreCASInt32(t *testing.T) { var x int64 + _ = x var s int32 go func() { x = 2 @@ -119,6 +125,7 @@ func TestNoRaceAtomicStoreCASInt32(t *testing.T) { func TestNoRaceAtomicCASLoadInt32(t *testing.T) { var x int64 + _ = x var s int32 go func() { x = 2 @@ -134,6 +141,7 @@ func TestNoRaceAtomicCASLoadInt32(t *testing.T) { func TestNoRaceAtomicCASCASInt32(t *testing.T) { var x int64 + _ = x var s int32 go func() { x = 2 @@ -149,6 +157,7 @@ func TestNoRaceAtomicCASCASInt32(t *testing.T) { func TestNoRaceAtomicCASCASInt32_2(t *testing.T) { var x1, x2 int8 + _ = x1 + x2 var s int32 ch := make(chan bool, 2) go func() { @@ -171,6 +180,7 @@ func TestNoRaceAtomicCASCASInt32_2(t *testing.T) { func TestNoRaceAtomicLoadInt64(t *testing.T) { var x int32 + _ = x var s int64 go func() { x = 2 @@ -184,6 +194,7 @@ func TestNoRaceAtomicLoadInt64(t *testing.T) { func TestNoRaceAtomicCASCASUInt64(t *testing.T) { var x int64 + _ = x var s uint64 go func() { x = 2 @@ -199,6 +210,7 @@ func TestNoRaceAtomicCASCASUInt64(t *testing.T) { func TestNoRaceAtomicLoadStorePointer(t *testing.T) { var x int64 + _ = x var s unsafe.Pointer var y int = 2 var p unsafe.Pointer = unsafe.Pointer(&y) @@ -214,6 +226,7 @@ func TestNoRaceAtomicLoadStorePointer(t 
*testing.T) { func TestNoRaceAtomicStoreCASUint64(t *testing.T) { var x int64 + _ = x var s uint64 go func() { x = 2 diff --git a/src/runtime/race/testdata/chan_test.go b/src/runtime/race/testdata/chan_test.go index 449191639e..7f349c42ed 100644 --- a/src/runtime/race/testdata/chan_test.go +++ b/src/runtime/race/testdata/chan_test.go @@ -12,6 +12,7 @@ import ( func TestNoRaceChanSync(t *testing.T) { v := 0 + _ = v c := make(chan int) go func() { v = 1 @@ -23,6 +24,7 @@ func TestNoRaceChanSync(t *testing.T) { func TestNoRaceChanSyncRev(t *testing.T) { v := 0 + _ = v c := make(chan int) go func() { c <- 0 @@ -34,6 +36,7 @@ func TestNoRaceChanSyncRev(t *testing.T) { func TestNoRaceChanAsync(t *testing.T) { v := 0 + _ = v c := make(chan int, 10) go func() { v = 1 @@ -45,6 +48,7 @@ func TestNoRaceChanAsync(t *testing.T) { func TestRaceChanAsyncRev(t *testing.T) { v := 0 + _ = v c := make(chan int, 10) go func() { c <- 0 @@ -56,6 +60,7 @@ func TestRaceChanAsyncRev(t *testing.T) { func TestNoRaceChanAsyncCloseRecv(t *testing.T) { v := 0 + _ = v c := make(chan int, 10) go func() { v = 1 @@ -72,6 +77,7 @@ func TestNoRaceChanAsyncCloseRecv(t *testing.T) { func TestNoRaceChanAsyncCloseRecv2(t *testing.T) { v := 0 + _ = v c := make(chan int, 10) go func() { v = 1 @@ -83,6 +89,7 @@ func TestNoRaceChanAsyncCloseRecv2(t *testing.T) { func TestNoRaceChanAsyncCloseRecv3(t *testing.T) { v := 0 + _ = v c := make(chan int, 10) go func() { v = 1 @@ -95,6 +102,7 @@ func TestNoRaceChanAsyncCloseRecv3(t *testing.T) { func TestNoRaceChanSyncCloseRecv(t *testing.T) { v := 0 + _ = v c := make(chan int) go func() { v = 1 @@ -111,6 +119,7 @@ func TestNoRaceChanSyncCloseRecv(t *testing.T) { func TestNoRaceChanSyncCloseRecv2(t *testing.T) { v := 0 + _ = v c := make(chan int) go func() { v = 1 @@ -122,6 +131,7 @@ func TestNoRaceChanSyncCloseRecv2(t *testing.T) { func TestNoRaceChanSyncCloseRecv3(t *testing.T) { v := 0 + _ = v c := make(chan int) go func() { v = 1 @@ -134,6 +144,7 @@ func TestNoRaceChanSyncCloseRecv3(t *testing.T) { func TestRaceChanSyncCloseSend(t *testing.T) { v := 0 + _ = v c := make(chan int) go func() { v = 1 @@ -150,6 +161,7 @@ func TestRaceChanSyncCloseSend(t *testing.T) { func TestRaceChanAsyncCloseSend(t *testing.T) { v := 0 + _ = v c := make(chan int, 10) go func() { v = 1 @@ -170,6 +182,7 @@ func TestRaceChanCloseClose(t *testing.T) { compl := make(chan bool, 2) v1 := 0 v2 := 0 + _ = v1 + v2 c := make(chan int) go func() { defer func() { @@ -197,6 +210,7 @@ func TestRaceChanCloseClose(t *testing.T) { func TestRaceChanSendLen(t *testing.T) { v := 0 + _ = v c := make(chan int, 10) go func() { v = 1 @@ -210,6 +224,7 @@ func TestRaceChanSendLen(t *testing.T) { func TestRaceChanRecvLen(t *testing.T) { v := 0 + _ = v c := make(chan int, 10) c <- 1 go func() { @@ -226,6 +241,7 @@ func TestRaceChanSendSend(t *testing.T) { compl := make(chan bool, 2) v1 := 0 v2 := 0 + _ = v1 + v2 c := make(chan int, 1) go func() { v1 = 1 @@ -264,6 +280,7 @@ func TestNoRaceChanPtr(t *testing.T) { func TestRaceChanWrongSend(t *testing.T) { v1 := 0 v2 := 0 + _ = v1 + v2 c := make(chan int, 2) go func() { v1 = 1 @@ -284,6 +301,7 @@ func TestRaceChanWrongSend(t *testing.T) { func TestRaceChanWrongClose(t *testing.T) { v1 := 0 v2 := 0 + _ = v1 + v2 c := make(chan int, 1) done := make(chan bool) go func() { @@ -561,6 +579,7 @@ func TestRaceChanItselfCap(t *testing.T) { func TestRaceChanCloseLen(t *testing.T) { v := 0 + _ = v c := make(chan int, 10) c <- 0 go func() { @@ -587,6 +606,7 @@ func TestNoRaceChanMutex(t *testing.T) { 
done := make(chan struct{}) mtx := make(chan struct{}, 1) data := 0 + _ = data go func() { mtx <- struct{}{} data = 42 @@ -604,6 +624,7 @@ func TestNoRaceSelectMutex(t *testing.T) { mtx := make(chan struct{}, 1) aux := make(chan bool) data := 0 + _ = data go func() { select { case mtx <- struct{}{}: @@ -632,6 +653,7 @@ func TestRaceChanSem(t *testing.T) { done := make(chan struct{}) mtx := make(chan bool, 2) data := 0 + _ = data go func() { mtx <- true data = 42 diff --git a/src/runtime/race/testdata/finalizer_test.go b/src/runtime/race/testdata/finalizer_test.go index 222cbf67a8..3ac33d2b59 100644 --- a/src/runtime/race/testdata/finalizer_test.go +++ b/src/runtime/race/testdata/finalizer_test.go @@ -53,6 +53,7 @@ func TestNoRaceFinGlobal(t *testing.T) { func TestRaceFin(t *testing.T) { c := make(chan bool) y := 0 + _ = y go func() { x := new(string) runtime.SetFinalizer(x, func(x *string) { diff --git a/src/runtime/race/testdata/map_test.go b/src/runtime/race/testdata/map_test.go index a8d8148d0e..88e735ecd3 100644 --- a/src/runtime/race/testdata/map_test.go +++ b/src/runtime/race/testdata/map_test.go @@ -130,6 +130,7 @@ func TestRaceMapLenDelete(t *testing.T) { func TestRaceMapVariable(t *testing.T) { ch := make(chan bool, 1) m := make(map[int]int) + _ = m go func() { m = make(map[int]int) ch <- true @@ -230,6 +231,7 @@ func TestRaceMapAssignMultipleReturn(t *testing.T) { conns[1] = []int{0} ch := make(chan bool, 1) var err error + _ = err go func() { conns[1][0], err = connect() ch <- true diff --git a/src/runtime/race/testdata/mop_test.go b/src/runtime/race/testdata/mop_test.go index 560a762315..b60cabfe86 100644 --- a/src/runtime/race/testdata/mop_test.go +++ b/src/runtime/race/testdata/mop_test.go @@ -60,6 +60,7 @@ func TestRaceIntRWGlobalFuncs(t *testing.T) { func TestRaceIntRWClosures(t *testing.T) { var x, y int + _ = y ch := make(chan int, 2) go func() { @@ -76,6 +77,7 @@ func TestRaceIntRWClosures(t *testing.T) { func TestNoRaceIntRWClosures(t *testing.T) { var x, y int + _ = y ch := make(chan int, 1) go func() { @@ -93,6 +95,7 @@ func TestNoRaceIntRWClosures(t *testing.T) { func TestRaceInt32RWClosures(t *testing.T) { var x, y int32 + _ = y ch := make(chan bool, 2) go func() { @@ -168,6 +171,7 @@ func TestRaceCaseCondition2(t *testing.T) { func TestRaceCaseBody(t *testing.T) { var x, y int + _ = y ch := make(chan int, 2) go func() { @@ -189,6 +193,7 @@ func TestRaceCaseBody(t *testing.T) { func TestNoRaceCaseFallthrough(t *testing.T) { var x, y, z int + _ = y ch := make(chan int, 2) z = 1 @@ -210,6 +215,7 @@ func TestNoRaceCaseFallthrough(t *testing.T) { func TestRaceCaseFallthrough(t *testing.T) { var x, y, z int + _ = y ch := make(chan int, 2) z = 1 @@ -323,6 +329,7 @@ func TestRaceRange(t *testing.T) { const N = 2 var a [N]int var x, y int + _ = x + y done := make(chan bool, N) for i, v := range a { go func(i int) { @@ -433,6 +440,7 @@ func TestNoRaceForIncr(t *testing.T) { func TestRacePlus(t *testing.T) { var x, y, z int + _ = y ch := make(chan int, 2) go func() { @@ -449,6 +457,7 @@ func TestRacePlus(t *testing.T) { func TestRacePlus2(t *testing.T) { var x, y, z int + _ = y ch := make(chan int, 2) go func() { @@ -465,6 +474,7 @@ func TestRacePlus2(t *testing.T) { func TestNoRacePlus(t *testing.T) { var x, y, z, f int + _ = x + y + f ch := make(chan int, 2) go func() { @@ -481,6 +491,7 @@ func TestNoRacePlus(t *testing.T) { func TestRaceComplement(t *testing.T) { var x, y, z int + _ = x ch := make(chan int, 2) go func() { @@ -497,6 +508,7 @@ func TestRaceComplement(t 
*testing.T) { func TestRaceDiv(t *testing.T) { var x, y, z int + _ = x ch := make(chan int, 2) go func() { @@ -513,6 +525,7 @@ func TestRaceDiv(t *testing.T) { func TestRaceDivConst(t *testing.T) { var x, y, z uint32 + _ = x ch := make(chan int, 2) go func() { @@ -529,6 +542,7 @@ func TestRaceDivConst(t *testing.T) { func TestRaceMod(t *testing.T) { var x, y, z int + _ = x ch := make(chan int, 2) go func() { @@ -545,6 +559,7 @@ func TestRaceMod(t *testing.T) { func TestRaceModConst(t *testing.T) { var x, y, z int + _ = x ch := make(chan int, 2) go func() { @@ -561,6 +576,7 @@ func TestRaceModConst(t *testing.T) { func TestRaceRotate(t *testing.T) { var x, y, z uint32 + _ = x ch := make(chan int, 2) go func() { @@ -932,6 +948,7 @@ func TestRaceFuncVariableRW(t *testing.T) { func TestRaceFuncVariableWW(t *testing.T) { var f func(x int) int + _ = f ch := make(chan bool, 1) go func() { f = func(x int) int { @@ -948,6 +965,7 @@ func TestRaceFuncVariableWW(t *testing.T) { // This one should not belong to mop_test func TestRacePanic(t *testing.T) { var x int + _ = x var zero int = 0 ch := make(chan bool, 2) go func() { @@ -1284,6 +1302,7 @@ func TestNoRaceFuncUnlock(t *testing.T) { ch := make(chan bool, 1) var mu sync.Mutex x := 0 + _ = x go func() { mu.Lock() x = 42 @@ -1812,6 +1831,7 @@ func TestNoRaceAsFunc4(t *testing.T) { c := make(chan bool, 1) var mu sync.Mutex x := 0 + _ = x go func() { x = func() int { // Write of x must be under the mutex. mu.Lock() @@ -2042,6 +2062,7 @@ func TestNoRaceTinyAlloc(t *testing.T) { const P = 4 const N = 1e6 var tinySink *byte + _ = tinySink done := make(chan bool) for p := 0; p < P; p++ { go func() { diff --git a/src/runtime/race/testdata/mutex_test.go b/src/runtime/race/testdata/mutex_test.go index 3cf03ae6b8..cbed2d370c 100644 --- a/src/runtime/race/testdata/mutex_test.go +++ b/src/runtime/race/testdata/mutex_test.go @@ -13,6 +13,7 @@ import ( func TestNoRaceMutex(t *testing.T) { var mu sync.Mutex var x int16 = 0 + _ = x ch := make(chan bool, 2) go func() { mu.Lock() @@ -33,6 +34,7 @@ func TestNoRaceMutex(t *testing.T) { func TestRaceMutex(t *testing.T) { var mu sync.Mutex var x int16 = 0 + _ = x ch := make(chan bool, 2) go func() { x = 1 @@ -54,6 +56,7 @@ func TestRaceMutex2(t *testing.T) { var mu1 sync.Mutex var mu2 sync.Mutex var x int8 = 0 + _ = x ch := make(chan bool, 2) go func() { mu1.Lock() @@ -74,6 +77,7 @@ func TestRaceMutex2(t *testing.T) { func TestNoRaceMutexPureHappensBefore(t *testing.T) { var mu sync.Mutex var x int16 = 0 + _ = x ch := make(chan bool, 2) go func() { x = 1 @@ -96,6 +100,7 @@ func TestNoRaceMutexSemaphore(t *testing.T) { var mu sync.Mutex ch := make(chan bool, 2) x := 0 + _ = x mu.Lock() go func() { x = 1 diff --git a/src/runtime/race/testdata/rwmutex_test.go b/src/runtime/race/testdata/rwmutex_test.go index 7ac829d759..39219e58ae 100644 --- a/src/runtime/race/testdata/rwmutex_test.go +++ b/src/runtime/race/testdata/rwmutex_test.go @@ -14,6 +14,7 @@ func TestRaceMutexRWMutex(t *testing.T) { var mu1 sync.Mutex var mu2 sync.RWMutex var x int16 = 0 + _ = x ch := make(chan bool, 2) go func() { mu1.Lock() @@ -34,6 +35,7 @@ func TestRaceMutexRWMutex(t *testing.T) { func TestNoRaceRWMutex(t *testing.T) { var mu sync.RWMutex var x, y int64 = 0, 1 + _ = y ch := make(chan bool, 2) go func() { mu.Lock() diff --git a/src/runtime/race/testdata/select_test.go b/src/runtime/race/testdata/select_test.go index 9969f47e8e..3827867687 100644 --- a/src/runtime/race/testdata/select_test.go +++ b/src/runtime/race/testdata/select_test.go @@ -11,6 
+11,7 @@ import ( func TestNoRaceSelect1(t *testing.T) { var x int + _ = x compl := make(chan bool) c := make(chan bool) c1 := make(chan bool) @@ -36,6 +37,7 @@ func TestNoRaceSelect1(t *testing.T) { func TestNoRaceSelect2(t *testing.T) { var x int + _ = x compl := make(chan bool) c := make(chan bool) c1 := make(chan bool) @@ -55,6 +57,7 @@ func TestNoRaceSelect2(t *testing.T) { func TestNoRaceSelect3(t *testing.T) { var x int + _ = x compl := make(chan bool) c := make(chan bool, 10) c1 := make(chan bool) @@ -112,6 +115,7 @@ func TestNoRaceSelect4(t *testing.T) { func TestNoRaceSelect5(t *testing.T) { test := func(sel, needSched bool) { var x int + _ = x ch := make(chan bool) c1 := make(chan bool) @@ -158,6 +162,7 @@ func TestNoRaceSelect5(t *testing.T) { func TestRaceSelect1(t *testing.T) { var x int + _ = x compl := make(chan bool, 2) c := make(chan bool) c1 := make(chan bool) @@ -182,6 +187,7 @@ func TestRaceSelect1(t *testing.T) { func TestRaceSelect2(t *testing.T) { var x int + _ = x compl := make(chan bool) c := make(chan bool) c1 := make(chan bool) @@ -200,6 +206,7 @@ func TestRaceSelect2(t *testing.T) { func TestRaceSelect3(t *testing.T) { var x int + _ = x compl := make(chan bool) c := make(chan bool) c1 := make(chan bool) diff --git a/src/runtime/race/testdata/sync_test.go b/src/runtime/race/testdata/sync_test.go index d48680d5e6..2b2d95d76b 100644 --- a/src/runtime/race/testdata/sync_test.go +++ b/src/runtime/race/testdata/sync_test.go @@ -12,6 +12,7 @@ import ( func TestNoRaceCond(t *testing.T) { x := 0 + _ = x condition := 0 var mu sync.Mutex cond := sync.NewCond(&mu) @@ -35,6 +36,7 @@ func TestRaceCond(t *testing.T) { var mu sync.Mutex cond := sync.NewCond(&mu) x := 0 + _ = x condition := 0 go func() { time.Sleep(10 * time.Millisecond) // Enter cond.Wait loop @@ -67,6 +69,7 @@ func TestRaceAnnounceThreads(t *testing.T) { allDone := make(chan bool, N) var x int + _ = x var f, g, h func() f = func() { @@ -133,6 +136,7 @@ func TestNoRaceAfterFunc2(t *testing.T) { func TestNoRaceAfterFunc3(t *testing.T) { c := make(chan bool, 1) x := 0 + _ = x time.AfterFunc(1e7, func() { x = 1 c <- true @@ -143,6 +147,7 @@ func TestNoRaceAfterFunc3(t *testing.T) { func TestRaceAfterFunc3(t *testing.T) { c := make(chan bool, 2) x := 0 + _ = x time.AfterFunc(1e7, func() { x = 1 c <- true @@ -161,6 +166,7 @@ func TestRaceAfterFunc3(t *testing.T) { // comprehensible. 
func TestRaceGoroutineCreationStack(t *testing.T) { var x int + _ = x var ch = make(chan bool, 1) f1 := func() { diff --git a/src/runtime/race/testdata/waitgroup_test.go b/src/runtime/race/testdata/waitgroup_test.go index ff152b0abe..169337315b 100644 --- a/src/runtime/race/testdata/waitgroup_test.go +++ b/src/runtime/race/testdata/waitgroup_test.go @@ -13,6 +13,7 @@ import ( func TestNoRaceWaitGroup(t *testing.T) { var x int + _ = x var wg sync.WaitGroup n := 1 for i := 0; i < n; i++ { @@ -28,6 +29,7 @@ func TestNoRaceWaitGroup(t *testing.T) { func TestRaceWaitGroup(t *testing.T) { var x int + _ = x var wg sync.WaitGroup n := 2 for i := 0; i < n; i++ { @@ -43,6 +45,7 @@ func TestRaceWaitGroup(t *testing.T) { func TestNoRaceWaitGroup2(t *testing.T) { var x int + _ = x var wg sync.WaitGroup wg.Add(1) go func() { @@ -56,6 +59,7 @@ func TestNoRaceWaitGroup2(t *testing.T) { // incrementing counter in Add and locking wg's mutex func TestRaceWaitGroupAsMutex(t *testing.T) { var x int + _ = x var wg sync.WaitGroup c := make(chan bool, 2) go func() { @@ -82,6 +86,7 @@ func TestRaceWaitGroupAsMutex(t *testing.T) { func TestRaceWaitGroupWrongWait(t *testing.T) { c := make(chan bool, 2) var x int + _ = x var wg sync.WaitGroup go func() { wg.Add(1) @@ -187,6 +192,7 @@ func TestNoRaceWaitGroupMultipleWait3(t *testing.T) { // Correct usage but still a race func TestRaceWaitGroup2(t *testing.T) { var x int + _ = x var wg sync.WaitGroup wg.Add(2) go func() { @@ -202,6 +208,7 @@ func TestRaceWaitGroup2(t *testing.T) { func TestNoRaceWaitGroupPanicRecover(t *testing.T) { var x int + _ = x var wg sync.WaitGroup defer func() { err := recover() @@ -219,6 +226,7 @@ func TestNoRaceWaitGroupPanicRecover(t *testing.T) { // Is it possible to get a race by synchronization via panic? 
func TestNoRaceWaitGroupPanicRecover2(t *testing.T) { var x int + _ = x var wg sync.WaitGroup ch := make(chan bool, 1) var f func() = func() { diff --git a/src/runtime/rt0_android_386.s b/src/runtime/rt0_android_386.s index 9d20fc8f89..eabdf81235 100644 --- a/src/runtime/rt0_android_386.s +++ b/src/runtime/rt0_android_386.s @@ -4,21 +4,13 @@ #include "textflag.h" -TEXT _rt0_386_android(SB),NOSPLIT,$8 - MOVL 8(SP), AX // argc - LEAL 12(SP), BX // argv - MOVL AX, 0(SP) - MOVL BX, 4(SP) - CALL main(SB) - INT $3 +TEXT _rt0_386_android(SB),NOSPLIT,$0 + JMP _rt0_386(SB) TEXT _rt0_386_android_lib(SB),NOSPLIT,$0 PUSHL $_rt0_386_android_argv(SB) // argv PUSHL $1 // argc - CALL _rt0_386_linux_lib(SB) - POPL AX - POPL AX - RET + JMP _rt0_386_lib(SB) DATA _rt0_386_android_argv+0x00(SB)/4,$_rt0_386_android_argv0(SB) DATA _rt0_386_android_argv+0x04(SB)/4,$0 // argv terminate diff --git a/src/runtime/rt0_android_amd64.s b/src/runtime/rt0_android_amd64.s index 6420c9f35d..6bda3bfcc1 100644 --- a/src/runtime/rt0_android_amd64.s +++ b/src/runtime/rt0_android_amd64.s @@ -5,16 +5,12 @@ #include "textflag.h" TEXT _rt0_amd64_android(SB),NOSPLIT,$-8 - MOVQ 0(SP), DI // argc - LEAQ 8(SP), SI // argv - MOVQ $main(SB), AX - JMP AX + JMP _rt0_amd64(SB) TEXT _rt0_amd64_android_lib(SB),NOSPLIT,$0 MOVQ $1, DI // argc MOVQ $_rt0_amd64_android_argv(SB), SI // argv - MOVQ $_rt0_amd64_linux_lib(SB), AX - JMP AX + JMP _rt0_amd64_lib(SB) DATA _rt0_amd64_android_argv+0x00(SB)/8,$_rt0_amd64_android_argv0(SB) DATA _rt0_amd64_android_argv+0x08(SB)/8,$0 // end argv diff --git a/src/runtime/rt0_android_arm.s b/src/runtime/rt0_android_arm.s index 189e290e35..1246238be0 100644 --- a/src/runtime/rt0_android_arm.s +++ b/src/runtime/rt0_android_arm.s @@ -10,13 +10,10 @@ TEXT _rt0_arm_android(SB),NOSPLIT,$-4 MOVW $_rt0_arm_linux1(SB), R4 B (R4) -// When building with -buildmode=c-shared, this symbol is called when the shared -// library is loaded. TEXT _rt0_arm_android_lib(SB),NOSPLIT,$0 MOVW $1, R0 // argc MOVW $_rt0_arm_android_argv(SB), R1 // **argv - BL _rt0_arm_linux_lib(SB) - RET + B _rt0_arm_lib(SB) DATA _rt0_arm_android_argv+0x00(SB)/4,$_rt0_arm_android_argv0(SB) DATA _rt0_arm_android_argv+0x04(SB)/4,$0 // end argv diff --git a/src/runtime/rt0_darwin_386.s b/src/runtime/rt0_darwin_386.s index 6b404db3a4..a8d3a796d4 100644 --- a/src/runtime/rt0_darwin_386.s +++ b/src/runtime/rt0_darwin_386.s @@ -4,72 +4,14 @@ #include "textflag.h" -TEXT _rt0_386_darwin(SB),NOSPLIT,$8 - MOVL 8(SP), AX - LEAL 12(SP), BX - MOVL AX, 0(SP) - MOVL BX, 4(SP) - CALL main(SB) - INT $3 +TEXT _rt0_386_darwin(SB),NOSPLIT,$0 + JMP _rt0_386(SB) -// With -buildmode=c-archive, this symbol is called from a global constructor. TEXT _rt0_386_darwin_lib(SB),NOSPLIT,$0 - PUSHL BP - MOVL SP, BP - PUSHL BX - PUSHL SI - PUSHL DI - - MOVL 8(BP), AX - MOVL AX, _rt0_386_darwin_lib_argc<>(SB) - MOVL 12(BP), AX - MOVL AX, _rt0_386_darwin_lib_argv<>(SB) - - // Synchronous initialization. - MOVL $runtime·libpreinit(SB), AX - CALL AX - - SUBL $12, SP - - // Create a new thread to do the runtime initialization and return. 
- MOVL _cgo_sys_thread_create(SB), AX - TESTL AX, AX - JZ nocgo - MOVL $_rt0_386_darwin_lib_go(SB), BX - MOVL BX, 0(SP) - MOVL $0, 4(SP) - CALL AX - JMP restore - -nocgo: - MOVL $0x800000, 0(SP) // stacksize = 8192KB - MOVL $_rt0_386_darwin_lib_go(SB), AX - MOVL AX, 4(SP) // fn - MOVL $0, 8(SP) // fnarg - MOVL $runtime·newosproc0(SB), AX - CALL AX - -restore: - ADDL $12, SP - POPL DI - POPL SI - POPL BX - POPL BP - RET - -TEXT _rt0_386_darwin_lib_go(SB),NOSPLIT,$12 - MOVL _rt0_386_darwin_lib_argc<>(SB), AX - MOVL AX, 0(SP) - MOVL _rt0_386_darwin_lib_argv<>(SB), AX - MOVL AX, 4(SP) - MOVL $runtime·rt0_go(SB), AX - CALL AX - RET - -DATA _rt0_386_darwin_lib_argc<>(SB)/4, $0 -GLOBL _rt0_386_darwin_lib_argc<>(SB),NOPTR, $4 -DATA _rt0_386_darwin_lib_argv<>(SB)/4, $0 -GLOBL _rt0_386_darwin_lib_argv<>(SB),NOPTR, $4 + JMP _rt0_386_lib(SB) TEXT main(SB),NOSPLIT,$0 + // Remove the return address from the stack. + // rt0_go doesn't expect it to be there. + ADDL $4, SP JMP runtime·rt0_go(SB) diff --git a/src/runtime/rt0_darwin_amd64.s b/src/runtime/rt0_darwin_amd64.s index 655e77a86b..ed804d47c5 100644 --- a/src/runtime/rt0_darwin_amd64.s +++ b/src/runtime/rt0_darwin_amd64.s @@ -5,75 +5,9 @@ #include "textflag.h" TEXT _rt0_amd64_darwin(SB),NOSPLIT,$-8 - LEAQ 8(SP), SI // argv - MOVQ 0(SP), DI // argc - MOVQ $main(SB), AX - JMP AX + JMP _rt0_amd64(SB) // When linking with -shared, this symbol is called when the shared library // is loaded. -TEXT _rt0_amd64_darwin_lib(SB),NOSPLIT,$0x58 - // Align stack. We don't know whether Go is adding a frame pointer here or not. - MOVQ SP, R8 - SUBQ $16, R8 - ANDQ $~15, R8 - XCHGQ SP, R8 - - MOVQ R8, 0x48(SP) - MOVQ BX, 0x18(SP) - MOVQ BP, 0x20(SP) - MOVQ R12, 0x28(SP) - MOVQ R13, 0x30(SP) - MOVQ R14, 0x38(SP) - MOVQ R15, 0x40(SP) - - MOVQ DI, _rt0_amd64_darwin_lib_argc<>(SB) - MOVQ SI, _rt0_amd64_darwin_lib_argv<>(SB) - - // Synchronous initialization. - MOVQ $runtime·libpreinit(SB), AX - CALL AX - - // Create a new thread to do the runtime initialization and return. 
- MOVQ _cgo_sys_thread_create(SB), AX - TESTQ AX, AX - JZ nocgo - MOVQ $_rt0_amd64_darwin_lib_go(SB), DI - MOVQ $0, SI - CALL AX - JMP restore - -nocgo: - MOVQ $8388608, 0(SP) // stacksize - MOVQ $_rt0_amd64_darwin_lib_go(SB), AX - MOVQ AX, 8(SP) // fn - MOVQ $0, 16(SP) // fnarg - MOVQ $runtime·newosproc0(SB), AX - CALL AX - -restore: - MOVQ 0x18(SP), BX - MOVQ 0x20(SP), BP - MOVQ 0x28(SP), R12 - MOVQ 0x30(SP), R13 - MOVQ 0x38(SP), R14 - MOVQ 0x40(SP), R15 - - MOVQ 0x48(SP), R8 - MOVQ R8, SP - RET - -TEXT _rt0_amd64_darwin_lib_go(SB),NOSPLIT,$0 - MOVQ _rt0_amd64_darwin_lib_argc<>(SB), DI - MOVQ _rt0_amd64_darwin_lib_argv<>(SB), SI - MOVQ $runtime·rt0_go(SB), AX - JMP AX - -DATA _rt0_amd64_darwin_lib_argc<>(SB)/8, $0 -GLOBL _rt0_amd64_darwin_lib_argc<>(SB),NOPTR, $8 -DATA _rt0_amd64_darwin_lib_argv<>(SB)/8, $0 -GLOBL _rt0_amd64_darwin_lib_argv<>(SB),NOPTR, $8 - -TEXT main(SB),NOSPLIT,$-8 - MOVQ $runtime·rt0_go(SB), AX - JMP AX +TEXT _rt0_amd64_darwin_lib(SB),NOSPLIT,$0 + JMP _rt0_amd64_lib(SB) diff --git a/src/runtime/rt0_darwin_arm.s b/src/runtime/rt0_darwin_arm.s index 526d88f13d..71fbe5f68a 100644 --- a/src/runtime/rt0_darwin_arm.s +++ b/src/runtime/rt0_darwin_arm.s @@ -4,94 +4,8 @@ #include "textflag.h" -TEXT _rt0_arm_darwin(SB),7,$-4 - // prepare arguments for main (_rt0_go) - MOVW (R13), R0 // argc - MOVW $4(R13), R1 // argv - MOVW $main(SB), R4 - B (R4) +TEXT _rt0_arm_darwin(SB),7,$0 + B _rt0_asm(SB) -// When linking with -buildmode=c-archive or -buildmode=c-shared, -// this symbol is called from a global initialization function. -// -// Note that all currently shipping darwin/arm platforms require -// cgo and do not support c-shared. -TEXT _rt0_arm_darwin_lib(SB),NOSPLIT,$104 - // Preserve callee-save registers. - MOVW R4, 12(R13) - MOVW R5, 16(R13) - MOVW R6, 20(R13) - MOVW R7, 24(R13) - MOVW R8, 28(R13) - MOVW R11, 32(R13) - - MOVD F8, (32+8*1)(R13) - MOVD F9, (32+8*2)(R13) - MOVD F10, (32+8*3)(R13) - MOVD F11, (32+8*4)(R13) - MOVD F12, (32+8*5)(R13) - MOVD F13, (32+8*6)(R13) - MOVD F14, (32+8*7)(R13) - MOVD F15, (32+8*8)(R13) - - MOVW R0, _rt0_arm_darwin_lib_argc<>(SB) - MOVW R1, _rt0_arm_darwin_lib_argv<>(SB) - - // Synchronous initialization. - MOVW $runtime·libpreinit(SB), R3 - CALL (R3) - - // Create a new thread to do the runtime initialization and return. - MOVW _cgo_sys_thread_create(SB), R3 - CMP $0, R3 - B.EQ nocgo - MOVW $_rt0_arm_darwin_lib_go(SB), R0 - MOVW $0, R1 - BL (R3) - B rr -nocgo: - MOVW $0x400000, R0 - MOVW R0, (R13) // stacksize - MOVW $_rt0_arm_darwin_lib_go(SB), R0 - MOVW R0, 4(R13) // fn - MOVW $0, R0 - MOVW R0, 8(R13) // fnarg - MOVW $runtime·newosproc0(SB), R3 - BL (R3) -rr: - // Restore callee-save registers and return. 
- MOVW 12(R13), R4 - MOVW 16(R13), R5 - MOVW 20(R13), R6 - MOVW 24(R13), R7 - MOVW 28(R13), R8 - MOVW 32(R13), R11 - MOVD (32+8*1)(R13), F8 - MOVD (32+8*2)(R13), F9 - MOVD (32+8*3)(R13), F10 - MOVD (32+8*4)(R13), F11 - MOVD (32+8*5)(R13), F12 - MOVD (32+8*6)(R13), F13 - MOVD (32+8*7)(R13), F14 - MOVD (32+8*8)(R13), F15 - RET - - -TEXT _rt0_arm_darwin_lib_go(SB),NOSPLIT,$0 - MOVW _rt0_arm_darwin_lib_argc<>(SB), R0 - MOVW _rt0_arm_darwin_lib_argv<>(SB), R1 - MOVW R0, (R13) - MOVW R1, 4(R13) - MOVW $runtime·rt0_go(SB), R4 - B (R4) - -DATA _rt0_arm_darwin_lib_argc<>(SB)/4, $0 -GLOBL _rt0_arm_darwin_lib_argc<>(SB),NOPTR, $4 -DATA _rt0_arm_darwin_lib_argv<>(SB)/4, $0 -GLOBL _rt0_arm_darwin_lib_argv<>(SB),NOPTR, $4 - -TEXT main(SB),NOSPLIT,$-8 - // save argc and argv onto stack - MOVM.DB.W [R0-R1], (R13) - MOVW $runtime·rt0_go(SB), R4 - B (R4) +TEXT _rt0_arm_darwin_lib(SB),NOSPLIT,$0 + B _rt0_arm_lib(SB) diff --git a/src/runtime/rt0_dragonfly_amd64.s b/src/runtime/rt0_dragonfly_amd64.s index fb56618d8f..e76f9b9b52 100644 --- a/src/runtime/rt0_dragonfly_amd64.s +++ b/src/runtime/rt0_dragonfly_amd64.s @@ -4,12 +4,11 @@ #include "textflag.h" +// On Dragonfly argc/argv are passed in DI, not SP, so we can't use _rt0_amd64. TEXT _rt0_amd64_dragonfly(SB),NOSPLIT,$-8 LEAQ 8(DI), SI // argv MOVQ 0(DI), DI // argc - MOVQ $main(SB), AX - JMP AX + JMP runtime·rt0_go(SB) -TEXT main(SB),NOSPLIT,$-8 - MOVQ $runtime·rt0_go(SB), AX - JMP AX +TEXT _rt0_amd64_dragonfly_lib(SB),NOSPLIT,$0 + JMP _rt0_amd64_lib(SB) diff --git a/src/runtime/rt0_freebsd_386.s b/src/runtime/rt0_freebsd_386.s index cd7a915f84..1808059838 100644 --- a/src/runtime/rt0_freebsd_386.s +++ b/src/runtime/rt0_freebsd_386.s @@ -4,13 +4,14 @@ #include "textflag.h" -TEXT _rt0_386_freebsd(SB),NOSPLIT,$8 - MOVL 8(SP), AX - LEAL 12(SP), BX - MOVL AX, 0(SP) - MOVL BX, 4(SP) - CALL main(SB) - INT $3 +TEXT _rt0_386_freebsd(SB),NOSPLIT,$0 + JMP _rt0_386(SB) + +TEXT _rt0_386_freebsd_lib(SB),NOSPLIT,$0 + JMP _rt0_386_lib(SB) TEXT main(SB),NOSPLIT,$0 + // Remove the return address from the stack. + // rt0_go doesn't expect it to be there. + ADDL $4, SP JMP runtime·rt0_go(SB) diff --git a/src/runtime/rt0_freebsd_amd64.s b/src/runtime/rt0_freebsd_amd64.s index 7989f7c3e9..ccc48f66b4 100644 --- a/src/runtime/rt0_freebsd_amd64.s +++ b/src/runtime/rt0_freebsd_amd64.s @@ -4,12 +4,11 @@ #include "textflag.h" +// On FreeBSD argc/argv are passed in DI, not SP, so we can't use _rt0_amd64. 
TEXT _rt0_amd64_freebsd(SB),NOSPLIT,$-8 LEAQ 8(DI), SI // argv MOVQ 0(DI), DI // argc - MOVQ $main(SB), AX - JMP AX + JMP runtime·rt0_go(SB) -TEXT main(SB),NOSPLIT,$-8 - MOVQ $runtime·rt0_go(SB), AX - JMP AX +TEXT _rt0_amd64_freebsd_lib(SB),NOSPLIT,$0 + JMP _rt0_amd64_lib(SB) diff --git a/src/runtime/rt0_freebsd_arm.s b/src/runtime/rt0_freebsd_arm.s index e1bb13d53a..62ecd9aeb5 100644 --- a/src/runtime/rt0_freebsd_arm.s +++ b/src/runtime/rt0_freebsd_arm.s @@ -4,13 +4,8 @@ #include "textflag.h" -TEXT _rt0_arm_freebsd(SB),NOSPLIT,$-4 - MOVW (R13), R0 // argc - MOVW $4(R13), R1 // argv - MOVM.DB.W [R0-R1], (R13) - B runtime·rt0_go(SB) +TEXT _rt0_arm_freebsd(SB),NOSPLIT,$0 + B _rt0_arm(SB) -TEXT main(SB),NOSPLIT,$-4 - MOVM.DB.W [R0-R1], (R13) - MOVW $runtime·rt0_go(SB), R4 - B (R4) +TEXT _rt0_arm_freebsd_lib(SB),NOSPLIT,$0 + B _rt0_arm_lib(SB) diff --git a/src/runtime/rt0_linux_386.s b/src/runtime/rt0_linux_386.s index 23bfc98b10..325066fc1d 100644 --- a/src/runtime/rt0_linux_386.s +++ b/src/runtime/rt0_linux_386.s @@ -4,72 +4,14 @@ #include "textflag.h" -TEXT _rt0_386_linux(SB),NOSPLIT,$8 - MOVL 8(SP), AX - LEAL 12(SP), BX - MOVL AX, 0(SP) - MOVL BX, 4(SP) - CALL main(SB) - INT $3 +TEXT _rt0_386_linux(SB),NOSPLIT,$0 + JMP _rt0_386(SB) -// When building with -buildmode=c-shared, this symbol is called when the shared -// library is loaded. TEXT _rt0_386_linux_lib(SB),NOSPLIT,$0 - PUSHL BP - MOVL SP, BP - PUSHL BX - PUSHL SI - PUSHL DI - - MOVL 8(BP), AX - MOVL AX, _rt0_386_linux_lib_argc<>(SB) - MOVL 12(BP), AX - MOVL AX, _rt0_386_linux_lib_argv<>(SB) - - // Synchronous initialization. - MOVL $runtime·libpreinit(SB), AX - CALL AX - - SUBL $8, SP - - // Create a new thread to do the runtime initialization. - MOVL _cgo_sys_thread_create(SB), AX - TESTL AX, AX - JZ nocgo - MOVL $_rt0_386_linux_lib_go(SB), BX - MOVL BX, 0(SP) - MOVL $0, 4(SP) - CALL AX - JMP restore - -nocgo: - MOVL $0x800000, 0(SP) // stacksize = 8192KB - MOVL $_rt0_386_linux_lib_go(SB), AX - MOVL AX, 4(SP) // fn - MOVL $runtime·newosproc0(SB), AX - CALL AX - -restore: - ADDL $8, SP - POPL DI - POPL SI - POPL BX - POPL BP - RET - -TEXT _rt0_386_linux_lib_go(SB),NOSPLIT,$12 - MOVL _rt0_386_linux_lib_argc<>(SB), AX - MOVL AX, 0(SP) - MOVL _rt0_386_linux_lib_argv<>(SB), AX - MOVL AX, 4(SP) - MOVL $runtime·rt0_go(SB), AX - CALL AX - RET - -DATA _rt0_386_linux_lib_argc<>(SB)/4, $0 -GLOBL _rt0_386_linux_lib_argc<>(SB),NOPTR, $4 -DATA _rt0_386_linux_lib_argv<>(SB)/4, $0 -GLOBL _rt0_386_linux_lib_argv<>(SB),NOPTR, $4 + JMP _rt0_386_lib(SB) TEXT main(SB),NOSPLIT,$0 + // Remove the return address from the stack. + // rt0_go doesn't expect it to be there. + ADDL $4, SP JMP runtime·rt0_go(SB) diff --git a/src/runtime/rt0_linux_amd64.s b/src/runtime/rt0_linux_amd64.s index ced471f5cb..94ff7094d6 100644 --- a/src/runtime/rt0_linux_amd64.s +++ b/src/runtime/rt0_linux_amd64.s @@ -5,70 +5,7 @@ #include "textflag.h" TEXT _rt0_amd64_linux(SB),NOSPLIT,$-8 - LEAQ 8(SP), SI // argv - MOVQ 0(SP), DI // argc - MOVQ $main(SB), AX - JMP AX + JMP _rt0_amd64(SB) -// When building with -buildmode=c-shared, this symbol is called when the shared -// library is loaded. -// Note: This function calls external C code, which might required 16-byte stack -// alignment after cmd/internal/obj applies its transformations. 
-TEXT _rt0_amd64_linux_lib(SB),NOSPLIT,$0x50 - MOVQ SP, AX - ANDQ $-16, SP - MOVQ BX, 0x10(SP) - MOVQ BP, 0x18(SP) - MOVQ R12, 0x20(SP) - MOVQ R13, 0x28(SP) - MOVQ R14, 0x30(SP) - MOVQ R15, 0x38(SP) - MOVQ AX, 0x40(SP) - - MOVQ DI, _rt0_amd64_linux_lib_argc<>(SB) - MOVQ SI, _rt0_amd64_linux_lib_argv<>(SB) - - // Synchronous initialization. - MOVQ $runtime·libpreinit(SB), AX - CALL AX - - // Create a new thread to do the runtime initialization and return. - MOVQ _cgo_sys_thread_create(SB), AX - TESTQ AX, AX - JZ nocgo - MOVQ $_rt0_amd64_linux_lib_go(SB), DI - MOVQ $0, SI - CALL AX - JMP restore - -nocgo: - MOVQ $8388608, 0(SP) // stacksize - MOVQ $_rt0_amd64_linux_lib_go(SB), AX - MOVQ AX, 8(SP) // fn - MOVQ $runtime·newosproc0(SB), AX - CALL AX - -restore: - MOVQ 0x10(SP), BX - MOVQ 0x18(SP), BP - MOVQ 0x20(SP), R12 - MOVQ 0x28(SP), R13 - MOVQ 0x30(SP), R14 - MOVQ 0x38(SP), R15 - MOVQ 0x40(SP), SP - RET - -TEXT _rt0_amd64_linux_lib_go(SB),NOSPLIT,$0 - MOVQ _rt0_amd64_linux_lib_argc<>(SB), DI - MOVQ _rt0_amd64_linux_lib_argv<>(SB), SI - MOVQ $runtime·rt0_go(SB), AX - JMP AX - -DATA _rt0_amd64_linux_lib_argc<>(SB)/8, $0 -GLOBL _rt0_amd64_linux_lib_argc<>(SB),NOPTR, $8 -DATA _rt0_amd64_linux_lib_argv<>(SB)/8, $0 -GLOBL _rt0_amd64_linux_lib_argv<>(SB),NOPTR, $8 - -TEXT main(SB),NOSPLIT,$-8 - MOVQ $runtime·rt0_go(SB), AX - JMP AX +TEXT _rt0_amd64_linux_lib(SB),NOSPLIT,$0 + JMP _rt0_amd64_lib(SB) diff --git a/src/runtime/rt0_linux_arm.s b/src/runtime/rt0_linux_arm.s index 597e642adb..ba4ca2b10a 100644 --- a/src/runtime/rt0_linux_arm.s +++ b/src/runtime/rt0_linux_arm.s @@ -12,158 +12,22 @@ TEXT _rt0_arm_linux(SB),NOSPLIT,$-4 // When building with -buildmode=c-shared, this symbol is called when the shared // library is loaded. -TEXT _rt0_arm_linux_lib(SB),NOSPLIT,$104 - // Preserve callee-save registers. Raspberry Pi's dlopen(), for example, - // actually cares that R11 is preserved. - MOVW R4, 12(R13) - MOVW R5, 16(R13) - MOVW R6, 20(R13) - MOVW R7, 24(R13) - MOVW R8, 28(R13) - MOVW R11, 32(R13) - - // Skip floating point registers on GOARM < 6. - MOVB runtime·goarm(SB), R11 - CMP $6, R11 - BLT skipfpsave - MOVD F8, (32+8*1)(R13) - MOVD F9, (32+8*2)(R13) - MOVD F10, (32+8*3)(R13) - MOVD F11, (32+8*4)(R13) - MOVD F12, (32+8*5)(R13) - MOVD F13, (32+8*6)(R13) - MOVD F14, (32+8*7)(R13) - MOVD F15, (32+8*8)(R13) -skipfpsave: - // Save argc/argv. - MOVW R0, _rt0_arm_linux_lib_argc<>(SB) - MOVW R1, _rt0_arm_linux_lib_argv<>(SB) - - // Synchronous initialization. - MOVW $runtime·libpreinit(SB), R2 - CALL (R2) - - // Create a new thread to do the runtime initialization. - MOVW _cgo_sys_thread_create(SB), R2 - CMP $0, R2 - BEQ nocgo - MOVW $_rt0_arm_linux_lib_go<>(SB), R0 - MOVW $0, R1 - BL (R2) - B rr -nocgo: - MOVW $0x800000, R0 // stacksize = 8192KB - MOVW $_rt0_arm_linux_lib_go<>(SB), R1 // fn - MOVW R0, 4(R13) - MOVW R1, 8(R13) - BL runtime·newosproc0(SB) -rr: - // Restore callee-save registers and return. 
- MOVB runtime·goarm(SB), R11 - CMP $6, R11 - BLT skipfprest - MOVD (32+8*1)(R13), F8 - MOVD (32+8*2)(R13), F9 - MOVD (32+8*3)(R13), F10 - MOVD (32+8*4)(R13), F11 - MOVD (32+8*5)(R13), F12 - MOVD (32+8*6)(R13), F13 - MOVD (32+8*7)(R13), F14 - MOVD (32+8*8)(R13), F15 -skipfprest: - MOVW 12(R13), R4 - MOVW 16(R13), R5 - MOVW 20(R13), R6 - MOVW 24(R13), R7 - MOVW 28(R13), R8 - MOVW 32(R13), R11 - RET - -TEXT _rt0_arm_linux_lib_go<>(SB),NOSPLIT,$8 - MOVW _rt0_arm_linux_lib_argc<>(SB), R0 - MOVW _rt0_arm_linux_lib_argv<>(SB), R1 - MOVW R0, 0(R13) - MOVW R1, 4(R13) - B runtime·rt0_go(SB) - -DATA _rt0_arm_linux_lib_argc<>(SB)/4,$0 -GLOBL _rt0_arm_linux_lib_argc<>(SB),NOPTR,$4 -DATA _rt0_arm_linux_lib_argv<>(SB)/4,$0 -GLOBL _rt0_arm_linux_lib_argv<>(SB),NOPTR,$4 +TEXT _rt0_arm_linux_lib(SB),NOSPLIT,$0 + B _rt0_arm_lib(SB) TEXT _rt0_arm_linux1(SB),NOSPLIT,$-4 // We first need to detect the kernel ABI, and warn the user - // if the system only supports OABI + // if the system only supports OABI. // The strategy here is to call some EABI syscall to see if // SIGILL is received. - // To catch SIGILL, we have to first setup sigaction, this is - // a chicken-and-egg problem, because we can't do syscall if - // we don't know the kernel ABI... Oh, not really, we can do - // syscall in Thumb mode. + // If you get a SIGILL here, you have the wrong kernel. - // Save argc and argv + // Save argc and argv (syscall will clobber at least R0). MOVM.DB.W [R0-R1], (R13) - // Thumb mode OABI check disabled because there are some - // EABI systems that do not support Thumb execution. - // We can run on them except for this check! - - // // set up sa_handler - // MOVW $bad_abi<>(SB), R0 // sa_handler - // MOVW $0, R1 // sa_flags - // MOVW $0, R2 // sa_restorer - // MOVW $0, R3 // sa_mask - // MOVM.DB.W [R0-R3], (R13) - // MOVW $4, R0 // SIGILL - // MOVW R13, R1 // sa - // SUB $16, R13 - // MOVW R13, R2 // old_sa - // MOVW $8, R3 // c - // MOVW $174, R7 // sys_sigaction - // BL oabi_syscall<>(SB) - // do an EABI syscall MOVW $20, R7 // sys_getpid SWI $0 // this will trigger SIGILL on OABI systems - - // MOVW $4, R0 // SIGILL - // MOVW R13, R1 // sa - // MOVW $0, R2 // old_sa - // MOVW $8, R3 // c - // MOVW $174, R7 // sys_sigaction - // SWI $0 // restore signal handler - // ADD $32, R13 + MOVM.IA.W (R13), [R0-R1] B runtime·rt0_go(SB) - -TEXT bad_abi<>(SB),NOSPLIT,$-4 - // give diagnosis and exit - MOVW $2, R0 // stderr - MOVW $bad_abi_msg(SB), R1 // data - MOVW $45, R2 // len - MOVW $4, R7 // sys_write - BL oabi_syscall<>(SB) - MOVW $1, R0 - MOVW $1, R7 // sys_exit - BL oabi_syscall<>(SB) - B 0(PC) - -DATA bad_abi_msg+0x00(SB)/8, $"This pro" -DATA bad_abi_msg+0x08(SB)/8, $"gram can" -DATA bad_abi_msg+0x10(SB)/8, $" only be" -DATA bad_abi_msg+0x18(SB)/8, $" run on " -DATA bad_abi_msg+0x20(SB)/8, $"EABI ker" -DATA bad_abi_msg+0x28(SB)/4, $"nels" -DATA bad_abi_msg+0x2c(SB)/1, $0xa -GLOBL bad_abi_msg(SB), RODATA, $45 - -TEXT oabi_syscall<>(SB),NOSPLIT,$-4 - ADD $1, R15, R4 // R15 is hardware PC - WORD $0xe12fff14 //BX (R4) // enter thumb mode - // TODO(minux): only supports little-endian CPUs - WORD $0x4770df01 // swi $1; bx lr - -TEXT main(SB),NOSPLIT,$-4 - MOVW $_rt0_arm_linux1(SB), R4 - B (R4) - diff --git a/src/runtime/rt0_linux_ppc64le.s b/src/runtime/rt0_linux_ppc64le.s index 81b991349a..73b9ae392d 100644 --- a/src/runtime/rt0_linux_ppc64le.s +++ b/src/runtime/rt0_linux_ppc64le.s @@ -2,6 +2,7 @@ #include "textflag.h" TEXT _rt0_ppc64le_linux(SB),NOSPLIT,$0 + XOR R0, R0 // Make sure R0 is zero before _main BR 
_main<>(SB) TEXT _rt0_ppc64le_linux_lib(SB),NOSPLIT,$-8 @@ -10,7 +11,6 @@ TEXT _rt0_ppc64le_linux_lib(SB),NOSPLIT,$-8 MOVD R0, 16(R1) // Save LR in caller's frame. MOVW CR, R0 // Save CR in caller's frame MOVD R0, 8(R1) - MOVD R2, 24(R1) // Save TOC in caller's frame. MOVDU R1, -320(R1) // Allocate frame. // Preserve callee-save registers. @@ -121,7 +121,6 @@ done: FMOVD 304(R1), F31 ADD $320, R1 - MOVD 24(R1), R2 MOVD 8(R1), R0 MOVFL R0, $0xff MOVD 16(R1), R0 diff --git a/src/runtime/rt0_linux_s390x.s b/src/runtime/rt0_linux_s390x.s index aedd6c7ef2..4b62c5a65a 100644 --- a/src/runtime/rt0_linux_s390x.s +++ b/src/runtime/rt0_linux_s390x.s @@ -4,17 +4,20 @@ #include "textflag.h" -TEXT _rt0_s390x_linux(SB),NOSPLIT|NOFRAME,$0 +TEXT _rt0_s390x_linux(SB), NOSPLIT|NOFRAME, $0 // In a statically linked binary, the stack contains argc, // argv as argc string pointers followed by a NULL, envv as a // sequence of string pointers followed by a NULL, and auxv. // There is no TLS base pointer. - // - // TODO: Support dynamic linking entry point - MOVD 0(R15), R2 // argc - ADD $8, R15, R3 // argv - BR main(SB) -TEXT main(SB),NOSPLIT|NOFRAME,$0 - MOVD $runtime·rt0_go(SB), R11 - BR R11 + MOVD 0(R15), R2 // argc + ADD $8, R15, R3 // argv + BR main(SB) + +TEXT _rt0_s390x_linux_lib(SB), NOSPLIT, $0 + MOVD $_rt0_s390x_lib(SB), R1 + BR R1 + +TEXT main(SB), NOSPLIT|NOFRAME, $0 + MOVD $runtime·rt0_go(SB), R1 + BR R1 diff --git a/src/runtime/rt0_nacl_386.s b/src/runtime/rt0_nacl_386.s index d4ba06306a..4c990022f1 100644 --- a/src/runtime/rt0_nacl_386.s +++ b/src/runtime/rt0_nacl_386.s @@ -15,8 +15,10 @@ TEXT _rt0_386_nacl(SB),NOSPLIT,$8 LEAL argv+16(FP), BX MOVL AX, 0(SP) MOVL BX, 4(SP) - CALL main(SB) - INT $3 + JMP runtime·rt0_go(SB) TEXT main(SB),NOSPLIT,$0 + // Remove the return address from the stack. + // rt0_go doesn't expect it to be there. + ADDL $4, SP JMP runtime·rt0_go(SB) diff --git a/src/runtime/rt0_nacl_arm.s b/src/runtime/rt0_nacl_arm.s index eadb4782dd..2be8a0730f 100644 --- a/src/runtime/rt0_nacl_arm.s +++ b/src/runtime/rt0_nacl_arm.s @@ -13,8 +13,4 @@ TEXT _rt0_arm_nacl(SB),NOSPLIT,$-4 MOVW 8(R13), R0 MOVW $12(R13), R1 - MOVM.DB.W [R0-R1], (R13) - B main(SB) - -TEXT main(SB),NOSPLIT,$0 B runtime·rt0_go(SB) diff --git a/src/runtime/rt0_netbsd_386.s b/src/runtime/rt0_netbsd_386.s index 70b8532538..cefc04a815 100644 --- a/src/runtime/rt0_netbsd_386.s +++ b/src/runtime/rt0_netbsd_386.s @@ -4,13 +4,14 @@ #include "textflag.h" -TEXT _rt0_386_netbsd(SB),NOSPLIT,$8 - MOVL 8(SP), AX - LEAL 12(SP), BX - MOVL AX, 0(SP) - MOVL BX, 4(SP) - CALL main(SB) - INT $3 +TEXT _rt0_386_netbsd(SB),NOSPLIT,$0 + JMP _rt0_386(SB) + +TEXT _rt0_386_netbsd_lib(SB),NOSPLIT,$0 + JMP _rt0_386_lib(SB) TEXT main(SB),NOSPLIT,$0 + // Remove the return address from the stack. + // rt0_go doesn't expect it to be there. 
+ ADDL $4, SP JMP runtime·rt0_go(SB) diff --git a/src/runtime/rt0_netbsd_amd64.s b/src/runtime/rt0_netbsd_amd64.s index fad56614e5..77c7187bba 100644 --- a/src/runtime/rt0_netbsd_amd64.s +++ b/src/runtime/rt0_netbsd_amd64.s @@ -5,11 +5,7 @@ #include "textflag.h" TEXT _rt0_amd64_netbsd(SB),NOSPLIT,$-8 - LEAQ 8(SP), SI // argv - MOVQ 0(SP), DI // argc - MOVQ $main(SB), AX - JMP AX + JMP _rt0_amd64(SB) -TEXT main(SB),NOSPLIT,$-8 - MOVQ $runtime·rt0_go(SB), AX - JMP AX +TEXT _rt0_amd64_netbsd_lib(SB),NOSPLIT,$0 + JMP _rt0_amd64_lib(SB) diff --git a/src/runtime/rt0_netbsd_arm.s b/src/runtime/rt0_netbsd_arm.s index 2cb1182c06..503c32adac 100644 --- a/src/runtime/rt0_netbsd_arm.s +++ b/src/runtime/rt0_netbsd_arm.s @@ -4,8 +4,8 @@ #include "textflag.h" -TEXT _rt0_arm_netbsd(SB),NOSPLIT,$-4 - MOVW (R13), R0 // argc - MOVW $4(R13), R1 // argv - MOVM.DB.W [R0-R1], (R13) - B runtime·rt0_go(SB) +TEXT _rt0_arm_netbsd(SB),NOSPLIT,$0 + B _rt0_arm(SB) + +TEXT _rt0_arm_netbsd_lib(SB),NOSPLIT,$0 + B _rt0_arm_lib(SB) diff --git a/src/runtime/rt0_openbsd_386.s b/src/runtime/rt0_openbsd_386.s index f25d2e1cf0..959f4d655a 100644 --- a/src/runtime/rt0_openbsd_386.s +++ b/src/runtime/rt0_openbsd_386.s @@ -4,13 +4,14 @@ #include "textflag.h" -TEXT _rt0_386_openbsd(SB),NOSPLIT,$8 - MOVL 8(SP), AX - LEAL 12(SP), BX - MOVL AX, 0(SP) - MOVL BX, 4(SP) - CALL main(SB) - INT $3 +TEXT _rt0_386_openbsd(SB),NOSPLIT,$0 + JMP _rt0_386(SB) + +TEXT _rt0_386_openbsd_lib(SB),NOSPLIT,$0 + JMP _rt0_386_lib(SB) TEXT main(SB),NOSPLIT,$0 + // Remove the return address from the stack. + // rt0_go doesn't expect it to be there. + ADDL $4, SP JMP runtime·rt0_go(SB) diff --git a/src/runtime/rt0_openbsd_amd64.s b/src/runtime/rt0_openbsd_amd64.s index 58fe666391..c2f3f23f37 100644 --- a/src/runtime/rt0_openbsd_amd64.s +++ b/src/runtime/rt0_openbsd_amd64.s @@ -5,11 +5,7 @@ #include "textflag.h" TEXT _rt0_amd64_openbsd(SB),NOSPLIT,$-8 - LEAQ 8(SP), SI // argv - MOVQ 0(SP), DI // argc - MOVQ $main(SB), AX - JMP AX + JMP _rt0_amd64(SB) -TEXT main(SB),NOSPLIT,$-8 - MOVQ $runtime·rt0_go(SB), AX - JMP AX +TEXT _rt0_amd64_openbsd_lib(SB),NOSPLIT,$0 + JMP _rt0_amd64_lib(SB) diff --git a/src/runtime/rt0_openbsd_arm.s b/src/runtime/rt0_openbsd_arm.s index 6207e55982..3511c96abc 100644 --- a/src/runtime/rt0_openbsd_arm.s +++ b/src/runtime/rt0_openbsd_arm.s @@ -4,8 +4,8 @@ #include "textflag.h" -TEXT _rt0_arm_openbsd(SB),NOSPLIT,$-4 - MOVW (R13), R0 // argc - MOVW $4(R13), R1 // argv - MOVM.DB.W [R0-R1], (R13) - B runtime·rt0_go(SB) +TEXT _rt0_arm_openbsd(SB),NOSPLIT,$0 + B _rt0_arm(SB) + +TEXT _rt0_arm_openbsd_lib(SB),NOSPLIT,$0 + B _rt0_arm_lib(SB) diff --git a/src/runtime/rt0_plan9_386.s b/src/runtime/rt0_plan9_386.s index cbbf245632..64716158f1 100644 --- a/src/runtime/rt0_plan9_386.s +++ b/src/runtime/rt0_plan9_386.s @@ -14,7 +14,7 @@ TEXT _rt0_386_plan9(SB),NOSPLIT,$12 MOVL AX, 0(SP) LEAL inargv+0(FP), AX MOVL AX, 4(SP) - CALL runtime·rt0_go(SB) + JMP runtime·rt0_go(SB) GLOBL _tos(SB), NOPTR, $4 GLOBL _privates(SB), NOPTR, $4 diff --git a/src/runtime/rt0_plan9_arm.s b/src/runtime/rt0_plan9_arm.s index 2a35e4ef66..d6174a4df5 100644 --- a/src/runtime/rt0_plan9_arm.s +++ b/src/runtime/rt0_plan9_arm.s @@ -10,8 +10,6 @@ TEXT _rt0_arm_plan9(SB),NOSPLIT,$-4 MOVW R0, _tos(SB) MOVW 0(R13), R0 MOVW $4(R13), R1 - MOVW.W R1, -4(R13) - MOVW.W R0, -4(R13) B runtime·rt0_go(SB) GLOBL _tos(SB), NOPTR, $4 diff --git a/src/runtime/rt0_solaris_amd64.s b/src/runtime/rt0_solaris_amd64.s index e2d1e71bb4..5c46ded3ae 100644 --- a/src/runtime/rt0_solaris_amd64.s +++ 
b/src/runtime/rt0_solaris_amd64.s @@ -5,11 +5,7 @@ #include "textflag.h" TEXT _rt0_amd64_solaris(SB),NOSPLIT,$-8 - LEAQ 8(SP), SI // argv - MOVQ 0(SP), DI // argc - MOVQ $main(SB), AX - JMP AX + JMP _rt0_amd64(SB) -TEXT main(SB),NOSPLIT,$-8 - MOVQ $runtime·rt0_go(SB), AX - JMP AX +TEXT _rt0_amd64_solaris_lib(SB),NOSPLIT,$0 + JMP _rt0_amd64_lib(SB) diff --git a/src/runtime/rt0_windows_386.s b/src/runtime/rt0_windows_386.s index b9407a9879..fa39edd787 100644 --- a/src/runtime/rt0_windows_386.s +++ b/src/runtime/rt0_windows_386.s @@ -4,13 +4,8 @@ #include "textflag.h" -TEXT _rt0_386_windows(SB),NOSPLIT,$12 - MOVL 12(SP), AX - LEAL 16(SP), BX - MOVL AX, 4(SP) - MOVL BX, 8(SP) - MOVL $-1, 0(SP) // return PC for main - JMP _main(SB) +TEXT _rt0_386_windows(SB),NOSPLIT,$0 + JMP _rt0_386(SB) // When building with -buildmode=(c-shared or c-archive), this // symbol is called. For dynamic libraries it is called when the @@ -41,10 +36,12 @@ TEXT _rt0_386_windows_lib(SB),NOSPLIT,$0x1C RET TEXT _rt0_386_windows_lib_go(SB),NOSPLIT,$0 - MOVL $0, DI - MOVL $0, SI - MOVL $runtime·rt0_go(SB), AX - JMP AX + PUSHL $0 + PUSHL $0 + JMP runtime·rt0_go(SB) TEXT _main(SB),NOSPLIT,$0 + // Remove the return address from the stack. + // rt0_go doesn't expect it to be there. + ADDL $4, SP JMP runtime·rt0_go(SB) diff --git a/src/runtime/rt0_windows_amd64.s b/src/runtime/rt0_windows_amd64.s index 2f73b37f31..1604711cdb 100644 --- a/src/runtime/rt0_windows_amd64.s +++ b/src/runtime/rt0_windows_amd64.s @@ -7,10 +7,7 @@ #include "textflag.h" TEXT _rt0_amd64_windows(SB),NOSPLIT,$-8 - LEAQ 8(SP), SI // argv - MOVQ 0(SP), DI // argc - MOVQ $main(SB), AX - JMP AX + JMP _rt0_amd64(SB) // When building with -buildmode=(c-shared or c-archive), this // symbol is called. For dynamic libraries it is called when the @@ -42,7 +39,3 @@ TEXT _rt0_amd64_windows_lib_go(SB),NOSPLIT,$0 MOVQ $0, SI MOVQ $runtime·rt0_go(SB), AX JMP AX - -TEXT main(SB),NOSPLIT,$-8 - MOVQ $runtime·rt0_go(SB), AX - JMP AX diff --git a/src/runtime/runtime-gdb_test.go b/src/runtime/runtime-gdb_test.go index 1318babdea..5e0508631f 100644 --- a/src/runtime/runtime-gdb_test.go +++ b/src/runtime/runtime-gdb_test.go @@ -22,11 +22,15 @@ import ( func checkGdbEnvironment(t *testing.T) { testenv.MustHaveGoBuild(t) - if runtime.GOOS == "darwin" { + switch runtime.GOOS { + case "darwin": t.Skip("gdb does not work on darwin") - } - if runtime.GOOS == "linux" && runtime.GOARCH == "ppc64" { - t.Skip("skipping gdb tests on linux/ppc64; see golang.org/issue/17366") + case "netbsd": + t.Skip("gdb does not work with threads on NetBSD; see golang.org/issue/22893 and gnats.netbsd.org/52548") + case "linux": + if runtime.GOARCH == "ppc64" { + t.Skip("skipping gdb tests on linux/ppc64; see golang.org/issue/17366") + } } if final := os.Getenv("GOROOT_FINAL"); final != "" && runtime.GOROOT() != final { t.Skip("gdb test can fail with GOROOT_FINAL pending") @@ -76,7 +80,7 @@ import "fmt" import "runtime" var gslice []string func main() { - mapvar := make(map[string]string,5) + mapvar := make(map[string]string, 13) mapvar["abc"] = "def" mapvar["ghi"] = "jkl" strvar := "abc" @@ -84,7 +88,7 @@ func main() { slicevar := make([]string, 0, 16) slicevar = append(slicevar, mapvar["abc"]) fmt.Println("hi") // line 13 - _ = ptrvar + runtime.KeepAlive(ptrvar) gslice = slicevar runtime.KeepAlive(mapvar) } @@ -106,8 +110,8 @@ func testGdbPython(t *testing.T, cgo bool) { t.Skip("skipping because cgo is not enabled") } - t.Parallel() checkGdbEnvironment(t) + t.Parallel() checkGdbVersion(t) 
checkGdbPython(t) @@ -132,7 +136,7 @@ func testGdbPython(t *testing.T, cgo bool) { cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe") cmd.Dir = dir - out, err := testEnv(cmd).CombinedOutput() + out, err := testenv.CleanCmdEnv(cmd).CombinedOutput() if err != nil { t.Fatalf("building source %v\n%s", err, out) } @@ -198,8 +202,10 @@ func testGdbPython(t *testing.T, cgo bool) { t.Fatalf("info goroutines failed: %s", bl) } - printMapvarRe := regexp.MustCompile(`\Q = map[string]string = {["abc"] = "def", ["ghi"] = "jkl"}\E$`) - if bl := blocks["print mapvar"]; !printMapvarRe.MatchString(bl) { + printMapvarRe1 := regexp.MustCompile(`\Q = map[string]string = {["abc"] = "def", ["ghi"] = "jkl"}\E$`) + printMapvarRe2 := regexp.MustCompile(`\Q = map[string]string = {["ghi"] = "jkl", ["abc"] = "def"}\E$`) + if bl := blocks["print mapvar"]; !printMapvarRe1.MatchString(bl) && + !printMapvarRe2.MatchString(bl) { t.Fatalf("print mapvar failed: %s", bl) } @@ -212,7 +218,7 @@ func testGdbPython(t *testing.T, cgo bool) { // a collection of scalar vars holding the fields. In such cases // the DWARF variable location expression should be of the // form "var.field" and not just "field". - infoLocalsRe := regexp.MustCompile(`^slicevar.len = `) + infoLocalsRe := regexp.MustCompile(`.*\sslicevar.cap = `) if bl := blocks["info locals"]; !infoLocalsRe.MatchString(bl) { t.Fatalf("info locals failed: %s", bl) } @@ -260,8 +266,8 @@ func TestGdbBacktrace(t *testing.T) { testenv.SkipFlaky(t, 15603) } - t.Parallel() checkGdbEnvironment(t) + t.Parallel() checkGdbVersion(t) dir, err := ioutil.TempDir("", "go-build") @@ -278,7 +284,7 @@ func TestGdbBacktrace(t *testing.T) { } cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe") cmd.Dir = dir - out, err := testEnv(cmd).CombinedOutput() + out, err := testenv.CleanCmdEnv(cmd).CombinedOutput() if err != nil { t.Fatalf("building source %v\n%s", err, out) } @@ -330,8 +336,8 @@ func main() { // TestGdbAutotmpTypes ensures that types of autotmp variables appear in .debug_info // See bug #17830. func TestGdbAutotmpTypes(t *testing.T) { - t.Parallel() checkGdbEnvironment(t) + t.Parallel() checkGdbVersion(t) dir, err := ioutil.TempDir("", "go-build") @@ -346,9 +352,9 @@ func TestGdbAutotmpTypes(t *testing.T) { if err != nil { t.Fatalf("failed to create file: %v", err) } - cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=-N -l", "-o", "a.exe") + cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-o", "a.exe") cmd.Dir = dir - out, err := testEnv(cmd).CombinedOutput() + out, err := testenv.CleanCmdEnv(cmd).CombinedOutput() if err != nil { t.Fatalf("building source %v\n%s", err, out) } @@ -381,3 +387,62 @@ func TestGdbAutotmpTypes(t *testing.T) { } } } + +const constsSource = ` +package main + +const aConstant int = 42 +const largeConstant uint64 = ^uint64(0) +const minusOne int64 = -1 + +func main() { + println("hello world") +} +` + +func TestGdbConst(t *testing.T) { + checkGdbEnvironment(t) + t.Parallel() + checkGdbVersion(t) + + dir, err := ioutil.TempDir("", "go-build") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(dir) + + // Build the source code. 
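Two details of the gdb test hunks above are easy to miss: the build flag changes from -gcflags "-N -l" to -gcflags=all=-N -l, where the all= pattern applies the flags to every package in the build rather than only the package named on the command line, and the tests drive gdb non-interactively with -nx -batch plus a series of -ex commands, comparing the combined output against expected strings. The standalone sketch below shows the same way of scripting gdb from Go; it assumes go and gdb are on PATH, and the binary path and session commands are placeholders rather than the tests' actual sessions.

package main

import (
	"fmt"
	"log"
	"os/exec"
)

func main() {
	// Build a throwaway binary with optimizations and inlining disabled
	// in every package, mirroring the flag used by the tests above.
	build := exec.Command("go", "build", "-gcflags=all=-N -l", "-o", "a.exe", ".")
	if out, err := build.CombinedOutput(); err != nil {
		log.Fatalf("build failed: %v\n%s", err, out)
	}

	// Drive gdb in batch mode: each -ex runs one command, then gdb exits.
	args := []string{
		"-nx", "-batch",
		"-ex", "break main.main",
		"-ex", "run",
		"-ex", "info locals",
		"./a.exe",
	}
	out, _ := exec.Command("gdb", args...).CombinedOutput()
	fmt.Printf("gdb output:\n%s", out)
}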
+ src := filepath.Join(dir, "main.go") + err = ioutil.WriteFile(src, []byte(constsSource), 0644) + if err != nil { + t.Fatalf("failed to create file: %v", err) + } + cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-o", "a.exe") + cmd.Dir = dir + out, err := testenv.CleanCmdEnv(cmd).CombinedOutput() + if err != nil { + t.Fatalf("building source %v\n%s", err, out) + } + + // Execute gdb commands. + args := []string{"-nx", "-batch", + "-ex", "set startup-with-shell off", + "-ex", "break main.main", + "-ex", "run", + "-ex", "print main.aConstant", + "-ex", "print main.largeConstant", + "-ex", "print main.minusOne", + "-ex", "print 'runtime._MSpanInUse'", + "-ex", "print 'runtime._PageSize'", + filepath.Join(dir, "a.exe"), + } + got, _ := exec.Command("gdb", args...).CombinedOutput() + + sgot := strings.Replace(string(got), "\r\n", "\n", -1) + + t.Logf("output %q", sgot) + + if !strings.Contains(sgot, "\n$1 = 42\n$2 = 18446744073709551615\n$3 = -1\n$4 = 1 '\\001'\n$5 = 8192") { + t.Fatalf("output mismatch") + } +} diff --git a/src/runtime/runtime-lldb_test.go b/src/runtime/runtime-lldb_test.go index 98bc906666..9a287052ea 100644 --- a/src/runtime/runtime-lldb_test.go +++ b/src/runtime/runtime-lldb_test.go @@ -5,11 +5,7 @@ package runtime_test import ( - "debug/elf" - "debug/macho" - "encoding/binary" "internal/testenv" - "io" "io/ioutil" "os" "os/exec" @@ -158,7 +154,7 @@ func TestLldbPython(t *testing.T) { t.Fatalf("failed to create file: %v", err) } - cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags", "-N -l", "-o", "a.exe") + cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-o", "a.exe") cmd.Dir = dir out, err := cmd.CombinedOutput() if err != nil { @@ -182,81 +178,3 @@ func TestLldbPython(t *testing.T) { t.Fatalf("Unexpected lldb output:\n%s", got) } } - -// Check that aranges are valid even when lldb isn't installed. 
-func TestDwarfAranges(t *testing.T) { - testenv.MustHaveGoBuild(t) - dir, err := ioutil.TempDir("", "go-build") - if err != nil { - t.Fatalf("failed to create temp directory: %v", err) - } - defer os.RemoveAll(dir) - - src := filepath.Join(dir, "main.go") - err = ioutil.WriteFile(src, []byte(lldbHelloSource), 0644) - if err != nil { - t.Fatalf("failed to create file: %v", err) - } - - cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe") - cmd.Dir = dir - out, err := cmd.CombinedOutput() - if err != nil { - t.Fatalf("building source %v\n%s", err, out) - } - - filename := filepath.Join(dir, "a.exe") - if f, err := elf.Open(filename); err == nil { - sect := f.Section(".debug_aranges") - if sect == nil { - t.Fatal("Missing aranges section") - } - verifyAranges(t, f.ByteOrder, sect.Open()) - } else if f, err := macho.Open(filename); err == nil { - sect := f.Section("__debug_aranges") - if sect == nil { - t.Fatal("Missing aranges section") - } - verifyAranges(t, f.ByteOrder, sect.Open()) - } else { - t.Skip("Not an elf or macho binary.") - } -} - -func verifyAranges(t *testing.T, byteorder binary.ByteOrder, data io.ReadSeeker) { - var header struct { - UnitLength uint32 // does not include the UnitLength field - Version uint16 - Offset uint32 - AddressSize uint8 - SegmentSize uint8 - } - for { - offset, err := data.Seek(0, io.SeekCurrent) - if err != nil { - t.Fatalf("Seek error: %v", err) - } - if err = binary.Read(data, byteorder, &header); err == io.EOF { - return - } else if err != nil { - t.Fatalf("Error reading arange header: %v", err) - } - tupleSize := int64(header.SegmentSize) + 2*int64(header.AddressSize) - lastTupleOffset := offset + int64(header.UnitLength) + 4 - tupleSize - if lastTupleOffset%tupleSize != 0 { - t.Fatalf("Invalid arange length %d, (addr %d, seg %d)", header.UnitLength, header.AddressSize, header.SegmentSize) - } - if _, err = data.Seek(lastTupleOffset, io.SeekStart); err != nil { - t.Fatalf("Seek error: %v", err) - } - buf := make([]byte, tupleSize) - if n, err := data.Read(buf); err != nil || int64(n) < tupleSize { - t.Fatalf("Read error: %v", err) - } - for _, val := range buf { - if val != 0 { - t.Fatalf("Invalid terminator") - } - } - } -} diff --git a/src/runtime/runtime.go b/src/runtime/runtime.go index 33ca75dfed..33ecc260dd 100644 --- a/src/runtime/runtime.go +++ b/src/runtime/runtime.go @@ -58,14 +58,8 @@ func syscall_Getpagesize() int { return int(physPageSize) } //go:linkname os_runtime_args os.runtime_args func os_runtime_args() []string { return append([]string{}, argslice...) } -//go:linkname boring_runtime_arg0 crypto/internal/boring.runtime_arg0 -func boring_runtime_arg0() string { - // On Windows, argslice is not set, and it's too much work to find argv0. 
- if len(argslice) == 0 { - return "" - } - return argslice[0] +//go:linkname syscall_Exit syscall.Exit +//go:nosplit +func syscall_Exit(code int) { + exit(int32(code)) } - -//go:linkname fipstls_runtime_arg0 crypto/internal/boring/fipstls.runtime_arg0 -func fipstls_runtime_arg0() string { return boring_runtime_arg0() } diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go index c0733481a8..0971e0cb37 100644 --- a/src/runtime/runtime1.go +++ b/src/runtime/runtime1.go @@ -99,10 +99,6 @@ var test_z64, test_x64 uint64 func testAtomic64() { test_z64 = 42 test_x64 = 0 - prefetcht0(uintptr(unsafe.Pointer(&test_z64))) - prefetcht1(uintptr(unsafe.Pointer(&test_z64))) - prefetcht2(uintptr(unsafe.Pointer(&test_z64))) - prefetchnta(uintptr(unsafe.Pointer(&test_z64))) if atomic.Cas64(&test_z64, test_x64, 1) { throw("cas64 failed") } @@ -390,13 +386,6 @@ func parsedebugvars() { setTraceback(gogetenv("GOTRACEBACK")) traceback_env = traceback_cache - - // For cgocheck > 1, we turn on the write barrier at all times - // and check all pointer writes. - if debug.cgocheck > 1 { - writeBarrier.cgo = true - writeBarrier.enabled = true - } } //go:linkname setTraceback runtime/debug.SetTraceback diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 6871d9c68c..556f13d1c1 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -169,9 +169,13 @@ func efaceOf(ep *interface{}) *eface { // a word that is completely ignored by the GC than to have one for which // only a few updates are ignored. // -// Gs, Ms, and Ps are always reachable via true pointers in the -// allgs, allm, and allp lists or (during allocation before they reach those lists) +// Gs and Ps are always reachable via true pointers in the +// allgs and allp lists or (during allocation before they reach those lists) // from stack variables. +// +// Ms are always reachable via true pointers either from allm or +// freem. Unlike Gs and Ps we do free Ms, so it's important that +// nothing ever hold an muintptr across a safe point. // A guintptr holds a goroutine pointer, but typed as a uintptr // to bypass write barriers. It is used in the Gobuf goroutine state @@ -221,6 +225,15 @@ func (pp puintptr) ptr() *p { return (*p)(unsafe.Pointer(pp)) } //go:nosplit func (pp *puintptr) set(p *p) { *pp = puintptr(unsafe.Pointer(p)) } +// muintptr is a *m that is not tracked by the garbage collector. +// +// Because we do free Ms, there are some additional constrains on +// muintptrs: +// +// 1. Never hold an muintptr locally across a safe point. +// +// 2. Any muintptr in the heap must be owned by the M itself so it can +// ensure it is not in use when the last true *m is released. type muintptr uintptr //go:nosplit @@ -241,17 +254,19 @@ type gobuf struct { // The offsets of sp, pc, and g are known to (hard-coded in) libmach. // // ctxt is unusual with respect to GC: it may be a - // heap-allocated funcval so write require a write barrier, - // but gobuf needs to be cleared from assembly. We take - // advantage of the fact that the only path that uses a - // non-nil ctxt is morestack. As a result, gogo is the only - // place where it may not already be nil, so gogo uses an - // explicit write barrier. Everywhere else that resets the - // gobuf asserts that ctxt is already nil. + // heap-allocated funcval, so GC needs to track it, but it + // needs to be set and cleared from assembly, where it's + // difficult to have write barriers. 
However, ctxt is really a + // saved, live register, and we only ever exchange it between + // the real register and the gobuf. Hence, we treat it as a + // root during stack scanning, which means assembly that saves + // and restores it doesn't need write barriers. It's still + // typed as a pointer so that any other writes from Go get + // write barriers. sp uintptr pc uintptr g guintptr - ctxt unsafe.Pointer // this has to be a pointer so that gc scans it + ctxt unsafe.Pointer ret sys.Uintreg lr uintptr bp uintptr // for GOEXPERIMENT=framepointer @@ -272,11 +287,14 @@ type sudog struct { // channel this sudog is blocking on. shrinkstack depends on // this for sudogs involved in channel ops. - g *g - selectdone *uint32 // CAS to 1 to win select race (may point to stack) - next *sudog - prev *sudog - elem unsafe.Pointer // data element (may point to stack) + g *g + + // isSelect indicates g is participating in a select, so + // g.selectDone must be CAS'd to win the wake-up race. + isSelect bool + next *sudog + prev *sudog + elem unsafe.Pointer // data element (may point to stack) // The following fields are never accessed concurrently. // For channels, waitlink is only accessed by g. @@ -354,7 +372,7 @@ type g struct { sysexitticks int64 // cputicks when syscall has returned (for tracing) traceseq uint64 // trace event sequencer tracelastp puintptr // last P emitted an event for this goroutine - lockedm *m + lockedm muintptr sig uint32 writebuf []byte sigcode0 uintptr @@ -367,6 +385,7 @@ type g struct { cgoCtxt []uintptr // cgo traceback context labels unsafe.Pointer // profiler labels timer *timer // cached timer for time.Sleep + selectDone uint32 // are we participating in a select and did someone win the race? // Per-G GC state @@ -386,16 +405,17 @@ type m struct { divmod uint32 // div/mod denominator for arm - known to liblink // Fields not known to debuggers. - procid uint64 // for debuggers, but offset not hard-coded - gsignal *g // signal-handling g - sigmask sigset // storage for saved signal mask - tls [6]uintptr // thread-local storage (for x86 extern register) + procid uint64 // for debuggers, but offset not hard-coded + gsignal *g // signal-handling g + goSigStack gsignalStack // Go-allocated signal handling stack + sigmask sigset // storage for saved signal mask + tls [6]uintptr // thread-local storage (for x86 extern register) mstartfn func() curg *g // current running goroutine caughtsig guintptr // goroutine running during fatal signal p puintptr // attached p for executing go code (nil if not executing go code) nextp puintptr - id int32 + id int64 mallocing int32 throwing int32 preemptoff string // if != "", keep curg running on this m @@ -409,8 +429,11 @@ type m struct { inwb bool // m is executing a write barrier newSigstack bool // minit on C thread called sigaltstack printlock int8 - incgo bool // m is executing a cgo call - fastrand uint32 + incgo bool // m is executing a cgo call + freeWait uint32 // if == 0, safe to free g0 and delete m (atomic) + fastrand [2]uint32 + needextram bool + traceback uint8 ncgocall uint64 // number of cgo calls in total ncgo int32 // number of cgo calls currently in progress cgoCallersUse uint32 // if non-zero, cgoCallers in use temporarily @@ -419,15 +442,14 @@ type m struct { alllink *m // on allm schedlink muintptr mcache *mcache - lockedg *g - createstack [32]uintptr // stack that created this thread. 
- freglo [16]uint32 // d[i] lsb and f[i] - freghi [16]uint32 // d[i] msb and f[i+16] - fflag uint32 // floating point compare flags - locked uint32 // tracking for lockosthread - nextwaitm uintptr // next m waiting for lock - needextram bool - traceback uint8 + lockedg guintptr + createstack [32]uintptr // stack that created this thread. + freglo [16]uint32 // d[i] lsb and f[i] + freghi [16]uint32 // d[i] msb and f[i+16] + fflag uint32 // floating point compare flags + lockedExt uint32 // tracking for external LockOSThread + lockedInt uint32 // tracking for internal lockOSThread + nextwaitm muintptr // next m waiting for lock waitunlockf unsafe.Pointer // todo go func(*g, unsafe.pointer) bool waitlock unsafe.Pointer waittraceev byte @@ -435,6 +457,7 @@ type m struct { startingtrace bool syscalltick uint32 thread uintptr // thread handle + freelink *m // on sched.freem // these are here because they are too large to be on the stack // of low-level NOSPLIT functions. @@ -502,26 +525,30 @@ type p struct { palloc persistentAlloc // per-P to avoid mutex // Per-P GC state - gcAssistTime int64 // Nanoseconds in assistAlloc - gcBgMarkWorker guintptr - gcMarkWorkerMode gcMarkWorkerMode + gcAssistTime int64 // Nanoseconds in assistAlloc + gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker + gcBgMarkWorker guintptr + gcMarkWorkerMode gcMarkWorkerMode + + // gcMarkWorkerStartTime is the nanotime() at which this mark + // worker started. + gcMarkWorkerStartTime int64 // gcw is this P's GC work buffer cache. The work buffer is // filled by write barriers, drained by mutator assists, and // disposed on certain GC state transitions. gcw gcWork + // wbBuf is this P's GC write barrier buffer. + // + // TODO: Consider caching this in the running G. + wbBuf wbBuf + runSafePointFn uint32 // if 1, run sched.safePointFn at next safe point pad [sys.CacheLineSize]byte } -const ( - // The max value of GOMAXPROCS. - // There are no fundamental restrictions on the value. - _MaxGomaxprocs = 1 << 10 -) - type schedt struct { // accessed atomically. keep at top to ensure alignment on 32-bit systems. goidgen uint64 @@ -529,11 +556,16 @@ type schedt struct { lock mutex + // When increasing nmidle, nmidlelocked, nmsys, or nmfreed, be + // sure to call checkdead(). + midle muintptr // idle m's waiting for work nmidle int32 // number of idle m's waiting for work nmidlelocked int32 // number of locked m's waiting for work - mcount int32 // number of m's that have been created + mnext int64 // number of m's that have been created and next M ID maxmcount int32 // maximum number of m's allowed (or die) + nmsys int32 // number of system m's not counted for deadlock + nmfreed int64 // cumulative number of freed m's ngsys uint32 // number of system goroutines; updated atomically @@ -560,6 +592,10 @@ type schedt struct { deferlock mutex deferpool [5]*_defer + // freem is the list of m's waiting to be freed when their + // m.exited is set. Linked through m.freelink. + freem *m + gcwaiting uint32 // gc is waiting to run stopwait int32 stopnote note @@ -578,18 +614,7 @@ type schedt struct { totaltime int64 // ∫gomaxprocs dt up to procresizetime } -// The m.locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread. -// The low bit (LockExternal) is a boolean reporting whether any LockOSThread call is active. -// External locks are not recursive; a second lock is silently ignored. 
-// The upper bits of m.locked record the nesting depth of calls to lockOSThread -// (counting up by LockInternal), popped by unlockOSThread (counting down by LockInternal). -// Internal locks can be recursive. For instance, a lock for cgo can occur while the main -// goroutine is holding the lock during the initialization phase. -const ( - _LockExternal = 1 - _LockInternal = 2 -) - +// Values for the flags field of a sigTabT. const ( _SigNotify = 1 << iota // let signal.Notify have signal, even if from kernel _SigKill // if signal.Notify doesn't take it, exit quietly @@ -598,7 +623,8 @@ const ( _SigDefault // if the signal isn't explicitly requested, don't monitor it _SigGoExit // cause all runtime procs to exit (only used on Plan 9). _SigSetStack // add SA_ONSTACK to libc handler - _SigUnblock // unblocked in minit + _SigUnblock // always unblock; see blockableSig + _SigIgn // _SIG_DFL action is to ignore the signal ) // Layout of in-memory per-function information prepared by linker @@ -624,14 +650,11 @@ type _func struct { // Needs to be in sync with // ../cmd/compile/internal/gc/reflect.go:/^func.dumptypestructs. type itab struct { - inter *interfacetype - _type *_type - link *itab - hash uint32 // copy of _type.hash. Used for type switches. - bad bool // type does not implement interface - inhash bool // has this itab been added to hash? - unused [2]byte - fun [1]uintptr // variable sized + inter *interfacetype + _type *_type + hash uint32 // copy of _type.hash. Used for type switches. + _ [4]byte + fun [1]uintptr // variable sized. fun[0]==0 means _type does not implement inter. } // Lock-free stack node. @@ -672,7 +695,8 @@ func extendRandom(r []byte, n int) { } } -// deferred subroutine calls +// A _defer holds an entry on the list of deferred calls. +// If you add a field here, add code to clear it in freedefer. type _defer struct { siz int32 started bool @@ -716,15 +740,15 @@ const ( const _TracebackMaxFrames = 100 var ( - emptystring string - allglen uintptr - allm *m - allp [_MaxGomaxprocs + 1]*p - gomaxprocs int32 - ncpu int32 - forcegc forcegcstate - sched schedt - newprocs int32 + allglen uintptr + allm *m + allp []*p // len(allp) == gomaxprocs; may change at safe points, otherwise immutable + allpLock mutex // Protects P-less reads of allp and all writes + gomaxprocs int32 + ncpu int32 + forcegc forcegcstate + sched schedt + newprocs int32 // Information about what cpu features are available. // Set on startup in asm_{386,amd64,amd64p32}.s. diff --git a/src/runtime/runtime_boring.go b/src/runtime/runtime_boring.go new file mode 100644 index 0000000000..5a98b20253 --- /dev/null +++ b/src/runtime/runtime_boring.go @@ -0,0 +1,19 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import _ "unsafe" // for go:linkname + +//go:linkname boring_runtime_arg0 crypto/internal/boring.runtime_arg0 +func boring_runtime_arg0() string { + // On Windows, argslice is not set, and it's too much work to find argv0. + if len(argslice) == 0 { + return "" + } + return argslice[0] +} + +//go:linkname fipstls_runtime_arg0 crypto/internal/boring/fipstls.runtime_arg0 +func fipstls_runtime_arg0() string { return boring_runtime_arg0() } diff --git a/src/runtime/runtime_linux_test.go b/src/runtime/runtime_linux_test.go index 2b6daecbfc..612397293f 100644 --- a/src/runtime/runtime_linux_test.go +++ b/src/runtime/runtime_linux_test.go @@ -8,6 +8,7 @@ import ( . 
"runtime" "syscall" "testing" + "time" "unsafe" ) @@ -21,6 +22,17 @@ func init() { // for how it is used in init (must be on main thread). pid, tid = syscall.Getpid(), syscall.Gettid() LockOSThread() + + sysNanosleep = func(d time.Duration) { + // Invoke a blocking syscall directly; calling time.Sleep() + // would deschedule the goroutine instead. + ts := syscall.NsecToTimespec(d.Nanoseconds()) + for { + if err := syscall.Nanosleep(&ts, &ts); err != syscall.EINTR { + return + } + } + } } func TestLockOSThread(t *testing.T) { diff --git a/src/runtime/runtime_mmap_test.go b/src/runtime/runtime_mmap_test.go index 2eca6b9e88..57c38bc5dc 100644 --- a/src/runtime/runtime_mmap_test.go +++ b/src/runtime/runtime_mmap_test.go @@ -16,16 +16,10 @@ import ( // what the code in mem_bsd.go, mem_darwin.go, and mem_linux.go expects. // See the uses of ENOMEM in sysMap in those files. func TestMmapErrorSign(t *testing.T) { - p := runtime.Mmap(nil, ^uintptr(0)&^(runtime.GetPhysPageSize()-1), 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0) + p, err := runtime.Mmap(nil, ^uintptr(0)&^(runtime.GetPhysPageSize()-1), 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0) - // The runtime.mmap function is nosplit, but t.Errorf is not. - // Reset the pointer so that we don't get an "invalid stack - // pointer" error from t.Errorf if we call it. - v := uintptr(p) - p = nil - - if v != runtime.ENOMEM { - t.Errorf("mmap = %v, want %v", v, runtime.ENOMEM) + if p != nil || err != runtime.ENOMEM { + t.Errorf("mmap = %v, %v, want nil, %v", p, err, runtime.ENOMEM) } } @@ -35,20 +29,20 @@ func TestPhysPageSize(t *testing.T) { ps := runtime.GetPhysPageSize() // Get a region of memory to play with. This should be page-aligned. - b := uintptr(runtime.Mmap(nil, 2*ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0)) - if b < 4096 { - t.Fatalf("Mmap: %v", b) + b, err := runtime.Mmap(nil, 2*ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0) + if err != 0 { + t.Fatalf("Mmap: %v", err) } // Mmap should fail at a half page into the buffer. - err := uintptr(runtime.Mmap(unsafe.Pointer(uintptr(b)+ps/2), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0)) - if err >= 4096 { + _, err = runtime.Mmap(unsafe.Pointer(uintptr(b)+ps/2), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0) + if err == 0 { t.Errorf("Mmap should have failed with half-page alignment %d, but succeeded: %v", ps/2, err) } // Mmap should succeed at a full page into the buffer. - err = uintptr(runtime.Mmap(unsafe.Pointer(uintptr(b)+ps), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0)) - if err < 4096 { + _, err = runtime.Mmap(unsafe.Pointer(uintptr(b)+ps), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0) + if err != 0 { t.Errorf("Mmap at full-page alignment %d failed: %v", ps, err) } } diff --git a/src/runtime/runtime_test.go b/src/runtime/runtime_test.go index e9bc256712..d5b6b3ac3c 100644 --- a/src/runtime/runtime_test.go +++ b/src/runtime/runtime_test.go @@ -5,6 +5,7 @@ package runtime_test import ( + "flag" "io" . "runtime" "runtime/debug" @@ -13,6 +14,8 @@ import ( "unsafe" ) +var flagQuick = flag.Bool("quick", false, "skip slow tests, for second run in all.bash") + func init() { // We're testing the runtime, so make tracebacks show things // in the runtime. 
This only raises the level, so it won't @@ -196,9 +199,9 @@ func eqstring_generic(s1, s2 string) bool { } func TestEqString(t *testing.T) { - // This isn't really an exhaustive test of eqstring, it's + // This isn't really an exhaustive test of == on strings, it's // just a convenient way of documenting (via eqstring_generic) - // what eqstring does. + // what == does. s := []string{ "", "a", @@ -213,7 +216,7 @@ func TestEqString(t *testing.T) { x := s1 == s2 y := eqstring_generic(s1, s2) if x != y { - t.Errorf(`eqstring("%s","%s") = %t, want %t`, s1, s2, x, y) + t.Errorf(`("%s" == "%s") = %t, want %t`, s1, s2, x, y) } } } diff --git a/src/runtime/rwmutex_test.go b/src/runtime/rwmutex_test.go index a69eca1511..872b3b098e 100644 --- a/src/runtime/rwmutex_test.go +++ b/src/runtime/rwmutex_test.go @@ -12,6 +12,7 @@ package runtime_test import ( "fmt" . "runtime" + "runtime/debug" "sync/atomic" "testing" ) @@ -47,6 +48,10 @@ func doTestParallelReaders(numReaders int) { func TestParallelRWMutexReaders(t *testing.T) { defer GOMAXPROCS(GOMAXPROCS(-1)) + // If runtime triggers a forced GC during this test then it will deadlock, + // since the goroutines can't be stopped/preempted. + // Disable GC for this test (see issue #10958). + defer debug.SetGCPercent(debug.SetGCPercent(-1)) doTestParallelReaders(1) doTestParallelReaders(3) doTestParallelReaders(4) diff --git a/src/runtime/select.go b/src/runtime/select.go index 715cee8750..b59c096928 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -73,7 +73,7 @@ func newselect(sel *hselect, selsize int64, size int32) { } func selectsend(sel *hselect, c *hchan, elem unsafe.Pointer) { - pc := getcallerpc(unsafe.Pointer(&sel)) + pc := getcallerpc() i := sel.ncase if i >= sel.tcase { throw("selectsend: too many cases") @@ -94,7 +94,7 @@ func selectsend(sel *hselect, c *hchan, elem unsafe.Pointer) { } func selectrecv(sel *hselect, c *hchan, elem unsafe.Pointer, received *bool) { - pc := getcallerpc(unsafe.Pointer(&sel)) + pc := getcallerpc() i := sel.ncase if i >= sel.tcase { throw("selectrecv: too many cases") @@ -116,7 +116,7 @@ func selectrecv(sel *hselect, c *hchan, elem unsafe.Pointer, received *bool) { } func selectdefault(sel *hselect) { - pc := getcallerpc(unsafe.Pointer(&sel)) + pc := getcallerpc() i := sel.ncase if i >= sel.tcase { throw("selectdefault: too many cases") @@ -286,7 +286,6 @@ func selectgo(sel *hselect) int { var ( gp *g - done uint32 sg *sudog c *hchan k *scase @@ -353,7 +352,6 @@ loop: // pass 2 - enqueue on all chans gp = getg() - done = 0 if gp.waiting != nil { throw("gp.waiting != nil") } @@ -367,8 +365,7 @@ loop: c = cas.c sg := acquireSudog() sg.g = gp - // Note: selectdone is adjusted for stack copies in stack1.go:adjustsudogs - sg.selectdone = (*uint32)(noescape(unsafe.Pointer(&done))) + sg.isSelect = true // No stack splits between assigning elem and enqueuing // sg on gp.waiting where copystack can find it. sg.elem = cas.elem @@ -394,62 +391,9 @@ loop: gp.param = nil gopark(selparkcommit, nil, "select", traceEvGoBlockSelect, 1) - // While we were asleep, some goroutine came along and completed - // one of the cases in the select and woke us up (called ready). - // As part of that process, the goroutine did a cas on done above - // (aka *sg.selectdone for all queued sg) to win the right to - // complete the select. Now done = 1. - // - // If we copy (grow) our own stack, we will update the - // selectdone pointers inside the gp.waiting sudog list to point - // at the new stack. 
Another goroutine attempting to - // complete one of our (still linked in) select cases might - // see the new selectdone pointer (pointing at the new stack) - // before the new stack has real data; if the new stack has done = 0 - // (before the old values are copied over), the goroutine might - // do a cas via sg.selectdone and incorrectly believe that it has - // won the right to complete the select, executing a second - // communication and attempting to wake us (call ready) again. - // - // Then things break. - // - // The best break is that the goroutine doing ready sees the - // _Gcopystack status and throws, as in #17007. - // A worse break would be for us to continue on, start running real code, - // block in a semaphore acquisition (sema.go), and have the other - // goroutine wake us up without having really acquired the semaphore. - // That would result in the goroutine spuriously running and then - // queue up another spurious wakeup when the semaphore really is ready. - // In general the situation can cascade until something notices the - // problem and causes a crash. - // - // A stack shrink does not have this problem, because it locks - // all the channels that are involved first, blocking out the - // possibility of a cas on selectdone. - // - // A stack growth before gopark above does not have this - // problem, because we hold those channel locks (released by - // selparkcommit). - // - // A stack growth after sellock below does not have this - // problem, because again we hold those channel locks. - // - // The only problem is a stack growth during sellock. - // To keep that from happening, run sellock on the system stack. - // - // It might be that we could avoid this if copystack copied the - // stack before calling adjustsudogs. In that case, - // syncadjustsudogs would need to recopy the tiny part that - // it copies today, resulting in a little bit of extra copying. - // - // An even better fix, not for the week before a release candidate, - // would be to put space in every sudog and make selectdone - // point at (say) the space in the first sudog. - - systemstack(func() { - sellock(scases, lockorder) - }) + sellock(scases, lockorder) + gp.selectDone = 0 sg = (*sudog)(gp.param) gp.param = nil @@ -462,7 +406,7 @@ loop: sglist = gp.waiting // Clear all elem before unlinking from gp.waiting. for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink { - sg1.selectdone = nil + sg1.isSelect = false sg1.elem = nil sg1.c = nil } @@ -513,10 +457,8 @@ loop: print("wait-return: sel=", sel, " c=", c, " cas=", cas, " kind=", cas.kind, "\n") } - if cas.kind == caseRecv { - if cas.receivedp != nil { - *cas.receivedp = true - } + if cas.kind == caseRecv && cas.receivedp != nil { + *cas.receivedp = true } if raceenabled { diff --git a/src/runtime/sema.go b/src/runtime/sema.go index 8715e07d7a..d5ea14d46d 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -275,7 +275,10 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { // on the ticket: s.ticket <= both s.prev.ticket and s.next.ticket. // https://en.wikipedia.org/wiki/Treap // http://faculty.washington.edu/aragon/pubs/rst89.pdf - s.ticket = fastrand() + // + // s.ticket compared with zero in couple of places, therefore set lowest bit. + // It will not affect treap's quality noticeably. 
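The select hunk above replaces the old stack-allocated selectdone word, whose address had to be handled specially when a stack was copied (the hazard the long removed comment describes), with an isSelect flag on each sudog and a selectDone word stored on the g itself, so the wake-up race can be decided by a plain CAS that stack growth cannot disturb. (The sema hunk that follows is a separate change: s.ticket is ORed with 1 so it is never zero, since tickets are compared against zero in a couple of places, at negligible cost to the treap's balance.) Below is a minimal, self-contained sketch of the wake-up race with simplified names; the real runtime performs the CAS on the blocked goroutine's g and under the channel locks, not on a local variable.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	// selectDone plays the role of g.selectDone above: a word that each
	// candidate waker tries to CAS from 0 to 1. Only the winner is allowed
	// to complete the communication and wake the selecting goroutine.
	var selectDone uint32
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		i := i
		wg.Add(1)
		go func() {
			defer wg.Done()
			if atomic.CompareAndSwapUint32(&selectDone, 0, 1) {
				fmt.Printf("case %d won the wake-up race\n", i)
			}
		}()
	}
	wg.Wait()
}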
+ s.ticket = fastrand() | 1 s.parent = last *pt = s diff --git a/src/runtime/signal_darwin.go b/src/runtime/signal_darwin.go index 0c5481a2ef..8090fb22a5 100644 --- a/src/runtime/signal_darwin.go +++ b/src/runtime/signal_darwin.go @@ -4,11 +4,6 @@ package runtime -type sigTabT struct { - flags int32 - name string -} - var sigtable = [...]sigTabT{ /* 0 */ {0, "SIGNONE: no trap"}, /* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"}, @@ -26,20 +21,20 @@ var sigtable = [...]sigTabT{ /* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"}, /* 14 */ {_SigNotify, "SIGALRM: alarm clock"}, /* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"}, - /* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"}, + /* 16 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"}, /* 17 */ {0, "SIGSTOP: stop"}, - /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"}, - /* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"}, - /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"}, - /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"}, - /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"}, - /* 23 */ {_SigNotify, "SIGIO: i/o now possible"}, + /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"}, + /* 19 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue after stop"}, + /* 20 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"}, + /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"}, + /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"}, + /* 23 */ {_SigNotify + _SigIgn, "SIGIO: i/o now possible"}, /* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"}, /* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"}, /* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"}, /* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"}, - /* 28 */ {_SigNotify, "SIGWINCH: window size change"}, - /* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"}, + /* 28 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"}, + /* 29 */ {_SigNotify + _SigIgn, "SIGINFO: status request from keyboard"}, /* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"}, /* 31 */ {_SigNotify, "SIGUSR2: user-defined signal 2"}, } diff --git a/src/runtime/signal_darwin_arm.go b/src/runtime/signal_darwin_arm.go index c88b90c5e2..9a5d3ac5bb 100644 --- a/src/runtime/signal_darwin_arm.go +++ b/src/runtime/signal_darwin_arm.go @@ -36,7 +36,7 @@ func (c *sigctxt) lr() uint32 { return c.regs().lr } func (c *sigctxt) pc() uint32 { return c.regs().pc } func (c *sigctxt) cpsr() uint32 { return c.regs().cpsr } -func (c *sigctxt) fault() uint32 { return c.info.si_addr } +func (c *sigctxt) fault() uintptr { return uintptr(c.info.si_addr) } func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) } func (c *sigctxt) trap() uint32 { return 0 } func (c *sigctxt) error() uint32 { return 0 } diff --git a/src/runtime/signal_darwin_arm64.go b/src/runtime/signal_darwin_arm64.go index b14b9f1e50..41b8fcaab9 100644 --- a/src/runtime/signal_darwin_arm64.go +++ b/src/runtime/signal_darwin_arm64.go @@ -52,7 +52,7 @@ func (c *sigctxt) sp() uint64 { return c.regs().sp } //go:nowritebarrierrec func (c *sigctxt) pc() uint64 { return c.regs().pc } -func (c *sigctxt) fault() uint64 { return uint64(uintptr(unsafe.Pointer(c.info.si_addr))) } +func (c *sigctxt) fault() uintptr { return uintptr(unsafe.Pointer(c.info.si_addr)) } func (c *sigctxt) 
sigcode() uint64 { return uint64(c.info.si_code) } func (c *sigctxt) sigaddr() uint64 { return uint64(uintptr(unsafe.Pointer(c.info.si_addr))) } diff --git a/src/runtime/signal_dragonfly.go b/src/runtime/signal_dragonfly.go index 8e9ce17c86..f2b26e7179 100644 --- a/src/runtime/signal_dragonfly.go +++ b/src/runtime/signal_dragonfly.go @@ -4,11 +4,6 @@ package runtime -type sigTabT struct { - flags int32 - name string -} - var sigtable = [...]sigTabT{ /* 0 */ {0, "SIGNONE: no trap"}, /* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"}, @@ -26,20 +21,20 @@ var sigtable = [...]sigTabT{ /* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"}, /* 14 */ {_SigNotify, "SIGALRM: alarm clock"}, /* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"}, - /* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"}, + /* 16 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"}, /* 17 */ {0, "SIGSTOP: stop"}, - /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"}, - /* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"}, - /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"}, - /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"}, - /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"}, - /* 23 */ {_SigNotify, "SIGIO: i/o now possible"}, + /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"}, + /* 19 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue after stop"}, + /* 20 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"}, + /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"}, + /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"}, + /* 23 */ {_SigNotify + _SigIgn, "SIGIO: i/o now possible"}, /* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"}, /* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"}, /* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"}, /* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"}, - /* 28 */ {_SigNotify, "SIGWINCH: window size change"}, - /* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"}, + /* 28 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"}, + /* 29 */ {_SigNotify + _SigIgn, "SIGINFO: status request from keyboard"}, /* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"}, /* 31 */ {_SigNotify, "SIGUSR2: user-defined signal 2"}, /* 32 */ {_SigNotify, "SIGTHR: reserved"}, diff --git a/src/runtime/signal_freebsd.go b/src/runtime/signal_freebsd.go index 7ce7217e07..2812c69989 100644 --- a/src/runtime/signal_freebsd.go +++ b/src/runtime/signal_freebsd.go @@ -4,11 +4,6 @@ package runtime -type sigTabT struct { - flags int32 - name string -} - var sigtable = [...]sigTabT{ /* 0 */ {0, "SIGNONE: no trap"}, /* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"}, @@ -26,20 +21,20 @@ var sigtable = [...]sigTabT{ /* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"}, /* 14 */ {_SigNotify, "SIGALRM: alarm clock"}, /* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"}, - /* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"}, + /* 16 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"}, /* 17 */ {0, "SIGSTOP: stop"}, - /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"}, - /* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"}, - /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"}, - /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"}, - /* 
22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"}, - /* 23 */ {_SigNotify, "SIGIO: i/o now possible"}, + /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"}, + /* 19 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue after stop"}, + /* 20 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"}, + /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"}, + /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"}, + /* 23 */ {_SigNotify + _SigIgn, "SIGIO: i/o now possible"}, /* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"}, /* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"}, /* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"}, /* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"}, - /* 28 */ {_SigNotify, "SIGWINCH: window size change"}, - /* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"}, + /* 28 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"}, + /* 29 */ {_SigNotify + _SigIgn, "SIGINFO: status request from keyboard"}, /* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"}, /* 31 */ {_SigNotify, "SIGUSR2: user-defined signal 2"}, /* 32 */ {_SigNotify, "SIGTHR: reserved"}, diff --git a/src/runtime/signal_freebsd_arm.go b/src/runtime/signal_freebsd_arm.go index 9601370112..2135c1e752 100644 --- a/src/runtime/signal_freebsd_arm.go +++ b/src/runtime/signal_freebsd_arm.go @@ -36,7 +36,7 @@ func (c *sigctxt) lr() uint32 { return c.regs().__gregs[14] } func (c *sigctxt) pc() uint32 { return c.regs().__gregs[15] } func (c *sigctxt) cpsr() uint32 { return c.regs().__gregs[16] } -func (c *sigctxt) fault() uint32 { return uint32(c.info.si_addr) } +func (c *sigctxt) fault() uintptr { return uintptr(c.info.si_addr) } func (c *sigctxt) trap() uint32 { return 0 } func (c *sigctxt) error() uint32 { return 0 } func (c *sigctxt) oldmask() uint32 { return 0 } diff --git a/src/runtime/signal_linux_arm.go b/src/runtime/signal_linux_arm.go index 06a57b83b9..876b505917 100644 --- a/src/runtime/signal_linux_arm.go +++ b/src/runtime/signal_linux_arm.go @@ -39,7 +39,7 @@ func (c *sigctxt) lr() uint32 { return c.regs().lr } func (c *sigctxt) pc() uint32 { return c.regs().pc } func (c *sigctxt) cpsr() uint32 { return c.regs().cpsr } -func (c *sigctxt) fault() uint32 { return c.regs().fault_address } +func (c *sigctxt) fault() uintptr { return uintptr(c.regs().fault_address) } func (c *sigctxt) trap() uint32 { return c.regs().trap_no } func (c *sigctxt) error() uint32 { return c.regs().error_code } func (c *sigctxt) oldmask() uint32 { return c.regs().oldmask } diff --git a/src/runtime/signal_linux_arm64.go b/src/runtime/signal_linux_arm64.go index f3d4d384e5..2075f253d7 100644 --- a/src/runtime/signal_linux_arm64.go +++ b/src/runtime/signal_linux_arm64.go @@ -56,7 +56,7 @@ func (c *sigctxt) sp() uint64 { return c.regs().sp } func (c *sigctxt) pc() uint64 { return c.regs().pc } func (c *sigctxt) pstate() uint64 { return c.regs().pstate } -func (c *sigctxt) fault() uint64 { return c.regs().fault_address } +func (c *sigctxt) fault() uintptr { return uintptr(c.regs().fault_address) } func (c *sigctxt) sigcode() uint64 { return uint64(c.info.si_code) } func (c *sigctxt) sigaddr() uint64 { return c.info.si_addr } diff --git a/src/runtime/signal_linux_ppc64x.go b/src/runtime/signal_linux_ppc64x.go index b6831bc22d..97cb26d587 100644 --- a/src/runtime/signal_linux_ppc64x.go +++ b/src/runtime/signal_linux_ppc64x.go @@ -67,7 +67,7 @@ func (c *sigctxt) ccr() 
uint64 { return c.regs().ccr } func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) } func (c *sigctxt) sigaddr() uint64 { return c.info.si_addr } -func (c *sigctxt) fault() uint64 { return c.regs().dar } +func (c *sigctxt) fault() uintptr { return uintptr(c.regs().dar) } func (c *sigctxt) set_r0(x uint64) { c.regs().gpr[0] = x } func (c *sigctxt) set_r12(x uint64) { c.regs().gpr[12] = x } diff --git a/src/runtime/signal_nacl.go b/src/runtime/signal_nacl.go index 47930757da..ad321d8b75 100644 --- a/src/runtime/signal_nacl.go +++ b/src/runtime/signal_nacl.go @@ -26,13 +26,13 @@ var sigtable = [...]sigTabT{ /* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"}, /* 14 */ {_SigNotify, "SIGALRM: alarm clock"}, /* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"}, - /* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"}, + /* 16 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"}, /* 17 */ {0, "SIGSTOP: stop"}, - /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"}, - /* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"}, - /* 20 */ {_SigNotify, "SIGCHLD: child status has changed"}, - /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"}, - /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"}, + /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"}, + /* 19 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue after stop"}, + /* 20 */ {_SigNotify + _SigIgn, "SIGCHLD: child status has changed"}, + /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"}, + /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"}, /* 23 */ {_SigNotify, "SIGIO: i/o now possible"}, /* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"}, /* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"}, diff --git a/src/runtime/signal_nacl_arm.go b/src/runtime/signal_nacl_arm.go index 959dbfbab1..b8312324ac 100644 --- a/src/runtime/signal_nacl_arm.go +++ b/src/runtime/signal_nacl_arm.go @@ -36,7 +36,7 @@ func (c *sigctxt) lr() uint32 { return c.regs().lr } func (c *sigctxt) pc() uint32 { return c.regs().pc } func (c *sigctxt) cpsr() uint32 { return c.regs().cpsr } -func (c *sigctxt) fault() uint32 { return ^uint32(0) } +func (c *sigctxt) fault() uintptr { return ^uintptr(0) } func (c *sigctxt) trap() uint32 { return ^uint32(0) } func (c *sigctxt) error() uint32 { return ^uint32(0) } func (c *sigctxt) oldmask() uint32 { return ^uint32(0) } diff --git a/src/runtime/signal_netbsd.go b/src/runtime/signal_netbsd.go index 30a3b8e1a9..ca510842e1 100644 --- a/src/runtime/signal_netbsd.go +++ b/src/runtime/signal_netbsd.go @@ -4,11 +4,6 @@ package runtime -type sigTabT struct { - flags int32 - name string -} - var sigtable = [...]sigTabT{ /* 0 */ {0, "SIGNONE: no trap"}, /* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"}, @@ -26,20 +21,20 @@ var sigtable = [...]sigTabT{ /* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"}, /* 14 */ {_SigNotify, "SIGALRM: alarm clock"}, /* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"}, - /* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"}, + /* 16 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"}, /* 17 */ {0, "SIGSTOP: stop"}, - /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"}, - /* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"}, - /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"}, - /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: 
background read from tty"}, - /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"}, - /* 23 */ {_SigNotify, "SIGIO: i/o now possible"}, + /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"}, + /* 19 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue after stop"}, + /* 20 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"}, + /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"}, + /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"}, + /* 23 */ {_SigNotify + _SigIgn, "SIGIO: i/o now possible"}, /* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"}, /* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"}, /* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"}, /* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"}, - /* 28 */ {_SigNotify, "SIGWINCH: window size change"}, - /* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"}, + /* 28 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"}, + /* 29 */ {_SigNotify + _SigIgn, "SIGINFO: status request from keyboard"}, /* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"}, /* 31 */ {_SigNotify, "SIGUSR2: user-defined signal 2"}, /* 32 */ {_SigNotify, "SIGTHR: reserved"}, diff --git a/src/runtime/signal_netbsd_arm.go b/src/runtime/signal_netbsd_arm.go index 64cfffae5a..fdb30785d9 100644 --- a/src/runtime/signal_netbsd_arm.go +++ b/src/runtime/signal_netbsd_arm.go @@ -36,7 +36,7 @@ func (c *sigctxt) lr() uint32 { return c.regs().__gregs[_REG_R14] } func (c *sigctxt) pc() uint32 { return c.regs().__gregs[_REG_R15] } func (c *sigctxt) cpsr() uint32 { return c.regs().__gregs[_REG_CPSR] } -func (c *sigctxt) fault() uint32 { return uint32(c.info._reason) } +func (c *sigctxt) fault() uintptr { return uintptr(c.info._reason) } func (c *sigctxt) trap() uint32 { return 0 } func (c *sigctxt) error() uint32 { return 0 } func (c *sigctxt) oldmask() uint32 { return 0 } diff --git a/src/runtime/signal_openbsd.go b/src/runtime/signal_openbsd.go index 30a3b8e1a9..99c601ce58 100644 --- a/src/runtime/signal_openbsd.go +++ b/src/runtime/signal_openbsd.go @@ -4,11 +4,6 @@ package runtime -type sigTabT struct { - flags int32 - name string -} - var sigtable = [...]sigTabT{ /* 0 */ {0, "SIGNONE: no trap"}, /* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"}, @@ -26,13 +21,13 @@ var sigtable = [...]sigTabT{ /* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"}, /* 14 */ {_SigNotify, "SIGALRM: alarm clock"}, /* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"}, - /* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"}, + /* 16 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"}, /* 17 */ {0, "SIGSTOP: stop"}, - /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"}, - /* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"}, - /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"}, - /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"}, - /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"}, + /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"}, + /* 19 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue after stop"}, + /* 20 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"}, + /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"}, + /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"}, /* 23 */ 
{_SigNotify, "SIGIO: i/o now possible"}, /* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"}, /* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"}, diff --git a/src/runtime/signal_openbsd_arm.go b/src/runtime/signal_openbsd_arm.go index 66aea934f1..97bb13b4f3 100644 --- a/src/runtime/signal_openbsd_arm.go +++ b/src/runtime/signal_openbsd_arm.go @@ -38,7 +38,7 @@ func (c *sigctxt) lr() uint32 { return c.regs().sc_usr_lr } func (c *sigctxt) pc() uint32 { return c.regs().sc_pc } func (c *sigctxt) cpsr() uint32 { return c.regs().sc_spsr } -func (c *sigctxt) fault() uint32 { return c.sigaddr() } +func (c *sigctxt) fault() uintptr { return uintptr(c.sigaddr()) } func (c *sigctxt) trap() uint32 { return 0 } func (c *sigctxt) error() uint32 { return 0 } func (c *sigctxt) oldmask() uint32 { return 0 } diff --git a/src/runtime/signal_sighandler.go b/src/runtime/signal_sighandler.go index b2e15a6539..f24a117fcd 100644 --- a/src/runtime/signal_sighandler.go +++ b/src/runtime/signal_sighandler.go @@ -88,9 +88,9 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { } print("PC=", hex(c.sigpc()), " m=", _g_.m.id, " sigcode=", c.sigcode(), "\n") - if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 { + if _g_.m.lockedg != 0 && _g_.m.ncgo > 0 && gp == _g_.m.g0 { print("signal arrived during cgo execution\n") - gp = _g_.m.lockedg + gp = _g_.m.lockedg.ptr() } print("\n") @@ -111,7 +111,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { if docrash { crashing++ - if crashing < sched.mcount-int32(extraMCount) { + if crashing < mcount()-int32(extraMCount) { // There are other m's that need to dump their stacks. // Relay SIGQUIT to the next m by sending it to the current process. // All m's that have already received SIGQUIT have signal masks blocking diff --git a/src/runtime/signal_solaris.go b/src/runtime/signal_solaris.go index c931c222d6..a8eeeee129 100644 --- a/src/runtime/signal_solaris.go +++ b/src/runtime/signal_solaris.go @@ -4,11 +4,6 @@ package runtime -type sigTabT struct { - flags int32 - name string -} - var sigtable = [...]sigTabT{ /* 0 */ {0, "SIGNONE: no trap"}, /* 1 */ {_SigNotify + _SigKill, "SIGHUP: hangup"}, @@ -28,16 +23,16 @@ var sigtable = [...]sigTabT{ /* 15 */ {_SigNotify + _SigKill, "SIGTERM: software termination signal from kill"}, /* 16 */ {_SigNotify, "SIGUSR1: user defined signal 1"}, /* 17 */ {_SigNotify, "SIGUSR2: user defined signal 2"}, - /* 18 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status change alias (POSIX)"}, + /* 18 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status change alias (POSIX)"}, /* 19 */ {_SigNotify, "SIGPWR: power-fail restart"}, - /* 20 */ {_SigNotify, "SIGWINCH: window size change"}, - /* 21 */ {_SigNotify, "SIGURG: urgent socket condition"}, + /* 20 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"}, + /* 21 */ {_SigNotify + _SigIgn, "SIGURG: urgent socket condition"}, /* 22 */ {_SigNotify, "SIGPOLL: pollable event occurred"}, /* 23 */ {0, "SIGSTOP: stop (cannot be caught or ignored)"}, - /* 24 */ {_SigNotify + _SigDefault, "SIGTSTP: user stop requested from tty"}, - /* 25 */ {_SigNotify + _SigDefault, "SIGCONT: stopped process has been continued"}, - /* 26 */ {_SigNotify + _SigDefault, "SIGTTIN: background tty read attempted"}, - /* 27 */ {_SigNotify + _SigDefault, "SIGTTOU: background tty write attempted"}, + /* 24 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: user stop requested from tty"}, + /* 25 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: stopped process 
has been continued"}, + /* 26 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background tty read attempted"}, + /* 27 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background tty write attempted"}, /* 28 */ {_SigNotify, "SIGVTALRM: virtual timer expired"}, /* 29 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling timer expired"}, /* 30 */ {_SigNotify, "SIGXCPU: exceeded cpu limit"}, diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go index 539b165ba1..2cd3d71800 100644 --- a/src/runtime/signal_unix.go +++ b/src/runtime/signal_unix.go @@ -8,10 +8,19 @@ package runtime import ( "runtime/internal/atomic" - "runtime/internal/sys" "unsafe" ) +// sigTabT is the type of an entry in the global sigtable array. +// sigtable is inherently system dependent, and appears in OS-specific files, +// but sigTabT is the same for all Unixy systems. +// The sigtable array is indexed by a system signal number to get the flags +// and printable name of each signal. +type sigTabT struct { + flags int32 + name string +} + //go:linkname os_sigpipe os.sigpipe func os_sigpipe() { systemstack(sigpipe) @@ -266,6 +275,12 @@ func sigpipe() { // sigtrampgo is called from the signal handler function, sigtramp, // written in assembly code. // This is called by the signal handler, and the world may be stopped. +// +// It must be nosplit because getg() is still the G that was running +// (if any) when the signal was delivered, but it's (usually) called +// on the gsignal stack. Until this switches the G to gsignal, the +// stack bounds check won't work. +// //go:nosplit //go:nowritebarrierrec func sigtrampgo(sig uint32, info *siginfo, ctx unsafe.Pointer) { @@ -395,8 +410,9 @@ func sigpanic() { //go:nosplit //go:nowritebarrierrec func dieFromSignal(sig uint32) { - setsig(sig, _SIG_DFL) unblocksig(sig) + // Mark the signal as unhandled to ensure it is forwarded. + atomic.Store(&handlingSig[sig], 0) raise(sig) // That should have killed us. On some systems, though, raise @@ -408,6 +424,14 @@ func dieFromSignal(sig uint32) { osyield() osyield() + // If that didn't work, try _SIG_DFL. + setsig(sig, _SIG_DFL) + raise(sig) + + osyield() + osyield() + osyield() + // If we are still somehow running, just exit with the wrong status. exit(2) } @@ -474,7 +498,7 @@ func crash() { // this means the OS X core file will be >128 GB and even on a zippy // workstation can take OS X well over an hour to write (uninterruptible). // Save users from making that mistake. - if sys.PtrSize == 8 { + if GOARCH == "amd64" { return } } @@ -502,7 +526,7 @@ func ensureSigM() { // mask accordingly. sigBlocked := sigset_all for i := range sigtable { - if sigtable[i].flags&_SigUnblock != 0 { + if !blockableSig(uint32(i)) { sigdelset(&sigBlocked, i) } } @@ -514,7 +538,7 @@ func ensureSigM() { sigdelset(&sigBlocked, int(sig)) } case sig := <-disableSigChan: - if sig > 0 { + if sig > 0 && blockableSig(sig) { sigaddset(&sigBlocked, int(sig)) } } @@ -578,17 +602,23 @@ func sigfwdgo(sig uint32, info *siginfo, ctx unsafe.Pointer) bool { return false } fwdFn := atomic.Loaduintptr(&fwdSig[sig]) + flags := sigtable[sig].flags - if !signalsOK { - // The only way we can get here is if we are in a - // library or archive, we installed a signal handler - // at program startup, but the Go runtime has not yet - // been initialized. + // If we aren't handling the signal, forward it. + if atomic.Load(&handlingSig[sig]) == 0 || !signalsOK { + // If the signal is ignored, doing nothing is the same as forwarding. 
+ if fwdFn == _SIG_IGN || (fwdFn == _SIG_DFL && flags&_SigIgn != 0) { + return true + } + // We are not handling the signal and there is no other handler to forward to. + // Crash with the default behavior. if fwdFn == _SIG_DFL { + setsig(sig, _SIG_DFL) dieFromSignal(sig) - } else { - sigfwd(fwdFn, sig, info, ctx) + return false } + + sigfwd(fwdFn, sig, info, ctx) return true } @@ -597,18 +627,6 @@ func sigfwdgo(sig uint32, info *siginfo, ctx unsafe.Pointer) bool { return false } - // If we aren't handling the signal, forward it. - // Really if we aren't handling the signal, we shouldn't get here, - // but on Darwin setsigstack can lead us here because it sets - // the sa_tramp field. The sa_tramp field is not returned by - // sigaction, so the fix for that is non-obvious. - if atomic.Load(&handlingSig[sig]) == 0 { - sigfwd(fwdFn, sig, info, ctx) - return true - } - - flags := sigtable[sig].flags - c := &sigctxt{info, ctx} // Only forward synchronous signals and SIGPIPE. // Unfortunately, user generated SIGPIPEs will also be forwarded, because si_code @@ -702,7 +720,7 @@ func minitSignalStack() { signalstack(&_g_.m.gsignal.stack) _g_.m.newSigstack = true } else { - setGsignalStack(&st, nil) + setGsignalStack(&st, &_g_.m.goSigStack) _g_.m.newSigstack = false } } @@ -718,7 +736,7 @@ func minitSignalStack() { func minitSignalMask() { nmask := getg().m.sigmask for i := range sigtable { - if sigtable[i].flags&_SigUnblock != 0 { + if !blockableSig(uint32(i)) { sigdelset(&nmask, i) } } @@ -732,7 +750,34 @@ func unminitSignals() { if getg().m.newSigstack { st := stackt{ss_flags: _SS_DISABLE} sigaltstack(&st, nil) + } else { + // We got the signal stack from someone else. Restore + // the Go-allocated stack in case this M gets reused + // for another thread (e.g., it's an extram). Also, on + // Android, libc allocates a signal stack for all + // threads, so it's important to restore the Go stack + // even on Go-created threads so we can free it. + restoreGsignalStack(&getg().m.goSigStack) + } +} + +// blockableSig returns whether sig may be blocked by the signal mask. +// We never want to block the signals marked _SigUnblock; +// these are the synchronous signals that turn into a Go panic. +// In a Go program--not a c-archive/c-shared--we never want to block +// the signals marked _SigKill or _SigThrow, as otherwise it's possible +// for all running threads to block them and delay their delivery until +// we start a new thread. When linked into a C program we let the C code +// decide on the disposition of those signals. 
+func blockableSig(sig uint32) bool { + flags := sigtable[sig].flags + if flags&_SigUnblock != 0 { + return false + } + if isarchive || islibrary { + return true } + return flags&(_SigKill|_SigThrow) == 0 } // gsignalStack saves the fields of the gsignal stack changed by diff --git a/src/runtime/signal_windows.go b/src/runtime/signal_windows.go index 73bd5b5cfc..7d230517f6 100644 --- a/src/runtime/signal_windows.go +++ b/src/runtime/signal_windows.go @@ -126,11 +126,11 @@ func lastcontinuehandler(info *exceptionrecord, r *context, gp *g) int32 { print("Exception ", hex(info.exceptioncode), " ", hex(info.exceptioninformation[0]), " ", hex(info.exceptioninformation[1]), " ", hex(r.ip()), "\n") print("PC=", hex(r.ip()), "\n") - if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 { + if _g_.m.lockedg != 0 && _g_.m.ncgo > 0 && gp == _g_.m.g0 { if iscgo { print("signal arrived during external code execution\n") } - gp = _g_.m.lockedg + gp = _g_.m.lockedg.ptr() } print("\n") @@ -223,3 +223,6 @@ func crash() { // It's okay to leave this empty for now: if crash returns // the ordinary exit-after-panic happens. } + +// gsignalStack is unused on Windows. +type gsignalStack struct{} diff --git a/src/runtime/sigqueue.go b/src/runtime/sigqueue.go index 236bb29929..98331627eb 100644 --- a/src/runtime/sigqueue.go +++ b/src/runtime/sigqueue.go @@ -45,13 +45,14 @@ import ( // as there is no connection between handling a signal and receiving one, // but atomic instructions should minimize it. var sig struct { - note note - mask [(_NSIG + 31) / 32]uint32 - wanted [(_NSIG + 31) / 32]uint32 - ignored [(_NSIG + 31) / 32]uint32 - recv [(_NSIG + 31) / 32]uint32 - state uint32 - inuse bool + note note + mask [(_NSIG + 31) / 32]uint32 + wanted [(_NSIG + 31) / 32]uint32 + ignored [(_NSIG + 31) / 32]uint32 + recv [(_NSIG + 31) / 32]uint32 + state uint32 + delivering uint32 + inuse bool } const ( @@ -60,15 +61,20 @@ const ( sigSending ) -// Called from sighandler to send a signal back out of the signal handling thread. -// Reports whether the signal was sent. If not, the caller typically crashes the program. +// sigsend delivers a signal from sighandler to the internal signal delivery queue. +// It reports whether the signal was sent. If not, the caller typically crashes the program. +// It runs from the signal handler, so it's limited in what it can do. func sigsend(s uint32) bool { bit := uint32(1) << uint(s&31) if !sig.inuse || s >= uint32(32*len(sig.wanted)) { return false } + atomic.Xadd(&sig.delivering, 1) + // We are running in the signal handler; defer is not available. + if w := atomic.Load(&sig.wanted[s/32]); w&bit == 0 { + atomic.Xadd(&sig.delivering, -1) return false } @@ -76,6 +82,7 @@ func sigsend(s uint32) bool { for { mask := sig.mask[s/32] if mask&bit != 0 { + atomic.Xadd(&sig.delivering, -1) return true // signal already in queue } if atomic.Cas(&sig.mask[s/32], mask, mask|bit) { @@ -104,6 +111,7 @@ Send: } } + atomic.Xadd(&sig.delivering, -1) return true } @@ -155,6 +163,15 @@ func signal_recv() uint32 { // by the os/signal package. //go:linkname signalWaitUntilIdle os/signal.signalWaitUntilIdle func signalWaitUntilIdle() { + // Although the signals we care about have been removed from + // sig.wanted, it is possible that another thread has received + // a signal, has read from sig.wanted, is now updating sig.mask, + // and has not yet woken up the processor thread. We need to wait + // until all current signal deliveries have completed. 
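As an aside, the in-flight counter protocol added here can be sketched outside the runtime roughly as follows; delivering, deliver, and waitIdle are illustrative names for this sketch, and the real sigsend decrements explicitly rather than with defer because it runs inside the signal handler:

package main

import (
	"runtime"
	"sync/atomic"
)

// delivering counts signal deliveries that have started but not yet finished.
var delivering uint32

// deliver models sigsend: bump the counter for the duration of one delivery.
func deliver(enqueue func()) {
	atomic.AddUint32(&delivering, 1)
	enqueue()
	atomic.AddUint32(&delivering, ^uint32(0)) // decrement; no defer in a signal handler
}

// waitIdle models the wait added to signalWaitUntilIdle: yield until every
// in-flight delivery has completed.
func waitIdle() {
	for atomic.LoadUint32(&delivering) != 0 {
		runtime.Gosched()
	}
}

func main() {
	deliver(func() {})
	waitIdle()
}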
+ for atomic.Load(&sig.delivering) != 0 { + Gosched() + } + // Although WaitUntilIdle seems like the right name for this // function, the state we are looking for is sigReceiving, not // sigIdle. The sigIdle state is really more like sigProcessing. diff --git a/src/runtime/sigtab_linux_generic.go b/src/runtime/sigtab_linux_generic.go index 874148e1d2..b26040b803 100644 --- a/src/runtime/sigtab_linux_generic.go +++ b/src/runtime/sigtab_linux_generic.go @@ -10,11 +10,6 @@ package runtime -type sigTabT struct { - flags int32 - name string -} - var sigtable = [...]sigTabT{ /* 0 */ {0, "SIGNONE: no trap"}, /* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"}, @@ -33,18 +28,18 @@ var sigtable = [...]sigTabT{ /* 14 */ {_SigNotify, "SIGALRM: alarm clock"}, /* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"}, /* 16 */ {_SigThrow + _SigUnblock, "SIGSTKFLT: stack fault"}, - /* 17 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"}, - /* 18 */ {_SigNotify + _SigDefault, "SIGCONT: continue"}, + /* 17 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"}, + /* 18 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue"}, /* 19 */ {0, "SIGSTOP: stop, unblockable"}, - /* 20 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"}, - /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"}, - /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"}, - /* 23 */ {_SigNotify, "SIGURG: urgent condition on socket"}, + /* 20 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"}, + /* 21 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"}, + /* 22 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"}, + /* 23 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"}, /* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"}, /* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"}, /* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"}, /* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"}, - /* 28 */ {_SigNotify, "SIGWINCH: window size change"}, + /* 28 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"}, /* 29 */ {_SigNotify, "SIGIO: i/o now possible"}, /* 30 */ {_SigNotify, "SIGPWR: power failure restart"}, /* 31 */ {_SigThrow, "SIGSYS: bad system call"}, diff --git a/src/runtime/sigtab_linux_mipsx.go b/src/runtime/sigtab_linux_mipsx.go index 8d9fb06704..81dd2314c5 100644 --- a/src/runtime/sigtab_linux_mipsx.go +++ b/src/runtime/sigtab_linux_mipsx.go @@ -7,11 +7,6 @@ package runtime -type sigTabT struct { - flags int32 - name string -} - var sigtable = [...]sigTabT{ /* 0 */ {0, "SIGNONE: no trap"}, /* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"}, @@ -31,16 +26,16 @@ var sigtable = [...]sigTabT{ /* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"}, /* 16 */ {_SigNotify, "SIGUSR1: user-defined signal 1"}, /* 17 */ {_SigNotify, "SIGUSR2: user-defined signal 2"}, - /* 18 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"}, + /* 18 */ {_SigNotify + _SigUnblock + _SigIgn, "SIGCHLD: child status has changed"}, /* 19 */ {_SigNotify, "SIGPWR: power failure restart"}, - /* 20 */ {_SigNotify, "SIGWINCH: window size change"}, - /* 21 */ {_SigNotify, "SIGURG: urgent condition on socket"}, + /* 20 */ {_SigNotify + _SigIgn, "SIGWINCH: window size change"}, + /* 21 */ {_SigNotify + _SigIgn, "SIGURG: urgent condition on socket"}, /* 22 */ {_SigNotify, "SIGIO: i/o now possible"}, /* 23 */ {0, "SIGSTOP: stop, unblockable"}, - 
/* 24 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"}, - /* 25 */ {_SigNotify + _SigDefault, "SIGCONT: continue"}, - /* 26 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"}, - /* 27 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"}, + /* 24 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTSTP: keyboard stop"}, + /* 25 */ {_SigNotify + _SigDefault + _SigIgn, "SIGCONT: continue"}, + /* 26 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTIN: background read from tty"}, + /* 27 */ {_SigNotify + _SigDefault + _SigIgn, "SIGTTOU: background write to tty"}, /* 28 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"}, /* 29 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"}, /* 30 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"}, diff --git a/src/runtime/sizeclasses.go b/src/runtime/sizeclasses.go index 5366564afd..9e17b001d3 100644 --- a/src/runtime/sizeclasses.go +++ b/src/runtime/sizeclasses.go @@ -3,73 +3,73 @@ package runtime -// class bytes/obj bytes/span objects waste bytes -// 1 8 8192 1024 0 -// 2 16 8192 512 0 -// 3 32 8192 256 0 -// 4 48 8192 170 32 -// 5 64 8192 128 0 -// 6 80 8192 102 32 -// 7 96 8192 85 32 -// 8 112 8192 73 16 -// 9 128 8192 64 0 -// 10 144 8192 56 128 -// 11 160 8192 51 32 -// 12 176 8192 46 96 -// 13 192 8192 42 128 -// 14 208 8192 39 80 -// 15 224 8192 36 128 -// 16 240 8192 34 32 -// 17 256 8192 32 0 -// 18 288 8192 28 128 -// 19 320 8192 25 192 -// 20 352 8192 23 96 -// 21 384 8192 21 128 -// 22 416 8192 19 288 -// 23 448 8192 18 128 -// 24 480 8192 17 32 -// 25 512 8192 16 0 -// 26 576 8192 14 128 -// 27 640 8192 12 512 -// 28 704 8192 11 448 -// 29 768 8192 10 512 -// 30 896 8192 9 128 -// 31 1024 8192 8 0 -// 32 1152 8192 7 128 -// 33 1280 8192 6 512 -// 34 1408 16384 11 896 -// 35 1536 8192 5 512 -// 36 1792 16384 9 256 -// 37 2048 8192 4 0 -// 38 2304 16384 7 256 -// 39 2688 8192 3 128 -// 40 3072 24576 8 0 -// 41 3200 16384 5 384 -// 42 3456 24576 7 384 -// 43 4096 8192 2 0 -// 44 4864 24576 5 256 -// 45 5376 16384 3 256 -// 46 6144 24576 4 0 -// 47 6528 32768 5 128 -// 48 6784 40960 6 256 -// 49 6912 49152 7 768 -// 50 8192 8192 1 0 -// 51 9472 57344 6 512 -// 52 9728 49152 5 512 -// 53 10240 40960 4 0 -// 54 10880 32768 3 128 -// 55 12288 24576 2 0 -// 56 13568 40960 3 256 -// 57 14336 57344 4 0 -// 58 16384 16384 1 0 -// 59 18432 73728 4 0 -// 60 19072 57344 3 128 -// 61 20480 40960 2 0 -// 62 21760 65536 3 256 -// 63 24576 24576 1 0 -// 64 27264 81920 3 128 -// 65 28672 57344 2 0 -// 66 32768 32768 1 0 +// class bytes/obj bytes/span objects tail waste max waste +// 1 8 8192 1024 0 87.50% +// 2 16 8192 512 0 43.75% +// 3 32 8192 256 0 46.88% +// 4 48 8192 170 32 31.52% +// 5 64 8192 128 0 23.44% +// 6 80 8192 102 32 19.07% +// 7 96 8192 85 32 15.95% +// 8 112 8192 73 16 13.56% +// 9 128 8192 64 0 11.72% +// 10 144 8192 56 128 11.82% +// 11 160 8192 51 32 9.73% +// 12 176 8192 46 96 9.59% +// 13 192 8192 42 128 9.25% +// 14 208 8192 39 80 8.12% +// 15 224 8192 36 128 8.15% +// 16 240 8192 34 32 6.62% +// 17 256 8192 32 0 5.86% +// 18 288 8192 28 128 12.16% +// 19 320 8192 25 192 11.80% +// 20 352 8192 23 96 9.88% +// 21 384 8192 21 128 9.51% +// 22 416 8192 19 288 10.71% +// 23 448 8192 18 128 8.37% +// 24 480 8192 17 32 6.82% +// 25 512 8192 16 0 6.05% +// 26 576 8192 14 128 12.33% +// 27 640 8192 12 512 15.48% +// 28 704 8192 11 448 13.93% +// 29 768 8192 10 512 13.94% +// 30 896 8192 9 128 15.52% +// 31 1024 8192 8 0 12.40% +// 32 1152 8192 7 128 12.41% +// 33 1280 8192 6 512 15.55% +// 34 1408 16384 11 896 
14.00% +// 35 1536 8192 5 512 14.00% +// 36 1792 16384 9 256 15.57% +// 37 2048 8192 4 0 12.45% +// 38 2304 16384 7 256 12.46% +// 39 2688 8192 3 128 15.59% +// 40 3072 24576 8 0 12.47% +// 41 3200 16384 5 384 6.22% +// 42 3456 24576 7 384 8.83% +// 43 4096 8192 2 0 15.60% +// 44 4864 24576 5 256 16.65% +// 45 5376 16384 3 256 10.92% +// 46 6144 24576 4 0 12.48% +// 47 6528 32768 5 128 6.23% +// 48 6784 40960 6 256 4.36% +// 49 6912 49152 7 768 3.37% +// 50 8192 8192 1 0 15.61% +// 51 9472 57344 6 512 14.28% +// 52 9728 49152 5 512 3.64% +// 53 10240 40960 4 0 4.99% +// 54 10880 32768 3 128 6.24% +// 55 12288 24576 2 0 11.45% +// 56 13568 40960 3 256 9.99% +// 57 14336 57344 4 0 5.35% +// 58 16384 16384 1 0 12.49% +// 59 18432 73728 4 0 11.11% +// 60 19072 57344 3 128 3.57% +// 61 20480 40960 2 0 6.87% +// 62 21760 65536 3 256 6.25% +// 63 24576 24576 1 0 11.45% +// 64 27264 81920 3 128 10.00% +// 65 28672 57344 2 0 4.91% +// 66 32768 32768 1 0 12.50% const ( _MaxSmallSize = 32768 diff --git a/src/runtime/slice.go b/src/runtime/slice.go index 0f49df1647..351fec067d 100644 --- a/src/runtime/slice.go +++ b/src/runtime/slice.go @@ -14,6 +14,13 @@ type slice struct { cap int } +// An notInHeapSlice is a slice backed by go:notinheap memory. +type notInHeapSlice struct { + array *notInHeap + len int + cap int +} + // maxElems is a lookup table containing the maximum capacity for a slice. // The index is the size of the slice element. var maxElems = [...]uintptr{ @@ -81,7 +88,7 @@ func makeslice64(et *_type, len64, cap64 int64) slice { // The SSA backend might prefer the new length or to return only ptr/cap and save stack space. func growslice(et *_type, old slice, cap int) slice { if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&et)) + callerpc := getcallerpc() racereadrangepc(old.array, uintptr(old.len*int(et.size)), callerpc, funcPC(growslice)) } if msanenabled { @@ -105,12 +112,20 @@ func growslice(et *_type, old slice, cap int) slice { if old.len < 1024 { newcap = doublecap } else { - for newcap < cap { + // Check 0 < newcap to detect overflow + // and prevent an infinite loop. + for 0 < newcap && newcap < cap { newcap += newcap / 4 } + // Set newcap to the requested cap when + // the newcap calculation overflowed. 
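For reference, a minimal sketch of the capacity policy this hunk implements; growCap is a hypothetical stand-in, and the real growslice additionally rounds the result up to a size class (roundupsize) and rejects capacities beyond _MaxMem:

package main

import "fmt"

// growCap mirrors the append growth rule above: double small slices,
// grow large ones by about 1.25x, and fall back to the requested cap
// if the multiply-by-1.25 loop overflows int.
func growCap(oldLen, oldCap, want int) int {
	newcap := oldCap
	doublecap := newcap + newcap
	if want > doublecap {
		return want
	}
	if oldLen < 1024 {
		return doublecap
	}
	// 0 < newcap detects overflow and stops the loop from spinning forever.
	for 0 < newcap && newcap < want {
		newcap += newcap / 4
	}
	if newcap <= 0 {
		return want
	}
	return newcap
}

func main() {
	fmt.Println(growCap(9, 9, 10))         // 18: below 1024, capacity doubles
	fmt.Println(growCap(2000, 2000, 2001)) // 2500: grows by about 1.25x
}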
+ if newcap <= 0 { + newcap = cap + } } } + var overflow bool var lenmem, newlenmem, capmem uintptr const ptrSize = unsafe.Sizeof((*byte)(nil)) switch et.size { @@ -118,20 +133,37 @@ func growslice(et *_type, old slice, cap int) slice { lenmem = uintptr(old.len) newlenmem = uintptr(cap) capmem = roundupsize(uintptr(newcap)) + overflow = uintptr(newcap) > _MaxMem newcap = int(capmem) case ptrSize: lenmem = uintptr(old.len) * ptrSize newlenmem = uintptr(cap) * ptrSize capmem = roundupsize(uintptr(newcap) * ptrSize) + overflow = uintptr(newcap) > _MaxMem/ptrSize newcap = int(capmem / ptrSize) default: lenmem = uintptr(old.len) * et.size newlenmem = uintptr(cap) * et.size capmem = roundupsize(uintptr(newcap) * et.size) + overflow = uintptr(newcap) > maxSliceCap(et.size) newcap = int(capmem / et.size) } - if cap < old.cap || uintptr(newcap) > maxSliceCap(et.size) { + // The check of overflow (uintptr(newcap) > maxSliceCap(et.size)) + // in addition to capmem > _MaxMem is needed to prevent an overflow + // which can be used to trigger a segfault on 32bit architectures + // with this example program: + // + // type T [1<<27 + 1]int64 + // + // var d T + // var s []T + // + // func main() { + // s = append(s, d, d, d, d) + // print(len(s), "\n") + // } + if cap < old.cap || overflow || capmem > _MaxMem { panic(errorString("growslice: cap out of range")) } @@ -172,7 +204,7 @@ func slicecopy(to, fm slice, width uintptr) int { } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&to)) + callerpc := getcallerpc() pc := funcPC(slicecopy) racewriterangepc(to.array, uintptr(n*int(width)), callerpc, pc) racereadrangepc(fm.array, uintptr(n*int(width)), callerpc, pc) @@ -203,7 +235,7 @@ func slicestringcopy(to []byte, fm string) int { } if raceenabled { - callerpc := getcallerpc(unsafe.Pointer(&to)) + callerpc := getcallerpc() pc := funcPC(slicestringcopy) racewriterangepc(unsafe.Pointer(&to[0]), uintptr(n), callerpc, pc) } diff --git a/src/runtime/softfloat64.go b/src/runtime/softfloat64.go index 1678e8f9f1..8fde0feddc 100644 --- a/src/runtime/softfloat64.go +++ b/src/runtime/softfloat64.go @@ -483,3 +483,115 @@ again2: return q1*b + q0, (un21*b + un0 - q0*v) >> s } + +func fadd32(x, y uint32) uint32 { + return f64to32(fadd64(f32to64(x), f32to64(y))) +} + +func fmul32(x, y uint32) uint32 { + return f64to32(fmul64(f32to64(x), f32to64(y))) +} + +func fdiv32(x, y uint32) uint32 { + return f64to32(fdiv64(f32to64(x), f32to64(y))) +} + +func feq32(x, y uint32) bool { + cmp, nan := fcmp64(f32to64(x), f32to64(y)) + return cmp == 0 && !nan +} + +func fgt32(x, y uint32) bool { + cmp, nan := fcmp64(f32to64(x), f32to64(y)) + return cmp >= 1 && !nan +} + +func fge32(x, y uint32) bool { + cmp, nan := fcmp64(f32to64(x), f32to64(y)) + return cmp >= 0 && !nan +} + +func feq64(x, y uint64) bool { + cmp, nan := fcmp64(x, y) + return cmp == 0 && !nan +} + +func fgt64(x, y uint64) bool { + cmp, nan := fcmp64(x, y) + return cmp >= 1 && !nan +} + +func fge64(x, y uint64) bool { + cmp, nan := fcmp64(x, y) + return cmp >= 0 && !nan +} + +func fint32to32(x int32) uint32 { + return f64to32(fintto64(int64(x))) +} + +func fint32to64(x int32) uint64 { + return fintto64(int64(x)) +} + +func fint64to32(x int64) uint32 { + return f64to32(fintto64(x)) +} + +func fint64to64(x int64) uint64 { + return fintto64(x) +} + +func f32toint32(x uint32) int32 { + val, _ := f64toint(f32to64(x)) + return int32(val) +} + +func f32toint64(x uint32) int64 { + val, _ := f64toint(f32to64(x)) + return val +} + +func f64toint32(x uint64) int32 { + val, 
_ := f64toint(x) + return int32(val) +} + +func f64toint64(x uint64) int64 { + val, _ := f64toint(x) + return val +} + +func f64touint64(x float64) uint64 { + if x < float64(1<<63) { + return uint64(int64(x)) + } + y := x - float64(1<<63) + z := uint64(int64(y)) + return z | (1 << 63) +} + +func f32touint64(x float32) uint64 { + if x < float32(1<<63) { + return uint64(int64(x)) + } + y := x - float32(1<<63) + z := uint64(int64(y)) + return z | (1 << 63) +} + +func fuint64to64(x uint64) float64 { + if int64(x) >= 0 { + return float64(int64(x)) + } + // See ../cmd/compile/internal/gc/ssa.go:uint64Tofloat + y := x & 1 + z := x >> 1 + z = z | y + r := float64(int64(z)) + return r + r +} + +func fuint64to32(x uint64) float32 { + return float32(fuint64to64(x)) +} diff --git a/src/runtime/stack.go b/src/runtime/stack.go index 525d0b14c1..eb0716c18d 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -578,29 +578,30 @@ func adjustpointers(scanp unsafe.Pointer, cbv *bitvector, adjinfo *adjustinfo, f if stackDebug >= 4 { print(" ", add(scanp, i*sys.PtrSize), ":", ptrnames[ptrbit(&bv, i)], ":", hex(*(*uintptr)(add(scanp, i*sys.PtrSize))), " # ", i, " ", bv.bytedata[i/8], "\n") } - if ptrbit(&bv, i) == 1 { - pp := (*uintptr)(add(scanp, i*sys.PtrSize)) - retry: - p := *pp - if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 { - // Looks like a junk value in a pointer slot. - // Live analysis wrong? - getg().m.traceback = 2 - print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n") - throw("invalid pointer found on stack") + if ptrbit(&bv, i) != 1 { + continue + } + pp := (*uintptr)(add(scanp, i*sys.PtrSize)) + retry: + p := *pp + if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 { + // Looks like a junk value in a pointer slot. + // Live analysis wrong? + getg().m.traceback = 2 + print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n") + throw("invalid pointer found on stack") + } + if minp <= p && p < maxp { + if stackDebug >= 3 { + print("adjust ptr ", hex(p), " ", funcname(f), "\n") } - if minp <= p && p < maxp { - if stackDebug >= 3 { - print("adjust ptr ", hex(p), " ", funcname(f), "\n") - } - if useCAS { - ppu := (*unsafe.Pointer)(unsafe.Pointer(pp)) - if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) { - goto retry - } - } else { - *pp = p + delta + if useCAS { + ppu := (*unsafe.Pointer)(unsafe.Pointer(pp)) + if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) { + goto retry } + } else { + *pp = p + delta } } } @@ -751,7 +752,6 @@ func adjustsudogs(gp *g, adjinfo *adjustinfo) { // might be in the stack. for s := gp.waiting; s != nil; s = s.waitlink { adjustpointer(adjinfo, unsafe.Pointer(&s.elem)) - adjustpointer(adjinfo, unsafe.Pointer(&s.selectdone)) } } @@ -768,10 +768,6 @@ func findsghi(gp *g, stk stack) uintptr { if stk.lo <= p && p < stk.hi && p > sghi { sghi = p } - p = uintptr(unsafe.Pointer(sg.selectdone)) + unsafe.Sizeof(sg.selectdone) - if stk.lo <= p && p < stk.hi && p > sghi { - sghi = p - } } return sghi } @@ -917,9 +913,12 @@ func round2(x int32) int32 { // g->atomicstatus will be Grunning or Gscanrunning upon entry. // If the GC is trying to stop this g then it will set preemptscan to true. // -// ctxt is the value of the context register on morestack. newstack -// will write it to g.sched.ctxt. 
-func newstack(ctxt unsafe.Pointer) { +// This must be nowritebarrierrec because it can be called as part of +// stack growth from other nowritebarrierrec functions, but the +// compiler doesn't check this. +// +//go:nowritebarrierrec +func newstack() { thisg := getg() // TODO: double check all gp. shouldn't be getg(). if thisg.m.morebuf.g.ptr().stackguard0 == stackFork { @@ -933,9 +932,6 @@ func newstack(ctxt unsafe.Pointer) { } gp := thisg.m.curg - // Write ctxt to gp.sched. We do this here instead of in - // morestack so it has the necessary write barrier. - gp.sched.ctxt = ctxt if thisg.m.curg.throwsplit { // Update syscallsp, syscallpc in case traceback uses them. @@ -946,6 +942,7 @@ func newstack(ctxt unsafe.Pointer) { "\tmorebuf={pc:", hex(morebuf.pc), " sp:", hex(morebuf.sp), " lr:", hex(morebuf.lr), "}\n", "\tsched={pc:", hex(gp.sched.pc), " sp:", hex(gp.sched.sp), " lr:", hex(gp.sched.lr), " ctxt:", gp.sched.ctxt, "}\n") + thisg.m.traceback = 2 // Include runtime frames traceback(morebuf.pc, morebuf.sp, morebuf.lr, gp) throw("runtime: stack split at bad time") } diff --git a/src/runtime/stack_test.go b/src/runtime/stack_test.go index 25e8f77da4..0fed241704 100644 --- a/src/runtime/stack_test.go +++ b/src/runtime/stack_test.go @@ -5,6 +5,9 @@ package runtime_test import ( + "bytes" + "fmt" + "reflect" . "runtime" "strings" "sync" @@ -78,10 +81,13 @@ func TestStackGrowth(t *testing.T) { var wg sync.WaitGroup // in a normal goroutine + var growDuration time.Duration // For debugging failures wg.Add(1) go func() { defer wg.Done() - growStack() + start := time.Now() + growStack(nil) + growDuration = time.Since(start) }() wg.Wait() @@ -90,7 +96,7 @@ func TestStackGrowth(t *testing.T) { go func() { defer wg.Done() LockOSThread() - growStack() + growStack(nil) UnlockOSThread() }() wg.Wait() @@ -100,12 +106,14 @@ func TestStackGrowth(t *testing.T) { go func() { defer wg.Done() done := make(chan bool) - var started uint32 + var startTime time.Time + var started, progress uint32 go func() { s := new(string) SetFinalizer(s, func(ss *string) { + startTime = time.Now() atomic.StoreUint32(&started, 1) - growStack() + growStack(&progress) done <- true }) s = nil @@ -118,7 +126,10 @@ func TestStackGrowth(t *testing.T) { case <-time.After(20 * time.Second): if atomic.LoadUint32(&started) == 0 { t.Log("finalizer did not start") + } else { + t.Logf("finalizer started %s ago and finished %d iterations", time.Since(startTime), atomic.LoadUint32(&progress)) } + t.Log("first growStack took", growDuration) t.Error("finalizer did not run") return } @@ -131,7 +142,7 @@ func TestStackGrowth(t *testing.T) { // growStack() //} -func growStack() { +func growStack(progress *uint32) { n := 1 << 10 if testing.Short() { n = 1 << 8 @@ -142,6 +153,9 @@ func growStack() { if x != i+1 { panic("stack is corrupted") } + if progress != nil { + atomic.StoreUint32(progress, uint32(i)) + } } GC() } @@ -231,7 +245,7 @@ func TestDeferPtrs(t *testing.T) { } }() defer set(&y, 42) - growStack() + growStack(nil) } type bigBuf [4 * 1024]byte @@ -627,3 +641,169 @@ func count23(n int) int { } return 1 + count1(n-1) } + +type structWithMethod struct{} + +func (s structWithMethod) caller() string { + _, file, line, ok := Caller(1) + if !ok { + panic("Caller failed") + } + return fmt.Sprintf("%s:%d", file, line) +} + +func (s structWithMethod) callers() []uintptr { + pc := make([]uintptr, 16) + return pc[:Callers(0, pc)] +} + +func (s structWithMethod) stack() string { + buf := make([]byte, 4<<10) + return string(buf[:Stack(buf, 
false)]) +} + +func (s structWithMethod) nop() {} + +func TestStackWrapperCaller(t *testing.T) { + var d structWithMethod + // Force the compiler to construct a wrapper method. + wrapper := (*structWithMethod).caller + // Check that the wrapper doesn't affect the stack trace. + if dc, ic := d.caller(), wrapper(&d); dc != ic { + t.Fatalf("direct caller %q != indirect caller %q", dc, ic) + } +} + +func TestStackWrapperCallers(t *testing.T) { + var d structWithMethod + wrapper := (*structWithMethod).callers + // Check that <autogenerated> doesn't appear in the stack trace. + pcs := wrapper(&d) + frames := CallersFrames(pcs) + for { + fr, more := frames.Next() + if fr.File == "<autogenerated>" { + t.Fatalf("<autogenerated> appears in stack trace: %+v", fr) + } + if !more { + break + } + } +} + +func TestStackWrapperStack(t *testing.T) { + var d structWithMethod + wrapper := (*structWithMethod).stack + // Check that <autogenerated> doesn't appear in the stack trace. + stk := wrapper(&d) + if strings.Contains(stk, "<autogenerated>") { + t.Fatalf("<autogenerated> appears in stack trace:\n%s", stk) + } +} + +type I interface { + M() +} + +func TestStackWrapperStackPanic(t *testing.T) { + t.Run("sigpanic", func(t *testing.T) { + // nil calls to interface methods cause a sigpanic. + testStackWrapperPanic(t, func() { I.M(nil) }, "runtime_test.I.M") + }) + t.Run("panicwrap", func(t *testing.T) { + // Nil calls to value method wrappers call panicwrap. + wrapper := (*structWithMethod).nop + testStackWrapperPanic(t, func() { wrapper(nil) }, "runtime_test.(*structWithMethod).nop") + }) +} + +func testStackWrapperPanic(t *testing.T, cb func(), expect string) { + // Test that the stack trace from a panicking wrapper includes + // the wrapper, even though elide these when they don't panic. + t.Run("CallersFrames", func(t *testing.T) { + defer func() { + err := recover() + if err == nil { + t.Fatalf("expected panic") + } + pcs := make([]uintptr, 10) + n := Callers(0, pcs) + frames := CallersFrames(pcs[:n]) + for { + frame, more := frames.Next() + t.Log(frame.Function) + if frame.Function == expect { + return + } + if !more { + break + } + } + t.Fatalf("panicking wrapper %s missing from stack trace", expect) + }() + cb() + }) + t.Run("Stack", func(t *testing.T) { + defer func() { + err := recover() + if err == nil { + t.Fatalf("expected panic") + } + buf := make([]byte, 4<<10) + stk := string(buf[:Stack(buf, false)]) + if !strings.Contains(stk, "\n"+expect) { + t.Fatalf("panicking wrapper %s missing from stack trace:\n%s", expect, stk) + } + }() + cb() + }) +} + +func TestCallersFromWrapper(t *testing.T) { + // Test that invoking CallersFrames on a stack where the first + // PC is an autogenerated wrapper keeps the wrapper in the + // trace. Normally we elide these, assuming that the wrapper + // calls the thing you actually wanted to see, but in this + // case we need to keep it. + pc := reflect.ValueOf(I.M).Pointer() + frames := CallersFrames([]uintptr{pc}) + frame, more := frames.Next() + if frame.Function != "runtime_test.I.M" { + t.Fatalf("want function %s, got %s", "runtime_test.I.M", frame.Function) + } + if more { + t.Fatalf("want 1 frame, got > 1") + } +} + +func TestTracebackSystemstack(t *testing.T) { + if GOARCH == "ppc64" || GOARCH == "ppc64le" { + t.Skip("systemstack tail call not implemented on ppc64x") + } + + // Test that profiles correctly jump over systemstack, + // including nested systemstack calls. 
+ pcs := make([]uintptr, 20) + pcs = pcs[:TracebackSystemstack(pcs, 5)] + // Check that runtime.TracebackSystemstack appears five times + // and that we see TestTracebackSystemstack. + countIn, countOut := 0, 0 + frames := CallersFrames(pcs) + var tb bytes.Buffer + for { + frame, more := frames.Next() + fmt.Fprintf(&tb, "\n%s+0x%x %s:%d", frame.Function, frame.PC-frame.Entry, frame.File, frame.Line) + switch frame.Function { + case "runtime.TracebackSystemstack": + countIn++ + case "runtime_test.TestTracebackSystemstack": + countOut++ + } + if !more { + break + } + } + if countIn != 5 || countOut != 1 { + t.Fatalf("expected 5 calls to TracebackSystemstack and 1 call to TestTracebackSystemstack, got:%s", tb.String()) + } +} diff --git a/src/runtime/string.go b/src/runtime/string.go index 0ccc81ee58..22be091375 100644 --- a/src/runtime/string.go +++ b/src/runtime/string.go @@ -80,7 +80,7 @@ func slicebytetostring(buf *tmpBuf, b []byte) (str string) { if raceenabled { racereadrangepc(unsafe.Pointer(&b[0]), uintptr(l), - getcallerpc(unsafe.Pointer(&buf)), + getcallerpc(), funcPC(slicebytetostring)) } if msanenabled { @@ -134,7 +134,7 @@ func slicebytetostringtmp(b []byte) string { if raceenabled && len(b) > 0 { racereadrangepc(unsafe.Pointer(&b[0]), uintptr(len(b)), - getcallerpc(unsafe.Pointer(&b)), + getcallerpc(), funcPC(slicebytetostringtmp)) } if msanenabled && len(b) > 0 { @@ -183,7 +183,7 @@ func slicerunetostring(buf *tmpBuf, a []rune) string { if raceenabled && len(a) > 0 { racereadrangepc(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]), - getcallerpc(unsafe.Pointer(&buf)), + getcallerpc(), funcPC(slicerunetostring)) } if msanenabled && len(a) > 0 { diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go index 72d21187ec..e83064166a 100644 --- a/src/runtime/stubs.go +++ b/src/runtime/stubs.go @@ -4,10 +4,7 @@ package runtime -import ( - "runtime/internal/sys" - "unsafe" -) +import "unsafe" // Should be a built-in for unsafe.Pointer? //go:nosplit @@ -91,25 +88,28 @@ func reflect_memmove(to, from unsafe.Pointer, n uintptr) { } // exported value for testing -var hashLoad = loadFactor +var hashLoad = float32(loadFactorNum) / float32(loadFactorDen) //go:nosplit func fastrand() uint32 { mp := getg().m - fr := mp.fastrand - mx := uint32(int32(fr)>>31) & 0xa8888eef - fr = fr<<1 ^ mx - mp.fastrand = fr - return fr + // Implement xorshift64+: 2 32-bit xorshift sequences added together. + // Shift triplet [17,7,16] was calculated as indicated in Marsaglia's + // Xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf + // This generator passes the SmallCrush suite, part of TestU01 framework: + // http://simul.iro.umontreal.ca/testu01/tu01.html + s1, s0 := mp.fastrand[0], mp.fastrand[1] + s1 ^= s1 << 17 + s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16 + mp.fastrand[0], mp.fastrand[1] = s0, s1 + return s0 + s1 } //go:nosplit func fastrandn(n uint32) uint32 { - // Don't be clever. - // fastrand is not good enough for cleverness. - // Just use mod. - // See golang.org/issue/21806. - return fastrand() % n + // This is similar to fastrand() % n, but faster. 
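A self-contained sketch of the two changes above, outside the runtime: the per-m xorshift64+ state kept as two uint32 words with the [17,7,16] shift triplet, and the multiply-shift bounded reduction used by fastrandn. rngState and randn are illustrative names; the only requirement on the seed is that it be nonzero.

package main

import "fmt"

// rngState holds the two 32-bit xorshift words, like m.fastrand.
type rngState [2]uint32

// next advances the xorshift64+ generator with shift triplet [17,7,16]
// and returns the sum of the two words, as fastrand does.
func (r *rngState) next() uint32 {
	s1, s0 := r[0], r[1]
	s1 ^= s1 << 17
	s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16
	r[0], r[1] = s0, s1
	return s0 + s1
}

// randn maps a random 32-bit value into [0, n) without a modulo:
// the high 32 bits of a 32x32->64 multiply, as in fastrandn.
func (r *rngState) randn(n uint32) uint32 {
	return uint32(uint64(r.next()) * uint64(n) >> 32)
}

func main() {
	r := rngState{0x1d2c3b4a, 0x5e6f7081} // any nonzero seed works
	fmt.Println(r.next(), r.randn(10))
}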
+ // See http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ + return uint32(uint64(fastrand()) * uint64(n) >> 32) } //go:linkname sync_fastrand sync.fastrand @@ -133,11 +133,9 @@ func noescape(p unsafe.Pointer) unsafe.Pointer { func cgocallback(fn, frame unsafe.Pointer, framesize, ctxt uintptr) func gogo(buf *gobuf) func gosave(buf *gobuf) -func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32 //go:noescape func jmpdefer(fv *funcval, argp uintptr) -func exit1(code int32) func asminit() func setg(gg *g) func breakpoint() @@ -196,14 +194,16 @@ func publicationBarrier() // getcallerpc returns the program counter (PC) of its caller's caller. // getcallersp returns the stack pointer (SP) of its caller's caller. -// For both, the argp must be a pointer to the caller's first function argument. +// argp must be a pointer to the caller's first function argument. // The implementation may or may not use argp, depending on -// the architecture. +// the architecture. The implementation may be a compiler +// intrinsic; there is not necessarily code implementing this +// on every platform. // // For example: // // func f(arg1, arg2, arg3 int) { -// pc := getcallerpc(unsafe.Pointer(&arg1)) +// pc := getcallerpc() // sp := getcallersp(unsafe.Pointer(&arg1)) // } // @@ -223,12 +223,26 @@ func publicationBarrier() // immediately and can only be passed to nosplit functions. //go:noescape -func getcallerpc(argp unsafe.Pointer) uintptr +func getcallerpc() uintptr -//go:nosplit -func getcallersp(argp unsafe.Pointer) uintptr { - return uintptr(argp) - sys.MinFrameSize -} +//go:noescape +func getcallersp(argp unsafe.Pointer) uintptr // implemented as an intrinsic on all platforms + +// getclosureptr returns the pointer to the current closure. +// getclosureptr can only be used in an assignment statement +// at the entry of a function. Moreover, go:nosplit directive +// must be specified at the declaration of caller function, +// so that the function prolog does not clobber the closure register. +// for example: +// +// //go:nosplit +// func f(arg1, arg2, arg3 int) { +// dx := getclosureptr() +// } +// +// The compiler rewrites calls to this function into instructions that fetch the +// pointer from a well-known register (DX on x86 architecture, etc.) directly. +func getclosureptr() uintptr //go:noescape func asmcgocall(fn, arg unsafe.Pointer) int32 @@ -278,11 +292,6 @@ func call1073741824(typ, fn, arg unsafe.Pointer, n, retoffset uint32) func systemstack_switch() -func prefetcht0(addr uintptr) -func prefetcht1(addr uintptr) -func prefetcht2(addr uintptr) -func prefetchnta(addr uintptr) - // round n up to a multiple of a. a must be a power of 2. func round(n, a uintptr) uintptr { return (n + a - 1) &^ (a - 1) @@ -292,7 +301,6 @@ func round(n, a uintptr) uintptr { func checkASM() bool func memequal_varlen(a, b unsafe.Pointer) bool -func eqstring(s1, s2 string) bool // bool2int returns 0 if x is false or 1 if x is true. func bool2int(x bool) int { diff --git a/src/runtime/stubs2.go b/src/runtime/stubs2.go index 8390d8fca9..ae5ccd3fee 100644 --- a/src/runtime/stubs2.go +++ b/src/runtime/stubs2.go @@ -25,3 +25,9 @@ func write(fd uintptr, p unsafe.Pointer, n int32) int32 func open(name *byte, mode, perm int32) int32 func madvise(addr unsafe.Pointer, n uintptr, flags int32) + +// exitThread terminates the current thread, writing *wait = 0 when +// the stack is safe to reclaim. 
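A rough sketch of the handshake this contract enables, based only on the stated semantics; freeWait, exitAndClear, and reclaimStack are illustrative names, and the real exitThread is assembly that must not touch its own stack after the store:

package main

import "sync/atomic"

// mStack pairs a thread's stack with the wait word: freeWait is 1 while
// the exiting thread may still be using the stack.
type mStack struct {
	freeWait uint32
	stack    []byte
}

// exitAndClear models the exiting side: the last store the thread makes
// before entering the kernel is *wait = 0, signalling the stack is reusable.
func exitAndClear(m *mStack) {
	// ...final work on m.stack...
	atomic.StoreUint32(&m.freeWait, 0)
	// the real exitThread now performs the thread-exit syscall without
	// touching the stack again
}

// reclaimStack models the other side: spin until the word drops to zero,
// after which the stack may be freed or handed to a new thread.
func reclaimStack(m *mStack) {
	for atomic.LoadUint32(&m.freeWait) != 0 {
		// busy-wait; the runtime would yield here
	}
	m.stack = nil
}

func main() {
	m := &mStack{freeWait: 1, stack: make([]byte, 8192)}
	exitAndClear(m)
	reclaimStack(m)
}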
+// +//go:noescape +func exitThread(wait *uint32) diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go index e1b41ca4ff..bdf98b9e9d 100644 --- a/src/runtime/symtab.go +++ b/src/runtime/symtab.go @@ -19,6 +19,11 @@ type Frames struct { // stackExpander expands callers into a sequence of Frames, // tracking the necessary state across PCs. stackExpander stackExpander + + // elideWrapper indicates that, if the next frame is an + // autogenerated wrapper function, it should be elided from + // the stack. + elideWrapper bool } // Frame is the information returned by Frames for each call frame. @@ -112,12 +117,14 @@ func (se *stackExpander) init(callers []uintptr) []uintptr { // Next returns frame information for the next caller. // If more is false, there are no more callers (the Frame value is valid). func (ci *Frames) Next() (frame Frame, more bool) { - ci.callers, frame, more = ci.stackExpander.next(ci.callers) + ci.callers, frame, more = ci.stackExpander.next(ci.callers, ci.elideWrapper) + ci.elideWrapper = elideWrapperCalling(frame.Function) return } -func (se *stackExpander) next(callers []uintptr) (ncallers []uintptr, frame Frame, more bool) { +func (se *stackExpander) next(callers []uintptr, elideWrapper bool) (ncallers []uintptr, frame Frame, more bool) { ncallers = callers +again: if !se.pcExpander.more { // Expand the next PC. if len(ncallers) == 0 { @@ -144,6 +151,13 @@ func (se *stackExpander) next(callers []uintptr) (ncallers []uintptr, frame Fram } frame = se.pcExpander.next() + if elideWrapper && frame.File == "<autogenerated>" { + // Ignore autogenerated functions such as pointer + // method forwarding functions. These are an + // implementation detail that doesn't reflect the + // source code. + goto again + } return ncallers, frame, se.pcExpander.more || len(ncallers) > 0 } @@ -338,8 +352,8 @@ const ( // moduledata records information about the layout of the executable // image. It is written by the linker. Any changes here must be // matched changes to the code in cmd/internal/ld/symtab.go:symtab. -// moduledata is stored in read-only memory; none of the pointers here -// are visible to the garbage collector. +// moduledata is stored in statically allocated non-pointer memory; +// none of the pointers here are visible to the garbage collector. type moduledata struct { pclntable []byte ftab []functab @@ -367,10 +381,14 @@ type moduledata struct { modulename string modulehashes []modulehash + hasmain uint8 // 1 if module contains the main function, 0 otherwise + gcdatamask, gcbssmask bitvector typemap map[typeOff]*_type // offset to *_rtype in previous module + bad bool // module failed to load and should be ignored + next *moduledata } @@ -403,7 +421,7 @@ var pinnedTypemaps []map[typeOff]*_type var firstmoduledata moduledata // linker symbol var lastmoduledatap *moduledata // linker symbol -var modulesSlice unsafe.Pointer // see activeModules +var modulesSlice *[]*moduledata // see activeModules // activeModules returns a slice of active modules. // @@ -443,6 +461,9 @@ func activeModules() []*moduledata { func modulesinit() { modules := new([]*moduledata) for md := &firstmoduledata; md != nil; md = md.next { + if md.bad { + continue + } *modules = append(*modules, md) if md.gcdatamask == (bitvector{}) { md.gcdatamask = progToPointerMask((*byte)(unsafe.Pointer(md.gcdata)), md.edata-md.data) @@ -459,9 +480,8 @@ func modulesinit() { // contains the main function. // // See Issue #18729. 
- mainText := funcPC(main_main) for i, md := range *modules { - if md.text <= mainText && mainText <= md.etext { + if md.hasmain != 0 { (*modules)[0] = md (*modules)[i] = &firstmoduledata break @@ -521,7 +541,6 @@ func moduledataverify1(datap *moduledata) { // ftab is lookup table for function by program counter. nftab := len(datap.ftab) - 1 - var pcCache pcvalueCache for i := 0; i < nftab; i++ { // NOTE: ftab[nftab].entry is legal; it is the address beyond the final function. if datap.ftab[i].entry > datap.ftab[i+1].entry { @@ -537,30 +556,6 @@ func moduledataverify1(datap *moduledata) { } throw("invalid runtime symbol table") } - - if debugPcln || nftab-i < 5 { - // Check a PC near but not at the very end. - // The very end might be just padding that is not covered by the tables. - // No architecture rounds function entries to more than 16 bytes, - // but if one came along we'd need to subtract more here. - // But don't use the next PC if it corresponds to a foreign object chunk - // (no pcln table, f2.pcln == 0). That chunk might have an alignment - // more than 16 bytes. - f := funcInfo{(*_func)(unsafe.Pointer(&datap.pclntable[datap.ftab[i].funcoff])), datap} - end := f.entry - if i+1 < nftab { - f2 := funcInfo{(*_func)(unsafe.Pointer(&datap.pclntable[datap.ftab[i+1].funcoff])), datap} - if f2.pcln != 0 { - end = f2.entry - 16 - if end < f.entry { - end = f.entry - } - } - } - pcvalue(f, f.pcfile, end, &pcCache, true) - pcvalue(f, f.pcln, end, &pcCache, true) - pcvalue(f, f.pcsp, end, &pcCache, true) - } } if datap.minpc != datap.ftab[0].entry || diff --git a/src/runtime/sys_darwin_386.s b/src/runtime/sys_darwin_386.s index 5c62bfd20e..ccd901ada5 100644 --- a/src/runtime/sys_darwin_386.s +++ b/src/runtime/sys_darwin_386.s @@ -19,13 +19,39 @@ TEXT runtime·exit(SB),NOSPLIT,$0 // Exit this OS thread (like pthread_exit, which eventually // calls __bsdthread_terminate). -TEXT runtime·exit1(SB),NOSPLIT,$0 +TEXT exit1<>(SB),NOSPLIT,$16-0 + // __bsdthread_terminate takes 4 word-size arguments. + // Set them all to 0. (None are an exit status.) + MOVL $0, 0(SP) + MOVL $0, 4(SP) + MOVL $0, 8(SP) + MOVL $0, 12(SP) MOVL $361, AX INT $0x80 JAE 2(PC) MOVL $0xf1, 0xf1 // crash RET +GLOBL exitStack<>(SB),RODATA,$(4*4) +DATA exitStack<>+0x00(SB)/4, $0 +DATA exitStack<>+0x04(SB)/4, $0 +DATA exitStack<>+0x08(SB)/4, $0 +DATA exitStack<>+0x0c(SB)/4, $0 + +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-4 + MOVL wait+0(FP), AX + // We're done using the stack. + MOVL $0, (AX) + // __bsdthread_terminate takes 4 arguments, which it expects + // on the stack. They should all be 0, so switch over to a + // fake stack of 0s. It won't write to the stack. 
+ MOVL $exitStack<>(SB), SP + MOVL $361, AX // __bsdthread_terminate + INT $0x80 + MOVL $0xf1, 0xf1 // crash + JMP 0(PC) + TEXT runtime·open(SB),NOSPLIT,$0 MOVL $5, AX INT $0x80 @@ -77,7 +103,13 @@ TEXT runtime·raiseproc(SB),NOSPLIT,$16 TEXT runtime·mmap(SB),NOSPLIT,$0 MOVL $197, AX INT $0x80 - MOVL AX, ret+24(FP) + JAE ok + MOVL $0, p+24(FP) + MOVL AX, err+28(FP) + RET +ok: + MOVL AX, p+24(FP) + MOVL $0, err+28(FP) RET TEXT runtime·madvise(SB),NOSPLIT,$0 @@ -394,7 +426,7 @@ TEXT runtime·bsdthread_start(SB),NOSPLIT,$0 MOVL BX, m_procid(DX) // m->procid = thread port (for debuggers) CALL runtime·stackcheck(SB) // smashes AX CALL CX // fn() - CALL runtime·exit1(SB) + CALL exit1<>(SB) RET // func bsdthread_register() int32 diff --git a/src/runtime/sys_darwin_amd64.s b/src/runtime/sys_darwin_amd64.s index a8dc700d60..f549efdbf6 100644 --- a/src/runtime/sys_darwin_amd64.s +++ b/src/runtime/sys_darwin_amd64.s @@ -25,13 +25,26 @@ TEXT runtime·exit(SB),NOSPLIT,$0 // Exit this OS thread (like pthread_exit, which eventually // calls __bsdthread_terminate). -TEXT runtime·exit1(SB),NOSPLIT,$0 - MOVL code+0(FP), DI // arg 1 exit status +TEXT exit1<>(SB),NOSPLIT,$0 + // Because of exitThread below, this must not use the stack. + // __bsdthread_terminate takes 4 word-size arguments. + // Set them all to 0. (None are an exit status.) + MOVL $0, DI + MOVL $0, SI + MOVL $0, DX + MOVL $0, R10 MOVL $(0x2000000+361), AX // syscall entry SYSCALL MOVL $0xf1, 0xf1 // crash RET +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-8 + MOVQ wait+0(FP), AX + // We're done using the stack. + MOVL $0, (AX) + JMP exit1<>(SB) + TEXT runtime·open(SB),NOSPLIT,$0 MOVQ name+0(FP), DI // arg 1 pathname MOVL mode+8(FP), SI // arg 2 flags @@ -107,29 +120,38 @@ TEXT runtime·madvise(SB), NOSPLIT, $0 RET // OS X comm page time offsets -// http://www.opensource.apple.com/source/xnu/xnu-1699.26.8/osfmk/i386/cpu_capabilities.h -#define nt_tsc_base 0x50 -#define nt_scale 0x58 -#define nt_shift 0x5c -#define nt_ns_base 0x60 -#define nt_generation 0x68 -#define gtod_generation 0x6c -#define gtod_ns_base 0x70 -#define gtod_sec_base 0x78 +// https://opensource.apple.com/source/xnu/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h + +#define commpage_version 0x1e + +#define v12_nt_tsc_base 0x50 +#define v12_nt_scale 0x58 +#define v12_nt_shift 0x5c +#define v12_nt_ns_base 0x60 +#define v12_nt_generation 0x68 +#define v12_gtod_generation 0x6c // obsolete since High Sierra (v13) +#define v12_gtod_ns_base 0x70 // obsolete since High Sierra (v13) +#define v12_gtod_sec_base 0x78 // obsolete since High Sierra (v13) + +#define v13_gtod_ns_base 0xd0 +#define v13_gtod_sec_ofs 0xd8 +#define v13_gtod_frac_ofs 0xe0 +#define v13_gtod_scale 0xe8 +#define v13_gtod_tkspersec 0xf0 TEXT runtime·nanotime(SB),NOSPLIT,$0-8 MOVQ $0x7fffffe00000, BP /* comm page base */ // Loop trying to take a consistent snapshot // of the time parameters. timeloop: - MOVL nt_generation(BP), R9 + MOVL v12_nt_generation(BP), R9 TESTL R9, R9 JZ timeloop RDTSC - MOVQ nt_tsc_base(BP), R10 - MOVL nt_scale(BP), R11 - MOVQ nt_ns_base(BP), R12 - CMPL nt_generation(BP), R9 + MOVQ v12_nt_tsc_base(BP), R10 + MOVL v12_nt_scale(BP), R11 + MOVQ v12_nt_ns_base(BP), R12 + CMPL v12_nt_generation(BP), R9 JNE timeloop // Gathered all the data we need. Compute monotonic time: @@ -151,22 +173,91 @@ TEXT time·now(SB), NOSPLIT, $32-24 // are used in the systime fallback, as the timeval address // filled in by the system call. 
MOVQ $0x7fffffe00000, BP /* comm page base */ + CMPW commpage_version(BP), $13 + JB v12 /* sierra and older */ + + // This is the new code, for macOS High Sierra (v13) and newer. +v13: + // Loop trying to take a consistent snapshot + // of the time parameters. +timeloop13: + MOVQ v13_gtod_ns_base(BP), R12 + + MOVL v12_nt_generation(BP), CX + TESTL CX, CX + JZ timeloop13 + RDTSC + MOVQ v12_nt_tsc_base(BP), SI + MOVL v12_nt_scale(BP), DI + MOVQ v12_nt_ns_base(BP), BX + CMPL v12_nt_generation(BP), CX + JNE timeloop13 + + MOVQ v13_gtod_sec_ofs(BP), R8 + MOVQ v13_gtod_frac_ofs(BP), R9 + MOVQ v13_gtod_scale(BP), R10 + MOVQ v13_gtod_tkspersec(BP), R11 + CMPQ v13_gtod_ns_base(BP), R12 + JNE timeloop13 + + // Compute monotonic time + // mono = ((tsc - nt_tsc_base) * nt_scale) >> 32 + nt_ns_base + // The multiply and shift extracts the top 64 bits of the 96-bit product. + SHLQ $32, DX + ADDQ DX, AX + SUBQ SI, AX + MULQ DI + SHRQ $32, AX:DX + ADDQ BX, AX + + // Subtract startNano base to return the monotonic runtime timer + // which is an offset from process boot. + MOVQ AX, BX + MOVQ runtime·startNano(SB), CX + SUBQ CX, BX + MOVQ BX, monotonic+16(FP) + + // Now compute the 128-bit wall time: + // wall = ((mono - gtod_ns_base) * gtod_scale) + gtod_offs + // The parameters are updated every second, so if we found them + // outdated (that is, more than one second is passed from the ns base), + // fallback to the syscall. + TESTQ R12, R12 + JZ systime + SUBQ R12, AX + CMPQ R11, AX + JB systime + MULQ R10 + ADDQ R9, AX + ADCQ R8, DX + + // Convert the 128-bit wall time into (sec,nsec). + // High part (seconds) is already good to go, while low part + // (fraction of seconds) must be converted to nanoseconds. + MOVQ DX, sec+0(FP) + MOVQ $1000000000, CX + MULQ CX + MOVQ DX, nsec+8(FP) + RET + + // This is the legacy code needed for macOS Sierra (v12) and older. +v12: // Loop trying to take a consistent snapshot // of the time parameters. timeloop: - MOVL gtod_generation(BP), R8 - MOVL nt_generation(BP), R9 + MOVL v12_gtod_generation(BP), R8 + MOVL v12_nt_generation(BP), R9 TESTL R9, R9 JZ timeloop RDTSC - MOVQ nt_tsc_base(BP), R10 - MOVL nt_scale(BP), R11 - MOVQ nt_ns_base(BP), R12 - CMPL nt_generation(BP), R9 + MOVQ v12_nt_tsc_base(BP), R10 + MOVL v12_nt_scale(BP), R11 + MOVQ v12_nt_ns_base(BP), R12 + CMPL v12_nt_generation(BP), R9 JNE timeloop - MOVQ gtod_ns_base(BP), R13 - MOVQ gtod_sec_base(BP), R14 - CMPL gtod_generation(BP), R8 + MOVQ v12_gtod_ns_base(BP), R13 + MOVQ v12_gtod_sec_base(BP), R14 + CMPL v12_gtod_generation(BP), R8 JNE timeloop // Gathered all the data we need. 
Compute: @@ -283,7 +374,13 @@ TEXT runtime·mmap(SB),NOSPLIT,$0 MOVL off+28(FP), R9 // arg 6 offset MOVL $(0x2000000+197), AX // syscall entry SYSCALL - MOVQ AX, ret+32(FP) + JCC ok + MOVQ $0, p+32(FP) + MOVQ AX, err+40(FP) + RET +ok: + MOVQ AX, p+32(FP) + MOVQ $0, err+40(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$0 @@ -375,7 +472,7 @@ TEXT runtime·bsdthread_start(SB),NOSPLIT,$0 MOVQ CX, g_m(AX) CALL runtime·stackcheck(SB) // smashes AX, CX CALL DX // fn - CALL runtime·exit1(SB) + CALL exit1<>(SB) RET // func bsdthread_register() int32 diff --git a/src/runtime/sys_darwin_arm.s b/src/runtime/sys_darwin_arm.s index ea559b5f3e..1ad904f833 100644 --- a/src/runtime/sys_darwin_arm.s +++ b/src/runtime/sys_darwin_arm.s @@ -19,7 +19,6 @@ #define SYS_mmap 197 #define SYS_munmap 73 #define SYS_madvise 75 -#define SYS_mincore 78 #define SYS_gettimeofday 116 #define SYS_kill 37 #define SYS_getpid 20 @@ -90,13 +89,32 @@ TEXT runtime·exit(SB),NOSPLIT,$-4 // Exit this OS thread (like pthread_exit, which eventually // calls __bsdthread_terminate). -TEXT runtime·exit1(SB),NOSPLIT,$0 +TEXT exit1<>(SB),NOSPLIT,$0 + // Because of exitThread below, this must not use the stack. + // __bsdthread_terminate takes 4 word-size arguments. + // Set them all to 0. (None are an exit status.) + MOVW $0, R0 + MOVW $0, R1 + MOVW $0, R2 + MOVW $0, R3 MOVW $SYS_bsdthread_terminate, R12 SWI $0x80 MOVW $1234, R0 MOVW $1003, R1 MOVW R0, (R1) // fail hard +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-4 + MOVW wait+0(FP), R0 + // We're done using the stack. + MOVW $0, R2 +storeloop: + LDREX (R0), R4 // loads R4 + STREX R2, (R0), R1 // stores R2 + CMP $0, R1 + BNE storeloop + JMP exit1<>(SB) + TEXT runtime·raise(SB),NOSPLIT,$0 // Ideally we'd send the signal to the current thread, // not the whole process, but that's too hard on OS X. @@ -122,7 +140,14 @@ TEXT runtime·mmap(SB),NOSPLIT,$0 MOVW $0, R6 // off_t is uint64_t MOVW $SYS_mmap, R12 SWI $0x80 - MOVW R0, ret+24(FP) + MOVW $0, R1 + BCC ok + MOVW R1, p+24(FP) + MOVW R0, err+28(FP) + RET +ok: + MOVW R0, p+24(FP) + MOVW R1, err+28(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$0 @@ -150,15 +175,6 @@ TEXT runtime·setitimer(SB),NOSPLIT,$0 SWI $0x80 RET -TEXT runtime·mincore(SB),NOSPLIT,$0 - MOVW addr+0(FP), R0 - MOVW n+4(FP), R1 - MOVW dst+8(FP), R2 - MOVW $SYS_mincore, R12 - SWI $0x80 - MOVW R0, ret+12(FP) - RET - TEXT runtime·walltime(SB), 7, $32 MOVW $8(R13), R0 // timeval MOVW $0, R1 // zone @@ -380,7 +396,7 @@ TEXT runtime·bsdthread_start(SB),NOSPLIT,$0 EOR R12, R12 WORD $0xeee1ca10 // fmxr fpscr, ip BL (R2) // fn - BL runtime·exit1(SB) + BL exit1<>(SB) RET // int32 bsdthread_register(void) diff --git a/src/runtime/sys_darwin_arm64.s b/src/runtime/sys_darwin_arm64.s index 0e91d5bd10..5663af512d 100644 --- a/src/runtime/sys_darwin_arm64.s +++ b/src/runtime/sys_darwin_arm64.s @@ -19,7 +19,6 @@ #define SYS_mmap 197 #define SYS_munmap 73 #define SYS_madvise 75 -#define SYS_mincore 78 #define SYS_gettimeofday 116 #define SYS_kill 37 #define SYS_getpid 20 @@ -90,13 +89,28 @@ TEXT runtime·exit(SB),NOSPLIT,$-8 // Exit this OS thread (like pthread_exit, which eventually // calls __bsdthread_terminate). -TEXT runtime·exit1(SB),NOSPLIT,$0 +TEXT exit1<>(SB),NOSPLIT,$0 + // Because of exitThread below, this must not use the stack. + // __bsdthread_terminate takes 4 word-size arguments. + // Set them all to 0. (None are an exit status.) 
+ MOVW $0, R0 + MOVW $0, R1 + MOVW $0, R2 + MOVW $0, R3 MOVW $SYS_bsdthread_terminate, R16 SVC $0x80 MOVD $1234, R0 MOVD $1003, R1 MOVD R0, (R1) // fail hard +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-8 + MOVD wait+0(FP), R0 + // We're done using the stack. + MOVW $0, R1 + STLRW R1, (R0) + JMP exit1<>(SB) + TEXT runtime·raise(SB),NOSPLIT,$0 // Ideally we'd send the signal to the current thread, // not the whole process, but that's too hard on OS X. @@ -121,7 +135,13 @@ TEXT runtime·mmap(SB),NOSPLIT,$0 MOVW off+28(FP), R5 MOVW $SYS_mmap, R16 SVC $0x80 - MOVD R0, ret+32(FP) + BCC ok + MOVD $0, p+32(FP) + MOVD R0, err+40(FP) + RET +ok: + MOVD R0, p+32(FP) + MOVD $0, err+40(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$0 diff --git a/src/runtime/sys_dragonfly_amd64.s b/src/runtime/sys_dragonfly_amd64.s index f355268b99..813f1f4b69 100644 --- a/src/runtime/sys_dragonfly_amd64.s +++ b/src/runtime/sys_dragonfly_amd64.s @@ -64,12 +64,18 @@ TEXT runtime·exit(SB),NOSPLIT,$-8 MOVL $0xf1, 0xf1 // crash RET -TEXT runtime·exit1(SB),NOSPLIT,$-8 - MOVL code+0(FP), DI // arg 1 exit status - MOVL $431, AX +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-8 + MOVQ wait+0(FP), AX + // We're done using the stack. + MOVL $0, (AX) + MOVL $0x10000, DI // arg 1 how - EXTEXIT_LWP + MOVL $0, SI // arg 2 status + MOVL $0, DX // arg 3 addr + MOVL $494, AX // extexit SYSCALL MOVL $0xf1, 0xf1 // crash - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$-8 MOVQ name+0(FP), DI // arg 1 pathname @@ -236,8 +242,15 @@ TEXT runtime·mmap(SB),NOSPLIT,$0 MOVQ $0, R9 // arg 6 - pad MOVL $197, AX SYSCALL + JCC ok + ADDQ $16, SP + MOVQ $0, p+32(FP) + MOVQ AX, err+40(FP) + RET +ok: ADDQ $16, SP - MOVQ AX, ret+32(FP) + MOVQ AX, p+32(FP) + MOVQ $0, err+40(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$0 diff --git a/src/runtime/sys_freebsd_386.s b/src/runtime/sys_freebsd_386.s index 0f5df21e40..bef8e3257a 100644 --- a/src/runtime/sys_freebsd_386.s +++ b/src/runtime/sys_freebsd_386.s @@ -52,12 +52,23 @@ TEXT runtime·exit(SB),NOSPLIT,$-4 MOVL $0xf1, 0xf1 // crash RET -TEXT runtime·exit1(SB),NOSPLIT,$-4 - MOVL $431, AX +GLOBL exitStack<>(SB),RODATA,$8 +DATA exitStack<>+0x00(SB)/4, $0 +DATA exitStack<>+0x04(SB)/4, $0 + +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-4 + MOVL wait+0(FP), AX + // We're done using the stack. + MOVL $0, (AX) + // thr_exit takes a single pointer argument, which it expects + // on the stack. We want to pass 0, so switch over to a fake + // stack of 0s. It won't write to the stack. + MOVL $exitStack<>(SB), SP + MOVL $431, AX // thr_exit INT $0x80 - JAE 2(PC) MOVL $0xf1, 0xf1 // crash - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$-4 MOVL $5, AX @@ -138,7 +149,13 @@ TEXT runtime·mmap(SB),NOSPLIT,$32 STOSL MOVL $477, AX INT $0x80 - MOVL AX, ret+24(FP) + JAE ok + MOVL $0, p+24(FP) + MOVL AX, err+28(FP) + RET +ok: + MOVL AX, p+24(FP) + MOVL $0, err+28(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$-4 diff --git a/src/runtime/sys_freebsd_amd64.s b/src/runtime/sys_freebsd_amd64.s index 5d072a9957..7499931ca1 100644 --- a/src/runtime/sys_freebsd_amd64.s +++ b/src/runtime/sys_freebsd_amd64.s @@ -54,12 +54,16 @@ TEXT runtime·exit(SB),NOSPLIT,$-8 MOVL $0xf1, 0xf1 // crash RET -TEXT runtime·exit1(SB),NOSPLIT,$-8 - MOVL code+0(FP), DI // arg 1 exit status - MOVL $431, AX +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-8 + MOVQ wait+0(FP), AX + // We're done using the stack. 
+ MOVL $0, (AX) + MOVL $0, DI // arg 1 long *state + MOVL $431, AX // thr_exit SYSCALL MOVL $0xf1, 0xf1 // crash - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$-8 MOVQ name+0(FP), DI // arg 1 pathname @@ -229,7 +233,13 @@ TEXT runtime·mmap(SB),NOSPLIT,$0 MOVL off+28(FP), R9 // arg 6 offset MOVL $477, AX SYSCALL - MOVQ AX, ret+32(FP) + JCC ok + MOVQ $0, p+32(FP) + MOVQ AX, err+40(FP) + RET +ok: + MOVQ AX, p+32(FP) + MOVQ $0, err+40(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$0 diff --git a/src/runtime/sys_freebsd_arm.s b/src/runtime/sys_freebsd_arm.s index 2851587b0d..3f52864305 100644 --- a/src/runtime/sys_freebsd_arm.s +++ b/src/runtime/sys_freebsd_arm.s @@ -82,13 +82,22 @@ TEXT runtime·exit(SB),NOSPLIT,$-8 MOVW.CS R8, (R8) RET -TEXT runtime·exit1(SB),NOSPLIT,$-8 - MOVW code+0(FP), R0 // arg 1 exit status - MOVW $SYS_thr_exit, R7 - SWI $0 - MOVW.CS $0, R8 // crash on syscall failure - MOVW.CS R8, (R8) - RET +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-4 + MOVW wait+0(FP), R0 + // We're done using the stack. + MOVW $0, R2 +storeloop: + LDREX (R0), R4 // loads R4 + STREX R2, (R0), R1 // stores R2 + CMP $0, R1 + BNE storeloop + MOVW $0, R0 // arg 1 long *state + MOVW $SYS_thr_exit, R7 + SWI $0 + MOVW.CS $0, R8 // crash on syscall failure + MOVW.CS R8, (R8) + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$-8 MOVW name+0(FP), R0 // arg 1 name @@ -249,8 +258,11 @@ TEXT runtime·mmap(SB),NOSPLIT,$16 MOVW $SYS_mmap, R7 SWI $0 SUB $4, R13 - // TODO(dfc) error checking ? - MOVW R0, ret+24(FP) + MOVW $0, R1 + MOVW.CS R0, R1 // if failed, put in R1 + MOVW.CS $0, R0 + MOVW R0, p+24(FP) + MOVW R1, err+28(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$0 diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s index a3baebae4a..bc3b8dbb1c 100644 --- a/src/runtime/sys_linux_386.s +++ b/src/runtime/sys_linux_386.s @@ -24,22 +24,70 @@ //#define INVOKE_SYSCALL CALL 0x10(GS) // non-portable #define INVOKE_SYSCALL INT $0x80 +#define SYS_exit 1 +#define SYS_read 3 +#define SYS_write 4 +#define SYS_open 5 +#define SYS_close 6 +#define SYS_getpid 20 +#define SYS_access 33 +#define SYS_kill 37 +#define SYS_brk 45 +#define SYS_fcntl 55 +#define SYS_munmap 91 +#define SYS_socketcall 102 +#define SYS_setittimer 104 +#define SYS_clone 120 +#define SYS_sched_yield 158 +#define SYS_rt_sigreturn 173 +#define SYS_rt_sigaction 174 +#define SYS_rt_sigprocmask 175 +#define SYS_sigaltstack 186 +#define SYS_ugetrlimit 191 +#define SYS_mmap2 192 +#define SYS_mincore 218 +#define SYS_madvise 219 +#define SYS_gettid 224 +#define SYS_tkill 238 +#define SYS_futex 240 +#define SYS_sched_getaffinity 242 +#define SYS_set_thread_area 243 +#define SYS_exit_group 252 +#define SYS_epoll_create 254 +#define SYS_epoll_ctl 255 +#define SYS_epoll_wait 256 +#define SYS_clock_gettime 265 +#define SYS_pselect6 308 +#define SYS_epoll_create1 329 + TEXT runtime·exit(SB),NOSPLIT,$0 - MOVL $252, AX // syscall number + MOVL $SYS_exit_group, AX MOVL code+0(FP), BX INVOKE_SYSCALL INT $3 // not reached RET -TEXT runtime·exit1(SB),NOSPLIT,$0 - MOVL $1, AX // exit - exit the current os thread +TEXT exit1<>(SB),NOSPLIT,$0 + MOVL $SYS_exit, AX MOVL code+0(FP), BX INVOKE_SYSCALL INT $3 // not reached RET +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-4 + MOVL wait+0(FP), AX + // We're done using the stack. + MOVL $0, (AX) + MOVL $1, AX // exit (just this thread) + MOVL $0, BX // exit code + INT $0x80 // no stack; must not use CALL + // We may not even have a stack any more. 
+ INT $3 + JMP 0(PC) + TEXT runtime·open(SB),NOSPLIT,$0 - MOVL $5, AX // syscall - open + MOVL $SYS_open, AX MOVL name+0(FP), BX MOVL mode+4(FP), CX MOVL perm+8(FP), DX @@ -51,7 +99,7 @@ TEXT runtime·open(SB),NOSPLIT,$0 RET TEXT runtime·closefd(SB),NOSPLIT,$0 - MOVL $6, AX // syscall - close + MOVL $SYS_close, AX MOVL fd+0(FP), BX INVOKE_SYSCALL CMPL AX, $0xfffff001 @@ -61,7 +109,7 @@ TEXT runtime·closefd(SB),NOSPLIT,$0 RET TEXT runtime·write(SB),NOSPLIT,$0 - MOVL $4, AX // syscall - write + MOVL $SYS_write, AX MOVL fd+0(FP), BX MOVL p+4(FP), CX MOVL n+8(FP), DX @@ -73,7 +121,7 @@ TEXT runtime·write(SB),NOSPLIT,$0 RET TEXT runtime·read(SB),NOSPLIT,$0 - MOVL $3, AX // syscall - read + MOVL $SYS_read, AX MOVL fd+0(FP), BX MOVL p+4(FP), CX MOVL n+8(FP), DX @@ -85,7 +133,7 @@ TEXT runtime·read(SB),NOSPLIT,$0 RET TEXT runtime·getrlimit(SB),NOSPLIT,$0 - MOVL $191, AX // syscall - ugetrlimit + MOVL $SYS_ugetrlimit, AX MOVL kind+0(FP), BX MOVL limit+4(FP), CX INVOKE_SYSCALL @@ -103,7 +151,7 @@ TEXT runtime·usleep(SB),NOSPLIT,$8 MOVL AX, 4(SP) // pselect6(0, 0, 0, 0, &ts, 0) - MOVL $308, AX + MOVL $SYS_pselect6, AX MOVL $0, BX MOVL $0, CX MOVL $0, DX @@ -114,31 +162,31 @@ TEXT runtime·usleep(SB),NOSPLIT,$8 RET TEXT runtime·gettid(SB),NOSPLIT,$0-4 - MOVL $224, AX // syscall - gettid + MOVL $SYS_gettid, AX INVOKE_SYSCALL MOVL AX, ret+0(FP) RET TEXT runtime·raise(SB),NOSPLIT,$12 - MOVL $224, AX // syscall - gettid + MOVL $SYS_gettid, AX INVOKE_SYSCALL MOVL AX, BX // arg 1 tid MOVL sig+0(FP), CX // arg 2 signal - MOVL $238, AX // syscall - tkill + MOVL $SYS_tkill, AX INVOKE_SYSCALL RET TEXT runtime·raiseproc(SB),NOSPLIT,$12 - MOVL $20, AX // syscall - getpid + MOVL $SYS_getpid, AX INVOKE_SYSCALL MOVL AX, BX // arg 1 pid MOVL sig+0(FP), CX // arg 2 signal - MOVL $37, AX // syscall - kill + MOVL $SYS_kill, AX INVOKE_SYSCALL RET TEXT runtime·setitimer(SB),NOSPLIT,$0-12 - MOVL $104, AX // syscall - setitimer + MOVL $SYS_setittimer, AX MOVL mode+0(FP), BX MOVL new+4(FP), CX MOVL old+8(FP), DX @@ -146,7 +194,7 @@ TEXT runtime·setitimer(SB),NOSPLIT,$0-12 RET TEXT runtime·mincore(SB),NOSPLIT,$0-16 - MOVL $218, AX // syscall - mincore + MOVL $SYS_mincore, AX MOVL addr+0(FP), BX MOVL n+4(FP), CX MOVL dst+8(FP), DX @@ -155,15 +203,56 @@ TEXT runtime·mincore(SB),NOSPLIT,$0-16 RET // func walltime() (sec int64, nsec int32) -TEXT runtime·walltime(SB), NOSPLIT, $32 - MOVL $265, AX // syscall - clock_gettime +TEXT runtime·walltime(SB), NOSPLIT, $0-12 + // We don't know how much stack space the VDSO code will need, + // so switch to g0. + + MOVL SP, BP // Save old SP; BP unchanged by C code. + + get_tls(CX) + MOVL g(CX), AX + MOVL g_m(AX), CX + MOVL m_curg(CX), DX + + CMPL AX, DX // Only switch if on curg. 
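The rewritten walltime and nanotime bodies all make the same decision: if the address of __vdso_clock_gettime was resolved at startup, call it on the g0 stack; otherwise fall back to the plain clock_gettime syscall. Stripped of the stack switching, that is a nil check on a function pointer. A hedged Go-level sketch with invented names (vdsoClockGettime, rawClockGettime, nanotimeSketch):

package main

import (
	"fmt"
	"time"
)

// vdsoClockGettime stands in for the resolved vDSO symbol; it is nil when the
// symbol was not found, just as runtime·__vdso_clock_gettime_sym is 0 then.
var vdsoClockGettime func() int64

// rawClockGettime is the slow path, standing in for the clock_gettime syscall.
func rawClockGettime() int64 { return time.Now().UnixNano() }

func nanotimeSketch() int64 {
	if vdsoClockGettime != nil {
		return vdsoClockGettime() // fast path: user-space vDSO call, no kernel entry
	}
	return rawClockGettime() // fallback: ordinary syscall
}

func main() { fmt.Println(nanotimeSketch()) }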
+ JNE noswitch + + MOVL m_g0(CX), DX + MOVL (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack + +noswitch: + SUBL $16, SP // Space for results + ANDL $~15, SP // Align for C code + + // Stack layout, depending on call path: + // x(SP) vDSO INVOKE_SYSCALL + // 12 ts.tv_nsec ts.tv_nsec + // 8 ts.tv_sec ts.tv_sec + // 4 &ts - + // 0 CLOCK_<id> - + + MOVL runtime·__vdso_clock_gettime_sym(SB), AX + CMPL AX, $0 + JEQ fallback + + LEAL 8(SP), BX // &ts (struct timespec) + MOVL BX, 4(SP) + MOVL $0, 0(SP) // CLOCK_REALTIME + CALL AX + JMP finish + +fallback: + MOVL $SYS_clock_gettime, AX MOVL $0, BX // CLOCK_REALTIME LEAL 8(SP), CX - MOVL $0, DX INVOKE_SYSCALL + +finish: MOVL 8(SP), AX // sec MOVL 12(SP), BX // nsec + MOVL BP, SP // Restore real SP + // sec is in AX, nsec in BX MOVL AX, sec_lo+0(FP) MOVL $0, sec_hi+4(FP) @@ -172,15 +261,48 @@ TEXT runtime·walltime(SB), NOSPLIT, $32 // int64 nanotime(void) so really // void nanotime(int64 *nsec) -TEXT runtime·nanotime(SB), NOSPLIT, $32 - MOVL $265, AX // syscall - clock_gettime +TEXT runtime·nanotime(SB), NOSPLIT, $0-8 + // Switch to g0 stack. See comment above in runtime·walltime. + + MOVL SP, BP // Save old SP; BP unchanged by C code. + + get_tls(CX) + MOVL g(CX), AX + MOVL g_m(AX), CX + MOVL m_curg(CX), DX + + CMPL AX, DX // Only switch if on curg. + JNE noswitch + + MOVL m_g0(CX), DX + MOVL (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack + +noswitch: + SUBL $16, SP // Space for results + ANDL $~15, SP // Align for C code + + MOVL runtime·__vdso_clock_gettime_sym(SB), AX + CMPL AX, $0 + JEQ fallback + + LEAL 8(SP), BX // &ts (struct timespec) + MOVL BX, 4(SP) + MOVL $1, 0(SP) // CLOCK_MONOTONIC + CALL AX + JMP finish + +fallback: + MOVL $SYS_clock_gettime, AX MOVL $1, BX // CLOCK_MONOTONIC LEAL 8(SP), CX - MOVL $0, DX INVOKE_SYSCALL + +finish: MOVL 8(SP), AX // sec MOVL 12(SP), BX // nsec + MOVL BP, SP // Restore real SP + // sec is in AX, nsec in BX // convert to DX:AX nsec MOVL $1000000000, CX @@ -193,7 +315,7 @@ TEXT runtime·nanotime(SB), NOSPLIT, $32 RET TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0 - MOVL $175, AX // syscall entry + MOVL $SYS_rt_sigprocmask, AX MOVL how+0(FP), BX MOVL new+4(FP), CX MOVL old+8(FP), DX @@ -205,7 +327,7 @@ TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0 RET TEXT runtime·rt_sigaction(SB),NOSPLIT,$0 - MOVL $174, AX // syscall - rt_sigaction + MOVL $SYS_rt_sigaction, AX MOVL sig+0(FP), BX MOVL new+4(FP), CX MOVL old+8(FP), DX @@ -258,7 +380,7 @@ TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 JMP runtime·sigtramp(SB) TEXT runtime·sigreturn(SB),NOSPLIT,$0 - MOVL $173, AX // rt_sigreturn + MOVL $SYS_rt_sigreturn, AX // Sigreturn expects same SP as signal handler, // so cannot CALL 0x10(GS) here. 
INT $0x80 @@ -266,7 +388,7 @@ TEXT runtime·sigreturn(SB),NOSPLIT,$0 RET TEXT runtime·mmap(SB),NOSPLIT,$0 - MOVL $192, AX // mmap2 + MOVL $SYS_mmap2, AX MOVL addr+0(FP), BX MOVL n+4(FP), CX MOVL prot+8(FP), DX @@ -276,14 +398,19 @@ TEXT runtime·mmap(SB),NOSPLIT,$0 SHRL $12, BP INVOKE_SYSCALL CMPL AX, $0xfffff001 - JLS 3(PC) + JLS ok NOTL AX INCL AX - MOVL AX, ret+24(FP) + MOVL $0, p+24(FP) + MOVL AX, err+28(FP) + RET +ok: + MOVL AX, p+24(FP) + MOVL $0, err+28(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$0 - MOVL $91, AX // munmap + MOVL $SYS_munmap, AX MOVL addr+0(FP), BX MOVL n+4(FP), CX INVOKE_SYSCALL @@ -293,7 +420,7 @@ TEXT runtime·munmap(SB),NOSPLIT,$0 RET TEXT runtime·madvise(SB),NOSPLIT,$0 - MOVL $219, AX // madvise + MOVL $SYS_madvise, AX MOVL addr+0(FP), BX MOVL n+4(FP), CX MOVL flags+8(FP), DX @@ -304,7 +431,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0 // int32 futex(int32 *uaddr, int32 op, int32 val, // struct timespec *timeout, int32 *uaddr2, int32 val2); TEXT runtime·futex(SB),NOSPLIT,$0 - MOVL $240, AX // futex + MOVL $SYS_futex, AX MOVL addr+0(FP), BX MOVL op+4(FP), CX MOVL val+8(FP), DX @@ -317,7 +444,7 @@ TEXT runtime·futex(SB),NOSPLIT,$0 // int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void)); TEXT runtime·clone(SB),NOSPLIT,$0 - MOVL $120, AX // clone + MOVL $SYS_clone, AX MOVL flags+0(FP), BX MOVL stk+4(FP), CX MOVL $0, DX // parent tid ptr @@ -351,7 +478,7 @@ TEXT runtime·clone(SB),NOSPLIT,$0 INT $3 // Initialize AX to Linux tid - MOVL $224, AX + MOVL $SYS_gettid, AX INVOKE_SYSCALL MOVL 0(SP), BX // m @@ -396,11 +523,11 @@ TEXT runtime·clone(SB),NOSPLIT,$0 nog: CALL SI // fn() - CALL runtime·exit1(SB) + CALL exit1<>(SB) MOVL $0x1234, 0x1005 TEXT runtime·sigaltstack(SB),NOSPLIT,$-8 - MOVL $186, AX // sigaltstack + MOVL $SYS_sigaltstack, AX MOVL new+0(FP), BX MOVL old+4(FP), CX INVOKE_SYSCALL @@ -483,7 +610,7 @@ TEXT runtime·setldt(SB),NOSPLIT,$32 // call set_thread_area MOVL AX, BX // user_desc - MOVL $243, AX // syscall - set_thread_area + MOVL $SYS_set_thread_area, AX // We can't call this via 0x10(GS) because this is called from setldt0 to set that up. 
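The mmap stubs in this file, and in every other port touched by this patch, now fill a (p, err) result pair instead of encoding failure in the returned pointer, so callers test err directly rather than decoding error values out of the address. A hedged sketch of the caller side, with mmapStub standing in for the assembly routine:

package main

import (
	"fmt"
	"unsafe"
)

// mmapStub models the new two-result convention of the assembly stubs; a real
// build would use the per-OS implementation, not this placeholder.
func mmapStub(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int) {
	return nil, 12 // pretend the kernel reported ENOMEM
}

func main() {
	p, err := mmapStub(nil, 1<<20, 0, 0, -1, 0)
	if err != 0 {
		fmt.Println("mmap failed, errno", err) // the error is a plain errno now
		return
	}
	fmt.Println("mapped at", p)
}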
INT $0x80 @@ -509,12 +636,12 @@ TEXT runtime·setldt(SB),NOSPLIT,$32 RET TEXT runtime·osyield(SB),NOSPLIT,$0 - MOVL $158, AX + MOVL $SYS_sched_yield, AX INVOKE_SYSCALL RET TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0 - MOVL $242, AX // syscall - sched_getaffinity + MOVL $SYS_sched_getaffinity, AX MOVL pid+0(FP), BX MOVL len+4(FP), CX MOVL buf+8(FP), DX @@ -524,7 +651,7 @@ TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0 // int32 runtime·epollcreate(int32 size); TEXT runtime·epollcreate(SB),NOSPLIT,$0 - MOVL $254, AX + MOVL $SYS_epoll_create, AX MOVL size+0(FP), BX INVOKE_SYSCALL MOVL AX, ret+4(FP) @@ -532,7 +659,7 @@ TEXT runtime·epollcreate(SB),NOSPLIT,$0 // int32 runtime·epollcreate1(int32 flags); TEXT runtime·epollcreate1(SB),NOSPLIT,$0 - MOVL $329, AX + MOVL $SYS_epoll_create1, AX MOVL flags+0(FP), BX INVOKE_SYSCALL MOVL AX, ret+4(FP) @@ -540,7 +667,7 @@ TEXT runtime·epollcreate1(SB),NOSPLIT,$0 // func epollctl(epfd, op, fd int32, ev *epollEvent) int TEXT runtime·epollctl(SB),NOSPLIT,$0 - MOVL $255, AX + MOVL $SYS_epoll_ctl, AX MOVL epfd+0(FP), BX MOVL op+4(FP), CX MOVL fd+8(FP), DX @@ -551,7 +678,7 @@ TEXT runtime·epollctl(SB),NOSPLIT,$0 // int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout); TEXT runtime·epollwait(SB),NOSPLIT,$0 - MOVL $256, AX + MOVL $SYS_epoll_wait, AX MOVL epfd+0(FP), BX MOVL ev+4(FP), CX MOVL nev+8(FP), DX @@ -562,7 +689,7 @@ TEXT runtime·epollwait(SB),NOSPLIT,$0 // void runtime·closeonexec(int32 fd); TEXT runtime·closeonexec(SB),NOSPLIT,$0 - MOVL $55, AX // fcntl + MOVL $SYS_fcntl, AX MOVL fd+0(FP), BX // fd MOVL $2, CX // F_SETFD MOVL $1, DX // FD_CLOEXEC @@ -571,7 +698,7 @@ TEXT runtime·closeonexec(SB),NOSPLIT,$0 // int access(const char *name, int mode) TEXT runtime·access(SB),NOSPLIT,$0 - MOVL $33, AX // syscall - access + MOVL $SYS_access, AX MOVL name+0(FP), BX MOVL mode+4(FP), CX INVOKE_SYSCALL @@ -582,7 +709,7 @@ TEXT runtime·access(SB),NOSPLIT,$0 TEXT runtime·connect(SB),NOSPLIT,$0-16 // connect is implemented as socketcall(NR_socket, 3, *(rest of args)) // stack already should have fd, addr, addrlen. - MOVL $102, AX // syscall - socketcall + MOVL $SYS_socketcall, AX MOVL $3, BX // connect LEAL fd+0(FP), CX INVOKE_SYSCALL @@ -593,7 +720,7 @@ TEXT runtime·connect(SB),NOSPLIT,$0-16 TEXT runtime·socket(SB),NOSPLIT,$0-16 // socket is implemented as socketcall(NR_socket, 1, *(rest of args)) // stack already should have domain, type, protocol. - MOVL $102, AX // syscall - socketcall + MOVL $SYS_socketcall, AX MOVL $1, BX // socket LEAL domain+0(FP), CX INVOKE_SYSCALL @@ -603,7 +730,7 @@ TEXT runtime·socket(SB),NOSPLIT,$0-16 // func sbrk0() uintptr TEXT runtime·sbrk0(SB),NOSPLIT,$0-4 // Implemented as brk(NULL). 
- MOVL $45, AX // syscall - brk + MOVL $SYS_brk, AX MOVL $0, BX // NULL INVOKE_SYSCALL MOVL AX, ret+0(FP) diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index e0dc3e1264..5a94bda7c2 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -10,23 +10,65 @@ #include "go_tls.h" #include "textflag.h" +#define SYS_read 0 +#define SYS_write 1 +#define SYS_open 2 +#define SYS_close 3 +#define SYS_mmap 9 +#define SYS_munmap 11 +#define SYS_brk 12 +#define SYS_rt_sigaction 13 +#define SYS_rt_sigprocmask 14 +#define SYS_rt_sigreturn 15 +#define SYS_access 21 +#define SYS_sched_yield 24 +#define SYS_mincore 27 +#define SYS_madvise 28 +#define SYS_setittimer 38 +#define SYS_getpid 39 +#define SYS_socket 41 +#define SYS_connect 42 +#define SYS_clone 56 +#define SYS_exit 60 +#define SYS_kill 62 +#define SYS_fcntl 72 +#define SYS_getrlimit 97 +#define SYS_sigaltstack 131 +#define SYS_arch_prctl 158 +#define SYS_gettid 186 +#define SYS_tkill 200 +#define SYS_futex 202 +#define SYS_sched_getaffinity 204 +#define SYS_epoll_create 213 +#define SYS_exit_group 231 +#define SYS_epoll_wait 232 +#define SYS_epoll_ctl 233 +#define SYS_pselect6 270 +#define SYS_epoll_create1 291 + TEXT runtime·exit(SB),NOSPLIT,$0-4 MOVL code+0(FP), DI - MOVL $231, AX // exitgroup - force all os threads to exit + MOVL $SYS_exit_group, AX SYSCALL RET -TEXT runtime·exit1(SB),NOSPLIT,$0-4 - MOVL code+0(FP), DI - MOVL $60, AX // exit - exit the current os thread +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-8 + MOVQ wait+0(FP), AX + // We're done using the stack. + MOVL $0, (AX) + MOVL $0, DI // exit code + MOVL $SYS_exit, AX SYSCALL - RET + // We may not even have a stack any more. + INT $3 + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$0-20 MOVQ name+0(FP), DI MOVL mode+8(FP), SI MOVL perm+12(FP), DX - MOVL $2, AX // syscall entry + MOVL $SYS_open, AX SYSCALL CMPQ AX, $0xfffffffffffff001 JLS 2(PC) @@ -36,7 +78,7 @@ TEXT runtime·open(SB),NOSPLIT,$0-20 TEXT runtime·closefd(SB),NOSPLIT,$0-12 MOVL fd+0(FP), DI - MOVL $3, AX // syscall entry + MOVL $SYS_close, AX SYSCALL CMPQ AX, $0xfffffffffffff001 JLS 2(PC) @@ -48,7 +90,7 @@ TEXT runtime·write(SB),NOSPLIT,$0-28 MOVQ fd+0(FP), DI MOVQ p+8(FP), SI MOVL n+16(FP), DX - MOVL $1, AX // syscall entry + MOVL $SYS_write, AX SYSCALL CMPQ AX, $0xfffffffffffff001 JLS 2(PC) @@ -60,7 +102,7 @@ TEXT runtime·read(SB),NOSPLIT,$0-28 MOVL fd+0(FP), DI MOVQ p+8(FP), SI MOVL n+16(FP), DX - MOVL $0, AX // syscall entry + MOVL $SYS_read, AX SYSCALL CMPQ AX, $0xfffffffffffff001 JLS 2(PC) @@ -71,7 +113,7 @@ TEXT runtime·read(SB),NOSPLIT,$0-28 TEXT runtime·getrlimit(SB),NOSPLIT,$0-20 MOVL kind+0(FP), DI MOVQ limit+8(FP), SI - MOVL $97, AX // syscall entry + MOVL $SYS_getrlimit, AX SYSCALL MOVL AX, ret+16(FP) RET @@ -93,31 +135,31 @@ TEXT runtime·usleep(SB),NOSPLIT,$16 MOVL $0, R10 MOVQ SP, R8 MOVL $0, R9 - MOVL $270, AX + MOVL $SYS_pselect6, AX SYSCALL RET TEXT runtime·gettid(SB),NOSPLIT,$0-4 - MOVL $186, AX // syscall - gettid + MOVL $SYS_gettid, AX SYSCALL MOVL AX, ret+0(FP) RET TEXT runtime·raise(SB),NOSPLIT,$0 - MOVL $186, AX // syscall - gettid + MOVL $SYS_gettid, AX SYSCALL MOVL AX, DI // arg 1 tid MOVL sig+0(FP), SI // arg 2 - MOVL $200, AX // syscall - tkill + MOVL $SYS_tkill, AX SYSCALL RET TEXT runtime·raiseproc(SB),NOSPLIT,$0 - MOVL $39, AX // syscall - getpid + MOVL $SYS_getpid, AX SYSCALL MOVL AX, DI // arg 1 pid MOVL sig+0(FP), SI // arg 2 - MOVL $62, AX // syscall - kill + MOVL $SYS_kill, AX SYSCALL RET @@ -125,7 +167,7 @@ 
TEXT runtime·setitimer(SB),NOSPLIT,$0-24 MOVL mode+0(FP), DI MOVQ new+8(FP), SI MOVQ old+16(FP), DX - MOVL $38, AX // syscall entry + MOVL $SYS_setittimer, AX SYSCALL RET @@ -133,17 +175,37 @@ TEXT runtime·mincore(SB),NOSPLIT,$0-28 MOVQ addr+0(FP), DI MOVQ n+8(FP), SI MOVQ dst+16(FP), DX - MOVL $27, AX // syscall entry + MOVL $SYS_mincore, AX SYSCALL MOVL AX, ret+24(FP) RET // func walltime() (sec int64, nsec int32) -TEXT runtime·walltime(SB),NOSPLIT,$16 - // Be careful. We're calling a function with gcc calling convention here. - // We're guaranteed 128 bytes on entry, and we've taken 16, and the - // call uses another 8. - // That leaves 104 for the gettime code to use. Hope that's enough! +TEXT runtime·walltime(SB),NOSPLIT,$0-12 + // We don't know how much stack space the VDSO code will need, + // so switch to g0. + // In particular, a kernel configured with CONFIG_OPTIMIZE_INLINING=n + // and hardening can use a full page of stack space in gettime_sym + // due to stack probes inserted to avoid stack/heap collisions. + // See issue #20427. + + MOVQ SP, BP // Save old SP; BP unchanged by C code. + + get_tls(CX) + MOVQ g(CX), AX + MOVQ g_m(AX), CX + MOVQ m_curg(CX), DX + + CMPQ AX, DX // Only switch if on curg. + JNE noswitch + + MOVQ m_g0(CX), DX + MOVQ (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack + +noswitch: + SUBQ $16, SP // Space for results + ANDQ $~15, SP // Align for C code + MOVQ runtime·__vdso_clock_gettime_sym(SB), AX CMPQ AX, $0 JEQ fallback @@ -152,6 +214,7 @@ TEXT runtime·walltime(SB),NOSPLIT,$16 CALL AX MOVQ 0(SP), AX // sec MOVQ 8(SP), DX // nsec + MOVQ BP, SP // Restore real SP MOVQ AX, sec+0(FP) MOVL DX, nsec+8(FP) RET @@ -163,13 +226,31 @@ fallback: MOVQ 0(SP), AX // sec MOVL 8(SP), DX // usec IMULQ $1000, DX + MOVQ BP, SP // Restore real SP MOVQ AX, sec+0(FP) MOVL DX, nsec+8(FP) RET -TEXT runtime·nanotime(SB),NOSPLIT,$16 - // Duplicate time.now here to avoid using up precious stack space. - // See comment above in time.now. +TEXT runtime·nanotime(SB),NOSPLIT,$0-8 + // Switch to g0 stack. See comment above in runtime·walltime. + + MOVQ SP, BP // Save old SP; BX unchanged by C code. + + get_tls(CX) + MOVQ g(CX), AX + MOVQ g_m(AX), CX + MOVQ m_curg(CX), DX + + CMPQ AX, DX // Only switch if on curg. 
+ JNE noswitch + + MOVQ m_g0(CX), DX + MOVQ (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack + +noswitch: + SUBQ $16, SP // Space for results + ANDQ $~15, SP // Align for C code + MOVQ runtime·__vdso_clock_gettime_sym(SB), AX CMPQ AX, $0 JEQ fallback @@ -178,6 +259,7 @@ TEXT runtime·nanotime(SB),NOSPLIT,$16 CALL AX MOVQ 0(SP), AX // sec MOVQ 8(SP), DX // nsec + MOVQ BP, SP // Restore real SP // sec is in AX, nsec in DX // return nsec in AX IMULQ $1000000000, AX @@ -191,6 +273,7 @@ fallback: CALL AX MOVQ 0(SP), AX // sec MOVL 8(SP), DX // usec + MOVQ BP, SP // Restore real SP IMULQ $1000, DX // sec is in AX, nsec in DX // return nsec in AX @@ -204,7 +287,7 @@ TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0-28 MOVQ new+8(FP), SI MOVQ old+16(FP), DX MOVL size+24(FP), R10 - MOVL $14, AX // syscall entry + MOVL $SYS_rt_sigprocmask, AX SYSCALL CMPQ AX, $0xfffffffffffff001 JLS 2(PC) @@ -216,7 +299,7 @@ TEXT runtime·sysSigaction(SB),NOSPLIT,$0-36 MOVQ new+8(FP), SI MOVQ old+16(FP), DX MOVQ size+24(FP), R10 - MOVL $13, AX // syscall entry + MOVL $SYS_rt_sigaction, AX SYSCALL MOVL AX, ret+32(FP) RET @@ -354,7 +437,7 @@ sigtrampnog: // The code that cares about the precise instructions used is: // https://gcc.gnu.org/viewcvs/gcc/trunk/libgcc/config/i386/linux-unwind.h?revision=219188&view=markup TEXT runtime·sigreturn(SB),NOSPLIT,$0 - MOVQ $15, AX // rt_sigreturn + MOVQ $SYS_rt_sigreturn, AX SYSCALL INT $3 // not reached @@ -366,13 +449,18 @@ TEXT runtime·sysMmap(SB),NOSPLIT,$0 MOVL fd+24(FP), R8 MOVL off+28(FP), R9 - MOVL $9, AX // mmap + MOVL $SYS_mmap, AX SYSCALL CMPQ AX, $0xfffffffffffff001 - JLS 3(PC) + JLS ok NOTQ AX INCQ AX - MOVQ AX, ret+32(FP) + MOVQ $0, p+32(FP) + MOVQ AX, err+40(FP) + RET +ok: + MOVQ AX, p+32(FP) + MOVQ $0, err+40(FP) RET // Call the function stored in _cgo_mmap using the GCC calling convention. @@ -396,7 +484,7 @@ TEXT runtime·callCgoMmap(SB),NOSPLIT,$16 TEXT runtime·sysMunmap(SB),NOSPLIT,$0 MOVQ addr+0(FP), DI MOVQ n+8(FP), SI - MOVQ $11, AX // munmap + MOVQ $SYS_munmap, AX SYSCALL CMPQ AX, $0xfffffffffffff001 JLS 2(PC) @@ -420,7 +508,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0 MOVQ addr+0(FP), DI MOVQ n+8(FP), SI MOVL flags+16(FP), DX - MOVQ $28, AX // madvise + MOVQ $SYS_madvise, AX SYSCALL // ignore failure - maybe pages are locked RET @@ -434,7 +522,7 @@ TEXT runtime·futex(SB),NOSPLIT,$0 MOVQ ts+16(FP), R10 MOVQ addr2+24(FP), R8 MOVL val3+32(FP), R9 - MOVL $202, AX + MOVL $SYS_futex, AX SYSCALL MOVL AX, ret+40(FP) RET @@ -452,7 +540,7 @@ TEXT runtime·clone(SB),NOSPLIT,$0 MOVQ gp+24(FP), R9 MOVQ fn+32(FP), R12 - MOVL $56, AX + MOVL $SYS_clone, AX SYSCALL // In parent, return. @@ -471,7 +559,7 @@ TEXT runtime·clone(SB),NOSPLIT,$0 JEQ nog // Initialize m->procid to Linux tid - MOVL $186, AX // gettid + MOVL $SYS_gettid, AX SYSCALL MOVQ AX, m_procid(R8) @@ -491,14 +579,14 @@ nog: // It shouldn't return. If it does, exit that thread. 
MOVL $111, DI - MOVL $60, AX + MOVL $SYS_exit, AX SYSCALL JMP -3(PC) // keep exiting TEXT runtime·sigaltstack(SB),NOSPLIT,$-8 MOVQ new+0(FP), DI MOVQ old+8(FP), SI - MOVQ $131, AX + MOVQ $SYS_sigaltstack, AX SYSCALL CMPQ AX, $0xfffffffffffff001 JLS 2(PC) @@ -517,7 +605,7 @@ TEXT runtime·settls(SB),NOSPLIT,$32 #endif MOVQ DI, SI MOVQ $0x1002, DI // ARCH_SET_FS - MOVQ $158, AX // arch_prctl + MOVQ $SYS_arch_prctl, AX SYSCALL CMPQ AX, $0xfffffffffffff001 JLS 2(PC) @@ -525,7 +613,7 @@ TEXT runtime·settls(SB),NOSPLIT,$32 RET TEXT runtime·osyield(SB),NOSPLIT,$0 - MOVL $24, AX + MOVL $SYS_sched_yield, AX SYSCALL RET @@ -533,7 +621,7 @@ TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0 MOVQ pid+0(FP), DI MOVQ len+8(FP), SI MOVQ buf+16(FP), DX - MOVL $204, AX // syscall entry + MOVL $SYS_sched_getaffinity, AX SYSCALL MOVL AX, ret+24(FP) RET @@ -541,7 +629,7 @@ TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0 // int32 runtime·epollcreate(int32 size); TEXT runtime·epollcreate(SB),NOSPLIT,$0 MOVL size+0(FP), DI - MOVL $213, AX // syscall entry + MOVL $SYS_epoll_create, AX SYSCALL MOVL AX, ret+8(FP) RET @@ -549,7 +637,7 @@ TEXT runtime·epollcreate(SB),NOSPLIT,$0 // int32 runtime·epollcreate1(int32 flags); TEXT runtime·epollcreate1(SB),NOSPLIT,$0 MOVL flags+0(FP), DI - MOVL $291, AX // syscall entry + MOVL $SYS_epoll_create1, AX SYSCALL MOVL AX, ret+8(FP) RET @@ -560,7 +648,7 @@ TEXT runtime·epollctl(SB),NOSPLIT,$0 MOVL op+4(FP), SI MOVL fd+8(FP), DX MOVQ ev+16(FP), R10 - MOVL $233, AX // syscall entry + MOVL $SYS_epoll_ctl, AX SYSCALL MOVL AX, ret+24(FP) RET @@ -571,7 +659,7 @@ TEXT runtime·epollwait(SB),NOSPLIT,$0 MOVQ ev+8(FP), SI MOVL nev+16(FP), DX MOVL timeout+20(FP), R10 - MOVL $232, AX // syscall entry + MOVL $SYS_epoll_wait, AX SYSCALL MOVL AX, ret+24(FP) RET @@ -581,7 +669,7 @@ TEXT runtime·closeonexec(SB),NOSPLIT,$0 MOVL fd+0(FP), DI // fd MOVQ $2, SI // F_SETFD MOVQ $1, DX // FD_CLOEXEC - MOVL $72, AX // fcntl + MOVL $SYS_fcntl, AX SYSCALL RET @@ -590,7 +678,7 @@ TEXT runtime·closeonexec(SB),NOSPLIT,$0 TEXT runtime·access(SB),NOSPLIT,$0 MOVQ name+0(FP), DI MOVL mode+8(FP), SI - MOVL $21, AX // syscall entry + MOVL $SYS_access, AX SYSCALL MOVL AX, ret+16(FP) RET @@ -600,7 +688,7 @@ TEXT runtime·connect(SB),NOSPLIT,$0-28 MOVL fd+0(FP), DI MOVQ addr+8(FP), SI MOVL len+16(FP), DX - MOVL $42, AX // syscall entry + MOVL $SYS_connect, AX SYSCALL MOVL AX, ret+24(FP) RET @@ -610,7 +698,7 @@ TEXT runtime·socket(SB),NOSPLIT,$0-20 MOVL domain+0(FP), DI MOVL typ+4(FP), SI MOVL prot+8(FP), DX - MOVL $41, AX // syscall entry + MOVL $SYS_socket, AX SYSCALL MOVL AX, ret+16(FP) RET @@ -619,7 +707,7 @@ TEXT runtime·socket(SB),NOSPLIT,$0-20 TEXT runtime·sbrk0(SB),NOSPLIT,$0-8 // Implemented as brk(NULL). MOVQ $0, DI - MOVL $12, AX // syscall entry + MOVL $SYS_brk, AX SYSCALL MOVQ AX, ret+0(FP) RET diff --git a/src/runtime/sys_linux_arm.s b/src/runtime/sys_linux_arm.s index 64beed8f68..794f9b39a6 100644 --- a/src/runtime/sys_linux_arm.s +++ b/src/runtime/sys_linux_arm.s @@ -114,7 +114,7 @@ TEXT runtime·exit(SB),NOSPLIT,$-4 MOVW $1002, R1 MOVW R0, (R1) // fail hard -TEXT runtime·exit1(SB),NOSPLIT,$-4 +TEXT exit1<>(SB),NOSPLIT,$-4 MOVW code+0(FP), R0 MOVW $SYS_exit, R7 SWI $0 @@ -122,6 +122,22 @@ TEXT runtime·exit1(SB),NOSPLIT,$-4 MOVW $1003, R1 MOVW R0, (R1) // fail hard +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$-4-4 + MOVW wait+0(FP), R0 + // We're done using the stack. + // Alas, there's no reliable way to make this write atomic + // without potentially using the stack. So it goes. 
+ MOVW $0, R1 + MOVW R1, (R0) + MOVW $0, R0 // exit code + MOVW $SYS_exit, R7 + SWI $0 + MOVW $1234, R0 + MOVW $1004, R1 + MOVW R0, (R1) // fail hard + JMP 0(PC) + TEXT runtime·gettid(SB),NOSPLIT,$0-4 MOVW $SYS_gettid, R7 SWI $0 @@ -157,8 +173,12 @@ TEXT runtime·mmap(SB),NOSPLIT,$0 SWI $0 MOVW $0xfffff001, R6 CMP R6, R0 + MOVW $0, R1 RSB.HI $0, R0 - MOVW R0, ret+24(FP) + MOVW.HI R0, R1 // if error, put in R1 + MOVW.HI $0, R0 + MOVW R0, p+24(FP) + MOVW R1, err+28(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$0 @@ -317,7 +337,7 @@ nog: SUB $16, R13 // restore the stack pointer to avoid memory corruption MOVW $0, R0 MOVW R0, 4(R13) - BL runtime·exit1(SB) + BL exit1<>(SB) MOVW $1234, R0 MOVW $1005, R1 diff --git a/src/runtime/sys_linux_arm64.s b/src/runtime/sys_linux_arm64.s index e921f9906c..758e68575b 100644 --- a/src/runtime/sys_linux_arm64.s +++ b/src/runtime/sys_linux_arm64.s @@ -54,11 +54,16 @@ TEXT runtime·exit(SB),NOSPLIT,$-8-4 SVC RET -TEXT runtime·exit1(SB),NOSPLIT,$-8-4 - MOVW code+0(FP), R0 +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$-8-8 + MOVD wait+0(FP), R0 + // We're done using the stack. + MOVW $0, R1 + STLRW R1, (R0) + MOVW $0, R0 // exit code MOVD $SYS_exit, R8 SVC - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$-8-20 MOVD $AT_FDCWD, R0 @@ -273,9 +278,14 @@ TEXT runtime·mmap(SB),NOSPLIT,$-8 MOVD $SYS_mmap, R8 SVC CMN $4095, R0 - BCC 2(PC) + BCC ok NEG R0,R0 - MOVD R0, ret+32(FP) + MOVD $0, p+32(FP) + MOVD R0, err+40(FP) + RET +ok: + MOVD R0, p+32(FP) + MOVD $0, err+40(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$-8 diff --git a/src/runtime/sys_linux_mips64x.s b/src/runtime/sys_linux_mips64x.s index 27de7b0901..7402ae21d4 100644 --- a/src/runtime/sys_linux_mips64x.s +++ b/src/runtime/sys_linux_mips64x.s @@ -53,11 +53,18 @@ TEXT runtime·exit(SB),NOSPLIT,$-8-4 SYSCALL RET -TEXT runtime·exit1(SB),NOSPLIT,$-8-4 - MOVW code+0(FP), R4 +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$-8-8 + MOVV wait+0(FP), R1 + // We're done using the stack. + MOVW $0, R2 + SYNC + MOVW R2, (R1) + SYNC + MOVW $0, R4 // exit code MOVV $SYS_exit, R2 SYSCALL - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$-8-20 MOVV name+0(FP), R4 @@ -262,7 +269,13 @@ TEXT runtime·mmap(SB),NOSPLIT,$-8 MOVV $SYS_mmap, R2 SYSCALL - MOVV R2, ret+32(FP) + BEQ R7, ok + MOVV $0, p+32(FP) + MOVV R2, err+40(FP) + RET +ok: + MOVV R2, p+32(FP) + MOVV $0, err+40(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$-8 diff --git a/src/runtime/sys_linux_mipsx.s b/src/runtime/sys_linux_mipsx.s index 39bd731a4d..6bd0267ea2 100644 --- a/src/runtime/sys_linux_mipsx.s +++ b/src/runtime/sys_linux_mipsx.s @@ -54,12 +54,19 @@ TEXT runtime·exit(SB),NOSPLIT,$0-4 UNDEF RET -TEXT runtime·exit1(SB),NOSPLIT,$0-4 - MOVW code+0(FP), R4 +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-4 + MOVW wait+0(FP), R1 + // We're done using the stack. 
+ MOVW $0, R2 + SYNC + MOVW R2, (R1) + SYNC + MOVW $0, R4 // exit code MOVW $SYS_exit, R2 SYSCALL UNDEF - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$0-16 MOVW name+0(FP), R4 @@ -272,7 +279,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$12 TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 JMP runtime·sigtramp(SB) -TEXT runtime·mmap(SB),NOSPLIT,$20-28 +TEXT runtime·mmap(SB),NOSPLIT,$20-32 MOVW addr+0(FP), R4 MOVW n+4(FP), R5 MOVW prot+8(FP), R6 @@ -284,7 +291,13 @@ TEXT runtime·mmap(SB),NOSPLIT,$20-28 MOVW $SYS_mmap, R2 SYSCALL - MOVW R2, ret+24(FP) + BEQ R7, ok + MOVW $0, p+24(FP) + MOVW R2, err+28(FP) + RET +ok: + MOVW R2, p+24(FP) + MOVW $0, err+28(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$0-8 diff --git a/src/runtime/sys_linux_ppc64x.s b/src/runtime/sys_linux_ppc64x.s index 2b2aa61d06..9b45f94e65 100644 --- a/src/runtime/sys_linux_ppc64x.s +++ b/src/runtime/sys_linux_ppc64x.s @@ -54,10 +54,16 @@ TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4 SYSCALL $SYS_exit_group RET -TEXT runtime·exit1(SB),NOSPLIT|NOFRAME,$0-4 - MOVW code+0(FP), R3 +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8 + MOVD wait+0(FP), R1 + // We're done using the stack. + MOVW $0, R2 + SYNC + MOVW R2, (R1) + MOVW $0, R3 // exit code SYSCALL $SYS_exit - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT|NOFRAME,$0-20 MOVD name+0(FP), R3 @@ -244,7 +250,96 @@ TEXT runtime·_sigtramp(SB),NOSPLIT,$64 #ifdef GOARCH_ppc64le // ppc64le doesn't need function descriptors -TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 +TEXT runtime·cgoSigtramp(SB),NOSPLIT|NOFRAME,$0 + // The stack unwinder, presumably written in C, may not be able to + // handle Go frame correctly. So, this function is NOFRAME, and we + // we save/restore LR manually. + MOVD LR, R10 + + // We're coming from C code, initialize essential registers. + CALL runtime·reginit(SB) + + // If no traceback function, do usual sigtramp. + MOVD runtime·cgoTraceback(SB), R6 + CMP $0, R6 + BEQ sigtramp + + // If no traceback support function, which means that + // runtime/cgo was not linked in, do usual sigtramp. + MOVD _cgo_callers(SB), R6 + CMP $0, R6 + BEQ sigtramp + + // Set up g register. + CALL runtime·load_g(SB) + + // Figure out if we are currently in a cgo call. + // If not, just do usual sigtramp. + CMP $0, g + BEQ sigtrampnog // g == nil + MOVD g_m(g), R6 + CMP $0, R6 + BEQ sigtramp // g.m == nil + MOVW m_ncgo(R6), R7 + CMPW $0, R7 + BEQ sigtramp // g.m.ncgo = 0 + MOVD m_curg(R6), R7 + CMP $0, R7 + BEQ sigtramp // g.m.curg == nil + MOVD g_syscallsp(R7), R7 + CMP $0, R7 + BEQ sigtramp // g.m.curg.syscallsp == 0 + MOVD m_cgoCallers(R6), R7 // R7 is the fifth arg in C calling convention. + CMP $0, R7 + BEQ sigtramp // g.m.cgoCallers == nil + MOVW m_cgoCallersUse(R6), R8 + CMPW $0, R8 + BNE sigtramp // g.m.cgoCallersUse != 0 + + // Jump to a function in runtime/cgo. + // That function, written in C, will call the user's traceback + // function with proper unwind info, and will then call back here. + // The first three arguments, and the fifth, are already in registers. + // Set the two remaining arguments now. + MOVD runtime·cgoTraceback(SB), R6 + MOVD $runtime·sigtramp(SB), R8 + MOVD _cgo_callers(SB), R12 + MOVD R12, CTR + MOVD R10, LR // restore LR + JMP (CTR) + +sigtramp: + MOVD R10, LR // restore LR + JMP runtime·sigtramp(SB) + +sigtrampnog: + // Signal arrived on a non-Go thread. If this is SIGPROF, get a + // stack trace. + CMPW R3, $27 // 27 == SIGPROF + BNE sigtramp + + // Lock sigprofCallersUse (cas from 0 to 1). 
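That lock is a single compare-and-swap: the LWAR/STWCCC sequence below flips sigprofCallersUse from 0 to 1, and if the buffer is already claimed the handler simply takes the ordinary sigtramp path instead of waiting. The Go-level idiom is a try-lock built from CompareAndSwap; a minimal sketch with illustrative names (inUse, tryWithBuffer):

package main

import (
	"fmt"
	"sync/atomic"
)

var inUse uint32 // 0 = free, 1 = held, playing the role of sigprofCallersUse

// tryWithBuffer runs f only if it can claim the buffer; otherwise the caller
// falls back, just as the signal handler falls back to the ordinary sigtramp.
func tryWithBuffer(f func()) bool {
	if !atomic.CompareAndSwapUint32(&inUse, 0, 1) {
		return false // someone else is using the buffer
	}
	f()
	atomic.StoreUint32(&inUse, 0) // release the claim
	return true
}

func main() {
	if tryWithBuffer(func() { fmt.Println("buffer held exclusively") }) {
		fmt.Println("claimed and released")
	}
}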
+ MOVW $1, R7 + MOVD $runtime·sigprofCallersUse(SB), R8 + SYNC + LWAR (R8), R6 + CMPW $0, R6 + BNE sigtramp + STWCCC R7, (R8) + BNE -4(PC) + ISYNC + + // Jump to the traceback function in runtime/cgo. + // It will call back to sigprofNonGo, which will ignore the + // arguments passed in registers. + // First three arguments to traceback function are in registers already. + MOVD runtime·cgoTraceback(SB), R6 + MOVD $runtime·sigprofCallers(SB), R7 + MOVD $runtime·sigprofNonGoWrapper<>(SB), R8 + MOVD _cgo_callers(SB), R12 + MOVD R12, CTR + MOVD R10, LR // restore LR + JMP (CTR) #else // function descriptor for the real sigtramp TEXT runtime·cgoSigtramp(SB),NOSPLIT|NOFRAME,$0 @@ -252,10 +347,14 @@ TEXT runtime·cgoSigtramp(SB),NOSPLIT|NOFRAME,$0 DWORD $0 DWORD $0 TEXT runtime·_cgoSigtramp(SB),NOSPLIT,$0 + JMP runtime·sigtramp(SB) #endif - MOVD $runtime·sigtramp(SB), R12 - MOVD R12, CTR - JMP (CTR) + +TEXT runtime·sigprofNonGoWrapper<>(SB),NOSPLIT,$0 + // We're coming from C code, set up essential register, then call sigprofNonGo. + CALL runtime·reginit(SB) + CALL runtime·sigprofNonGo(SB) + RET TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0 MOVD addr+0(FP), R3 @@ -266,7 +365,13 @@ TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0 MOVW off+28(FP), R8 SYSCALL $SYS_mmap - MOVD R3, ret+32(FP) + BVC ok + MOVD $0, p+32(FP) + MOVD R3, err+40(FP) + RET +ok: + MOVD R3, p+32(FP) + MOVD $0, err+40(FP) RET TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0 diff --git a/src/runtime/sys_linux_s390x.s b/src/runtime/sys_linux_s390x.s index b8099e2553..72b024434f 100644 --- a/src/runtime/sys_linux_s390x.s +++ b/src/runtime/sys_linux_s390x.s @@ -49,11 +49,16 @@ TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4 SYSCALL RET -TEXT runtime·exit1(SB),NOSPLIT|NOFRAME,$0-4 - MOVW code+0(FP), R2 +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8 + MOVD wait+0(FP), R1 + // We're done using the stack. + MOVW $0, R2 + MOVW R2, (R1) + MOVW $0, R2 // exit code MOVW $SYS_exit, R1 SYSCALL - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT|NOFRAME,$0-20 MOVD name+0(FP), R2 @@ -246,7 +251,7 @@ TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 BR runtime·sigtramp(SB) // func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer -TEXT runtime·mmap(SB),NOSPLIT,$48-40 +TEXT runtime·mmap(SB),NOSPLIT,$48-48 MOVD addr+0(FP), R2 MOVD n+8(FP), R3 MOVW prot+16(FP), R4 @@ -267,9 +272,14 @@ TEXT runtime·mmap(SB),NOSPLIT,$48-40 MOVW $SYS_mmap, R1 SYSCALL MOVD $-4095, R3 - CMPUBLT R2, R3, 2(PC) + CMPUBLT R2, R3, ok NEG R2 - MOVD R2, ret+32(FP) + MOVD $0, p+32(FP) + MOVD R2, err+40(FP) + RET +ok: + MOVD R2, p+32(FP) + MOVD $0, err+40(FP) RET TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0 diff --git a/src/runtime/sys_nacl_386.s b/src/runtime/sys_nacl_386.s index d945453970..cdc8ff1a02 100644 --- a/src/runtime/sys_nacl_386.s +++ b/src/runtime/sys_nacl_386.s @@ -16,11 +16,13 @@ TEXT runtime·exit(SB),NOSPLIT,$4 NACL_SYSCALL(SYS_exit) JMP 0(PC) -TEXT runtime·exit1(SB),NOSPLIT,$4 - MOVL code+0(FP), AX +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$4-4 + MOVL wait+0(FP), AX + // SYS_thread_exit will clear *wait when the stack is free. 
MOVL AX, 0(SP) NACL_SYSCALL(SYS_thread_exit) - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$12 MOVL name+0(FP), AX @@ -228,9 +230,14 @@ TEXT runtime·mmap(SB),NOSPLIT,$32 MOVL AX, 20(SP) NACL_SYSCALL(SYS_mmap) CMPL AX, $-4095 - JNA 2(PC) + JNA ok NEGL AX - MOVL AX, ret+24(FP) + MOVL $0, p+24(FP) + MOVL AX, err+28(FP) + RET +ok: + MOVL AX, p+24(FP) + MOVL $0, err+28(FP) RET TEXT runtime·walltime(SB),NOSPLIT,$20 diff --git a/src/runtime/sys_nacl_amd64p32.s b/src/runtime/sys_nacl_amd64p32.s index 2a3998391c..ff4c2e7bb5 100644 --- a/src/runtime/sys_nacl_amd64p32.s +++ b/src/runtime/sys_nacl_amd64p32.s @@ -19,10 +19,12 @@ TEXT runtime·exit(SB),NOSPLIT,$0 NACL_SYSCALL(SYS_exit) RET -TEXT runtime·exit1(SB),NOSPLIT,$0 - MOVL code+0(FP), DI +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-4 + MOVL wait+0(FP), DI + // SYS_thread_exit will clear *wait when the stack is free. NACL_SYSCALL(SYS_thread_exit) - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$0 MOVL name+0(FP), DI @@ -237,9 +239,14 @@ TEXT runtime·mmap(SB),NOSPLIT,$8 MOVL SP, R9 NACL_SYSCALL(SYS_mmap) CMPL AX, $-4095 - JNA 2(PC) + JNA ok NEGL AX - MOVL AX, ret+24(FP) + MOVL $0, p+24(FP) + MOVL AX, err+28(FP) + RET +ok: + MOVL AX, p+24(FP) + MOVL $0, err+28(FP) RET TEXT runtime·walltime(SB),NOSPLIT,$16 diff --git a/src/runtime/sys_nacl_arm.s b/src/runtime/sys_nacl_arm.s index 6a6ef4e6b2..6e01fe42e8 100644 --- a/src/runtime/sys_nacl_arm.s +++ b/src/runtime/sys_nacl_arm.s @@ -15,10 +15,12 @@ TEXT runtime·exit(SB),NOSPLIT,$0 NACL_SYSCALL(SYS_exit) RET -TEXT runtime·exit1(SB),NOSPLIT,$0 - MOVW code+0(FP), R0 +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$4-4 + MOVW wait+0(FP), R0 + // SYS_thread_exit will clear *wait when the stack is free. NACL_SYSCALL(SYS_thread_exit) - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$0 MOVW name+0(FP), R0 @@ -192,8 +194,12 @@ TEXT runtime·mmap(SB),NOSPLIT,$8 NACL_SYSCALL(SYS_mmap) MOVM.IA.W (R13), [R4, R5] CMP $-4095, R0 + MOVW $0, R1 RSB.HI $0, R0 - MOVW R0, ret+24(FP) + MOVW.HI R0, R1 // if error, put in R1 + MOVW.HI $0, R0 + MOVW R0, p+24(FP) + MOVW R1, err+28(FP) RET TEXT runtime·walltime(SB),NOSPLIT,$16 diff --git a/src/runtime/sys_netbsd_386.s b/src/runtime/sys_netbsd_386.s index 742193cf28..4042ab4f8a 100644 --- a/src/runtime/sys_netbsd_386.s +++ b/src/runtime/sys_netbsd_386.s @@ -17,12 +17,15 @@ TEXT runtime·exit(SB),NOSPLIT,$-4 MOVL $0xf1, 0xf1 // crash RET -TEXT runtime·exit1(SB),NOSPLIT,$-4 +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-4 + MOVL wait+0(FP), AX + // We're done using the stack. 
+ MOVL $0, (AX) MOVL $310, AX // sys__lwp_exit INT $0x80 - JAE 2(PC) MOVL $0xf1, 0xf1 // crash - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$-4 MOVL $5, AX @@ -113,7 +116,13 @@ TEXT runtime·mmap(SB),NOSPLIT,$36 STOSL MOVL $197, AX // sys_mmap INT $0x80 - MOVL AX, ret+24(FP) + JAE ok + MOVL $0, p+24(FP) + MOVL AX, err+28(FP) + RET +ok: + MOVL AX, p+24(FP) + MOVL $0, err+28(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$-4 @@ -155,7 +164,7 @@ TEXT runtime·walltime(SB), NOSPLIT, $32 // void nanotime(int64 *nsec) TEXT runtime·nanotime(SB),NOSPLIT,$32 LEAL 12(SP), BX - MOVL $0, 4(SP) // arg 1 - clock_id + MOVL $3, 4(SP) // arg 1 - clock_id CLOCK_MONOTONIC MOVL BX, 8(SP) // arg 2 - tp MOVL $427, AX // sys_clock_gettime INT $0x80 @@ -298,7 +307,7 @@ TEXT runtime·lwp_tramp(SB),NOSPLIT,$0 // Call fn CALL SI - CALL runtime·exit1(SB) + // fn should never return MOVL $0x1234, 0x1005 RET @@ -337,9 +346,9 @@ TEXT runtime·osyield(SB),NOSPLIT,$-4 RET TEXT runtime·lwp_park(SB),NOSPLIT,$-4 - MOVL $434, AX // sys__lwp_park + MOVL $478, AX // sys__lwp_park INT $0x80 - MOVL AX, ret+16(FP) + MOVL AX, ret+24(FP) RET TEXT runtime·lwp_unpark(SB),NOSPLIT,$-4 @@ -366,10 +375,12 @@ TEXT runtime·sysctl(SB),NOSPLIT,$28 MOVSL // arg 6 - newlen MOVL $202, AX // sys___sysctl INT $0x80 - JCC 3(PC) + JAE 4(PC) NEGL AX + MOVL AX, ret+24(FP) RET MOVL $0, AX + MOVL AX, ret+24(FP) RET GLOBL runtime·tlsoffset(SB),NOPTR,$4 diff --git a/src/runtime/sys_netbsd_amd64.s b/src/runtime/sys_netbsd_amd64.s index c632a0b969..11b9c1b417 100644 --- a/src/runtime/sys_netbsd_amd64.s +++ b/src/runtime/sys_netbsd_amd64.s @@ -48,13 +48,15 @@ TEXT runtime·osyield(SB),NOSPLIT,$0 RET TEXT runtime·lwp_park(SB),NOSPLIT,$0 - MOVQ abstime+0(FP), DI // arg 1 - abstime - MOVL unpark+8(FP), SI // arg 2 - unpark - MOVQ hint+16(FP), DX // arg 3 - hint - MOVQ unparkhint+24(FP), R10 // arg 4 - unparkhint - MOVL $434, AX // sys__lwp_park + MOVL clockid+0(FP), DI // arg 1 - clockid + MOVL flags+4(FP), SI // arg 2 - flags + MOVQ ts+8(FP), DX // arg 3 - ts + MOVL unpark+16(FP), R10 // arg 4 - unpark + MOVQ hint+24(FP), R8 // arg 5 - hint + MOVQ unparkhint+32(FP), R9 // arg 6 - unparkhint + MOVL $478, AX // sys__lwp_park SYSCALL - MOVL AX, ret+32(FP) + MOVL AX, ret+40(FP) RET TEXT runtime·lwp_unpark(SB),NOSPLIT,$0 @@ -79,11 +81,15 @@ TEXT runtime·exit(SB),NOSPLIT,$-8 MOVL $0xf1, 0xf1 // crash RET -TEXT runtime·exit1(SB),NOSPLIT,$-8 +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-8 + MOVQ wait+0(FP), AX + // We're done using the stack. 
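Both NetBSD nanotime stubs in this patch also change clock_id from 0 (CLOCK_REALTIME) to 3 (CLOCK_MONOTONIC); nanotime feeds Go's monotonic readings, so it must not jump when the wall clock is stepped. A user-level illustration of the wall/monotonic split this preserves:

package main

import (
	"fmt"
	"time"
)

func main() {
	start := time.Now() // carries both a wall reading and a monotonic reading
	time.Sleep(10 * time.Millisecond)

	fmt.Println(time.Since(start))              // computed from the monotonic reading
	fmt.Println(time.Now().Sub(start.Round(0))) // Round(0) strips it: wall-clock difference only
}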
+ MOVL $0, (AX) MOVL $310, AX // sys__lwp_exit SYSCALL MOVL $0xf1, 0xf1 // crash - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$-8 MOVQ name+0(FP), DI // arg 1 pathname @@ -184,7 +190,7 @@ TEXT runtime·walltime(SB), NOSPLIT, $32 RET TEXT runtime·nanotime(SB),NOSPLIT,$32 - MOVQ $0, DI // arg 1 - clock_id + MOVQ $3, DI // arg 1 - clock_id CLOCK_MONOTONIC LEAQ 8(SP), SI // arg 2 - tp MOVL $427, AX // sys_clock_gettime SYSCALL @@ -286,8 +292,15 @@ TEXT runtime·mmap(SB),NOSPLIT,$0 MOVQ $0, R9 // arg 6 - pad MOVL $197, AX // sys_mmap SYSCALL + JCC ok + ADDQ $16, SP + MOVQ $0, p+32(FP) + MOVQ AX, err+40(FP) + RET +ok: ADDQ $16, SP - MOVQ AX, ret+32(FP) + MOVQ AX, p+32(FP) + MOVQ $0, err+40(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$0 diff --git a/src/runtime/sys_netbsd_arm.s b/src/runtime/sys_netbsd_arm.s index 789b12ef5b..7d2e290dd9 100644 --- a/src/runtime/sys_netbsd_arm.s +++ b/src/runtime/sys_netbsd_arm.s @@ -18,12 +18,21 @@ TEXT runtime·exit(SB),NOSPLIT,$-4 MOVW.CS R8, (R8) RET -TEXT runtime·exit1(SB),NOSPLIT,$-4 +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-4 + MOVW wait+0(FP), R0 + // We're done using the stack. + MOVW $0, R2 +storeloop: + LDREX (R0), R4 // loads R4 + STREX R2, (R0), R1 // stores R2 + CMP $0, R1 + BNE storeloop SWI $0xa00136 // sys__lwp_exit MOVW $1, R8 // crash MOVW R8, (R8) - RET - + JMP 0(PC) + TEXT runtime·open(SB),NOSPLIT,$-8 MOVW name+0(FP), R0 MOVW mode+4(FP), R1 @@ -71,13 +80,17 @@ TEXT runtime·osyield(SB),NOSPLIT,$0 SWI $0xa0015e // sys_sched_yield RET -TEXT runtime·lwp_park(SB),NOSPLIT,$0 - MOVW abstime+0(FP), R0 // arg 1 - abstime - MOVW unpark+4(FP), R1 // arg 2 - unpark - MOVW hint+8(FP), R2 // arg 3 - hint - MOVW unparkhint+12(FP), R3 // arg 4 - unparkhint - SWI $0xa001b2 // sys__lwp_park - MOVW R0, ret+16(FP) +TEXT runtime·lwp_park(SB),NOSPLIT,$8 + MOVW clockid+0(FP), R0 // arg 1 - clock_id + MOVW flags+4(FP), R1 // arg 2 - flags + MOVW ts+8(FP), R2 // arg 3 - ts + MOVW unpark+12(FP), R3 // arg 4 - unpark + MOVW hint+16(FP), R4 // arg 5 - hint + MOVW R4, 4(R13) + MOVW unparkhint+20(FP), R5 // arg 6 - unparkhint + MOVW R5, 8(R13) + SWI $0xa001de // sys__lwp_park + MOVW R0, ret+24(FP) RET TEXT runtime·lwp_unpark(SB),NOSPLIT,$0 @@ -155,7 +168,7 @@ TEXT runtime·walltime(SB), NOSPLIT, $32 // int64 nanotime(void) so really // void nanotime(int64 *nsec) TEXT runtime·nanotime(SB), NOSPLIT, $32 - MOVW $0, R0 // CLOCK_REALTIME + MOVW $3, R0 // CLOCK_MONOTONIC MOVW $8(R13), R1 SWI $0xa001ab // clock_gettime @@ -255,7 +268,11 @@ TEXT runtime·mmap(SB),NOSPLIT,$12 ADD $4, R13 // pass arg 5 and arg 6 on stack SWI $0xa000c5 // sys_mmap SUB $4, R13 - MOVW R0, ret+24(FP) + MOVW $0, R1 + MOVW.CS R0, R1 // if error, move to R1 + MOVW.CS $0, R0 + MOVW R0, p+24(FP) + MOVW R1, err+28(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$0 diff --git a/src/runtime/sys_openbsd_386.s b/src/runtime/sys_openbsd_386.s index fb2a6883df..475a937798 100644 --- a/src/runtime/sys_openbsd_386.s +++ b/src/runtime/sys_openbsd_386.s @@ -19,14 +19,21 @@ TEXT runtime·exit(SB),NOSPLIT,$-4 MOVL $0xf1, 0xf1 // crash RET -TEXT runtime·exit1(SB),NOSPLIT,$8 - MOVL $0, 0(SP) - MOVL $0, 4(SP) // arg 1 - notdead +GLOBL exitStack<>(SB),RODATA,$8 +DATA exitStack<>+0x00(SB)/4, $0 +DATA exitStack<>+0x04(SB)/4, $0 + +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-4 + MOVL wait+0(FP), AX + // We're done using the stack. + MOVL $0, (AX) + // sys__lwp_exit takes 1 argument, which it expects on the stack. 
+ MOVL $exitStack<>(SB), SP MOVL $302, AX // sys___threxit INT $0x80 - JAE 2(PC) MOVL $0xf1, 0xf1 // crash - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$-4 MOVL $5, AX @@ -118,7 +125,13 @@ TEXT runtime·mmap(SB),NOSPLIT,$36 STOSL MOVL $197, AX // sys_mmap INT $0x80 - MOVL AX, ret+24(FP) + JAE ok + MOVL $0, p+24(FP) + MOVL AX, err+28(FP) + RET +ok: + MOVL AX, p+24(FP) + MOVL $0, err+28(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$-4 @@ -308,7 +321,7 @@ TEXT runtime·tfork(SB),NOSPLIT,$12 // Call fn. CALL SI - CALL runtime·exit1(SB) + // fn should never return. MOVL $0x1234, 0x1005 RET diff --git a/src/runtime/sys_openbsd_amd64.s b/src/runtime/sys_openbsd_amd64.s index 9a52e5d9ef..658f2c49dc 100644 --- a/src/runtime/sys_openbsd_amd64.s +++ b/src/runtime/sys_openbsd_amd64.s @@ -88,12 +88,16 @@ TEXT runtime·exit(SB),NOSPLIT,$-8 MOVL $0xf1, 0xf1 // crash RET -TEXT runtime·exit1(SB),NOSPLIT,$-8 +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-8 + MOVQ wait+0(FP), AX + // We're done using the stack. + MOVL $0, (AX) MOVQ $0, DI // arg 1 - notdead MOVL $302, AX // sys___threxit SYSCALL MOVL $0xf1, 0xf1 // crash - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$-8 MOVQ name+0(FP), DI // arg 1 pathname @@ -278,8 +282,15 @@ TEXT runtime·mmap(SB),NOSPLIT,$0 MOVQ $0, R9 // arg 6 - pad MOVL $197, AX SYSCALL + JCC ok + ADDQ $16, SP + MOVQ $0, p+32(FP) + MOVQ AX, err+40(FP) + RET +ok: ADDQ $16, SP - MOVQ AX, ret+32(FP) + MOVQ AX, p+32(FP) + MOVQ $0, err+40(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$0 diff --git a/src/runtime/sys_openbsd_arm.s b/src/runtime/sys_openbsd_arm.s index 93a5d5b7f6..ea7538630d 100644 --- a/src/runtime/sys_openbsd_arm.s +++ b/src/runtime/sys_openbsd_arm.s @@ -22,13 +22,22 @@ TEXT runtime·exit(SB),NOSPLIT,$-4 MOVW.CS R8, (R8) RET -TEXT runtime·exit1(SB),NOSPLIT,$-4 +// func exitThread(wait *uint32) +TEXT runtime·exitThread(SB),NOSPLIT,$0-4 + MOVW wait+0(FP), R0 + // We're done using the stack. + MOVW $0, R2 +storeloop: + LDREX (R0), R4 // loads R4 + STREX R2, (R0), R1 // stores R2 + CMP $0, R1 + BNE storeloop MOVW $0, R0 // arg 1 - notdead MOVW $302, R12 // sys___threxit SWI $0 MOVW.CS $1, R8 // crash on syscall failure MOVW.CS R8, (R8) - RET + JMP 0(PC) TEXT runtime·open(SB),NOSPLIT,$-4 MOVW name+0(FP), R0 // arg 1 - path @@ -120,7 +129,11 @@ TEXT runtime·mmap(SB),NOSPLIT,$16 MOVW $197, R12 // sys_mmap SWI $0 SUB $4, R13 - MOVW R0, ret+24(FP) + MOVW $0, R1 + MOVW.CS R0, R1 // if error, move to R1 + MOVW.CS $0, R0 + MOVW R0, p+24(FP) + MOVW R1, err+28(FP) RET TEXT runtime·munmap(SB),NOSPLIT,$0 @@ -269,7 +282,7 @@ TEXT runtime·tfork(SB),NOSPLIT,$0 // Call fn. BL (R6) - BL runtime·exit1(SB) + // fn should never return. MOVW $2, R8 // crash if reached MOVW R8, (R8) RET diff --git a/src/runtime/sys_plan9_386.s b/src/runtime/sys_plan9_386.s index 688bd2371a..47dcb8db04 100644 --- a/src/runtime/sys_plan9_386.s +++ b/src/runtime/sys_plan9_386.s @@ -139,7 +139,7 @@ TEXT runtime·rfork(SB),NOSPLIT,$0 MOVL AX, ret+4(FP) RET -TEXT runtime·tstart_plan9(SB),NOSPLIT,$0 +TEXT runtime·tstart_plan9(SB),NOSPLIT,$4 MOVL newm+0(FP), CX MOVL m_g0(CX), DX @@ -163,8 +163,10 @@ TEXT runtime·tstart_plan9(SB),NOSPLIT,$0 CALL runtime·stackcheck(SB) // smashes AX, CX CALL runtime·mstart(SB) - MOVL $0x1234, 0x1234 // not reached - RET + // Exit the thread. 
+ MOVL $0, 0(SP) + CALL runtime·exits(SB) + JMP 0(PC) // void sigtramp(void *ureg, int8 *note) TEXT runtime·sigtramp(SB),NOSPLIT,$0 diff --git a/src/runtime/sys_plan9_amd64.s b/src/runtime/sys_plan9_amd64.s index d7bd92c1b4..8077d6d324 100644 --- a/src/runtime/sys_plan9_amd64.s +++ b/src/runtime/sys_plan9_amd64.s @@ -136,7 +136,7 @@ TEXT runtime·rfork(SB),NOSPLIT,$0 MOVL AX, ret+8(FP) RET -TEXT runtime·tstart_plan9(SB),NOSPLIT,$0 +TEXT runtime·tstart_plan9(SB),NOSPLIT,$8 MOVQ newm+0(FP), CX MOVQ m_g0(CX), DX @@ -160,8 +160,10 @@ TEXT runtime·tstart_plan9(SB),NOSPLIT,$0 CALL runtime·stackcheck(SB) // smashes AX, CX CALL runtime·mstart(SB) - MOVQ $0x1234, 0x1234 // not reached - RET + // Exit the thread. + MOVQ $0, 0(SP) + CALL runtime·exits(SB) + JMP 0(PC) // This is needed by asm_amd64.s TEXT runtime·settls(SB),NOSPLIT,$0 diff --git a/src/runtime/sys_plan9_arm.s b/src/runtime/sys_plan9_arm.s index 94a6f63974..efaf60337b 100644 --- a/src/runtime/sys_plan9_arm.s +++ b/src/runtime/sys_plan9_arm.s @@ -207,7 +207,7 @@ TEXT runtime·rfork(SB),NOSPLIT,$0-8 RET //func tstart_plan9(newm *m) -TEXT runtime·tstart_plan9(SB),NOSPLIT,$0-4 +TEXT runtime·tstart_plan9(SB),NOSPLIT,$4-4 MOVW newm+0(FP), R1 MOVW m_g0(R1), g @@ -226,9 +226,11 @@ TEXT runtime·tstart_plan9(SB),NOSPLIT,$0-4 BL runtime·mstart(SB) - MOVW $0x1234, R0 - MOVW R0, 0(R0) // not reached - RET + // Exit the thread. + MOVW $0, R0 + MOVW R0, 4(R13) + CALL runtime·exits(SB) + JMP 0(PC) //func sigtramp(ureg, note unsafe.Pointer) TEXT runtime·sigtramp(SB),NOSPLIT,$0-8 diff --git a/src/runtime/sys_solaris_amd64.s b/src/runtime/sys_solaris_amd64.s index aeb2e2c897..2b6dabab99 100644 --- a/src/runtime/sys_solaris_amd64.s +++ b/src/runtime/sys_solaris_amd64.s @@ -183,9 +183,6 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$0 JMP exit allgood: - // save g - MOVQ R10, 80(SP) - // Save m->libcall and m->scratch. We need to do this because we // might get interrupted by a signal in runtime·asmcgocall. @@ -223,19 +220,11 @@ allgood: MOVL 0(R10), R10 MOVQ R10, 160(SP) - MOVQ g(BX), R10 - // g = m->gsignal - MOVQ m_gsignal(BP), BP - MOVQ BP, g(BX) - - // TODO: If current SP is not in gsignal.stack, then adjust. - // prepare call MOVQ DI, 0(SP) MOVQ SI, 8(SP) MOVQ DX, 16(SP) - MOVQ R10, 24(SP) - CALL runtime·sighandler(SB) + CALL runtime·sigtrampgo(SB) get_tls(BX) MOVQ g(BX), BP @@ -273,10 +262,6 @@ allgood: MOVQ 160(SP), R10 MOVL R10, 0(R11) - // restore g - MOVQ 80(SP), R10 - MOVQ R10, g(BX) - exit: // restore registers MOVQ 32(SP), BX diff --git a/src/runtime/sys_x86.go b/src/runtime/sys_x86.go index 7e4e27354e..2b4ed8bdf5 100644 --- a/src/runtime/sys_x86.go +++ b/src/runtime/sys_x86.go @@ -11,7 +11,7 @@ import ( "unsafe" ) -// adjust Gobuf as it if executed a call to fn with context ctxt +// adjust Gobuf as if it executed a call to fn with context ctxt // and then did an immediate gosave. 
func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) { sp := buf.sp diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go index ca8ea8b04f..134d4dbd99 100644 --- a/src/runtime/syscall_windows.go +++ b/src/runtime/syscall_windows.go @@ -93,6 +93,8 @@ const _LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 //go:linkname syscall_loadsystemlibrary syscall.loadsystemlibrary //go:nosplit func syscall_loadsystemlibrary(filename *uint16) (handle, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall if useLoadLibraryEx { @@ -126,6 +128,8 @@ func syscall_loadsystemlibrary(filename *uint16) (handle, err uintptr) { //go:linkname syscall_loadlibrary syscall.loadlibrary //go:nosplit func syscall_loadlibrary(filename *uint16) (handle, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = getLoadLibrary() c.n = 1 @@ -141,6 +145,8 @@ func syscall_loadlibrary(filename *uint16) (handle, err uintptr) { //go:linkname syscall_getprocaddress syscall.getprocaddress //go:nosplit func syscall_getprocaddress(handle uintptr, procname *byte) (outhandle, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = getGetProcAddress() c.n = 2 @@ -156,6 +162,8 @@ func syscall_getprocaddress(handle uintptr, procname *byte) (outhandle, err uint //go:linkname syscall_Syscall syscall.Syscall //go:nosplit func syscall_Syscall(fn, nargs, a1, a2, a3 uintptr) (r1, r2, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = fn c.n = nargs @@ -167,6 +175,8 @@ func syscall_Syscall(fn, nargs, a1, a2, a3 uintptr) (r1, r2, err uintptr) { //go:linkname syscall_Syscall6 syscall.Syscall6 //go:nosplit func syscall_Syscall6(fn, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = fn c.n = nargs @@ -178,6 +188,8 @@ func syscall_Syscall6(fn, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err ui //go:linkname syscall_Syscall9 syscall.Syscall9 //go:nosplit func syscall_Syscall9(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = fn c.n = nargs @@ -189,6 +201,8 @@ func syscall_Syscall9(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1 //go:linkname syscall_Syscall12 syscall.Syscall12 //go:nosplit func syscall_Syscall12(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12 uintptr) (r1, r2, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = fn c.n = nargs @@ -200,6 +214,8 @@ func syscall_Syscall12(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, //go:linkname syscall_Syscall15 syscall.Syscall15 //go:nosplit func syscall_Syscall15(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = fn c.n = nargs @@ -207,9 +223,3 @@ func syscall_Syscall15(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, cgocall(asmstdcallAddr, unsafe.Pointer(c)) return c.r1, c.r2, c.err } - -//go:linkname syscall_exit syscall.Exit -//go:nosplit -func syscall_exit(code int) { - exit(int32(code)) -} diff --git a/src/runtime/syscall_windows_test.go b/src/runtime/syscall_windows_test.go index 3da154dfa8..dfde12a211 100644 --- a/src/runtime/syscall_windows_test.go +++ b/src/runtime/syscall_windows_test.go @@ -15,6 +15,7 @@ import ( "os/exec" "path/filepath" "runtime" + "strconv" 
"strings" "syscall" "testing" @@ -537,6 +538,17 @@ func TestWERDialogue(t *testing.T) { cmd.CombinedOutput() } +func TestWindowsStackMemory(t *testing.T) { + o := runTestProg(t, "testprog", "StackMemory") + stackUsage, err := strconv.Atoi(o) + if err != nil { + t.Fatalf("Failed to read stack usage: %v", err) + } + if expected, got := 100<<10, stackUsage; got > expected { + t.Fatalf("expected < %d bytes of memory per thread, got %d", expected, got) + } +} + var used byte func use(buf []byte) { @@ -1043,7 +1055,7 @@ func BenchmarkRunningGoProgram(b *testing.B) { } exe := filepath.Join(tmpdir, "main.exe") - cmd := exec.Command("go", "build", "-o", exe, src) + cmd := exec.Command(testenv.GoToolPath(b), "build", "-o", exe, src) cmd.Dir = tmpdir out, err := cmd.CombinedOutput() if err != nil { diff --git a/src/runtime/testdata/testprog/gettid.go b/src/runtime/testdata/testprog/gettid.go new file mode 100644 index 0000000000..1b3e29ab08 --- /dev/null +++ b/src/runtime/testdata/testprog/gettid.go @@ -0,0 +1,29 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux + +package main + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "syscall" +) + +func gettid() int { + return syscall.Gettid() +} + +func tidExists(tid int) (exists, supported bool) { + stat, err := ioutil.ReadFile(fmt.Sprintf("/proc/self/task/%d/stat", tid)) + if os.IsNotExist(err) { + return false, true + } + // Check if it's a zombie thread. + state := bytes.Fields(stat)[2] + return !(len(state) == 1 && state[0] == 'Z'), true +} diff --git a/src/runtime/testdata/testprog/gettid_none.go b/src/runtime/testdata/testprog/gettid_none.go new file mode 100644 index 0000000000..036db87e10 --- /dev/null +++ b/src/runtime/testdata/testprog/gettid_none.go @@ -0,0 +1,15 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !linux + +package main + +func gettid() int { + return 0 +} + +func tidExists(tid int) (exists, supported bool) { + return false, false +} diff --git a/src/runtime/testdata/testprog/lockosthread.go b/src/runtime/testdata/testprog/lockosthread.go new file mode 100644 index 0000000000..88c0d12e4c --- /dev/null +++ b/src/runtime/testdata/testprog/lockosthread.go @@ -0,0 +1,94 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "os" + "runtime" + "time" +) + +var mainTID int + +func init() { + registerInit("LockOSThreadMain", func() { + // init is guaranteed to run on the main thread. + mainTID = gettid() + }) + register("LockOSThreadMain", LockOSThreadMain) + + registerInit("LockOSThreadAlt", func() { + // Lock the OS thread now so main runs on the main thread. + runtime.LockOSThread() + }) + register("LockOSThreadAlt", LockOSThreadAlt) +} + +func LockOSThreadMain() { + // gettid only works on Linux, so on other platforms this just + // checks that the runtime doesn't do anything terrible. + + // This requires GOMAXPROCS=1 from the beginning to reliably + // start a goroutine on the main thread. + if runtime.GOMAXPROCS(-1) != 1 { + println("requires GOMAXPROCS=1") + os.Exit(1) + } + + ready := make(chan bool, 1) + go func() { + // Because GOMAXPROCS=1, this *should* be on the main + // thread. Stay there. 
+ runtime.LockOSThread() + if mainTID != 0 && gettid() != mainTID { + println("failed to start goroutine on main thread") + os.Exit(1) + } + // Exit with the thread locked, which should exit the + // main thread. + ready <- true + }() + <-ready + time.Sleep(1 * time.Millisecond) + // Check that this goroutine is still running on a different + // thread. + if mainTID != 0 && gettid() == mainTID { + println("goroutine migrated to locked thread") + os.Exit(1) + } + println("OK") +} + +func LockOSThreadAlt() { + // This is running locked to the main OS thread. + + var subTID int + ready := make(chan bool, 1) + go func() { + // This goroutine must be running on a new thread. + runtime.LockOSThread() + subTID = gettid() + ready <- true + // Exit with the thread locked. + }() + <-ready + runtime.UnlockOSThread() + for i := 0; i < 100; i++ { + time.Sleep(1 * time.Millisecond) + // Check that this goroutine is running on a different thread. + if subTID != 0 && gettid() == subTID { + println("locked thread reused") + os.Exit(1) + } + exists, supported := tidExists(subTID) + if !supported || !exists { + goto ok + } + } + println("sub thread", subTID, "still running") + return +ok: + println("OK") +} diff --git a/src/runtime/testdata/testprog/syscall_windows.go b/src/runtime/testdata/testprog/syscall_windows.go index 6e6782e987..b4b66441b8 100644 --- a/src/runtime/testdata/testprog/syscall_windows.go +++ b/src/runtime/testdata/testprog/syscall_windows.go @@ -4,11 +4,18 @@ package main -import "syscall" +import ( + "internal/syscall/windows" + "runtime" + "sync" + "syscall" + "unsafe" +) func init() { register("RaiseException", RaiseException) register("ZeroDivisionException", ZeroDivisionException) + register("StackMemory", StackMemory) } func RaiseException() { @@ -25,3 +32,39 @@ func ZeroDivisionException() { z := x / y println(z) } + +func getPagefileUsage() (uintptr, error) { + p, err := syscall.GetCurrentProcess() + if err != nil { + return 0, err + } + var m windows.PROCESS_MEMORY_COUNTERS + err = windows.GetProcessMemoryInfo(p, &m, uint32(unsafe.Sizeof(m))) + if err != nil { + return 0, err + } + return m.PagefileUsage, nil +} + +func StackMemory() { + mem1, err := getPagefileUsage() + if err != nil { + panic(err) + } + const threadCount = 100 + var wg sync.WaitGroup + for i := 0; i < threadCount; i++ { + wg.Add(1) + go func() { + runtime.LockOSThread() + wg.Done() + select {} + }() + } + wg.Wait() + mem2, err := getPagefileUsage() + if err != nil { + panic(err) + } + print((mem2 - mem1) / threadCount) +} diff --git a/src/runtime/testdata/testprogcgo/callback.go b/src/runtime/testdata/testprogcgo/callback.go index 7d9d68ddd1..be0409f39d 100644 --- a/src/runtime/testdata/testprogcgo/callback.go +++ b/src/runtime/testdata/testprogcgo/callback.go @@ -29,6 +29,7 @@ import "C" import ( "fmt" + "os" "runtime" ) @@ -63,7 +64,10 @@ func grow1(x, sum *int) int { } func CgoCallbackGC() { - const P = 100 + P := 100 + if os.Getenv("RUNTIME_TESTING_SHORT") != "" { + P = 10 + } done := make(chan bool) // allocate a bunch of stack frames and spray them with pointers for i := 0; i < P; i++ { diff --git a/src/runtime/testdata/testprogcgo/catchpanic.go b/src/runtime/testdata/testprogcgo/catchpanic.go new file mode 100644 index 0000000000..55a606d1bc --- /dev/null +++ b/src/runtime/testdata/testprogcgo/catchpanic.go @@ -0,0 +1,46 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build !plan9,!windows + +package main + +/* +#include <signal.h> +#include <stdlib.h> +#include <string.h> + +static void abrthandler(int signum) { + if (signum == SIGABRT) { + exit(0); // success + } +} + +void registerAbortHandler() { + struct sigaction act; + memset(&act, 0, sizeof act); + act.sa_handler = abrthandler; + sigaction(SIGABRT, &act, NULL); +} + +static void __attribute__ ((constructor)) sigsetup(void) { + if (getenv("CGOCATCHPANIC_EARLY_HANDLER") == NULL) + return; + registerAbortHandler(); +} +*/ +import "C" +import "os" + +func init() { + register("CgoCatchPanic", CgoCatchPanic) +} + +// Test that the SIGABRT raised by panic can be caught by an early signal handler. +func CgoCatchPanic() { + if _, ok := os.LookupEnv("CGOCATCHPANIC_EARLY_HANDLER"); !ok { + C.registerAbortHandler() + } + panic("catch me") +} diff --git a/src/runtime/testdata/testprogcgo/cgo.go b/src/runtime/testdata/testprogcgo/cgo.go index 209524a24d..a587db385b 100644 --- a/src/runtime/testdata/testprogcgo/cgo.go +++ b/src/runtime/testdata/testprogcgo/cgo.go @@ -52,7 +52,11 @@ func CgoSignalDeadlock() { time.Sleep(time.Millisecond) start := time.Now() var times []time.Duration - for i := 0; i < 64; i++ { + n := 64 + if os.Getenv("RUNTIME_TEST_SHORT") != "" { + n = 16 + } + for i := 0; i < n; i++ { go func() { runtime.LockOSThread() select {} diff --git a/src/runtime/testdata/testprogcgo/lockosthread.c b/src/runtime/testdata/testprogcgo/lockosthread.c new file mode 100644 index 0000000000..b10cc4f3b9 --- /dev/null +++ b/src/runtime/testdata/testprogcgo/lockosthread.c @@ -0,0 +1,13 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !plan9,!windows + +#include <stdint.h> + +uint32_t threadExited; + +void setExited(void *x) { + __sync_fetch_and_add(&threadExited, 1); +} diff --git a/src/runtime/testdata/testprogcgo/lockosthread.go b/src/runtime/testdata/testprogcgo/lockosthread.go new file mode 100644 index 0000000000..36423d9eb0 --- /dev/null +++ b/src/runtime/testdata/testprogcgo/lockosthread.go @@ -0,0 +1,111 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !plan9,!windows + +package main + +import ( + "os" + "runtime" + "sync/atomic" + "time" + "unsafe" +) + +/* +#include <pthread.h> +#include <stdint.h> + +extern uint32_t threadExited; + +void setExited(void *x); +*/ +import "C" + +var mainThread C.pthread_t + +func init() { + registerInit("LockOSThreadMain", func() { + // init is guaranteed to run on the main thread. + mainThread = C.pthread_self() + }) + register("LockOSThreadMain", LockOSThreadMain) + + registerInit("LockOSThreadAlt", func() { + // Lock the OS thread now so main runs on the main thread. + runtime.LockOSThread() + }) + register("LockOSThreadAlt", LockOSThreadAlt) +} + +func LockOSThreadMain() { + // This requires GOMAXPROCS=1 from the beginning to reliably + // start a goroutine on the main thread. + if runtime.GOMAXPROCS(-1) != 1 { + println("requires GOMAXPROCS=1") + os.Exit(1) + } + + ready := make(chan bool, 1) + go func() { + // Because GOMAXPROCS=1, this *should* be on the main + // thread. Stay there. 
+ runtime.LockOSThread() + self := C.pthread_self() + if C.pthread_equal(mainThread, self) == 0 { + println("failed to start goroutine on main thread") + os.Exit(1) + } + // Exit with the thread locked, which should exit the + // main thread. + ready <- true + }() + <-ready + time.Sleep(1 * time.Millisecond) + // Check that this goroutine is still running on a different + // thread. + self := C.pthread_self() + if C.pthread_equal(mainThread, self) != 0 { + println("goroutine migrated to locked thread") + os.Exit(1) + } + println("OK") +} + +func LockOSThreadAlt() { + // This is running locked to the main OS thread. + + var subThread C.pthread_t + ready := make(chan bool, 1) + C.threadExited = 0 + go func() { + // This goroutine must be running on a new thread. + runtime.LockOSThread() + subThread = C.pthread_self() + // Register a pthread destructor so we can tell this + // thread has exited. + var key C.pthread_key_t + C.pthread_key_create(&key, (*[0]byte)(unsafe.Pointer(C.setExited))) + C.pthread_setspecific(key, unsafe.Pointer(new(int))) + ready <- true + // Exit with the thread locked. + }() + <-ready + for i := 0; i < 100; i++ { + time.Sleep(1 * time.Millisecond) + // Check that this goroutine is running on a different thread. + self := C.pthread_self() + if C.pthread_equal(subThread, self) != 0 { + println("locked thread reused") + os.Exit(1) + } + if atomic.LoadUint32((*uint32)(&C.threadExited)) != 0 { + println("OK") + return + } + } + println("sub thread still running") + os.Exit(1) +} diff --git a/src/runtime/testdata/testprogcgo/sigstack.go b/src/runtime/testdata/testprogcgo/sigstack.go new file mode 100644 index 0000000000..e30a5592dc --- /dev/null +++ b/src/runtime/testdata/testprogcgo/sigstack.go @@ -0,0 +1,95 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !plan9,!windows + +// Test handling of Go-allocated signal stacks when calling from +// C-created threads with and without signal stacks. (See issue +// #22930.) + +package main + +/* +#include <pthread.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> + +#ifndef MAP_STACK +#define MAP_STACK 0 +#endif + +extern void SigStackCallback(); + +static void* WithSigStack(void* arg __attribute__((unused))) { + // Set up an alternate system stack. + void* base = mmap(0, SIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON|MAP_STACK, -1, 0); + if (base == MAP_FAILED) { + perror("mmap failed"); + abort(); + } + stack_t st = {}, ost = {}; + st.ss_sp = (char*)base; + st.ss_flags = 0; + st.ss_size = SIGSTKSZ; + if (sigaltstack(&st, &ost) < 0) { + perror("sigaltstack failed"); + abort(); + } + + // Call Go. + SigStackCallback(); + + // Disable signal stack and protect it so we can detect reuse. + if (ost.ss_flags & SS_DISABLE) { + // Darwin libsystem has a bug where it checks ss_size + // even if SS_DISABLE is set. (The kernel gets it right.) 
+ ost.ss_size = SIGSTKSZ; + } + if (sigaltstack(&ost, NULL) < 0) { + perror("sigaltstack restore failed"); + abort(); + } + mprotect(base, SIGSTKSZ, PROT_NONE); + return NULL; +} + +static void* WithoutSigStack(void* arg __attribute__((unused))) { + SigStackCallback(); + return NULL; +} + +static void DoThread(int sigstack) { + pthread_t tid; + if (sigstack) { + pthread_create(&tid, NULL, WithSigStack, NULL); + } else { + pthread_create(&tid, NULL, WithoutSigStack, NULL); + } + pthread_join(tid, NULL); +} +*/ +import "C" + +func init() { + register("SigStack", SigStack) +} + +func SigStack() { + C.DoThread(0) + C.DoThread(1) + C.DoThread(0) + C.DoThread(1) + println("OK") +} + +var BadPtr *int + +//export SigStackCallback +func SigStackCallback() { + // Cause the Go signal handler to run. + defer func() { recover() }() + *BadPtr = 42 +} diff --git a/src/runtime/testdata/testprogcgo/stack_windows.go b/src/runtime/testdata/testprogcgo/stack_windows.go new file mode 100644 index 0000000000..846297a960 --- /dev/null +++ b/src/runtime/testdata/testprogcgo/stack_windows.go @@ -0,0 +1,54 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import "C" +import ( + "internal/syscall/windows" + "runtime" + "sync" + "syscall" + "unsafe" +) + +func init() { + register("StackMemory", StackMemory) +} + +func getPagefileUsage() (uintptr, error) { + p, err := syscall.GetCurrentProcess() + if err != nil { + return 0, err + } + var m windows.PROCESS_MEMORY_COUNTERS + err = windows.GetProcessMemoryInfo(p, &m, uint32(unsafe.Sizeof(m))) + if err != nil { + return 0, err + } + return m.PagefileUsage, nil +} + +func StackMemory() { + mem1, err := getPagefileUsage() + if err != nil { + panic(err) + } + const threadCount = 100 + var wg sync.WaitGroup + for i := 0; i < threadCount; i++ { + wg.Add(1) + go func() { + runtime.LockOSThread() + wg.Done() + select {} + }() + } + wg.Wait() + mem2, err := getPagefileUsage() + if err != nil { + panic(err) + } + print((mem2 - mem1) / threadCount) +} diff --git a/src/runtime/time.go b/src/runtime/time.go index 23f61d62d0..6c349c8461 100644 --- a/src/runtime/time.go +++ b/src/runtime/time.go @@ -6,14 +6,18 @@ package runtime -import "unsafe" +import ( + "runtime/internal/sys" + "unsafe" +) // Package time knows the layout of this structure. // If this struct changes, adjust ../time/sleep.go:/runtimeTimer. // For GOOS=nacl, package syscall knows the layout of this structure. // If this struct changes, adjust ../syscall/net_nacl.go:/runtimeTimer. type timer struct { - i int // heap index + tb *timersBucket // the bucket the timer lives in + i int // heap index // Timer wakes up at when, and then at when+period, ... (period > 0 only) // each time calling f(arg, now) in the timer goroutine, so f must be @@ -25,7 +29,37 @@ type timer struct { seq uintptr } -var timers struct { +// timersLen is the length of timers array. +// +// Ideally, this would be set to GOMAXPROCS, but that would require +// dynamic reallocation +// +// The current value is a compromise between memory usage and performance +// that should cover the majority of GOMAXPROCS values used in the wild. +const timersLen = 64 + +// timers contains "per-P" timer heaps. +// +// Timers are queued into timersBucket associated with the current P, +// so each P may work with its own timers independently of other P instances. 
+// +// Each timersBucket may be associated with multiple P +// if GOMAXPROCS > timersLen. +var timers [timersLen]struct { + timersBucket + + // The padding should eliminate false sharing + // between timersBucket values. + pad [sys.CacheLineSize - unsafe.Sizeof(timersBucket{})%sys.CacheLineSize]byte +} + +func (t *timer) assignBucket() *timersBucket { + id := uint8(getg().m.p.ptr().id) % timersLen + t.tb = &timers[id].timersBucket + return t.tb +} + +type timersBucket struct { lock mutex gp *g created bool @@ -51,18 +85,20 @@ func timeSleep(ns int64) { return } - t := getg().timer + gp := getg() + t := gp.timer if t == nil { t = new(timer) - getg().timer = t + gp.timer = t } *t = timer{} t.when = nanotime() + ns t.f = goroutineReady - t.arg = getg() - lock(&timers.lock) - addtimerLocked(t) - goparkunlock(&timers.lock, "sleep", traceEvGoSleep, 2) + t.arg = gp + tb := t.assignBucket() + lock(&tb.lock) + tb.addtimerLocked(t) + goparkunlock(&tb.lock, "sleep", traceEvGoSleep, 2) } // startTimer adds t to the timer heap. @@ -89,87 +125,95 @@ func goroutineReady(arg interface{}, seq uintptr) { } func addtimer(t *timer) { - lock(&timers.lock) - addtimerLocked(t) - unlock(&timers.lock) + tb := t.assignBucket() + lock(&tb.lock) + tb.addtimerLocked(t) + unlock(&tb.lock) } // Add a timer to the heap and start or kick timerproc if the new timer is // earlier than any of the others. // Timers are locked. -func addtimerLocked(t *timer) { +func (tb *timersBucket) addtimerLocked(t *timer) { // when must never be negative; otherwise timerproc will overflow // during its delta calculation and never expire other runtime timers. if t.when < 0 { t.when = 1<<63 - 1 } - t.i = len(timers.t) - timers.t = append(timers.t, t) - siftupTimer(t.i) + t.i = len(tb.t) + tb.t = append(tb.t, t) + siftupTimer(tb.t, t.i) if t.i == 0 { // siftup moved to top: new earliest deadline. - if timers.sleeping { - timers.sleeping = false - notewakeup(&timers.waitnote) + if tb.sleeping { + tb.sleeping = false + notewakeup(&tb.waitnote) } - if timers.rescheduling { - timers.rescheduling = false - goready(timers.gp, 0) + if tb.rescheduling { + tb.rescheduling = false + goready(tb.gp, 0) } } - if !timers.created { - timers.created = true - go timerproc() + if !tb.created { + tb.created = true + go timerproc(tb) } } // Delete timer t from the heap. // Do not need to update the timerproc: if it wakes up early, no big deal. func deltimer(t *timer) bool { - // Dereference t so that any panic happens before the lock is held. - // Discard result, because t might be moving in the heap. - _ = t.i + if t.tb == nil { + // t.tb can be nil if the user created a timer + // directly, without invoking startTimer e.g + // time.Ticker{C: c} + // In this case, return early without any deletion. + // See Issue 21874. + return false + } - lock(&timers.lock) + tb := t.tb + + lock(&tb.lock) // t may not be registered anymore and may have // a bogus i (typically 0, if generated by Go). // Verify it before proceeding. 
i := t.i - last := len(timers.t) - 1 - if i < 0 || i > last || timers.t[i] != t { - unlock(&timers.lock) + last := len(tb.t) - 1 + if i < 0 || i > last || tb.t[i] != t { + unlock(&tb.lock) return false } if i != last { - timers.t[i] = timers.t[last] - timers.t[i].i = i + tb.t[i] = tb.t[last] + tb.t[i].i = i } - timers.t[last] = nil - timers.t = timers.t[:last] + tb.t[last] = nil + tb.t = tb.t[:last] if i != last { - siftupTimer(i) - siftdownTimer(i) + siftupTimer(tb.t, i) + siftdownTimer(tb.t, i) } - unlock(&timers.lock) + unlock(&tb.lock) return true } // Timerproc runs the time-driven events. -// It sleeps until the next event in the timers heap. +// It sleeps until the next event in the tb heap. // If addtimer inserts a new earlier event, it wakes timerproc early. -func timerproc() { - timers.gp = getg() +func timerproc(tb *timersBucket) { + tb.gp = getg() for { - lock(&timers.lock) - timers.sleeping = false + lock(&tb.lock) + tb.sleeping = false now := nanotime() delta := int64(-1) for { - if len(timers.t) == 0 { + if len(tb.t) == 0 { delta = -1 break } - t := timers.t[0] + t := tb.t[0] delta = t.when - now if delta > 0 { break @@ -177,43 +221,43 @@ func timerproc() { if t.period > 0 { // leave in heap but adjust next time to fire t.when += t.period * (1 + -delta/t.period) - siftdownTimer(0) + siftdownTimer(tb.t, 0) } else { // remove from heap - last := len(timers.t) - 1 + last := len(tb.t) - 1 if last > 0 { - timers.t[0] = timers.t[last] - timers.t[0].i = 0 + tb.t[0] = tb.t[last] + tb.t[0].i = 0 } - timers.t[last] = nil - timers.t = timers.t[:last] + tb.t[last] = nil + tb.t = tb.t[:last] if last > 0 { - siftdownTimer(0) + siftdownTimer(tb.t, 0) } t.i = -1 // mark as removed } f := t.f arg := t.arg seq := t.seq - unlock(&timers.lock) + unlock(&tb.lock) if raceenabled { raceacquire(unsafe.Pointer(t)) } f(arg, seq) - lock(&timers.lock) + lock(&tb.lock) } if delta < 0 || faketime > 0 { // No timers left - put goroutine to sleep. - timers.rescheduling = true - goparkunlock(&timers.lock, "timer goroutine (idle)", traceEvGoBlock, 1) + tb.rescheduling = true + goparkunlock(&tb.lock, "timer goroutine (idle)", traceEvGoBlock, 1) continue } // At least one timer pending. Sleep until then. - timers.sleeping = true - timers.sleepUntil = now + delta - noteclear(&timers.waitnote) - unlock(&timers.lock) - notetsleepg(&timers.waitnote, delta) + tb.sleeping = true + tb.sleepUntil = now + delta + noteclear(&tb.waitnote) + unlock(&tb.lock) + notetsleepg(&tb.waitnote, delta) } } @@ -222,28 +266,67 @@ func timejump() *g { return nil } - lock(&timers.lock) - if !timers.created || len(timers.t) == 0 { - unlock(&timers.lock) + for i := range timers { + lock(&timers[i].lock) + } + gp := timejumpLocked() + for i := range timers { + unlock(&timers[i].lock) + } + + return gp +} + +func timejumpLocked() *g { + // Determine a timer bucket with minimum when. + var minT *timer + for i := range timers { + tb := &timers[i] + if !tb.created || len(tb.t) == 0 { + continue + } + t := tb.t[0] + if minT == nil || t.when < minT.when { + minT = t + } + } + if minT == nil || minT.when <= faketime { + return nil + } + + faketime = minT.when + tb := minT.tb + if !tb.rescheduling { return nil } + tb.rescheduling = false + return tb.gp +} + +func timeSleepUntil() int64 { + next := int64(1<<63 - 1) - var gp *g - if faketime < timers.t[0].when { - faketime = timers.t[0].when - if timers.rescheduling { - timers.rescheduling = false - gp = timers.gp + // Determine minimum sleepUntil across all the timer buckets. 
+ // + // The function can not return a precise answer, + // as another timer may pop in as soon as timers have been unlocked. + // So lock the timers one by one instead of all at once. + for i := range timers { + tb := &timers[i] + + lock(&tb.lock) + if tb.sleeping && tb.sleepUntil < next { + next = tb.sleepUntil } + unlock(&tb.lock) } - unlock(&timers.lock) - return gp + + return next } // Heap maintenance algorithms. -func siftupTimer(i int) { - t := timers.t +func siftupTimer(t []*timer, i int) { when := t[i].when tmp := t[i] for i > 0 { @@ -253,14 +336,15 @@ func siftupTimer(i int) { } t[i] = t[p] t[i].i = i - t[p] = tmp - t[p].i = p i = p } + if tmp != t[i] { + t[i] = tmp + t[i].i = i + } } -func siftdownTimer(i int) { - t := timers.t +func siftdownTimer(t []*timer, i int) { n := len(t) when := t[i].when tmp := t[i] @@ -291,10 +375,12 @@ func siftdownTimer(i int) { } t[i] = t[c] t[i].i = i - t[c] = tmp - t[c].i = c i = c } + if tmp != t[i] { + t[i] = tmp + t[i].i = i + } } // Entry points for net, time to call nanotime. diff --git a/src/runtime/trace.go b/src/runtime/trace.go index 826dc9a999..fab797601b 100644 --- a/src/runtime/trace.go +++ b/src/runtime/trace.go @@ -28,8 +28,8 @@ const ( traceEvProcStop = 6 // stop of P [timestamp] traceEvGCStart = 7 // GC start [timestamp, seq, stack id] traceEvGCDone = 8 // GC done [timestamp] - traceEvGCScanStart = 9 // GC mark termination start [timestamp] - traceEvGCScanDone = 10 // GC mark termination done [timestamp] + traceEvGCSTWStart = 9 // GC STW start [timestamp, kind] + traceEvGCSTWDone = 10 // GC STW done [timestamp] traceEvGCSweepStart = 11 // GC sweep start [timestamp, stack id] traceEvGCSweepDone = 12 // GC sweep done [timestamp, swept, reclaimed] traceEvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id] @@ -235,21 +235,21 @@ func StartTrace() error { trace.timeStart = nanotime() trace.headerWritten = false trace.footerWritten = false - trace.strings = make(map[string]uint64) + + // string to id mapping + // 0 : reserved for an empty string + // remaining: other strings registered by traceString trace.stringSeq = 0 + trace.strings = make(map[string]uint64) + trace.seqGC = 0 _g_.m.startingtrace = false trace.enabled = true // Register runtime goroutine labels. _, pid, bufp := traceAcquireBuffer() - buf := (*bufp).ptr() - if buf == nil { - buf = traceFlush(0).ptr() - (*bufp).set(buf) - } for i, label := range gcMarkWorkerModeStrings[:] { - trace.markWorkerLabels[i], buf = traceString(buf, label) + trace.markWorkerLabels[i], bufp = traceString(bufp, pid, label) } traceReleaseBuffer(pid) @@ -277,10 +277,9 @@ func StopTrace() { traceGoSched() - for _, p := range &allp { - if p == nil { - break - } + // Loop over all allocated Ps because dead Ps may still have + // trace buffers. + for _, p := range allp[:cap(allp)] { buf := p.tracebuf if buf != 0 { traceFullQueue(buf) @@ -320,10 +319,7 @@ func StopTrace() { // The lock protects us from races with StartTrace/StopTrace because they do stop-the-world. lock(&trace.lock) - for _, p := range &allp { - if p == nil { - break - } + for _, p := range allp[:cap(allp)] { if p.tracebuf != 0 { throw("trace: non-empty trace buffer in proc") } @@ -382,7 +378,7 @@ func ReadTrace() []byte { trace.headerWritten = true trace.lockOwner = nil unlock(&trace.lock) - return []byte("go 1.9 trace\x00\x00\x00\x00") + return []byte("go 1.10 trace\x00\x00\x00") } // Wait for new data. 
if trace.fullHead == 0 && !trace.shutdown { @@ -408,9 +404,12 @@ func ReadTrace() []byte { var data []byte data = append(data, traceEvFrequency|0<<traceArgCountShift) data = traceAppend(data, uint64(freq)) - if timers.gp != nil { - data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift) - data = traceAppend(data, uint64(timers.gp.goid)) + for i := range timers { + tb := &timers[i] + if tb.gp != nil { + data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift) + data = traceAppend(data, uint64(tb.gp.goid)) + } } // This will emit a bunch of full buffers, we will pick them up // on the next iteration. @@ -514,18 +513,12 @@ func traceEvent(ev byte, skip int, args ...uint64) { buf := (*bufp).ptr() const maxSize = 2 + 5*traceBytesPerNumber // event type, length, sequence, timestamp, stack id and two add params if buf == nil || len(buf.arr)-buf.pos < maxSize { - buf = traceFlush(traceBufPtrOf(buf)).ptr() + buf = traceFlush(traceBufPtrOf(buf), pid).ptr() (*bufp).set(buf) } ticks := uint64(cputicks()) / traceTickDiv tickDiff := ticks - buf.lastTicks - if buf.pos == 0 { - buf.byte(traceEvBatch | 1<<traceArgCountShift) - buf.varint(uint64(pid)) - buf.varint(ticks) - tickDiff = 0 - } buf.lastTicks = ticks narg := byte(len(args)) if skip >= 0 { @@ -603,7 +596,7 @@ func traceReleaseBuffer(pid int32) { } // traceFlush puts buf onto stack of full buffers and returns an empty buffer. -func traceFlush(buf traceBufPtr) traceBufPtr { +func traceFlush(buf traceBufPtr, pid int32) traceBufPtr { owner := trace.lockOwner dolock := owner == nil || owner != getg().m.curg if dolock { @@ -624,34 +617,51 @@ func traceFlush(buf traceBufPtr) traceBufPtr { bufp := buf.ptr() bufp.link.set(nil) bufp.pos = 0 - bufp.lastTicks = 0 + + // initialize the buffer for a new batch + ticks := uint64(cputicks()) / traceTickDiv + bufp.lastTicks = ticks + bufp.byte(traceEvBatch | 1<<traceArgCountShift) + bufp.varint(uint64(pid)) + bufp.varint(ticks) + if dolock { unlock(&trace.lock) } return buf } -func traceString(buf *traceBuf, s string) (uint64, *traceBuf) { +// traceString adds a string to the trace.strings and returns the id. +func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) { if s == "" { - return 0, buf + return 0, bufp } if id, ok := trace.strings[s]; ok { - return id, buf + return id, bufp } trace.stringSeq++ id := trace.stringSeq trace.strings[s] = id + // memory allocation in above may trigger tracing and + // cause *bufp changes. Following code now works with *bufp, + // so there must be no memory allocation or any activities + // that causes tracing after this point. + + buf := (*bufp).ptr() size := 1 + 2*traceBytesPerNumber + len(s) - if len(buf.arr)-buf.pos < size { - buf = traceFlush(traceBufPtrOf(buf)).ptr() + if buf == nil || len(buf.arr)-buf.pos < size { + buf = traceFlush(traceBufPtrOf(buf), pid).ptr() + (*bufp).set(buf) } buf.byte(traceEvString) buf.varint(id) buf.varint(uint64(len(s))) buf.pos += copy(buf.arr[buf.pos:], s) - return id, buf + + (*bufp).set(buf) + return id, bufp } // traceAppend appends v to buf in little-endian-base-128 encoding. @@ -781,7 +791,7 @@ func allFrames(pcs []uintptr) []Frame { // releases all memory and resets state. 
func (tab *traceStackTable) dump() { var tmp [(2 + 4*traceStackSize) * traceBytesPerNumber]byte - buf := traceFlush(0).ptr() + bufp := traceFlush(0, 0) for _, stk := range tab.tab { stk := stk.ptr() for ; stk != nil; stk = stk.link.ptr() { @@ -791,7 +801,7 @@ func (tab *traceStackTable) dump() { tmpbuf = traceAppend(tmpbuf, uint64(len(frames))) for _, f := range frames { var frame traceFrame - frame, buf = traceFrameForPC(buf, f) + frame, bufp = traceFrameForPC(bufp, 0, f) tmpbuf = traceAppend(tmpbuf, uint64(f.PC)) tmpbuf = traceAppend(tmpbuf, uint64(frame.funcID)) tmpbuf = traceAppend(tmpbuf, uint64(frame.fileID)) @@ -799,9 +809,10 @@ func (tab *traceStackTable) dump() { } // Now copy to the buffer. size := 1 + traceBytesPerNumber + len(tmpbuf) - if len(buf.arr)-buf.pos < size { - buf = traceFlush(traceBufPtrOf(buf)).ptr() + if buf := bufp.ptr(); len(buf.arr)-buf.pos < size { + bufp = traceFlush(bufp, 0) } + buf := bufp.ptr() buf.byte(traceEvStack | 3<<traceArgCountShift) buf.varint(uint64(len(tmpbuf))) buf.pos += copy(buf.arr[buf.pos:], tmpbuf) @@ -809,7 +820,7 @@ func (tab *traceStackTable) dump() { } lock(&trace.lock) - traceFullQueue(traceBufPtrOf(buf)) + traceFullQueue(bufp) unlock(&trace.lock) tab.mem.drop() @@ -822,7 +833,10 @@ type traceFrame struct { line uint64 } -func traceFrameForPC(buf *traceBuf, f Frame) (traceFrame, *traceBuf) { +// traceFrameForPC records the frame information. +// It may allocate memory. +func traceFrameForPC(buf traceBufPtr, pid int32, f Frame) (traceFrame, traceBufPtr) { + bufp := &buf var frame traceFrame fn := f.Function @@ -830,14 +844,14 @@ func traceFrameForPC(buf *traceBuf, f Frame) (traceFrame, *traceBuf) { if len(fn) > maxLen { fn = fn[len(fn)-maxLen:] } - frame.funcID, buf = traceString(buf, fn) + frame.funcID, bufp = traceString(bufp, pid, fn) frame.line = uint64(f.Line) file := f.File if len(file) > maxLen { file = file[len(file)-maxLen:] } - frame.fileID, buf = traceString(buf, file) - return frame, buf + frame.fileID, bufp = traceString(bufp, pid, file) + return frame, (*bufp) } // traceAlloc is a non-thread-safe region allocator. @@ -924,12 +938,12 @@ func traceGCDone() { traceEvent(traceEvGCDone, -1) } -func traceGCScanStart() { - traceEvent(traceEvGCScanStart, -1) +func traceGCSTWStart(kind int) { + traceEvent(traceEvGCSTWStart, -1, uint64(kind)) } -func traceGCScanDone() { - traceEvent(traceEvGCScanDone, -1) +func traceGCSTWDone() { + traceEvent(traceEvGCSTWDone, -1) } // traceGCSweepStart prepares to trace a sweep loop. This does not diff --git a/src/runtime/trace/example_test.go b/src/runtime/trace/example_test.go new file mode 100644 index 0000000000..ba96a829a3 --- /dev/null +++ b/src/runtime/trace/example_test.go @@ -0,0 +1,39 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace_test + +import ( + "fmt" + "log" + "os" + "runtime/trace" +) + +// Example demonstrates the use of the trace package to trace +// the execution of a Go program. 
The trace output will be +// written to the file trace.out +func Example() { + f, err := os.Create("trace.out") + if err != nil { + log.Fatalf("failed to create trace output file: %v", err) + } + defer func() { + if err := f.Close(); err != nil { + log.Fatalf("failed to close trace file: %v", err) + } + }() + + if err := trace.Start(f); err != nil { + log.Fatalf("failed to start trace: %v", err) + } + defer trace.Stop() + + // your program here + RunMyProgram() +} + +func RunMyProgram() { + fmt.Printf("this function will be traced") +} diff --git a/src/runtime/trace/trace.go b/src/runtime/trace/trace.go index 7cbb8a6e82..439f998c03 100644 --- a/src/runtime/trace/trace.go +++ b/src/runtime/trace/trace.go @@ -2,13 +2,36 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Go execution tracer. -// The tracer captures a wide range of execution events like goroutine -// creation/blocking/unblocking, syscall enter/exit/block, GC-related events, -// changes of heap size, processor start/stop, etc and writes them to an io.Writer -// in a compact form. A precise nanosecond-precision timestamp and a stack -// trace is captured for most events. A trace can be analyzed later with -// 'go tool trace' command. +// Package trace contains facilities for programs to generate trace +// for Go execution tracer. +// +// The execution trace captures a wide range of execution events such as +// goroutine creation/blocking/unblocking, syscall enter/exit/block, +// GC-related events, changes of heap size, processor start/stop, etc. +// A precise nanosecond-precision timestamp and a stack trace is +// captured for most events. The generated trace can be interpreted +// using `go tool trace`. +// +// Tracing a Go program +// +// Support for tracing tests and benchmarks built with the standard +// testing package is built into `go test`. For example, the following +// command runs the test in the current directory and writes the trace +// file (trace.out). +// +// go test -trace=test.out +// +// This runtime/trace package provides APIs to add equivalent tracing +// support to a standalone program. See the Example that demonstrates +// how to use this API to enable tracing. +// +// There is also a standard HTTP interface to profiling data. Adding the +// following line will install handlers under the /debug/pprof/trace URL +// to download live profiles: +// +// import _ "net/http/pprof" +// +// See the net/http/pprof package for more details. package trace import ( diff --git a/src/runtime/trace/trace_test.go b/src/runtime/trace/trace_test.go index c5f64fcf4c..5fa5b82f8e 100644 --- a/src/runtime/trace/trace_test.go +++ b/src/runtime/trace/trace_test.go @@ -7,6 +7,7 @@ package trace_test import ( "bytes" "flag" + "internal/race" "internal/trace" "io" "io/ioutil" @@ -14,6 +15,7 @@ import ( "os" "runtime" . "runtime/trace" + "strconv" "sync" "testing" "time" @@ -23,6 +25,61 @@ var ( saveTraces = flag.Bool("savetraces", false, "save traces collected by tests") ) +// TestEventBatch tests Flush calls that happen during Start +// don't produce corrupted traces. +func TestEventBatch(t *testing.T) { + if race.Enabled { + t.Skip("skipping in race mode") + } + if testing.Short() { + t.Skip("skipping in short mode") + } + // During Start, bunch of records are written to reflect the current + // snapshot of the program, including state of each goroutines. + // And some string constants are written to the trace to aid trace + // parsing. 
This test checks Flush of the buffer occurred during + // this process doesn't cause corrupted traces. + // When a Flush is called during Start is complicated + // so we test with a range of number of goroutines hoping that one + // of them triggers Flush. + // This range was chosen to fill up a ~64KB buffer with traceEvGoCreate + // and traceEvGoWaiting events (12~13bytes per goroutine). + for g := 4950; g < 5050; g++ { + n := g + t.Run("G="+strconv.Itoa(n), func(t *testing.T) { + var wg sync.WaitGroup + wg.Add(n) + + in := make(chan bool, 1000) + for i := 0; i < n; i++ { + go func() { + <-in + wg.Done() + }() + } + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + for i := 0; i < n; i++ { + in <- true + } + wg.Wait() + Stop() + + _, err := trace.Parse(buf, "") + if err == trace.ErrTimeOrder { + t.Skipf("skipping trace: %v", err) + } + + if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } + }) + } +} + func TestTraceStartStop(t *testing.T) { buf := new(bytes.Buffer) if err := Start(buf); err != nil { diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index c74d438757..501ecb0411 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -184,6 +184,7 @@ func gentraceback(pc0, sp0, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max in cgoCtxt := gp.cgoCtxt printing := pcbuf == nil && callback == nil _defer := gp._defer + elideWrapper := false for _defer != nil && _defer.sp == _NoArgs { _defer = _defer.link @@ -386,8 +387,15 @@ func gentraceback(pc0, sp0, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max in } if printing { - // assume skip=0 for printing - if (flags&_TraceRuntimeFrames) != 0 || showframe(f, gp, nprint == 0) { + // assume skip=0 for printing. + // + // Never elide wrappers if we haven't printed + // any frames. And don't elide wrappers that + // called panic rather than the wrapped + // function. Otherwise, leave them out. + name := funcname(f) + nextElideWrapper := elideWrapperCalling(name) + if (flags&_TraceRuntimeFrames) != 0 || showframe(f, gp, nprint == 0, elideWrapper && nprint != 0) { // Print during crash. // main(0x1, 0x2, 0x3) // /home/rsc/go/src/runtime/x.go:23 +0xf @@ -411,7 +419,6 @@ func gentraceback(pc0, sp0, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max in ix = inltree[ix].parent } } - name := funcname(f) if name == "runtime.gopanic" { name = "panic" } @@ -438,6 +445,7 @@ func gentraceback(pc0, sp0, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max in print("\n") nprint++ } + elideWrapper = nextElideWrapper } n++ @@ -647,7 +655,7 @@ func printcreatedby(gp *g) { // Show what created goroutine, except main goroutine (goid 1). pc := gp.gopc f := findfunc(pc) - if f.valid() && showframe(f, gp, false) && gp.goid != 1 { + if f.valid() && showframe(f, gp, false, false) && gp.goid != 1 { print("created by ", funcname(f), "\n") tracepc := pc // back up to CALL instruction for funcline. 
if pc > f.entry { @@ -714,7 +722,7 @@ func traceback1(pc, sp, lr uintptr, gp *g, flags uint) { func callers(skip int, pcbuf []uintptr) int { sp := getcallersp(unsafe.Pointer(&skip)) - pc := getcallerpc(unsafe.Pointer(&skip)) + pc := getcallerpc() gp := getg() var n int systemstack(func() { @@ -727,12 +735,28 @@ func gcallers(gp *g, skip int, pcbuf []uintptr) int { return gentraceback(^uintptr(0), ^uintptr(0), 0, gp, skip, &pcbuf[0], len(pcbuf), nil, nil, 0) } -func showframe(f funcInfo, gp *g, firstFrame bool) bool { +func showframe(f funcInfo, gp *g, firstFrame, elideWrapper bool) bool { g := getg() if g.m.throwing > 0 && gp != nil && (gp == g.m.curg || gp == g.m.caughtsig.ptr()) { return true } level, _, _ := gotraceback() + if level > 1 { + // Show all frames. + return true + } + + if !f.valid() { + return false + } + + if elideWrapper { + file, _ := funcline(f, f.entry) + if file == "<autogenerated>" { + return false + } + } + name := funcname(f) // Special case: always show runtime.gopanic frame @@ -744,7 +768,7 @@ func showframe(f funcInfo, gp *g, firstFrame bool) bool { return true } - return level > 1 || f.valid() && contains(name, ".") && (!hasprefix(name, "runtime.") || isExportedRuntime(name)) + return contains(name, ".") && (!hasprefix(name, "runtime.") || isExportedRuntime(name)) } // isExportedRuntime reports whether name is an exported runtime function. @@ -754,6 +778,14 @@ func isExportedRuntime(name string) bool { return len(name) > n && name[:n] == "runtime." && 'A' <= name[n] && name[n] <= 'Z' } +// elideWrapperCalling returns whether a wrapper function that called +// function "name" should be elided from stack traces. +func elideWrapperCalling(name string) bool { + // If the wrapper called a panic function instead of the + // wrapped function, we want to include it in stacks. + return !(name == "runtime.gopanic" || name == "runtime.sigpanic" || name == "runtime.panicwrap") +} + var gStatusStrings = [...]string{ _Gidle: "idle", _Grunnable: "runnable", @@ -795,7 +827,7 @@ func goroutineheader(gp *g) { if waitfor >= 1 { print(", ", waitfor, " minutes") } - if gp.lockedm != nil { + if gp.lockedm != 0 { print(", locked to thread") } print("]:\n") diff --git a/src/runtime/type.go b/src/runtime/type.go index bf54d54eb4..b3df3353ce 100644 --- a/src/runtime/type.go +++ b/src/runtime/type.go @@ -655,15 +655,15 @@ func typesEqual(t, v *_type, seen map[_typePair]struct{}) bool { if len(st.fields) != len(sv.fields) { return false } + if st.pkgPath.name() != sv.pkgPath.name() { + return false + } for i := range st.fields { tf := &st.fields[i] vf := &sv.fields[i] if tf.name.name() != vf.name.name() { return false } - if tf.name.pkgPath() != vf.name.pkgPath() { - return false - } if !typesEqual(tf.typ, vf.typ, seen) { return false } diff --git a/src/runtime/vdso_linux.go b/src/runtime/vdso_linux.go new file mode 100644 index 0000000000..5a4e8e578d --- /dev/null +++ b/src/runtime/vdso_linux.go @@ -0,0 +1,281 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux +// +build 386 amd64 + +package runtime + +import "unsafe" + +// Look up symbols in the Linux vDSO. 
+ +// This code was originally based on the sample Linux vDSO parser at +// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/vDSO/parse_vdso.c + +// This implements the ELF dynamic linking spec at +// http://sco.com/developers/gabi/latest/ch5.dynamic.html + +// The version section is documented at +// http://refspecs.linuxfoundation.org/LSB_3.2.0/LSB-Core-generic/LSB-Core-generic/symversion.html + +const ( + _AT_SYSINFO_EHDR = 33 + + _PT_LOAD = 1 /* Loadable program segment */ + _PT_DYNAMIC = 2 /* Dynamic linking information */ + + _DT_NULL = 0 /* Marks end of dynamic section */ + _DT_HASH = 4 /* Dynamic symbol hash table */ + _DT_STRTAB = 5 /* Address of string table */ + _DT_SYMTAB = 6 /* Address of symbol table */ + _DT_GNU_HASH = 0x6ffffef5 /* GNU-style dynamic symbol hash table */ + _DT_VERSYM = 0x6ffffff0 + _DT_VERDEF = 0x6ffffffc + + _VER_FLG_BASE = 0x1 /* Version definition of file itself */ + + _SHN_UNDEF = 0 /* Undefined section */ + + _SHT_DYNSYM = 11 /* Dynamic linker symbol table */ + + _STT_FUNC = 2 /* Symbol is a code object */ + + _STB_GLOBAL = 1 /* Global symbol */ + _STB_WEAK = 2 /* Weak symbol */ + + _EI_NIDENT = 16 + + // Maximum indices for the array types used when traversing the vDSO ELF structures. + // Computed from architecture-specific max provided by vdso_linux_*.go + vdsoSymTabSize = vdsoArrayMax / unsafe.Sizeof(elfSym{}) + vdsoDynSize = vdsoArrayMax / unsafe.Sizeof(elfDyn{}) + vdsoSymStringsSize = vdsoArrayMax // byte + vdsoVerSymSize = vdsoArrayMax / 2 // uint16 + vdsoHashSize = vdsoArrayMax / 4 // uint32 + + // vdsoBloomSizeScale is a scaling factor for gnuhash tables which are uint32 indexed, + // but contain uintptrs + vdsoBloomSizeScale = unsafe.Sizeof(uintptr(0)) / 4 // uint32 +) + +/* How to extract and insert information held in the st_info field. */ +func _ELF_ST_BIND(val byte) byte { return val >> 4 } +func _ELF_ST_TYPE(val byte) byte { return val & 0xf } + +type symbol_key struct { + name string + sym_hash uint32 + gnu_hash uint32 + ptr *uintptr +} + +type version_key struct { + version string + ver_hash uint32 +} + +type vdso_info struct { + valid bool + + /* Load information */ + load_addr uintptr + load_offset uintptr /* load_addr - recorded vaddr */ + + /* Symbol table */ + symtab *[vdsoSymTabSize]elfSym + symstrings *[vdsoSymStringsSize]byte + chain []uint32 + bucket []uint32 + symOff uint32 + isGNUHash bool + + /* Version table */ + versym *[vdsoVerSymSize]uint16 + verdef *elfVerdef +} + +var linux26 = version_key{"LINUX_2.6", 0x3ae75f6} + +// see vdso_linux_*.go for sym_keys[] and __vdso_* vars + +func vdso_init_from_sysinfo_ehdr(info *vdso_info, hdr *elfEhdr) { + info.valid = false + info.load_addr = uintptr(unsafe.Pointer(hdr)) + + pt := unsafe.Pointer(info.load_addr + uintptr(hdr.e_phoff)) + + // We need two things from the segment table: the load offset + // and the dynamic table. + var found_vaddr bool + var dyn *[vdsoDynSize]elfDyn + for i := uint16(0); i < hdr.e_phnum; i++ { + pt := (*elfPhdr)(add(pt, uintptr(i)*unsafe.Sizeof(elfPhdr{}))) + switch pt.p_type { + case _PT_LOAD: + if !found_vaddr { + found_vaddr = true + info.load_offset = info.load_addr + uintptr(pt.p_offset-pt.p_vaddr) + } + + case _PT_DYNAMIC: + dyn = (*[vdsoDynSize]elfDyn)(unsafe.Pointer(info.load_addr + uintptr(pt.p_offset))) + } + } + + if !found_vaddr || dyn == nil { + return // Failed + } + + // Fish out the useful bits of the dynamic table. 
+ + var hash, gnuhash *[vdsoHashSize]uint32 + info.symstrings = nil + info.symtab = nil + info.versym = nil + info.verdef = nil + for i := 0; dyn[i].d_tag != _DT_NULL; i++ { + dt := &dyn[i] + p := info.load_offset + uintptr(dt.d_val) + switch dt.d_tag { + case _DT_STRTAB: + info.symstrings = (*[vdsoSymStringsSize]byte)(unsafe.Pointer(p)) + case _DT_SYMTAB: + info.symtab = (*[vdsoSymTabSize]elfSym)(unsafe.Pointer(p)) + case _DT_HASH: + hash = (*[vdsoHashSize]uint32)(unsafe.Pointer(p)) + case _DT_GNU_HASH: + gnuhash = (*[vdsoHashSize]uint32)(unsafe.Pointer(p)) + case _DT_VERSYM: + info.versym = (*[vdsoVerSymSize]uint16)(unsafe.Pointer(p)) + case _DT_VERDEF: + info.verdef = (*elfVerdef)(unsafe.Pointer(p)) + } + } + + if info.symstrings == nil || info.symtab == nil || (hash == nil && gnuhash == nil) { + return // Failed + } + + if info.verdef == nil { + info.versym = nil + } + + if gnuhash != nil { + // Parse the GNU hash table header. + nbucket := gnuhash[0] + info.symOff = gnuhash[1] + bloomSize := gnuhash[2] + info.bucket = gnuhash[4+bloomSize*uint32(vdsoBloomSizeScale):][:nbucket] + info.chain = gnuhash[4+bloomSize*uint32(vdsoBloomSizeScale)+nbucket:] + info.isGNUHash = true + } else { + // Parse the hash table header. + nbucket := hash[0] + nchain := hash[1] + info.bucket = hash[2 : 2+nbucket] + info.chain = hash[2+nbucket : 2+nbucket+nchain] + } + + // That's all we need. + info.valid = true +} + +func vdso_find_version(info *vdso_info, ver *version_key) int32 { + if !info.valid { + return 0 + } + + def := info.verdef + for { + if def.vd_flags&_VER_FLG_BASE == 0 { + aux := (*elfVerdaux)(add(unsafe.Pointer(def), uintptr(def.vd_aux))) + if def.vd_hash == ver.ver_hash && ver.version == gostringnocopy(&info.symstrings[aux.vda_name]) { + return int32(def.vd_ndx & 0x7fff) + } + } + + if def.vd_next == 0 { + break + } + def = (*elfVerdef)(add(unsafe.Pointer(def), uintptr(def.vd_next))) + } + + return -1 // cannot match any version +} + +func vdso_parse_symbols(info *vdso_info, version int32) { + if !info.valid { + return + } + + apply := func(symIndex uint32, k symbol_key) bool { + sym := &info.symtab[symIndex] + typ := _ELF_ST_TYPE(sym.st_info) + bind := _ELF_ST_BIND(sym.st_info) + if typ != _STT_FUNC || bind != _STB_GLOBAL && bind != _STB_WEAK || sym.st_shndx == _SHN_UNDEF { + return false + } + if k.name != gostringnocopy(&info.symstrings[sym.st_name]) { + return false + } + + // Check symbol version. + if info.versym != nil && version != 0 && int32(info.versym[symIndex]&0x7fff) != version { + return false + } + + *k.ptr = info.load_offset + uintptr(sym.st_value) + return true + } + + if !info.isGNUHash { + // Old-style DT_HASH table. + for _, k := range sym_keys { + for chain := info.bucket[k.sym_hash%uint32(len(info.bucket))]; chain != 0; chain = info.chain[chain] { + if apply(chain, k) { + break + } + } + } + return + } + + // New-style DT_GNU_HASH table. + for _, k := range sym_keys { + symIndex := info.bucket[k.gnu_hash%uint32(len(info.bucket))] + if symIndex < info.symOff { + continue + } + for ; ; symIndex++ { + hash := info.chain[symIndex-info.symOff] + if hash|1 == k.gnu_hash|1 { + // Found a hash match. + if apply(symIndex, k) { + break + } + } + if hash&1 != 0 { + // End of chain. + break + } + } + } +} + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_SYSINFO_EHDR: + if val == 0 { + // Something went wrong + return + } + var info vdso_info + // TODO(rsc): I don't understand why the compiler thinks info escapes + // when passed to the three functions below. 
+ info1 := (*vdso_info)(noescape(unsafe.Pointer(&info))) + vdso_init_from_sysinfo_ehdr(info1, (*elfEhdr)(unsafe.Pointer(val))) + vdso_parse_symbols(info1, vdso_find_version(info1, &linux26)) + } +} diff --git a/src/runtime/vdso_linux_386.go b/src/runtime/vdso_linux_386.go new file mode 100644 index 0000000000..74ad953469 --- /dev/null +++ b/src/runtime/vdso_linux_386.go @@ -0,0 +1,93 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +// ELF32 structure definitions for use by the Linux vDSO loader + +type elfSym struct { + st_name uint32 + st_value uint32 + st_size uint32 + st_info byte + st_other byte + st_shndx uint16 +} + +type elfVerdef struct { + vd_version uint16 /* Version revision */ + vd_flags uint16 /* Version information */ + vd_ndx uint16 /* Version Index */ + vd_cnt uint16 /* Number of associated aux entries */ + vd_hash uint32 /* Version name hash value */ + vd_aux uint32 /* Offset in bytes to verdaux array */ + vd_next uint32 /* Offset in bytes to next verdef entry */ +} + +type elfEhdr struct { + e_ident [_EI_NIDENT]byte /* Magic number and other info */ + e_type uint16 /* Object file type */ + e_machine uint16 /* Architecture */ + e_version uint32 /* Object file version */ + e_entry uint32 /* Entry point virtual address */ + e_phoff uint32 /* Program header table file offset */ + e_shoff uint32 /* Section header table file offset */ + e_flags uint32 /* Processor-specific flags */ + e_ehsize uint16 /* ELF header size in bytes */ + e_phentsize uint16 /* Program header table entry size */ + e_phnum uint16 /* Program header table entry count */ + e_shentsize uint16 /* Section header table entry size */ + e_shnum uint16 /* Section header table entry count */ + e_shstrndx uint16 /* Section header string table index */ +} + +type elfPhdr struct { + p_type uint32 /* Segment type */ + p_offset uint32 /* Segment file offset */ + p_vaddr uint32 /* Segment virtual address */ + p_paddr uint32 /* Segment physical address */ + p_filesz uint32 /* Segment size in file */ + p_memsz uint32 /* Segment size in memory */ + p_flags uint32 /* Segment flags */ + p_align uint32 /* Segment alignment */ +} + +type elfShdr struct { + sh_name uint32 /* Section name (string tbl index) */ + sh_type uint32 /* Section type */ + sh_flags uint32 /* Section flags */ + sh_addr uint32 /* Section virtual addr at execution */ + sh_offset uint32 /* Section file offset */ + sh_size uint32 /* Section size in bytes */ + sh_link uint32 /* Link to another section */ + sh_info uint32 /* Additional section information */ + sh_addralign uint32 /* Section alignment */ + sh_entsize uint32 /* Entry size if section holds table */ +} + +type elfDyn struct { + d_tag int32 /* Dynamic entry type */ + d_val uint32 /* Integer value */ +} + +type elfVerdaux struct { + vda_name uint32 /* Version or dependency names */ + vda_next uint32 /* Offset in bytes to next verdaux entry */ +} + +const ( + // vdsoArrayMax is the byte-size of a maximally sized array on this architecture. + // See cmd/compile/internal/x86/galign.go arch.MAXWIDTH initialization, but must also + // be constrained to max +ve int. 
+ vdsoArrayMax = 1<<31 - 1 +) + +var sym_keys = []symbol_key{ + {"__vdso_clock_gettime", 0xd35ec75, 0x6e43a318, &__vdso_clock_gettime_sym}, +} + +// initialize to fall back to syscall +var ( + __vdso_clock_gettime_sym uintptr = 0 +) diff --git a/src/runtime/vdso_linux_amd64.go b/src/runtime/vdso_linux_amd64.go index 8a970dfbe6..0bbe5c2e8f 100644 --- a/src/runtime/vdso_linux_amd64.go +++ b/src/runtime/vdso_linux_amd64.go @@ -4,51 +4,9 @@ package runtime -import "unsafe" +// ELF64 structure definitions for use by the Linux vDSO loader -// Look up symbols in the Linux vDSO. - -// This code was originally based on the sample Linux vDSO parser at -// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/vDSO/parse_vdso.c - -// This implements the ELF dynamic linking spec at -// http://sco.com/developers/gabi/latest/ch5.dynamic.html - -// The version section is documented at -// http://refspecs.linuxfoundation.org/LSB_3.2.0/LSB-Core-generic/LSB-Core-generic/symversion.html - -const ( - _AT_SYSINFO_EHDR = 33 - - _PT_LOAD = 1 /* Loadable program segment */ - _PT_DYNAMIC = 2 /* Dynamic linking information */ - - _DT_NULL = 0 /* Marks end of dynamic section */ - _DT_HASH = 4 /* Dynamic symbol hash table */ - _DT_STRTAB = 5 /* Address of string table */ - _DT_SYMTAB = 6 /* Address of symbol table */ - _DT_VERSYM = 0x6ffffff0 - _DT_VERDEF = 0x6ffffffc - - _VER_FLG_BASE = 0x1 /* Version definition of file itself */ - - _SHN_UNDEF = 0 /* Undefined section */ - - _SHT_DYNSYM = 11 /* Dynamic linker symbol table */ - - _STT_FUNC = 2 /* Symbol is a code object */ - - _STB_GLOBAL = 1 /* Global symbol */ - _STB_WEAK = 2 /* Weak symbol */ - - _EI_NIDENT = 16 -) - -/* How to extract and insert information held in the st_info field. */ -func _ELF64_ST_BIND(val byte) byte { return val >> 4 } -func _ELF64_ST_TYPE(val byte) byte { return val & 0xf } - -type elf64Sym struct { +type elfSym struct { st_name uint32 st_info byte st_other byte @@ -57,7 +15,7 @@ type elf64Sym struct { st_size uint64 } -type elf64Verdef struct { +type elfVerdef struct { vd_version uint16 /* Version revision */ vd_flags uint16 /* Version information */ vd_ndx uint16 /* Version Index */ @@ -67,7 +25,7 @@ type elf64Verdef struct { vd_next uint32 /* Offset in bytes to next verdef entry */ } -type elf64Ehdr struct { +type elfEhdr struct { e_ident [_EI_NIDENT]byte /* Magic number and other info */ e_type uint16 /* Object file type */ e_machine uint16 /* Architecture */ @@ -84,7 +42,7 @@ type elf64Ehdr struct { e_shstrndx uint16 /* Section header string table index */ } -type elf64Phdr struct { +type elfPhdr struct { p_type uint32 /* Segment type */ p_flags uint32 /* Segment flags */ p_offset uint64 /* Segment file offset */ @@ -95,7 +53,7 @@ type elf64Phdr struct { p_align uint64 /* Segment alignment */ } -type elf64Shdr struct { +type elfShdr struct { sh_name uint32 /* Section name (string tbl index) */ sh_type uint32 /* Section type */ sh_flags uint64 /* Section flags */ @@ -108,56 +66,26 @@ type elf64Shdr struct { sh_entsize uint64 /* Entry size if section holds table */ } -type elf64Dyn struct { +type elfDyn struct { d_tag int64 /* Dynamic entry type */ d_val uint64 /* Integer value */ } -type elf64Verdaux struct { +type elfVerdaux struct { vda_name uint32 /* Version or dependency names */ vda_next uint32 /* Offset in bytes to next verdaux entry */ } -type elf64Auxv struct { - a_type uint64 /* Entry type */ - a_val uint64 /* Integer value */ -} - -type symbol_key struct { - name string - sym_hash uint32 - ptr 
*uintptr -} - -type version_key struct { - version string - ver_hash uint32 -} - -type vdso_info struct { - valid bool - - /* Load information */ - load_addr uintptr - load_offset uintptr /* load_addr - recorded vaddr */ - - /* Symbol table */ - symtab *[1 << 32]elf64Sym - symstrings *[1 << 32]byte - chain []uint32 - bucket []uint32 - - /* Version table */ - versym *[1 << 32]uint16 - verdef *elf64Verdef -} - -var linux26 = version_key{"LINUX_2.6", 0x3ae75f6} +const ( + // vdsoArrayMax is the byte-size of a maximally sized array on this architecture. + // See cmd/compile/internal/amd64/galign.go arch.MAXWIDTH initialization. + vdsoArrayMax = 1<<50 - 1 +) var sym_keys = []symbol_key{ - {"__vdso_time", 0xa33c485, &__vdso_time_sym}, - {"__vdso_gettimeofday", 0x315ca59, &__vdso_gettimeofday_sym}, - {"__vdso_clock_gettime", 0xd35ec75, &__vdso_clock_gettime_sym}, + {"__vdso_time", 0xa33c485, 0x821e8e0d, &__vdso_time_sym}, + {"__vdso_gettimeofday", 0x315ca59, 0xb01bca00, &__vdso_gettimeofday_sym}, + {"__vdso_clock_gettime", 0xd35ec75, 0x6e43a318, &__vdso_clock_gettime_sym}, } // initialize with vsyscall fallbacks @@ -166,141 +94,3 @@ var ( __vdso_gettimeofday_sym uintptr = 0xffffffffff600000 __vdso_clock_gettime_sym uintptr = 0 ) - -func vdso_init_from_sysinfo_ehdr(info *vdso_info, hdr *elf64Ehdr) { - info.valid = false - info.load_addr = uintptr(unsafe.Pointer(hdr)) - - pt := unsafe.Pointer(info.load_addr + uintptr(hdr.e_phoff)) - - // We need two things from the segment table: the load offset - // and the dynamic table. - var found_vaddr bool - var dyn *[1 << 20]elf64Dyn - for i := uint16(0); i < hdr.e_phnum; i++ { - pt := (*elf64Phdr)(add(pt, uintptr(i)*unsafe.Sizeof(elf64Phdr{}))) - switch pt.p_type { - case _PT_LOAD: - if !found_vaddr { - found_vaddr = true - info.load_offset = info.load_addr + uintptr(pt.p_offset-pt.p_vaddr) - } - - case _PT_DYNAMIC: - dyn = (*[1 << 20]elf64Dyn)(unsafe.Pointer(info.load_addr + uintptr(pt.p_offset))) - } - } - - if !found_vaddr || dyn == nil { - return // Failed - } - - // Fish out the useful bits of the dynamic table. - - var hash *[1 << 30]uint32 - hash = nil - info.symstrings = nil - info.symtab = nil - info.versym = nil - info.verdef = nil - for i := 0; dyn[i].d_tag != _DT_NULL; i++ { - dt := &dyn[i] - p := info.load_offset + uintptr(dt.d_val) - switch dt.d_tag { - case _DT_STRTAB: - info.symstrings = (*[1 << 32]byte)(unsafe.Pointer(p)) - case _DT_SYMTAB: - info.symtab = (*[1 << 32]elf64Sym)(unsafe.Pointer(p)) - case _DT_HASH: - hash = (*[1 << 30]uint32)(unsafe.Pointer(p)) - case _DT_VERSYM: - info.versym = (*[1 << 32]uint16)(unsafe.Pointer(p)) - case _DT_VERDEF: - info.verdef = (*elf64Verdef)(unsafe.Pointer(p)) - } - } - - if info.symstrings == nil || info.symtab == nil || hash == nil { - return // Failed - } - - if info.verdef == nil { - info.versym = nil - } - - // Parse the hash table header. - nbucket := hash[0] - nchain := hash[1] - info.bucket = hash[2 : 2+nbucket] - info.chain = hash[2+nbucket : 2+nbucket+nchain] - - // That's all we need. 
- info.valid = true -} - -func vdso_find_version(info *vdso_info, ver *version_key) int32 { - if !info.valid { - return 0 - } - - def := info.verdef - for { - if def.vd_flags&_VER_FLG_BASE == 0 { - aux := (*elf64Verdaux)(add(unsafe.Pointer(def), uintptr(def.vd_aux))) - if def.vd_hash == ver.ver_hash && ver.version == gostringnocopy(&info.symstrings[aux.vda_name]) { - return int32(def.vd_ndx & 0x7fff) - } - } - - if def.vd_next == 0 { - break - } - def = (*elf64Verdef)(add(unsafe.Pointer(def), uintptr(def.vd_next))) - } - - return -1 // cannot match any version -} - -func vdso_parse_symbols(info *vdso_info, version int32) { - if !info.valid { - return - } - - for _, k := range sym_keys { - for chain := info.bucket[k.sym_hash%uint32(len(info.bucket))]; chain != 0; chain = info.chain[chain] { - sym := &info.symtab[chain] - typ := _ELF64_ST_TYPE(sym.st_info) - bind := _ELF64_ST_BIND(sym.st_info) - if typ != _STT_FUNC || bind != _STB_GLOBAL && bind != _STB_WEAK || sym.st_shndx == _SHN_UNDEF { - continue - } - if k.name != gostringnocopy(&info.symstrings[sym.st_name]) { - continue - } - - // Check symbol version. - if info.versym != nil && version != 0 && int32(info.versym[chain]&0x7fff) != version { - continue - } - - *k.ptr = info.load_offset + uintptr(sym.st_value) - break - } - } -} - -func archauxv(tag, val uintptr) { - switch tag { - case _AT_SYSINFO_EHDR: - if val == 0 { - // Something went wrong - return - } - var info vdso_info - // TODO(rsc): I don't understand why the compiler thinks info escapes - // when passed to the three functions below. - info1 := (*vdso_info)(noescape(unsafe.Pointer(&info))) - vdso_init_from_sysinfo_ehdr(info1, (*elf64Ehdr)(unsafe.Pointer(val))) - vdso_parse_symbols(info1, vdso_find_version(info1, &linux26)) - } -} diff --git a/src/runtime/vdso_linux_test.go b/src/runtime/vdso_linux_test.go new file mode 100644 index 0000000000..f507ee98ee --- /dev/null +++ b/src/runtime/vdso_linux_test.go @@ -0,0 +1,63 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux +// +build 386 amd64 + +package runtime_test + +import ( + "testing" + "time" + _ "unsafe" +) + +// These tests are a little risky because they overwrite the __vdso_clock_gettime_sym value. +// It's normally initialized at startup and remains unchanged after that. + +//go:linkname __vdso_clock_gettime_sym runtime.__vdso_clock_gettime_sym +var __vdso_clock_gettime_sym uintptr + +func TestClockVDSOAndFallbackPaths(t *testing.T) { + // Check that we can call walltime() and nanotime() with and without their (1st) fast-paths. + // This just checks that fast and fallback paths can be called, rather than testing their + // results. + // + // Call them indirectly via time.Now(), so we don't need auxiliary .s files to allow us to + // use go:linkname to refer to the functions directly. + + save := __vdso_clock_gettime_sym + if save == 0 { + t.Log("__vdso_clock_gettime symbol not found; fallback path will be used by default") + } + + // Call with fast-path enabled (if vDSO symbol found at startup) + time.Now() + + // Call with fast-path disabled + __vdso_clock_gettime_sym = 0 + time.Now() + __vdso_clock_gettime_sym = save +} + +func BenchmarkClockVDSOAndFallbackPaths(b *testing.B) { + run := func(b *testing.B) { + for i := 0; i < b.N; i++ { + // Call via time.Now() - see comment in test above. 
+ time.Now() + } + } + + save := __vdso_clock_gettime_sym + b.Run("vDSO", run) + __vdso_clock_gettime_sym = 0 + b.Run("Fallback", run) + __vdso_clock_gettime_sym = save +} + +func BenchmarkTimeNow(b *testing.B) { + for i := 0; i < b.N; i++ { + time.Now() + } +} |
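
A note on what the new lockosthread test programs above are exercising: both the plain and the cgo variants start a goroutine, call runtime.LockOSThread inside it, and then let the goroutine return while still locked, afterwards checking (via /proc/self/task on Linux, or a pthread destructor under cgo) that the locked thread has gone away rather than being reused. A minimal standalone sketch of that behavior, assuming the Go 1.10 semantics that a locked goroutine exiting terminates its thread:

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	done := make(chan struct{})
	go func() {
		// Lock this goroutine to its OS thread, then return without
		// unlocking. Under the semantics the lockosthread tests check,
		// the runtime terminates the wedged thread instead of putting
		// it back into the scheduler's thread pool.
		runtime.LockOSThread()
		close(done)
	}()
	<-done
	// The tests poll for up to ~100ms for the thread to disappear; a
	// short sleep is enough for a demonstration.
	time.Sleep(time.Millisecond)
	fmt.Println("locked goroutine exited")
}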
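
The reworked src/runtime/time.go above replaces the single global timers struct with a fixed array of 64 cache-line-padded timersBucket values, indexed by the current P's id modulo timersLen, so timer operations on different Ps usually take different locks. The same sharding pattern can be sketched outside the runtime roughly as follows; bucket, padded, forID, and the 64-byte cache line are assumptions made for illustration (the runtime uses sys.CacheLineSize and getg().m.p.ptr().id):

package main

import (
	"fmt"
	"sync"
	"unsafe"
)

// bucket stands in for timersBucket: an independently locked heap,
// so shards do not contend on one global mutex.
type bucket struct {
	mu   sync.Mutex
	heap []int64
}

const (
	cacheLineSize = 64 // assumed; the runtime uses sys.CacheLineSize
	nBuckets      = 64 // mirrors timersLen, a memory/performance compromise
)

// padded keeps adjacent buckets on separate cache lines, like the
// anonymous pad array in the runtime's timers declaration.
type padded struct {
	bucket
	_ [cacheLineSize - unsafe.Sizeof(bucket{})%cacheLineSize]byte
}

var buckets [nBuckets]padded

// forID picks a bucket the way assignBucket does: shard id modulo the
// array length, so several shards can share a bucket when there are
// more of them than buckets.
func forID(id int) *bucket {
	return &buckets[uint8(id)%nBuckets].bucket
}

func main() {
	b := forID(3)
	b.mu.Lock()
	b.heap = append(b.heap, 42)
	b.mu.Unlock()
	fmt.Println(len(b.heap), unsafe.Sizeof(padded{}))
}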
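
The new nil check at the top of deltimer exists because a timer can now reach the heap code without ever having been registered: the comment cites a time.Ticker built directly as a composite literal, which never goes through startTimer and so never gets a bucket (Issue 21874). A hedged illustration of the construction in question, not taken from the test suite:

package main

import "time"

func main() {
	// Building the Ticker by hand skips time.NewTicker and therefore
	// startTimer, so its embedded runtime timer has no bucket assigned.
	c := make(chan time.Time)
	t := time.Ticker{C: c}

	// With the guard in deltimer this Stop simply finds nothing to
	// delete and returns; without it, the nil bucket would be
	// dereferenced when taking its lock.
	t.Stop()
}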
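
Finally, the 4950..5050 goroutine sweep in TestEventBatch is sized from the numbers in its own comment: a roughly 64 KiB trace buffer and 12-13 bytes of traceEvGoCreate/traceEvGoWaiting data per goroutine put the buffer boundary near 5000 goroutines, so some count in that window should force a Flush while Start is still writing its snapshot. The back-of-the-envelope check:

package main

import "fmt"

func main() {
	// 64 KiB of buffer divided by ~13 bytes per goroutine record.
	fmt.Println(64 * 1024 / 13) // 5041, inside the 4950..5050 sweep
}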
