From 92bda33d2771a9b12868d9025f113538fa7a84de Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 31 Jul 2020 15:58:00 -0400 Subject: runtime: revert signal stack mlocking Go 1.14 included a (rather awful) workaround for a Linux kernel bug that corrupted vector registers on x86 CPUs during signal delivery (https://bugzilla.kernel.org/show_bug.cgi?id=205663). This bug was introduced in Linux 5.2 and fixed in 5.3.15, 5.4.2 and all 5.5 and later kernels. The fix was also back-ported by major distros. This workaround was necessary, but had unfortunate downsides, including causing Go programs to exceed the mlock ulimit in many configurations (#37436). We're reasonably confident that by the Go 1.16 release, the number of systems running affected kernels will be vanishingly small. Hence, this CL removes this workaround. This effectively reverts CLs 209597 (version parser), 209899 (mlock top of signal stack), 210299 (better failure message), 223121 (soft mlock failure handling), and 244059 (special-case patched Ubuntu kernels). The one thing we keep is the osArchInit function. It's empty everywhere now, but is a reasonable hook to have. Updates #35326, #35777 (the original register corruption bugs). Updates #40184 (request to revert in 1.15). Fixes #35979. Change-Id: Ie213270837095576f1f3ef46bf3de187dc486c50 Reviewed-on: https://go-review.googlesource.com/c/go/+/246200 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/runtime/defs_linux_386.go | 11 ---- src/runtime/defs_linux_amd64.go | 11 ---- src/runtime/export_test.go | 2 - src/runtime/os_linux.go | 9 --- src/runtime/os_linux_x86.go | 118 +--------------------------------------- src/runtime/panic.go | 10 ---- src/runtime/string.go | 34 ------------ src/runtime/string_test.go | 31 ----------- src/runtime/sys_linux_386.s | 19 ------- src/runtime/sys_linux_amd64.s | 19 ------- 10 files changed, 1 insertion(+), 263 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/defs_linux_386.go b/src/runtime/defs_linux_386.go index f4db8cf927..64a0fbcaaa 100644 --- a/src/runtime/defs_linux_386.go +++ b/src/runtime/defs_linux_386.go @@ -226,14 +226,3 @@ type sockaddr_un struct { family uint16 path [108]byte } - -const __NEW_UTS_LEN = 64 - -type new_utsname struct { - sysname [__NEW_UTS_LEN + 1]byte - nodename [__NEW_UTS_LEN + 1]byte - release [__NEW_UTS_LEN + 1]byte - version [__NEW_UTS_LEN + 1]byte - machine [__NEW_UTS_LEN + 1]byte - domainname [__NEW_UTS_LEN + 1]byte -} diff --git a/src/runtime/defs_linux_amd64.go b/src/runtime/defs_linux_amd64.go index 8480d85219..1ae18a309b 100644 --- a/src/runtime/defs_linux_amd64.go +++ b/src/runtime/defs_linux_amd64.go @@ -262,14 +262,3 @@ type sockaddr_un struct { family uint16 path [108]byte } - -const __NEW_UTS_LEN = 64 - -type new_utsname struct { - sysname [__NEW_UTS_LEN + 1]byte - nodename [__NEW_UTS_LEN + 1]byte - release [__NEW_UTS_LEN + 1]byte - version [__NEW_UTS_LEN + 1]byte - machine [__NEW_UTS_LEN + 1]byte - domainname [__NEW_UTS_LEN + 1]byte -} diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 5ab03f3f99..d591fdc4e9 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -43,8 +43,6 @@ var PhysHugePageSize = physHugePageSize var NetpollGenericInit = netpollGenericInit -var ParseRelease = parseRelease - var Memmove = memmove var MemclrNoHeapPointers = memclrNoHeapPointers diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go index 7b95ff2428..22931b4d5c 100644 --- a/src/runtime/os_linux.go +++ b/src/runtime/os_linux.go @@ -328,20 +328,11 @@ func libpreinit() { initsig(true) } -// gsignalInitQuirk, if non-nil, is called for every allocated gsignal G. -// -// TODO(austin): Remove this after Go 1.15 when we remove the -// mlockGsignal workaround. -var gsignalInitQuirk func(gsignal *g) - // Called to initialize a new m (including the bootstrap m). // Called on the parent thread (main thread in case of bootstrap), can allocate memory. func mpreinit(mp *m) { mp.gsignal = malg(32 * 1024) // Linux wants >= 2K mp.gsignal.m = mp - if gsignalInitQuirk != nil { - gsignalInitQuirk(mp.gsignal) - } } func gettid() uint32 diff --git a/src/runtime/os_linux_x86.go b/src/runtime/os_linux_x86.go index 97f870707d..d91fa1a0d1 100644 --- a/src/runtime/os_linux_x86.go +++ b/src/runtime/os_linux_x86.go @@ -7,120 +7,4 @@ package runtime -import ( - "runtime/internal/atomic" - "unsafe" -) - -//go:noescape -func uname(utsname *new_utsname) int - -func mlock(addr, len uintptr) int - -func osArchInit() { - // Linux 5.2 introduced a bug that can corrupt vector - // registers on return from a signal if the signal stack isn't - // faulted in: - // https://bugzilla.kernel.org/show_bug.cgi?id=205663 - // - // It was fixed in 5.3.15, 5.4.2, and all 5.5 and later - // kernels. - // - // If we're on an affected kernel, work around this issue by - // mlocking the top page of every signal stack. This doesn't - // help for signal stacks created in C, but there's not much - // we can do about that. - // - // TODO(austin): Remove this in Go 1.15, at which point it - // will be unlikely to encounter any of the affected kernels - // in the wild. - - var uts new_utsname - if uname(&uts) < 0 { - throw("uname failed") - } - // Check for null terminator to ensure gostringnocopy doesn't - // walk off the end of the release string. - found := false - for _, b := range uts.release { - if b == 0 { - found = true - break - } - } - if !found { - return - } - rel := gostringnocopy(&uts.release[0]) - - major, minor, patch, ok := parseRelease(rel) - if !ok { - return - } - - if major == 5 && minor == 4 && patch < 2 { - // All 5.4 versions of Ubuntu are patched. - procVersion := []byte("/proc/version\000") - f := open(&procVersion[0], _O_RDONLY, 0) - if f >= 0 { - var buf [512]byte - p := noescape(unsafe.Pointer(&buf[0])) - n := read(f, p, int32(len(buf))) - closefd(f) - - needle := []byte("Ubuntu") - contains: - for i, c := range buf[:n] { - if c != needle[0] { - continue - } - if int(n)-i < len(needle) { - break - } - for j, c2 := range needle { - if c2 != buf[i+j] { - continue contains - } - } - // This is an Ubuntu system. - return - } - } - } - - if major == 5 && (minor == 2 || minor == 3 && patch < 15 || minor == 4 && patch < 2) { - gsignalInitQuirk = mlockGsignal - if m0.gsignal != nil { - throw("gsignal quirk too late") - } - throwReportQuirk = throwBadKernel - } -} - -func mlockGsignal(gsignal *g) { - if atomic.Load(&touchStackBeforeSignal) != 0 { - // mlock has already failed, don't try again. - return - } - - // This mlock call may fail, but we don't report the failure. - // Instead, if something goes badly wrong, we rely on prepareSignalM - // and throwBadKernel to do further mitigation and to report a problem - // to the user if mitigation fails. This is because many - // systems have a limit on the total mlock size, and many kernels - // that appear to have bad versions are actually patched to avoid the - // bug described above. We want Go 1.14 to run on those systems. - // See #37436. - if errno := mlock(gsignal.stack.hi-physPageSize, physPageSize); errno < 0 { - atomic.Store(&touchStackBeforeSignal, uint32(-errno)) - } -} - -// throwBadKernel is called, via throwReportQuirk, by throw. -func throwBadKernel() { - if errno := atomic.Load(&touchStackBeforeSignal); errno != 0 { - println("runtime: note: your Linux kernel may be buggy") - println("runtime: note: see https://golang.org/wiki/LinuxKernelSignalVectorBug") - println("runtime: note: mlock workaround for kernel bug failed with errno", errno) - } -} +func osArchInit() {} diff --git a/src/runtime/panic.go b/src/runtime/panic.go index 615249f33c..127843b081 100644 --- a/src/runtime/panic.go +++ b/src/runtime/panic.go @@ -1283,12 +1283,6 @@ func startpanic_m() bool { } } -// throwReportQuirk, if non-nil, is called by throw after dumping the stacks. -// -// TODO(austin): Remove this after Go 1.15 when we remove the -// mlockGsignal workaround. -var throwReportQuirk func() - var didothers bool var deadlock mutex @@ -1335,10 +1329,6 @@ func dopanic_m(gp *g, pc, sp uintptr) bool { printDebugLog() - if throwReportQuirk != nil { - throwReportQuirk() - } - return docrash } diff --git a/src/runtime/string.go b/src/runtime/string.go index 0515b56573..251044231e 100644 --- a/src/runtime/string.go +++ b/src/runtime/string.go @@ -499,37 +499,3 @@ func gostringw(strw *uint16) string { b[n2] = 0 // for luck return s[:n2] } - -// parseRelease parses a dot-separated version number. It follows the -// semver syntax, but allows the minor and patch versions to be -// elided. -func parseRelease(rel string) (major, minor, patch int, ok bool) { - // Strip anything after a dash or plus. - for i := 0; i < len(rel); i++ { - if rel[i] == '-' || rel[i] == '+' { - rel = rel[:i] - break - } - } - - next := func() (int, bool) { - for i := 0; i < len(rel); i++ { - if rel[i] == '.' { - ver, ok := atoi(rel[:i]) - rel = rel[i+1:] - return ver, ok - } - } - ver, ok := atoi(rel) - rel = "" - return ver, ok - } - if major, ok = next(); !ok || rel == "" { - return - } - if minor, ok = next(); !ok || rel == "" { - return - } - patch, ok = next() - return -} diff --git a/src/runtime/string_test.go b/src/runtime/string_test.go index b9ac667533..4eda12c35d 100644 --- a/src/runtime/string_test.go +++ b/src/runtime/string_test.go @@ -454,34 +454,3 @@ func TestAtoi32(t *testing.T) { } } } - -type parseReleaseTest struct { - in string - major, minor, patch int -} - -var parseReleaseTests = []parseReleaseTest{ - {"", -1, -1, -1}, - {"x", -1, -1, -1}, - {"5", 5, 0, 0}, - {"5.12", 5, 12, 0}, - {"5.12-x", 5, 12, 0}, - {"5.12.1", 5, 12, 1}, - {"5.12.1-x", 5, 12, 1}, - {"5.12.1.0", 5, 12, 1}, - {"5.20496382327982653440", -1, -1, -1}, -} - -func TestParseRelease(t *testing.T) { - for _, test := range parseReleaseTests { - major, minor, patch, ok := runtime.ParseRelease(test.in) - if !ok { - major, minor, patch = -1, -1, -1 - } - if test.major != major || test.minor != minor || test.patch != patch { - t.Errorf("parseRelease(%q) = (%v, %v, %v) want (%v, %v, %v)", - test.in, major, minor, patch, - test.major, test.minor, test.patch) - } - } -} diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s index 5b9b638ad7..1e3a834812 100644 --- a/src/runtime/sys_linux_386.s +++ b/src/runtime/sys_linux_386.s @@ -39,8 +39,6 @@ #define SYS_socketcall 102 #define SYS_setittimer 104 #define SYS_clone 120 -#define SYS_uname 122 -#define SYS_mlock 150 #define SYS_sched_yield 158 #define SYS_nanosleep 162 #define SYS_rt_sigreturn 173 @@ -808,20 +806,3 @@ TEXT runtime·sbrk0(SB),NOSPLIT,$0-4 INVOKE_SYSCALL MOVL AX, ret+0(FP) RET - -// func uname(utsname *new_utsname) int -TEXT ·uname(SB),NOSPLIT,$0-8 - MOVL $SYS_uname, AX - MOVL utsname+0(FP), BX - INVOKE_SYSCALL - MOVL AX, ret+4(FP) - RET - -// func mlock(addr, len uintptr) int -TEXT ·mlock(SB),NOSPLIT,$0-12 - MOVL $SYS_mlock, AX - MOVL addr+0(FP), BX - MOVL len+4(FP), CX - INVOKE_SYSCALL - MOVL AX, ret+8(FP) - RET diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index fe9c6bce85..b60057ce83 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -33,10 +33,8 @@ #define SYS_clone 56 #define SYS_exit 60 #define SYS_kill 62 -#define SYS_uname 63 #define SYS_fcntl 72 #define SYS_sigaltstack 131 -#define SYS_mlock 149 #define SYS_arch_prctl 158 #define SYS_gettid 186 #define SYS_futex 202 @@ -789,20 +787,3 @@ TEXT runtime·sbrk0(SB),NOSPLIT,$0-8 SYSCALL MOVQ AX, ret+0(FP) RET - -// func uname(utsname *new_utsname) int -TEXT ·uname(SB),NOSPLIT,$0-16 - MOVQ utsname+0(FP), DI - MOVL $SYS_uname, AX - SYSCALL - MOVQ AX, ret+8(FP) - RET - -// func mlock(addr, len uintptr) int -TEXT ·mlock(SB),NOSPLIT,$0-24 - MOVQ addr+0(FP), DI - MOVQ len+8(FP), SI - MOVL $SYS_mlock, AX - SYSCALL - MOVQ AX, ret+16(FP) - RET -- cgit v1.3 From c0dded04f7ded5048b44200078a1f723f5e1bcc1 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 14 Jul 2020 01:41:03 -0600 Subject: runtime: do not explicitly exit on ctrl handler The default ctrl+c handler should process exits in situations where it makes sense, like console apps, but not in situations where it doesn't, like libraries or services. Therefore, we should remove the exit(2) so that the default handler is used for this. This also uses the more proper windows exit code of STATUS_CONTROL_C_EXIT, with the base case handler installed by KernelBase.dll. In particular, this helps in the case of services, which previously would terminate when receiving shutdown signals, instead of passing them onward to the service program. In this CL, contrary to CL 244959, we do not need to special case services with expensive detection algorithms, or rely on hard-coded library/archive flags. Fixes #40167. Fixes #40074. Change-Id: I9bf6ed6f65cefeff754d270aa33fa4df8d0b451f Reviewed-on: https://go-review.googlesource.com/c/go/+/243597 Run-TryBot: Jason A. Donenfeld TryBot-Result: Gobot Gobot Reviewed-by: Alex Brainman Reviewed-by: Jason A. Donenfeld --- src/runtime/os_windows.go | 5 ----- 1 file changed, 5 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go index a584ada702..a62e941229 100644 --- a/src/runtime/os_windows.go +++ b/src/runtime/os_windows.go @@ -1010,11 +1010,6 @@ func ctrlhandler1(_type uint32) uint32 { if sigsend(s) { return 1 } - if !islibrary && !isarchive { - // Only exit the program if we don't have a DLL. - // See https://golang.org/issues/35965. - exit(2) // SIGINT, SIGTERM, etc - } return 0 } -- cgit v1.3 From cf9b4f63a57b4360be700831781885fc6cf5a0b1 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Tue, 5 May 2020 03:15:58 +1000 Subject: runtime: use riscv64 RDTIME instruction Use the actual RDTIME instruction, rather than a WORD. Generated code is the same. Change-Id: I6f6f5a1836eae2d05af34d4a22db2ede4fdcb458 Reviewed-on: https://go-review.googlesource.com/c/go/+/231997 Reviewed-by: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot --- src/runtime/asm_riscv64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/runtime') diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s index d7c45a183d..8f6c8773eb 100644 --- a/src/runtime/asm_riscv64.s +++ b/src/runtime/asm_riscv64.s @@ -79,7 +79,7 @@ TEXT setg_gcc<>(SB),NOSPLIT,$0-0 // func cputicks() int64 TEXT runtime·cputicks(SB),NOSPLIT,$0-8 - WORD $0xc0102573 // rdtime a0 + RDTIME A0 MOV A0, ret+0(FP) RET -- cgit v1.3 From c6a11f0dd279f374602794af60c7cde4585a1e6f Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Tue, 11 Aug 2020 13:04:48 -0700 Subject: crypto,internal/bytealg: fix assembly that clobbers BP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BP should be callee-save. It will be saved automatically if there is a nonzero frame size. Otherwise, we need to avoid this register. Change-Id: If3f551efa42d830c8793d9f0183cb8daad7a2ab5 Reviewed-on: https://go-review.googlesource.com/c/go/+/248260 Run-TryBot: Keith Randall Reviewed-by: Michael Knyszek Reviewed-by: Martin Möhrmann TryBot-Result: Gobot Gobot --- src/crypto/elliptic/p256_asm_amd64.s | 5 ++-- src/crypto/md5/md5block_amd64.s | 2 +- src/internal/bytealg/index_amd64.s | 52 ++++++++++++++++++------------------ src/runtime/sys_linux_amd64.s | 8 +++--- 4 files changed, 33 insertions(+), 34 deletions(-) (limited to 'src/runtime') diff --git a/src/crypto/elliptic/p256_asm_amd64.s b/src/crypto/elliptic/p256_asm_amd64.s index 7afa54a58c..c77b11bcf2 100644 --- a/src/crypto/elliptic/p256_asm_amd64.s +++ b/src/crypto/elliptic/p256_asm_amd64.s @@ -1336,7 +1336,7 @@ TEXT p256SubInternal(SB),NOSPLIT,$0 RET /* ---------------------------------------*/ -TEXT p256MulInternal(SB),NOSPLIT,$0 +TEXT p256MulInternal(SB),NOSPLIT,$8 MOVQ acc4, mul0 MULQ t0 MOVQ mul0, acc0 @@ -1519,7 +1519,7 @@ TEXT p256MulInternal(SB),NOSPLIT,$0 RET /* ---------------------------------------*/ -TEXT p256SqrInternal(SB),NOSPLIT,$0 +TEXT p256SqrInternal(SB),NOSPLIT,$8 MOVQ acc4, mul0 MULQ acc5 @@ -2345,4 +2345,3 @@ TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$256-48 RET /* ---------------------------------------*/ - diff --git a/src/crypto/md5/md5block_amd64.s b/src/crypto/md5/md5block_amd64.s index 90d932b146..7c7d92d7e8 100644 --- a/src/crypto/md5/md5block_amd64.s +++ b/src/crypto/md5/md5block_amd64.s @@ -13,7 +13,7 @@ // Licence: I hereby disclaim the copyright on this code and place it // in the public domain. -TEXT ·block(SB),NOSPLIT,$0-32 +TEXT ·block(SB),NOSPLIT,$8-32 MOVQ dig+0(FP), BP MOVQ p+8(FP), SI MOVQ p_len+16(FP), DX diff --git a/src/internal/bytealg/index_amd64.s b/src/internal/bytealg/index_amd64.s index 4459820801..6193b57239 100644 --- a/src/internal/bytealg/index_amd64.s +++ b/src/internal/bytealg/index_amd64.s @@ -8,7 +8,7 @@ TEXT ·Index(SB),NOSPLIT,$0-56 MOVQ a_base+0(FP), DI MOVQ a_len+8(FP), DX - MOVQ b_base+24(FP), BP + MOVQ b_base+24(FP), R8 MOVQ b_len+32(FP), AX MOVQ DI, R10 LEAQ ret+48(FP), R11 @@ -17,7 +17,7 @@ TEXT ·Index(SB),NOSPLIT,$0-56 TEXT ·IndexString(SB),NOSPLIT,$0-40 MOVQ a_base+0(FP), DI MOVQ a_len+8(FP), DX - MOVQ b_base+16(FP), BP + MOVQ b_base+16(FP), R8 MOVQ b_len+24(FP), AX MOVQ DI, R10 LEAQ ret+32(FP), R11 @@ -26,7 +26,7 @@ TEXT ·IndexString(SB),NOSPLIT,$0-40 // AX: length of string, that we are searching for // DX: length of string, in which we are searching // DI: pointer to string, in which we are searching -// BP: pointer to string, that we are searching for +// R8: pointer to string, that we are searching for // R11: address, where to put return value // Note: We want len in DX and AX, because PCMPESTRI implicitly consumes them TEXT indexbody<>(SB),NOSPLIT,$0 @@ -37,11 +37,11 @@ TEXT indexbody<>(SB),NOSPLIT,$0 no_sse42: CMPQ AX, $2 JA _3_or_more - MOVW (BP), BP + MOVW (R8), R8 LEAQ -1(DI)(DX*1), DX loop2: MOVW (DI), SI - CMPW SI,BP + CMPW SI,R8 JZ success ADDQ $1,DI CMPQ DI,DX @@ -50,12 +50,12 @@ loop2: _3_or_more: CMPQ AX, $3 JA _4_or_more - MOVW 1(BP), BX - MOVW (BP), BP + MOVW 1(R8), BX + MOVW (R8), R8 LEAQ -2(DI)(DX*1), DX loop3: MOVW (DI), SI - CMPW SI,BP + CMPW SI,R8 JZ partial_success3 ADDQ $1,DI CMPQ DI,DX @@ -72,11 +72,11 @@ partial_success3: _4_or_more: CMPQ AX, $4 JA _5_or_more - MOVL (BP), BP + MOVL (R8), R8 LEAQ -3(DI)(DX*1), DX loop4: MOVL (DI), SI - CMPL SI,BP + CMPL SI,R8 JZ success ADDQ $1,DI CMPQ DI,DX @@ -87,11 +87,11 @@ _5_or_more: JA _8_or_more LEAQ 1(DI)(DX*1), DX SUBQ AX, DX - MOVL -4(BP)(AX*1), BX - MOVL (BP), BP + MOVL -4(R8)(AX*1), BX + MOVL (R8), R8 loop5to7: MOVL (DI), SI - CMPL SI,BP + CMPL SI,R8 JZ partial_success5to7 ADDQ $1,DI CMPQ DI,DX @@ -108,11 +108,11 @@ partial_success5to7: _8_or_more: CMPQ AX, $8 JA _9_or_more - MOVQ (BP), BP + MOVQ (R8), R8 LEAQ -7(DI)(DX*1), DX loop8: MOVQ (DI), SI - CMPQ SI,BP + CMPQ SI,R8 JZ success ADDQ $1,DI CMPQ DI,DX @@ -123,11 +123,11 @@ _9_or_more: JA _16_or_more LEAQ 1(DI)(DX*1), DX SUBQ AX, DX - MOVQ -8(BP)(AX*1), BX - MOVQ (BP), BP + MOVQ -8(R8)(AX*1), BX + MOVQ (R8), R8 loop9to15: MOVQ (DI), SI - CMPQ SI,BP + CMPQ SI,R8 JZ partial_success9to15 ADDQ $1,DI CMPQ DI,DX @@ -144,7 +144,7 @@ partial_success9to15: _16_or_more: CMPQ AX, $16 JA _17_or_more - MOVOU (BP), X1 + MOVOU (R8), X1 LEAQ -15(DI)(DX*1), DX loop16: MOVOU (DI), X2 @@ -161,8 +161,8 @@ _17_or_more: JA _32_or_more LEAQ 1(DI)(DX*1), DX SUBQ AX, DX - MOVOU -16(BP)(AX*1), X0 - MOVOU (BP), X1 + MOVOU -16(R8)(AX*1), X0 + MOVOU (R8), X1 loop17to31: MOVOU (DI), X2 PCMPEQB X1,X2 @@ -188,7 +188,7 @@ partial_success17to31: _32_or_more: CMPQ AX, $32 JA _33_to_63 - VMOVDQU (BP), Y1 + VMOVDQU (R8), Y1 LEAQ -31(DI)(DX*1), DX loop32: VMOVDQU (DI), Y2 @@ -203,8 +203,8 @@ loop32: _33_to_63: LEAQ 1(DI)(DX*1), DX SUBQ AX, DX - VMOVDQU -32(BP)(AX*1), Y0 - VMOVDQU (BP), Y1 + VMOVDQU -32(R8)(AX*1), Y0 + VMOVDQU (R8), Y1 loop33to63: VMOVDQU (DI), Y2 VPCMPEQB Y1, Y2, Y3 @@ -241,10 +241,10 @@ sse42: // This value was determined experimentally and is the ~same // on Nehalem (first with SSE42) and Haswell. JAE _9_or_more - LEAQ 16(BP), SI + LEAQ 16(R8), SI TESTW $0xff0, SI JEQ no_sse42 - MOVOU (BP), X1 + MOVOU (R8), X1 LEAQ -15(DI)(DX*1), SI MOVQ $16, R9 SUBQ AX, R9 // We advance by 16-len(sep) each iteration, so precalculate it into R9 diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index b60057ce83..621c01b365 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -212,7 +212,7 @@ TEXT runtime·walltime1(SB),NOSPLIT,$16-12 // due to stack probes inserted to avoid stack/heap collisions. // See issue #20427. - MOVQ SP, BP // Save old SP; BP unchanged by C code. + MOVQ SP, R12 // Save old SP; R12 unchanged by C code. get_tls(CX) MOVQ g(CX), AX @@ -250,7 +250,7 @@ noswitch: MOVQ 0(SP), AX // sec MOVQ 8(SP), DX // nsec ret: - MOVQ BP, SP // Restore real SP + MOVQ R12, SP // Restore real SP // Restore vdsoPC, vdsoSP // We don't worry about being signaled between the two stores. // If we are not in a signal handler, we'll restore vdsoSP to 0, @@ -277,7 +277,7 @@ fallback: TEXT runtime·nanotime1(SB),NOSPLIT,$16-8 // Switch to g0 stack. See comment above in runtime·walltime. - MOVQ SP, BP // Save old SP; BP unchanged by C code. + MOVQ SP, R12 // Save old SP; R12 unchanged by C code. get_tls(CX) MOVQ g(CX), AX @@ -315,7 +315,7 @@ noswitch: MOVQ 0(SP), AX // sec MOVQ 8(SP), DX // nsec ret: - MOVQ BP, SP // Restore real SP + MOVQ R12, SP // Restore real SP // Restore vdsoPC, vdsoSP // We don't worry about being signaled between the two stores. // If we are not in a signal handler, we'll restore vdsoSP to 0, -- cgit v1.3 From 6f99b33c18266a8858af96163de97173bdf6f081 Mon Sep 17 00:00:00 2001 From: Polina Osadcha Date: Thu, 18 Jun 2020 16:17:13 +0300 Subject: all: replace Replace(..., -1) with ReplaceAll(...) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I8f7cff7a83a9c50bfa3331e8b40e4a6c2e1c0eee Reviewed-on: https://go-review.googlesource.com/c/go/+/245198 Run-TryBot: Martin Möhrmann TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- doc/progs/run.go | 2 +- src/cmd/cover/cover_test.go | 2 +- src/cmd/go/internal/version/version.go | 2 +- src/runtime/mkpreempt.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/runtime') diff --git a/doc/progs/run.go b/doc/progs/run.go index baef3f79f9..8ac75cdcff 100644 --- a/doc/progs/run.go +++ b/doc/progs/run.go @@ -105,7 +105,7 @@ func test(tmpdir, file, want string) error { // Canonicalize output. out = bytes.TrimRight(out, "\n") - out = bytes.Replace(out, []byte{'\n'}, []byte{' '}, -1) + out = bytes.ReplaceAll(out, []byte{'\n'}, []byte{' '}) // Check the result. match, err := regexp.Match(want, out) diff --git a/src/cmd/cover/cover_test.go b/src/cmd/cover/cover_test.go index 8a56e39011..1c252e6e45 100644 --- a/src/cmd/cover/cover_test.go +++ b/src/cmd/cover/cover_test.go @@ -179,7 +179,7 @@ func TestCover(t *testing.T) { } lines := bytes.Split(file, []byte("\n")) for i, line := range lines { - lines[i] = bytes.Replace(line, []byte("LINE"), []byte(fmt.Sprint(i+1)), -1) + lines[i] = bytes.ReplaceAll(line, []byte("LINE"), []byte(fmt.Sprint(i+1))) } // Add a function that is not gofmt'ed. This used to cause a crash. diff --git a/src/cmd/go/internal/version/version.go b/src/cmd/go/internal/version/version.go index 056db7bf9e..c2de8d326d 100644 --- a/src/cmd/go/internal/version/version.go +++ b/src/cmd/go/internal/version/version.go @@ -138,7 +138,7 @@ func scanFile(file string, info os.FileInfo, mustPrint bool) { fmt.Printf("%s: %s\n", file, vers) if *versionM && mod != "" { - fmt.Printf("\t%s\n", strings.Replace(mod[:len(mod)-1], "\n", "\n\t", -1)) + fmt.Printf("\t%s\n", strings.ReplaceAll(mod[:len(mod)-1], "\n", "\n\t")) } } diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index 1fe77663b9..44dea22ef3 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -131,7 +131,7 @@ func header(arch string) { func p(f string, args ...interface{}) { fmted := fmt.Sprintf(f, args...) - fmt.Fprintf(out, "\t%s\n", strings.Replace(fmted, "\n", "\n\t", -1)) + fmt.Fprintf(out, "\t%s\n", strings.ReplaceAll(fmted, "\n", "\n\t")) } func label(l string) { -- cgit v1.3 From f979d072d339a24e4938d46588c153587d61af19 Mon Sep 17 00:00:00 2001 From: Martin Möhrmann Date: Sun, 3 May 2020 16:26:05 +0200 Subject: runtime: avoid memclr call for keys in mapdelete_fast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace memclrHasPointers calls for keys in mapdelete_fast* functions with direct writes since the key sizes are known at compile time. name old time/op new time/op delta MapDelete/Pointer/100 33.7ns ± 1% 23.7ns ± 2% -29.68% (p=0.000 n=7+9) MapDelete/Pointer/1000 41.6ns ± 5% 34.9ns ± 4% -16.01% (p=0.000 n=9+10) MapDelete/Pointer/10000 45.6ns ± 1% 38.2ns ± 2% -16.34% (p=0.000 n=8+10) Change-Id: Icaac43b520b93c2cf9fd192b822fae7203a7bbf7 Reviewed-on: https://go-review.googlesource.com/c/go/+/231737 Run-TryBot: Martin Möhrmann TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- src/runtime/map_fast32.go | 8 ++++++-- src/runtime/map_fast64.go | 8 +++++++- src/runtime/map_test.go | 22 ++++++++++++++++++++++ 3 files changed, 35 insertions(+), 3 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/map_fast32.go b/src/runtime/map_fast32.go index 534454f3ad..d035ed0386 100644 --- a/src/runtime/map_fast32.go +++ b/src/runtime/map_fast32.go @@ -299,8 +299,12 @@ search: continue } // Only clear key if there are pointers in it. - if t.key.ptrdata != 0 { - memclrHasPointers(k, t.key.size) + // This can only happen if pointers are 32 bit + // wide as 64 bit pointers do not fit into a 32 bit key. + if sys.PtrSize == 4 && t.key.ptrdata != 0 { + // The key must be a pointer as we checked pointers are + // 32 bits wide and the key is 32 bits wide also. + *(*unsafe.Pointer)(k) = nil } e := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.elemsize)) if t.elem.ptrdata != 0 { diff --git a/src/runtime/map_fast64.go b/src/runtime/map_fast64.go index 1669c7cfe9..f1f3927598 100644 --- a/src/runtime/map_fast64.go +++ b/src/runtime/map_fast64.go @@ -300,7 +300,13 @@ search: } // Only clear key if there are pointers in it. if t.key.ptrdata != 0 { - memclrHasPointers(k, t.key.size) + if sys.PtrSize == 8 { + *(*unsafe.Pointer)(k) = nil + } else { + // There are three ways to squeeze at one ore more 32 bit pointers into 64 bits. + // Just call memclrHasPointers instead of trying to handle all cases here. + memclrHasPointers(k, 8) + } } e := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.elemsize)) if t.elem.ptrdata != 0 { diff --git a/src/runtime/map_test.go b/src/runtime/map_test.go index 1b7ccad6ed..302b3c23c1 100644 --- a/src/runtime/map_test.go +++ b/src/runtime/map_test.go @@ -993,6 +993,27 @@ func benchmarkMapDeleteStr(b *testing.B, n int) { } } +func benchmarkMapDeletePointer(b *testing.B, n int) { + i2p := make([]*int, n) + for i := 0; i < n; i++ { + i2p[i] = new(int) + } + a := make(map[*int]int, n) + b.ResetTimer() + k := 0 + for i := 0; i < b.N; i++ { + if len(a) == 0 { + b.StopTimer() + for j := 0; j < n; j++ { + a[i2p[j]] = j + } + k = i + b.StartTimer() + } + delete(a, i2p[i-k]) + } +} + func runWith(f func(*testing.B, int), v ...int) func(*testing.B) { return func(b *testing.B) { for _, n := range v { @@ -1023,6 +1044,7 @@ func BenchmarkMapDelete(b *testing.B) { b.Run("Int32", runWith(benchmarkMapDeleteInt32, 100, 1000, 10000)) b.Run("Int64", runWith(benchmarkMapDeleteInt64, 100, 1000, 10000)) b.Run("Str", runWith(benchmarkMapDeleteStr, 100, 1000, 10000)) + b.Run("Pointer", runWith(benchmarkMapDeletePointer, 100, 1000, 10000)) } func TestDeferDeleteSlow(t *testing.T) { -- cgit v1.3 From 681559e1f10f83a053b4ebab101de3d77ede8353 Mon Sep 17 00:00:00 2001 From: "zero.xu" Date: Mon, 17 Aug 2020 07:06:32 +0000 Subject: runtime: update comment: modTimer is called by Timer.Reset Change-Id: I97d0d1343d41b603a68388e496411fb040dc6d66 GitHub-Last-Rev: d11177ad249bd844dd9e7e355eea28596d0b1fa8 GitHub-Pull-Request: golang/go#38625 Reviewed-on: https://go-review.googlesource.com/c/go/+/229767 Reviewed-by: Emmanuel Odeke --- src/runtime/time.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/runtime') diff --git a/src/runtime/time.go b/src/runtime/time.go index fdb5066b24..f895bf8443 100644 --- a/src/runtime/time.go +++ b/src/runtime/time.go @@ -403,7 +403,7 @@ func dodeltimer0(pp *p) { } // modtimer modifies an existing timer. -// This is called by the netpoll code or time.Ticker.Reset. +// This is called by the netpoll code or time.Ticker.Reset or time.Timer.Reset. // Reports whether the timer was modified before it was run. func modtimer(t *timer, when, period int64, f func(interface{}, uintptr), arg interface{}, seq uintptr) bool { if when < 0 { -- cgit v1.3 From d79350bac73670c04a91b6761d334b810201f6ee Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 4 May 2020 18:36:31 +0200 Subject: runtime: use hw.ncpuonline sysctl in getncpu on netbsd Since NetBSD 7, hw.ncpuonline reports the number of CPUs online, while hw.cpu reports the number of CPUs configured. Try hw.cpuonline first and fall back to hw.ncpu in case it fails (which is the case on NetBSD before 7.0). This follows the behavior on OpenBSD (see CL 161757). Also, Go in pkgsrc is patched to use hw.cpuonline, so this CL would allow said patch to be dropped. Updates #30824 Change-Id: Id1c19dff2c1e4401e6074179fae7c708ba0e3098 Reviewed-on: https://go-review.googlesource.com/c/go/+/231957 Run-TryBot: Tobias Klauser TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor Reviewed-by: Benny Siegert --- src/runtime/os_netbsd.go | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/os_netbsd.go b/src/runtime/os_netbsd.go index 97106c7b9d..f7f90cedc1 100644 --- a/src/runtime/os_netbsd.go +++ b/src/runtime/os_netbsd.go @@ -95,18 +95,28 @@ var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0) // From NetBSD's const ( - _CTL_HW = 6 - _HW_NCPU = 3 - _HW_PAGESIZE = 7 + _CTL_HW = 6 + _HW_NCPU = 3 + _HW_PAGESIZE = 7 + _HW_NCPUONLINE = 16 ) -func getncpu() int32 { - mib := [2]uint32{_CTL_HW, _HW_NCPU} - out := uint32(0) +func sysctlInt(mib []uint32) (int32, bool) { + var out int32 nout := unsafe.Sizeof(out) - ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0) - if ret >= 0 { - return int32(out) + ret := sysctl(&mib[0], uint32(len(mib)), (*byte)(unsafe.Pointer(&out)), &nout, nil, 0) + if ret < 0 { + return 0, false + } + return out, true +} + +func getncpu() int32 { + if n, ok := sysctlInt([]uint32{_CTL_HW, _HW_NCPUONLINE}); ok { + return int32(n) + } + if n, ok := sysctlInt([]uint32{_CTL_HW, _HW_NCPU}); ok { + return int32(n) } return 1 } -- cgit v1.3 From dc12d5b0f5e9c1cfec2a8eb6dd7ff3473c36d45c Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 17 Aug 2020 11:28:26 +0200 Subject: all: add empty line between copyright header and package clause Makes sure the copyright notice is not interpreted as the package level godoc. Change-Id: I2afce7c9d620f19d51ec1438b1d0db1774b57146 Reviewed-on: https://go-review.googlesource.com/c/go/+/248760 Run-TryBot: Tobias Klauser TryBot-Result: Gobot Gobot Reviewed-by: Dave Cheney --- src/cmd/compile/internal/ssa/debug.go | 1 + src/cmd/compile/internal/ssa/passbm_test.go | 1 + src/cmd/go/internal/trace/trace.go | 1 + src/cmd/link/internal/benchmark/bench_test.go | 1 + src/cmd/link/internal/ld/errors.go | 1 + src/runtime/closure_test.go | 1 + src/runtime/map_benchmark_test.go | 1 + src/runtime/slice_test.go | 1 + src/sync/cond_test.go | 1 + test/fixedbugs/issue15281.go | 1 + 10 files changed, 10 insertions(+) (limited to 'src/runtime') diff --git a/src/cmd/compile/internal/ssa/debug.go b/src/cmd/compile/internal/ssa/debug.go index 13fe67cbca..6353f72897 100644 --- a/src/cmd/compile/internal/ssa/debug.go +++ b/src/cmd/compile/internal/ssa/debug.go @@ -1,6 +1,7 @@ // Copyright 2017 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + package ssa import ( diff --git a/src/cmd/compile/internal/ssa/passbm_test.go b/src/cmd/compile/internal/ssa/passbm_test.go index eefdbb8722..3fd3eb579b 100644 --- a/src/cmd/compile/internal/ssa/passbm_test.go +++ b/src/cmd/compile/internal/ssa/passbm_test.go @@ -1,6 +1,7 @@ // Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + package ssa import ( diff --git a/src/cmd/go/internal/trace/trace.go b/src/cmd/go/internal/trace/trace.go index 7cb7636a34..c8fac92c9f 100644 --- a/src/cmd/go/internal/trace/trace.go +++ b/src/cmd/go/internal/trace/trace.go @@ -1,6 +1,7 @@ // Copyright 2020 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + package trace import ( diff --git a/src/cmd/link/internal/benchmark/bench_test.go b/src/cmd/link/internal/benchmark/bench_test.go index d8ec717c7c..419dc55724 100644 --- a/src/cmd/link/internal/benchmark/bench_test.go +++ b/src/cmd/link/internal/benchmark/bench_test.go @@ -1,6 +1,7 @@ // Copyright 2020 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + package benchmark import ( diff --git a/src/cmd/link/internal/ld/errors.go b/src/cmd/link/internal/ld/errors.go index c5ce097fde..d6e8ff236d 100644 --- a/src/cmd/link/internal/ld/errors.go +++ b/src/cmd/link/internal/ld/errors.go @@ -1,6 +1,7 @@ // Copyright 2020 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + package ld import ( diff --git a/src/runtime/closure_test.go b/src/runtime/closure_test.go index ea65fbd5f5..741c932eab 100644 --- a/src/runtime/closure_test.go +++ b/src/runtime/closure_test.go @@ -1,6 +1,7 @@ // Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + package runtime_test import "testing" diff --git a/src/runtime/map_benchmark_test.go b/src/runtime/map_benchmark_test.go index 893cb6c5b6..d0becc9ddb 100644 --- a/src/runtime/map_benchmark_test.go +++ b/src/runtime/map_benchmark_test.go @@ -1,6 +1,7 @@ // Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + package runtime_test import ( diff --git a/src/runtime/slice_test.go b/src/runtime/slice_test.go index e963a43dd3..cd2bc26d1e 100644 --- a/src/runtime/slice_test.go +++ b/src/runtime/slice_test.go @@ -1,6 +1,7 @@ // Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + package runtime_test import ( diff --git a/src/sync/cond_test.go b/src/sync/cond_test.go index 9d0d9adc74..859cae59bc 100644 --- a/src/sync/cond_test.go +++ b/src/sync/cond_test.go @@ -1,6 +1,7 @@ // Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + package sync_test import ( diff --git a/test/fixedbugs/issue15281.go b/test/fixedbugs/issue15281.go index 187c96f218..390867c848 100644 --- a/test/fixedbugs/issue15281.go +++ b/test/fixedbugs/issue15281.go @@ -3,6 +3,7 @@ // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + package main import "runtime" -- cgit v1.3 From 7bbd5ca5a6a94f58d33de6b1244248a32dc8cd9c Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 22 Jul 2020 11:21:36 -0400 Subject: runtime: replace index and contains with bytealg calls The runtime has its own implementation of string indexing. To reduce code duplication and cognitive load, replace this with calls to the internal/bytealg package. We can't do this on Plan 9 because it needs string indexing in a note handler (which isn't allowed to use the optimized bytealg version because it uses SSE), so we can't just eliminate the index function, but this CL does down-scope it so make it clear it's only for note handlers on Plan 9. Change-Id: Ie1a142678262048515c481e8c26313b80c5875df Reviewed-on: https://go-review.googlesource.com/c/go/+/244537 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/os_plan9.go | 18 ++++++++++++++++-- src/runtime/proc.go | 3 ++- src/runtime/runtime1.go | 5 +++-- src/runtime/string.go | 16 ---------------- src/runtime/traceback.go | 3 ++- 5 files changed, 23 insertions(+), 22 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/os_plan9.go b/src/runtime/os_plan9.go index 9e187d2220..128c30adeb 100644 --- a/src/runtime/os_plan9.go +++ b/src/runtime/os_plan9.go @@ -82,10 +82,10 @@ func sigpanic() { note := gostringnocopy((*byte)(unsafe.Pointer(g.m.notesig))) switch g.sig { case _SIGRFAULT, _SIGWFAULT: - i := index(note, "addr=") + i := indexNoFloat(note, "addr=") if i >= 0 { i += 5 - } else if i = index(note, "va="); i >= 0 { + } else if i = indexNoFloat(note, "va="); i >= 0 { i += 3 } else { panicmem() @@ -111,6 +111,20 @@ func sigpanic() { } } +// indexNoFloat is bytealg.IndexString but safe to use in a note +// handler. +func indexNoFloat(s, t string) int { + if len(t) == 0 { + return 0 + } + for i := 0; i < len(s); i++ { + if s[i] == t[0] && hasPrefix(s[i:], t) { + return i + } + } + return -1 +} + func atolwhex(p string) int64 { for hasPrefix(p, " ") || hasPrefix(p, "\t") { p = p[1:] diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 035822216d..ed7e2128ae 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/bytealg" "internal/cpu" "runtime/internal/atomic" "runtime/internal/sys" @@ -5460,7 +5461,7 @@ func haveexperiment(name string) bool { x := sys.Goexperiment for x != "" { xname := "" - i := index(x, ",") + i := bytealg.IndexByteString(x, ',') if i < 0 { xname, x = x, "" } else { diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go index c65a534ef6..7c893aa25c 100644 --- a/src/runtime/runtime1.go +++ b/src/runtime/runtime1.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/bytealg" "runtime/internal/atomic" "runtime/internal/sys" "unsafe" @@ -347,13 +348,13 @@ func parsedebugvars() { for p := gogetenv("GODEBUG"); p != ""; { field := "" - i := index(p, ",") + i := bytealg.IndexByteString(p, ',') if i < 0 { field, p = p, "" } else { field, p = p[:i], p[i+1:] } - i = index(field, "=") + i = bytealg.IndexByteString(field, '=') if i < 0 { continue } diff --git a/src/runtime/string.go b/src/runtime/string.go index 251044231e..9a601f0094 100644 --- a/src/runtime/string.go +++ b/src/runtime/string.go @@ -335,22 +335,6 @@ func gostringn(p *byte, l int) string { return s } -func index(s, t string) int { - if len(t) == 0 { - return 0 - } - for i := 0; i < len(s); i++ { - if s[i] == t[0] && hasPrefix(s[i:], t) { - return i - } - } - return -1 -} - -func contains(s, t string) bool { - return index(s, t) >= 0 -} - func hasPrefix(s, prefix string) bool { return len(s) >= len(prefix) && s[:len(prefix)] == prefix } diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 944c8473d2..96e552524e 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/bytealg" "runtime/internal/atomic" "runtime/internal/sys" "unsafe" @@ -848,7 +849,7 @@ func showfuncinfo(f funcInfo, firstFrame bool, funcID, childID funcID) bool { return true } - return contains(name, ".") && (!hasPrefix(name, "runtime.") || isExportedRuntime(name)) + return bytealg.IndexByteString(name, '.') >= 0 && (!hasPrefix(name, "runtime.") || isExportedRuntime(name)) } // isExportedRuntime reports whether name is an exported runtime function. -- cgit v1.3 From 7148abc1b900555199998aac25af11783a9eb41c Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 5 Jun 2020 16:44:29 -0400 Subject: runtime: simplify heapBitsSetType doubleCheck The heapBitsSetType function has a slow doubleCheck debugging mode that checks the bitmap written out by the rest of the function using far more obvious logic. But even this has some surprisingly complex logic in it. Simplify it a bit. This also happens to fix the logic on 32-bit. Fixes #40335. Change-Id: I5cee482ad8adbd01cf5b98e35a270fe941ba4940 Reviewed-on: https://go-review.googlesource.com/c/go/+/244538 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Michael Knyszek --- src/runtime/mbitmap.go | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 35332c91c4..cad6f56404 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -1403,17 +1403,20 @@ Phase4: // Double check the whole bitmap. if doubleCheck { // x+size may not point to the heap, so back up one - // word and then call next(). - end := heapBitsForAddr(x + size - sys.PtrSize).next() - endAI := arenaIdx(end.arena) - if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0])) { - // The unrolling code above walks hbitp just - // past the bitmap without moving to the next - // arena. Synthesize this for end.bitp. - end.arena-- - endAI = arenaIdx(end.arena) - end.bitp = addb(&mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0], heapArenaBitmapBytes) - end.last = nil + // word and then advance it the way we do above. + end := heapBitsForAddr(x + size - sys.PtrSize) + if outOfPlace { + // In out-of-place copying, we just advance + // using next. + end = end.next() + } else { + // Don't use next because that may advance to + // the next arena and the in-place logic + // doesn't do that. + end.shift += heapBitsShift + if end.shift == 4*heapBitsShift { + end.bitp, end.shift = add1(end.bitp), 0 + } } if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) { println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size) @@ -1437,8 +1440,9 @@ Phase4: var have, want uint8 have = (*h.bitp >> h.shift) & (bitPointer | bitScan) if i >= totalptr { - want = 0 // deadmarker if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 { + // heapBitsSetTypeGCProg always fills + // in full nibbles of bitScan. want = bitScan } } else { -- cgit v1.3 From d19fedd180fceb6a60961e19387893ddb047e4e6 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 5 Jun 2020 16:48:03 -0400 Subject: runtime: move checkmarks to a separate bitmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the GC stores the object marks for checkmarks mode in the heap bitmap using a rather complex encoding: for one word objects, the checkmark is stored in the pointer/scalar bit since one word objects must be pointers; for larger objects, the checkmark is stored in what would be the scan/dead bit for the second word of the object. This encoding made more sense when the runtime used the first scan/dead bit as the regular mark bit, but we moved away from that long ago. This encoding and overloading of the heap bitmap bits causes a great deal of complexity in many parts of the allocator and garbage collector and leads to some subtle bugs like #15903. This CL moves the checkmarks mark bits into their own per-arena bitmap and reclaims the second scan/dead bit as a regular scan/dead bit. I tested this by enabling doubleCheck mode in heapBitsSetType and running in both regular and GODEBUG=gccheckmark=1 mode. Fixes #15903. No performance degradation. (Very slight improvement on a few benchmarks, but it's probably just noise.) name old time/op new time/op delta BiogoIgor 16.6s ± 1% 16.4s ± 1% -0.94% (p=0.000 n=25+24) BiogoKrishna 19.2s ± 3% 19.2s ± 3% ~ (p=0.638 n=23+25) BleveIndexBatch100 6.12s ± 5% 6.17s ± 4% ~ (p=0.170 n=25+25) CompileTemplate 206ms ± 1% 205ms ± 1% -0.43% (p=0.005 n=24+24) CompileUnicode 82.2ms ± 2% 81.5ms ± 2% -0.95% (p=0.001 n=22+22) CompileGoTypes 755ms ± 3% 754ms ± 4% ~ (p=0.715 n=25+25) CompileCompiler 3.73s ± 1% 3.73s ± 1% ~ (p=0.445 n=25+24) CompileSSA 8.67s ± 1% 8.66s ± 1% ~ (p=0.836 n=24+22) CompileFlate 134ms ± 2% 133ms ± 1% -0.66% (p=0.001 n=24+23) CompileGoParser 164ms ± 1% 163ms ± 1% -0.85% (p=0.000 n=24+24) CompileReflect 466ms ± 5% 466ms ± 3% ~ (p=0.863 n=25+25) CompileTar 182ms ± 1% 182ms ± 1% -0.31% (p=0.048 n=24+24) CompileXML 249ms ± 1% 248ms ± 1% -0.32% (p=0.031 n=21+25) CompileStdCmd 10.3s ± 1% 10.3s ± 1% ~ (p=0.459 n=23+23) FoglemanFauxGLRenderRotateBoat 8.66s ± 1% 8.62s ± 1% -0.47% (p=0.000 n=23+24) FoglemanPathTraceRenderGopherIter1 20.3s ± 3% 20.2s ± 2% ~ (p=0.893 n=25+25) GopherLuaKNucleotide 29.7s ± 1% 29.8s ± 2% ~ (p=0.421 n=24+25) MarkdownRenderXHTML 246ms ± 1% 247ms ± 1% ~ (p=0.558 n=25+24) Tile38WithinCircle100kmRequest 779µs ± 4% 779µs ± 3% ~ (p=0.954 n=25+25) Tile38IntersectsCircle100kmRequest 1.02ms ± 3% 1.01ms ± 4% ~ (p=0.658 n=25+25) Tile38KNearestLimit100Request 984µs ± 4% 986µs ± 4% ~ (p=0.627 n=24+25) [Geo mean] 552ms 551ms -0.19% https://perf.golang.org/search?q=upload:20200723.6 Change-Id: Ic703f26a83fb034941dc6f4788fc997d56890dec Reviewed-on: https://go-review.googlesource.com/c/go/+/244539 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Michael Knyszek Reviewed-by: Martin Möhrmann --- src/reflect/all_test.go | 5 +- src/runtime/cgocall.go | 2 +- src/runtime/gcinfo_test.go | 19 +---- src/runtime/heapdump.go | 2 +- src/runtime/mbitmap.go | 173 +++++++++------------------------------------ src/runtime/mcheckmark.go | 100 ++++++++++++++++++++++++++ src/runtime/mgc.go | 4 +- src/runtime/mgcmark.go | 70 +----------------- src/runtime/mheap.go | 4 ++ 9 files changed, 148 insertions(+), 231 deletions(-) create mode 100644 src/runtime/mcheckmark.go (limited to 'src/runtime') diff --git a/src/reflect/all_test.go b/src/reflect/all_test.go index 6b31568bb9..ed2f225077 100644 --- a/src/reflect/all_test.go +++ b/src/reflect/all_test.go @@ -6467,12 +6467,9 @@ func verifyGCBitsSlice(t *testing.T, typ Type, cap int, bits []byte) { // Repeat the bitmap for the slice size, trimming scalars in // the last element. bits = rep(cap, bits) - for len(bits) > 2 && bits[len(bits)-1] == 0 { + for len(bits) > 0 && bits[len(bits)-1] == 0 { bits = bits[:len(bits)-1] } - if len(bits) == 2 && bits[0] == 0 && bits[1] == 0 { - bits = bits[:0] - } if !bytes.Equal(heapBits, bits) { t.Errorf("heapBits incorrect for make(%v, 0, %v)\nhave %v\nwant %v", typ, cap, heapBits, bits) } diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index a4e64b00cc..099aa540e0 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -605,7 +605,7 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) { hbits := heapBitsForAddr(base) n := span.elemsize for i = uintptr(0); i < n; i += sys.PtrSize { - if i != 1*sys.PtrSize && !hbits.morePointers() { + if !hbits.morePointers() { // No more possible pointers. break } diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go index ec1ba90c2e..0808b416f0 100644 --- a/src/runtime/gcinfo_test.go +++ b/src/runtime/gcinfo_test.go @@ -77,7 +77,7 @@ func TestGCInfo(t *testing.T) { } for i := 0; i < 10; i++ { - verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(padDead(infoPtr))) + verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(infoPtr)) verifyGCInfo(t, "heap PtrSlice", escape(&make([]*byte, 10)[0]), trimDead(infoPtr10)) verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), trimDead(infoScalarPtr)) verifyGCInfo(t, "heap ScalarPtrSlice", escape(&make([]ScalarPtr, 4)[0]), trimDead(infoScalarPtr4)) @@ -97,25 +97,10 @@ func verifyGCInfo(t *testing.T, name string, p interface{}, mask0 []byte) { } } -func padDead(mask []byte) []byte { - // Because the dead bit isn't encoded in the second word, - // and because on 32-bit systems a one-word allocation - // uses a two-word block, the pointer info for a one-word - // object needs to be expanded to include an extra scalar - // on 32-bit systems to match the heap bitmap. - if runtime.PtrSize == 4 && len(mask) == 1 { - return []byte{mask[0], 0} - } - return mask -} - func trimDead(mask []byte) []byte { - for len(mask) > 2 && mask[len(mask)-1] == typeScalar { + for len(mask) > 0 && mask[len(mask)-1] == typeScalar { mask = mask[:len(mask)-1] } - if len(mask) == 2 && mask[0] == typeScalar && mask[1] == typeScalar { - mask = mask[:0] - } return mask } diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index cfd5c251b4..4c35309211 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -713,7 +713,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector { i := uintptr(0) hbits := heapBitsForAddr(p) for ; i < nptr; i++ { - if i != 1 && !hbits.morePointers() { + if !hbits.morePointers() { break // end of object } if hbits.isPointer() { diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index cad6f56404..8de44c14b9 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -6,10 +6,11 @@ // // Stack, data, and bss bitmaps // -// Stack frames and global variables in the data and bss sections are described -// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer -// to be visited during GC. The bits in each byte are consumed starting with -// the low bit: 1<<0, 1<<1, and so on. +// Stack frames and global variables in the data and bss sections are +// described by bitmaps with 1 bit per pointer-sized word. A "1" bit +// means the word is a live pointer to be visited by the GC (referred to +// as "pointer"). A "0" bit means the word should be ignored by GC +// (referred to as "scalar", though it could be a dead pointer value). // // Heap bitmap // @@ -20,18 +21,13 @@ // through start+3*ptrSize, ha.bitmap[1] holds the entries for // start+4*ptrSize through start+7*ptrSize, and so on. // -// In each 2-bit entry, the lower bit holds the same information as in the 1-bit -// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. -// The meaning of the high bit depends on the position of the word being described -// in its allocated object. In all words *except* the second word, the -// high bit indicates that the object is still being described. In -// these words, if a bit pair with a high bit 0 is encountered, the -// low bit can also be assumed to be 0, and the object description is -// over. This 00 is called the ``dead'' encoding: it signals that the -// rest of the words in the object are uninteresting to the garbage -// collector. -// -// In the second word, the high bit is the GC ``checkmarked'' bit (see below). +// In each 2-bit entry, the lower bit is a pointer/scalar bit, just +// like in the stack/data bitmaps described above. The upper bit +// indicates scan/dead: a "1" value ("scan") indicates that there may +// be pointers in later words of the allocation, and a "0" value +// ("dead") indicates there are no more pointers in the allocation. If +// the upper bit is 0, the lower bit must also be 0, and this +// indicates scanning can ignore the rest of the allocation. // // The 2-bit entries are split when written into the byte, so that the top half // of the byte contains 4 high bits and the bottom half contains 4 low (pointer) @@ -39,38 +35,14 @@ // This form allows a copy from the 1-bit to the 4-bit form to keep the // pointer bits contiguous, instead of having to space them out. // -// The code makes use of the fact that the zero value for a heap bitmap -// has no live pointer bit set and is (depending on position), not used, -// not checkmarked, and is the dead encoding. -// These properties must be preserved when modifying the encoding. +// The code makes use of the fact that the zero value for a heap +// bitmap means scalar/dead. This property must be preserved when +// modifying the encoding. // // The bitmap for noscan spans is not maintained. Code must ensure // that an object is scannable before consulting its bitmap by // checking either the noscan bit in the span or by consulting its // type's information. -// -// Checkmarks -// -// In a concurrent garbage collector, one worries about failing to mark -// a live object due to mutations without write barriers or bugs in the -// collector implementation. As a sanity check, the GC has a 'checkmark' -// mode that retraverses the object graph with the world stopped, to make -// sure that everything that should be marked is marked. -// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry -// for the second word of the object holds the checkmark bit. -// When not in checkmark mode, this bit is set to 1. -// -// The smallest possible allocation is 8 bytes. On a 32-bit machine, that -// means every allocated object has two words, so there is room for the -// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is -// just one word, so the second bit pair is not available for encoding the -// checkmark. However, because non-pointer allocations are combined -// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation -// must be a pointer, so the type bit in the first word is not actually needed. -// It is still used in general, except in checkmark the type bit is repurposed -// as the checkmark bit and then reinitialized (to 1) as the type bit when -// finished. -// package runtime @@ -551,33 +523,6 @@ func (h heapBits) isPointer() bool { return h.bits()&bitPointer != 0 } -// isCheckmarked reports whether the heap bits have the checkmarked bit set. -// It must be told how large the object at h is, because the encoding of the -// checkmark bit varies by size. -// h must describe the initial word of the object. -func (h heapBits) isCheckmarked(size uintptr) bool { - if size == sys.PtrSize { - return (*h.bitp>>h.shift)&bitPointer != 0 - } - // All multiword objects are 2-word aligned, - // so we know that the initial word's 2-bit pair - // and the second word's 2-bit pair are in the - // same heap bitmap byte, *h.bitp. - return (*h.bitp>>(heapBitsShift+h.shift))&bitScan != 0 -} - -// setCheckmarked sets the checkmarked bit. -// It must be told how large the object at h is, because the encoding of the -// checkmark bit varies by size. -// h must describe the initial word of the object. -func (h heapBits) setCheckmarked(size uintptr) { - if size == sys.PtrSize { - atomic.Or8(h.bitp, bitPointer<= nw { goto Phase3 } @@ -1203,14 +1103,13 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // We took care of 1-word and 2-word objects above, // so this is at least a 6-word object. hb = (b & (bitPointer | bitPointer< 1 { + hb |= bitScan << (3 * heapBitsShift) + } b >>= 2 nb -= 2 - // Note: no bitScan for second word because that's - // the checkmark. - *hbitp &^= uint8((bitPointer | bitScan | (bitPointer << heapBitsShift)) << (2 * heapBitsShift)) + *hbitp &^= uint8((bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << (2 * heapBitsShift)) *hbitp |= uint8(hb) hbitp = add1(hbitp) if w += 2; w >= nw { @@ -1449,11 +1348,7 @@ Phase4: if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 { want |= bitPointer } - if i != 1 { - want |= bitScan - } else { - have &^= bitScan - } + want |= bitScan } if have != want { println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size) @@ -2013,7 +1908,7 @@ func getgcmask(ep interface{}) (mask []byte) { if hbits.isPointer() { mask[i/sys.PtrSize] = 1 } - if i != 1*sys.PtrSize && !hbits.morePointers() { + if !hbits.morePointers() { mask = mask[:i/sys.PtrSize] break } diff --git a/src/runtime/mcheckmark.go b/src/runtime/mcheckmark.go new file mode 100644 index 0000000000..1fd8e4e78f --- /dev/null +++ b/src/runtime/mcheckmark.go @@ -0,0 +1,100 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// GC checkmarks +// +// In a concurrent garbage collector, one worries about failing to mark +// a live object due to mutations without write barriers or bugs in the +// collector implementation. As a sanity check, the GC has a 'checkmark' +// mode that retraverses the object graph with the world stopped, to make +// sure that everything that should be marked is marked. + +package runtime + +import ( + "runtime/internal/atomic" + "runtime/internal/sys" + "unsafe" +) + +// A checkmarksMap stores the GC marks in "checkmarks" mode. It is a +// per-arena bitmap with a bit for every word in the arena. The mark +// is stored on the bit corresponding to the first word of the marked +// allocation. +// +//go:notinheap +type checkmarksMap [heapArenaBytes / sys.PtrSize / 8]uint8 + +// If useCheckmark is true, marking of an object uses the checkmark +// bits instead of the standard mark bits. +var useCheckmark = false + +// startCheckmarks prepares for the checkmarks phase. +// +// The world must be stopped. +func startCheckmarks() { + // Clear all checkmarks. + for _, ai := range mheap_.allArenas { + arena := mheap_.arenas[ai.l1()][ai.l2()] + bitmap := arena.checkmarks + + if bitmap == nil { + // Allocate bitmap on first use. + bitmap = (*checkmarksMap)(persistentalloc(unsafe.Sizeof(*bitmap), 0, &memstats.gc_sys)) + if bitmap == nil { + throw("out of memory allocating checkmarks bitmap") + } + arena.checkmarks = bitmap + } else { + // Otherwise clear the existing bitmap. + for i := range bitmap { + bitmap[i] = 0 + } + } + } + // Enable checkmarking. + useCheckmark = true +} + +// endCheckmarks ends the checkmarks phase. +func endCheckmarks() { + if gcMarkWorkAvailable(nil) { + throw("GC work not flushed") + } + useCheckmark = false +} + +// setCheckmark throws if marking object is a checkmarks violation, +// and otherwise sets obj's checkmark. It returns true if obj was +// already checkmarked. +func setCheckmark(obj, base, off uintptr, mbits markBits) bool { + if !mbits.isMarked() { + printlock() + print("runtime: checkmarks found unexpected unmarked object obj=", hex(obj), "\n") + print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n") + + // Dump the source (base) object + gcDumpObject("base", base, off) + + // Dump the object + gcDumpObject("obj", obj, ^uintptr(0)) + + getg().m.traceback = 2 + throw("checkmark found unmarked object") + } + + ai := arenaIndex(obj) + arena := mheap_.arenas[ai.l1()][ai.l2()] + arenaWord := (obj / heapArenaBytes / 8) % uintptr(len(arena.checkmarks)) + mask := byte(1 << ((obj / heapArenaBytes) % 8)) + bytep := &arena.checkmarks[arenaWord] + + if atomic.Load8(bytep)&mask != 0 { + // Already checkmarked. + return true + } + + atomic.Or8(bytep, mask) + return false +} diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index b3499516f6..c8c4a4c758 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1670,13 +1670,13 @@ func gcMarkTermination(nextTriggerRatio float64) { // mark using checkmark bits, to check that we // didn't forget to mark anything during the // concurrent mark process. + startCheckmarks() gcResetMarkState() - initCheckmarks() gcw := &getg().m.p.ptr().gcw gcDrain(gcw, 0) wbBufFlush1(getg().m.p.ptr()) gcw.dispose() - clearCheckmarks() + endCheckmarks() } // marking is complete so we can turn the write barrier off diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index fe988c46d9..96910ff729 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -1354,11 +1354,7 @@ func scanobject(b uintptr, gcw *gcWork) { } // Load bits once. See CL 22712 and issue 16973 for discussion. bits := hbits.bits() - // During checkmarking, 1-word objects store the checkmark - // in the type bit for the one word. The only one-word objects - // are pointers, or else they'd be merged with other non-pointer - // data into larger allocations. - if i != 1*sys.PtrSize && bits&bitScan == 0 { + if bits&bitScan == 0 { break // no more pointers in this object } if bits&bitPointer == 0 { @@ -1511,28 +1507,10 @@ func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintp mbits := span.markBitsForIndex(objIndex) if useCheckmark { - if !mbits.isMarked() { - printlock() - print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n") - print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n") - - // Dump the source (base) object - gcDumpObject("base", base, off) - - // Dump the object - gcDumpObject("obj", obj, ^uintptr(0)) - - getg().m.traceback = 2 - throw("checkmark found unmarked object") - } - hbits := heapBitsForAddr(obj) - if hbits.isCheckmarked(span.elemsize) { + if setCheckmark(obj, base, off, mbits) { + // Already marked. return } - hbits.setCheckmarked(span.elemsize) - if !hbits.isCheckmarked(span.elemsize) { - throw("setCheckmarked and isCheckmarked disagree") - } } else { if debug.gccheckmark > 0 && span.isFree(objIndex) { print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n") @@ -1661,45 +1639,3 @@ func gcMarkTinyAllocs() { greyobject(c.tiny, 0, 0, span, gcw, objIndex) } } - -// Checkmarking - -// To help debug the concurrent GC we remark with the world -// stopped ensuring that any object encountered has their normal -// mark bit set. To do this we use an orthogonal bit -// pattern to indicate the object is marked. The following pattern -// uses the upper two bits in the object's boundary nibble. -// 01: scalar not marked -// 10: pointer not marked -// 11: pointer marked -// 00: scalar marked -// Xoring with 01 will flip the pattern from marked to unmarked and vica versa. -// The higher bit is 1 for pointers and 0 for scalars, whether the object -// is marked or not. -// The first nibble no longer holds the typeDead pattern indicating that the -// there are no more pointers in the object. This information is held -// in the second nibble. - -// If useCheckmark is true, marking of an object uses the -// checkmark bits (encoding above) instead of the standard -// mark bits. -var useCheckmark = false - -//go:nowritebarrier -func initCheckmarks() { - useCheckmark = true - for _, s := range mheap_.allspans { - if s.state.get() == mSpanInUse { - heapBitsForAddr(s.base()).initCheckmarkSpan(s.layout()) - } - } -} - -func clearCheckmarks() { - useCheckmark = false - for _, s := range mheap_.allspans { - if s.state.get() == mSpanInUse { - heapBitsForAddr(s.base()).clearCheckmarkSpan(s.layout()) - } - } -} diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 2c7bfd8a59..6341375160 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -300,6 +300,10 @@ type heapArena struct { // during marking. pageSpecials [pagesPerArena / 8]uint8 + // checkmarks stores the debug.gccheckmark state. It is only + // used if debug.gccheckmark > 0. + checkmarks *checkmarksMap + // zeroedBase marks the first byte of the first page in this // arena which hasn't been used yet and is therefore already // zero. zeroedBase is relative to the arena base. -- cgit v1.3 From 7ee26224436d80dca3f7e98c8fcf21185522d8e6 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Wed, 12 Aug 2020 20:27:57 -0400 Subject: cmd/link: link dynamic library automatically cgo_import_dynamic pragma indicates a symbol is imported from a dynamic library. Currently, the linker does not actually link against the dynamic library, so we have to "force" it by using //go:cgo_import_dynamic _ _ "dylib" syntax, which links in the library unconditionally. This CL changes it to link in the library automatically when a symbol is imported from the library, without using the "force" syntax. (The "force" syntax is still supported.) Remove the unconditional imports in the runtime. Now, Security.framework and CoreFoundation.framework are only linked when the x509 package is imported (or otherwise specified). Fixes #40727. Change-Id: Ied36b1f621cdcc5dc4a8f497cdf1c554a182d0e0 Reviewed-on: https://go-review.googlesource.com/c/go/+/248333 Run-TryBot: Cherry Zhang Reviewed-by: Filippo Valsorda Reviewed-by: Than McIntosh Reviewed-by: Keith Randall TryBot-Result: Gobot Gobot --- src/cmd/link/internal/ld/go.go | 3 +++ src/runtime/sys_darwin.go | 6 ------ 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'src/runtime') diff --git a/src/cmd/link/internal/ld/go.go b/src/cmd/link/internal/ld/go.go index bf5c9ca1ba..b3541c46c0 100644 --- a/src/cmd/link/internal/ld/go.go +++ b/src/cmd/link/internal/ld/go.go @@ -183,6 +183,9 @@ func setCgoAttr(ctxt *Link, lookup func(string, int) loader.Sym, file string, pk hostObjSyms[s] = struct{}{} } havedynamic = 1 + if lib != "" && ctxt.IsDarwin() { + machoadddynlib(lib, ctxt.LinkMode) + } } continue diff --git a/src/runtime/sys_darwin.go b/src/runtime/sys_darwin.go index 06474434c9..e4f19bbf41 100644 --- a/src/runtime/sys_darwin.go +++ b/src/runtime/sys_darwin.go @@ -489,9 +489,3 @@ func setNonblock(fd int32) { //go:cgo_import_dynamic libc_pthread_cond_wait pthread_cond_wait "/usr/lib/libSystem.B.dylib" //go:cgo_import_dynamic libc_pthread_cond_timedwait_relative_np pthread_cond_timedwait_relative_np "/usr/lib/libSystem.B.dylib" //go:cgo_import_dynamic libc_pthread_cond_signal pthread_cond_signal "/usr/lib/libSystem.B.dylib" - -// Magic incantation to get libSystem and friends actually dynamically linked. -// TODO: Why does the code require this? See cmd/link/internal/ld/go.go -//go:cgo_import_dynamic _ _ "/usr/lib/libSystem.B.dylib" -//go:cgo_import_dynamic _ _ "/System/Library/Frameworks/Security.framework/Versions/A/Security" -//go:cgo_import_dynamic _ _ "/System/Library/Frameworks/CoreFoundation.framework/Versions/A/CoreFoundation" -- cgit v1.3 From 260dff3ca3b06385dc298523791a2079162f546e Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 19 Feb 2020 19:45:57 +0000 Subject: runtime: clean up old markrootSpans This change removes the old markrootSpans implementation and deletes the feature flag. Updates #37487. Change-Id: Idb5a2559abcc3be5a7da6f2ccce1a86e1d7634e3 Reviewed-on: https://go-review.googlesource.com/c/go/+/221183 Run-TryBot: Michael Knyszek TryBot-Result: Gobot Gobot Reviewed-by: Michael Pratt Reviewed-by: Austin Clements --- src/runtime/mgcmark.go | 120 ++++----------------------------------------- src/runtime/mgcsweep.go | 2 +- src/runtime/mgcsweepbuf.go | 38 -------------- src/runtime/mheap.go | 8 ++- 4 files changed, 14 insertions(+), 154 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 96910ff729..2b84945471 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -47,10 +47,6 @@ const ( // Must be a multiple of the pageInUse bitmap element size and // must also evenly divide pagesPerArena. pagesPerSpanRoot = 512 - - // go115NewMarkrootSpans is a feature flag that indicates whether - // to use the new bitmap-based markrootSpans implementation. - go115NewMarkrootSpans = true ) // gcMarkRootPrepare queues root scanning jobs (stacks, globals, and @@ -87,24 +83,16 @@ func gcMarkRootPrepare() { // // We depend on addfinalizer to mark objects that get // finalizers after root marking. - if go115NewMarkrootSpans { - // We're going to scan the whole heap (that was available at the time the - // mark phase started, i.e. markArenas) for in-use spans which have specials. - // - // Break up the work into arenas, and further into chunks. - // - // Snapshot allArenas as markArenas. This snapshot is safe because allArenas - // is append-only. - mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)] - work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot) - } else { - // We're only interested in scanning the in-use spans, - // which will all be swept at this point. More spans - // may be added to this list during concurrent GC, but - // we only care about spans that were allocated before - // this mark phase. - work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks() - } + // + // We're going to scan the whole heap (that was available at the time the + // mark phase started, i.e. markArenas) for in-use spans which have specials. + // + // Break up the work into arenas, and further into chunks. + // + // Snapshot allArenas as markArenas. This snapshot is safe because allArenas + // is append-only. + mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)] + work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot) // Scan stacks. // @@ -316,10 +304,6 @@ func markrootFreeGStacks() { // //go:nowritebarrier func markrootSpans(gcw *gcWork, shard int) { - if !go115NewMarkrootSpans { - oldMarkrootSpans(gcw, shard) - return - } // Objects with finalizers have two GC-related invariants: // // 1) Everything reachable from the object must be marked. @@ -396,90 +380,6 @@ func markrootSpans(gcw *gcWork, shard int) { } } -// oldMarkrootSpans marks roots for one shard of work.spans. -// -// For go115NewMarkrootSpans = false. -// -//go:nowritebarrier -func oldMarkrootSpans(gcw *gcWork, shard int) { - // Objects with finalizers have two GC-related invariants: - // - // 1) Everything reachable from the object must be marked. - // This ensures that when we pass the object to its finalizer, - // everything the finalizer can reach will be retained. - // - // 2) Finalizer specials (which are not in the garbage - // collected heap) are roots. In practice, this means the fn - // field must be scanned. - // - // TODO(austin): There are several ideas for making this more - // efficient in issue #11485. - - sg := mheap_.sweepgen - spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard) - // Note that work.spans may not include spans that were - // allocated between entering the scan phase and now. We may - // also race with spans being added into sweepSpans when they're - // just created, and as a result we may see nil pointers in the - // spans slice. This is okay because any objects with finalizers - // in those spans must have been allocated and given finalizers - // after we entered the scan phase, so addfinalizer will have - // ensured the above invariants for them. - for i := 0; i < len(spans); i++ { - // sweepBuf.block requires that we read pointers from the block atomically. - // It also requires that we ignore nil pointers. - s := (*mspan)(atomic.Loadp(unsafe.Pointer(&spans[i]))) - - // This is racing with spans being initialized, so - // check the state carefully. - if s == nil || s.state.get() != mSpanInUse { - continue - } - // Check that this span was swept (it may be cached or uncached). - if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) { - // sweepgen was updated (+2) during non-checkmark GC pass - print("sweep ", s.sweepgen, " ", sg, "\n") - throw("gc: unswept span") - } - - // Speculatively check if there are any specials - // without acquiring the span lock. This may race with - // adding the first special to a span, but in that - // case addfinalizer will observe that the GC is - // active (which is globally synchronized) and ensure - // the above invariants. We may also ensure the - // invariants, but it's okay to scan an object twice. - if s.specials == nil { - continue - } - - // Lock the specials to prevent a special from being - // removed from the list while we're traversing it. - lock(&s.speciallock) - - for sp := s.specials; sp != nil; sp = sp.next { - if sp.kind != _KindSpecialFinalizer { - continue - } - // don't mark finalized object, but scan it so we - // retain everything it points to. - spf := (*specialfinalizer)(unsafe.Pointer(sp)) - // A finalizer can be set for an inner byte of an object, find object beginning. - p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize - - // Mark everything that can be reached from - // the object (but *not* the object itself or - // we'll never collect it). - scanobject(p, gcw) - - // The special itself is a root. - scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil) - } - - unlock(&s.speciallock) - } -} - // gcAssistAlloc performs GC work to make gp's assist debt positive. // gp must be the calling user gorountine. // diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 3aa3afc028..9244174403 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -662,7 +662,7 @@ func (s *mspan) oldSweep(preserve bool) bool { special = *specialp } } - if go115NewMarkrootSpans && hadSpecials && s.specials == nil { + if hadSpecials && s.specials == nil { spanHasNoSpecials(s) } diff --git a/src/runtime/mgcsweepbuf.go b/src/runtime/mgcsweepbuf.go index 1f722c3d58..5e5ca3dd2f 100644 --- a/src/runtime/mgcsweepbuf.go +++ b/src/runtime/mgcsweepbuf.go @@ -136,41 +136,3 @@ func (b *gcSweepBuf) pop() *mspan { block.spans[bottom] = nil return s } - -// numBlocks returns the number of blocks in buffer b. numBlocks is -// safe to call concurrently with any other operation. Spans that have -// been pushed prior to the call to numBlocks are guaranteed to appear -// in some block in the range [0, numBlocks()), assuming there are no -// intervening pops. Spans that are pushed after the call may also -// appear in these blocks. -func (b *gcSweepBuf) numBlocks() int { - return int(divRoundUp(uintptr(atomic.Load(&b.index)), gcSweepBlockEntries)) -} - -// block returns the spans in the i'th block of buffer b. block is -// safe to call concurrently with push. The block may contain nil -// pointers that must be ignored, and each entry in the block must be -// loaded atomically. -func (b *gcSweepBuf) block(i int) []*mspan { - // Perform bounds check before loading spine address since - // push ensures the allocated length is at least spineLen. - if i < 0 || uintptr(i) >= atomic.Loaduintptr(&b.spineLen) { - throw("block index out of range") - } - - // Get block i. - spine := atomic.Loadp(unsafe.Pointer(&b.spine)) - blockp := add(spine, sys.PtrSize*uintptr(i)) - block := (*gcSweepBlock)(atomic.Loadp(blockp)) - - // Slice the block if necessary. - cursor := uintptr(atomic.Load(&b.index)) - top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries - var spans []*mspan - if uintptr(i) < top { - spans = block.spans[:] - } else { - spans = block.spans[:bottom] - } - return spans -} diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 6341375160..0807726863 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -52,7 +52,7 @@ const ( // The definition of this flag helps ensure that if there's a problem with // the new markroot spans implementation and it gets turned off, that the new // mcentral implementation also gets turned off so the runtime isn't broken. - go115NewMCentralImpl = true && go115NewMarkrootSpans + go115NewMCentralImpl = true ) // Main malloc heap. @@ -1705,9 +1705,7 @@ func addspecial(p unsafe.Pointer, s *special) bool { s.offset = uint16(offset) s.next = *t *t = s - if go115NewMarkrootSpans { - spanHasSpecials(span) - } + spanHasSpecials(span) unlock(&span.speciallock) releasem(mp) @@ -1748,7 +1746,7 @@ func removespecial(p unsafe.Pointer, kind uint8) *special { } t = &s.next } - if go115NewMarkrootSpans && span.specials == nil { + if span.specials == nil { spanHasNoSpecials(span) } unlock(&span.speciallock) -- cgit v1.3 From e6d0bd2b8951bde6f0ac6421f20e18efc7ba0cdb Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 19 Feb 2020 16:37:48 +0000 Subject: runtime: clean up old mcentral code This change deletes the old mcentral implementation from the code base and the newMCentralImpl feature flag along with it. Updates #37487. Change-Id: Ibca8f722665f0865051f649ffe699cbdbfdcfcf2 Reviewed-on: https://go-review.googlesource.com/c/go/+/221184 Run-TryBot: Michael Knyszek TryBot-Result: Gobot Gobot Reviewed-by: Austin Clements Reviewed-by: Michael Pratt --- src/runtime/lockrank.go | 16 +-- src/runtime/malloc.go | 8 +- src/runtime/mcache.go | 6 +- src/runtime/mcentral.go | 239 +-------------------------------------------- src/runtime/mgc.go | 10 +- src/runtime/mgcsweep.go | 237 ++------------------------------------------ src/runtime/mgcsweepbuf.go | 138 -------------------------- src/runtime/mheap.go | 36 +------ 8 files changed, 25 insertions(+), 665 deletions(-) delete mode 100644 src/runtime/mgcsweepbuf.go (limited to 'src/runtime') diff --git a/src/runtime/lockrank.go b/src/runtime/lockrank.go index 000193585d..b23cf767be 100644 --- a/src/runtime/lockrank.go +++ b/src/runtime/lockrank.go @@ -67,8 +67,6 @@ const ( lockRankRwmutexW lockRankRwmutexR - lockRankMcentral // For !go115NewMCentralImpl - lockRankSpine // For !go115NewMCentralImpl lockRankSpanSetSpine lockRankGscan lockRankStackpool @@ -149,8 +147,6 @@ var lockNames = []string{ lockRankRwmutexW: "rwmutexW", lockRankRwmutexR: "rwmutexR", - lockRankMcentral: "mcentral", - lockRankSpine: "spine", lockRankSpanSetSpine: "spanSetSpine", lockRankGscan: "gscan", lockRankStackpool: "stackpool", @@ -228,18 +224,16 @@ var lockPartialOrder [][]lockRank = [][]lockRank{ lockRankRwmutexW: {}, lockRankRwmutexR: {lockRankRwmutexW}, - lockRankMcentral: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, - lockRankSpine: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, lockRankSpanSetSpine: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, - lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankNotifyList, lockRankProf, lockRankGcBitsArenas, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankMcentral, lockRankSpine, lockRankSpanSetSpine}, - lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankMcentral, lockRankSpine, lockRankSpanSetSpine, lockRankGscan}, - lockRankStackLarge: {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral, lockRankSpanSetSpine, lockRankGscan}, + lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankNotifyList, lockRankProf, lockRankGcBitsArenas, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankSpanSetSpine}, + lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankSpanSetSpine, lockRankGscan}, + lockRankStackLarge: {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan}, lockRankDefer: {}, lockRankSudog: {lockRankNotifyList, lockRankHchan}, lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankRoot, lockRankGscan, lockRankDefer, lockRankSudog}, - lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans, lockRankSpanSetSpine}, + lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans, lockRankSpanSetSpine}, lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, - lockRankGlobalAlloc: {lockRankProf, lockRankSpine, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial}, + lockRankGlobalAlloc: {lockRankProf, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial}, lockRankGFree: {lockRankSched}, lockRankHchanLeaf: {lockRankGscan, lockRankHchanLeaf}, diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index b3fac3de24..e46327f9ce 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1178,11 +1178,9 @@ func largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { if s == nil { throw("out of memory") } - if go115NewMCentralImpl { - // Put the large span in the mcentral swept list so that it's - // visible to the background sweeper. - mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s) - } + // Put the large span in the mcentral swept list so that it's + // visible to the background sweeper. + mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s) s.limit = s.base() + size heapBitsForAddr(s.base()).initSpan(s) return s diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index 5bceb51ac9..7a7d33ccae 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -131,11 +131,7 @@ func (c *mcache) refill(spc spanClass) { if s.sweepgen != mheap_.sweepgen+3 { throw("bad sweepgen in refill") } - if go115NewMCentralImpl { - mheap_.central[spc].mcentral.uncacheSpan(s) - } else { - atomic.Store(&s.sweepgen, mheap_.sweepgen) - } + mheap_.central[spc].mcentral.uncacheSpan(s) } // Get a new cached span from the central lists. diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go index ed49d86d0c..ed49e01677 100644 --- a/src/runtime/mcentral.go +++ b/src/runtime/mcentral.go @@ -18,7 +18,6 @@ import "runtime/internal/atomic" // //go:notinheap type mcentral struct { - lock mutex spanclass spanClass // For !go115NewMCentralImpl. @@ -55,16 +54,10 @@ type mcentral struct { // Initialize a single central free list. func (c *mcentral) init(spc spanClass) { c.spanclass = spc - if go115NewMCentralImpl { - lockInit(&c.partial[0].spineLock, lockRankSpanSetSpine) - lockInit(&c.partial[1].spineLock, lockRankSpanSetSpine) - lockInit(&c.full[0].spineLock, lockRankSpanSetSpine) - lockInit(&c.full[1].spineLock, lockRankSpanSetSpine) - } else { - c.nonempty.init() - c.empty.init() - lockInit(&c.lock, lockRankMcentral) - } + lockInit(&c.partial[0].spineLock, lockRankSpanSetSpine) + lockInit(&c.partial[1].spineLock, lockRankSpanSetSpine) + lockInit(&c.full[0].spineLock, lockRankSpanSetSpine) + lockInit(&c.full[1].spineLock, lockRankSpanSetSpine) } // partialUnswept returns the spanSet which holds partially-filled @@ -93,9 +86,6 @@ func (c *mcentral) fullSwept(sweepgen uint32) *spanSet { // Allocate a span to use in an mcache. func (c *mcentral) cacheSpan() *mspan { - if !go115NewMCentralImpl { - return c.oldCacheSpan() - } // Deduct credit for this span allocation and sweep if necessary. spanBytes := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) * _PageSize deductSweepCredit(spanBytes, 0) @@ -213,127 +203,11 @@ havespan: return s } -// Allocate a span to use in an mcache. -// -// For !go115NewMCentralImpl. -func (c *mcentral) oldCacheSpan() *mspan { - // Deduct credit for this span allocation and sweep if necessary. - spanBytes := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) * _PageSize - deductSweepCredit(spanBytes, 0) - - lock(&c.lock) - traceDone := false - if trace.enabled { - traceGCSweepStart() - } - sg := mheap_.sweepgen -retry: - var s *mspan - for s = c.nonempty.first; s != nil; s = s.next { - if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) { - c.nonempty.remove(s) - c.empty.insertBack(s) - unlock(&c.lock) - s.sweep(true) - goto havespan - } - if s.sweepgen == sg-1 { - // the span is being swept by background sweeper, skip - continue - } - // we have a nonempty span that does not require sweeping, allocate from it - c.nonempty.remove(s) - c.empty.insertBack(s) - unlock(&c.lock) - goto havespan - } - - for s = c.empty.first; s != nil; s = s.next { - if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) { - // we have an empty span that requires sweeping, - // sweep it and see if we can free some space in it - c.empty.remove(s) - // swept spans are at the end of the list - c.empty.insertBack(s) - unlock(&c.lock) - s.sweep(true) - freeIndex := s.nextFreeIndex() - if freeIndex != s.nelems { - s.freeindex = freeIndex - goto havespan - } - lock(&c.lock) - // the span is still empty after sweep - // it is already in the empty list, so just retry - goto retry - } - if s.sweepgen == sg-1 { - // the span is being swept by background sweeper, skip - continue - } - // already swept empty span, - // all subsequent ones must also be either swept or in process of sweeping - break - } - if trace.enabled { - traceGCSweepDone() - traceDone = true - } - unlock(&c.lock) - - // Replenish central list if empty. - s = c.grow() - if s == nil { - return nil - } - lock(&c.lock) - c.empty.insertBack(s) - unlock(&c.lock) - - // At this point s is a non-empty span, queued at the end of the empty list, - // c is unlocked. -havespan: - if trace.enabled && !traceDone { - traceGCSweepDone() - } - n := int(s.nelems) - int(s.allocCount) - if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems { - throw("span has no free objects") - } - // Assume all objects from this span will be allocated in the - // mcache. If it gets uncached, we'll adjust this. - atomic.Xadd64(&c.nmalloc, int64(n)) - usedBytes := uintptr(s.allocCount) * s.elemsize - atomic.Xadd64(&memstats.heap_live, int64(spanBytes)-int64(usedBytes)) - if trace.enabled { - // heap_live changed. - traceHeapAlloc() - } - if gcBlackenEnabled != 0 { - // heap_live changed. - gcController.revise() - } - freeByteBase := s.freeindex &^ (64 - 1) - whichByte := freeByteBase / 8 - // Init alloc bits cache. - s.refillAllocCache(whichByte) - - // Adjust the allocCache so that s.freeindex corresponds to the low bit in - // s.allocCache. - s.allocCache >>= s.freeindex % 64 - - return s -} - // Return span from an mcache. // // s must have a span class corresponding to this // mcentral and it must not be empty. func (c *mcentral) uncacheSpan(s *mspan) { - if !go115NewMCentralImpl { - c.oldUncacheSpan(s) - return - } if s.allocCount == 0 { throw("uncaching span but s.allocCount == 0") } @@ -393,111 +267,6 @@ func (c *mcentral) uncacheSpan(s *mspan) { } } -// Return span from an mcache. -// -// For !go115NewMCentralImpl. -func (c *mcentral) oldUncacheSpan(s *mspan) { - if s.allocCount == 0 { - throw("uncaching span but s.allocCount == 0") - } - - sg := mheap_.sweepgen - stale := s.sweepgen == sg+1 - if stale { - // Span was cached before sweep began. It's our - // responsibility to sweep it. - // - // Set sweepgen to indicate it's not cached but needs - // sweeping and can't be allocated from. sweep will - // set s.sweepgen to indicate s is swept. - atomic.Store(&s.sweepgen, sg-1) - } else { - // Indicate that s is no longer cached. - atomic.Store(&s.sweepgen, sg) - } - - n := int(s.nelems) - int(s.allocCount) - if n > 0 { - // cacheSpan updated alloc assuming all objects on s - // were going to be allocated. Adjust for any that - // weren't. We must do this before potentially - // sweeping the span. - atomic.Xadd64(&c.nmalloc, -int64(n)) - - lock(&c.lock) - c.empty.remove(s) - c.nonempty.insert(s) - if !stale { - // mCentral_CacheSpan conservatively counted - // unallocated slots in heap_live. Undo this. - // - // If this span was cached before sweep, then - // heap_live was totally recomputed since - // caching this span, so we don't do this for - // stale spans. - atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize)) - } - unlock(&c.lock) - } - - if stale { - // Now that s is in the right mcentral list, we can - // sweep it. - s.sweep(false) - } -} - -// freeSpan updates c and s after sweeping s. -// It sets s's sweepgen to the latest generation, -// and, based on the number of free objects in s, -// moves s to the appropriate list of c or returns it -// to the heap. -// freeSpan reports whether s was returned to the heap. -// If preserve=true, it does not move s (the caller -// must take care of it). -// -// For !go115NewMCentralImpl. -func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool { - if sg := mheap_.sweepgen; s.sweepgen == sg+1 || s.sweepgen == sg+3 { - throw("freeSpan given cached span") - } - s.needzero = 1 - - if preserve { - // preserve is set only when called from (un)cacheSpan above, - // the span must be in the empty list. - if !s.inList() { - throw("can't preserve unlinked span") - } - atomic.Store(&s.sweepgen, mheap_.sweepgen) - return false - } - - lock(&c.lock) - - // Move to nonempty if necessary. - if wasempty { - c.empty.remove(s) - c.nonempty.insert(s) - } - - // delay updating sweepgen until here. This is the signal that - // the span may be used in an mcache, so it must come after the - // linked list operations above (actually, just after the - // lock of c above.) - atomic.Store(&s.sweepgen, mheap_.sweepgen) - - if s.allocCount != 0 { - unlock(&c.lock) - return false - } - - c.nonempty.remove(s) - unlock(&c.lock) - mheap_.freeSpan(s) - return true -} - // grow allocates a new empty span from the heap and initializes it for c's size class. func (c *mcentral) grow() *mspan { npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index c8c4a4c758..bd87144355 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -2149,21 +2149,13 @@ func gcSweep(mode gcMode) { lock(&mheap_.lock) mheap_.sweepgen += 2 mheap_.sweepdone = 0 - if !go115NewMCentralImpl && mheap_.sweepSpans[mheap_.sweepgen/2%2].index != 0 { - // We should have drained this list during the last - // sweep phase. We certainly need to start this phase - // with an empty swept list. - throw("non-empty swept list") - } mheap_.pagesSwept = 0 mheap_.sweepArenas = mheap_.allArenas mheap_.reclaimIndex = 0 mheap_.reclaimCredit = 0 unlock(&mheap_.lock) - if go115NewMCentralImpl { - sweep.centralIndex.clear() - } + sweep.centralIndex.clear() if !_ConcurrentSweep || mode == gcForceBlockMode { // Special case synchronous sweep. diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 9244174403..6b8c56ce35 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -132,17 +132,15 @@ func finishsweep_m() { sweep.npausesweep++ } - if go115NewMCentralImpl { - // Reset all the unswept buffers, which should be empty. - // Do this in sweep termination as opposed to mark termination - // so that we can catch unswept spans and reclaim blocks as - // soon as possible. - sg := mheap_.sweepgen - for i := range mheap_.central { - c := &mheap_.central[i].mcentral - c.partialUnswept(sg).reset() - c.fullUnswept(sg).reset() - } + // Reset all the unswept buffers, which should be empty. + // Do this in sweep termination as opposed to mark termination + // so that we can catch unswept spans and reclaim blocks as + // soon as possible. + sg := mheap_.sweepgen + for i := range mheap_.central { + c := &mheap_.central[i].mcentral + c.partialUnswept(sg).reset() + c.fullUnswept(sg).reset() } // Sweeping is done, so if the scavenger isn't already awake, @@ -202,11 +200,7 @@ func sweepone() uintptr { var s *mspan sg := mheap_.sweepgen for { - if go115NewMCentralImpl { - s = mheap_.nextSpanForSweep() - } else { - s = mheap_.sweepSpans[1-sg/2%2].pop() - } + s = mheap_.nextSpanForSweep() if s == nil { atomic.Store(&mheap_.sweepdone, 1) break @@ -322,9 +316,6 @@ func (s *mspan) ensureSwept() { // If preserve=true, don't return it to heap nor relink in mcentral lists; // caller takes care of it. func (s *mspan) sweep(preserve bool) bool { - if !go115NewMCentralImpl { - return s.oldSweep(preserve) - } // It's critical that we enter this function with preemption disabled, // GC must not start while we are in the middle of this function. _g_ := getg() @@ -568,214 +559,6 @@ func (s *mspan) sweep(preserve bool) bool { return false } -// Sweep frees or collects finalizers for blocks not marked in the mark phase. -// It clears the mark bits in preparation for the next GC round. -// Returns true if the span was returned to heap. -// If preserve=true, don't return it to heap nor relink in mcentral lists; -// caller takes care of it. -// -// For !go115NewMCentralImpl. -func (s *mspan) oldSweep(preserve bool) bool { - // It's critical that we enter this function with preemption disabled, - // GC must not start while we are in the middle of this function. - _g_ := getg() - if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 { - throw("mspan.sweep: m is not locked") - } - sweepgen := mheap_.sweepgen - if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 { - print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") - throw("mspan.sweep: bad span state") - } - - if trace.enabled { - traceGCSweepSpan(s.npages * _PageSize) - } - - atomic.Xadd64(&mheap_.pagesSwept, int64(s.npages)) - - spc := s.spanclass - size := s.elemsize - res := false - - c := _g_.m.p.ptr().mcache - freeToHeap := false - - // The allocBits indicate which unmarked objects don't need to be - // processed since they were free at the end of the last GC cycle - // and were not allocated since then. - // If the allocBits index is >= s.freeindex and the bit - // is not marked then the object remains unallocated - // since the last GC. - // This situation is analogous to being on a freelist. - - // Unlink & free special records for any objects we're about to free. - // Two complications here: - // 1. An object can have both finalizer and profile special records. - // In such case we need to queue finalizer for execution, - // mark the object as live and preserve the profile special. - // 2. A tiny object can have several finalizers setup for different offsets. - // If such object is not marked, we need to queue all finalizers at once. - // Both 1 and 2 are possible at the same time. - hadSpecials := s.specials != nil - specialp := &s.specials - special := *specialp - for special != nil { - // A finalizer can be set for an inner byte of an object, find object beginning. - objIndex := uintptr(special.offset) / size - p := s.base() + objIndex*size - mbits := s.markBitsForIndex(objIndex) - if !mbits.isMarked() { - // This object is not marked and has at least one special record. - // Pass 1: see if it has at least one finalizer. - hasFin := false - endOffset := p - s.base() + size - for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next { - if tmp.kind == _KindSpecialFinalizer { - // Stop freeing of object if it has a finalizer. - mbits.setMarkedNonAtomic() - hasFin = true - break - } - } - // Pass 2: queue all finalizers _or_ handle profile record. - for special != nil && uintptr(special.offset) < endOffset { - // Find the exact byte for which the special was setup - // (as opposed to object beginning). - p := s.base() + uintptr(special.offset) - if special.kind == _KindSpecialFinalizer || !hasFin { - // Splice out special record. - y := special - special = special.next - *specialp = special - freespecial(y, unsafe.Pointer(p), size) - } else { - // This is profile record, but the object has finalizers (so kept alive). - // Keep special record. - specialp = &special.next - special = *specialp - } - } - } else { - // object is still live: keep special record - specialp = &special.next - special = *specialp - } - } - if hadSpecials && s.specials == nil { - spanHasNoSpecials(s) - } - - if debug.allocfreetrace != 0 || debug.clobberfree != 0 || raceenabled || msanenabled { - // Find all newly freed objects. This doesn't have to - // efficient; allocfreetrace has massive overhead. - mbits := s.markBitsForBase() - abits := s.allocBitsForIndex(0) - for i := uintptr(0); i < s.nelems; i++ { - if !mbits.isMarked() && (abits.index < s.freeindex || abits.isMarked()) { - x := s.base() + i*s.elemsize - if debug.allocfreetrace != 0 { - tracefree(unsafe.Pointer(x), size) - } - if debug.clobberfree != 0 { - clobberfree(unsafe.Pointer(x), size) - } - if raceenabled { - racefree(unsafe.Pointer(x), size) - } - if msanenabled { - msanfree(unsafe.Pointer(x), size) - } - } - mbits.advance() - abits.advance() - } - } - - // Count the number of free objects in this span. - nalloc := uint16(s.countAlloc()) - if spc.sizeclass() == 0 && nalloc == 0 { - s.needzero = 1 - freeToHeap = true - } - nfreed := s.allocCount - nalloc - if nalloc > s.allocCount { - print("runtime: nelems=", s.nelems, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n") - throw("sweep increased allocation count") - } - - s.allocCount = nalloc - wasempty := s.nextFreeIndex() == s.nelems - s.freeindex = 0 // reset allocation index to start of span. - if trace.enabled { - getg().m.p.ptr().traceReclaimed += uintptr(nfreed) * s.elemsize - } - - // gcmarkBits becomes the allocBits. - // get a fresh cleared gcmarkBits in preparation for next GC - s.allocBits = s.gcmarkBits - s.gcmarkBits = newMarkBits(s.nelems) - - // Initialize alloc bits cache. - s.refillAllocCache(0) - - // We need to set s.sweepgen = h.sweepgen only when all blocks are swept, - // because of the potential for a concurrent free/SetFinalizer. - // But we need to set it before we make the span available for allocation - // (return it to heap or mcentral), because allocation code assumes that a - // span is already swept if available for allocation. - if freeToHeap || nfreed == 0 { - // The span must be in our exclusive ownership until we update sweepgen, - // check for potential races. - if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 { - print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") - throw("mspan.sweep: bad span state after sweep") - } - // Serialization point. - // At this point the mark bits are cleared and allocation ready - // to go so release the span. - atomic.Store(&s.sweepgen, sweepgen) - } - - if nfreed > 0 && spc.sizeclass() != 0 { - c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed) - res = mheap_.central[spc].mcentral.freeSpan(s, preserve, wasempty) - // mcentral.freeSpan updates sweepgen - } else if freeToHeap { - // Free large span to heap - - // NOTE(rsc,dvyukov): The original implementation of efence - // in CL 22060046 used sysFree instead of sysFault, so that - // the operating system would eventually give the memory - // back to us again, so that an efence program could run - // longer without running out of memory. Unfortunately, - // calling sysFree here without any kind of adjustment of the - // heap data structures means that when the memory does - // come back to us, we have the wrong metadata for it, either in - // the mspan structures or in the garbage collection bitmap. - // Using sysFault here means that the program will run out of - // memory fairly quickly in efence mode, but at least it won't - // have mysterious crashes due to confused memory reuse. - // It should be possible to switch back to sysFree if we also - // implement and then call some kind of mheap.deleteSpan. - if debug.efence > 0 { - s.limit = 0 // prevent mlookup from finding this span - sysFault(unsafe.Pointer(s.base()), size) - } else { - mheap_.freeSpan(s) - } - c.local_nlargefree++ - c.local_largefree += size - res = true - } - if !res { - // The span has been swept and is still in-use, so put - // it on the swept in-use list. - mheap_.sweepSpans[sweepgen/2%2].push(s) - } - return res -} - // reportZombies reports any marked but free objects in s and throws. // // This generally means one of the following: diff --git a/src/runtime/mgcsweepbuf.go b/src/runtime/mgcsweepbuf.go deleted file mode 100644 index 5e5ca3dd2f..0000000000 --- a/src/runtime/mgcsweepbuf.go +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package runtime - -import ( - "internal/cpu" - "runtime/internal/atomic" - "runtime/internal/sys" - "unsafe" -) - -// A gcSweepBuf is a set of *mspans. -// -// gcSweepBuf is safe for concurrent push operations *or* concurrent -// pop operations, but not both simultaneously. -type gcSweepBuf struct { - // A gcSweepBuf is a two-level data structure consisting of a - // growable spine that points to fixed-sized blocks. The spine - // can be accessed without locks, but adding a block or - // growing it requires taking the spine lock. - // - // Because each mspan covers at least 8K of heap and takes at - // most 8 bytes in the gcSweepBuf, the growth of the spine is - // quite limited. - // - // The spine and all blocks are allocated off-heap, which - // allows this to be used in the memory manager and avoids the - // need for write barriers on all of these. We never release - // this memory because there could be concurrent lock-free - // access and we're likely to reuse it anyway. (In principle, - // we could do this during STW.) - - spineLock mutex - spine unsafe.Pointer // *[N]*gcSweepBlock, accessed atomically - spineLen uintptr // Spine array length, accessed atomically - spineCap uintptr // Spine array cap, accessed under lock - - // index is the first unused slot in the logical concatenation - // of all blocks. It is accessed atomically. - index uint32 -} - -const ( - gcSweepBlockEntries = 512 // 4KB on 64-bit - gcSweepBufInitSpineCap = 256 // Enough for 1GB heap on 64-bit -) - -type gcSweepBlock struct { - spans [gcSweepBlockEntries]*mspan -} - -// push adds span s to buffer b. push is safe to call concurrently -// with other push operations, but NOT to call concurrently with pop. -func (b *gcSweepBuf) push(s *mspan) { - // Obtain our slot. - cursor := uintptr(atomic.Xadd(&b.index, +1) - 1) - top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries - - // Do we need to add a block? - spineLen := atomic.Loaduintptr(&b.spineLen) - var block *gcSweepBlock -retry: - if top < spineLen { - spine := atomic.Loadp(unsafe.Pointer(&b.spine)) - blockp := add(spine, sys.PtrSize*top) - block = (*gcSweepBlock)(atomic.Loadp(blockp)) - } else { - // Add a new block to the spine, potentially growing - // the spine. - lock(&b.spineLock) - // spineLen cannot change until we release the lock, - // but may have changed while we were waiting. - spineLen = atomic.Loaduintptr(&b.spineLen) - if top < spineLen { - unlock(&b.spineLock) - goto retry - } - - if spineLen == b.spineCap { - // Grow the spine. - newCap := b.spineCap * 2 - if newCap == 0 { - newCap = gcSweepBufInitSpineCap - } - newSpine := persistentalloc(newCap*sys.PtrSize, cpu.CacheLineSize, &memstats.gc_sys) - if b.spineCap != 0 { - // Blocks are allocated off-heap, so - // no write barriers. - memmove(newSpine, b.spine, b.spineCap*sys.PtrSize) - } - // Spine is allocated off-heap, so no write barrier. - atomic.StorepNoWB(unsafe.Pointer(&b.spine), newSpine) - b.spineCap = newCap - // We can't immediately free the old spine - // since a concurrent push with a lower index - // could still be reading from it. We let it - // leak because even a 1TB heap would waste - // less than 2MB of memory on old spines. If - // this is a problem, we could free old spines - // during STW. - } - - // Allocate a new block and add it to the spine. - block = (*gcSweepBlock)(persistentalloc(unsafe.Sizeof(gcSweepBlock{}), cpu.CacheLineSize, &memstats.gc_sys)) - blockp := add(b.spine, sys.PtrSize*top) - // Blocks are allocated off-heap, so no write barrier. - atomic.StorepNoWB(blockp, unsafe.Pointer(block)) - atomic.Storeuintptr(&b.spineLen, spineLen+1) - unlock(&b.spineLock) - } - - // We have a block. Insert the span atomically, since there may be - // concurrent readers via the block API. - atomic.StorepNoWB(unsafe.Pointer(&block.spans[bottom]), unsafe.Pointer(s)) -} - -// pop removes and returns a span from buffer b, or nil if b is empty. -// pop is safe to call concurrently with other pop operations, but NOT -// to call concurrently with push. -func (b *gcSweepBuf) pop() *mspan { - cursor := atomic.Xadd(&b.index, -1) - if int32(cursor) < 0 { - atomic.Xadd(&b.index, +1) - return nil - } - - // There are no concurrent spine or block modifications during - // pop, so we can omit the atomics. - top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries - blockp := (**gcSweepBlock)(add(b.spine, sys.PtrSize*uintptr(top))) - block := *blockp - s := block.spans[bottom] - // Clear the pointer for block(i). - block.spans[bottom] = nil - return s -} diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 0807726863..cb586171c4 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -44,15 +44,6 @@ const ( // Must be a multiple of the pageInUse bitmap element size and // must also evenly divid pagesPerArena. pagesPerReclaimerChunk = 512 - - // go115NewMCentralImpl is a feature flag for the new mcentral implementation. - // - // This flag depends on go115NewMarkrootSpans because the new mcentral - // implementation requires that markroot spans no longer rely on mgcsweepbufs. - // The definition of this flag helps ensure that if there's a problem with - // the new markroot spans implementation and it gets turned off, that the new - // mcentral implementation also gets turned off so the runtime isn't broken. - go115NewMCentralImpl = true ) // Main malloc heap. @@ -85,19 +76,6 @@ type mheap struct { // access (since that may free the backing store). allspans []*mspan // all spans out there - // sweepSpans contains two mspan stacks: one of swept in-use - // spans, and one of unswept in-use spans. These two trade - // roles on each GC cycle. Since the sweepgen increases by 2 - // on each cycle, this means the swept spans are in - // sweepSpans[sweepgen/2%2] and the unswept spans are in - // sweepSpans[1-sweepgen/2%2]. Sweeping pops spans from the - // unswept stack and pushes spans that are still in-use on the - // swept stack. Likewise, allocating an in-use span pushes it - // on the swept stack. - // - // For !go115NewMCentralImpl. - sweepSpans [2]gcSweepBuf - _ uint32 // align uint64 fields on 32-bit for atomics // Proportional sweep @@ -220,7 +198,7 @@ type mheap struct { base, end uintptr } - // _ uint32 // ensure 64-bit alignment of central + _ uint32 // ensure 64-bit alignment of central // central free lists for small size classes. // the padding makes sure that the mcentrals are @@ -719,8 +697,6 @@ func pageIndexOf(p uintptr) (arena *heapArena, pageIdx uintptr, pageMask uint8) // Initialize the heap. func (h *mheap) init() { lockInit(&h.lock, lockRankMheap) - lockInit(&h.sweepSpans[0].spineLock, lockRankSpine) - lockInit(&h.sweepSpans[1].spineLock, lockRankSpine) lockInit(&h.speciallock, lockRankMheapSpecial) h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys) @@ -1294,16 +1270,6 @@ HaveSpan: h.setSpans(s.base(), npages, s) if !manual { - if !go115NewMCentralImpl { - // Add to swept in-use list. - // - // This publishes the span to root marking. - // - // h.sweepgen is guaranteed to only change during STW, - // and preemption is disabled in the page allocator. - h.sweepSpans[h.sweepgen/2%2].push(s) - } - // Mark in-use span in arena page bitmap. // // This publishes the span to the page sweeper, so -- cgit v1.3 From a61a3c378d9ce71d9b97a1b4fb3320b8b3d6a599 Mon Sep 17 00:00:00 2001 From: Heisenberg Date: Thu, 11 Jun 2020 11:17:20 +0800 Subject: runtime: use the CBZ instruction in the assembler Use CBZ to replace the comparison and branch of arm64 and the zero instruction in the assembly file. Change-Id: Id6c03e9af13aadafc3ad3953f82d2ffa29c12926 Reviewed-on: https://go-review.googlesource.com/c/go/+/237497 Reviewed-by: Keith Randall Run-TryBot: Keith Randall TryBot-Result: Gobot Gobot --- src/runtime/rt0_freebsd_arm64.s | 3 +-- src/runtime/rt0_netbsd_arm64.s | 3 +-- src/runtime/rt0_openbsd_arm64.s | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/rt0_freebsd_arm64.s b/src/runtime/rt0_freebsd_arm64.s index 3a348c33e2..a938d98262 100644 --- a/src/runtime/rt0_freebsd_arm64.s +++ b/src/runtime/rt0_freebsd_arm64.s @@ -45,8 +45,7 @@ TEXT _rt0_arm64_freebsd_lib(SB),NOSPLIT,$184 // Create a new thread to do the runtime initialization and return. MOVD _cgo_sys_thread_create(SB), R4 - CMP $0, R4 - BEQ nocgo + CBZ R4, nocgo MOVD $_rt0_arm64_freebsd_lib_go(SB), R0 MOVD $0, R1 SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved. diff --git a/src/runtime/rt0_netbsd_arm64.s b/src/runtime/rt0_netbsd_arm64.s index 75ecbe5176..2f3b5a5a87 100644 --- a/src/runtime/rt0_netbsd_arm64.s +++ b/src/runtime/rt0_netbsd_arm64.s @@ -44,8 +44,7 @@ TEXT _rt0_arm64_netbsd_lib(SB),NOSPLIT,$184 // Create a new thread to do the runtime initialization and return. MOVD _cgo_sys_thread_create(SB), R4 - CMP $0, R4 - BEQ nocgo + CBZ R4, nocgo MOVD $_rt0_arm64_netbsd_lib_go(SB), R0 MOVD $0, R1 SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved. diff --git a/src/runtime/rt0_openbsd_arm64.s b/src/runtime/rt0_openbsd_arm64.s index 12408f2eec..722fab6129 100644 --- a/src/runtime/rt0_openbsd_arm64.s +++ b/src/runtime/rt0_openbsd_arm64.s @@ -50,8 +50,7 @@ TEXT _rt0_arm64_openbsd_lib(SB),NOSPLIT,$184 // Create a new thread to do the runtime initialization and return. MOVD _cgo_sys_thread_create(SB), R4 - CMP $0, R4 - BEQ nocgo + CBZ R4, nocgo MOVD $_rt0_arm64_openbsd_lib_go(SB), R0 MOVD $0, R1 SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved. -- cgit v1.3 From ba97be4b58241bebbc4ff70574bd82152ab19ffe Mon Sep 17 00:00:00 2001 From: liu-xuewen Date: Mon, 13 Jul 2020 09:15:38 +0000 Subject: runtime: remove tracebackinit and unused skipPC CL [152537](https://go-review.googlesource.com/c/go/+/152537/) changed the way inlined frames are represented in tracebacks to no longer use skipPC Change-Id: I42386fdcc5cf72f3c122e789b6af9cbd0c6bed4b GitHub-Last-Rev: 79c26dcd532907eda4ffc30951845c1c01243501 GitHub-Pull-Request: golang/go#39829 Reviewed-on: https://go-review.googlesource.com/c/go/+/239701 Run-TryBot: Keith Randall TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- src/runtime/asm.s | 21 --------------------- src/runtime/proc.go | 1 - src/runtime/traceback.go | 13 ------------- 3 files changed, 35 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/asm.s b/src/runtime/asm.s index 95a3424de2..27d8df9e06 100644 --- a/src/runtime/asm.s +++ b/src/runtime/asm.s @@ -11,24 +11,3 @@ DATA runtime·no_pointers_stackmap+0x00(SB)/4, $2 DATA runtime·no_pointers_stackmap+0x04(SB)/4, $0 GLOBL runtime·no_pointers_stackmap(SB),RODATA, $8 - -// NaCl requires that these skips be verifiable machine code. -#ifdef GOARCH_amd64 -#define SKIP4 BYTE $0x90; BYTE $0x90; BYTE $0x90; BYTE $0x90 -#endif -#ifdef GOARCH_386 -#define SKIP4 BYTE $0x90; BYTE $0x90; BYTE $0x90; BYTE $0x90 -#endif -#ifdef GOARCH_wasm -#define SKIP4 UNDEF; UNDEF; UNDEF; UNDEF -#endif -#ifndef SKIP4 -#define SKIP4 WORD $0 -#endif - -#define SKIP16 SKIP4; SKIP4; SKIP4; SKIP4 -#define SKIP64 SKIP16; SKIP16; SKIP16; SKIP16 - -// This function must be sizeofSkipFunction bytes. -TEXT runtime·skipPleaseUseCallersFrames(SB),NOSPLIT,$0-0 - SKIP64; SKIP64; SKIP64; SKIP64 diff --git a/src/runtime/proc.go b/src/runtime/proc.go index ed7e2128ae..9a358cd529 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -558,7 +558,6 @@ func schedinit() { sched.maxmcount = 10000 - tracebackinit() moduledataverify() stackinit() mallocinit() diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 96e552524e..7850eceafa 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -36,16 +36,6 @@ import ( const usesLR = sys.MinFrameSize > 0 -var skipPC uintptr - -func tracebackinit() { - // Go variable initialization happens late during runtime startup. - // Instead of initializing the variables above in the declarations, - // schedinit calls this function so that the variables are - // initialized and available earlier in the startup sequence. - skipPC = funcPC(skipPleaseUseCallersFrames) -} - // Traceback over the deferred function calls. // Report them like calls that have been invoked but not started executing yet. func tracebackdefers(gp *g, callback func(*stkframe, unsafe.Pointer) bool, v unsafe.Pointer) { @@ -83,9 +73,6 @@ func tracebackdefers(gp *g, callback func(*stkframe, unsafe.Pointer) bool, v uns const sizeofSkipFunction = 256 -// This function is defined in asm.s to be sizeofSkipFunction bytes long. -func skipPleaseUseCallersFrames() - // Generic traceback. Handles runtime stack prints (pcbuf == nil), // the runtime.Callers function (pcbuf != nil), as well as the garbage // collector (callback != nil). A little clunky to merge these, but avoids -- cgit v1.3 From 88c094c96a164aef2134e548d495c4bc14dc4687 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 25 Jun 2020 09:10:23 -0700 Subject: runtime: print faulting instruction on a SIGFPE Just like SIGILL, it might be useful to see what the instruction that generated the SIGFPE is. Update #39816 Change-Id: I8b2ff692998f0b770289339537dceab96b09d1ee Reviewed-on: https://go-review.googlesource.com/c/go/+/239999 Run-TryBot: Keith Randall TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/runtime/signal_unix.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/runtime') diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go index dd6d79f8ec..6a11c91fb9 100644 --- a/src/runtime/signal_unix.go +++ b/src/runtime/signal_unix.go @@ -616,7 +616,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { print("signal arrived during cgo execution\n") gp = _g_.m.lockedg.ptr() } - if sig == _SIGILL { + if sig == _SIGILL || sig == _SIGFPE { // It would be nice to know how long the instruction is. // Unfortunately, that's complicated to do in general (mostly for x86 // and s930x, but other archs have non-standard instruction lengths also). -- cgit v1.3 From 4e5ed83e8d2fbbbc8f6524f40ab3b6733dc57a38 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Fri, 3 Jul 2020 11:28:50 -0700 Subject: runtime: use bit-parallel operations to compute heap bit summaries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new implementation is much faster in all cases. name old time/op new time/op delta PallocBitsSummarize/Unpacked00-16 142ns ± 1% 7ns ± 2% -94.75% (p=0.000 n=10+9) PallocBitsSummarize/UnpackedFFFFFFFFFFFFFFFF-16 172ns ± 0% 24ns ± 0% -86.02% (p=0.000 n=9+9) PallocBitsSummarize/UnpackedAA-16 145ns ± 0% 32ns ± 0% -78.16% (p=0.000 n=8+10) PallocBitsSummarize/UnpackedAAAAAAAAAAAAAAAA-16 172ns ± 0% 33ns ± 0% -80.95% (p=0.000 n=9+9) PallocBitsSummarize/Unpacked80000000AAAAAAAA-16 162ns ± 1% 60ns ± 0% -62.69% (p=0.000 n=10+9) PallocBitsSummarize/UnpackedAAAAAAAA00000001-16 163ns ± 0% 68ns ± 1% -58.47% (p=0.000 n=8+10) PallocBitsSummarize/UnpackedBBBBBBBBBBBBBBBB-16 172ns ± 0% 35ns ± 0% -79.70% (p=0.000 n=9+9) PallocBitsSummarize/Unpacked80000000BBBBBBBB-16 161ns ± 0% 63ns ± 0% -60.61% (p=0.000 n=8+10) PallocBitsSummarize/UnpackedBBBBBBBB00000001-16 163ns ± 0% 60ns ± 0% -63.14% (p=0.000 n=9+10) PallocBitsSummarize/UnpackedCCCCCCCCCCCCCCCC-16 172ns ± 0% 39ns ± 0% -77.41% (p=0.000 n=7+10) PallocBitsSummarize/Unpacked4444444444444444-16 172ns ± 0% 39ns ± 0% -77.42% (p=0.000 n=7+10) PallocBitsSummarize/Unpacked4040404040404040-16 173ns ± 2% 51ns ± 1% -70.55% (p=0.000 n=10+10) PallocBitsSummarize/Unpacked4000400040004000-16 160ns ± 1% 53ns ± 0% -66.78% (p=0.000 n=10+10) PallocBitsSummarize/Unpacked1000404044CCAAFF-16 169ns ± 1% 59ns ± 1% -65.28% (p=0.000 n=10+10) Change-Id: I94daa645b76a9cf9c93edeb2058d7132216fcb72 Reviewed-on: https://go-review.googlesource.com/c/go/+/240900 Run-TryBot: Keith Randall TryBot-Result: Gobot Gobot Reviewed-by: Michael Knyszek --- src/runtime/mpallocbits.go | 147 +++++++++++++++++++++++----------------- src/runtime/mpallocbits_test.go | 36 ++++++---- 2 files changed, 108 insertions(+), 75 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mpallocbits.go b/src/runtime/mpallocbits.go index a8011341bc..ff79bfbc1a 100644 --- a/src/runtime/mpallocbits.go +++ b/src/runtime/mpallocbits.go @@ -120,78 +120,99 @@ func (b *pageBits) popcntRange(i, n uint) (s uint) { // sake of documentation, 0s are free pages and 1s are allocated pages. type pallocBits pageBits -// consec8tab is a table containing the number of consecutive -// zero bits for any uint8 value. -// -// The table is generated by calling consec8(i) for each -// possible uint8 value, which is defined as: -// -// // consec8 counts the maximum number of consecutive 0 bits -// // in a uint8. -// func consec8(n uint8) int { -// n = ^n -// i := 0 -// for n != 0 { -// n &= (n << 1) -// i++ -// } -// return i -// } -var consec8tab = [256]uint{ - 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, - 4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, - 6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, - 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, - 7, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, - 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, - 6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, - 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 0, -} - // summarize returns a packed summary of the bitmap in pallocBits. func (b *pallocBits) summarize() pallocSum { - // TODO(mknyszek): There may be something more clever to be done - // here to make the summarize operation more efficient. For example, - // we can compute start and end with 64-bit wide operations easily, - // but max is a bit more complex. Perhaps there exists some way to - // leverage the 64-bit start and end to our advantage? - var start, max, end uint + var start, max, cur uint + const notSetYet = ^uint(0) // sentinel for start value + start = notSetYet for i := 0; i < len(b); i++ { - a := b[i] - for j := 0; j < 64; j += 8 { - k := uint8(a >> j) - - // Compute start. - si := uint(sys.TrailingZeros8(k)) - if start == uint(i*64+j) { - start += si - } + x := b[i] + if x == 0 { + cur += 64 + continue + } + t := uint(sys.TrailingZeros64(x)) + l := uint(sys.LeadingZeros64(x)) - // Compute max. - if end+si > max { - max = end + si - } - if mi := consec8tab[k]; mi > max { - max = mi + // Finish any region spanning the uint64s + cur += t + if start == notSetYet { + start = cur + } + if cur > max { + max = cur + } + // Final region that might span to next uint64 + cur = l + } + if start == notSetYet { + // Made it all the way through without finding a single 1 bit. + const n = uint(64 * len(b)) + return packPallocSum(n, n, n) + } + if cur > max { + max = cur + } + if max >= 64-2 { + // There is no way an internal run of zeros could beat max. + return packPallocSum(start, max, cur) + } + // Now look inside each uint64 for runs of zeros. + // All uint64s must be nonzero, or we would have aborted above. +outer: + for i := 0; i < len(b); i++ { + x := b[i] + + // Look inside this uint64. We have a pattern like + // 000000 1xxxxx1 000000 + // We need to look inside the 1xxxxx1 for any contiguous + // region of zeros. + + // We already know the trailing zeros are no larger than max. Remove them. + x >>= sys.TrailingZeros64(x) & 63 + if x&(x+1) == 0 { // no more zeros (except at the top). + continue + } + + // Strategy: shrink all runs of zeros by max. If any runs of zero + // remain, then we've identified a larger maxiumum zero run. + p := max // number of zeros we still need to shrink by. + k := uint(1) // current minimum length of runs of ones in x. + for { + // Shrink all runs of zeros by p places (except the top zeros). + for p > 0 { + if p <= k { + // Shift p ones down into the top of each run of zeros. + x |= x >> (p & 63) + if x&(x+1) == 0 { // no more zeros (except at the top). + continue outer + } + break + } + // Shift k ones down into the top of each run of zeros. + x |= x >> (k & 63) + if x&(x+1) == 0 { // no more zeros (except at the top). + continue outer + } + p -= k + // We've just doubled the minimum length of 1-runs. + // This allows us to shift farther in the next iteration. + k *= 2 } - // Compute end. - if k == 0 { - end += 8 - } else { - end = uint(sys.LeadingZeros8(k)) + // The length of the lowest-order zero run is an increment to our maximum. + j := uint(sys.TrailingZeros64(^x)) // count contiguous trailing ones + x >>= j & 63 // remove trailing ones + j = uint(sys.TrailingZeros64(x)) // count contiguous trailing zeros + x >>= j & 63 // remove zeros + max += j // we have a new maximum! + if x&(x+1) == 0 { // no more zeros (except at the top). + continue outer } + p = j // remove j more zeros from each zero run. } } - return packPallocSum(start, max, end) + return packPallocSum(start, max, cur) } // find searches for npages contiguous free pages in pallocBits and returns diff --git a/src/runtime/mpallocbits_test.go b/src/runtime/mpallocbits_test.go index 71a29f3b3a..42268a1698 100644 --- a/src/runtime/mpallocbits_test.go +++ b/src/runtime/mpallocbits_test.go @@ -101,7 +101,7 @@ func invertPallocBits(b *PallocBits) { // Ensures two packed summaries are identical, and reports a detailed description // of the difference if they're not. -func checkPallocSum(t *testing.T, got, want PallocSum) { +func checkPallocSum(t testing.TB, got, want PallocSum) { if got.Start() != want.Start() { t.Errorf("inconsistent start: got %d, want %d", got.Start(), want.Start()) } @@ -297,17 +297,29 @@ func TestPallocBitsSummarize(t *testing.T) { // Benchmarks how quickly we can summarize a PallocBits. func BenchmarkPallocBitsSummarize(b *testing.B) { - buf0 := new(PallocBits) - buf1 := new(PallocBits) - for i := 0; i < len(buf1); i++ { - buf1[i] = ^uint64(0) - } - bufa := new(PallocBits) - for i := 0; i < len(bufa); i++ { - bufa[i] = 0xaa - } - for _, buf := range []*PallocBits{buf0, buf1, bufa} { - b.Run(fmt.Sprintf("Unpacked%02X", buf[0]), func(b *testing.B) { + patterns := []uint64{ + 0, + ^uint64(0), + 0xaa, + 0xaaaaaaaaaaaaaaaa, + 0x80000000aaaaaaaa, + 0xaaaaaaaa00000001, + 0xbbbbbbbbbbbbbbbb, + 0x80000000bbbbbbbb, + 0xbbbbbbbb00000001, + 0xcccccccccccccccc, + 0x4444444444444444, + 0x4040404040404040, + 0x4000400040004000, + 0x1000404044ccaaff, + } + for _, p := range patterns { + buf := new(PallocBits) + for i := 0; i < len(buf); i++ { + buf[i] = p + } + b.Run(fmt.Sprintf("Unpacked%02X", p), func(b *testing.B) { + checkPallocSum(b, buf.Summarize(), SummarizeSlow(buf)) for i := 0; i < b.N; i++ { buf.Summarize() } -- cgit v1.3 From 8b8f926fc3f0e8f002d0a8e97aab9500e4db83a7 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Mon, 6 Jul 2020 20:46:31 -0700 Subject: runtime: bit parallel implementation of findBitRange64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a bit-parallel implementation of findBitRange64. It uses a repeated shift-'N-and technique to erase all the free marks that are too small for the allocation. Also some small improvements to find1. name old time/op new time/op delta FindBitRange64/Pattern00Size2-16 4.19ns ± 0% 2.26ns ± 0% -46.04% (p=0.000 n=10+8) FindBitRange64/Pattern00Size8-16 4.19ns ± 0% 2.12ns ± 0% -49.35% (p=0.000 n=9+10) FindBitRange64/Pattern00Size32-16 4.20ns ± 0% 2.12ns ± 0% -49.49% (p=0.000 n=10+8) FindBitRange64/PatternFFFFFFFFFFFFFFFFSize2-16 2.13ns ± 0% 2.27ns ± 0% +6.28% (p=0.000 n=10+10) FindBitRange64/PatternFFFFFFFFFFFFFFFFSize8-16 2.13ns ± 0% 4.46ns ± 0% +109.39% (p=0.000 n=10+9) FindBitRange64/PatternFFFFFFFFFFFFFFFFSize32-16 2.13ns ± 1% 5.58ns ± 0% +162.37% (p=0.000 n=10+9) FindBitRange64/PatternAASize2-16 22.2ns ± 0% 2.3ns ± 0% -89.82% (p=0.000 n=9+8) FindBitRange64/PatternAASize8-16 22.2ns ± 0% 2.1ns ± 1% -90.41% (p=0.000 n=9+10) FindBitRange64/PatternAASize32-16 22.2ns ± 0% 2.1ns ± 1% -90.43% (p=0.000 n=10+10) FindBitRange64/PatternAAAAAAAAAAAAAAAASize2-16 156ns ± 1% 2ns ± 0% -98.54% (p=0.000 n=10+10) FindBitRange64/PatternAAAAAAAAAAAAAAAASize8-16 155ns ± 1% 2ns ± 0% -98.63% (p=0.000 n=10+8) FindBitRange64/PatternAAAAAAAAAAAAAAAASize32-16 155ns ± 0% 2ns ± 1% -98.63% (p=0.000 n=8+10) FindBitRange64/Pattern80000000AAAAAAAASize2-16 81.2ns ± 0% 2.3ns ± 1% -97.21% (p=0.000 n=10+10) FindBitRange64/Pattern80000000AAAAAAAASize8-16 81.1ns ± 0% 2.1ns ± 0% -97.39% (p=0.000 n=10+9) FindBitRange64/Pattern80000000AAAAAAAASize32-16 81.1ns ± 0% 2.1ns ± 0% -97.38% (p=0.000 n=10+10) FindBitRange64/PatternAAAAAAAA00000001Size2-16 76.8ns ± 1% 2.3ns ± 0% -97.05% (p=0.000 n=10+10) FindBitRange64/PatternAAAAAAAA00000001Size8-16 76.6ns ± 0% 2.1ns ± 0% -97.23% (p=0.000 n=8+10) FindBitRange64/PatternAAAAAAAA00000001Size32-16 76.7ns ± 0% 2.1ns ± 0% -97.23% (p=0.000 n=9+9) FindBitRange64/PatternBBBBBBBBBBBBBBBBSize2-16 2.13ns ± 0% 2.27ns ± 0% +6.57% (p=0.000 n=8+8) FindBitRange64/PatternBBBBBBBBBBBBBBBBSize8-16 76.7ns ± 0% 2.9ns ± 0% -96.20% (p=0.000 n=9+10) FindBitRange64/PatternBBBBBBBBBBBBBBBBSize32-16 76.7ns ± 0% 2.9ns ± 0% -96.20% (p=0.000 n=10+10) FindBitRange64/Pattern80000000BBBBBBBBSize2-16 2.12ns ± 0% 2.27ns ± 1% +6.74% (p=0.000 n=10+10) FindBitRange64/Pattern80000000BBBBBBBBSize8-16 44.8ns ± 0% 2.9ns ± 0% -93.49% (p=0.000 n=9+10) FindBitRange64/Pattern80000000BBBBBBBBSize32-16 44.9ns ± 0% 2.9ns ± 0% -93.49% (p=0.000 n=10+8) FindBitRange64/PatternBBBBBBBB00000001Size2-16 4.20ns ± 1% 2.27ns ± 1% -46.02% (p=0.000 n=10+10) FindBitRange64/PatternBBBBBBBB00000001Size8-16 44.9ns ± 0% 2.9ns ± 1% -93.51% (p=0.000 n=10+9) FindBitRange64/PatternBBBBBBBB00000001Size32-16 44.9ns ± 0% 2.9ns ± 0% -93.51% (p=0.000 n=10+9) FindBitRange64/PatternCCCCCCCCCCCCCCCCSize2-16 4.19ns ± 0% 2.26ns ± 0% -46.10% (p=0.000 n=10+10) FindBitRange64/PatternCCCCCCCCCCCCCCCCSize8-16 76.5ns ± 0% 2.9ns ± 0% -96.19% (p=0.000 n=8+7) FindBitRange64/PatternCCCCCCCCCCCCCCCCSize32-16 76.5ns ± 0% 2.9ns ± 0% -96.19% (p=0.000 n=10+8) FindBitRange64/Pattern4444444444444444Size2-16 76.4ns ± 0% 2.3ns ± 0% -97.04% (p=0.000 n=8+10) FindBitRange64/Pattern4444444444444444Size8-16 76.5ns ± 0% 2.1ns ± 0% -97.23% (p=0.000 n=9+10) FindBitRange64/Pattern4444444444444444Size32-16 76.5ns ± 0% 2.1ns ± 0% -97.23% (p=0.000 n=8+10) FindBitRange64/Pattern4040404040404040Size2-16 40.3ns ± 0% 2.3ns ± 0% -94.38% (p=0.000 n=7+10) FindBitRange64/Pattern4040404040404040Size8-16 40.2ns ± 0% 2.1ns ± 0% -94.75% (p=0.000 n=10+10) FindBitRange64/Pattern4040404040404040Size32-16 40.2ns ± 0% 2.1ns ± 0% -94.76% (p=0.000 n=10+6) FindBitRange64/Pattern4000400040004000Size2-16 22.2ns ± 0% 2.2ns ± 0% -89.86% (p=0.001 n=8+9) FindBitRange64/Pattern4000400040004000Size8-16 22.2ns ± 0% 2.1ns ± 0% -90.52% (p=0.000 n=8+10) FindBitRange64/Pattern4000400040004000Size32-16 22.2ns ± 1% 2.1ns ± 0% -90.50% (p=0.000 n=10+10) The cases that slow down aren't really that slow, and those inputs never actually occur (there's a short circuit before the call to findBitRange64 for that case). Change-Id: I50fae62915098032d8ce7fa57ef29eee9deb01ba Reviewed-on: https://go-review.googlesource.com/c/go/+/241279 Reviewed-by: Michael Knyszek --- src/runtime/mpallocbits.go | 41 ++++++++++++++++++++++++++++++----------- src/runtime/mpallocbits_test.go | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 13 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mpallocbits.go b/src/runtime/mpallocbits.go index ff79bfbc1a..ff112300c3 100644 --- a/src/runtime/mpallocbits.go +++ b/src/runtime/mpallocbits.go @@ -218,7 +218,7 @@ outer: // find searches for npages contiguous free pages in pallocBits and returns // the index where that run starts, as well as the index of the first free page // it found in the search. searchIdx represents the first known free page and -// where to begin the search from. +// where to begin the next search from. // // If find fails to find any free space, it returns an index of ^uint(0) and // the new searchIdx should be ignored. @@ -239,9 +239,10 @@ func (b *pallocBits) find(npages uintptr, searchIdx uint) (uint, uint) { // // See find for an explanation of the searchIdx parameter. func (b *pallocBits) find1(searchIdx uint) uint { + _ = b[0] // lift nil check out of loop for i := searchIdx / 64; i < uint(len(b)); i++ { x := b[i] - if x == ^uint64(0) { + if ^x == 0 { continue } return i*64 + uint(sys.TrailingZeros64(^x)) @@ -263,18 +264,18 @@ func (b *pallocBits) findSmallN(npages uintptr, searchIdx uint) (uint, uint) { end, newSearchIdx := uint(0), ^uint(0) for i := searchIdx / 64; i < uint(len(b)); i++ { bi := b[i] - if bi == ^uint64(0) { + if ^bi == 0 { end = 0 continue } // First see if we can pack our allocation in the trailing // zeros plus the end of the last 64 bits. - start := uint(sys.TrailingZeros64(bi)) if newSearchIdx == ^uint(0) { // The new searchIdx is going to be at these 64 bits after any // 1s we file, so count trailing 1s. newSearchIdx = i*64 + uint(sys.TrailingZeros64(^bi)) } + start := uint(sys.TrailingZeros64(bi)) if end+start >= uint(npages) { return i*64 - end, newSearchIdx } @@ -369,15 +370,33 @@ func (b *pallocBits) pages64(i uint) uint64 { // findBitRange64 returns the bit index of the first set of // n consecutive 1 bits. If no consecutive set of 1 bits of // size n may be found in c, then it returns an integer >= 64. +// n must be > 0. func findBitRange64(c uint64, n uint) uint { - i := uint(0) - cont := uint(sys.TrailingZeros64(^c)) - for cont < n && i < 64 { - i += cont - i += uint(sys.TrailingZeros64(c >> i)) - cont = uint(sys.TrailingZeros64(^(c >> i))) + // This implementation is based on shrinking the length of + // runs of contiguous 1 bits. We remove the top n-1 1 bits + // from each run of 1s, then look for the first remaining 1 bit. + p := n - 1 // number of 1s we want to remove. + k := uint(1) // current minimum width of runs of 0 in c. + for p > 0 { + if p <= k { + // Shift p 0s down into the top of each run of 1s. + c &= c >> (p & 63) + break + } + // Shift k 0s down into the top of each run of 1s. + c &= c >> (k & 63) + if c == 0 { + return 64 + } + p -= k + // We've just doubled the minimum length of 0-runs. + // This allows us to shift farther in the next iteration. + k *= 2 } - return i + // Find first remaining 1. + // Since we shrunk from the top down, the first 1 is in + // its correct original position. + return uint(sys.TrailingZeros64(c)) } // pallocData encapsulates pallocBits and a bitmap for diff --git a/src/runtime/mpallocbits_test.go b/src/runtime/mpallocbits_test.go index 42268a1698..5095e24220 100644 --- a/src/runtime/mpallocbits_test.go +++ b/src/runtime/mpallocbits_test.go @@ -504,10 +504,9 @@ func TestFindBitRange64(t *testing.T) { t.Errorf("case (%016x, %d): got %d, want %d", x, n, i, result) } } - for i := uint(0); i <= 64; i++ { + for i := uint(1); i <= 64; i++ { check(^uint64(0), i, 0) } - check(0, 0, 0) for i := uint(1); i <= 64; i++ { check(0, i, ^uint(0)) } @@ -520,3 +519,33 @@ func TestFindBitRange64(t *testing.T) { check(0xffff03ff0107ffff, 16, 0) check(0x0fff03ff01079fff, 16, ^uint(0)) } + +func BenchmarkFindBitRange64(b *testing.B) { + patterns := []uint64{ + 0, + ^uint64(0), + 0xaa, + 0xaaaaaaaaaaaaaaaa, + 0x80000000aaaaaaaa, + 0xaaaaaaaa00000001, + 0xbbbbbbbbbbbbbbbb, + 0x80000000bbbbbbbb, + 0xbbbbbbbb00000001, + 0xcccccccccccccccc, + 0x4444444444444444, + 0x4040404040404040, + 0x4000400040004000, + } + sizes := []uint{ + 2, 8, 32, + } + for _, pattern := range patterns { + for _, size := range sizes { + b.Run(fmt.Sprintf("Pattern%02XSize%d", pattern, size), func(b *testing.B) { + for i := 0; i < b.N; i++ { + FindBitRange64(pattern, size) + } + }) + } + } +} -- cgit v1.3 From 7fbd8c75c6c57e713069a3a405e5cde26cfae090 Mon Sep 17 00:00:00 2001 From: lihaowei Date: Fri, 14 Aug 2020 10:35:46 +0000 Subject: all: fix spelling mistakes Change-Id: I7d512281d8442d306594b57b5deaecd132b5ea9e GitHub-Last-Rev: 251e1d6857516b21fd71f654133f81f23ffec654 GitHub-Pull-Request: golang/go#40793 Reviewed-on: https://go-review.googlesource.com/c/go/+/248441 Reviewed-by: Dave Cheney --- src/bufio/bufio.go | 2 +- src/net/http/client.go | 2 +- src/runtime/mheap.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/runtime') diff --git a/src/bufio/bufio.go b/src/bufio/bufio.go index 7cbd5424ea..6baf9b9e40 100644 --- a/src/bufio/bufio.go +++ b/src/bufio/bufio.go @@ -425,7 +425,7 @@ func (b *Reader) ReadLine() (line []byte, isPrefix bool, err error) { // of bytes in the combined first two elements, error). // The complete result is equal to // `bytes.Join(append(fullBuffers, finalFragment), nil)`, which has a -// length of `totalLen`. The result is strucured in this way to allow callers +// length of `totalLen`. The result is structured in this way to allow callers // to minimize allocations and copies. func (b *Reader) collectFragments(delim byte) (fullBuffers [][]byte, finalFragment []byte, totalLen int, err error) { var frag []byte diff --git a/src/net/http/client.go b/src/net/http/client.go index 3860d97d8f..6ca0d2e6cf 100644 --- a/src/net/http/client.go +++ b/src/net/http/client.go @@ -321,7 +321,7 @@ func knownRoundTripperImpl(rt RoundTripper, req *Request) bool { return true } // There's a very minor chance of a false positive with this. - // Insted of detecting our golang.org/x/net/http2.Transport, + // Instead of detecting our golang.org/x/net/http2.Transport, // it might detect a Transport type in a different http2 // package. But I know of none, and the only problem would be // some temporarily leaked goroutines if the transport didn't diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index cb586171c4..1a57bcd66e 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -42,7 +42,7 @@ const ( // roughly 100µs. // // Must be a multiple of the pageInUse bitmap element size and - // must also evenly divid pagesPerArena. + // must also evenly divide pagesPerArena. pagesPerReclaimerChunk = 512 ) -- cgit v1.3 From 613388315e29d4e906805e602602500ca1e7e334 Mon Sep 17 00:00:00 2001 From: Cholerae Hu Date: Mon, 11 May 2020 11:18:57 +0800 Subject: runtime: reduce critical path in injectglist Change-Id: Ia3fb30ac9add39c803f11f69d967c6604fdeacf8 Reviewed-on: https://go-review.googlesource.com/c/go/+/233217 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/runtime/proc.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 9a358cd529..5e38b3194c 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -2575,15 +2575,20 @@ func injectglist(glist *gList) { return } - lock(&sched.lock) - npidle := int(sched.npidle) + npidle := int(atomic.Load(&sched.npidle)) + var globq gQueue var n int for n = 0; n < npidle && !q.empty(); n++ { - globrunqput(q.pop()) + g := q.pop() + globq.pushBack(g) + } + if n > 0 { + lock(&sched.lock) + globrunqputbatch(&globq, int32(n)) + unlock(&sched.lock) + startIdle(n) + qsize -= n } - unlock(&sched.lock) - startIdle(n) - qsize -= n if !q.empty() { runqputbatch(pp, &q, qsize) -- cgit v1.3 From 4149493443f09c14d9f0fad7030704ed57149b55 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 17 Aug 2020 16:32:33 +0200 Subject: runtime: move startupRandomData declaration to os_linux.go startupRandomData is only used in sysauxv and getRandomData on linux, thus move it closer to where it is used. Also adjust its godoc comment. Change-Id: Ice51d579ec33436adbfdf247caf4ba00bae865e0 Reviewed-on: https://go-review.googlesource.com/c/go/+/248761 Run-TryBot: Tobias Klauser TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/runtime/os_linux.go | 4 ++++ src/runtime/runtime2.go | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go index 22931b4d5c..9702920bcf 100644 --- a/src/runtime/os_linux.go +++ b/src/runtime/os_linux.go @@ -249,6 +249,10 @@ func sysargs(argc int32, argv **byte) { sysauxv(buf[:]) } +// startupRandomData holds random bytes initialized at startup. These come from +// the ELF AT_RANDOM auxiliary vector. +var startupRandomData []byte + func sysauxv(auxv []uintptr) int { var i int for ; auxv[i] != _AT_NULL; i += 2 { diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 0bddcaa789..959878400d 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -845,10 +845,6 @@ type forcegcstate struct { idle uint32 } -// startup_random_data holds random bytes initialized at startup. These come from -// the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.go or os_linux_386.go). -var startupRandomData []byte - // extendRandom extends the random numbers in r[:n] to the whole slice r. // Treats n<0 as n==0. func extendRandom(r []byte, n int) { -- cgit v1.3 From b58d29741650c7bf10b17f455666e2727e1cdd2e Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Mon, 17 Aug 2020 19:06:19 -0400 Subject: cmd/compile, runtime: mark R12 clobbered for write barrier call on PPC64 When external linking, for large binaries, the external linker may insert a trampoline for the write barrier call, which looks 0000000005a98cc8 <__long_branch_runtime.gcWriteBarrier>: 5a98cc8: 86 01 82 3d addis r12,r2,390 5a98ccc: d8 bd 8c e9 ld r12,-16936(r12) 5a98cd0: a6 03 89 7d mtctr r12 5a98cd4: 20 04 80 4e bctr It clobbers R12 (and CTR, which is never live across a call). As at compile time we don't know whether the binary is big and what link mode will be used, I think we need to mark R12 as clobbered for write barrier call. For extra safety (future-proof) we mark caller-saved register that cannot be used for function arguments, which includes R11, as potentially clobbered as well. Fixes #40851. Change-Id: Iedd901c5072f1127cc59b0a48cfeb4aaec81b519 Reviewed-on: https://go-review.googlesource.com/c/go/+/248917 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: Austin Clements --- src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 4 +-- src/cmd/compile/internal/ssa/opGen.go | 2 +- src/runtime/asm_ppc64x.s | 41 ++++++++++++++-------------- 3 files changed, 24 insertions(+), 23 deletions(-) (limited to 'src/runtime') diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index f8bc6cb20b..0261dc283b 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -645,9 +645,9 @@ func init() { {name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true}, // LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier - // It preserves R0 through R15, g, and its arguments R20 and R21, + // It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and its arguments R20 and R21, // but may clobber anything else, including R31 (REGTMP). - {name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R20 R21 g")) | buildReg("R31")}, clobberFlags: true, aux: "Sym", symEffect: "None"}, + {name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17 R20 R21 g")) | buildReg("R31")}, clobberFlags: true, aux: "Sym", symEffect: "None"}, // There are three of these functions so that they can have three different register inputs. // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 408c855dbd..df2a27368b 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -26889,7 +26889,7 @@ var opcodeTable = [...]opInfo{ {0, 1048576}, // R20 {1, 2097152}, // R21 }, - clobbers: 576460746931503104, // R16 R17 R18 R19 R22 R23 R24 R25 R26 R27 R28 R29 R31 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 + clobbers: 576460746931312640, // R11 R12 R18 R19 R22 R23 R24 R25 R26 R27 R28 R29 R31 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 }, }, { diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s index 11d2f2f51a..23387a2165 100644 --- a/src/runtime/asm_ppc64x.s +++ b/src/runtime/asm_ppc64x.s @@ -916,23 +916,23 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1 // - R20 is the destination of the write // - R21 is the value being written at R20. // It clobbers condition codes. -// It does not clobber R0 through R15, +// It does not clobber R0 through R17 (except special registers), // but may clobber any other register, *including* R31. TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$112 // The standard prologue clobbers R31. - // We use R16 and R17 as scratch registers. - MOVD g_m(g), R16 - MOVD m_p(R16), R16 - MOVD (p_wbBuf+wbBuf_next)(R16), R17 + // We use R18 and R19 as scratch registers. + MOVD g_m(g), R18 + MOVD m_p(R18), R18 + MOVD (p_wbBuf+wbBuf_next)(R18), R19 // Increment wbBuf.next position. - ADD $16, R17 - MOVD R17, (p_wbBuf+wbBuf_next)(R16) - MOVD (p_wbBuf+wbBuf_end)(R16), R16 - CMP R16, R17 + ADD $16, R19 + MOVD R19, (p_wbBuf+wbBuf_next)(R18) + MOVD (p_wbBuf+wbBuf_end)(R18), R18 + CMP R18, R19 // Record the write. - MOVD R21, -16(R17) // Record value - MOVD (R20), R16 // TODO: This turns bad writes into bad reads. - MOVD R16, -8(R17) // Record *slot + MOVD R21, -16(R19) // Record value + MOVD (R20), R18 // TODO: This turns bad writes into bad reads. + MOVD R18, -8(R19) // Record *slot // Is the buffer full? (flags set in CMP above) BEQ flush ret: @@ -956,11 +956,12 @@ flush: MOVD R8, (FIXED_FRAME+56)(R1) MOVD R9, (FIXED_FRAME+64)(R1) MOVD R10, (FIXED_FRAME+72)(R1) - MOVD R11, (FIXED_FRAME+80)(R1) - MOVD R12, (FIXED_FRAME+88)(R1) + // R11, R12 may be clobbered by external-linker-inserted trampoline // R13 is REGTLS - MOVD R14, (FIXED_FRAME+96)(R1) - MOVD R15, (FIXED_FRAME+104)(R1) + MOVD R14, (FIXED_FRAME+80)(R1) + MOVD R15, (FIXED_FRAME+88)(R1) + MOVD R16, (FIXED_FRAME+96)(R1) + MOVD R17, (FIXED_FRAME+104)(R1) // This takes arguments R20 and R21. CALL runtime·wbBufFlush(SB) @@ -975,10 +976,10 @@ flush: MOVD (FIXED_FRAME+56)(R1), R8 MOVD (FIXED_FRAME+64)(R1), R9 MOVD (FIXED_FRAME+72)(R1), R10 - MOVD (FIXED_FRAME+80)(R1), R11 - MOVD (FIXED_FRAME+88)(R1), R12 - MOVD (FIXED_FRAME+96)(R1), R14 - MOVD (FIXED_FRAME+104)(R1), R15 + MOVD (FIXED_FRAME+80)(R1), R14 + MOVD (FIXED_FRAME+88)(R1), R15 + MOVD (FIXED_FRAME+96)(R1), R16 + MOVD (FIXED_FRAME+104)(R1), R17 JMP ret // Note: these functions use a special calling convention to save generated code space. -- cgit v1.3 From 30a68bfb806b5217932e280f5a5f521237e69077 Mon Sep 17 00:00:00 2001 From: Matthew Dempsky Date: Mon, 27 Jul 2020 12:40:18 -0700 Subject: runtime: add "success" field to sudog The current wakeup protocol for channel communications is that the second goroutine sets gp.param to the sudog when a value is successfully communicated over the channel, and to nil when the wakeup is due to closing the channel. Setting nil to indicate channel closure works okay for chansend and chanrecv, because they're only communicating with one channel, so they know it must be the channel that was closed. However, it means selectgo has to re-poll all of the channels to figure out which one was closed. This commit adds a "success" field to sudog, and changes the wakeup protocol to always set gp.param to sg, and to use sg.success to indicate successful communication vs channel closure. While here, this also reorganizes the chansend code slightly so that the sudog is still released to the pool if the send blocks and then is awoken because the channel closed. Updates #40410. Change-Id: I6cd9a20ebf9febe370a15af1b8afe24c5539efc6 Reviewed-on: https://go-review.googlesource.com/c/go/+/245019 Run-TryBot: Matthew Dempsky TryBot-Result: Gobot Gobot Reviewed-by: Cuong Manh Le Reviewed-by: Keith Randall --- src/runtime/chan.go | 25 +++++++++++++++---------- src/runtime/runtime2.go | 6 ++++++ src/runtime/select.go | 19 +++++++------------ 3 files changed, 28 insertions(+), 22 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/chan.go b/src/runtime/chan.go index f6f4ffd02e..0afe5d962b 100644 --- a/src/runtime/chan.go +++ b/src/runtime/chan.go @@ -263,18 +263,19 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { } gp.waiting = nil gp.activeStackChans = false - if gp.param == nil { - if c.closed == 0 { - throw("chansend: spurious wakeup") - } - panic(plainError("send on closed channel")) - } + closed := !mysg.success gp.param = nil if mysg.releasetime > 0 { blockevent(mysg.releasetime-t0, 2) } mysg.c = nil releaseSudog(mysg) + if closed { + if c.closed == 0 { + throw("chansend: spurious wakeup") + } + panic(plainError("send on closed channel")) + } return true } @@ -311,6 +312,7 @@ func send(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) { gp := sg.g unlockf() gp.param = unsafe.Pointer(sg) + sg.success = true if sg.releasetime != 0 { sg.releasetime = cputicks() } @@ -384,7 +386,8 @@ func closechan(c *hchan) { sg.releasetime = cputicks() } gp := sg.g - gp.param = nil + gp.param = unsafe.Pointer(sg) + sg.success = false if raceenabled { raceacquireg(gp, c.raceaddr()) } @@ -402,7 +405,8 @@ func closechan(c *hchan) { sg.releasetime = cputicks() } gp := sg.g - gp.param = nil + gp.param = unsafe.Pointer(sg) + sg.success = false if raceenabled { raceacquireg(gp, c.raceaddr()) } @@ -575,11 +579,11 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) if mysg.releasetime > 0 { blockevent(mysg.releasetime-t0, 2) } - closed := gp.param == nil + success := mysg.success gp.param = nil mysg.c = nil releaseSudog(mysg) - return true, !closed + return true, success } // recv processes a receive operation on a full channel c. @@ -632,6 +636,7 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) { gp := sg.g unlockf() gp.param = unsafe.Pointer(sg) + sg.success = true if sg.releasetime != 0 { sg.releasetime = cputicks() } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 959878400d..b7d0739e54 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -366,6 +366,12 @@ type sudog struct { // g.selectDone must be CAS'd to win the wake-up race. isSelect bool + // success indicates whether communication over channel c + // succeeded. It is true if the goroutine was awoken because a + // value was delivered over channel c, and false if awoken + // because c was closed. + success bool + parent *sudog // semaRoot binary tree waitlink *sudog // g.waiting list or semaRoot waittail *sudog // semaRoot diff --git a/src/runtime/select.go b/src/runtime/select.go index a069e3e050..081db7bad4 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -221,12 +221,12 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { nextp **sudog ) -loop: // pass 1 - look for something already waiting var dfli int var dfl *scase var casi int var cas *scase + var caseSuccess bool var recvOK bool for i := 0; i < ncases; i++ { casi = int(pollorder[i]) @@ -331,6 +331,7 @@ loop: // We singly-linked up the SudoGs in lock order. casi = -1 cas = nil + caseSuccess = false sglist = gp.waiting // Clear all elem before unlinking from gp.waiting. for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink { @@ -352,6 +353,7 @@ loop: // sg has already been dequeued by the G that woke us up. casi = int(casei) cas = k + caseSuccess = sglist.success } else { c = k.c if k.kind == caseSend { @@ -367,16 +369,7 @@ loop: } if cas == nil { - // We can wake up with gp.param == nil (so cas == nil) - // when a channel involved in the select has been closed. - // It is easiest to loop and re-run the operation; - // we'll see that it's now closed. - // Maybe some day we can signal the close explicitly, - // but we'd have to distinguish close-on-reader from close-on-writer. - // It's easiest not to duplicate the code and just recheck above. - // We know that something closed, and things never un-close, - // so we won't block again. - goto loop + throw("selectgo: bad wakeup") } c = cas.c @@ -386,7 +379,9 @@ loop: } if cas.kind == caseRecv { - recvOK = true + recvOK = caseSuccess + } else if cas.kind == caseSend && !caseSuccess { + goto sclose } if raceenabled { -- cgit v1.3 From 78a1064d5dd05fc669342df3a6a5e11d49749d85 Mon Sep 17 00:00:00 2001 From: Matthew Dempsky Date: Mon, 27 Jul 2020 14:05:05 -0700 Subject: runtime: remove scase.releasetime field selectgo will report at most one block event, so there's no need to keep a releasetime for every select case. It suffices to simply track the releasetime of the case responsible for the wakeup. Updates #40410. Change-Id: I72679cd43dde80d7e6dbab21a78952a4372d1e79 Reviewed-on: https://go-review.googlesource.com/c/go/+/245122 Run-TryBot: Matthew Dempsky TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- src/cmd/compile/internal/gc/select.go | 1 - src/runtime/select.go | 23 ++++++++++------------- 2 files changed, 10 insertions(+), 14 deletions(-) (limited to 'src/runtime') diff --git a/src/cmd/compile/internal/gc/select.go b/src/cmd/compile/internal/gc/select.go index 49cc23cd3d..eb5ff8469b 100644 --- a/src/cmd/compile/internal/gc/select.go +++ b/src/cmd/compile/internal/gc/select.go @@ -386,7 +386,6 @@ func scasetype() *types.Type { namedfield("elem", types.Types[TUNSAFEPTR]), namedfield("kind", types.Types[TUINT16]), namedfield("pc", types.Types[TUINTPTR]), - namedfield("releasetime", types.Types[TINT64]), }) scase.SetNoalg(true) } diff --git a/src/runtime/select.go b/src/runtime/select.go index 081db7bad4..2f8b139155 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -26,11 +26,10 @@ const ( // Known to compiler. // Changes here must also be made in src/cmd/internal/gc/select.go's scasetype. type scase struct { - c *hchan // chan - elem unsafe.Pointer // data element - kind uint16 - pc uintptr // race pc (for race detector / msan) - releasetime int64 + c *hchan // chan + elem unsafe.Pointer // data element + kind uint16 + pc uintptr // race pc (for race detector / msan) } var ( @@ -142,9 +141,6 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { var t0 int64 if blockprofilerate > 0 { t0 = cputicks() - for i := 0; i < ncases; i++ { - scases[i].releasetime = -1 - } } // The compiler rewrites selects that statically have @@ -227,6 +223,7 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { var casi int var cas *scase var caseSuccess bool + var caseReleaseTime int64 = -1 var recvOK bool for i := 0; i < ncases; i++ { casi = int(pollorder[i]) @@ -346,14 +343,14 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { if k.kind == caseNil { continue } - if sglist.releasetime > 0 { - k.releasetime = sglist.releasetime - } if sg == sglist { // sg has already been dequeued by the G that woke us up. casi = int(casei) cas = k caseSuccess = sglist.success + if sglist.releasetime > 0 { + caseReleaseTime = sglist.releasetime + } } else { c = k.c if k.kind == caseSend { @@ -483,8 +480,8 @@ send: goto retc retc: - if cas.releasetime > 0 { - blockevent(cas.releasetime-t0, 1) + if caseReleaseTime > 0 { + blockevent(caseReleaseTime-t0, 1) } return casi, recvOK -- cgit v1.3 From 8a984e8e3f2cf4101f448ea9b9d9880b9e83c11e Mon Sep 17 00:00:00 2001 From: Matthew Dempsky Date: Mon, 27 Jul 2020 14:47:47 -0700 Subject: runtime: omit nil-channel cases from selectgo's orders Currently, selectgo does an initial pass over the cases array to look for entries with nil channels, so they can be easily recognized and skipped later on. But this still involves actually visiting the cases. This commit changes selectgo to omit cases with nil channels when constructing pollorder, so that they'll be skipped over entirely later on. It also checks for caseDefault up front, which will facilitate changing it to use a "block bool" parameter instead. Updates #40410. Change-Id: Icaebcb8f08df03cc33b6d8087616fb5585f7fedd Reviewed-on: https://go-review.googlesource.com/c/go/+/245123 Run-TryBot: Matthew Dempsky TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- src/runtime/select.go | 66 +++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 39 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/select.go b/src/runtime/select.go index 2f8b139155..d540dd2e69 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -45,7 +45,7 @@ func sellock(scases []scase, lockorder []uint16) { var c *hchan for _, o := range lockorder { c0 := scases[o].c - if c0 != nil && c0 != c { + if c0 != c { c = c0 lock(&c.lock) } @@ -61,11 +61,8 @@ func selunlock(scases []scase, lockorder []uint16) { // the G that calls select runnable again and schedules it for execution. // When the G runs on another M, it locks all the locks and frees sel. // Now if the first M touches sel, it will access freed memory. - for i := len(scases) - 1; i >= 0; i-- { + for i := len(lockorder) - 1; i >= 0; i-- { c := scases[lockorder[i]].c - if c == nil { - break - } if i > 0 && c == scases[lockorder[i-1]].c { continue // will unlock it on the next iteration } @@ -129,15 +126,6 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { pollorder := order1[:ncases:ncases] lockorder := order1[ncases:][:ncases:ncases] - // Replace send/receive cases involving nil channels with - // caseNil so logic below can assume non-nil channel. - for i := range scases { - cas := &scases[i] - if cas.c == nil && cas.kind != caseDefault { - *cas = scase{} - } - } - var t0 int64 if blockprofilerate > 0 { t0 = cputicks() @@ -152,15 +140,31 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { // optimizing (and needing to test). // generate permuted order - for i := 1; i < ncases; i++ { - j := fastrandn(uint32(i + 1)) - pollorder[i] = pollorder[j] + dfli := -1 + norder := 0 + for i := range scases { + cas := &scases[i] + + // Omit cases without channels from the poll and lock orders. + if cas.c == nil { + if cas.kind == caseDefault { + dfli = i + } + cas.elem = nil // allow GC + continue + } + + j := fastrandn(uint32(norder + 1)) + pollorder[norder] = pollorder[j] pollorder[j] = uint16(i) + norder++ } + pollorder = pollorder[:norder] + lockorder = lockorder[:norder] // sort the cases by Hchan address to get the locking order. // simple heap sort, to guarantee n log n time and constant stack footprint. - for i := 0; i < ncases; i++ { + for i := range lockorder { j := i // Start with the pollorder to permute cases on the same channel. c := scases[pollorder[i]].c @@ -171,7 +175,7 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { } lockorder[j] = pollorder[i] } - for i := ncases - 1; i >= 0; i-- { + for i := len(lockorder) - 1; i >= 0; i-- { o := lockorder[i] c := scases[o].c lockorder[i] = lockorder[0] @@ -195,7 +199,7 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { } if debugSelect { - for i := 0; i+1 < ncases; i++ { + for i := 0; i+1 < len(lockorder); i++ { if scases[lockorder[i]].c.sortkey() > scases[lockorder[i+1]].c.sortkey() { print("i=", i, " x=", lockorder[i], " y=", lockorder[i+1], "\n") throw("select: broken sort") @@ -218,22 +222,17 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { ) // pass 1 - look for something already waiting - var dfli int - var dfl *scase var casi int var cas *scase var caseSuccess bool var caseReleaseTime int64 = -1 var recvOK bool - for i := 0; i < ncases; i++ { - casi = int(pollorder[i]) + for _, casei := range pollorder { + casi = int(casei) cas = &scases[casi] c = cas.c switch cas.kind { - case caseNil: - continue - case caseRecv: sg = c.sendq.dequeue() if sg != nil { @@ -260,17 +259,12 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { if c.qcount < c.dataqsiz { goto bufsend } - - case caseDefault: - dfli = casi - dfl = cas } } - if dfl != nil { + if dfli >= 0 { selunlock(scases, lockorder) casi = dfli - cas = dfl goto retc } @@ -283,9 +277,6 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { for _, casei := range lockorder { casi = int(casei) cas = &scases[casi] - if cas.kind == caseNil { - continue - } c = cas.c sg := acquireSudog() sg.g = gp @@ -340,9 +331,6 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { for _, casei := range lockorder { k = &scases[casei] - if k.kind == caseNil { - continue - } if sg == sglist { // sg has already been dequeued by the G that woke us up. casi = int(casei) -- cgit v1.3 From d36bc7d78ad226b20056c08fb8bca041e25b3d1d Mon Sep 17 00:00:00 2001 From: Matthew Dempsky Date: Mon, 27 Jul 2020 15:20:18 -0700 Subject: runtime: split PCs out of scase Per-case PCs are only needed for race detector builds, so this allows skipping allocating stack space for them for non-race builds. It's possible to arrange the PCs and order arrays consecutively in memory so that we could just reuse the order0 pointer to identify both. However, there's more risk of that silently going wrong, so this commit passes them as separate arguments for now. We can revisit this in the future. Updates #40410. Change-Id: I8468bc25749e559891cb0cb007d1cc4a40fdd0f8 Reviewed-on: https://go-review.googlesource.com/c/go/+/245124 Reviewed-by: Keith Randall --- src/cmd/compile/internal/gc/builtin.go | 199 +++++++++++++------------ src/cmd/compile/internal/gc/builtin/runtime.go | 4 +- src/cmd/compile/internal/gc/select.go | 18 ++- src/runtime/select.go | 50 +++++-- 4 files changed, 153 insertions(+), 118 deletions(-) (limited to 'src/runtime') diff --git a/src/cmd/compile/internal/gc/builtin.go b/src/cmd/compile/internal/gc/builtin.go index 2cf2f4687e..eafdb0ebe7 100644 --- a/src/cmd/compile/internal/gc/builtin.go +++ b/src/cmd/compile/internal/gc/builtin.go @@ -126,74 +126,74 @@ var runtimeDecls = [...]struct { {"selectnbsend", funcTag, 94}, {"selectnbrecv", funcTag, 95}, {"selectnbrecv2", funcTag, 97}, - {"selectsetpc", funcTag, 62}, - {"selectgo", funcTag, 98}, + {"selectsetpc", funcTag, 98}, + {"selectgo", funcTag, 99}, {"block", funcTag, 9}, - {"makeslice", funcTag, 99}, - {"makeslice64", funcTag, 100}, - {"makeslicecopy", funcTag, 101}, - {"growslice", funcTag, 103}, - {"memmove", funcTag, 104}, - {"memclrNoHeapPointers", funcTag, 105}, - {"memclrHasPointers", funcTag, 105}, - {"memequal", funcTag, 106}, - {"memequal0", funcTag, 107}, - {"memequal8", funcTag, 107}, - {"memequal16", funcTag, 107}, - {"memequal32", funcTag, 107}, - {"memequal64", funcTag, 107}, - {"memequal128", funcTag, 107}, - {"f32equal", funcTag, 108}, - {"f64equal", funcTag, 108}, - {"c64equal", funcTag, 108}, - {"c128equal", funcTag, 108}, - {"strequal", funcTag, 108}, - {"interequal", funcTag, 108}, - {"nilinterequal", funcTag, 108}, - {"memhash", funcTag, 109}, - {"memhash0", funcTag, 110}, - {"memhash8", funcTag, 110}, - {"memhash16", funcTag, 110}, - {"memhash32", funcTag, 110}, - {"memhash64", funcTag, 110}, - {"memhash128", funcTag, 110}, - {"f32hash", funcTag, 110}, - {"f64hash", funcTag, 110}, - {"c64hash", funcTag, 110}, - {"c128hash", funcTag, 110}, - {"strhash", funcTag, 110}, - {"interhash", funcTag, 110}, - {"nilinterhash", funcTag, 110}, - {"int64div", funcTag, 111}, - {"uint64div", funcTag, 112}, - {"int64mod", funcTag, 111}, - {"uint64mod", funcTag, 112}, - {"float64toint64", funcTag, 113}, - {"float64touint64", funcTag, 114}, - {"float64touint32", funcTag, 115}, - {"int64tofloat64", funcTag, 116}, - {"uint64tofloat64", funcTag, 117}, - {"uint32tofloat64", funcTag, 118}, - {"complex128div", funcTag, 119}, - {"racefuncenter", funcTag, 120}, + {"makeslice", funcTag, 100}, + {"makeslice64", funcTag, 101}, + {"makeslicecopy", funcTag, 102}, + {"growslice", funcTag, 104}, + {"memmove", funcTag, 105}, + {"memclrNoHeapPointers", funcTag, 106}, + {"memclrHasPointers", funcTag, 106}, + {"memequal", funcTag, 107}, + {"memequal0", funcTag, 108}, + {"memequal8", funcTag, 108}, + {"memequal16", funcTag, 108}, + {"memequal32", funcTag, 108}, + {"memequal64", funcTag, 108}, + {"memequal128", funcTag, 108}, + {"f32equal", funcTag, 109}, + {"f64equal", funcTag, 109}, + {"c64equal", funcTag, 109}, + {"c128equal", funcTag, 109}, + {"strequal", funcTag, 109}, + {"interequal", funcTag, 109}, + {"nilinterequal", funcTag, 109}, + {"memhash", funcTag, 110}, + {"memhash0", funcTag, 111}, + {"memhash8", funcTag, 111}, + {"memhash16", funcTag, 111}, + {"memhash32", funcTag, 111}, + {"memhash64", funcTag, 111}, + {"memhash128", funcTag, 111}, + {"f32hash", funcTag, 111}, + {"f64hash", funcTag, 111}, + {"c64hash", funcTag, 111}, + {"c128hash", funcTag, 111}, + {"strhash", funcTag, 111}, + {"interhash", funcTag, 111}, + {"nilinterhash", funcTag, 111}, + {"int64div", funcTag, 112}, + {"uint64div", funcTag, 113}, + {"int64mod", funcTag, 112}, + {"uint64mod", funcTag, 113}, + {"float64toint64", funcTag, 114}, + {"float64touint64", funcTag, 115}, + {"float64touint32", funcTag, 116}, + {"int64tofloat64", funcTag, 117}, + {"uint64tofloat64", funcTag, 118}, + {"uint32tofloat64", funcTag, 119}, + {"complex128div", funcTag, 120}, + {"racefuncenter", funcTag, 121}, {"racefuncenterfp", funcTag, 9}, {"racefuncexit", funcTag, 9}, - {"raceread", funcTag, 120}, - {"racewrite", funcTag, 120}, - {"racereadrange", funcTag, 121}, - {"racewriterange", funcTag, 121}, - {"msanread", funcTag, 121}, - {"msanwrite", funcTag, 121}, - {"checkptrAlignment", funcTag, 122}, - {"checkptrArithmetic", funcTag, 124}, - {"libfuzzerTraceCmp1", funcTag, 126}, - {"libfuzzerTraceCmp2", funcTag, 128}, - {"libfuzzerTraceCmp4", funcTag, 129}, - {"libfuzzerTraceCmp8", funcTag, 130}, - {"libfuzzerTraceConstCmp1", funcTag, 126}, - {"libfuzzerTraceConstCmp2", funcTag, 128}, - {"libfuzzerTraceConstCmp4", funcTag, 129}, - {"libfuzzerTraceConstCmp8", funcTag, 130}, + {"raceread", funcTag, 121}, + {"racewrite", funcTag, 121}, + {"racereadrange", funcTag, 122}, + {"racewriterange", funcTag, 122}, + {"msanread", funcTag, 122}, + {"msanwrite", funcTag, 122}, + {"checkptrAlignment", funcTag, 123}, + {"checkptrArithmetic", funcTag, 125}, + {"libfuzzerTraceCmp1", funcTag, 127}, + {"libfuzzerTraceCmp2", funcTag, 129}, + {"libfuzzerTraceCmp4", funcTag, 130}, + {"libfuzzerTraceCmp8", funcTag, 131}, + {"libfuzzerTraceConstCmp1", funcTag, 127}, + {"libfuzzerTraceConstCmp2", funcTag, 129}, + {"libfuzzerTraceConstCmp4", funcTag, 130}, + {"libfuzzerTraceConstCmp8", funcTag, 131}, {"x86HasPOPCNT", varTag, 6}, {"x86HasSSE41", varTag, 6}, {"x86HasFMA", varTag, 6}, @@ -202,7 +202,7 @@ var runtimeDecls = [...]struct { } func runtimeTypes() []*types.Type { - var typs [131]*types.Type + var typs [132]*types.Type typs[0] = types.Bytetype typs[1] = types.NewPtr(typs[0]) typs[2] = types.Types[TANY] @@ -301,38 +301,39 @@ func runtimeTypes() []*types.Type { typs[95] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[84])}, []*Node{anonfield(typs[6])}) typs[96] = types.NewPtr(typs[6]) typs[97] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[96]), anonfield(typs[84])}, []*Node{anonfield(typs[6])}) - typs[98] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[15])}, []*Node{anonfield(typs[15]), anonfield(typs[6])}) - typs[99] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15])}, []*Node{anonfield(typs[7])}) - typs[100] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[7])}) - typs[101] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15]), anonfield(typs[7])}, []*Node{anonfield(typs[7])}) - typs[102] = types.NewSlice(typs[2]) - typs[103] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[102]), anonfield(typs[15])}, []*Node{anonfield(typs[102])}) - typs[104] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[5])}, nil) - typs[105] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5])}, nil) - typs[106] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[5])}, []*Node{anonfield(typs[6])}) - typs[107] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3])}, []*Node{anonfield(typs[6])}) - typs[108] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[7])}, []*Node{anonfield(typs[6])}) - typs[109] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5]), anonfield(typs[5])}, []*Node{anonfield(typs[5])}) - typs[110] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5])}, []*Node{anonfield(typs[5])}) - typs[111] = functype(nil, []*Node{anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[22])}) - typs[112] = functype(nil, []*Node{anonfield(typs[24]), anonfield(typs[24])}, []*Node{anonfield(typs[24])}) - typs[113] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[22])}) - typs[114] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[24])}) - typs[115] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[65])}) - typs[116] = functype(nil, []*Node{anonfield(typs[22])}, []*Node{anonfield(typs[20])}) - typs[117] = functype(nil, []*Node{anonfield(typs[24])}, []*Node{anonfield(typs[20])}) - typs[118] = functype(nil, []*Node{anonfield(typs[65])}, []*Node{anonfield(typs[20])}) - typs[119] = functype(nil, []*Node{anonfield(typs[26]), anonfield(typs[26])}, []*Node{anonfield(typs[26])}) - typs[120] = functype(nil, []*Node{anonfield(typs[5])}, nil) - typs[121] = functype(nil, []*Node{anonfield(typs[5]), anonfield(typs[5])}, nil) - typs[122] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[1]), anonfield(typs[5])}, nil) - typs[123] = types.NewSlice(typs[7]) - typs[124] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[123])}, nil) - typs[125] = types.Types[TUINT8] - typs[126] = functype(nil, []*Node{anonfield(typs[125]), anonfield(typs[125])}, nil) - typs[127] = types.Types[TUINT16] - typs[128] = functype(nil, []*Node{anonfield(typs[127]), anonfield(typs[127])}, nil) - typs[129] = functype(nil, []*Node{anonfield(typs[65]), anonfield(typs[65])}, nil) - typs[130] = functype(nil, []*Node{anonfield(typs[24]), anonfield(typs[24])}, nil) + typs[98] = functype(nil, []*Node{anonfield(typs[63])}, nil) + typs[99] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[15])}, []*Node{anonfield(typs[15]), anonfield(typs[6])}) + typs[100] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15])}, []*Node{anonfield(typs[7])}) + typs[101] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[7])}) + typs[102] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15]), anonfield(typs[7])}, []*Node{anonfield(typs[7])}) + typs[103] = types.NewSlice(typs[2]) + typs[104] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[103]), anonfield(typs[15])}, []*Node{anonfield(typs[103])}) + typs[105] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[5])}, nil) + typs[106] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5])}, nil) + typs[107] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[5])}, []*Node{anonfield(typs[6])}) + typs[108] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3])}, []*Node{anonfield(typs[6])}) + typs[109] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[7])}, []*Node{anonfield(typs[6])}) + typs[110] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5]), anonfield(typs[5])}, []*Node{anonfield(typs[5])}) + typs[111] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5])}, []*Node{anonfield(typs[5])}) + typs[112] = functype(nil, []*Node{anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[22])}) + typs[113] = functype(nil, []*Node{anonfield(typs[24]), anonfield(typs[24])}, []*Node{anonfield(typs[24])}) + typs[114] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[22])}) + typs[115] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[24])}) + typs[116] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[65])}) + typs[117] = functype(nil, []*Node{anonfield(typs[22])}, []*Node{anonfield(typs[20])}) + typs[118] = functype(nil, []*Node{anonfield(typs[24])}, []*Node{anonfield(typs[20])}) + typs[119] = functype(nil, []*Node{anonfield(typs[65])}, []*Node{anonfield(typs[20])}) + typs[120] = functype(nil, []*Node{anonfield(typs[26]), anonfield(typs[26])}, []*Node{anonfield(typs[26])}) + typs[121] = functype(nil, []*Node{anonfield(typs[5])}, nil) + typs[122] = functype(nil, []*Node{anonfield(typs[5]), anonfield(typs[5])}, nil) + typs[123] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[1]), anonfield(typs[5])}, nil) + typs[124] = types.NewSlice(typs[7]) + typs[125] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[124])}, nil) + typs[126] = types.Types[TUINT8] + typs[127] = functype(nil, []*Node{anonfield(typs[126]), anonfield(typs[126])}, nil) + typs[128] = types.Types[TUINT16] + typs[129] = functype(nil, []*Node{anonfield(typs[128]), anonfield(typs[128])}, nil) + typs[130] = functype(nil, []*Node{anonfield(typs[65]), anonfield(typs[65])}, nil) + typs[131] = functype(nil, []*Node{anonfield(typs[24]), anonfield(typs[24])}, nil) return typs[:] } diff --git a/src/cmd/compile/internal/gc/builtin/runtime.go b/src/cmd/compile/internal/gc/builtin/runtime.go index 00448272c5..25f86efdd6 100644 --- a/src/cmd/compile/internal/gc/builtin/runtime.go +++ b/src/cmd/compile/internal/gc/builtin/runtime.go @@ -169,8 +169,8 @@ func selectnbsend(hchan chan<- any, elem *any) bool func selectnbrecv(elem *any, hchan <-chan any) bool func selectnbrecv2(elem *any, received *bool, hchan <-chan any) bool -func selectsetpc(cas *byte) -func selectgo(cas0 *byte, order0 *byte, ncases int) (int, bool) +func selectsetpc(pc *uintptr) +func selectgo(cas0 *byte, order0 *byte, pc0 *uintptr, ncases int) (int, bool) func block() func makeslice(typ *byte, len int, cap int) unsafe.Pointer diff --git a/src/cmd/compile/internal/gc/select.go b/src/cmd/compile/internal/gc/select.go index eb5ff8469b..8eb31eb5c1 100644 --- a/src/cmd/compile/internal/gc/select.go +++ b/src/cmd/compile/internal/gc/select.go @@ -271,6 +271,14 @@ func walkselectcases(cases *Nodes) []*Node { r = typecheck(r, ctxStmt) init = append(init, r) + var pc0, pcs *Node + if flag_race { + pcs = temp(types.NewArray(types.Types[TUINTPTR], int64(n))) + pc0 = typecheck(nod(OADDR, nod(OINDEX, pcs, nodintconst(0)), nil), ctxExpr) + } else { + pc0 = nodnil() + } + // register cases for i, cas := range cases.Slice() { setlineno(cas) @@ -324,8 +332,8 @@ func walkselectcases(cases *Nodes) []*Node { // TODO(mdempsky): There should be a cleaner way to // handle this. - if instrumenting { - r = mkcall("selectsetpc", nil, nil, bytePtrToIndex(selv, int64(i))) + if flag_race { + r = mkcall("selectsetpc", nil, nil, nod(OADDR, nod(OINDEX, pcs, nodintconst(int64(i))), nil)) init = append(init, r) } } @@ -337,13 +345,16 @@ func walkselectcases(cases *Nodes) []*Node { r = nod(OAS2, nil, nil) r.List.Set2(chosen, recvOK) fn := syslook("selectgo") - r.Rlist.Set1(mkcall1(fn, fn.Type.Results(), nil, bytePtrToIndex(selv, 0), bytePtrToIndex(order, 0), nodintconst(int64(n)))) + r.Rlist.Set1(mkcall1(fn, fn.Type.Results(), nil, bytePtrToIndex(selv, 0), bytePtrToIndex(order, 0), pc0, nodintconst(int64(n)))) r = typecheck(r, ctxStmt) init = append(init, r) // selv and order are no longer alive after selectgo. init = append(init, nod(OVARKILL, selv, nil)) init = append(init, nod(OVARKILL, order, nil)) + if flag_race { + init = append(init, nod(OVARKILL, pcs, nil)) + } // dispatch cases for i, cas := range cases.Slice() { @@ -385,7 +396,6 @@ func scasetype() *types.Type { namedfield("c", types.Types[TUNSAFEPTR]), namedfield("elem", types.Types[TUNSAFEPTR]), namedfield("kind", types.Types[TUINT16]), - namedfield("pc", types.Types[TUINTPTR]), }) scase.SetNoalg(true) } diff --git a/src/runtime/select.go b/src/runtime/select.go index d540dd2e69..d7c7d9f26f 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -29,7 +29,6 @@ type scase struct { c *hchan // chan elem unsafe.Pointer // data element kind uint16 - pc uintptr // race pc (for race detector / msan) } var ( @@ -37,8 +36,8 @@ var ( chanrecvpc = funcPC(chanrecv) ) -func selectsetpc(cas *scase) { - cas.pc = getcallerpc() +func selectsetpc(pc *uintptr) { + *pc = getcallerpc() } func sellock(scases []scase, lockorder []uint16) { @@ -108,11 +107,15 @@ func block() { // Both reside on the goroutine's stack (regardless of any escaping in // selectgo). // +// For race detector builds, pc0 points to an array of type +// [ncases]uintptr (also on the stack); for other builds, it's set to +// nil. +// // selectgo returns the index of the chosen scase, which matches the // ordinal position of its respective select{recv,send,default} call. // Also, if the chosen scase was a receive operation, it reports whether // a value was received. -func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { +func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool) { if debugSelect { print("select: cas0=", cas0, "\n") } @@ -126,6 +129,21 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { pollorder := order1[:ncases:ncases] lockorder := order1[ncases:][:ncases:ncases] + // Even when raceenabled is true, there might be select + // statements in packages compiled without -race (e.g., + // ensureSigM in runtime/signal_unix.go). + var pcs []uintptr + if raceenabled && pc0 != nil { + pc1 := (*[1 << 16]uintptr)(unsafe.Pointer(pc0)) + pcs = pc1[:ncases:ncases] + } + casePC := func(casi int) uintptr { + if pcs == nil { + return 0 + } + return pcs[casi] + } + var t0 int64 if blockprofilerate > 0 { t0 = cputicks() @@ -247,7 +265,7 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { case caseSend: if raceenabled { - racereadpc(c.raceaddr(), cas.pc, chansendpc) + racereadpc(c.raceaddr(), casePC(casi), chansendpc) } if c.closed != 0 { goto sclose @@ -371,9 +389,9 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { if raceenabled { if cas.kind == caseRecv && cas.elem != nil { - raceWriteObjectPC(c.elemtype, cas.elem, cas.pc, chanrecvpc) + raceWriteObjectPC(c.elemtype, cas.elem, casePC(casi), chanrecvpc) } else if cas.kind == caseSend { - raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc) + raceReadObjectPC(c.elemtype, cas.elem, casePC(casi), chansendpc) } } if msanenabled { @@ -391,7 +409,7 @@ bufrecv: // can receive from buffer if raceenabled { if cas.elem != nil { - raceWriteObjectPC(c.elemtype, cas.elem, cas.pc, chanrecvpc) + raceWriteObjectPC(c.elemtype, cas.elem, casePC(casi), chanrecvpc) } raceacquire(chanbuf(c, c.recvx)) racerelease(chanbuf(c, c.recvx)) @@ -418,7 +436,7 @@ bufsend: if raceenabled { raceacquire(chanbuf(c, c.sendx)) racerelease(chanbuf(c, c.sendx)) - raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc) + raceReadObjectPC(c.elemtype, cas.elem, casePC(casi), chansendpc) } if msanenabled { msanread(cas.elem, c.elemtype.size) @@ -456,7 +474,7 @@ rclose: send: // can send to a sleeping receiver (sg) if raceenabled { - raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc) + raceReadObjectPC(c.elemtype, cas.elem, casePC(casi), chansendpc) } if msanenabled { msanread(cas.elem, c.elemtype.size) @@ -519,12 +537,18 @@ func reflect_rselect(cases []runtimeSelect) (int, bool) { case selectRecv: sel[i] = scase{kind: caseRecv, c: rc.ch, elem: rc.val} } - if raceenabled || msanenabled { - selectsetpc(&sel[i]) + } + + var pc0 *uintptr + if raceenabled { + pcs := make([]uintptr, len(cases)) + for i := range pcs { + selectsetpc(&pcs[i]) } + pc0 = &pcs[0] } - return selectgo(&sel[0], &order[0], len(cases)) + return selectgo(&sel[0], &order[0], pc0, len(cases)) } func (q *waitq) dequeueSudoG(sgp *sudog) { -- cgit v1.3 From fe23ba4a145ce8465d16ea2a92b9a7e96e15c28e Mon Sep 17 00:00:00 2001 From: Matthew Dempsky Date: Mon, 27 Jul 2020 16:19:15 -0700 Subject: runtime: eliminate scase.kind field Currently, we include a "kind" field on scase to distinguish the three kinds of cases in a select statement: sends, receives, and defaults. This commit removes by kind field by instead arranging for the compiler to always place sends before receives, and to provide their counts separately. It also passes an explicit "block bool" parameter to avoid needing to include a default case in the array. It's safe to shuffle cases like this because the runtime will randomize the order they're polled in anyway. Fixes #40410. Change-Id: Iaeaed4cf7bddd576d78f2c863bd91a03a5c82df2 Reviewed-on: https://go-review.googlesource.com/c/go/+/245125 Reviewed-by: Keith Randall --- src/cmd/compile/internal/gc/builtin.go | 2 +- src/cmd/compile/internal/gc/builtin/runtime.go | 2 +- src/cmd/compile/internal/gc/select.go | 108 +++++++++++++------------ src/reflect/all_test.go | 8 ++ src/runtime/select.go | 104 +++++++++++++----------- 5 files changed, 123 insertions(+), 101 deletions(-) (limited to 'src/runtime') diff --git a/src/cmd/compile/internal/gc/builtin.go b/src/cmd/compile/internal/gc/builtin.go index eafdb0ebe7..861ffaaa5b 100644 --- a/src/cmd/compile/internal/gc/builtin.go +++ b/src/cmd/compile/internal/gc/builtin.go @@ -302,7 +302,7 @@ func runtimeTypes() []*types.Type { typs[96] = types.NewPtr(typs[6]) typs[97] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[96]), anonfield(typs[84])}, []*Node{anonfield(typs[6])}) typs[98] = functype(nil, []*Node{anonfield(typs[63])}, nil) - typs[99] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[15])}, []*Node{anonfield(typs[15]), anonfield(typs[6])}) + typs[99] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[15]), anonfield(typs[15]), anonfield(typs[6])}, []*Node{anonfield(typs[15]), anonfield(typs[6])}) typs[100] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15])}, []*Node{anonfield(typs[7])}) typs[101] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[7])}) typs[102] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15]), anonfield(typs[7])}, []*Node{anonfield(typs[7])}) diff --git a/src/cmd/compile/internal/gc/builtin/runtime.go b/src/cmd/compile/internal/gc/builtin/runtime.go index 25f86efdd6..635da80f7c 100644 --- a/src/cmd/compile/internal/gc/builtin/runtime.go +++ b/src/cmd/compile/internal/gc/builtin/runtime.go @@ -170,7 +170,7 @@ func selectnbrecv(elem *any, hchan <-chan any) bool func selectnbrecv2(elem *any, received *bool, hchan <-chan any) bool func selectsetpc(pc *uintptr) -func selectgo(cas0 *byte, order0 *byte, pc0 *uintptr, ncases int) (int, bool) +func selectgo(cas0 *byte, order0 *byte, pc0 *uintptr, nsends int, nrecvs int, block bool) (int, bool) func block() func makeslice(typ *byte, len int, cap int) unsafe.Pointer diff --git a/src/cmd/compile/internal/gc/select.go b/src/cmd/compile/internal/gc/select.go index 8eb31eb5c1..bae7ed30e2 100644 --- a/src/cmd/compile/internal/gc/select.go +++ b/src/cmd/compile/internal/gc/select.go @@ -106,18 +106,16 @@ func walkselect(sel *Node) { } func walkselectcases(cases *Nodes) []*Node { - n := cases.Len() + ncas := cases.Len() sellineno := lineno // optimization: zero-case select - if n == 0 { + if ncas == 0 { return []*Node{mkcall("block", nil, nil)} } // optimization: one-case select: single op. - // TODO(rsc): Reenable optimization once order.go can handle it. - // golang.org/issue/7672. - if n == 1 { + if ncas == 1 { cas := cases.First() setlineno(cas) l := cas.Ninit.Slice() @@ -178,10 +176,12 @@ func walkselectcases(cases *Nodes) []*Node { // convert case value arguments to addresses. // this rewrite is used by both the general code and the next optimization. + var dflt *Node for _, cas := range cases.Slice() { setlineno(cas) n := cas.Left if n == nil { + dflt = cas continue } switch n.Op { @@ -202,15 +202,10 @@ func walkselectcases(cases *Nodes) []*Node { } // optimization: two-case select but one is default: single non-blocking op. - if n == 2 && (cases.First().Left == nil || cases.Second().Left == nil) { - var cas *Node - var dflt *Node - if cases.First().Left == nil { + if ncas == 2 && dflt != nil { + cas := cases.First() + if cas == dflt { cas = cases.Second() - dflt = cases.First() - } else { - dflt = cases.Second() - cas = cases.First() } n := cas.Left @@ -257,74 +252,73 @@ func walkselectcases(cases *Nodes) []*Node { return []*Node{r, nod(OBREAK, nil, nil)} } + if dflt != nil { + ncas-- + } + casorder := make([]*Node, ncas) + nsends, nrecvs := 0, 0 + var init []*Node // generate sel-struct lineno = sellineno - selv := temp(types.NewArray(scasetype(), int64(n))) + selv := temp(types.NewArray(scasetype(), int64(ncas))) r := nod(OAS, selv, nil) r = typecheck(r, ctxStmt) init = append(init, r) - order := temp(types.NewArray(types.Types[TUINT16], 2*int64(n))) + order := temp(types.NewArray(types.Types[TUINT16], 2*int64(ncas))) r = nod(OAS, order, nil) r = typecheck(r, ctxStmt) init = append(init, r) var pc0, pcs *Node if flag_race { - pcs = temp(types.NewArray(types.Types[TUINTPTR], int64(n))) + pcs = temp(types.NewArray(types.Types[TUINTPTR], int64(ncas))) pc0 = typecheck(nod(OADDR, nod(OINDEX, pcs, nodintconst(0)), nil), ctxExpr) } else { pc0 = nodnil() } // register cases - for i, cas := range cases.Slice() { + for _, cas := range cases.Slice() { setlineno(cas) init = append(init, cas.Ninit.Slice()...) cas.Ninit.Set(nil) - // Keep in sync with runtime/select.go. - const ( - caseNil = iota - caseRecv - caseSend - caseDefault - ) + n := cas.Left + if n == nil { // default: + continue + } + var i int var c, elem *Node - var kind int64 = caseDefault - - if n := cas.Left; n != nil { - init = append(init, n.Ninit.Slice()...) - - switch n.Op { - default: - Fatalf("select %v", n.Op) - case OSEND: - kind = caseSend - c = n.Left - elem = n.Right - case OSELRECV, OSELRECV2: - kind = caseRecv - c = n.Right.Left - elem = n.Left - } + switch n.Op { + default: + Fatalf("select %v", n.Op) + case OSEND: + i = nsends + nsends++ + c = n.Left + elem = n.Right + case OSELRECV, OSELRECV2: + nrecvs++ + i = ncas - nrecvs + c = n.Right.Left + elem = n.Left } + casorder[i] = cas + setField := func(f string, val *Node) { r := nod(OAS, nodSym(ODOT, nod(OINDEX, selv, nodintconst(int64(i))), lookup(f)), val) r = typecheck(r, ctxStmt) init = append(init, r) } - setField("kind", nodintconst(kind)) - if c != nil { - c = convnop(c, types.Types[TUNSAFEPTR]) - setField("c", c) - } + c = convnop(c, types.Types[TUNSAFEPTR]) + setField("c", c) if elem != nil { elem = convnop(elem, types.Types[TUNSAFEPTR]) setField("elem", elem) @@ -337,6 +331,9 @@ func walkselectcases(cases *Nodes) []*Node { init = append(init, r) } } + if nsends+nrecvs != ncas { + Fatalf("walkselectcases: miscount: %v + %v != %v", nsends, nrecvs, ncas) + } // run the select lineno = sellineno @@ -345,7 +342,7 @@ func walkselectcases(cases *Nodes) []*Node { r = nod(OAS2, nil, nil) r.List.Set2(chosen, recvOK) fn := syslook("selectgo") - r.Rlist.Set1(mkcall1(fn, fn.Type.Results(), nil, bytePtrToIndex(selv, 0), bytePtrToIndex(order, 0), pc0, nodintconst(int64(n)))) + r.Rlist.Set1(mkcall1(fn, fn.Type.Results(), nil, bytePtrToIndex(selv, 0), bytePtrToIndex(order, 0), pc0, nodintconst(int64(nsends)), nodintconst(int64(nrecvs)), nodbool(dflt == nil))) r = typecheck(r, ctxStmt) init = append(init, r) @@ -357,14 +354,11 @@ func walkselectcases(cases *Nodes) []*Node { } // dispatch cases - for i, cas := range cases.Slice() { - setlineno(cas) - - cond := nod(OEQ, chosen, nodintconst(int64(i))) + dispatch := func(cond, cas *Node) { cond = typecheck(cond, ctxExpr) cond = defaultlit(cond, nil) - r = nod(OIF, cond, nil) + r := nod(OIF, cond, nil) if n := cas.Left; n != nil && n.Op == OSELRECV2 { x := nod(OAS, n.List.First(), recvOK) @@ -377,6 +371,15 @@ func walkselectcases(cases *Nodes) []*Node { init = append(init, r) } + if dflt != nil { + setlineno(dflt) + dispatch(nod(OLT, chosen, nodintconst(0)), dflt) + } + for i, cas := range casorder { + setlineno(cas) + dispatch(nod(OEQ, chosen, nodintconst(int64(i))), cas) + } + return init } @@ -395,7 +398,6 @@ func scasetype() *types.Type { scase = tostruct([]*Node{ namedfield("c", types.Types[TUNSAFEPTR]), namedfield("elem", types.Types[TUNSAFEPTR]), - namedfield("kind", types.Types[TUINT16]), }) scase.SetNoalg(true) } diff --git a/src/reflect/all_test.go b/src/reflect/all_test.go index ed2f225077..5a12699472 100644 --- a/src/reflect/all_test.go +++ b/src/reflect/all_test.go @@ -1725,6 +1725,14 @@ func TestSelectMaxCases(t *testing.T) { _, _, _ = Select(sCases) } +func TestSelectNop(t *testing.T) { + // "select { default: }" should always return the default case. + chosen, _, _ := Select([]SelectCase{{Dir: SelectDefault}}) + if chosen != 0 { + t.Fatalf("expected Select to return 0, but got %#v", chosen) + } +} + func BenchmarkSelect(b *testing.B) { channel := make(chan int) close(channel) diff --git a/src/runtime/select.go b/src/runtime/select.go index d7c7d9f26f..80768b285b 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -12,23 +12,12 @@ import ( const debugSelect = false -// scase.kind values. -// Known to compiler. -// Changes here must also be made in src/cmd/compile/internal/gc/select.go's walkselectcases. -const ( - caseNil = iota - caseRecv - caseSend - caseDefault -) - // Select case descriptor. // Known to compiler. // Changes here must also be made in src/cmd/internal/gc/select.go's scasetype. type scase struct { c *hchan // chan elem unsafe.Pointer // data element - kind uint16 } var ( @@ -115,7 +104,7 @@ func block() { // ordinal position of its respective select{recv,send,default} call. // Also, if the chosen scase was a receive operation, it reports whether // a value was received. -func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool) { +func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, nsends, nrecvs int, block bool) (int, bool) { if debugSelect { print("select: cas0=", cas0, "\n") } @@ -125,6 +114,7 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool) cas1 := (*[1 << 16]scase)(unsafe.Pointer(cas0)) order1 := (*[1 << 17]uint16)(unsafe.Pointer(order0)) + ncases := nsends + nrecvs scases := cas1[:ncases:ncases] pollorder := order1[:ncases:ncases] lockorder := order1[ncases:][:ncases:ncases] @@ -158,16 +148,12 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool) // optimizing (and needing to test). // generate permuted order - dfli := -1 norder := 0 for i := range scases { cas := &scases[i] // Omit cases without channels from the poll and lock orders. if cas.c == nil { - if cas.kind == caseDefault { - dfli = i - } cas.elem = nil // allow GC continue } @@ -250,8 +236,7 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool) cas = &scases[casi] c = cas.c - switch cas.kind { - case caseRecv: + if casi >= nsends { sg = c.sendq.dequeue() if sg != nil { goto recv @@ -262,8 +247,7 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool) if c.closed != 0 { goto rclose } - - case caseSend: + } else { if raceenabled { racereadpc(c.raceaddr(), casePC(casi), chansendpc) } @@ -280,9 +264,9 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool) } } - if dfli >= 0 { + if !block { selunlock(scases, lockorder) - casi = dfli + casi = -1 goto retc } @@ -311,12 +295,10 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool) *nextp = sg nextp = &sg.waitlink - switch cas.kind { - case caseRecv: - c.recvq.enqueue(sg) - - case caseSend: + if casi < nsends { c.sendq.enqueue(sg) + } else { + c.recvq.enqueue(sg) } } @@ -359,7 +341,7 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool) } } else { c = k.c - if k.kind == caseSend { + if int(casei) < nsends { c.sendq.dequeueSudoG(sglist) } else { c.recvq.dequeueSudoG(sglist) @@ -378,27 +360,29 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool) c = cas.c if debugSelect { - print("wait-return: cas0=", cas0, " c=", c, " cas=", cas, " kind=", cas.kind, "\n") + print("wait-return: cas0=", cas0, " c=", c, " cas=", cas, " send=", casi < nsends, "\n") } - if cas.kind == caseRecv { + if casi < nsends { + if !caseSuccess { + goto sclose + } + } else { recvOK = caseSuccess - } else if cas.kind == caseSend && !caseSuccess { - goto sclose } if raceenabled { - if cas.kind == caseRecv && cas.elem != nil { - raceWriteObjectPC(c.elemtype, cas.elem, casePC(casi), chanrecvpc) - } else if cas.kind == caseSend { + if casi < nsends { raceReadObjectPC(c.elemtype, cas.elem, casePC(casi), chansendpc) + } else if cas.elem != nil { + raceWriteObjectPC(c.elemtype, cas.elem, casePC(casi), chanrecvpc) } } if msanenabled { - if cas.kind == caseRecv && cas.elem != nil { - msanwrite(cas.elem, c.elemtype.size) - } else if cas.kind == caseSend { + if casi < nsends { msanread(cas.elem, c.elemtype.size) + } else if cas.elem != nil { + msanwrite(cas.elem, c.elemtype.size) } } @@ -526,29 +510,57 @@ func reflect_rselect(cases []runtimeSelect) (int, bool) { block() } sel := make([]scase, len(cases)) - order := make([]uint16, 2*len(cases)) - for i := range cases { - rc := &cases[i] + orig := make([]int, len(cases)) + nsends, nrecvs := 0, 0 + dflt := -1 + for i, rc := range cases { + var j int switch rc.dir { case selectDefault: - sel[i] = scase{kind: caseDefault} + dflt = i + continue case selectSend: - sel[i] = scase{kind: caseSend, c: rc.ch, elem: rc.val} + j = nsends + nsends++ case selectRecv: - sel[i] = scase{kind: caseRecv, c: rc.ch, elem: rc.val} + nrecvs++ + j = len(cases) - nrecvs } + + sel[j] = scase{c: rc.ch, elem: rc.val} + orig[j] = i } + // Only a default case. + if nsends+nrecvs == 0 { + return dflt, false + } + + // Compact sel and orig if necessary. + if nsends+nrecvs < len(cases) { + copy(sel[nsends:], sel[len(cases)-nrecvs:]) + copy(orig[nsends:], orig[len(cases)-nrecvs:]) + } + + order := make([]uint16, 2*(nsends+nrecvs)) var pc0 *uintptr if raceenabled { - pcs := make([]uintptr, len(cases)) + pcs := make([]uintptr, nsends+nrecvs) for i := range pcs { selectsetpc(&pcs[i]) } pc0 = &pcs[0] } - return selectgo(&sel[0], &order[0], pc0, len(cases)) + chosen, recvOK := selectgo(&sel[0], &order[0], pc0, nsends, nrecvs, dflt == -1) + + // Translate chosen back to caller's ordering. + if chosen < 0 { + chosen = dflt + } else { + chosen = orig[chosen] + } + return chosen, recvOK } func (q *waitq) dequeueSudoG(sgp *sudog) { -- cgit v1.3 From 0941fc3f9ff43598d25fa6e964e7829a268102bf Mon Sep 17 00:00:00 2001 From: cui Date: Wed, 12 Aug 2020 17:33:41 +0000 Subject: runtime: reduce syscall when call runtime.clone Change-Id: I3ea398fd86aae4c86557dd6fff65d90a6f756890 GitHub-Last-Rev: 4c295388f7b5e6768ffd2530337f78b4c75a9310 GitHub-Pull-Request: golang/go#40392 Reviewed-on: https://go-review.googlesource.com/c/go/+/244626 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/runtime/sys_linux_amd64.s | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index 621c01b365..8d90813589 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -592,13 +592,25 @@ TEXT runtime·clone(SB),NOSPLIT,$0 MOVQ stk+8(FP), SI MOVQ $0, DX MOVQ $0, R10 - + MOVQ $0, R8 // Copy mp, gp, fn off parent stack for use by child. // Careful: Linux system call clobbers CX and R11. - MOVQ mp+16(FP), R8 + MOVQ mp+16(FP), R13 MOVQ gp+24(FP), R9 MOVQ fn+32(FP), R12 - + CMPQ R13, $0 // m + JEQ nog1 + CMPQ R9, $0 // g + JEQ nog1 + LEAQ m_tls(R13), R8 +#ifdef GOOS_android + // Android stores the TLS offset in runtime·tls_g. + SUBQ runtime·tls_g(SB), R8 +#else + ADDQ $8, R8 // ELF wants to use -8(FS) +#endif + ORQ $0x00080000, DI //add flag CLONE_SETTLS(0x00080000) to call clone +nog1: MOVL $SYS_clone, AX SYSCALL @@ -612,27 +624,23 @@ TEXT runtime·clone(SB),NOSPLIT,$0 MOVQ SI, SP // If g or m are nil, skip Go-related setup. - CMPQ R8, $0 // m - JEQ nog + CMPQ R13, $0 // m + JEQ nog2 CMPQ R9, $0 // g - JEQ nog + JEQ nog2 // Initialize m->procid to Linux tid MOVL $SYS_gettid, AX SYSCALL - MOVQ AX, m_procid(R8) - - // Set FS to point at m->tls. - LEAQ m_tls(R8), DI - CALL runtime·settls(SB) + MOVQ AX, m_procid(R13) // In child, set up new stack get_tls(CX) - MOVQ R8, g_m(R9) + MOVQ R13, g_m(R9) MOVQ R9, g(CX) CALL runtime·stackcheck(SB) -nog: +nog2: // Call fn CALL R12 -- cgit v1.3 From bd519d0c8734c3e30cb1a8b8217dd9934cd61e25 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 25 Jun 2020 14:50:10 -0700 Subject: runtime: don't call setitimer for each thread Previously, on Unix systems, when the profiler was enabled or disabled, we called setitimer once per thread. With this change we instead call it once per process. Change-Id: I90f0189b562e11232816390dc7d55ed154bd836d Reviewed-on: https://go-review.googlesource.com/c/go/+/240003 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- src/runtime/signal_unix.go | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go index 6a11c91fb9..064a0ea100 100644 --- a/src/runtime/signal_unix.go +++ b/src/runtime/signal_unix.go @@ -272,6 +272,12 @@ func setProcessCPUProfiler(hz int32) { atomic.Storeuintptr(&fwdSig[_SIGPROF], getsig(_SIGPROF)) setsig(_SIGPROF, funcPC(sighandler)) } + + var it itimerval + it.it_interval.tv_sec = 0 + it.it_interval.set_usec(1000000 / hz) + it.it_value = it.it_interval + setitimer(_ITIMER_PROF, &it, nil) } else { // If the Go signal handler should be disabled by default, // switch back to the signal handler that was installed @@ -296,23 +302,16 @@ func setProcessCPUProfiler(hz int32) { setsig(_SIGPROF, h) } } + + setitimer(_ITIMER_PROF, &itimerval{}, nil) } } // setThreadCPUProfiler makes any thread-specific changes required to // implement profiling at a rate of hz. +// No changes required on Unix systems. func setThreadCPUProfiler(hz int32) { - var it itimerval - if hz == 0 { - setitimer(_ITIMER_PROF, &it, nil) - } else { - it.it_interval.tv_sec = 0 - it.it_interval.set_usec(1000000 / hz) - it.it_value = it.it_interval - setitimer(_ITIMER_PROF, &it, nil) - } - _g_ := getg() - _g_.m.profilehz = hz + getg().m.profilehz = hz } func sigpipe() { -- cgit v1.3 From 6b420169d798c7ebe733487b56ea5c3fa4aab5ce Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 18 Aug 2020 16:46:24 -0700 Subject: os, internal/poll: loop on EINTR for all file syscalls When using a FUSE file system, any system call that touches the file system can return EINTR. Fixes #40846 Change-Id: I25d32da22cec08dea81ab297291a85ad72db2df7 Reviewed-on: https://go-review.googlesource.com/c/go/+/249178 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Bryan C. Mills --- src/internal/poll/fd_fsync_posix.go | 4 +++- src/internal/poll/fd_opendir_darwin.go | 8 ++++++- src/internal/poll/fd_posix.go | 28 +++++++++++++++++++++--- src/internal/poll/fd_unix.go | 22 ++++++++----------- src/os/dir_darwin.go | 3 +++ src/os/file.go | 5 ++++- src/os/file_plan9.go | 4 ++++ src/os/file_posix.go | 32 ++++++++++++++++++++++++--- src/os/file_unix.go | 40 +++++++++++++++++++++++++++------- src/os/getwd.go | 11 +++++++++- src/os/stat_unix.go | 8 +++++-- src/runtime/trace/trace_stack_test.go | 2 +- 12 files changed, 133 insertions(+), 34 deletions(-) (limited to 'src/runtime') diff --git a/src/internal/poll/fd_fsync_posix.go b/src/internal/poll/fd_fsync_posix.go index 69358297f4..dd7956f14d 100644 --- a/src/internal/poll/fd_fsync_posix.go +++ b/src/internal/poll/fd_fsync_posix.go @@ -14,5 +14,7 @@ func (fd *FD) Fsync() error { return err } defer fd.decref() - return syscall.Fsync(fd.Sysfd) + return ignoringEINTR(func() error { + return syscall.Fsync(fd.Sysfd) + }) } diff --git a/src/internal/poll/fd_opendir_darwin.go b/src/internal/poll/fd_opendir_darwin.go index c7d3318c72..8eb770c358 100644 --- a/src/internal/poll/fd_opendir_darwin.go +++ b/src/internal/poll/fd_opendir_darwin.go @@ -19,7 +19,13 @@ func (fd *FD) OpenDir() (uintptr, string, error) { if err != nil { return 0, call, err } - dir, err := fdopendir(fd2) + var dir uintptr + for { + dir, err = fdopendir(fd2) + if err != syscall.EINTR { + break + } + } if err != nil { syscall.Close(fd2) return 0, "fdopendir", err diff --git a/src/internal/poll/fd_posix.go b/src/internal/poll/fd_posix.go index 54747b4c99..e5fb05c9c2 100644 --- a/src/internal/poll/fd_posix.go +++ b/src/internal/poll/fd_posix.go @@ -35,7 +35,9 @@ func (fd *FD) Fchmod(mode uint32) error { return err } defer fd.decref() - return syscall.Fchmod(fd.Sysfd, mode) + return ignoringEINTR(func() error { + return syscall.Fchmod(fd.Sysfd, mode) + }) } // Fchown wraps syscall.Fchown. @@ -44,7 +46,9 @@ func (fd *FD) Fchown(uid, gid int) error { return err } defer fd.decref() - return syscall.Fchown(fd.Sysfd, uid, gid) + return ignoringEINTR(func() error { + return syscall.Fchown(fd.Sysfd, uid, gid) + }) } // Ftruncate wraps syscall.Ftruncate. @@ -53,7 +57,9 @@ func (fd *FD) Ftruncate(size int64) error { return err } defer fd.decref() - return syscall.Ftruncate(fd.Sysfd, size) + return ignoringEINTR(func() error { + return syscall.Ftruncate(fd.Sysfd, size) + }) } // RawControl invokes the user-defined function f for a non-IO @@ -66,3 +72,19 @@ func (fd *FD) RawControl(f func(uintptr)) error { f(uintptr(fd.Sysfd)) return nil } + +// ignoringEINTR makes a function call and repeats it if it returns +// an EINTR error. This appears to be required even though we install all +// signal handlers with SA_RESTART: see #22838, #38033, #38836, #40846. +// Also #20400 and #36644 are issues in which a signal handler is +// installed without setting SA_RESTART. None of these are the common case, +// but there are enough of them that it seems that we can't avoid +// an EINTR loop. +func ignoringEINTR(fn func() error) error { + for { + err := fn() + if err != syscall.EINTR { + return err + } + } +} diff --git a/src/internal/poll/fd_unix.go b/src/internal/poll/fd_unix.go index 4872fa9851..1d5101eac3 100644 --- a/src/internal/poll/fd_unix.go +++ b/src/internal/poll/fd_unix.go @@ -152,7 +152,7 @@ func (fd *FD) Read(p []byte) (int, error) { p = p[:maxRW] } for { - n, err := ignoringEINTR(syscall.Read, fd.Sysfd, p) + n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p) if err != nil { n = 0 if err == syscall.EAGAIN && fd.pd.pollable() { @@ -264,7 +264,7 @@ func (fd *FD) Write(p []byte) (int, error) { if fd.IsStream && max-nn > maxRW { max = nn + maxRW } - n, err := ignoringEINTR(syscall.Write, fd.Sysfd, p[nn:max]) + n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max]) if n > 0 { nn += n } @@ -423,7 +423,7 @@ func (fd *FD) ReadDirent(buf []byte) (int, error) { } defer fd.decref() for { - n, err := ignoringEINTR(syscall.ReadDirent, fd.Sysfd, buf) + n, err := ignoringEINTRIO(syscall.ReadDirent, fd.Sysfd, buf) if err != nil { n = 0 if err == syscall.EAGAIN && fd.pd.pollable() { @@ -452,7 +452,9 @@ func (fd *FD) Fstat(s *syscall.Stat_t) error { return err } defer fd.decref() - return syscall.Fstat(fd.Sysfd, s) + return ignoringEINTR(func() error { + return syscall.Fstat(fd.Sysfd, s) + }) } // tryDupCloexec indicates whether F_DUPFD_CLOEXEC should be used. @@ -514,7 +516,7 @@ func (fd *FD) WriteOnce(p []byte) (int, error) { return 0, err } defer fd.writeUnlock() - return ignoringEINTR(syscall.Write, fd.Sysfd, p) + return ignoringEINTRIO(syscall.Write, fd.Sysfd, p) } // RawRead invokes the user-defined function f for a read operation. @@ -555,14 +557,8 @@ func (fd *FD) RawWrite(f func(uintptr) bool) error { } } -// ignoringEINTR makes a function call and repeats it if it returns -// an EINTR error. This appears to be required even though we install -// all signal handlers with SA_RESTART: see #22838, #38033, #38836. -// Also #20400 and #36644 are issues in which a signal handler is -// installed without setting SA_RESTART. None of these are the common case, -// but there are enough of them that it seems that we can't avoid -// an EINTR loop. -func ignoringEINTR(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) { +// ignoringEINTRIO is like ignoringEINTR, but just for IO calls. +func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) { for { n, err := fn(fd, p) if err != syscall.EINTR { diff --git a/src/os/dir_darwin.go b/src/os/dir_darwin.go index 2f9ba78d68..87797e2dda 100644 --- a/src/os/dir_darwin.go +++ b/src/os/dir_darwin.go @@ -47,6 +47,9 @@ func (f *File) readdirnames(n int) (names []string, err error) { var entptr *syscall.Dirent for len(names) < size || n == -1 { if res := readdir_r(d.dir, &dirent, &entptr); res != 0 { + if syscall.Errno(res) == syscall.EINTR { + continue + } return names, wrapSyscallError("readdir", syscall.Errno(res)) } if entptr == nil { // EOF diff --git a/src/os/file.go b/src/os/file.go index a2b71cb61a..05d2f83283 100644 --- a/src/os/file.go +++ b/src/os/file.go @@ -255,7 +255,10 @@ func Mkdir(name string, perm FileMode) error { if runtime.GOOS == "windows" && isWindowsNulName(name) { return &PathError{"mkdir", name, syscall.ENOTDIR} } - e := syscall.Mkdir(fixLongPath(name), syscallMode(perm)) + longName := fixLongPath(name) + e := ignoringEINTR(func() error { + return syscall.Mkdir(longName, syscallMode(perm)) + }) if e != nil { return &PathError{"mkdir", name, e} diff --git a/src/os/file_plan9.go b/src/os/file_plan9.go index eb158905ab..043500744b 100644 --- a/src/os/file_plan9.go +++ b/src/os/file_plan9.go @@ -558,3 +558,7 @@ func (c *rawConn) Write(f func(uintptr) bool) error { func newRawConn(file *File) (*rawConn, error) { return nil, syscall.EPLAN9 } + +func ignoringEINTR(fn func() error) error { + return fn() +} diff --git a/src/os/file_posix.go b/src/os/file_posix.go index 24ea554b62..ae23d22d0a 100644 --- a/src/os/file_posix.go +++ b/src/os/file_posix.go @@ -76,7 +76,11 @@ func syscallMode(i FileMode) (o uint32) { // See docs in file.go:Chmod. func chmod(name string, mode FileMode) error { - if e := syscall.Chmod(fixLongPath(name), syscallMode(mode)); e != nil { + longName := fixLongPath(name) + e := ignoringEINTR(func() error { + return syscall.Chmod(longName, syscallMode(mode)) + }) + if e != nil { return &PathError{"chmod", name, e} } return nil @@ -101,7 +105,10 @@ func (f *File) chmod(mode FileMode) error { // On Windows or Plan 9, Chown always returns the syscall.EWINDOWS or // EPLAN9 error, wrapped in *PathError. func Chown(name string, uid, gid int) error { - if e := syscall.Chown(name, uid, gid); e != nil { + e := ignoringEINTR(func() error { + return syscall.Chown(name, uid, gid) + }) + if e != nil { return &PathError{"chown", name, e} } return nil @@ -114,7 +121,10 @@ func Chown(name string, uid, gid int) error { // On Windows, it always returns the syscall.EWINDOWS error, wrapped // in *PathError. func Lchown(name string, uid, gid int) error { - if e := syscall.Lchown(name, uid, gid); e != nil { + e := ignoringEINTR(func() error { + return syscall.Lchown(name, uid, gid) + }) + if e != nil { return &PathError{"lchown", name, e} } return nil @@ -222,3 +232,19 @@ func (f *File) checkValid(op string) error { } return nil } + +// ignoringEINTR makes a function call and repeats it if it returns an +// EINTR error. This appears to be required even though we install all +// signal handlers with SA_RESTART: see #22838, #38033, #38836, #40846. +// Also #20400 and #36644 are issues in which a signal handler is +// installed without setting SA_RESTART. None of these are the common case, +// but there are enough of them that it seems that we can't avoid +// an EINTR loop. +func ignoringEINTR(fn func() error) error { + for { + err := fn() + if err != syscall.EINTR { + return err + } + } +} diff --git a/src/os/file_unix.go b/src/os/file_unix.go index f2c00ae0cb..5446dd5003 100644 --- a/src/os/file_unix.go +++ b/src/os/file_unix.go @@ -39,7 +39,9 @@ func rename(oldname, newname string) error { return &LinkError{"rename", oldname, newname, syscall.EEXIST} } } - err = syscall.Rename(oldname, newname) + err = ignoringEINTR(func() error { + return syscall.Rename(oldname, newname) + }) if err != nil { return &LinkError{"rename", oldname, newname, err} } @@ -129,7 +131,9 @@ func newFile(fd uintptr, name string, kind newFileKind) *File { switch runtime.GOOS { case "darwin", "dragonfly", "freebsd", "netbsd", "openbsd": var st syscall.Stat_t - err := syscall.Fstat(fdi, &st) + err := ignoringEINTR(func() error { + return syscall.Fstat(fdi, &st) + }) typ := st.Mode & syscall.S_IFMT // Don't try to use kqueue with regular files on *BSDs. // On FreeBSD a regular file is always @@ -264,7 +268,10 @@ func (f *File) seek(offset int64, whence int) (ret int64, err error) { // If the file is a symbolic link, it changes the size of the link's target. // If there is an error, it will be of type *PathError. func Truncate(name string, size int64) error { - if e := syscall.Truncate(name, size); e != nil { + e := ignoringEINTR(func() error { + return syscall.Truncate(name, size) + }) + if e != nil { return &PathError{"truncate", name, e} } return nil @@ -277,11 +284,15 @@ func Remove(name string) error { // whether name is a file or directory. // Try both: it is cheaper on average than // doing a Stat plus the right one. - e := syscall.Unlink(name) + e := ignoringEINTR(func() error { + return syscall.Unlink(name) + }) if e == nil { return nil } - e1 := syscall.Rmdir(name) + e1 := ignoringEINTR(func() error { + return syscall.Rmdir(name) + }) if e1 == nil { return nil } @@ -316,7 +327,9 @@ func tempDir() string { // Link creates newname as a hard link to the oldname file. // If there is an error, it will be of type *LinkError. func Link(oldname, newname string) error { - e := syscall.Link(oldname, newname) + e := ignoringEINTR(func() error { + return syscall.Link(oldname, newname) + }) if e != nil { return &LinkError{"link", oldname, newname, e} } @@ -326,7 +339,9 @@ func Link(oldname, newname string) error { // Symlink creates newname as a symbolic link to oldname. // If there is an error, it will be of type *LinkError. func Symlink(oldname, newname string) error { - e := syscall.Symlink(oldname, newname) + e := ignoringEINTR(func() error { + return syscall.Symlink(oldname, newname) + }) if e != nil { return &LinkError{"symlink", oldname, newname, e} } @@ -365,7 +380,16 @@ func (f *File) readdir(n int) (fi []FileInfo, err error) { func Readlink(name string) (string, error) { for len := 128; ; len *= 2 { b := make([]byte, len) - n, e := fixCount(syscall.Readlink(name, b)) + var ( + n int + e error + ) + for { + n, e = fixCount(syscall.Readlink(name, b)) + if e != syscall.EINTR { + break + } + } // buffer too small if runtime.GOOS == "aix" && e == syscall.ERANGE { continue diff --git a/src/os/getwd.go b/src/os/getwd.go index 6d25466bb4..f3afd8c06c 100644 --- a/src/os/getwd.go +++ b/src/os/getwd.go @@ -45,7 +45,16 @@ func Getwd() (dir string, err error) { // If the operating system provides a Getwd call, use it. // Otherwise, we're trying to find our way back to ".". if syscall.ImplementsGetwd { - s, e := syscall.Getwd() + var ( + s string + e error + ) + for { + s, e = syscall.Getwd() + if e != syscall.EINTR { + break + } + } if useSyscallwd(e) { return s, NewSyscallError("getwd", e) } diff --git a/src/os/stat_unix.go b/src/os/stat_unix.go index 0a7e6029ac..ef74a43758 100644 --- a/src/os/stat_unix.go +++ b/src/os/stat_unix.go @@ -28,7 +28,9 @@ func (f *File) Stat() (FileInfo, error) { // statNolog stats a file with no test logging. func statNolog(name string) (FileInfo, error) { var fs fileStat - err := syscall.Stat(name, &fs.sys) + err := ignoringEINTR(func() error { + return syscall.Stat(name, &fs.sys) + }) if err != nil { return nil, &PathError{"stat", name, err} } @@ -39,7 +41,9 @@ func statNolog(name string) (FileInfo, error) { // lstatNolog lstats a file with no test logging. func lstatNolog(name string) (FileInfo, error) { var fs fileStat - err := syscall.Lstat(name, &fs.sys) + err := ignoringEINTR(func() error { + return syscall.Lstat(name, &fs.sys) + }) if err != nil { return nil, &PathError{"lstat", name, err} } diff --git a/src/runtime/trace/trace_stack_test.go b/src/runtime/trace/trace_stack_test.go index cfc0419b72..be3adc9801 100644 --- a/src/runtime/trace/trace_stack_test.go +++ b/src/runtime/trace/trace_stack_test.go @@ -252,7 +252,7 @@ func TestTraceSymbolize(t *testing.T) { {trace.EvGoSysCall, []frame{ {"syscall.read", 0}, {"syscall.Read", 0}, - {"internal/poll.ignoringEINTR", 0}, + {"internal/poll.ignoringEINTRIO", 0}, {"internal/poll.(*FD).Read", 0}, {"os.(*File).read", 0}, {"os.(*File).Read", 0}, -- cgit v1.3 From e94544cf012535da6b3c9e735bc4026e2db1c99c Mon Sep 17 00:00:00 2001 From: Matthew Dempsky Date: Wed, 19 Aug 2020 21:39:12 -0700 Subject: cmd/compile: fix checkptr handling of &^ checkptr has code to recognize &^ expressions, but it didn't take into account that "p &^ x" gets rewritten to "p & ^x" during walk, which resulted in false positive diagnostics. This CL changes walkexpr to mark OANDNOT expressions with Implicit when they're rewritten to OAND, so that walkCheckPtrArithmetic can still recognize them later. It would be slightly more idiomatic to instead mark the OBITNOT expression as Implicit (as it's a compiler-generated Node), but the OBITNOT expression might get constant folded. It's not worth the extra complexity/subtlety of relying on n.Right.Orig, so we set Implicit on the OAND node instead. To atone for this transgression, I add documentation for nodeImplicit. Fixes #40917. Change-Id: I386304171ad299c530e151e5924f179e9a5fd5b8 Reviewed-on: https://go-review.googlesource.com/c/go/+/249477 Run-TryBot: Matthew Dempsky TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall Reviewed-by: Cuong Manh Le --- src/cmd/compile/internal/gc/syntax.go | 4 ++-- src/cmd/compile/internal/gc/walk.go | 7 ++++++- src/runtime/checkptr_test.go | 1 + src/runtime/testdata/testprog/checkptr.go | 8 ++++++++ test/fixedbugs/issue40917.go | 23 +++++++++++++++++++++++ 5 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 test/fixedbugs/issue40917.go (limited to 'src/runtime') diff --git a/src/cmd/compile/internal/gc/syntax.go b/src/cmd/compile/internal/gc/syntax.go index b658410c53..47e5e59156 100644 --- a/src/cmd/compile/internal/gc/syntax.go +++ b/src/cmd/compile/internal/gc/syntax.go @@ -141,8 +141,8 @@ const ( nodeInitorder, _ // tracks state during init1; two bits _, _ // second nodeInitorder bit _, nodeHasBreak - _, nodeNoInline // used internally by inliner to indicate that a function call should not be inlined; set for OCALLFUNC and OCALLMETH only - _, nodeImplicit + _, nodeNoInline // used internally by inliner to indicate that a function call should not be inlined; set for OCALLFUNC and OCALLMETH only + _, nodeImplicit // implicit OADDR or ODEREF; ++/-- statement represented as OASOP; or ANDNOT lowered to OAND _, nodeIsDDD // is the argument variadic _, nodeDiag // already printed error about this _, nodeColas // OAS resulting from := diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go index 8ae3d9a5c7..74ed0411bd 100644 --- a/src/cmd/compile/internal/gc/walk.go +++ b/src/cmd/compile/internal/gc/walk.go @@ -973,6 +973,7 @@ opswitch: case OANDNOT: n.Left = walkexpr(n.Left, init) n.Op = OAND + n.SetImplicit(true) // for walkCheckPtrArithmetic n.Right = nod(OBITNOT, n.Right, nil) n.Right = typecheck(n.Right, ctxExpr) n.Right = walkexpr(n.Right, init) @@ -4003,8 +4004,12 @@ func walkCheckPtrArithmetic(n *Node, init *Nodes) *Node { case OADD: walk(n.Left) walk(n.Right) - case OSUB, OANDNOT: + case OSUB: walk(n.Left) + case OAND: + if n.Implicit() { // was OANDNOT + walk(n.Left) + } case OCONVNOP: if n.Left.Type.Etype == TUNSAFEPTR { n.Left = cheapexpr(n.Left, init) diff --git a/src/runtime/checkptr_test.go b/src/runtime/checkptr_test.go index 8ab8a4937c..194cc1243a 100644 --- a/src/runtime/checkptr_test.go +++ b/src/runtime/checkptr_test.go @@ -27,6 +27,7 @@ func TestCheckPtr(t *testing.T) { {"CheckPtrAlignmentPtr", "fatal error: checkptr: misaligned pointer conversion\n"}, {"CheckPtrAlignmentNoPtr", ""}, {"CheckPtrArithmetic", "fatal error: checkptr: pointer arithmetic result points to invalid allocation\n"}, + {"CheckPtrArithmetic2", "fatal error: checkptr: pointer arithmetic result points to invalid allocation\n"}, {"CheckPtrSize", "fatal error: checkptr: converted pointer straddles multiple allocations\n"}, {"CheckPtrSmall", "fatal error: checkptr: pointer arithmetic computed bad pointer value\n"}, } diff --git a/src/runtime/testdata/testprog/checkptr.go b/src/runtime/testdata/testprog/checkptr.go index 45e6fb1aa5..e0a2794f4c 100644 --- a/src/runtime/testdata/testprog/checkptr.go +++ b/src/runtime/testdata/testprog/checkptr.go @@ -10,6 +10,7 @@ func init() { register("CheckPtrAlignmentNoPtr", CheckPtrAlignmentNoPtr) register("CheckPtrAlignmentPtr", CheckPtrAlignmentPtr) register("CheckPtrArithmetic", CheckPtrArithmetic) + register("CheckPtrArithmetic2", CheckPtrArithmetic2) register("CheckPtrSize", CheckPtrSize) register("CheckPtrSmall", CheckPtrSmall) } @@ -32,6 +33,13 @@ func CheckPtrArithmetic() { sink2 = (*int)(unsafe.Pointer(i)) } +func CheckPtrArithmetic2() { + var x [2]int64 + p := unsafe.Pointer(&x[1]) + var one uintptr = 1 + sink2 = unsafe.Pointer(uintptr(p) & ^one) +} + func CheckPtrSize() { p := new(int64) sink2 = p diff --git a/test/fixedbugs/issue40917.go b/test/fixedbugs/issue40917.go new file mode 100644 index 0000000000..2128be5eca --- /dev/null +++ b/test/fixedbugs/issue40917.go @@ -0,0 +1,23 @@ +// run -gcflags=-d=checkptr + +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import "unsafe" + +func main() { + var x [2]uint64 + a := unsafe.Pointer(&x[1]) + + b := a + b = unsafe.Pointer(uintptr(b) + 2) + b = unsafe.Pointer(uintptr(b) - 1) + b = unsafe.Pointer(uintptr(b) &^ 1) + + if a != b { + panic("pointer arithmetic failed") + } +} -- cgit v1.3