From 92bda33d2771a9b12868d9025f113538fa7a84de Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Fri, 31 Jul 2020 15:58:00 -0400
Subject: runtime: revert signal stack mlocking

Go 1.14 included a (rather awful) workaround for a Linux kernel bug
that corrupted vector registers on x86 CPUs during signal delivery
(https://bugzilla.kernel.org/show_bug.cgi?id=205663). This bug was
introduced in Linux 5.2 and fixed in 5.3.15, 5.4.2 and all 5.5 and
later kernels. The fix was also back-ported by major distros. This
workaround was necessary, but had unfortunate downsides, including
causing Go programs to exceed the mlock ulimit in many configurations
(#37436).

We're reasonably confident that by the Go 1.16 release, the number of
systems running affected kernels will be vanishingly small. Hence,
this CL removes this workaround.

This effectively reverts CLs 209597 (version parser), 209899 (mlock
top of signal stack), 210299 (better failure message), 223121 (soft
mlock failure handling), and 244059 (special-case patched Ubuntu
kernels). The one thing we keep is the osArchInit function. It's empty
everywhere now, but is a reasonable hook to have.

Updates #35326, #35777 (the original register corruption bugs).
Updates #40184 (request to revert in 1.15).
Fixes #35979.

Change-Id: Ie213270837095576f1f3ef46bf3de187dc486c50
Reviewed-on: https://go-review.googlesource.com/c/go/+/246200
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/runtime/defs_linux_386.go   |  11 ----
 src/runtime/defs_linux_amd64.go |  11 ----
 src/runtime/export_test.go      |   2 -
 src/runtime/os_linux.go         |   9 ---
 src/runtime/os_linux_x86.go     | 118 +---------------------------------------
 src/runtime/panic.go            |  10 ----
 src/runtime/string.go           |  34 ------------
 src/runtime/string_test.go      |  31 -----------
 src/runtime/sys_linux_386.s     |  19 -------
 src/runtime/sys_linux_amd64.s   |  19 -------
 10 files changed, 1 insertion(+), 263 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/defs_linux_386.go b/src/runtime/defs_linux_386.go
index f4db8cf927..64a0fbcaaa 100644
--- a/src/runtime/defs_linux_386.go
+++ b/src/runtime/defs_linux_386.go
@@ -226,14 +226,3 @@ type sockaddr_un struct {
 	family uint16
 	path   [108]byte
 }
-
-const __NEW_UTS_LEN = 64
-
-type new_utsname struct {
-	sysname    [__NEW_UTS_LEN + 1]byte
-	nodename   [__NEW_UTS_LEN + 1]byte
-	release    [__NEW_UTS_LEN + 1]byte
-	version    [__NEW_UTS_LEN + 1]byte
-	machine    [__NEW_UTS_LEN + 1]byte
-	domainname [__NEW_UTS_LEN + 1]byte
-}
diff --git a/src/runtime/defs_linux_amd64.go b/src/runtime/defs_linux_amd64.go
index 8480d85219..1ae18a309b 100644
--- a/src/runtime/defs_linux_amd64.go
+++ b/src/runtime/defs_linux_amd64.go
@@ -262,14 +262,3 @@ type sockaddr_un struct {
 	family uint16
 	path   [108]byte
 }
-
-const __NEW_UTS_LEN = 64
-
-type new_utsname struct {
-	sysname    [__NEW_UTS_LEN + 1]byte
-	nodename   [__NEW_UTS_LEN + 1]byte
-	release    [__NEW_UTS_LEN + 1]byte
-	version    [__NEW_UTS_LEN + 1]byte
-	machine    [__NEW_UTS_LEN + 1]byte
-	domainname [__NEW_UTS_LEN + 1]byte
-}
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 5ab03f3f99..d591fdc4e9 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -43,8 +43,6 @@ var PhysHugePageSize = physHugePageSize
 
 var NetpollGenericInit = netpollGenericInit
 
-var ParseRelease = parseRelease
-
 var Memmove = memmove
 var MemclrNoHeapPointers = memclrNoHeapPointers
 
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index 7b95ff2428..22931b4d5c 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -328,20 +328,11 @@ func libpreinit() {
 	initsig(true)
 }
 
-// gsignalInitQuirk, if non-nil, is called for every allocated gsignal G.
-//
-// TODO(austin): Remove this after Go 1.15 when we remove the
-// mlockGsignal workaround.
-var gsignalInitQuirk func(gsignal *g)
-
 // Called to initialize a new m (including the bootstrap m).
 // Called on the parent thread (main thread in case of bootstrap), can allocate memory.
 func mpreinit(mp *m) {
 	mp.gsignal = malg(32 * 1024) // Linux wants >= 2K
 	mp.gsignal.m = mp
-	if gsignalInitQuirk != nil {
-		gsignalInitQuirk(mp.gsignal)
-	}
 }
 
 func gettid() uint32
diff --git a/src/runtime/os_linux_x86.go b/src/runtime/os_linux_x86.go
index 97f870707d..d91fa1a0d1 100644
--- a/src/runtime/os_linux_x86.go
+++ b/src/runtime/os_linux_x86.go
@@ -7,120 +7,4 @@
 
 package runtime
 
-import (
-	"runtime/internal/atomic"
-	"unsafe"
-)
-
-//go:noescape
-func uname(utsname *new_utsname) int
-
-func mlock(addr, len uintptr) int
-
-func osArchInit() {
-	// Linux 5.2 introduced a bug that can corrupt vector
-	// registers on return from a signal if the signal stack isn't
-	// faulted in:
-	// https://bugzilla.kernel.org/show_bug.cgi?id=205663
-	//
-	// It was fixed in 5.3.15, 5.4.2, and all 5.5 and later
-	// kernels.
-	//
-	// If we're on an affected kernel, work around this issue by
-	// mlocking the top page of every signal stack. This doesn't
-	// help for signal stacks created in C, but there's not much
-	// we can do about that.
-	//
-	// TODO(austin): Remove this in Go 1.15, at which point it
-	// will be unlikely to encounter any of the affected kernels
-	// in the wild.
-
-	var uts new_utsname
-	if uname(&uts) < 0 {
-		throw("uname failed")
-	}
-	// Check for null terminator to ensure gostringnocopy doesn't
-	// walk off the end of the release string.
-	found := false
-	for _, b := range uts.release {
-		if b == 0 {
-			found = true
-			break
-		}
-	}
-	if !found {
-		return
-	}
-	rel := gostringnocopy(&uts.release[0])
-
-	major, minor, patch, ok := parseRelease(rel)
-	if !ok {
-		return
-	}
-
-	if major == 5 && minor == 4 && patch < 2 {
-		// All 5.4 versions of Ubuntu are patched.
-		procVersion := []byte("/proc/version\000")
-		f := open(&procVersion[0], _O_RDONLY, 0)
-		if f >= 0 {
-			var buf [512]byte
-			p := noescape(unsafe.Pointer(&buf[0]))
-			n := read(f, p, int32(len(buf)))
-			closefd(f)
-
-			needle := []byte("Ubuntu")
-		contains:
-			for i, c := range buf[:n] {
-				if c != needle[0] {
-					continue
-				}
-				if int(n)-i < len(needle) {
-					break
-				}
-				for j, c2 := range needle {
-					if c2 != buf[i+j] {
-						continue contains
-					}
-				}
-				// This is an Ubuntu system.
-				return
-			}
-		}
-	}
-
-	if major == 5 && (minor == 2 || minor == 3 && patch < 15 || minor == 4 && patch < 2) {
-		gsignalInitQuirk = mlockGsignal
-		if m0.gsignal != nil {
-			throw("gsignal quirk too late")
-		}
-		throwReportQuirk = throwBadKernel
-	}
-}
-
-func mlockGsignal(gsignal *g) {
-	if atomic.Load(&touchStackBeforeSignal) != 0 {
-		// mlock has already failed, don't try again.
-		return
-	}
-
-	// This mlock call may fail, but we don't report the failure.
-	// Instead, if something goes badly wrong, we rely on prepareSignalM
-	// and throwBadKernel to do further mitigation and to report a problem
-	// to the user if mitigation fails. This is because many
-	// systems have a limit on the total mlock size, and many kernels
-	// that appear to have bad versions are actually patched to avoid the
-	// bug described above. We want Go 1.14 to run on those systems.
-	// See #37436.
-	if errno := mlock(gsignal.stack.hi-physPageSize, physPageSize); errno < 0 {
-		atomic.Store(&touchStackBeforeSignal, uint32(-errno))
-	}
-}
-
-// throwBadKernel is called, via throwReportQuirk, by throw.
-func throwBadKernel() {
-	if errno := atomic.Load(&touchStackBeforeSignal); errno != 0 {
-		println("runtime: note: your Linux kernel may be buggy")
-		println("runtime: note: see https://golang.org/wiki/LinuxKernelSignalVectorBug")
-		println("runtime: note: mlock workaround for kernel bug failed with errno", errno)
-	}
-}
+func osArchInit() {}
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index 615249f33c..127843b081 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -1283,12 +1283,6 @@ func startpanic_m() bool {
 	}
 }
 
-// throwReportQuirk, if non-nil, is called by throw after dumping the stacks.
-//
-// TODO(austin): Remove this after Go 1.15 when we remove the
-// mlockGsignal workaround.
-var throwReportQuirk func()
-
 var didothers bool
 var deadlock mutex
 
@@ -1335,10 +1329,6 @@ func dopanic_m(gp *g, pc, sp uintptr) bool {
 
 	printDebugLog()
 
-	if throwReportQuirk != nil {
-		throwReportQuirk()
-	}
-
 	return docrash
 }
 
diff --git a/src/runtime/string.go b/src/runtime/string.go
index 0515b56573..251044231e 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -499,37 +499,3 @@ func gostringw(strw *uint16) string {
 	b[n2] = 0 // for luck
 	return s[:n2]
 }
-
-// parseRelease parses a dot-separated version number. It follows the
-// semver syntax, but allows the minor and patch versions to be
-// elided.
-func parseRelease(rel string) (major, minor, patch int, ok bool) {
-	// Strip anything after a dash or plus.
-	for i := 0; i < len(rel); i++ {
-		if rel[i] == '-' || rel[i] == '+' {
-			rel = rel[:i]
-			break
-		}
-	}
-
-	next := func() (int, bool) {
-		for i := 0; i < len(rel); i++ {
-			if rel[i] == '.' {
-				ver, ok := atoi(rel[:i])
-				rel = rel[i+1:]
-				return ver, ok
-			}
-		}
-		ver, ok := atoi(rel)
-		rel = ""
-		return ver, ok
-	}
-	if major, ok = next(); !ok || rel == "" {
-		return
-	}
-	if minor, ok = next(); !ok || rel == "" {
-		return
-	}
-	patch, ok = next()
-	return
-}
diff --git a/src/runtime/string_test.go b/src/runtime/string_test.go
index b9ac667533..4eda12c35d 100644
--- a/src/runtime/string_test.go
+++ b/src/runtime/string_test.go
@@ -454,34 +454,3 @@ func TestAtoi32(t *testing.T) {
 		}
 	}
 }
-
-type parseReleaseTest struct {
-	in                  string
-	major, minor, patch int
-}
-
-var parseReleaseTests = []parseReleaseTest{
-	{"", -1, -1, -1},
-	{"x", -1, -1, -1},
-	{"5", 5, 0, 0},
-	{"5.12", 5, 12, 0},
-	{"5.12-x", 5, 12, 0},
-	{"5.12.1", 5, 12, 1},
-	{"5.12.1-x", 5, 12, 1},
-	{"5.12.1.0", 5, 12, 1},
-	{"5.20496382327982653440", -1, -1, -1},
-}
-
-func TestParseRelease(t *testing.T) {
-	for _, test := range parseReleaseTests {
-		major, minor, patch, ok := runtime.ParseRelease(test.in)
-		if !ok {
-			major, minor, patch = -1, -1, -1
-		}
-		if test.major != major || test.minor != minor || test.patch != patch {
-			t.Errorf("parseRelease(%q) = (%v, %v, %v) want (%v, %v, %v)",
-				test.in, major, minor, patch,
-				test.major, test.minor, test.patch)
-		}
-	}
-}
diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s
index 5b9b638ad7..1e3a834812 100644
--- a/src/runtime/sys_linux_386.s
+++ b/src/runtime/sys_linux_386.s
@@ -39,8 +39,6 @@
 #define SYS_socketcall		102
 #define SYS_setittimer		104
 #define SYS_clone		120
-#define SYS_uname		122
-#define SYS_mlock		150
 #define SYS_sched_yield 	158
 #define SYS_nanosleep		162
 #define SYS_rt_sigreturn	173
@@ -808,20 +806,3 @@ TEXT runtime·sbrk0(SB),NOSPLIT,$0-4
 	INVOKE_SYSCALL
 	MOVL	AX, ret+0(FP)
 	RET
-
-// func uname(utsname *new_utsname) int
-TEXT ·uname(SB),NOSPLIT,$0-8
-	MOVL    $SYS_uname, AX
-	MOVL    utsname+0(FP), BX
-	INVOKE_SYSCALL
-	MOVL	AX, ret+4(FP)
-	RET
-
-// func mlock(addr, len uintptr) int
-TEXT ·mlock(SB),NOSPLIT,$0-12
-	MOVL    $SYS_mlock, AX
-	MOVL    addr+0(FP), BX
-	MOVL    len+4(FP), CX
-	INVOKE_SYSCALL
-	MOVL	AX, ret+8(FP)
-	RET
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
index fe9c6bce85..b60057ce83 100644
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -33,10 +33,8 @@
 #define SYS_clone		56
 #define SYS_exit		60
 #define SYS_kill		62
-#define SYS_uname		63
 #define SYS_fcntl		72
 #define SYS_sigaltstack 	131
-#define SYS_mlock		149
 #define SYS_arch_prctl		158
 #define SYS_gettid		186
 #define SYS_futex		202
@@ -789,20 +787,3 @@ TEXT runtime·sbrk0(SB),NOSPLIT,$0-8
 	SYSCALL
 	MOVQ	AX, ret+0(FP)
 	RET
-
-// func uname(utsname *new_utsname) int
-TEXT ·uname(SB),NOSPLIT,$0-16
-	MOVQ    utsname+0(FP), DI
-	MOVL    $SYS_uname, AX
-	SYSCALL
-	MOVQ	AX, ret+8(FP)
-	RET
-
-// func mlock(addr, len uintptr) int
-TEXT ·mlock(SB),NOSPLIT,$0-24
-	MOVQ    addr+0(FP), DI
-	MOVQ    len+8(FP), SI
-	MOVL    $SYS_mlock, AX
-	SYSCALL
-	MOVQ	AX, ret+16(FP)
-	RET
-- 
cgit v1.3


From c0dded04f7ded5048b44200078a1f723f5e1bcc1 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 14 Jul 2020 01:41:03 -0600
Subject: runtime: do not explicitly exit on ctrl handler

The default ctrl+c handler should process exits in situations where it
makes sense, like console apps, but not in situations where it doesn't,
like libraries or services. Therefore, we should remove the exit(2) so
that the default handler is used for this. This also uses the more
proper windows exit code of STATUS_CONTROL_C_EXIT, with the base case
handler installed by KernelBase.dll. In particular, this helps in the
case of services, which previously would terminate when receiving
shutdown signals, instead of passing them onward to the service program.
In this CL, contrary to CL 244959, we do not need to special case
services with expensive detection algorithms, or rely on hard-coded
library/archive flags.

Fixes #40167.
Fixes #40074.

Change-Id: I9bf6ed6f65cefeff754d270aa33fa4df8d0b451f
Reviewed-on: https://go-review.googlesource.com/c/go/+/243597
Run-TryBot: Jason A. Donenfeld <Jason@zx2c4.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Alex Brainman <alex.brainman@gmail.com>
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 src/runtime/os_windows.go | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
index a584ada702..a62e941229 100644
--- a/src/runtime/os_windows.go
+++ b/src/runtime/os_windows.go
@@ -1010,11 +1010,6 @@ func ctrlhandler1(_type uint32) uint32 {
 	if sigsend(s) {
 		return 1
 	}
-	if !islibrary && !isarchive {
-		// Only exit the program if we don't have a DLL.
-		// See https://golang.org/issues/35965.
-		exit(2) // SIGINT, SIGTERM, etc
-	}
 	return 0
 }
 
-- 
cgit v1.3


From cf9b4f63a57b4360be700831781885fc6cf5a0b1 Mon Sep 17 00:00:00 2001
From: Joel Sing <joel@sing.id.au>
Date: Tue, 5 May 2020 03:15:58 +1000
Subject: runtime: use riscv64 RDTIME instruction

Use the actual RDTIME instruction, rather than a WORD.
Generated code is the same.

Change-Id: I6f6f5a1836eae2d05af34d4a22db2ede4fdcb458
Reviewed-on: https://go-review.googlesource.com/c/go/+/231997
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/asm_riscv64.s | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/runtime')

diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s
index d7c45a183d..8f6c8773eb 100644
--- a/src/runtime/asm_riscv64.s
+++ b/src/runtime/asm_riscv64.s
@@ -79,7 +79,7 @@ TEXT setg_gcc<>(SB),NOSPLIT,$0-0
 
 // func cputicks() int64
 TEXT runtime·cputicks(SB),NOSPLIT,$0-8
-	WORD	$0xc0102573	// rdtime a0
+	RDTIME	A0
 	MOV	A0, ret+0(FP)
 	RET
 
-- 
cgit v1.3


From c6a11f0dd279f374602794af60c7cde4585a1e6f Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Tue, 11 Aug 2020 13:04:48 -0700
Subject: crypto,internal/bytealg: fix assembly that clobbers BP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BP should be callee-save. It will be saved automatically if
there is a nonzero frame size. Otherwise, we need to avoid this register.

Change-Id: If3f551efa42d830c8793d9f0183cb8daad7a2ab5
Reviewed-on: https://go-review.googlesource.com/c/go/+/248260
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/crypto/elliptic/p256_asm_amd64.s |  5 ++--
 src/crypto/md5/md5block_amd64.s      |  2 +-
 src/internal/bytealg/index_amd64.s   | 52 ++++++++++++++++++------------------
 src/runtime/sys_linux_amd64.s        |  8 +++---
 4 files changed, 33 insertions(+), 34 deletions(-)

(limited to 'src/runtime')

diff --git a/src/crypto/elliptic/p256_asm_amd64.s b/src/crypto/elliptic/p256_asm_amd64.s
index 7afa54a58c..c77b11bcf2 100644
--- a/src/crypto/elliptic/p256_asm_amd64.s
+++ b/src/crypto/elliptic/p256_asm_amd64.s
@@ -1336,7 +1336,7 @@ TEXT p256SubInternal(SB),NOSPLIT,$0
 
 	RET
 /* ---------------------------------------*/
-TEXT p256MulInternal(SB),NOSPLIT,$0
+TEXT p256MulInternal(SB),NOSPLIT,$8
 	MOVQ acc4, mul0
 	MULQ t0
 	MOVQ mul0, acc0
@@ -1519,7 +1519,7 @@ TEXT p256MulInternal(SB),NOSPLIT,$0
 
 	RET
 /* ---------------------------------------*/
-TEXT p256SqrInternal(SB),NOSPLIT,$0
+TEXT p256SqrInternal(SB),NOSPLIT,$8
 
 	MOVQ acc4, mul0
 	MULQ acc5
@@ -2345,4 +2345,3 @@ TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$256-48
 
 	RET
 /* ---------------------------------------*/
-
diff --git a/src/crypto/md5/md5block_amd64.s b/src/crypto/md5/md5block_amd64.s
index 90d932b146..7c7d92d7e8 100644
--- a/src/crypto/md5/md5block_amd64.s
+++ b/src/crypto/md5/md5block_amd64.s
@@ -13,7 +13,7 @@
 // Licence: I hereby disclaim the copyright on this code and place it
 // in the public domain.
 
-TEXT	·block(SB),NOSPLIT,$0-32
+TEXT	·block(SB),NOSPLIT,$8-32
 	MOVQ	dig+0(FP),	BP
 	MOVQ	p+8(FP),	SI
 	MOVQ	p_len+16(FP), DX
diff --git a/src/internal/bytealg/index_amd64.s b/src/internal/bytealg/index_amd64.s
index 4459820801..6193b57239 100644
--- a/src/internal/bytealg/index_amd64.s
+++ b/src/internal/bytealg/index_amd64.s
@@ -8,7 +8,7 @@
 TEXT ·Index(SB),NOSPLIT,$0-56
 	MOVQ a_base+0(FP), DI
 	MOVQ a_len+8(FP), DX
-	MOVQ b_base+24(FP), BP
+	MOVQ b_base+24(FP), R8
 	MOVQ b_len+32(FP), AX
 	MOVQ DI, R10
 	LEAQ ret+48(FP), R11
@@ -17,7 +17,7 @@ TEXT ·Index(SB),NOSPLIT,$0-56
 TEXT ·IndexString(SB),NOSPLIT,$0-40
 	MOVQ a_base+0(FP), DI
 	MOVQ a_len+8(FP), DX
-	MOVQ b_base+16(FP), BP
+	MOVQ b_base+16(FP), R8
 	MOVQ b_len+24(FP), AX
 	MOVQ DI, R10
 	LEAQ ret+32(FP), R11
@@ -26,7 +26,7 @@ TEXT ·IndexString(SB),NOSPLIT,$0-40
 // AX: length of string, that we are searching for
 // DX: length of string, in which we are searching
 // DI: pointer to string, in which we are searching
-// BP: pointer to string, that we are searching for
+// R8: pointer to string, that we are searching for
 // R11: address, where to put return value
 // Note: We want len in DX and AX, because PCMPESTRI implicitly consumes them
 TEXT indexbody<>(SB),NOSPLIT,$0
@@ -37,11 +37,11 @@ TEXT indexbody<>(SB),NOSPLIT,$0
 no_sse42:
 	CMPQ AX, $2
 	JA   _3_or_more
-	MOVW (BP), BP
+	MOVW (R8), R8
 	LEAQ -1(DI)(DX*1), DX
 loop2:
 	MOVW (DI), SI
-	CMPW SI,BP
+	CMPW SI,R8
 	JZ success
 	ADDQ $1,DI
 	CMPQ DI,DX
@@ -50,12 +50,12 @@ loop2:
 _3_or_more:
 	CMPQ AX, $3
 	JA   _4_or_more
-	MOVW 1(BP), BX
-	MOVW (BP), BP
+	MOVW 1(R8), BX
+	MOVW (R8), R8
 	LEAQ -2(DI)(DX*1), DX
 loop3:
 	MOVW (DI), SI
-	CMPW SI,BP
+	CMPW SI,R8
 	JZ   partial_success3
 	ADDQ $1,DI
 	CMPQ DI,DX
@@ -72,11 +72,11 @@ partial_success3:
 _4_or_more:
 	CMPQ AX, $4
 	JA   _5_or_more
-	MOVL (BP), BP
+	MOVL (R8), R8
 	LEAQ -3(DI)(DX*1), DX
 loop4:
 	MOVL (DI), SI
-	CMPL SI,BP
+	CMPL SI,R8
 	JZ   success
 	ADDQ $1,DI
 	CMPQ DI,DX
@@ -87,11 +87,11 @@ _5_or_more:
 	JA   _8_or_more
 	LEAQ 1(DI)(DX*1), DX
 	SUBQ AX, DX
-	MOVL -4(BP)(AX*1), BX
-	MOVL (BP), BP
+	MOVL -4(R8)(AX*1), BX
+	MOVL (R8), R8
 loop5to7:
 	MOVL (DI), SI
-	CMPL SI,BP
+	CMPL SI,R8
 	JZ   partial_success5to7
 	ADDQ $1,DI
 	CMPQ DI,DX
@@ -108,11 +108,11 @@ partial_success5to7:
 _8_or_more:
 	CMPQ AX, $8
 	JA   _9_or_more
-	MOVQ (BP), BP
+	MOVQ (R8), R8
 	LEAQ -7(DI)(DX*1), DX
 loop8:
 	MOVQ (DI), SI
-	CMPQ SI,BP
+	CMPQ SI,R8
 	JZ   success
 	ADDQ $1,DI
 	CMPQ DI,DX
@@ -123,11 +123,11 @@ _9_or_more:
 	JA   _16_or_more
 	LEAQ 1(DI)(DX*1), DX
 	SUBQ AX, DX
-	MOVQ -8(BP)(AX*1), BX
-	MOVQ (BP), BP
+	MOVQ -8(R8)(AX*1), BX
+	MOVQ (R8), R8
 loop9to15:
 	MOVQ (DI), SI
-	CMPQ SI,BP
+	CMPQ SI,R8
 	JZ   partial_success9to15
 	ADDQ $1,DI
 	CMPQ DI,DX
@@ -144,7 +144,7 @@ partial_success9to15:
 _16_or_more:
 	CMPQ AX, $16
 	JA   _17_or_more
-	MOVOU (BP), X1
+	MOVOU (R8), X1
 	LEAQ -15(DI)(DX*1), DX
 loop16:
 	MOVOU (DI), X2
@@ -161,8 +161,8 @@ _17_or_more:
 	JA   _32_or_more
 	LEAQ 1(DI)(DX*1), DX
 	SUBQ AX, DX
-	MOVOU -16(BP)(AX*1), X0
-	MOVOU (BP), X1
+	MOVOU -16(R8)(AX*1), X0
+	MOVOU (R8), X1
 loop17to31:
 	MOVOU (DI), X2
 	PCMPEQB X1,X2
@@ -188,7 +188,7 @@ partial_success17to31:
 _32_or_more:
 	CMPQ AX, $32
 	JA   _33_to_63
-	VMOVDQU (BP), Y1
+	VMOVDQU (R8), Y1
 	LEAQ -31(DI)(DX*1), DX
 loop32:
 	VMOVDQU (DI), Y2
@@ -203,8 +203,8 @@ loop32:
 _33_to_63:
 	LEAQ 1(DI)(DX*1), DX
 	SUBQ AX, DX
-	VMOVDQU -32(BP)(AX*1), Y0
-	VMOVDQU (BP), Y1
+	VMOVDQU -32(R8)(AX*1), Y0
+	VMOVDQU (R8), Y1
 loop33to63:
 	VMOVDQU (DI), Y2
 	VPCMPEQB Y1, Y2, Y3
@@ -241,10 +241,10 @@ sse42:
 	// This value was determined experimentally and is the ~same
 	// on Nehalem (first with SSE42) and Haswell.
 	JAE _9_or_more
-	LEAQ 16(BP), SI
+	LEAQ 16(R8), SI
 	TESTW $0xff0, SI
 	JEQ no_sse42
-	MOVOU (BP), X1
+	MOVOU (R8), X1
 	LEAQ -15(DI)(DX*1), SI
 	MOVQ $16, R9
 	SUBQ AX, R9 // We advance by 16-len(sep) each iteration, so precalculate it into R9
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
index b60057ce83..621c01b365 100644
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -212,7 +212,7 @@ TEXT runtime·walltime1(SB),NOSPLIT,$16-12
 	// due to stack probes inserted to avoid stack/heap collisions.
 	// See issue #20427.
 
-	MOVQ	SP, BP	// Save old SP; BP unchanged by C code.
+	MOVQ	SP, R12	// Save old SP; R12 unchanged by C code.
 
 	get_tls(CX)
 	MOVQ	g(CX), AX
@@ -250,7 +250,7 @@ noswitch:
 	MOVQ	0(SP), AX	// sec
 	MOVQ	8(SP), DX	// nsec
 ret:
-	MOVQ	BP, SP		// Restore real SP
+	MOVQ	R12, SP		// Restore real SP
 	// Restore vdsoPC, vdsoSP
 	// We don't worry about being signaled between the two stores.
 	// If we are not in a signal handler, we'll restore vdsoSP to 0,
@@ -277,7 +277,7 @@ fallback:
 TEXT runtime·nanotime1(SB),NOSPLIT,$16-8
 	// Switch to g0 stack. See comment above in runtime·walltime.
 
-	MOVQ	SP, BP	// Save old SP; BP unchanged by C code.
+	MOVQ	SP, R12	// Save old SP; R12 unchanged by C code.
 
 	get_tls(CX)
 	MOVQ	g(CX), AX
@@ -315,7 +315,7 @@ noswitch:
 	MOVQ	0(SP), AX	// sec
 	MOVQ	8(SP), DX	// nsec
 ret:
-	MOVQ	BP, SP		// Restore real SP
+	MOVQ	R12, SP		// Restore real SP
 	// Restore vdsoPC, vdsoSP
 	// We don't worry about being signaled between the two stores.
 	// If we are not in a signal handler, we'll restore vdsoSP to 0,
-- 
cgit v1.3


From 6f99b33c18266a8858af96163de97173bdf6f081 Mon Sep 17 00:00:00 2001
From: Polina Osadcha <polliosa@google.com>
Date: Thu, 18 Jun 2020 16:17:13 +0300
Subject: all: replace Replace(..., -1) with ReplaceAll(...)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change-Id: I8f7cff7a83a9c50bfa3331e8b40e4a6c2e1c0eee
Reviewed-on: https://go-review.googlesource.com/c/go/+/245198
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
---
 doc/progs/run.go                       | 2 +-
 src/cmd/cover/cover_test.go            | 2 +-
 src/cmd/go/internal/version/version.go | 2 +-
 src/runtime/mkpreempt.go               | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/runtime')

diff --git a/doc/progs/run.go b/doc/progs/run.go
index baef3f79f9..8ac75cdcff 100644
--- a/doc/progs/run.go
+++ b/doc/progs/run.go
@@ -105,7 +105,7 @@ func test(tmpdir, file, want string) error {
 
 	// Canonicalize output.
 	out = bytes.TrimRight(out, "\n")
-	out = bytes.Replace(out, []byte{'\n'}, []byte{' '}, -1)
+	out = bytes.ReplaceAll(out, []byte{'\n'}, []byte{' '})
 
 	// Check the result.
 	match, err := regexp.Match(want, out)
diff --git a/src/cmd/cover/cover_test.go b/src/cmd/cover/cover_test.go
index 8a56e39011..1c252e6e45 100644
--- a/src/cmd/cover/cover_test.go
+++ b/src/cmd/cover/cover_test.go
@@ -179,7 +179,7 @@ func TestCover(t *testing.T) {
 	}
 	lines := bytes.Split(file, []byte("\n"))
 	for i, line := range lines {
-		lines[i] = bytes.Replace(line, []byte("LINE"), []byte(fmt.Sprint(i+1)), -1)
+		lines[i] = bytes.ReplaceAll(line, []byte("LINE"), []byte(fmt.Sprint(i+1)))
 	}
 
 	// Add a function that is not gofmt'ed. This used to cause a crash.
diff --git a/src/cmd/go/internal/version/version.go b/src/cmd/go/internal/version/version.go
index 056db7bf9e..c2de8d326d 100644
--- a/src/cmd/go/internal/version/version.go
+++ b/src/cmd/go/internal/version/version.go
@@ -138,7 +138,7 @@ func scanFile(file string, info os.FileInfo, mustPrint bool) {
 
 	fmt.Printf("%s: %s\n", file, vers)
 	if *versionM && mod != "" {
-		fmt.Printf("\t%s\n", strings.Replace(mod[:len(mod)-1], "\n", "\n\t", -1))
+		fmt.Printf("\t%s\n", strings.ReplaceAll(mod[:len(mod)-1], "\n", "\n\t"))
 	}
 }
 
diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go
index 1fe77663b9..44dea22ef3 100644
--- a/src/runtime/mkpreempt.go
+++ b/src/runtime/mkpreempt.go
@@ -131,7 +131,7 @@ func header(arch string) {
 
 func p(f string, args ...interface{}) {
 	fmted := fmt.Sprintf(f, args...)
-	fmt.Fprintf(out, "\t%s\n", strings.Replace(fmted, "\n", "\n\t", -1))
+	fmt.Fprintf(out, "\t%s\n", strings.ReplaceAll(fmted, "\n", "\n\t"))
 }
 
 func label(l string) {
-- 
cgit v1.3


From f979d072d339a24e4938d46588c153587d61af19 Mon Sep 17 00:00:00 2001
From: Martin Möhrmann <moehrmann@google.com>
Date: Sun, 3 May 2020 16:26:05 +0200
Subject: runtime: avoid memclr call for keys in mapdelete_fast
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace memclrHasPointers calls for keys in mapdelete_fast*
functions with direct writes since the key sizes are known
at compile time.

name                     old time/op  new time/op  delta
MapDelete/Pointer/100    33.7ns ± 1%  23.7ns ± 2%  -29.68%  (p=0.000 n=7+9)
MapDelete/Pointer/1000   41.6ns ± 5%  34.9ns ± 4%  -16.01%  (p=0.000 n=9+10)
MapDelete/Pointer/10000  45.6ns ± 1%  38.2ns ± 2%  -16.34%  (p=0.000 n=8+10)

Change-Id: Icaac43b520b93c2cf9fd192b822fae7203a7bbf7
Reviewed-on: https://go-review.googlesource.com/c/go/+/231737
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/runtime/map_fast32.go |  8 ++++++--
 src/runtime/map_fast64.go |  8 +++++++-
 src/runtime/map_test.go   | 22 ++++++++++++++++++++++
 3 files changed, 35 insertions(+), 3 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/map_fast32.go b/src/runtime/map_fast32.go
index 534454f3ad..d035ed0386 100644
--- a/src/runtime/map_fast32.go
+++ b/src/runtime/map_fast32.go
@@ -299,8 +299,12 @@ search:
 				continue
 			}
 			// Only clear key if there are pointers in it.
-			if t.key.ptrdata != 0 {
-				memclrHasPointers(k, t.key.size)
+			// This can only happen if pointers are 32 bit
+			// wide as 64 bit pointers do not fit into a 32 bit key.
+			if sys.PtrSize == 4 && t.key.ptrdata != 0 {
+				// The key must be a pointer as we checked pointers are
+				// 32 bits wide and the key is 32 bits wide also.
+				*(*unsafe.Pointer)(k) = nil
 			}
 			e := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.elemsize))
 			if t.elem.ptrdata != 0 {
diff --git a/src/runtime/map_fast64.go b/src/runtime/map_fast64.go
index 1669c7cfe9..f1f3927598 100644
--- a/src/runtime/map_fast64.go
+++ b/src/runtime/map_fast64.go
@@ -300,7 +300,13 @@ search:
 			}
 			// Only clear key if there are pointers in it.
 			if t.key.ptrdata != 0 {
-				memclrHasPointers(k, t.key.size)
+				if sys.PtrSize == 8 {
+					*(*unsafe.Pointer)(k) = nil
+				} else {
+					// There are three ways to squeeze one or more 32 bit pointers into 64 bits.
+					// Just call memclrHasPointers instead of trying to handle all cases here.
+					memclrHasPointers(k, 8)
+				}
 			}
 			e := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.elemsize))
 			if t.elem.ptrdata != 0 {
diff --git a/src/runtime/map_test.go b/src/runtime/map_test.go
index 1b7ccad6ed..302b3c23c1 100644
--- a/src/runtime/map_test.go
+++ b/src/runtime/map_test.go
@@ -993,6 +993,27 @@ func benchmarkMapDeleteStr(b *testing.B, n int) {
 	}
 }
 
+func benchmarkMapDeletePointer(b *testing.B, n int) {
+	i2p := make([]*int, n)
+	for i := 0; i < n; i++ {
+		i2p[i] = new(int)
+	}
+	a := make(map[*int]int, n)
+	b.ResetTimer()
+	k := 0
+	for i := 0; i < b.N; i++ {
+		if len(a) == 0 {
+			b.StopTimer()
+			for j := 0; j < n; j++ {
+				a[i2p[j]] = j
+			}
+			k = i
+			b.StartTimer()
+		}
+		delete(a, i2p[i-k])
+	}
+}
+
 func runWith(f func(*testing.B, int), v ...int) func(*testing.B) {
 	return func(b *testing.B) {
 		for _, n := range v {
@@ -1023,6 +1044,7 @@ func BenchmarkMapDelete(b *testing.B) {
 	b.Run("Int32", runWith(benchmarkMapDeleteInt32, 100, 1000, 10000))
 	b.Run("Int64", runWith(benchmarkMapDeleteInt64, 100, 1000, 10000))
 	b.Run("Str", runWith(benchmarkMapDeleteStr, 100, 1000, 10000))
+	b.Run("Pointer", runWith(benchmarkMapDeletePointer, 100, 1000, 10000))
 }
 
 func TestDeferDeleteSlow(t *testing.T) {
-- 
cgit v1.3


From 681559e1f10f83a053b4ebab101de3d77ede8353 Mon Sep 17 00:00:00 2001
From: "zero.xu" <zero.xu@bytedance.com>
Date: Mon, 17 Aug 2020 07:06:32 +0000
Subject: runtime: update comment: modTimer is called by Timer.Reset

Change-Id: I97d0d1343d41b603a68388e496411fb040dc6d66
GitHub-Last-Rev: d11177ad249bd844dd9e7e355eea28596d0b1fa8
GitHub-Pull-Request: golang/go#38625
Reviewed-on: https://go-review.googlesource.com/c/go/+/229767
Reviewed-by: Emmanuel Odeke <emm.odeke@gmail.com>
---
 src/runtime/time.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/runtime')

diff --git a/src/runtime/time.go b/src/runtime/time.go
index fdb5066b24..f895bf8443 100644
--- a/src/runtime/time.go
+++ b/src/runtime/time.go
@@ -403,7 +403,7 @@ func dodeltimer0(pp *p) {
 }
 
 // modtimer modifies an existing timer.
-// This is called by the netpoll code or time.Ticker.Reset.
+// This is called by the netpoll code or time.Ticker.Reset or time.Timer.Reset.
 // Reports whether the timer was modified before it was run.
 func modtimer(t *timer, when, period int64, f func(interface{}, uintptr), arg interface{}, seq uintptr) bool {
 	if when < 0 {
-- 
cgit v1.3


From d79350bac73670c04a91b6761d334b810201f6ee Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Mon, 4 May 2020 18:36:31 +0200
Subject: runtime: use hw.ncpuonline sysctl in getncpu on netbsd

Since NetBSD 7, hw.ncpuonline reports the number of CPUs online, while
hw.ncpu reports the number of CPUs configured. Try hw.ncpuonline first and
fall back to hw.ncpu in case it fails (which is the case on NetBSD
before 7.0).

This follows the behavior on OpenBSD (see CL 161757). Also, Go
in pkgsrc is patched to use hw.ncpuonline, so this CL would allow said
patch to be dropped.

Updates #30824

Change-Id: Id1c19dff2c1e4401e6074179fae7c708ba0e3098
Reviewed-on: https://go-review.googlesource.com/c/go/+/231957
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Benny Siegert <bsiegert@gmail.com>
---
 src/runtime/os_netbsd.go | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/os_netbsd.go b/src/runtime/os_netbsd.go
index 97106c7b9d..f7f90cedc1 100644
--- a/src/runtime/os_netbsd.go
+++ b/src/runtime/os_netbsd.go
@@ -95,18 +95,28 @@ var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)
 
 // From NetBSD's <sys/sysctl.h>
 const (
-	_CTL_HW      = 6
-	_HW_NCPU     = 3
-	_HW_PAGESIZE = 7
+	_CTL_HW        = 6
+	_HW_NCPU       = 3
+	_HW_PAGESIZE   = 7
+	_HW_NCPUONLINE = 16
 )
 
-func getncpu() int32 {
-	mib := [2]uint32{_CTL_HW, _HW_NCPU}
-	out := uint32(0)
+func sysctlInt(mib []uint32) (int32, bool) {
+	var out int32
 	nout := unsafe.Sizeof(out)
-	ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
-	if ret >= 0 {
-		return int32(out)
+	ret := sysctl(&mib[0], uint32(len(mib)), (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
+	if ret < 0 {
+		return 0, false
+	}
+	return out, true
+}
+
+func getncpu() int32 {
+	if n, ok := sysctlInt([]uint32{_CTL_HW, _HW_NCPUONLINE}); ok {
+		return int32(n)
+	}
+	if n, ok := sysctlInt([]uint32{_CTL_HW, _HW_NCPU}); ok {
+		return int32(n)
 	}
 	return 1
 }
-- 
cgit v1.3


From dc12d5b0f5e9c1cfec2a8eb6dd7ff3473c36d45c Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Mon, 17 Aug 2020 11:28:26 +0200
Subject: all: add empty line between copyright header and package clause

Makes sure the copyright notice is not interpreted as the package-level
godoc.

Change-Id: I2afce7c9d620f19d51ec1438b1d0db1774b57146
Reviewed-on: https://go-review.googlesource.com/c/go/+/248760
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Dave Cheney <dave@cheney.net>
---
 src/cmd/compile/internal/ssa/debug.go         | 1 +
 src/cmd/compile/internal/ssa/passbm_test.go   | 1 +
 src/cmd/go/internal/trace/trace.go            | 1 +
 src/cmd/link/internal/benchmark/bench_test.go | 1 +
 src/cmd/link/internal/ld/errors.go            | 1 +
 src/runtime/closure_test.go                   | 1 +
 src/runtime/map_benchmark_test.go             | 1 +
 src/runtime/slice_test.go                     | 1 +
 src/sync/cond_test.go                         | 1 +
 test/fixedbugs/issue15281.go                  | 1 +
 10 files changed, 10 insertions(+)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/ssa/debug.go b/src/cmd/compile/internal/ssa/debug.go
index 13fe67cbca..6353f72897 100644
--- a/src/cmd/compile/internal/ssa/debug.go
+++ b/src/cmd/compile/internal/ssa/debug.go
@@ -1,6 +1,7 @@
 // Copyright 2017 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
+
 package ssa
 
 import (
diff --git a/src/cmd/compile/internal/ssa/passbm_test.go b/src/cmd/compile/internal/ssa/passbm_test.go
index eefdbb8722..3fd3eb579b 100644
--- a/src/cmd/compile/internal/ssa/passbm_test.go
+++ b/src/cmd/compile/internal/ssa/passbm_test.go
@@ -1,6 +1,7 @@
 // Copyright 2015 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
+
 package ssa
 
 import (
diff --git a/src/cmd/go/internal/trace/trace.go b/src/cmd/go/internal/trace/trace.go
index 7cb7636a34..c8fac92c9f 100644
--- a/src/cmd/go/internal/trace/trace.go
+++ b/src/cmd/go/internal/trace/trace.go
@@ -1,6 +1,7 @@
 // Copyright 2020 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
+
 package trace
 
 import (
diff --git a/src/cmd/link/internal/benchmark/bench_test.go b/src/cmd/link/internal/benchmark/bench_test.go
index d8ec717c7c..419dc55724 100644
--- a/src/cmd/link/internal/benchmark/bench_test.go
+++ b/src/cmd/link/internal/benchmark/bench_test.go
@@ -1,6 +1,7 @@
 // Copyright 2020 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
+
 package benchmark
 
 import (
diff --git a/src/cmd/link/internal/ld/errors.go b/src/cmd/link/internal/ld/errors.go
index c5ce097fde..d6e8ff236d 100644
--- a/src/cmd/link/internal/ld/errors.go
+++ b/src/cmd/link/internal/ld/errors.go
@@ -1,6 +1,7 @@
 // Copyright 2020 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
+
 package ld
 
 import (
diff --git a/src/runtime/closure_test.go b/src/runtime/closure_test.go
index ea65fbd5f5..741c932eab 100644
--- a/src/runtime/closure_test.go
+++ b/src/runtime/closure_test.go
@@ -1,6 +1,7 @@
 // Copyright 2011 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
+
 package runtime_test
 
 import "testing"
diff --git a/src/runtime/map_benchmark_test.go b/src/runtime/map_benchmark_test.go
index 893cb6c5b6..d0becc9ddb 100644
--- a/src/runtime/map_benchmark_test.go
+++ b/src/runtime/map_benchmark_test.go
@@ -1,6 +1,7 @@
 // Copyright 2013 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
+
 package runtime_test
 
 import (
diff --git a/src/runtime/slice_test.go b/src/runtime/slice_test.go
index e963a43dd3..cd2bc26d1e 100644
--- a/src/runtime/slice_test.go
+++ b/src/runtime/slice_test.go
@@ -1,6 +1,7 @@
 // Copyright 2011 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
+
 package runtime_test
 
 import (
diff --git a/src/sync/cond_test.go b/src/sync/cond_test.go
index 9d0d9adc74..859cae59bc 100644
--- a/src/sync/cond_test.go
+++ b/src/sync/cond_test.go
@@ -1,6 +1,7 @@
 // Copyright 2011 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
+
 package sync_test
 
 import (
diff --git a/test/fixedbugs/issue15281.go b/test/fixedbugs/issue15281.go
index 187c96f218..390867c848 100644
--- a/test/fixedbugs/issue15281.go
+++ b/test/fixedbugs/issue15281.go
@@ -3,6 +3,7 @@
 // Copyright 2016 The Go Authors.  All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
+
 package main
 
 import "runtime"
-- 
cgit v1.3


From 7bbd5ca5a6a94f58d33de6b1244248a32dc8cd9c Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Wed, 22 Jul 2020 11:21:36 -0400
Subject: runtime: replace index and contains with bytealg calls

The runtime has its own implementation of string indexing. To reduce
code duplication and cognitive load, replace this with calls to the
internal/bytealg package. We can't do this on Plan 9 because it needs
string indexing in a note handler (which isn't allowed to use the
optimized bytealg version because it uses SSE), so we can't just
eliminate the index function, but this CL does down-scope it to make
it clear it's only for note handlers on Plan 9.

Change-Id: Ie1a142678262048515c481e8c26313b80c5875df
Reviewed-on: https://go-review.googlesource.com/c/go/+/244537
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
---
 src/runtime/os_plan9.go  | 18 ++++++++++++++++--
 src/runtime/proc.go      |  3 ++-
 src/runtime/runtime1.go  |  5 +++--
 src/runtime/string.go    | 16 ----------------
 src/runtime/traceback.go |  3 ++-
 5 files changed, 23 insertions(+), 22 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/os_plan9.go b/src/runtime/os_plan9.go
index 9e187d2220..128c30adeb 100644
--- a/src/runtime/os_plan9.go
+++ b/src/runtime/os_plan9.go
@@ -82,10 +82,10 @@ func sigpanic() {
 	note := gostringnocopy((*byte)(unsafe.Pointer(g.m.notesig)))
 	switch g.sig {
 	case _SIGRFAULT, _SIGWFAULT:
-		i := index(note, "addr=")
+		i := indexNoFloat(note, "addr=")
 		if i >= 0 {
 			i += 5
-		} else if i = index(note, "va="); i >= 0 {
+		} else if i = indexNoFloat(note, "va="); i >= 0 {
 			i += 3
 		} else {
 			panicmem()
@@ -111,6 +111,20 @@ func sigpanic() {
 	}
 }
 
+// indexNoFloat is bytealg.IndexString but safe to use in a note
+// handler.
+func indexNoFloat(s, t string) int {
+	if len(t) == 0 {
+		return 0
+	}
+	for i := 0; i < len(s); i++ {
+		if s[i] == t[0] && hasPrefix(s[i:], t) {
+			return i
+		}
+	}
+	return -1
+}
+
 func atolwhex(p string) int64 {
 	for hasPrefix(p, " ") || hasPrefix(p, "\t") {
 		p = p[1:]
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 035822216d..ed7e2128ae 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -5,6 +5,7 @@
 package runtime
 
 import (
+	"internal/bytealg"
 	"internal/cpu"
 	"runtime/internal/atomic"
 	"runtime/internal/sys"
@@ -5460,7 +5461,7 @@ func haveexperiment(name string) bool {
 	x := sys.Goexperiment
 	for x != "" {
 		xname := ""
-		i := index(x, ",")
+		i := bytealg.IndexByteString(x, ',')
 		if i < 0 {
 			xname, x = x, ""
 		} else {
diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go
index c65a534ef6..7c893aa25c 100644
--- a/src/runtime/runtime1.go
+++ b/src/runtime/runtime1.go
@@ -5,6 +5,7 @@
 package runtime
 
 import (
+	"internal/bytealg"
 	"runtime/internal/atomic"
 	"runtime/internal/sys"
 	"unsafe"
@@ -347,13 +348,13 @@ func parsedebugvars() {
 
 	for p := gogetenv("GODEBUG"); p != ""; {
 		field := ""
-		i := index(p, ",")
+		i := bytealg.IndexByteString(p, ',')
 		if i < 0 {
 			field, p = p, ""
 		} else {
 			field, p = p[:i], p[i+1:]
 		}
-		i = index(field, "=")
+		i = bytealg.IndexByteString(field, '=')
 		if i < 0 {
 			continue
 		}
diff --git a/src/runtime/string.go b/src/runtime/string.go
index 251044231e..9a601f0094 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -335,22 +335,6 @@ func gostringn(p *byte, l int) string {
 	return s
 }
 
-func index(s, t string) int {
-	if len(t) == 0 {
-		return 0
-	}
-	for i := 0; i < len(s); i++ {
-		if s[i] == t[0] && hasPrefix(s[i:], t) {
-			return i
-		}
-	}
-	return -1
-}
-
-func contains(s, t string) bool {
-	return index(s, t) >= 0
-}
-
 func hasPrefix(s, prefix string) bool {
 	return len(s) >= len(prefix) && s[:len(prefix)] == prefix
 }
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 944c8473d2..96e552524e 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -5,6 +5,7 @@
 package runtime
 
 import (
+	"internal/bytealg"
 	"runtime/internal/atomic"
 	"runtime/internal/sys"
 	"unsafe"
@@ -848,7 +849,7 @@ func showfuncinfo(f funcInfo, firstFrame bool, funcID, childID funcID) bool {
 		return true
 	}
 
-	return contains(name, ".") && (!hasPrefix(name, "runtime.") || isExportedRuntime(name))
+	return bytealg.IndexByteString(name, '.') >= 0 && (!hasPrefix(name, "runtime.") || isExportedRuntime(name))
 }
 
 // isExportedRuntime reports whether name is an exported runtime function.
-- 
cgit v1.3


From 7148abc1b900555199998aac25af11783a9eb41c Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Fri, 5 Jun 2020 16:44:29 -0400
Subject: runtime: simplify heapBitsSetType doubleCheck

The heapBitsSetType function has a slow doubleCheck debugging mode
that checks the bitmap written out by the rest of the function using
far more obvious logic. But even this has some surprisingly complex
logic in it. Simplify it a bit. This also happens to fix the logic on
32-bit.

Fixes #40335.

Change-Id: I5cee482ad8adbd01cf5b98e35a270fe941ba4940
Reviewed-on: https://go-review.googlesource.com/c/go/+/244538
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
---
 src/runtime/mbitmap.go | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 35332c91c4..cad6f56404 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -1403,17 +1403,20 @@ Phase4:
 	// Double check the whole bitmap.
 	if doubleCheck {
 		// x+size may not point to the heap, so back up one
-		// word and then call next().
-		end := heapBitsForAddr(x + size - sys.PtrSize).next()
-		endAI := arenaIdx(end.arena)
-		if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0])) {
-			// The unrolling code above walks hbitp just
-			// past the bitmap without moving to the next
-			// arena. Synthesize this for end.bitp.
-			end.arena--
-			endAI = arenaIdx(end.arena)
-			end.bitp = addb(&mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0], heapArenaBitmapBytes)
-			end.last = nil
+		// word and then advance it the way we do above.
+		end := heapBitsForAddr(x + size - sys.PtrSize)
+		if outOfPlace {
+			// In out-of-place copying, we just advance
+			// using next.
+			end = end.next()
+		} else {
+			// Don't use next because that may advance to
+			// the next arena and the in-place logic
+			// doesn't do that.
+			end.shift += heapBitsShift
+			if end.shift == 4*heapBitsShift {
+				end.bitp, end.shift = add1(end.bitp), 0
+			}
 		}
 		if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
 			println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
@@ -1437,8 +1440,9 @@ Phase4:
 			var have, want uint8
 			have = (*h.bitp >> h.shift) & (bitPointer | bitScan)
 			if i >= totalptr {
-				want = 0 // deadmarker
 				if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
+					// heapBitsSetTypeGCProg always fills
+					// in full nibbles of bitScan.
 					want = bitScan
 				}
 			} else {
-- 
cgit v1.3


From d19fedd180fceb6a60961e19387893ddb047e4e6 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Fri, 5 Jun 2020 16:48:03 -0400
Subject: runtime: move checkmarks to a separate bitmap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, the GC stores the object marks for checkmarks mode in the
heap bitmap using a rather complex encoding: for one word objects, the
checkmark is stored in the pointer/scalar bit since one word objects
must be pointers; for larger objects, the checkmark is stored in what
would be the scan/dead bit for the second word of the object. This
encoding made more sense when the runtime used the first scan/dead bit
as the regular mark bit, but we moved away from that long ago.

This encoding and overloading of the heap bitmap bits causes a great
deal of complexity in many parts of the allocator and garbage
collector and leads to some subtle bugs like #15903.

This CL moves the checkmarks mark bits into their own per-arena bitmap
and reclaims the second scan/dead bit as a regular scan/dead bit.

I tested this by enabling doubleCheck mode in heapBitsSetType and
running in both regular and GODEBUG=gccheckmark=1 mode.

Fixes #15903.

No performance degradation. (Very slight improvement on a few
benchmarks, but it's probably just noise.)

name                                old time/op            new time/op            delta
BiogoIgor                                      16.6s ± 1%             16.4s ± 1%  -0.94%  (p=0.000 n=25+24)
BiogoKrishna                                   19.2s ± 3%             19.2s ± 3%    ~     (p=0.638 n=23+25)
BleveIndexBatch100                             6.12s ± 5%             6.17s ± 4%    ~     (p=0.170 n=25+25)
CompileTemplate                                206ms ± 1%             205ms ± 1%  -0.43%  (p=0.005 n=24+24)
CompileUnicode                                82.2ms ± 2%            81.5ms ± 2%  -0.95%  (p=0.001 n=22+22)
CompileGoTypes                                 755ms ± 3%             754ms ± 4%    ~     (p=0.715 n=25+25)
CompileCompiler                                3.73s ± 1%             3.73s ± 1%    ~     (p=0.445 n=25+24)
CompileSSA                                     8.67s ± 1%             8.66s ± 1%    ~     (p=0.836 n=24+22)
CompileFlate                                   134ms ± 2%             133ms ± 1%  -0.66%  (p=0.001 n=24+23)
CompileGoParser                                164ms ± 1%             163ms ± 1%  -0.85%  (p=0.000 n=24+24)
CompileReflect                                 466ms ± 5%             466ms ± 3%    ~     (p=0.863 n=25+25)
CompileTar                                     182ms ± 1%             182ms ± 1%  -0.31%  (p=0.048 n=24+24)
CompileXML                                     249ms ± 1%             248ms ± 1%  -0.32%  (p=0.031 n=21+25)
CompileStdCmd                                  10.3s ± 1%             10.3s ± 1%    ~     (p=0.459 n=23+23)
FoglemanFauxGLRenderRotateBoat                 8.66s ± 1%             8.62s ± 1%  -0.47%  (p=0.000 n=23+24)
FoglemanPathTraceRenderGopherIter1             20.3s ± 3%             20.2s ± 2%    ~     (p=0.893 n=25+25)
GopherLuaKNucleotide                           29.7s ± 1%             29.8s ± 2%    ~     (p=0.421 n=24+25)
MarkdownRenderXHTML                            246ms ± 1%             247ms ± 1%    ~     (p=0.558 n=25+24)
Tile38WithinCircle100kmRequest                 779µs ± 4%             779µs ± 3%    ~     (p=0.954 n=25+25)
Tile38IntersectsCircle100kmRequest            1.02ms ± 3%            1.01ms ± 4%    ~     (p=0.658 n=25+25)
Tile38KNearestLimit100Request                  984µs ± 4%             986µs ± 4%    ~     (p=0.627 n=24+25)
[Geo mean]                                     552ms                  551ms       -0.19%

https://perf.golang.org/search?q=upload:20200723.6

Change-Id: Ic703f26a83fb034941dc6f4788fc997d56890dec
Reviewed-on: https://go-review.googlesource.com/c/go/+/244539
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>
---
 src/reflect/all_test.go    |   5 +-
 src/runtime/cgocall.go     |   2 +-
 src/runtime/gcinfo_test.go |  19 +----
 src/runtime/heapdump.go    |   2 +-
 src/runtime/mbitmap.go     | 173 +++++++++------------------------------------
 src/runtime/mcheckmark.go  | 100 ++++++++++++++++++++++++++
 src/runtime/mgc.go         |   4 +-
 src/runtime/mgcmark.go     |  70 +-----------------
 src/runtime/mheap.go       |   4 ++
 9 files changed, 148 insertions(+), 231 deletions(-)
 create mode 100644 src/runtime/mcheckmark.go

(limited to 'src/runtime')

diff --git a/src/reflect/all_test.go b/src/reflect/all_test.go
index 6b31568bb9..ed2f225077 100644
--- a/src/reflect/all_test.go
+++ b/src/reflect/all_test.go
@@ -6467,12 +6467,9 @@ func verifyGCBitsSlice(t *testing.T, typ Type, cap int, bits []byte) {
 	// Repeat the bitmap for the slice size, trimming scalars in
 	// the last element.
 	bits = rep(cap, bits)
-	for len(bits) > 2 && bits[len(bits)-1] == 0 {
+	for len(bits) > 0 && bits[len(bits)-1] == 0 {
 		bits = bits[:len(bits)-1]
 	}
-	if len(bits) == 2 && bits[0] == 0 && bits[1] == 0 {
-		bits = bits[:0]
-	}
 	if !bytes.Equal(heapBits, bits) {
 		t.Errorf("heapBits incorrect for make(%v, 0, %v)\nhave %v\nwant %v", typ, cap, heapBits, bits)
 	}
diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go
index a4e64b00cc..099aa540e0 100644
--- a/src/runtime/cgocall.go
+++ b/src/runtime/cgocall.go
@@ -605,7 +605,7 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) {
 		hbits := heapBitsForAddr(base)
 		n := span.elemsize
 		for i = uintptr(0); i < n; i += sys.PtrSize {
-			if i != 1*sys.PtrSize && !hbits.morePointers() {
+			if !hbits.morePointers() {
 				// No more possible pointers.
 				break
 			}
diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go
index ec1ba90c2e..0808b416f0 100644
--- a/src/runtime/gcinfo_test.go
+++ b/src/runtime/gcinfo_test.go
@@ -77,7 +77,7 @@ func TestGCInfo(t *testing.T) {
 	}
 
 	for i := 0; i < 10; i++ {
-		verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(padDead(infoPtr)))
+		verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(infoPtr))
 		verifyGCInfo(t, "heap PtrSlice", escape(&make([]*byte, 10)[0]), trimDead(infoPtr10))
 		verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), trimDead(infoScalarPtr))
 		verifyGCInfo(t, "heap ScalarPtrSlice", escape(&make([]ScalarPtr, 4)[0]), trimDead(infoScalarPtr4))
@@ -97,25 +97,10 @@ func verifyGCInfo(t *testing.T, name string, p interface{}, mask0 []byte) {
 	}
 }
 
-func padDead(mask []byte) []byte {
-	// Because the dead bit isn't encoded in the second word,
-	// and because on 32-bit systems a one-word allocation
-	// uses a two-word block, the pointer info for a one-word
-	// object needs to be expanded to include an extra scalar
-	// on 32-bit systems to match the heap bitmap.
-	if runtime.PtrSize == 4 && len(mask) == 1 {
-		return []byte{mask[0], 0}
-	}
-	return mask
-}
-
 func trimDead(mask []byte) []byte {
-	for len(mask) > 2 && mask[len(mask)-1] == typeScalar {
+	for len(mask) > 0 && mask[len(mask)-1] == typeScalar {
 		mask = mask[:len(mask)-1]
 	}
-	if len(mask) == 2 && mask[0] == typeScalar && mask[1] == typeScalar {
-		mask = mask[:0]
-	}
 	return mask
 }
 
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index cfd5c251b4..4c35309211 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -713,7 +713,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
 	i := uintptr(0)
 	hbits := heapBitsForAddr(p)
 	for ; i < nptr; i++ {
-		if i != 1 && !hbits.morePointers() {
+		if !hbits.morePointers() {
 			break // end of object
 		}
 		if hbits.isPointer() {
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index cad6f56404..8de44c14b9 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -6,10 +6,11 @@
 //
 // Stack, data, and bss bitmaps
 //
-// Stack frames and global variables in the data and bss sections are described
-// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer
-// to be visited during GC. The bits in each byte are consumed starting with
-// the low bit: 1<<0, 1<<1, and so on.
+// Stack frames and global variables in the data and bss sections are
+// described by bitmaps with 1 bit per pointer-sized word. A "1" bit
+// means the word is a live pointer to be visited by the GC (referred to
+// as "pointer"). A "0" bit means the word should be ignored by GC
+// (referred to as "scalar", though it could be a dead pointer value).
 //
 // Heap bitmap
 //
@@ -20,18 +21,13 @@
 // through start+3*ptrSize, ha.bitmap[1] holds the entries for
 // start+4*ptrSize through start+7*ptrSize, and so on.
 //
-// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
-// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
-// The meaning of the high bit depends on the position of the word being described
-// in its allocated object. In all words *except* the second word, the
-// high bit indicates that the object is still being described. In
-// these words, if a bit pair with a high bit 0 is encountered, the
-// low bit can also be assumed to be 0, and the object description is
-// over. This 00 is called the ``dead'' encoding: it signals that the
-// rest of the words in the object are uninteresting to the garbage
-// collector.
-//
-// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
+// In each 2-bit entry, the lower bit is a pointer/scalar bit, just
+// like in the stack/data bitmaps described above. The upper bit
+// indicates scan/dead: a "1" value ("scan") indicates that there may
+// be pointers in later words of the allocation, and a "0" value
+// ("dead") indicates there are no more pointers in the allocation. If
+// the upper bit is 0, the lower bit must also be 0, and this
+// indicates scanning can ignore the rest of the allocation.
 //
 // The 2-bit entries are split when written into the byte, so that the top half
 // of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
@@ -39,38 +35,14 @@
 // This form allows a copy from the 1-bit to the 4-bit form to keep the
 // pointer bits contiguous, instead of having to space them out.
 //
-// The code makes use of the fact that the zero value for a heap bitmap
-// has no live pointer bit set and is (depending on position), not used,
-// not checkmarked, and is the dead encoding.
-// These properties must be preserved when modifying the encoding.
+// The code makes use of the fact that the zero value for a heap
+// bitmap means scalar/dead. This property must be preserved when
+// modifying the encoding.
 //
 // The bitmap for noscan spans is not maintained. Code must ensure
 // that an object is scannable before consulting its bitmap by
 // checking either the noscan bit in the span or by consulting its
 // type's information.
-//
-// Checkmarks
-//
-// In a concurrent garbage collector, one worries about failing to mark
-// a live object due to mutations without write barriers or bugs in the
-// collector implementation. As a sanity check, the GC has a 'checkmark'
-// mode that retraverses the object graph with the world stopped, to make
-// sure that everything that should be marked is marked.
-// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry
-// for the second word of the object holds the checkmark bit.
-// When not in checkmark mode, this bit is set to 1.
-//
-// The smallest possible allocation is 8 bytes. On a 32-bit machine, that
-// means every allocated object has two words, so there is room for the
-// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is
-// just one word, so the second bit pair is not available for encoding the
-// checkmark. However, because non-pointer allocations are combined
-// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation
-// must be a pointer, so the type bit in the first word is not actually needed.
-// It is still used in general, except in checkmark the type bit is repurposed
-// as the checkmark bit and then reinitialized (to 1) as the type bit when
-// finished.
-//
 
 package runtime
 
@@ -551,33 +523,6 @@ func (h heapBits) isPointer() bool {
 	return h.bits()&bitPointer != 0
 }
 
-// isCheckmarked reports whether the heap bits have the checkmarked bit set.
-// It must be told how large the object at h is, because the encoding of the
-// checkmark bit varies by size.
-// h must describe the initial word of the object.
-func (h heapBits) isCheckmarked(size uintptr) bool {
-	if size == sys.PtrSize {
-		return (*h.bitp>>h.shift)&bitPointer != 0
-	}
-	// All multiword objects are 2-word aligned,
-	// so we know that the initial word's 2-bit pair
-	// and the second word's 2-bit pair are in the
-	// same heap bitmap byte, *h.bitp.
-	return (*h.bitp>>(heapBitsShift+h.shift))&bitScan != 0
-}
-
-// setCheckmarked sets the checkmarked bit.
-// It must be told how large the object at h is, because the encoding of the
-// checkmark bit varies by size.
-// h must describe the initial word of the object.
-func (h heapBits) setCheckmarked(size uintptr) {
-	if size == sys.PtrSize {
-		atomic.Or8(h.bitp, bitPointer<<h.shift)
-		return
-	}
-	atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift))
-}
-
 // bulkBarrierPreWrite executes a write barrier
 // for every pointer slot in the memory range [src, src+size),
 // using pointer/scalar information from [dst, dst+size).
@@ -795,7 +740,6 @@ func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
 // TODO(rsc): Perhaps introduce a different heapBitsSpan type.
 
 // initSpan initializes the heap bitmap for a span.
-// It clears all checkmark bits.
 // If this is a span of pointer-sized objects, it initializes all
 // words to pointer/scan.
 // Otherwise, it initializes all words to scalar/dead.
@@ -826,45 +770,6 @@ func (h heapBits) initSpan(s *mspan) {
 	}
 }
 
-// initCheckmarkSpan initializes a span for being checkmarked.
-// It clears the checkmark bits, which are set to 1 in normal operation.
-func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
-	// The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely.
-	if sys.PtrSize == 8 && size == sys.PtrSize {
-		// Checkmark bit is type bit, bottom bit of every 2-bit entry.
-		// Only possible on 64-bit system, since minimum size is 8.
-		// Must clear type bit (checkmark bit) of every word.
-		// The type bit is the lower of every two-bit pair.
-		for i := uintptr(0); i < n; i += wordsPerBitmapByte {
-			*h.bitp &^= bitPointerAll
-			h = h.forward(wordsPerBitmapByte)
-		}
-		return
-	}
-	for i := uintptr(0); i < n; i++ {
-		*h.bitp &^= bitScan << (heapBitsShift + h.shift)
-		h = h.forward(size / sys.PtrSize)
-	}
-}
-
-// clearCheckmarkSpan undoes all the checkmarking in a span.
-// The actual checkmark bits are ignored, so the only work to do
-// is to fix the pointer bits. (Pointer bits are ignored by scanobject
-// but consulted by typedmemmove.)
-func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
-	// The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely.
-	if sys.PtrSize == 8 && size == sys.PtrSize {
-		// Checkmark bit is type bit, bottom bit of every 2-bit entry.
-		// Only possible on 64-bit system, since minimum size is 8.
-		// Must clear type bit (checkmark bit) of every word.
-		// The type bit is the lower of every two-bit pair.
-		for i := uintptr(0); i < n; i += wordsPerBitmapByte {
-			*h.bitp |= bitPointerAll
-			h = h.forward(wordsPerBitmapByte)
-		}
-	}
-}
-
 // countAlloc returns the number of objects allocated in span s by
 // scanning the allocation bitmap.
 func (s *mspan) countAlloc() int {
@@ -957,11 +862,11 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 			if sys.PtrSize == 4 && dataSize == sys.PtrSize {
 				// 1 pointer object. On 32-bit machines clear the bit for the
 				// unused second word.
-				*h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
+				*h.bitp &^= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
 				*h.bitp |= (bitPointer | bitScan) << h.shift
 			} else {
 				// 2-element slice of pointer.
-				*h.bitp |= (bitPointer | bitScan | bitPointer<<heapBitsShift) << h.shift
+				*h.bitp |= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
 			}
 			return
 		}
@@ -974,11 +879,10 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 			}
 		}
 		b := uint32(*ptrmask)
-		hb := (b & 3) | bitScan
-		// bitPointer == 1, bitScan is 1 << 4, heapBitsShift is 1.
-		// 110011 is shifted h.shift and complemented.
-		// This clears out the bits that are about to be
-		// ored into *h.hbitp in the next instructions.
+		hb := b & 3
+		hb |= bitScanAll & ((bitScan << (typ.ptrdata / sys.PtrSize)) - 1)
+		// Clear the bits for this object so we can set the
+		// appropriate ones.
 		*h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
 		*h.bitp |= uint8(hb << h.shift)
 		return
@@ -1155,11 +1059,6 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 		throw("heapBitsSetType: called with non-pointer type")
 		return
 	}
-	if nw < 2 {
-		// Must write at least 2 words, because the "no scan"
-		// encoding doesn't take effect until the third word.
-		nw = 2
-	}
 
 	// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2).
 	// The leading byte is special because it contains the bits for word 1,
@@ -1172,21 +1071,22 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 
 	case h.shift == 0:
 		// Ptrmask and heap bitmap are aligned.
-		// Handle first byte of bitmap specially.
+		//
+		// This is a fast path for small objects.
 		//
 		// The first byte we write out covers the first four
 		// words of the object. The scan/dead bit on the first
 		// word must be set to scan since there are pointers
-		// somewhere in the object. The scan/dead bit on the
-		// second word is the checkmark, so we don't set it.
+		// somewhere in the object.
 		// In all following words, we set the scan/dead
 		// appropriately to indicate that the object contains
 		// to the next 2-bit entry in the bitmap.
 		//
-		// TODO: It doesn't matter if we set the checkmark, so
-		// maybe this case isn't needed any more.
+		// We set four bits at a time here, but if the object
+		// is fewer than four words, phase 3 will clear
+		// unnecessary bits.
 		hb = b & bitPointerAll
-		hb |= bitScan | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift)
+		hb |= bitScanAll
 		if w += 4; w >= nw {
 			goto Phase3
 		}
@@ -1203,14 +1103,13 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 		// We took care of 1-word and 2-word objects above,
 		// so this is at least a 6-word object.
 		hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
-		// This is not noscan, so set the scan bit in the
-		// first word.
 		hb |= bitScan << (2 * heapBitsShift)
+		if nw > 1 {
+			hb |= bitScan << (3 * heapBitsShift)
+		}
 		b >>= 2
 		nb -= 2
-		// Note: no bitScan for second word because that's
-		// the checkmark.
-		*hbitp &^= uint8((bitPointer | bitScan | (bitPointer << heapBitsShift)) << (2 * heapBitsShift))
+		*hbitp &^= uint8((bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << (2 * heapBitsShift))
 		*hbitp |= uint8(hb)
 		hbitp = add1(hbitp)
 		if w += 2; w >= nw {
@@ -1449,11 +1348,7 @@ Phase4:
 				if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
 					want |= bitPointer
 				}
-				if i != 1 {
-					want |= bitScan
-				} else {
-					have &^= bitScan
-				}
+				want |= bitScan
 			}
 			if have != want {
 				println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
@@ -2013,7 +1908,7 @@ func getgcmask(ep interface{}) (mask []byte) {
 			if hbits.isPointer() {
 				mask[i/sys.PtrSize] = 1
 			}
-			if i != 1*sys.PtrSize && !hbits.morePointers() {
+			if !hbits.morePointers() {
 				mask = mask[:i/sys.PtrSize]
 				break
 			}
diff --git a/src/runtime/mcheckmark.go b/src/runtime/mcheckmark.go
new file mode 100644
index 0000000000..1fd8e4e78f
--- /dev/null
+++ b/src/runtime/mcheckmark.go
@@ -0,0 +1,100 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// GC checkmarks
+//
+// In a concurrent garbage collector, one worries about failing to mark
+// a live object due to mutations without write barriers or bugs in the
+// collector implementation. As a sanity check, the GC has a 'checkmark'
+// mode that retraverses the object graph with the world stopped, to make
+// sure that everything that should be marked is marked.
+
+package runtime
+
+import (
+	"runtime/internal/atomic"
+	"runtime/internal/sys"
+	"unsafe"
+)
+
+// A checkmarksMap stores the GC marks in "checkmarks" mode. It is a
+// per-arena bitmap with a bit for every word in the arena. The mark
+// is stored on the bit corresponding to the first word of the marked
+// allocation.
+//
+//go:notinheap
+type checkmarksMap [heapArenaBytes / sys.PtrSize / 8]uint8
+
+// If useCheckmark is true, marking of an object uses the checkmark
+// bits instead of the standard mark bits.
+var useCheckmark = false
+
+// startCheckmarks prepares for the checkmarks phase: each arena in mheap_.allArenas
+// gets a checkmarks bitmap (allocated on first use, cleared otherwise), then useCheckmark is set.
+// The world must be stopped.
+func startCheckmarks() {
+	// Set up the checkmarks bitmap of every arena listed in allArenas.
+	for _, ai := range mheap_.allArenas {
+		arena := mheap_.arenas[ai.l1()][ai.l2()]
+		bitmap := arena.checkmarks
+
+		if bitmap == nil {
+			// Allocate bitmap on first use; presumably persistentalloc returns zeroed memory (TODO confirm).
+			bitmap = (*checkmarksMap)(persistentalloc(unsafe.Sizeof(*bitmap), 0, &memstats.gc_sys))
+			if bitmap == nil {
+				throw("out of memory allocating checkmarks bitmap")
+			}
+			arena.checkmarks = bitmap
+		} else {
+			// Otherwise reuse the existing bitmap, zeroing every byte.
+			for i := range bitmap {
+				bitmap[i] = 0
+			}
+		}
+	}
+	// Enable checkmarking only after every bitmap above is in place.
+	useCheckmark = true
+}
+
+// endCheckmarks ends the checkmarks phase: it throws if gcMarkWorkAvailable(nil) is true, then clears useCheckmark.
+func endCheckmarks() {
+	if gcMarkWorkAvailable(nil) {
+		throw("GC work not flushed")
+	}
+	useCheckmark = false
+}
+
+// setCheckmark throws if marking object is a checkmarks violation,
+// and otherwise sets obj's checkmark. It returns true if obj was
+// already checkmarked.
+func setCheckmark(obj, base, off uintptr, mbits markBits) bool {
+	if !mbits.isMarked() {
+		printlock()
+		print("runtime: checkmarks found unexpected unmarked object obj=", hex(obj), "\n")
+		print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
+
+		// Dump the source (base) object
+		gcDumpObject("base", base, off)
+
+		// Dump the object
+		gcDumpObject("obj", obj, ^uintptr(0))
+
+		getg().m.traceback = 2
+		throw("checkmark found unmarked object")
+	}
+
+	ai := arenaIndex(obj)
+	arena := mheap_.arenas[ai.l1()][ai.l2()]
+	// The bitmap holds one bit per pointer-sized word of the arena, so
+	// index by obj's word number (not its arena number), wrapped to the bitmap.
+	arenaWord := (obj / sys.PtrSize / 8) % uintptr(len(arena.checkmarks))
+	mask := byte(1 << ((obj / sys.PtrSize) % 8))
+	bytep := &arena.checkmarks[arenaWord]
+	if atomic.Load8(bytep)&mask != 0 {
+		// Already checkmarked.
+		return true
+	}
+	atomic.Or8(bytep, mask)
+	return false
+}
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index b3499516f6..c8c4a4c758 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1670,13 +1670,13 @@ func gcMarkTermination(nextTriggerRatio float64) {
 			// mark using checkmark bits, to check that we
 			// didn't forget to mark anything during the
 			// concurrent mark process.
+			startCheckmarks()
 			gcResetMarkState()
-			initCheckmarks()
 			gcw := &getg().m.p.ptr().gcw
 			gcDrain(gcw, 0)
 			wbBufFlush1(getg().m.p.ptr())
 			gcw.dispose()
-			clearCheckmarks()
+			endCheckmarks()
 		}
 
 		// marking is complete so we can turn the write barrier off
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index fe988c46d9..96910ff729 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -1354,11 +1354,7 @@ func scanobject(b uintptr, gcw *gcWork) {
 		}
 		// Load bits once. See CL 22712 and issue 16973 for discussion.
 		bits := hbits.bits()
-		// During checkmarking, 1-word objects store the checkmark
-		// in the type bit for the one word. The only one-word objects
-		// are pointers, or else they'd be merged with other non-pointer
-		// data into larger allocations.
-		if i != 1*sys.PtrSize && bits&bitScan == 0 {
+		if bits&bitScan == 0 {
 			break // no more pointers in this object
 		}
 		if bits&bitPointer == 0 {
@@ -1511,28 +1507,10 @@ func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintp
 	mbits := span.markBitsForIndex(objIndex)
 
 	if useCheckmark {
-		if !mbits.isMarked() {
-			printlock()
-			print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
-			print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
-
-			// Dump the source (base) object
-			gcDumpObject("base", base, off)
-
-			// Dump the object
-			gcDumpObject("obj", obj, ^uintptr(0))
-
-			getg().m.traceback = 2
-			throw("checkmark found unmarked object")
-		}
-		hbits := heapBitsForAddr(obj)
-		if hbits.isCheckmarked(span.elemsize) {
+		if setCheckmark(obj, base, off, mbits) {
+			// Already marked.
 			return
 		}
-		hbits.setCheckmarked(span.elemsize)
-		if !hbits.isCheckmarked(span.elemsize) {
-			throw("setCheckmarked and isCheckmarked disagree")
-		}
 	} else {
 		if debug.gccheckmark > 0 && span.isFree(objIndex) {
 			print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n")
@@ -1661,45 +1639,3 @@ func gcMarkTinyAllocs() {
 		greyobject(c.tiny, 0, 0, span, gcw, objIndex)
 	}
 }
-
-// Checkmarking
-
-// To help debug the concurrent GC we remark with the world
-// stopped ensuring that any object encountered has their normal
-// mark bit set. To do this we use an orthogonal bit
-// pattern to indicate the object is marked. The following pattern
-// uses the upper two bits in the object's boundary nibble.
-// 01: scalar  not marked
-// 10: pointer not marked
-// 11: pointer     marked
-// 00: scalar      marked
-// Xoring with 01 will flip the pattern from marked to unmarked and vica versa.
-// The higher bit is 1 for pointers and 0 for scalars, whether the object
-// is marked or not.
-// The first nibble no longer holds the typeDead pattern indicating that the
-// there are no more pointers in the object. This information is held
-// in the second nibble.
-
-// If useCheckmark is true, marking of an object uses the
-// checkmark bits (encoding above) instead of the standard
-// mark bits.
-var useCheckmark = false
-
-//go:nowritebarrier
-func initCheckmarks() {
-	useCheckmark = true
-	for _, s := range mheap_.allspans {
-		if s.state.get() == mSpanInUse {
-			heapBitsForAddr(s.base()).initCheckmarkSpan(s.layout())
-		}
-	}
-}
-
-func clearCheckmarks() {
-	useCheckmark = false
-	for _, s := range mheap_.allspans {
-		if s.state.get() == mSpanInUse {
-			heapBitsForAddr(s.base()).clearCheckmarkSpan(s.layout())
-		}
-	}
-}
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 2c7bfd8a59..6341375160 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -300,6 +300,10 @@ type heapArena struct {
 	// during marking.
 	pageSpecials [pagesPerArena / 8]uint8
 
+	// checkmarks stores the debug.gccheckmark state. It is only
+	// used if debug.gccheckmark > 0.
+	checkmarks *checkmarksMap
+
 	// zeroedBase marks the first byte of the first page in this
 	// arena which hasn't been used yet and is therefore already
 	// zero. zeroedBase is relative to the arena base.
-- 
cgit v1.3


From 7ee26224436d80dca3f7e98c8fcf21185522d8e6 Mon Sep 17 00:00:00 2001
From: Cherry Zhang <cherryyz@google.com>
Date: Wed, 12 Aug 2020 20:27:57 -0400
Subject: cmd/link: link dynamic library automatically

cgo_import_dynamic pragma indicates a symbol is imported from a
dynamic library. Currently, the linker does not actually link
against the dynamic library, so we have to "force" it by using

//go:cgo_import_dynamic _ _ "dylib"

syntax, which links in the library unconditionally.

This CL changes it to link in the library automatically when a
symbol is imported from the library, without using the "force"
syntax. (The "force" syntax is still supported.)

Remove the unconditional imports in the runtime. Now,
Security.framework and CoreFoundation.framework are only linked
when the x509 package is imported (or otherwise specified).

Fixes #40727.

Change-Id: Ied36b1f621cdcc5dc4a8f497cdf1c554a182d0e0
Reviewed-on: https://go-review.googlesource.com/c/go/+/248333
Run-TryBot: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/cmd/link/internal/ld/go.go | 3 +++
 src/runtime/sys_darwin.go      | 6 ------
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/link/internal/ld/go.go b/src/cmd/link/internal/ld/go.go
index bf5c9ca1ba..b3541c46c0 100644
--- a/src/cmd/link/internal/ld/go.go
+++ b/src/cmd/link/internal/ld/go.go
@@ -183,6 +183,9 @@ func setCgoAttr(ctxt *Link, lookup func(string, int) loader.Sym, file string, pk
 					hostObjSyms[s] = struct{}{}
 				}
 				havedynamic = 1
+				if lib != "" && ctxt.IsDarwin() {
+					machoadddynlib(lib, ctxt.LinkMode)
+				}
 			}
 
 			continue
diff --git a/src/runtime/sys_darwin.go b/src/runtime/sys_darwin.go
index 06474434c9..e4f19bbf41 100644
--- a/src/runtime/sys_darwin.go
+++ b/src/runtime/sys_darwin.go
@@ -489,9 +489,3 @@ func setNonblock(fd int32) {
 //go:cgo_import_dynamic libc_pthread_cond_wait pthread_cond_wait "/usr/lib/libSystem.B.dylib"
 //go:cgo_import_dynamic libc_pthread_cond_timedwait_relative_np pthread_cond_timedwait_relative_np "/usr/lib/libSystem.B.dylib"
 //go:cgo_import_dynamic libc_pthread_cond_signal pthread_cond_signal "/usr/lib/libSystem.B.dylib"
-
-// Magic incantation to get libSystem and friends actually dynamically linked.
-// TODO: Why does the code require this?  See cmd/link/internal/ld/go.go
-//go:cgo_import_dynamic _ _ "/usr/lib/libSystem.B.dylib"
-//go:cgo_import_dynamic _ _ "/System/Library/Frameworks/Security.framework/Versions/A/Security"
-//go:cgo_import_dynamic _ _ "/System/Library/Frameworks/CoreFoundation.framework/Versions/A/CoreFoundation"
-- 
cgit v1.3


From 260dff3ca3b06385dc298523791a2079162f546e Mon Sep 17 00:00:00 2001
From: Michael Anthony Knyszek <mknyszek@google.com>
Date: Wed, 19 Feb 2020 19:45:57 +0000
Subject: runtime: clean up old markrootSpans

This change removes the old markrootSpans implementation and deletes the
feature flag.

Updates #37487.

Change-Id: Idb5a2559abcc3be5a7da6f2ccce1a86e1d7634e3
Reviewed-on: https://go-review.googlesource.com/c/go/+/221183
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Austin Clements <austin@google.com>
---
 src/runtime/mgcmark.go     | 120 ++++-----------------------------------------
 src/runtime/mgcsweep.go    |   2 +-
 src/runtime/mgcsweepbuf.go |  38 --------------
 src/runtime/mheap.go       |   8 ++-
 4 files changed, 14 insertions(+), 154 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 96910ff729..2b84945471 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -47,10 +47,6 @@ const (
 	// Must be a multiple of the pageInUse bitmap element size and
 	// must also evenly divide pagesPerArena.
 	pagesPerSpanRoot = 512
-
-	// go115NewMarkrootSpans is a feature flag that indicates whether
-	// to use the new bitmap-based markrootSpans implementation.
-	go115NewMarkrootSpans = true
 )
 
 // gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
@@ -87,24 +83,16 @@ func gcMarkRootPrepare() {
 	//
 	// We depend on addfinalizer to mark objects that get
 	// finalizers after root marking.
-	if go115NewMarkrootSpans {
-		// We're going to scan the whole heap (that was available at the time the
-		// mark phase started, i.e. markArenas) for in-use spans which have specials.
-		//
-		// Break up the work into arenas, and further into chunks.
-		//
-		// Snapshot allArenas as markArenas. This snapshot is safe because allArenas
-		// is append-only.
-		mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)]
-		work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot)
-	} else {
-		// We're only interested in scanning the in-use spans,
-		// which will all be swept at this point. More spans
-		// may be added to this list during concurrent GC, but
-		// we only care about spans that were allocated before
-		// this mark phase.
-		work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
-	}
+	//
+	// We're going to scan the whole heap (that was available at the time the
+	// mark phase started, i.e. markArenas) for in-use spans which have specials.
+	//
+	// Break up the work into arenas, and further into chunks.
+	//
+	// Snapshot allArenas as markArenas. This snapshot is safe because allArenas
+	// is append-only.
+	mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)]
+	work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot)
 
 	// Scan stacks.
 	//
@@ -316,10 +304,6 @@ func markrootFreeGStacks() {
 //
 //go:nowritebarrier
 func markrootSpans(gcw *gcWork, shard int) {
-	if !go115NewMarkrootSpans {
-		oldMarkrootSpans(gcw, shard)
-		return
-	}
 	// Objects with finalizers have two GC-related invariants:
 	//
 	// 1) Everything reachable from the object must be marked.
@@ -396,90 +380,6 @@ func markrootSpans(gcw *gcWork, shard int) {
 	}
 }
 
-// oldMarkrootSpans marks roots for one shard of work.spans.
-//
-// For go115NewMarkrootSpans = false.
-//
-//go:nowritebarrier
-func oldMarkrootSpans(gcw *gcWork, shard int) {
-	// Objects with finalizers have two GC-related invariants:
-	//
-	// 1) Everything reachable from the object must be marked.
-	// This ensures that when we pass the object to its finalizer,
-	// everything the finalizer can reach will be retained.
-	//
-	// 2) Finalizer specials (which are not in the garbage
-	// collected heap) are roots. In practice, this means the fn
-	// field must be scanned.
-	//
-	// TODO(austin): There are several ideas for making this more
-	// efficient in issue #11485.
-
-	sg := mheap_.sweepgen
-	spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard)
-	// Note that work.spans may not include spans that were
-	// allocated between entering the scan phase and now. We may
-	// also race with spans being added into sweepSpans when they're
-	// just created, and as a result we may see nil pointers in the
-	// spans slice. This is okay because any objects with finalizers
-	// in those spans must have been allocated and given finalizers
-	// after we entered the scan phase, so addfinalizer will have
-	// ensured the above invariants for them.
-	for i := 0; i < len(spans); i++ {
-		// sweepBuf.block requires that we read pointers from the block atomically.
-		// It also requires that we ignore nil pointers.
-		s := (*mspan)(atomic.Loadp(unsafe.Pointer(&spans[i])))
-
-		// This is racing with spans being initialized, so
-		// check the state carefully.
-		if s == nil || s.state.get() != mSpanInUse {
-			continue
-		}
-		// Check that this span was swept (it may be cached or uncached).
-		if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) {
-			// sweepgen was updated (+2) during non-checkmark GC pass
-			print("sweep ", s.sweepgen, " ", sg, "\n")
-			throw("gc: unswept span")
-		}
-
-		// Speculatively check if there are any specials
-		// without acquiring the span lock. This may race with
-		// adding the first special to a span, but in that
-		// case addfinalizer will observe that the GC is
-		// active (which is globally synchronized) and ensure
-		// the above invariants. We may also ensure the
-		// invariants, but it's okay to scan an object twice.
-		if s.specials == nil {
-			continue
-		}
-
-		// Lock the specials to prevent a special from being
-		// removed from the list while we're traversing it.
-		lock(&s.speciallock)
-
-		for sp := s.specials; sp != nil; sp = sp.next {
-			if sp.kind != _KindSpecialFinalizer {
-				continue
-			}
-			// don't mark finalized object, but scan it so we
-			// retain everything it points to.
-			spf := (*specialfinalizer)(unsafe.Pointer(sp))
-			// A finalizer can be set for an inner byte of an object, find object beginning.
-			p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize
-
-			// Mark everything that can be reached from
-			// the object (but *not* the object itself or
-			// we'll never collect it).
-			scanobject(p, gcw)
-
-			// The special itself is a root.
-			scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
-		}
-
-		unlock(&s.speciallock)
-	}
-}
-
 // gcAssistAlloc performs GC work to make gp's assist debt positive.
 // gp must be the calling user gorountine.
 //
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index 3aa3afc028..9244174403 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -662,7 +662,7 @@ func (s *mspan) oldSweep(preserve bool) bool {
 			special = *specialp
 		}
 	}
-	if go115NewMarkrootSpans && hadSpecials && s.specials == nil {
+	if hadSpecials && s.specials == nil {
 		spanHasNoSpecials(s)
 	}
 
diff --git a/src/runtime/mgcsweepbuf.go b/src/runtime/mgcsweepbuf.go
index 1f722c3d58..5e5ca3dd2f 100644
--- a/src/runtime/mgcsweepbuf.go
+++ b/src/runtime/mgcsweepbuf.go
@@ -136,41 +136,3 @@ func (b *gcSweepBuf) pop() *mspan {
 	block.spans[bottom] = nil
 	return s
 }
-
-// numBlocks returns the number of blocks in buffer b. numBlocks is
-// safe to call concurrently with any other operation. Spans that have
-// been pushed prior to the call to numBlocks are guaranteed to appear
-// in some block in the range [0, numBlocks()), assuming there are no
-// intervening pops. Spans that are pushed after the call may also
-// appear in these blocks.
-func (b *gcSweepBuf) numBlocks() int {
-	return int(divRoundUp(uintptr(atomic.Load(&b.index)), gcSweepBlockEntries))
-}
-
-// block returns the spans in the i'th block of buffer b. block is
-// safe to call concurrently with push. The block may contain nil
-// pointers that must be ignored, and each entry in the block must be
-// loaded atomically.
-func (b *gcSweepBuf) block(i int) []*mspan {
-	// Perform bounds check before loading spine address since
-	// push ensures the allocated length is at least spineLen.
-	if i < 0 || uintptr(i) >= atomic.Loaduintptr(&b.spineLen) {
-		throw("block index out of range")
-	}
-
-	// Get block i.
-	spine := atomic.Loadp(unsafe.Pointer(&b.spine))
-	blockp := add(spine, sys.PtrSize*uintptr(i))
-	block := (*gcSweepBlock)(atomic.Loadp(blockp))
-
-	// Slice the block if necessary.
-	cursor := uintptr(atomic.Load(&b.index))
-	top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries
-	var spans []*mspan
-	if uintptr(i) < top {
-		spans = block.spans[:]
-	} else {
-		spans = block.spans[:bottom]
-	}
-	return spans
-}
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 6341375160..0807726863 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -52,7 +52,7 @@ const (
 	// The definition of this flag helps ensure that if there's a problem with
 	// the new markroot spans implementation and it gets turned off, that the new
 	// mcentral implementation also gets turned off so the runtime isn't broken.
-	go115NewMCentralImpl = true && go115NewMarkrootSpans
+	go115NewMCentralImpl = true
 )
 
 // Main malloc heap.
@@ -1705,9 +1705,7 @@ func addspecial(p unsafe.Pointer, s *special) bool {
 	s.offset = uint16(offset)
 	s.next = *t
 	*t = s
-	if go115NewMarkrootSpans {
-		spanHasSpecials(span)
-	}
+	spanHasSpecials(span)
 	unlock(&span.speciallock)
 	releasem(mp)
 
@@ -1748,7 +1746,7 @@ func removespecial(p unsafe.Pointer, kind uint8) *special {
 		}
 		t = &s.next
 	}
-	if go115NewMarkrootSpans && span.specials == nil {
+	if span.specials == nil {
 		spanHasNoSpecials(span)
 	}
 	unlock(&span.speciallock)
-- 
cgit v1.3


From e6d0bd2b8951bde6f0ac6421f20e18efc7ba0cdb Mon Sep 17 00:00:00 2001
From: Michael Anthony Knyszek <mknyszek@google.com>
Date: Wed, 19 Feb 2020 16:37:48 +0000
Subject: runtime: clean up old mcentral code

This change deletes the old mcentral implementation from the code base
and the newMCentralImpl feature flag along with it.

Updates #37487.

Change-Id: Ibca8f722665f0865051f649ffe699cbdbfdcfcf2
Reviewed-on: https://go-review.googlesource.com/c/go/+/221184
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
---
 src/runtime/lockrank.go    |  16 +--
 src/runtime/malloc.go      |   8 +-
 src/runtime/mcache.go      |   6 +-
 src/runtime/mcentral.go    | 239 +--------------------------------------------
 src/runtime/mgc.go         |  10 +-
 src/runtime/mgcsweep.go    | 237 ++------------------------------------------
 src/runtime/mgcsweepbuf.go | 138 --------------------------
 src/runtime/mheap.go       |  36 +------
 8 files changed, 25 insertions(+), 665 deletions(-)
 delete mode 100644 src/runtime/mgcsweepbuf.go

(limited to 'src/runtime')

diff --git a/src/runtime/lockrank.go b/src/runtime/lockrank.go
index 000193585d..b23cf767be 100644
--- a/src/runtime/lockrank.go
+++ b/src/runtime/lockrank.go
@@ -67,8 +67,6 @@ const (
 	lockRankRwmutexW
 	lockRankRwmutexR
 
-	lockRankMcentral // For !go115NewMCentralImpl
-	lockRankSpine    // For !go115NewMCentralImpl
 	lockRankSpanSetSpine
 	lockRankGscan
 	lockRankStackpool
@@ -149,8 +147,6 @@ var lockNames = []string{
 	lockRankRwmutexW: "rwmutexW",
 	lockRankRwmutexR: "rwmutexR",
 
-	lockRankMcentral:     "mcentral",
-	lockRankSpine:        "spine",
 	lockRankSpanSetSpine: "spanSetSpine",
 	lockRankGscan:        "gscan",
 	lockRankStackpool:    "stackpool",
@@ -228,18 +224,16 @@ var lockPartialOrder [][]lockRank = [][]lockRank{
 	lockRankRwmutexW: {},
 	lockRankRwmutexR: {lockRankRwmutexW},
 
-	lockRankMcentral:     {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
-	lockRankSpine:        {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
 	lockRankSpanSetSpine: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
-	lockRankGscan:        {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankNotifyList, lockRankProf, lockRankGcBitsArenas, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankMcentral, lockRankSpine, lockRankSpanSetSpine},
-	lockRankStackpool:    {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankMcentral, lockRankSpine, lockRankSpanSetSpine, lockRankGscan},
-	lockRankStackLarge:   {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral, lockRankSpanSetSpine, lockRankGscan},
+	lockRankGscan:        {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankNotifyList, lockRankProf, lockRankGcBitsArenas, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankSpanSetSpine},
+	lockRankStackpool:    {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankSpanSetSpine, lockRankGscan},
+	lockRankStackLarge:   {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan},
 	lockRankDefer:        {},
 	lockRankSudog:        {lockRankNotifyList, lockRankHchan},
 	lockRankWbufSpans:    {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankRoot, lockRankGscan, lockRankDefer, lockRankSudog},
-	lockRankMheap:        {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankMcentral, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans, lockRankSpanSetSpine},
+	lockRankMheap:        {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans, lockRankSpanSetSpine},
 	lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan},
-	lockRankGlobalAlloc:  {lockRankProf, lockRankSpine, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial},
+	lockRankGlobalAlloc:  {lockRankProf, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial},
 
 	lockRankGFree:     {lockRankSched},
 	lockRankHchanLeaf: {lockRankGscan, lockRankHchanLeaf},
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index b3fac3de24..e46327f9ce 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -1178,11 +1178,9 @@ func largeAlloc(size uintptr, needzero bool, noscan bool) *mspan {
 	if s == nil {
 		throw("out of memory")
 	}
-	if go115NewMCentralImpl {
-		// Put the large span in the mcentral swept list so that it's
-		// visible to the background sweeper.
-		mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s)
-	}
+	// Put the large span in the mcentral swept list so that it's
+	// visible to the background sweeper.
+	mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s)
 	s.limit = s.base() + size
 	heapBitsForAddr(s.base()).initSpan(s)
 	return s
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index 5bceb51ac9..7a7d33ccae 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -131,11 +131,7 @@ func (c *mcache) refill(spc spanClass) {
 		if s.sweepgen != mheap_.sweepgen+3 {
 			throw("bad sweepgen in refill")
 		}
-		if go115NewMCentralImpl {
-			mheap_.central[spc].mcentral.uncacheSpan(s)
-		} else {
-			atomic.Store(&s.sweepgen, mheap_.sweepgen)
-		}
+		mheap_.central[spc].mcentral.uncacheSpan(s)
 	}
 
 	// Get a new cached span from the central lists.
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go
index ed49d86d0c..ed49e01677 100644
--- a/src/runtime/mcentral.go
+++ b/src/runtime/mcentral.go
@@ -18,7 +18,6 @@ import "runtime/internal/atomic"
 //
 //go:notinheap
 type mcentral struct {
-	lock      mutex
 	spanclass spanClass
 
 	// For !go115NewMCentralImpl.
@@ -55,16 +54,10 @@ type mcentral struct {
 // Initialize a single central free list.
 func (c *mcentral) init(spc spanClass) {
 	c.spanclass = spc
-	if go115NewMCentralImpl {
-		lockInit(&c.partial[0].spineLock, lockRankSpanSetSpine)
-		lockInit(&c.partial[1].spineLock, lockRankSpanSetSpine)
-		lockInit(&c.full[0].spineLock, lockRankSpanSetSpine)
-		lockInit(&c.full[1].spineLock, lockRankSpanSetSpine)
-	} else {
-		c.nonempty.init()
-		c.empty.init()
-		lockInit(&c.lock, lockRankMcentral)
-	}
+	lockInit(&c.partial[0].spineLock, lockRankSpanSetSpine)
+	lockInit(&c.partial[1].spineLock, lockRankSpanSetSpine)
+	lockInit(&c.full[0].spineLock, lockRankSpanSetSpine)
+	lockInit(&c.full[1].spineLock, lockRankSpanSetSpine)
 }
 
 // partialUnswept returns the spanSet which holds partially-filled
@@ -93,9 +86,6 @@ func (c *mcentral) fullSwept(sweepgen uint32) *spanSet {
 
 // Allocate a span to use in an mcache.
 func (c *mcentral) cacheSpan() *mspan {
-	if !go115NewMCentralImpl {
-		return c.oldCacheSpan()
-	}
 	// Deduct credit for this span allocation and sweep if necessary.
 	spanBytes := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) * _PageSize
 	deductSweepCredit(spanBytes, 0)
@@ -213,127 +203,11 @@ havespan:
 	return s
 }
 
-// Allocate a span to use in an mcache.
-//
-// For !go115NewMCentralImpl.
-func (c *mcentral) oldCacheSpan() *mspan {
-	// Deduct credit for this span allocation and sweep if necessary.
-	spanBytes := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) * _PageSize
-	deductSweepCredit(spanBytes, 0)
-
-	lock(&c.lock)
-	traceDone := false
-	if trace.enabled {
-		traceGCSweepStart()
-	}
-	sg := mheap_.sweepgen
-retry:
-	var s *mspan
-	for s = c.nonempty.first; s != nil; s = s.next {
-		if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
-			c.nonempty.remove(s)
-			c.empty.insertBack(s)
-			unlock(&c.lock)
-			s.sweep(true)
-			goto havespan
-		}
-		if s.sweepgen == sg-1 {
-			// the span is being swept by background sweeper, skip
-			continue
-		}
-		// we have a nonempty span that does not require sweeping, allocate from it
-		c.nonempty.remove(s)
-		c.empty.insertBack(s)
-		unlock(&c.lock)
-		goto havespan
-	}
-
-	for s = c.empty.first; s != nil; s = s.next {
-		if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
-			// we have an empty span that requires sweeping,
-			// sweep it and see if we can free some space in it
-			c.empty.remove(s)
-			// swept spans are at the end of the list
-			c.empty.insertBack(s)
-			unlock(&c.lock)
-			s.sweep(true)
-			freeIndex := s.nextFreeIndex()
-			if freeIndex != s.nelems {
-				s.freeindex = freeIndex
-				goto havespan
-			}
-			lock(&c.lock)
-			// the span is still empty after sweep
-			// it is already in the empty list, so just retry
-			goto retry
-		}
-		if s.sweepgen == sg-1 {
-			// the span is being swept by background sweeper, skip
-			continue
-		}
-		// already swept empty span,
-		// all subsequent ones must also be either swept or in process of sweeping
-		break
-	}
-	if trace.enabled {
-		traceGCSweepDone()
-		traceDone = true
-	}
-	unlock(&c.lock)
-
-	// Replenish central list if empty.
-	s = c.grow()
-	if s == nil {
-		return nil
-	}
-	lock(&c.lock)
-	c.empty.insertBack(s)
-	unlock(&c.lock)
-
-	// At this point s is a non-empty span, queued at the end of the empty list,
-	// c is unlocked.
-havespan:
-	if trace.enabled && !traceDone {
-		traceGCSweepDone()
-	}
-	n := int(s.nelems) - int(s.allocCount)
-	if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems {
-		throw("span has no free objects")
-	}
-	// Assume all objects from this span will be allocated in the
-	// mcache. If it gets uncached, we'll adjust this.
-	atomic.Xadd64(&c.nmalloc, int64(n))
-	usedBytes := uintptr(s.allocCount) * s.elemsize
-	atomic.Xadd64(&memstats.heap_live, int64(spanBytes)-int64(usedBytes))
-	if trace.enabled {
-		// heap_live changed.
-		traceHeapAlloc()
-	}
-	if gcBlackenEnabled != 0 {
-		// heap_live changed.
-		gcController.revise()
-	}
-	freeByteBase := s.freeindex &^ (64 - 1)
-	whichByte := freeByteBase / 8
-	// Init alloc bits cache.
-	s.refillAllocCache(whichByte)
-
-	// Adjust the allocCache so that s.freeindex corresponds to the low bit in
-	// s.allocCache.
-	s.allocCache >>= s.freeindex % 64
-
-	return s
-}
-
 // Return span from an mcache.
 //
 // s must have a span class corresponding to this
 // mcentral and it must not be empty.
 func (c *mcentral) uncacheSpan(s *mspan) {
-	if !go115NewMCentralImpl {
-		c.oldUncacheSpan(s)
-		return
-	}
 	if s.allocCount == 0 {
 		throw("uncaching span but s.allocCount == 0")
 	}
@@ -393,111 +267,6 @@ func (c *mcentral) uncacheSpan(s *mspan) {
 	}
 }
 
-// Return span from an mcache.
-//
-// For !go115NewMCentralImpl.
-func (c *mcentral) oldUncacheSpan(s *mspan) {
-	if s.allocCount == 0 {
-		throw("uncaching span but s.allocCount == 0")
-	}
-
-	sg := mheap_.sweepgen
-	stale := s.sweepgen == sg+1
-	if stale {
-		// Span was cached before sweep began. It's our
-		// responsibility to sweep it.
-		//
-		// Set sweepgen to indicate it's not cached but needs
-		// sweeping and can't be allocated from. sweep will
-		// set s.sweepgen to indicate s is swept.
-		atomic.Store(&s.sweepgen, sg-1)
-	} else {
-		// Indicate that s is no longer cached.
-		atomic.Store(&s.sweepgen, sg)
-	}
-
-	n := int(s.nelems) - int(s.allocCount)
-	if n > 0 {
-		// cacheSpan updated alloc assuming all objects on s
-		// were going to be allocated. Adjust for any that
-		// weren't. We must do this before potentially
-		// sweeping the span.
-		atomic.Xadd64(&c.nmalloc, -int64(n))
-
-		lock(&c.lock)
-		c.empty.remove(s)
-		c.nonempty.insert(s)
-		if !stale {
-			// mCentral_CacheSpan conservatively counted
-			// unallocated slots in heap_live. Undo this.
-			//
-			// If this span was cached before sweep, then
-			// heap_live was totally recomputed since
-			// caching this span, so we don't do this for
-			// stale spans.
-			atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize))
-		}
-		unlock(&c.lock)
-	}
-
-	if stale {
-		// Now that s is in the right mcentral list, we can
-		// sweep it.
-		s.sweep(false)
-	}
-}
-
-// freeSpan updates c and s after sweeping s.
-// It sets s's sweepgen to the latest generation,
-// and, based on the number of free objects in s,
-// moves s to the appropriate list of c or returns it
-// to the heap.
-// freeSpan reports whether s was returned to the heap.
-// If preserve=true, it does not move s (the caller
-// must take care of it).
-//
-// For !go115NewMCentralImpl.
-func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool {
-	if sg := mheap_.sweepgen; s.sweepgen == sg+1 || s.sweepgen == sg+3 {
-		throw("freeSpan given cached span")
-	}
-	s.needzero = 1
-
-	if preserve {
-		// preserve is set only when called from (un)cacheSpan above,
-		// the span must be in the empty list.
-		if !s.inList() {
-			throw("can't preserve unlinked span")
-		}
-		atomic.Store(&s.sweepgen, mheap_.sweepgen)
-		return false
-	}
-
-	lock(&c.lock)
-
-	// Move to nonempty if necessary.
-	if wasempty {
-		c.empty.remove(s)
-		c.nonempty.insert(s)
-	}
-
-	// delay updating sweepgen until here. This is the signal that
-	// the span may be used in an mcache, so it must come after the
-	// linked list operations above (actually, just after the
-	// lock of c above.)
-	atomic.Store(&s.sweepgen, mheap_.sweepgen)
-
-	if s.allocCount != 0 {
-		unlock(&c.lock)
-		return false
-	}
-
-	c.nonempty.remove(s)
-	unlock(&c.lock)
-	mheap_.freeSpan(s)
-	return true
-}
-
 // grow allocates a new empty span from the heap and initializes it for c's size class.
 func (c *mcentral) grow() *mspan {
 	npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()])
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index c8c4a4c758..bd87144355 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -2149,21 +2149,13 @@ func gcSweep(mode gcMode) {
 	lock(&mheap_.lock)
 	mheap_.sweepgen += 2
 	mheap_.sweepdone = 0
-	if !go115NewMCentralImpl && mheap_.sweepSpans[mheap_.sweepgen/2%2].index != 0 {
-		// We should have drained this list during the last
-		// sweep phase. We certainly need to start this phase
-		// with an empty swept list.
-		throw("non-empty swept list")
-	}
 	mheap_.pagesSwept = 0
 	mheap_.sweepArenas = mheap_.allArenas
 	mheap_.reclaimIndex = 0
 	mheap_.reclaimCredit = 0
 	unlock(&mheap_.lock)
 
-	if go115NewMCentralImpl {
-		sweep.centralIndex.clear()
-	}
+	sweep.centralIndex.clear()
 
 	if !_ConcurrentSweep || mode == gcForceBlockMode {
 		// Special case synchronous sweep.
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index 9244174403..6b8c56ce35 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -132,17 +132,15 @@ func finishsweep_m() {
 		sweep.npausesweep++
 	}
 
-	if go115NewMCentralImpl {
-		// Reset all the unswept buffers, which should be empty.
-		// Do this in sweep termination as opposed to mark termination
-		// so that we can catch unswept spans and reclaim blocks as
-		// soon as possible.
-		sg := mheap_.sweepgen
-		for i := range mheap_.central {
-			c := &mheap_.central[i].mcentral
-			c.partialUnswept(sg).reset()
-			c.fullUnswept(sg).reset()
-		}
+	// Reset all the unswept buffers, which should be empty.
+	// Do this in sweep termination as opposed to mark termination
+	// so that we can catch unswept spans and reclaim blocks as
+	// soon as possible.
+	sg := mheap_.sweepgen
+	for i := range mheap_.central {
+		c := &mheap_.central[i].mcentral
+		c.partialUnswept(sg).reset()
+		c.fullUnswept(sg).reset()
 	}
 
 	// Sweeping is done, so if the scavenger isn't already awake,
@@ -202,11 +200,7 @@ func sweepone() uintptr {
 	var s *mspan
 	sg := mheap_.sweepgen
 	for {
-		if go115NewMCentralImpl {
-			s = mheap_.nextSpanForSweep()
-		} else {
-			s = mheap_.sweepSpans[1-sg/2%2].pop()
-		}
+		s = mheap_.nextSpanForSweep()
 		if s == nil {
 			atomic.Store(&mheap_.sweepdone, 1)
 			break
@@ -322,9 +316,6 @@ func (s *mspan) ensureSwept() {
 // If preserve=true, don't return it to heap nor relink in mcentral lists;
 // caller takes care of it.
 func (s *mspan) sweep(preserve bool) bool {
-	if !go115NewMCentralImpl {
-		return s.oldSweep(preserve)
-	}
 	// It's critical that we enter this function with preemption disabled,
 	// GC must not start while we are in the middle of this function.
 	_g_ := getg()
@@ -568,214 +559,6 @@ func (s *mspan) sweep(preserve bool) bool {
 	return false
 }
 
-// Sweep frees or collects finalizers for blocks not marked in the mark phase.
-// It clears the mark bits in preparation for the next GC round.
-// Returns true if the span was returned to heap.
-// If preserve=true, don't return it to heap nor relink in mcentral lists;
-// caller takes care of it.
-//
-// For !go115NewMCentralImpl.
-func (s *mspan) oldSweep(preserve bool) bool {
-	// It's critical that we enter this function with preemption disabled,
-	// GC must not start while we are in the middle of this function.
-	_g_ := getg()
-	if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
-		throw("mspan.sweep: m is not locked")
-	}
-	sweepgen := mheap_.sweepgen
-	if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 {
-		print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
-		throw("mspan.sweep: bad span state")
-	}
-
-	if trace.enabled {
-		traceGCSweepSpan(s.npages * _PageSize)
-	}
-
-	atomic.Xadd64(&mheap_.pagesSwept, int64(s.npages))
-
-	spc := s.spanclass
-	size := s.elemsize
-	res := false
-
-	c := _g_.m.p.ptr().mcache
-	freeToHeap := false
-
-	// The allocBits indicate which unmarked objects don't need to be
-	// processed since they were free at the end of the last GC cycle
-	// and were not allocated since then.
-	// If the allocBits index is >= s.freeindex and the bit
-	// is not marked then the object remains unallocated
-	// since the last GC.
-	// This situation is analogous to being on a freelist.
-
-	// Unlink & free special records for any objects we're about to free.
-	// Two complications here:
-	// 1. An object can have both finalizer and profile special records.
-	//    In such case we need to queue finalizer for execution,
-	//    mark the object as live and preserve the profile special.
-	// 2. A tiny object can have several finalizers setup for different offsets.
-	//    If such object is not marked, we need to queue all finalizers at once.
-	// Both 1 and 2 are possible at the same time.
-	hadSpecials := s.specials != nil
-	specialp := &s.specials
-	special := *specialp
-	for special != nil {
-		// A finalizer can be set for an inner byte of an object, find object beginning.
-		objIndex := uintptr(special.offset) / size
-		p := s.base() + objIndex*size
-		mbits := s.markBitsForIndex(objIndex)
-		if !mbits.isMarked() {
-			// This object is not marked and has at least one special record.
-			// Pass 1: see if it has at least one finalizer.
-			hasFin := false
-			endOffset := p - s.base() + size
-			for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
-				if tmp.kind == _KindSpecialFinalizer {
-					// Stop freeing of object if it has a finalizer.
-					mbits.setMarkedNonAtomic()
-					hasFin = true
-					break
-				}
-			}
-			// Pass 2: queue all finalizers _or_ handle profile record.
-			for special != nil && uintptr(special.offset) < endOffset {
-				// Find the exact byte for which the special was setup
-				// (as opposed to object beginning).
-				p := s.base() + uintptr(special.offset)
-				if special.kind == _KindSpecialFinalizer || !hasFin {
-					// Splice out special record.
-					y := special
-					special = special.next
-					*specialp = special
-					freespecial(y, unsafe.Pointer(p), size)
-				} else {
-					// This is profile record, but the object has finalizers (so kept alive).
-					// Keep special record.
-					specialp = &special.next
-					special = *specialp
-				}
-			}
-		} else {
-			// object is still live: keep special record
-			specialp = &special.next
-			special = *specialp
-		}
-	}
-	if hadSpecials && s.specials == nil {
-		spanHasNoSpecials(s)
-	}
-
-	if debug.allocfreetrace != 0 || debug.clobberfree != 0 || raceenabled || msanenabled {
-		// Find all newly freed objects. This doesn't have to
-		// efficient; allocfreetrace has massive overhead.
-		mbits := s.markBitsForBase()
-		abits := s.allocBitsForIndex(0)
-		for i := uintptr(0); i < s.nelems; i++ {
-			if !mbits.isMarked() && (abits.index < s.freeindex || abits.isMarked()) {
-				x := s.base() + i*s.elemsize
-				if debug.allocfreetrace != 0 {
-					tracefree(unsafe.Pointer(x), size)
-				}
-				if debug.clobberfree != 0 {
-					clobberfree(unsafe.Pointer(x), size)
-				}
-				if raceenabled {
-					racefree(unsafe.Pointer(x), size)
-				}
-				if msanenabled {
-					msanfree(unsafe.Pointer(x), size)
-				}
-			}
-			mbits.advance()
-			abits.advance()
-		}
-	}
-
-	// Count the number of free objects in this span.
-	nalloc := uint16(s.countAlloc())
-	if spc.sizeclass() == 0 && nalloc == 0 {
-		s.needzero = 1
-		freeToHeap = true
-	}
-	nfreed := s.allocCount - nalloc
-	if nalloc > s.allocCount {
-		print("runtime: nelems=", s.nelems, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n")
-		throw("sweep increased allocation count")
-	}
-
-	s.allocCount = nalloc
-	wasempty := s.nextFreeIndex() == s.nelems
-	s.freeindex = 0 // reset allocation index to start of span.
-	if trace.enabled {
-		getg().m.p.ptr().traceReclaimed += uintptr(nfreed) * s.elemsize
-	}
-
-	// gcmarkBits becomes the allocBits.
-	// get a fresh cleared gcmarkBits in preparation for next GC
-	s.allocBits = s.gcmarkBits
-	s.gcmarkBits = newMarkBits(s.nelems)
-
-	// Initialize alloc bits cache.
-	s.refillAllocCache(0)
-
-	// We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
-	// because of the potential for a concurrent free/SetFinalizer.
-	// But we need to set it before we make the span available for allocation
-	// (return it to heap or mcentral), because allocation code assumes that a
-	// span is already swept if available for allocation.
-	if freeToHeap || nfreed == 0 {
-		// The span must be in our exclusive ownership until we update sweepgen,
-		// check for potential races.
-		if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 {
-			print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
-			throw("mspan.sweep: bad span state after sweep")
-		}
-		// Serialization point.
-		// At this point the mark bits are cleared and allocation ready
-		// to go so release the span.
-		atomic.Store(&s.sweepgen, sweepgen)
-	}
-
-	if nfreed > 0 && spc.sizeclass() != 0 {
-		c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed)
-		res = mheap_.central[spc].mcentral.freeSpan(s, preserve, wasempty)
-		// mcentral.freeSpan updates sweepgen
-	} else if freeToHeap {
-		// Free large span to heap
-
-		// NOTE(rsc,dvyukov): The original implementation of efence
-		// in CL 22060046 used sysFree instead of sysFault, so that
-		// the operating system would eventually give the memory
-		// back to us again, so that an efence program could run
-		// longer without running out of memory. Unfortunately,
-		// calling sysFree here without any kind of adjustment of the
-		// heap data structures means that when the memory does
-		// come back to us, we have the wrong metadata for it, either in
-		// the mspan structures or in the garbage collection bitmap.
-		// Using sysFault here means that the program will run out of
-		// memory fairly quickly in efence mode, but at least it won't
-		// have mysterious crashes due to confused memory reuse.
-		// It should be possible to switch back to sysFree if we also
-		// implement and then call some kind of mheap.deleteSpan.
-		if debug.efence > 0 {
-			s.limit = 0 // prevent mlookup from finding this span
-			sysFault(unsafe.Pointer(s.base()), size)
-		} else {
-			mheap_.freeSpan(s)
-		}
-		c.local_nlargefree++
-		c.local_largefree += size
-		res = true
-	}
-	if !res {
-		// The span has been swept and is still in-use, so put
-		// it on the swept in-use list.
-		mheap_.sweepSpans[sweepgen/2%2].push(s)
-	}
-	return res
-}
-
 // reportZombies reports any marked but free objects in s and throws.
 //
 // This generally means one of the following:
diff --git a/src/runtime/mgcsweepbuf.go b/src/runtime/mgcsweepbuf.go
deleted file mode 100644
index 5e5ca3dd2f..0000000000
--- a/src/runtime/mgcsweepbuf.go
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import (
-	"internal/cpu"
-	"runtime/internal/atomic"
-	"runtime/internal/sys"
-	"unsafe"
-)
-
-// A gcSweepBuf is a set of *mspans.
-//
-// gcSweepBuf is safe for concurrent push operations *or* concurrent
-// pop operations, but not both simultaneously.
-type gcSweepBuf struct {
-	// A gcSweepBuf is a two-level data structure consisting of a
-	// growable spine that points to fixed-sized blocks. The spine
-	// can be accessed without locks, but adding a block or
-	// growing it requires taking the spine lock.
-	//
-	// Because each mspan covers at least 8K of heap and takes at
-	// most 8 bytes in the gcSweepBuf, the growth of the spine is
-	// quite limited.
-	//
-	// The spine and all blocks are allocated off-heap, which
-	// allows this to be used in the memory manager and avoids the
-	// need for write barriers on all of these. We never release
-	// this memory because there could be concurrent lock-free
-	// access and we're likely to reuse it anyway. (In principle,
-	// we could do this during STW.)
-
-	spineLock mutex
-	spine     unsafe.Pointer // *[N]*gcSweepBlock, accessed atomically
-	spineLen  uintptr        // Spine array length, accessed atomically
-	spineCap  uintptr        // Spine array cap, accessed under lock
-
-	// index is the first unused slot in the logical concatenation
-	// of all blocks. It is accessed atomically.
-	index uint32
-}
-
-const (
-	gcSweepBlockEntries    = 512 // 4KB on 64-bit
-	gcSweepBufInitSpineCap = 256 // Enough for 1GB heap on 64-bit
-)
-
-type gcSweepBlock struct {
-	spans [gcSweepBlockEntries]*mspan
-}
-
-// push adds span s to buffer b. push is safe to call concurrently
-// with other push operations, but NOT to call concurrently with pop.
-func (b *gcSweepBuf) push(s *mspan) {
-	// Obtain our slot.
-	cursor := uintptr(atomic.Xadd(&b.index, +1) - 1)
-	top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries
-
-	// Do we need to add a block?
-	spineLen := atomic.Loaduintptr(&b.spineLen)
-	var block *gcSweepBlock
-retry:
-	if top < spineLen {
-		spine := atomic.Loadp(unsafe.Pointer(&b.spine))
-		blockp := add(spine, sys.PtrSize*top)
-		block = (*gcSweepBlock)(atomic.Loadp(blockp))
-	} else {
-		// Add a new block to the spine, potentially growing
-		// the spine.
-		lock(&b.spineLock)
-		// spineLen cannot change until we release the lock,
-		// but may have changed while we were waiting.
-		spineLen = atomic.Loaduintptr(&b.spineLen)
-		if top < spineLen {
-			unlock(&b.spineLock)
-			goto retry
-		}
-
-		if spineLen == b.spineCap {
-			// Grow the spine.
-			newCap := b.spineCap * 2
-			if newCap == 0 {
-				newCap = gcSweepBufInitSpineCap
-			}
-			newSpine := persistentalloc(newCap*sys.PtrSize, cpu.CacheLineSize, &memstats.gc_sys)
-			if b.spineCap != 0 {
-				// Blocks are allocated off-heap, so
-				// no write barriers.
-				memmove(newSpine, b.spine, b.spineCap*sys.PtrSize)
-			}
-			// Spine is allocated off-heap, so no write barrier.
-			atomic.StorepNoWB(unsafe.Pointer(&b.spine), newSpine)
-			b.spineCap = newCap
-			// We can't immediately free the old spine
-			// since a concurrent push with a lower index
-			// could still be reading from it. We let it
-			// leak because even a 1TB heap would waste
-			// less than 2MB of memory on old spines. If
-			// this is a problem, we could free old spines
-			// during STW.
-		}
-
-		// Allocate a new block and add it to the spine.
-		block = (*gcSweepBlock)(persistentalloc(unsafe.Sizeof(gcSweepBlock{}), cpu.CacheLineSize, &memstats.gc_sys))
-		blockp := add(b.spine, sys.PtrSize*top)
-		// Blocks are allocated off-heap, so no write barrier.
-		atomic.StorepNoWB(blockp, unsafe.Pointer(block))
-		atomic.Storeuintptr(&b.spineLen, spineLen+1)
-		unlock(&b.spineLock)
-	}
-
-	// We have a block. Insert the span atomically, since there may be
-	// concurrent readers via the block API.
-	atomic.StorepNoWB(unsafe.Pointer(&block.spans[bottom]), unsafe.Pointer(s))
-}
-
-// pop removes and returns a span from buffer b, or nil if b is empty.
-// pop is safe to call concurrently with other pop operations, but NOT
-// to call concurrently with push.
-func (b *gcSweepBuf) pop() *mspan {
-	cursor := atomic.Xadd(&b.index, -1)
-	if int32(cursor) < 0 {
-		atomic.Xadd(&b.index, +1)
-		return nil
-	}
-
-	// There are no concurrent spine or block modifications during
-	// pop, so we can omit the atomics.
-	top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries
-	blockp := (**gcSweepBlock)(add(b.spine, sys.PtrSize*uintptr(top)))
-	block := *blockp
-	s := block.spans[bottom]
-	// Clear the pointer for block(i).
-	block.spans[bottom] = nil
-	return s
-}
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 0807726863..cb586171c4 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -44,15 +44,6 @@ const (
 	// Must be a multiple of the pageInUse bitmap element size and
 	// must also evenly divid pagesPerArena.
 	pagesPerReclaimerChunk = 512
-
-	// go115NewMCentralImpl is a feature flag for the new mcentral implementation.
-	//
-	// This flag depends on go115NewMarkrootSpans because the new mcentral
-	// implementation requires that markroot spans no longer rely on mgcsweepbufs.
-	// The definition of this flag helps ensure that if there's a problem with
-	// the new markroot spans implementation and it gets turned off, that the new
-	// mcentral implementation also gets turned off so the runtime isn't broken.
-	go115NewMCentralImpl = true
 )
 
 // Main malloc heap.
@@ -85,19 +76,6 @@ type mheap struct {
 	// access (since that may free the backing store).
 	allspans []*mspan // all spans out there
 
-	// sweepSpans contains two mspan stacks: one of swept in-use
-	// spans, and one of unswept in-use spans. These two trade
-	// roles on each GC cycle. Since the sweepgen increases by 2
-	// on each cycle, this means the swept spans are in
-	// sweepSpans[sweepgen/2%2] and the unswept spans are in
-	// sweepSpans[1-sweepgen/2%2]. Sweeping pops spans from the
-	// unswept stack and pushes spans that are still in-use on the
-	// swept stack. Likewise, allocating an in-use span pushes it
-	// on the swept stack.
-	//
-	// For !go115NewMCentralImpl.
-	sweepSpans [2]gcSweepBuf
-
 	_ uint32 // align uint64 fields on 32-bit for atomics
 
 	// Proportional sweep
@@ -220,7 +198,7 @@ type mheap struct {
 		base, end uintptr
 	}
 
-	// _ uint32 // ensure 64-bit alignment of central
+	_ uint32 // ensure 64-bit alignment of central
 
 	// central free lists for small size classes.
 	// the padding makes sure that the mcentrals are
@@ -719,8 +697,6 @@ func pageIndexOf(p uintptr) (arena *heapArena, pageIdx uintptr, pageMask uint8)
 // Initialize the heap.
 func (h *mheap) init() {
 	lockInit(&h.lock, lockRankMheap)
-	lockInit(&h.sweepSpans[0].spineLock, lockRankSpine)
-	lockInit(&h.sweepSpans[1].spineLock, lockRankSpine)
 	lockInit(&h.speciallock, lockRankMheapSpecial)
 
 	h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
@@ -1294,16 +1270,6 @@ HaveSpan:
 	h.setSpans(s.base(), npages, s)
 
 	if !manual {
-		if !go115NewMCentralImpl {
-			// Add to swept in-use list.
-			//
-			// This publishes the span to root marking.
-			//
-			// h.sweepgen is guaranteed to only change during STW,
-			// and preemption is disabled in the page allocator.
-			h.sweepSpans[h.sweepgen/2%2].push(s)
-		}
-
 		// Mark in-use span in arena page bitmap.
 		//
 		// This publishes the span to the page sweeper, so
-- 
cgit v1.3


From a61a3c378d9ce71d9b97a1b4fb3320b8b3d6a599 Mon Sep 17 00:00:00 2001
From: Heisenberg <lziqiang1@gmail.com>
Date: Thu, 11 Jun 2020 11:17:20 +0800
Subject: runtime: use the CBZ instruction in the assembler

Use the CBZ instruction to replace the CMP $0 / BEQ compare-and-branch pairs in the arm64 rt0 assembly files.

Change-Id: Id6c03e9af13aadafc3ad3953f82d2ffa29c12926
Reviewed-on: https://go-review.googlesource.com/c/go/+/237497
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/rt0_freebsd_arm64.s | 3 +--
 src/runtime/rt0_netbsd_arm64.s  | 3 +--
 src/runtime/rt0_openbsd_arm64.s | 3 +--
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/rt0_freebsd_arm64.s b/src/runtime/rt0_freebsd_arm64.s
index 3a348c33e2..a938d98262 100644
--- a/src/runtime/rt0_freebsd_arm64.s
+++ b/src/runtime/rt0_freebsd_arm64.s
@@ -45,8 +45,7 @@ TEXT _rt0_arm64_freebsd_lib(SB),NOSPLIT,$184
 
 	// Create a new thread to do the runtime initialization and return.
 	MOVD	_cgo_sys_thread_create(SB), R4
-	CMP	$0, R4
-	BEQ	nocgo
+	CBZ	R4, nocgo
 	MOVD	$_rt0_arm64_freebsd_lib_go(SB), R0
 	MOVD	$0, R1
 	SUB	$16, RSP	// reserve 16 bytes for sp-8 where fp may be saved.
diff --git a/src/runtime/rt0_netbsd_arm64.s b/src/runtime/rt0_netbsd_arm64.s
index 75ecbe5176..2f3b5a5a87 100644
--- a/src/runtime/rt0_netbsd_arm64.s
+++ b/src/runtime/rt0_netbsd_arm64.s
@@ -44,8 +44,7 @@ TEXT _rt0_arm64_netbsd_lib(SB),NOSPLIT,$184
 
 	// Create a new thread to do the runtime initialization and return.
 	MOVD	_cgo_sys_thread_create(SB), R4
-	CMP	$0, R4
-	BEQ	nocgo
+	CBZ	R4, nocgo
 	MOVD	$_rt0_arm64_netbsd_lib_go(SB), R0
 	MOVD	$0, R1
 	SUB	$16, RSP		// reserve 16 bytes for sp-8 where fp may be saved.
diff --git a/src/runtime/rt0_openbsd_arm64.s b/src/runtime/rt0_openbsd_arm64.s
index 12408f2eec..722fab6129 100644
--- a/src/runtime/rt0_openbsd_arm64.s
+++ b/src/runtime/rt0_openbsd_arm64.s
@@ -50,8 +50,7 @@ TEXT _rt0_arm64_openbsd_lib(SB),NOSPLIT,$184
 
 	// Create a new thread to do the runtime initialization and return.
 	MOVD	_cgo_sys_thread_create(SB), R4
-	CMP	$0, R4
-	BEQ	nocgo
+	CBZ	R4, nocgo
 	MOVD	$_rt0_arm64_openbsd_lib_go(SB), R0
 	MOVD	$0, R1
 	SUB	$16, RSP		// reserve 16 bytes for sp-8 where fp may be saved.
-- 
cgit v1.3


From ba97be4b58241bebbc4ff70574bd82152ab19ffe Mon Sep 17 00:00:00 2001
From: liu-xuewen <liiuxuewen@gmail.com>
Date: Mon, 13 Jul 2020 09:15:38 +0000
Subject: runtime: remove tracebackinit and unused skipPC

CL [152537](https://go-review.googlesource.com/c/go/+/152537/) changed the way inlined frames are represented in tracebacks to no longer use skipPC, leaving skipPC and tracebackinit (which only initialized it) unused.

Change-Id: I42386fdcc5cf72f3c122e789b6af9cbd0c6bed4b
GitHub-Last-Rev: 79c26dcd532907eda4ffc30951845c1c01243501
GitHub-Pull-Request: golang/go#39829
Reviewed-on: https://go-review.googlesource.com/c/go/+/239701
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/runtime/asm.s        | 21 ---------------------
 src/runtime/proc.go      |  1 -
 src/runtime/traceback.go | 13 -------------
 3 files changed, 35 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/asm.s b/src/runtime/asm.s
index 95a3424de2..27d8df9e06 100644
--- a/src/runtime/asm.s
+++ b/src/runtime/asm.s
@@ -11,24 +11,3 @@
 DATA runtime·no_pointers_stackmap+0x00(SB)/4, $2
 DATA runtime·no_pointers_stackmap+0x04(SB)/4, $0
 GLOBL runtime·no_pointers_stackmap(SB),RODATA, $8
-
-// NaCl requires that these skips be verifiable machine code.
-#ifdef GOARCH_amd64
-#define SKIP4 BYTE $0x90; BYTE $0x90; BYTE $0x90; BYTE $0x90
-#endif
-#ifdef GOARCH_386
-#define SKIP4 BYTE $0x90; BYTE $0x90; BYTE $0x90; BYTE $0x90
-#endif
-#ifdef GOARCH_wasm
-#define SKIP4 UNDEF; UNDEF; UNDEF; UNDEF
-#endif
-#ifndef SKIP4
-#define SKIP4 WORD $0
-#endif
-
-#define SKIP16 SKIP4; SKIP4; SKIP4; SKIP4
-#define SKIP64 SKIP16; SKIP16; SKIP16; SKIP16
-
-// This function must be sizeofSkipFunction bytes.
-TEXT runtime·skipPleaseUseCallersFrames(SB),NOSPLIT,$0-0
-	SKIP64; SKIP64; SKIP64; SKIP64
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index ed7e2128ae..9a358cd529 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -558,7 +558,6 @@ func schedinit() {
 
 	sched.maxmcount = 10000
 
-	tracebackinit()
 	moduledataverify()
 	stackinit()
 	mallocinit()
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 96e552524e..7850eceafa 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -36,16 +36,6 @@ import (
 
 const usesLR = sys.MinFrameSize > 0
 
-var skipPC uintptr
-
-func tracebackinit() {
-	// Go variable initialization happens late during runtime startup.
-	// Instead of initializing the variables above in the declarations,
-	// schedinit calls this function so that the variables are
-	// initialized and available earlier in the startup sequence.
-	skipPC = funcPC(skipPleaseUseCallersFrames)
-}
-
 // Traceback over the deferred function calls.
 // Report them like calls that have been invoked but not started executing yet.
 func tracebackdefers(gp *g, callback func(*stkframe, unsafe.Pointer) bool, v unsafe.Pointer) {
@@ -83,9 +73,6 @@ func tracebackdefers(gp *g, callback func(*stkframe, unsafe.Pointer) bool, v uns
 
 const sizeofSkipFunction = 256
 
-// This function is defined in asm.s to be sizeofSkipFunction bytes long.
-func skipPleaseUseCallersFrames()
-
 // Generic traceback. Handles runtime stack prints (pcbuf == nil),
 // the runtime.Callers function (pcbuf != nil), as well as the garbage
 // collector (callback != nil).  A little clunky to merge these, but avoids
-- 
cgit v1.3


From 88c094c96a164aef2134e548d495c4bc14dc4687 Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Thu, 25 Jun 2020 09:10:23 -0700
Subject: runtime: print faulting instruction on a SIGFPE

Just like SIGILL, it might be useful to see what the instruction
that generated the SIGFPE is.

Update #39816

Change-Id: I8b2ff692998f0b770289339537dceab96b09d1ee
Reviewed-on: https://go-review.googlesource.com/c/go/+/239999
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/runtime/signal_unix.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/runtime')

diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go
index dd6d79f8ec..6a11c91fb9 100644
--- a/src/runtime/signal_unix.go
+++ b/src/runtime/signal_unix.go
@@ -616,7 +616,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
 		print("signal arrived during cgo execution\n")
 		gp = _g_.m.lockedg.ptr()
 	}
-	if sig == _SIGILL {
+	if sig == _SIGILL || sig == _SIGFPE {
 		// It would be nice to know how long the instruction is.
 		// Unfortunately, that's complicated to do in general (mostly for x86
 		// and s930x, but other archs have non-standard instruction lengths also).
-- 
cgit v1.3


From 4e5ed83e8d2fbbbc8f6524f40ab3b6733dc57a38 Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Fri, 3 Jul 2020 11:28:50 -0700
Subject: runtime: use bit-parallel operations to compute heap bit summaries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The new implementation is much faster in all cases.

name                                             old time/op  new time/op  delta
PallocBitsSummarize/Unpacked00-16                 142ns ± 1%     7ns ± 2%  -94.75%  (p=0.000 n=10+9)
PallocBitsSummarize/UnpackedFFFFFFFFFFFFFFFF-16   172ns ± 0%    24ns ± 0%  -86.02%  (p=0.000 n=9+9)
PallocBitsSummarize/UnpackedAA-16                 145ns ± 0%    32ns ± 0%  -78.16%  (p=0.000 n=8+10)
PallocBitsSummarize/UnpackedAAAAAAAAAAAAAAAA-16   172ns ± 0%    33ns ± 0%  -80.95%  (p=0.000 n=9+9)
PallocBitsSummarize/Unpacked80000000AAAAAAAA-16   162ns ± 1%    60ns ± 0%  -62.69%  (p=0.000 n=10+9)
PallocBitsSummarize/UnpackedAAAAAAAA00000001-16   163ns ± 0%    68ns ± 1%  -58.47%  (p=0.000 n=8+10)
PallocBitsSummarize/UnpackedBBBBBBBBBBBBBBBB-16   172ns ± 0%    35ns ± 0%  -79.70%  (p=0.000 n=9+9)
PallocBitsSummarize/Unpacked80000000BBBBBBBB-16   161ns ± 0%    63ns ± 0%  -60.61%  (p=0.000 n=8+10)
PallocBitsSummarize/UnpackedBBBBBBBB00000001-16   163ns ± 0%    60ns ± 0%  -63.14%  (p=0.000 n=9+10)
PallocBitsSummarize/UnpackedCCCCCCCCCCCCCCCC-16   172ns ± 0%    39ns ± 0%  -77.41%  (p=0.000 n=7+10)
PallocBitsSummarize/Unpacked4444444444444444-16   172ns ± 0%    39ns ± 0%  -77.42%  (p=0.000 n=7+10)
PallocBitsSummarize/Unpacked4040404040404040-16   173ns ± 2%    51ns ± 1%  -70.55%  (p=0.000 n=10+10)
PallocBitsSummarize/Unpacked4000400040004000-16   160ns ± 1%    53ns ± 0%  -66.78%  (p=0.000 n=10+10)
PallocBitsSummarize/Unpacked1000404044CCAAFF-16   169ns ± 1%    59ns ± 1%  -65.28%  (p=0.000 n=10+10)

Change-Id: I94daa645b76a9cf9c93edeb2058d7132216fcb72
Reviewed-on: https://go-review.googlesource.com/c/go/+/240900
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
---
 src/runtime/mpallocbits.go      | 147 +++++++++++++++++++++++-----------------
 src/runtime/mpallocbits_test.go |  36 ++++++----
 2 files changed, 108 insertions(+), 75 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/mpallocbits.go b/src/runtime/mpallocbits.go
index a8011341bc..ff79bfbc1a 100644
--- a/src/runtime/mpallocbits.go
+++ b/src/runtime/mpallocbits.go
@@ -120,78 +120,99 @@ func (b *pageBits) popcntRange(i, n uint) (s uint) {
 // sake of documentation, 0s are free pages and 1s are allocated pages.
 type pallocBits pageBits
 
-// consec8tab is a table containing the number of consecutive
-// zero bits for any uint8 value.
-//
-// The table is generated by calling consec8(i) for each
-// possible uint8 value, which is defined as:
-//
-// // consec8 counts the maximum number of consecutive 0 bits
-// // in a uint8.
-// func consec8(n uint8) int {
-// 	n = ^n
-// 	i := 0
-// 	for n != 0 {
-// 		n &= (n << 1)
-// 		i++
-// 	}
-// 	return i
-// }
-var consec8tab = [256]uint{
-	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
-	4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-	5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2,
-	4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2,
-	6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
-	4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
-	5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1,
-	4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
-	7, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3,
-	4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2,
-	5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1,
-	4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
-	6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
-	4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
-	5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1,
-	4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 0,
-}
-
 // summarize returns a packed summary of the bitmap in pallocBits.
 func (b *pallocBits) summarize() pallocSum {
-	// TODO(mknyszek): There may be something more clever to be done
-	// here to make the summarize operation more efficient. For example,
-	// we can compute start and end with 64-bit wide operations easily,
-	// but max is a bit more complex. Perhaps there exists some way to
-	// leverage the 64-bit start and end to our advantage?
-	var start, max, end uint
+	var start, max, cur uint
+	const notSetYet = ^uint(0) // sentinel for start value
+	start = notSetYet
 	for i := 0; i < len(b); i++ {
-		a := b[i]
-		for j := 0; j < 64; j += 8 {
-			k := uint8(a >> j)
-
-			// Compute start.
-			si := uint(sys.TrailingZeros8(k))
-			if start == uint(i*64+j) {
-				start += si
-			}
+		x := b[i]
+		if x == 0 {
+			cur += 64
+			continue
+		}
+		t := uint(sys.TrailingZeros64(x))
+		l := uint(sys.LeadingZeros64(x))
 
-			// Compute max.
-			if end+si > max {
-				max = end + si
-			}
-			if mi := consec8tab[k]; mi > max {
-				max = mi
+		// Finish any region spanning the uint64s
+		cur += t
+		if start == notSetYet {
+			start = cur
+		}
+		if cur > max {
+			max = cur
+		}
+		// Final region that might span to next uint64
+		cur = l
+	}
+	if start == notSetYet {
+		// Made it all the way through without finding a single 1 bit.
+		const n = uint(64 * len(b))
+		return packPallocSum(n, n, n)
+	}
+	if cur > max {
+		max = cur
+	}
+	if max >= 64-2 {
+		// There is no way an internal run of zeros could beat max.
+		return packPallocSum(start, max, cur)
+	}
+	// Now look inside each uint64 for runs of zeros.
+	// All uint64s must be nonzero, or we would have aborted above.
+outer:
+	for i := 0; i < len(b); i++ {
+		x := b[i]
+
+		// Look inside this uint64. We have a pattern like
+		// 000000 1xxxxx1 000000
+		// We need to look inside the 1xxxxx1 for any contiguous
+		// region of zeros.
+
+		// We already know the trailing zeros are no larger than max. Remove them.
+		x >>= sys.TrailingZeros64(x) & 63
+		if x&(x+1) == 0 { // no more zeros (except at the top).
+			continue
+		}
+
+		// Strategy: shrink all runs of zeros by max. If any runs of zero
+		// remain, then we've identified a larger maximum zero run.
+		p := max     // number of zeros we still need to shrink by.
+		k := uint(1) // current minimum length of runs of ones in x.
+		for {
+			// Shrink all runs of zeros by p places (except the top zeros).
+			for p > 0 {
+				if p <= k {
+					// Shift p ones down into the top of each run of zeros.
+					x |= x >> (p & 63)
+					if x&(x+1) == 0 { // no more zeros (except at the top).
+						continue outer
+					}
+					break
+				}
+				// Shift k ones down into the top of each run of zeros.
+				x |= x >> (k & 63)
+				if x&(x+1) == 0 { // no more zeros (except at the top).
+					continue outer
+				}
+				p -= k
+				// We've just doubled the minimum length of 1-runs.
+				// This allows us to shift farther in the next iteration.
+				k *= 2
 			}
 
-			// Compute end.
-			if k == 0 {
-				end += 8
-			} else {
-				end = uint(sys.LeadingZeros8(k))
+			// The length of the lowest-order zero run is an increment to our maximum.
+			j := uint(sys.TrailingZeros64(^x)) // count contiguous trailing ones
+			x >>= j & 63                       // remove trailing ones
+			j = uint(sys.TrailingZeros64(x))   // count contiguous trailing zeros
+			x >>= j & 63                       // remove zeros
+			max += j                           // we have a new maximum!
+			if x&(x+1) == 0 {                  // no more zeros (except at the top).
+				continue outer
 			}
+			p = j // remove j more zeros from each zero run.
 		}
 	}
-	return packPallocSum(start, max, end)
+	return packPallocSum(start, max, cur)
 }
 
 // find searches for npages contiguous free pages in pallocBits and returns
diff --git a/src/runtime/mpallocbits_test.go b/src/runtime/mpallocbits_test.go
index 71a29f3b3a..42268a1698 100644
--- a/src/runtime/mpallocbits_test.go
+++ b/src/runtime/mpallocbits_test.go
@@ -101,7 +101,7 @@ func invertPallocBits(b *PallocBits) {
 
 // Ensures two packed summaries are identical, and reports a detailed description
 // of the difference if they're not.
-func checkPallocSum(t *testing.T, got, want PallocSum) {
+func checkPallocSum(t testing.TB, got, want PallocSum) {
 	if got.Start() != want.Start() {
 		t.Errorf("inconsistent start: got %d, want %d", got.Start(), want.Start())
 	}
@@ -297,17 +297,29 @@ func TestPallocBitsSummarize(t *testing.T) {
 
 // Benchmarks how quickly we can summarize a PallocBits.
 func BenchmarkPallocBitsSummarize(b *testing.B) {
-	buf0 := new(PallocBits)
-	buf1 := new(PallocBits)
-	for i := 0; i < len(buf1); i++ {
-		buf1[i] = ^uint64(0)
-	}
-	bufa := new(PallocBits)
-	for i := 0; i < len(bufa); i++ {
-		bufa[i] = 0xaa
-	}
-	for _, buf := range []*PallocBits{buf0, buf1, bufa} {
-		b.Run(fmt.Sprintf("Unpacked%02X", buf[0]), func(b *testing.B) {
+	patterns := []uint64{
+		0,
+		^uint64(0),
+		0xaa,
+		0xaaaaaaaaaaaaaaaa,
+		0x80000000aaaaaaaa,
+		0xaaaaaaaa00000001,
+		0xbbbbbbbbbbbbbbbb,
+		0x80000000bbbbbbbb,
+		0xbbbbbbbb00000001,
+		0xcccccccccccccccc,
+		0x4444444444444444,
+		0x4040404040404040,
+		0x4000400040004000,
+		0x1000404044ccaaff,
+	}
+	for _, p := range patterns {
+		buf := new(PallocBits)
+		for i := 0; i < len(buf); i++ {
+			buf[i] = p
+		}
+		b.Run(fmt.Sprintf("Unpacked%02X", p), func(b *testing.B) {
+			checkPallocSum(b, buf.Summarize(), SummarizeSlow(buf))
 			for i := 0; i < b.N; i++ {
 				buf.Summarize()
 			}
-- 
cgit v1.3


From 8b8f926fc3f0e8f002d0a8e97aab9500e4db83a7 Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Mon, 6 Jul 2020 20:46:31 -0700
Subject: runtime: bit parallel implementation of findBitRange64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use a bit-parallel implementation of findBitRange64.
It uses a repeated shift-'N-and technique to erase all the
free marks that are too small for the allocation.

Also some small improvements to find1.

name                                             old time/op  new time/op  delta
FindBitRange64/Pattern00Size2-16                 4.19ns ± 0%  2.26ns ± 0%   -46.04%  (p=0.000 n=10+8)
FindBitRange64/Pattern00Size8-16                 4.19ns ± 0%  2.12ns ± 0%   -49.35%  (p=0.000 n=9+10)
FindBitRange64/Pattern00Size32-16                4.20ns ± 0%  2.12ns ± 0%   -49.49%  (p=0.000 n=10+8)
FindBitRange64/PatternFFFFFFFFFFFFFFFFSize2-16   2.13ns ± 0%  2.27ns ± 0%    +6.28%  (p=0.000 n=10+10)
FindBitRange64/PatternFFFFFFFFFFFFFFFFSize8-16   2.13ns ± 0%  4.46ns ± 0%  +109.39%  (p=0.000 n=10+9)
FindBitRange64/PatternFFFFFFFFFFFFFFFFSize32-16  2.13ns ± 1%  5.58ns ± 0%  +162.37%  (p=0.000 n=10+9)
FindBitRange64/PatternAASize2-16                 22.2ns ± 0%   2.3ns ± 0%   -89.82%  (p=0.000 n=9+8)
FindBitRange64/PatternAASize8-16                 22.2ns ± 0%   2.1ns ± 1%   -90.41%  (p=0.000 n=9+10)
FindBitRange64/PatternAASize32-16                22.2ns ± 0%   2.1ns ± 1%   -90.43%  (p=0.000 n=10+10)
FindBitRange64/PatternAAAAAAAAAAAAAAAASize2-16    156ns ± 1%     2ns ± 0%   -98.54%  (p=0.000 n=10+10)
FindBitRange64/PatternAAAAAAAAAAAAAAAASize8-16    155ns ± 1%     2ns ± 0%   -98.63%  (p=0.000 n=10+8)
FindBitRange64/PatternAAAAAAAAAAAAAAAASize32-16   155ns ± 0%     2ns ± 1%   -98.63%  (p=0.000 n=8+10)
FindBitRange64/Pattern80000000AAAAAAAASize2-16   81.2ns ± 0%   2.3ns ± 1%   -97.21%  (p=0.000 n=10+10)
FindBitRange64/Pattern80000000AAAAAAAASize8-16   81.1ns ± 0%   2.1ns ± 0%   -97.39%  (p=0.000 n=10+9)
FindBitRange64/Pattern80000000AAAAAAAASize32-16  81.1ns ± 0%   2.1ns ± 0%   -97.38%  (p=0.000 n=10+10)
FindBitRange64/PatternAAAAAAAA00000001Size2-16   76.8ns ± 1%   2.3ns ± 0%   -97.05%  (p=0.000 n=10+10)
FindBitRange64/PatternAAAAAAAA00000001Size8-16   76.6ns ± 0%   2.1ns ± 0%   -97.23%  (p=0.000 n=8+10)
FindBitRange64/PatternAAAAAAAA00000001Size32-16  76.7ns ± 0%   2.1ns ± 0%   -97.23%  (p=0.000 n=9+9)
FindBitRange64/PatternBBBBBBBBBBBBBBBBSize2-16   2.13ns ± 0%  2.27ns ± 0%    +6.57%  (p=0.000 n=8+8)
FindBitRange64/PatternBBBBBBBBBBBBBBBBSize8-16   76.7ns ± 0%   2.9ns ± 0%   -96.20%  (p=0.000 n=9+10)
FindBitRange64/PatternBBBBBBBBBBBBBBBBSize32-16  76.7ns ± 0%   2.9ns ± 0%   -96.20%  (p=0.000 n=10+10)
FindBitRange64/Pattern80000000BBBBBBBBSize2-16   2.12ns ± 0%  2.27ns ± 1%    +6.74%  (p=0.000 n=10+10)
FindBitRange64/Pattern80000000BBBBBBBBSize8-16   44.8ns ± 0%   2.9ns ± 0%   -93.49%  (p=0.000 n=9+10)
FindBitRange64/Pattern80000000BBBBBBBBSize32-16  44.9ns ± 0%   2.9ns ± 0%   -93.49%  (p=0.000 n=10+8)
FindBitRange64/PatternBBBBBBBB00000001Size2-16   4.20ns ± 1%  2.27ns ± 1%   -46.02%  (p=0.000 n=10+10)
FindBitRange64/PatternBBBBBBBB00000001Size8-16   44.9ns ± 0%   2.9ns ± 1%   -93.51%  (p=0.000 n=10+9)
FindBitRange64/PatternBBBBBBBB00000001Size32-16  44.9ns ± 0%   2.9ns ± 0%   -93.51%  (p=0.000 n=10+9)
FindBitRange64/PatternCCCCCCCCCCCCCCCCSize2-16   4.19ns ± 0%  2.26ns ± 0%   -46.10%  (p=0.000 n=10+10)
FindBitRange64/PatternCCCCCCCCCCCCCCCCSize8-16   76.5ns ± 0%   2.9ns ± 0%   -96.19%  (p=0.000 n=8+7)
FindBitRange64/PatternCCCCCCCCCCCCCCCCSize32-16  76.5ns ± 0%   2.9ns ± 0%   -96.19%  (p=0.000 n=10+8)
FindBitRange64/Pattern4444444444444444Size2-16   76.4ns ± 0%   2.3ns ± 0%   -97.04%  (p=0.000 n=8+10)
FindBitRange64/Pattern4444444444444444Size8-16   76.5ns ± 0%   2.1ns ± 0%   -97.23%  (p=0.000 n=9+10)
FindBitRange64/Pattern4444444444444444Size32-16  76.5ns ± 0%   2.1ns ± 0%   -97.23%  (p=0.000 n=8+10)
FindBitRange64/Pattern4040404040404040Size2-16   40.3ns ± 0%   2.3ns ± 0%   -94.38%  (p=0.000 n=7+10)
FindBitRange64/Pattern4040404040404040Size8-16   40.2ns ± 0%   2.1ns ± 0%   -94.75%  (p=0.000 n=10+10)
FindBitRange64/Pattern4040404040404040Size32-16  40.2ns ± 0%   2.1ns ± 0%   -94.76%  (p=0.000 n=10+6)
FindBitRange64/Pattern4000400040004000Size2-16   22.2ns ± 0%   2.2ns ± 0%   -89.86%  (p=0.001 n=8+9)
FindBitRange64/Pattern4000400040004000Size8-16   22.2ns ± 0%   2.1ns ± 0%   -90.52%  (p=0.000 n=8+10)
FindBitRange64/Pattern4000400040004000Size32-16  22.2ns ± 1%   2.1ns ± 0%   -90.50%  (p=0.000 n=10+10)

The cases that slow down aren't really that slow, and those inputs
never actually occur (there's a short circuit before the call to
findBitRange64 for that case).

Change-Id: I50fae62915098032d8ce7fa57ef29eee9deb01ba
Reviewed-on: https://go-review.googlesource.com/c/go/+/241279
Reviewed-by: Michael Knyszek <mknyszek@google.com>
---
 src/runtime/mpallocbits.go      | 41 ++++++++++++++++++++++++++++++-----------
 src/runtime/mpallocbits_test.go | 33 +++++++++++++++++++++++++++++++--
 2 files changed, 61 insertions(+), 13 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/mpallocbits.go b/src/runtime/mpallocbits.go
index ff79bfbc1a..ff112300c3 100644
--- a/src/runtime/mpallocbits.go
+++ b/src/runtime/mpallocbits.go
@@ -218,7 +218,7 @@ outer:
 // find searches for npages contiguous free pages in pallocBits and returns
 // the index where that run starts, as well as the index of the first free page
 // it found in the search. searchIdx represents the first known free page and
-// where to begin the search from.
+// where to begin the next search from.
 //
 // If find fails to find any free space, it returns an index of ^uint(0) and
 // the new searchIdx should be ignored.
@@ -239,9 +239,10 @@ func (b *pallocBits) find(npages uintptr, searchIdx uint) (uint, uint) {
 //
 // See find for an explanation of the searchIdx parameter.
 func (b *pallocBits) find1(searchIdx uint) uint {
+	_ = b[0] // lift nil check out of loop
 	for i := searchIdx / 64; i < uint(len(b)); i++ {
 		x := b[i]
-		if x == ^uint64(0) {
+		if ^x == 0 {
 			continue
 		}
 		return i*64 + uint(sys.TrailingZeros64(^x))
@@ -263,18 +264,18 @@ func (b *pallocBits) findSmallN(npages uintptr, searchIdx uint) (uint, uint) {
 	end, newSearchIdx := uint(0), ^uint(0)
 	for i := searchIdx / 64; i < uint(len(b)); i++ {
 		bi := b[i]
-		if bi == ^uint64(0) {
+		if ^bi == 0 {
 			end = 0
 			continue
 		}
 		// First see if we can pack our allocation in the trailing
 		// zeros plus the end of the last 64 bits.
-		start := uint(sys.TrailingZeros64(bi))
 		if newSearchIdx == ^uint(0) {
 			// The new searchIdx is going to be at these 64 bits after any
 			// 1s we file, so count trailing 1s.
 			newSearchIdx = i*64 + uint(sys.TrailingZeros64(^bi))
 		}
+		start := uint(sys.TrailingZeros64(bi))
 		if end+start >= uint(npages) {
 			return i*64 - end, newSearchIdx
 		}
@@ -369,15 +370,33 @@ func (b *pallocBits) pages64(i uint) uint64 {
 // findBitRange64 returns the bit index of the first set of
 // n consecutive 1 bits. If no consecutive set of 1 bits of
 // size n may be found in c, then it returns an integer >= 64.
+// n must be > 0.
 func findBitRange64(c uint64, n uint) uint {
-	i := uint(0)
-	cont := uint(sys.TrailingZeros64(^c))
-	for cont < n && i < 64 {
-		i += cont
-		i += uint(sys.TrailingZeros64(c >> i))
-		cont = uint(sys.TrailingZeros64(^(c >> i)))
+	// This implementation is based on shrinking the length of
+	// runs of contiguous 1 bits. We remove the top n-1 1 bits
+	// from each run of 1s, then look for the first remaining 1 bit.
+	p := n - 1   // number of 1s we want to remove.
+	k := uint(1) // current minimum width of runs of 0 in c.
+	for p > 0 {
+		if p <= k {
+			// Shift p 0s down into the top of each run of 1s.
+			c &= c >> (p & 63)
+			break
+		}
+		// Shift k 0s down into the top of each run of 1s.
+		c &= c >> (k & 63)
+		if c == 0 {
+			return 64
+		}
+		p -= k
+		// We've just doubled the minimum length of 0-runs.
+		// This allows us to shift farther in the next iteration.
+		k *= 2
 	}
-	return i
+	// Find first remaining 1.
+	// Since we shrunk from the top down, the first 1 is in
+	// its correct original position.
+	return uint(sys.TrailingZeros64(c))
 }
 
 // pallocData encapsulates pallocBits and a bitmap for
diff --git a/src/runtime/mpallocbits_test.go b/src/runtime/mpallocbits_test.go
index 42268a1698..5095e24220 100644
--- a/src/runtime/mpallocbits_test.go
+++ b/src/runtime/mpallocbits_test.go
@@ -504,10 +504,9 @@ func TestFindBitRange64(t *testing.T) {
 			t.Errorf("case (%016x, %d): got %d, want %d", x, n, i, result)
 		}
 	}
-	for i := uint(0); i <= 64; i++ {
+	for i := uint(1); i <= 64; i++ {
 		check(^uint64(0), i, 0)
 	}
-	check(0, 0, 0)
 	for i := uint(1); i <= 64; i++ {
 		check(0, i, ^uint(0))
 	}
@@ -520,3 +519,33 @@ func TestFindBitRange64(t *testing.T) {
 	check(0xffff03ff0107ffff, 16, 0)
 	check(0x0fff03ff01079fff, 16, ^uint(0))
 }
+
+func BenchmarkFindBitRange64(b *testing.B) {
+	patterns := []uint64{
+		0,
+		^uint64(0),
+		0xaa,
+		0xaaaaaaaaaaaaaaaa,
+		0x80000000aaaaaaaa,
+		0xaaaaaaaa00000001,
+		0xbbbbbbbbbbbbbbbb,
+		0x80000000bbbbbbbb,
+		0xbbbbbbbb00000001,
+		0xcccccccccccccccc,
+		0x4444444444444444,
+		0x4040404040404040,
+		0x4000400040004000,
+	}
+	sizes := []uint{
+		2, 8, 32,
+	}
+	for _, pattern := range patterns {
+		for _, size := range sizes {
+			b.Run(fmt.Sprintf("Pattern%02XSize%d", pattern, size), func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					FindBitRange64(pattern, size)
+				}
+			})
+		}
+	}
+}
-- 
cgit v1.3


From 7fbd8c75c6c57e713069a3a405e5cde26cfae090 Mon Sep 17 00:00:00 2001
From: lihaowei <haoweili35@gmail.com>
Date: Fri, 14 Aug 2020 10:35:46 +0000
Subject: all: fix spelling mistakes

Change-Id: I7d512281d8442d306594b57b5deaecd132b5ea9e
GitHub-Last-Rev: 251e1d6857516b21fd71f654133f81f23ffec654
GitHub-Pull-Request: golang/go#40793
Reviewed-on: https://go-review.googlesource.com/c/go/+/248441
Reviewed-by: Dave Cheney <dave@cheney.net>
---
 src/bufio/bufio.go     | 2 +-
 src/net/http/client.go | 2 +-
 src/runtime/mheap.go   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/runtime')

diff --git a/src/bufio/bufio.go b/src/bufio/bufio.go
index 7cbd5424ea..6baf9b9e40 100644
--- a/src/bufio/bufio.go
+++ b/src/bufio/bufio.go
@@ -425,7 +425,7 @@ func (b *Reader) ReadLine() (line []byte, isPrefix bool, err error) {
 // of bytes in the combined first two elements, error).
 // The complete result is equal to
 // `bytes.Join(append(fullBuffers, finalFragment), nil)`, which has a
-// length of `totalLen`. The result is strucured in this way to allow callers
+// length of `totalLen`. The result is structured in this way to allow callers
 // to minimize allocations and copies.
 func (b *Reader) collectFragments(delim byte) (fullBuffers [][]byte, finalFragment []byte, totalLen int, err error) {
 	var frag []byte
diff --git a/src/net/http/client.go b/src/net/http/client.go
index 3860d97d8f..6ca0d2e6cf 100644
--- a/src/net/http/client.go
+++ b/src/net/http/client.go
@@ -321,7 +321,7 @@ func knownRoundTripperImpl(rt RoundTripper, req *Request) bool {
 		return true
 	}
 	// There's a very minor chance of a false positive with this.
-	// Insted of detecting our golang.org/x/net/http2.Transport,
+	// Instead of detecting our golang.org/x/net/http2.Transport,
 	// it might detect a Transport type in a different http2
 	// package. But I know of none, and the only problem would be
 	// some temporarily leaked goroutines if the transport didn't
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index cb586171c4..1a57bcd66e 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -42,7 +42,7 @@ const (
 	// roughly 100µs.
 	//
 	// Must be a multiple of the pageInUse bitmap element size and
-	// must also evenly divid pagesPerArena.
+	// must also evenly divide pagesPerArena.
 	pagesPerReclaimerChunk = 512
 )
 
-- 
cgit v1.3


From 613388315e29d4e906805e602602500ca1e7e334 Mon Sep 17 00:00:00 2001
From: Cholerae Hu <choleraehyq@gmail.com>
Date: Mon, 11 May 2020 11:18:57 +0800
Subject: runtime: reduce critical path in injectglist

Change-Id: Ia3fb30ac9add39c803f11f69d967c6604fdeacf8
Reviewed-on: https://go-review.googlesource.com/c/go/+/233217
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/runtime/proc.go | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 9a358cd529..5e38b3194c 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -2575,15 +2575,20 @@ func injectglist(glist *gList) {
 		return
 	}
 
-	lock(&sched.lock)
-	npidle := int(sched.npidle)
+	npidle := int(atomic.Load(&sched.npidle))
+	var globq gQueue
 	var n int
 	for n = 0; n < npidle && !q.empty(); n++ {
-		globrunqput(q.pop())
+		g := q.pop()
+		globq.pushBack(g)
+	}
+	if n > 0 {
+		lock(&sched.lock)
+		globrunqputbatch(&globq, int32(n))
+		unlock(&sched.lock)
+		startIdle(n)
+		qsize -= n
 	}
-	unlock(&sched.lock)
-	startIdle(n)
-	qsize -= n
 
 	if !q.empty() {
 		runqputbatch(pp, &q, qsize)
-- 
cgit v1.3


From 4149493443f09c14d9f0fad7030704ed57149b55 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Mon, 17 Aug 2020 16:32:33 +0200
Subject: runtime: move startupRandomData declaration to os_linux.go

startupRandomData is only used in sysauxv and getRandomData on linux,
thus move it closer to where it is used. Also adjust its godoc comment.

Change-Id: Ice51d579ec33436adbfdf247caf4ba00bae865e0
Reviewed-on: https://go-review.googlesource.com/c/go/+/248761
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/runtime/os_linux.go | 4 ++++
 src/runtime/runtime2.go | 4 ----
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index 22931b4d5c..9702920bcf 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -249,6 +249,10 @@ func sysargs(argc int32, argv **byte) {
 	sysauxv(buf[:])
 }
 
+// startupRandomData holds random bytes initialized at startup. These come from
+// the ELF AT_RANDOM auxiliary vector.
+var startupRandomData []byte
+
 func sysauxv(auxv []uintptr) int {
 	var i int
 	for ; auxv[i] != _AT_NULL; i += 2 {
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 0bddcaa789..959878400d 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -845,10 +845,6 @@ type forcegcstate struct {
 	idle uint32
 }
 
-// startup_random_data holds random bytes initialized at startup. These come from
-// the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.go or os_linux_386.go).
-var startupRandomData []byte
-
 // extendRandom extends the random numbers in r[:n] to the whole slice r.
 // Treats n<0 as n==0.
 func extendRandom(r []byte, n int) {
-- 
cgit v1.3


From b58d29741650c7bf10b17f455666e2727e1cdd2e Mon Sep 17 00:00:00 2001
From: Cherry Zhang <cherryyz@google.com>
Date: Mon, 17 Aug 2020 19:06:19 -0400
Subject: cmd/compile, runtime: mark R12 clobbered for write barrier call on
 PPC64

When external linking, for large binaries, the external linker
may insert a trampoline for the write barrier call, which looks like:

0000000005a98cc8 <__long_branch_runtime.gcWriteBarrier>:
 5a98cc8:       86 01 82 3d     addis   r12,r2,390
 5a98ccc:       d8 bd 8c e9     ld      r12,-16936(r12)
 5a98cd0:       a6 03 89 7d     mtctr   r12
 5a98cd4:       20 04 80 4e     bctr

It clobbers R12 (and CTR, which is never live across a call).

As at compile time we don't know whether the binary is big and
what link mode will be used, I think we need to mark R12 as
clobbered for the write barrier call. For extra safety (future-proofing),
we mark the caller-saved registers that cannot be used for function
arguments, which include R11, as potentially clobbered as well.

Fixes #40851.

Change-Id: Iedd901c5072f1127cc59b0a48cfeb4aaec81b519
Reviewed-on: https://go-review.googlesource.com/c/go/+/248917
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
---
 src/cmd/compile/internal/ssa/gen/PPC64Ops.go |  4 +--
 src/cmd/compile/internal/ssa/opGen.go        |  2 +-
 src/runtime/asm_ppc64x.s                     | 41 ++++++++++++++--------------
 3 files changed, 24 insertions(+), 23 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
index f8bc6cb20b..0261dc283b 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
@@ -645,9 +645,9 @@ func init() {
 		{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
 
 		// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
-		// It preserves R0 through R15, g, and its arguments R20 and R21,
+		// It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and its arguments R20 and R21,
 		// but may clobber anything else, including R31 (REGTMP).
-		{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R20 R21 g")) | buildReg("R31")}, clobberFlags: true, aux: "Sym", symEffect: "None"},
+		{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17 R20 R21 g")) | buildReg("R31")}, clobberFlags: true, aux: "Sym", symEffect: "None"},
 
 		// There are three of these functions so that they can have three different register inputs.
 		// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 408c855dbd..df2a27368b 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -26889,7 +26889,7 @@ var opcodeTable = [...]opInfo{
 				{0, 1048576}, // R20
 				{1, 2097152}, // R21
 			},
-			clobbers: 576460746931503104, // R16 R17 R18 R19 R22 R23 R24 R25 R26 R27 R28 R29 R31 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
+			clobbers: 576460746931312640, // R11 R12 R18 R19 R22 R23 R24 R25 R26 R27 R28 R29 R31 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
 		},
 	},
 	{
diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s
index 11d2f2f51a..23387a2165 100644
--- a/src/runtime/asm_ppc64x.s
+++ b/src/runtime/asm_ppc64x.s
@@ -916,23 +916,23 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1
 // - R20 is the destination of the write
 // - R21 is the value being written at R20.
 // It clobbers condition codes.
-// It does not clobber R0 through R15,
+// It does not clobber R0 through R17 (except special registers),
 // but may clobber any other register, *including* R31.
 TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$112
 	// The standard prologue clobbers R31.
-	// We use R16 and R17 as scratch registers.
-	MOVD	g_m(g), R16
-	MOVD	m_p(R16), R16
-	MOVD	(p_wbBuf+wbBuf_next)(R16), R17
+	// We use R18 and R19 as scratch registers.
+	MOVD	g_m(g), R18
+	MOVD	m_p(R18), R18
+	MOVD	(p_wbBuf+wbBuf_next)(R18), R19
 	// Increment wbBuf.next position.
-	ADD	$16, R17
-	MOVD	R17, (p_wbBuf+wbBuf_next)(R16)
-	MOVD	(p_wbBuf+wbBuf_end)(R16), R16
-	CMP	R16, R17
+	ADD	$16, R19
+	MOVD	R19, (p_wbBuf+wbBuf_next)(R18)
+	MOVD	(p_wbBuf+wbBuf_end)(R18), R18
+	CMP	R18, R19
 	// Record the write.
-	MOVD	R21, -16(R17)	// Record value
-	MOVD	(R20), R16	// TODO: This turns bad writes into bad reads.
-	MOVD	R16, -8(R17)	// Record *slot
+	MOVD	R21, -16(R19)	// Record value
+	MOVD	(R20), R18	// TODO: This turns bad writes into bad reads.
+	MOVD	R18, -8(R19)	// Record *slot
 	// Is the buffer full? (flags set in CMP above)
 	BEQ	flush
 ret:
@@ -956,11 +956,12 @@ flush:
 	MOVD	R8, (FIXED_FRAME+56)(R1)
 	MOVD	R9, (FIXED_FRAME+64)(R1)
 	MOVD	R10, (FIXED_FRAME+72)(R1)
-	MOVD	R11, (FIXED_FRAME+80)(R1)
-	MOVD	R12, (FIXED_FRAME+88)(R1)
+	// R11, R12 may be clobbered by external-linker-inserted trampoline
 	// R13 is REGTLS
-	MOVD	R14, (FIXED_FRAME+96)(R1)
-	MOVD	R15, (FIXED_FRAME+104)(R1)
+	MOVD	R14, (FIXED_FRAME+80)(R1)
+	MOVD	R15, (FIXED_FRAME+88)(R1)
+	MOVD	R16, (FIXED_FRAME+96)(R1)
+	MOVD	R17, (FIXED_FRAME+104)(R1)
 
 	// This takes arguments R20 and R21.
 	CALL	runtime·wbBufFlush(SB)
@@ -975,10 +976,10 @@ flush:
 	MOVD	(FIXED_FRAME+56)(R1), R8
 	MOVD	(FIXED_FRAME+64)(R1), R9
 	MOVD	(FIXED_FRAME+72)(R1), R10
-	MOVD	(FIXED_FRAME+80)(R1), R11
-	MOVD	(FIXED_FRAME+88)(R1), R12
-	MOVD	(FIXED_FRAME+96)(R1), R14
-	MOVD	(FIXED_FRAME+104)(R1), R15
+	MOVD	(FIXED_FRAME+80)(R1), R14
+	MOVD	(FIXED_FRAME+88)(R1), R15
+	MOVD	(FIXED_FRAME+96)(R1), R16
+	MOVD	(FIXED_FRAME+104)(R1), R17
 	JMP	ret
 
 // Note: these functions use a special calling convention to save generated code space.
-- 
cgit v1.3


From 30a68bfb806b5217932e280f5a5f521237e69077 Mon Sep 17 00:00:00 2001
From: Matthew Dempsky <mdempsky@google.com>
Date: Mon, 27 Jul 2020 12:40:18 -0700
Subject: runtime: add "success" field to sudog

The current wakeup protocol for channel communications is that the
second goroutine sets gp.param to the sudog when a value is
successfully communicated over the channel, and to nil when the wakeup
is due to closing the channel.

Setting nil to indicate channel closure works okay for chansend and
chanrecv, because they're only communicating with one channel, so they
know it must be the channel that was closed. However, it means
selectgo has to re-poll all of the channels to figure out which one
was closed.

This commit adds a "success" field to sudog, and changes the wakeup
protocol to always set gp.param to sg, and to use sg.success to
indicate successful communication vs channel closure.

While here, this also reorganizes the chansend code slightly so that
the sudog is still released to the pool if the send blocks and then is
awoken because the channel closed.

Updates #40410.

Change-Id: I6cd9a20ebf9febe370a15af1b8afe24c5539efc6
Reviewed-on: https://go-review.googlesource.com/c/go/+/245019
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/runtime/chan.go     | 25 +++++++++++++++----------
 src/runtime/runtime2.go |  6 ++++++
 src/runtime/select.go   | 19 +++++++------------
 3 files changed, 28 insertions(+), 22 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index f6f4ffd02e..0afe5d962b 100644
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -263,18 +263,19 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
 	}
 	gp.waiting = nil
 	gp.activeStackChans = false
-	if gp.param == nil {
-		if c.closed == 0 {
-			throw("chansend: spurious wakeup")
-		}
-		panic(plainError("send on closed channel"))
-	}
+	closed := !mysg.success
 	gp.param = nil
 	if mysg.releasetime > 0 {
 		blockevent(mysg.releasetime-t0, 2)
 	}
 	mysg.c = nil
 	releaseSudog(mysg)
+	if closed {
+		if c.closed == 0 {
+			throw("chansend: spurious wakeup")
+		}
+		panic(plainError("send on closed channel"))
+	}
 	return true
 }
 
@@ -311,6 +312,7 @@ func send(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) {
 	gp := sg.g
 	unlockf()
 	gp.param = unsafe.Pointer(sg)
+	sg.success = true
 	if sg.releasetime != 0 {
 		sg.releasetime = cputicks()
 	}
@@ -384,7 +386,8 @@ func closechan(c *hchan) {
 			sg.releasetime = cputicks()
 		}
 		gp := sg.g
-		gp.param = nil
+		gp.param = unsafe.Pointer(sg)
+		sg.success = false
 		if raceenabled {
 			raceacquireg(gp, c.raceaddr())
 		}
@@ -402,7 +405,8 @@ func closechan(c *hchan) {
 			sg.releasetime = cputicks()
 		}
 		gp := sg.g
-		gp.param = nil
+		gp.param = unsafe.Pointer(sg)
+		sg.success = false
 		if raceenabled {
 			raceacquireg(gp, c.raceaddr())
 		}
@@ -575,11 +579,11 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool)
 	if mysg.releasetime > 0 {
 		blockevent(mysg.releasetime-t0, 2)
 	}
-	closed := gp.param == nil
+	success := mysg.success
 	gp.param = nil
 	mysg.c = nil
 	releaseSudog(mysg)
-	return true, !closed
+	return true, success
 }
 
 // recv processes a receive operation on a full channel c.
@@ -632,6 +636,7 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) {
 	gp := sg.g
 	unlockf()
 	gp.param = unsafe.Pointer(sg)
+	sg.success = true
 	if sg.releasetime != 0 {
 		sg.releasetime = cputicks()
 	}
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 959878400d..b7d0739e54 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -366,6 +366,12 @@ type sudog struct {
 	// g.selectDone must be CAS'd to win the wake-up race.
 	isSelect bool
 
+	// success indicates whether communication over channel c
+	// succeeded. It is true if the goroutine was awoken because a
+	// value was delivered over channel c, and false if awoken
+	// because c was closed.
+	success bool
+
 	parent   *sudog // semaRoot binary tree
 	waitlink *sudog // g.waiting list or semaRoot
 	waittail *sudog // semaRoot
diff --git a/src/runtime/select.go b/src/runtime/select.go
index a069e3e050..081db7bad4 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -221,12 +221,12 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 		nextp  **sudog
 	)
 
-loop:
 	// pass 1 - look for something already waiting
 	var dfli int
 	var dfl *scase
 	var casi int
 	var cas *scase
+	var caseSuccess bool
 	var recvOK bool
 	for i := 0; i < ncases; i++ {
 		casi = int(pollorder[i])
@@ -331,6 +331,7 @@ loop:
 	// We singly-linked up the SudoGs in lock order.
 	casi = -1
 	cas = nil
+	caseSuccess = false
 	sglist = gp.waiting
 	// Clear all elem before unlinking from gp.waiting.
 	for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink {
@@ -352,6 +353,7 @@ loop:
 			// sg has already been dequeued by the G that woke us up.
 			casi = int(casei)
 			cas = k
+			caseSuccess = sglist.success
 		} else {
 			c = k.c
 			if k.kind == caseSend {
@@ -367,16 +369,7 @@ loop:
 	}
 
 	if cas == nil {
-		// We can wake up with gp.param == nil (so cas == nil)
-		// when a channel involved in the select has been closed.
-		// It is easiest to loop and re-run the operation;
-		// we'll see that it's now closed.
-		// Maybe some day we can signal the close explicitly,
-		// but we'd have to distinguish close-on-reader from close-on-writer.
-		// It's easiest not to duplicate the code and just recheck above.
-		// We know that something closed, and things never un-close,
-		// so we won't block again.
-		goto loop
+		throw("selectgo: bad wakeup")
 	}
 
 	c = cas.c
@@ -386,7 +379,9 @@ loop:
 	}
 
 	if cas.kind == caseRecv {
-		recvOK = true
+		recvOK = caseSuccess
+	} else if cas.kind == caseSend && !caseSuccess {
+		goto sclose
 	}
 
 	if raceenabled {
-- 
cgit v1.3


From 78a1064d5dd05fc669342df3a6a5e11d49749d85 Mon Sep 17 00:00:00 2001
From: Matthew Dempsky <mdempsky@google.com>
Date: Mon, 27 Jul 2020 14:05:05 -0700
Subject: runtime: remove scase.releasetime field

selectgo will report at most one block event, so there's no need to
keep a releasetime for every select case. It suffices to simply track
the releasetime of the case responsible for the wakeup.

Updates #40410.

Change-Id: I72679cd43dde80d7e6dbab21a78952a4372d1e79
Reviewed-on: https://go-review.googlesource.com/c/go/+/245122
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/cmd/compile/internal/gc/select.go |  1 -
 src/runtime/select.go                 | 23 ++++++++++-------------
 2 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/gc/select.go b/src/cmd/compile/internal/gc/select.go
index 49cc23cd3d..eb5ff8469b 100644
--- a/src/cmd/compile/internal/gc/select.go
+++ b/src/cmd/compile/internal/gc/select.go
@@ -386,7 +386,6 @@ func scasetype() *types.Type {
 			namedfield("elem", types.Types[TUNSAFEPTR]),
 			namedfield("kind", types.Types[TUINT16]),
 			namedfield("pc", types.Types[TUINTPTR]),
-			namedfield("releasetime", types.Types[TINT64]),
 		})
 		scase.SetNoalg(true)
 	}
diff --git a/src/runtime/select.go b/src/runtime/select.go
index 081db7bad4..2f8b139155 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -26,11 +26,10 @@ const (
 // Known to compiler.
 // Changes here must also be made in src/cmd/internal/gc/select.go's scasetype.
 type scase struct {
-	c           *hchan         // chan
-	elem        unsafe.Pointer // data element
-	kind        uint16
-	pc          uintptr // race pc (for race detector / msan)
-	releasetime int64
+	c    *hchan         // chan
+	elem unsafe.Pointer // data element
+	kind uint16
+	pc   uintptr // race pc (for race detector / msan)
 }
 
 var (
@@ -142,9 +141,6 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 	var t0 int64
 	if blockprofilerate > 0 {
 		t0 = cputicks()
-		for i := 0; i < ncases; i++ {
-			scases[i].releasetime = -1
-		}
 	}
 
 	// The compiler rewrites selects that statically have
@@ -227,6 +223,7 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 	var casi int
 	var cas *scase
 	var caseSuccess bool
+	var caseReleaseTime int64 = -1
 	var recvOK bool
 	for i := 0; i < ncases; i++ {
 		casi = int(pollorder[i])
@@ -346,14 +343,14 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 		if k.kind == caseNil {
 			continue
 		}
-		if sglist.releasetime > 0 {
-			k.releasetime = sglist.releasetime
-		}
 		if sg == sglist {
 			// sg has already been dequeued by the G that woke us up.
 			casi = int(casei)
 			cas = k
 			caseSuccess = sglist.success
+			if sglist.releasetime > 0 {
+				caseReleaseTime = sglist.releasetime
+			}
 		} else {
 			c = k.c
 			if k.kind == caseSend {
@@ -483,8 +480,8 @@ send:
 	goto retc
 
 retc:
-	if cas.releasetime > 0 {
-		blockevent(cas.releasetime-t0, 1)
+	if caseReleaseTime > 0 {
+		blockevent(caseReleaseTime-t0, 1)
 	}
 	return casi, recvOK
 
-- 
cgit v1.3


From 8a984e8e3f2cf4101f448ea9b9d9880b9e83c11e Mon Sep 17 00:00:00 2001
From: Matthew Dempsky <mdempsky@google.com>
Date: Mon, 27 Jul 2020 14:47:47 -0700
Subject: runtime: omit nil-channel cases from selectgo's orders

Currently, selectgo does an initial pass over the cases array to look
for entries with nil channels, so they can be easily recognized and
skipped later on. But this still involves actually visiting the cases.

This commit changes selectgo to omit cases with nil channels when
constructing pollorder, so that they'll be skipped over entirely later
on. It also checks for caseDefault up front, which will facilitate
changing it to use a "block bool" parameter instead.

Updates #40410.

Change-Id: Icaebcb8f08df03cc33b6d8087616fb5585f7fedd
Reviewed-on: https://go-review.googlesource.com/c/go/+/245123
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/runtime/select.go | 66 +++++++++++++++++++++------------------------------
 1 file changed, 27 insertions(+), 39 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/select.go b/src/runtime/select.go
index 2f8b139155..d540dd2e69 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -45,7 +45,7 @@ func sellock(scases []scase, lockorder []uint16) {
 	var c *hchan
 	for _, o := range lockorder {
 		c0 := scases[o].c
-		if c0 != nil && c0 != c {
+		if c0 != c {
 			c = c0
 			lock(&c.lock)
 		}
@@ -61,11 +61,8 @@ func selunlock(scases []scase, lockorder []uint16) {
 	// the G that calls select runnable again and schedules it for execution.
 	// When the G runs on another M, it locks all the locks and frees sel.
 	// Now if the first M touches sel, it will access freed memory.
-	for i := len(scases) - 1; i >= 0; i-- {
+	for i := len(lockorder) - 1; i >= 0; i-- {
 		c := scases[lockorder[i]].c
-		if c == nil {
-			break
-		}
 		if i > 0 && c == scases[lockorder[i-1]].c {
 			continue // will unlock it on the next iteration
 		}
@@ -129,15 +126,6 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 	pollorder := order1[:ncases:ncases]
 	lockorder := order1[ncases:][:ncases:ncases]
 
-	// Replace send/receive cases involving nil channels with
-	// caseNil so logic below can assume non-nil channel.
-	for i := range scases {
-		cas := &scases[i]
-		if cas.c == nil && cas.kind != caseDefault {
-			*cas = scase{}
-		}
-	}
-
 	var t0 int64
 	if blockprofilerate > 0 {
 		t0 = cputicks()
@@ -152,15 +140,31 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 	// optimizing (and needing to test).
 
 	// generate permuted order
-	for i := 1; i < ncases; i++ {
-		j := fastrandn(uint32(i + 1))
-		pollorder[i] = pollorder[j]
+	dfli := -1
+	norder := 0
+	for i := range scases {
+		cas := &scases[i]
+
+		// Omit cases without channels from the poll and lock orders.
+		if cas.c == nil {
+			if cas.kind == caseDefault {
+				dfli = i
+			}
+			cas.elem = nil // allow GC
+			continue
+		}
+
+		j := fastrandn(uint32(norder + 1))
+		pollorder[norder] = pollorder[j]
 		pollorder[j] = uint16(i)
+		norder++
 	}
+	pollorder = pollorder[:norder]
+	lockorder = lockorder[:norder]
 
 	// sort the cases by Hchan address to get the locking order.
 	// simple heap sort, to guarantee n log n time and constant stack footprint.
-	for i := 0; i < ncases; i++ {
+	for i := range lockorder {
 		j := i
 		// Start with the pollorder to permute cases on the same channel.
 		c := scases[pollorder[i]].c
@@ -171,7 +175,7 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 		}
 		lockorder[j] = pollorder[i]
 	}
-	for i := ncases - 1; i >= 0; i-- {
+	for i := len(lockorder) - 1; i >= 0; i-- {
 		o := lockorder[i]
 		c := scases[o].c
 		lockorder[i] = lockorder[0]
@@ -195,7 +199,7 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 	}
 
 	if debugSelect {
-		for i := 0; i+1 < ncases; i++ {
+		for i := 0; i+1 < len(lockorder); i++ {
 			if scases[lockorder[i]].c.sortkey() > scases[lockorder[i+1]].c.sortkey() {
 				print("i=", i, " x=", lockorder[i], " y=", lockorder[i+1], "\n")
 				throw("select: broken sort")
@@ -218,22 +222,17 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 	)
 
 	// pass 1 - look for something already waiting
-	var dfli int
-	var dfl *scase
 	var casi int
 	var cas *scase
 	var caseSuccess bool
 	var caseReleaseTime int64 = -1
 	var recvOK bool
-	for i := 0; i < ncases; i++ {
-		casi = int(pollorder[i])
+	for _, casei := range pollorder {
+		casi = int(casei)
 		cas = &scases[casi]
 		c = cas.c
 
 		switch cas.kind {
-		case caseNil:
-			continue
-
 		case caseRecv:
 			sg = c.sendq.dequeue()
 			if sg != nil {
@@ -260,17 +259,12 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 			if c.qcount < c.dataqsiz {
 				goto bufsend
 			}
-
-		case caseDefault:
-			dfli = casi
-			dfl = cas
 		}
 	}
 
-	if dfl != nil {
+	if dfli >= 0 {
 		selunlock(scases, lockorder)
 		casi = dfli
-		cas = dfl
 		goto retc
 	}
 
@@ -283,9 +277,6 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 	for _, casei := range lockorder {
 		casi = int(casei)
 		cas = &scases[casi]
-		if cas.kind == caseNil {
-			continue
-		}
 		c = cas.c
 		sg := acquireSudog()
 		sg.g = gp
@@ -340,9 +331,6 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 
 	for _, casei := range lockorder {
 		k = &scases[casei]
-		if k.kind == caseNil {
-			continue
-		}
 		if sg == sglist {
 			// sg has already been dequeued by the G that woke us up.
 			casi = int(casei)
-- 
cgit v1.3


From d36bc7d78ad226b20056c08fb8bca041e25b3d1d Mon Sep 17 00:00:00 2001
From: Matthew Dempsky <mdempsky@google.com>
Date: Mon, 27 Jul 2020 15:20:18 -0700
Subject: runtime: split PCs out of scase

Per-case PCs are only needed for race detector builds, so this allows
skipping allocating stack space for them for non-race builds.

It's possible to arrange the PCs and order arrays consecutively in
memory so that we could just reuse the order0 pointer to identify
both. However, there's more risk of that silently going wrong, so this
commit passes them as separate arguments for now. We can revisit this
in the future.

Updates #40410.

Change-Id: I8468bc25749e559891cb0cb007d1cc4a40fdd0f8
Reviewed-on: https://go-review.googlesource.com/c/go/+/245124
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/cmd/compile/internal/gc/builtin.go         | 199 +++++++++++++------------
 src/cmd/compile/internal/gc/builtin/runtime.go |   4 +-
 src/cmd/compile/internal/gc/select.go          |  18 ++-
 src/runtime/select.go                          |  50 +++++--
 4 files changed, 153 insertions(+), 118 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/gc/builtin.go b/src/cmd/compile/internal/gc/builtin.go
index 2cf2f4687e..eafdb0ebe7 100644
--- a/src/cmd/compile/internal/gc/builtin.go
+++ b/src/cmd/compile/internal/gc/builtin.go
@@ -126,74 +126,74 @@ var runtimeDecls = [...]struct {
 	{"selectnbsend", funcTag, 94},
 	{"selectnbrecv", funcTag, 95},
 	{"selectnbrecv2", funcTag, 97},
-	{"selectsetpc", funcTag, 62},
-	{"selectgo", funcTag, 98},
+	{"selectsetpc", funcTag, 98},
+	{"selectgo", funcTag, 99},
 	{"block", funcTag, 9},
-	{"makeslice", funcTag, 99},
-	{"makeslice64", funcTag, 100},
-	{"makeslicecopy", funcTag, 101},
-	{"growslice", funcTag, 103},
-	{"memmove", funcTag, 104},
-	{"memclrNoHeapPointers", funcTag, 105},
-	{"memclrHasPointers", funcTag, 105},
-	{"memequal", funcTag, 106},
-	{"memequal0", funcTag, 107},
-	{"memequal8", funcTag, 107},
-	{"memequal16", funcTag, 107},
-	{"memequal32", funcTag, 107},
-	{"memequal64", funcTag, 107},
-	{"memequal128", funcTag, 107},
-	{"f32equal", funcTag, 108},
-	{"f64equal", funcTag, 108},
-	{"c64equal", funcTag, 108},
-	{"c128equal", funcTag, 108},
-	{"strequal", funcTag, 108},
-	{"interequal", funcTag, 108},
-	{"nilinterequal", funcTag, 108},
-	{"memhash", funcTag, 109},
-	{"memhash0", funcTag, 110},
-	{"memhash8", funcTag, 110},
-	{"memhash16", funcTag, 110},
-	{"memhash32", funcTag, 110},
-	{"memhash64", funcTag, 110},
-	{"memhash128", funcTag, 110},
-	{"f32hash", funcTag, 110},
-	{"f64hash", funcTag, 110},
-	{"c64hash", funcTag, 110},
-	{"c128hash", funcTag, 110},
-	{"strhash", funcTag, 110},
-	{"interhash", funcTag, 110},
-	{"nilinterhash", funcTag, 110},
-	{"int64div", funcTag, 111},
-	{"uint64div", funcTag, 112},
-	{"int64mod", funcTag, 111},
-	{"uint64mod", funcTag, 112},
-	{"float64toint64", funcTag, 113},
-	{"float64touint64", funcTag, 114},
-	{"float64touint32", funcTag, 115},
-	{"int64tofloat64", funcTag, 116},
-	{"uint64tofloat64", funcTag, 117},
-	{"uint32tofloat64", funcTag, 118},
-	{"complex128div", funcTag, 119},
-	{"racefuncenter", funcTag, 120},
+	{"makeslice", funcTag, 100},
+	{"makeslice64", funcTag, 101},
+	{"makeslicecopy", funcTag, 102},
+	{"growslice", funcTag, 104},
+	{"memmove", funcTag, 105},
+	{"memclrNoHeapPointers", funcTag, 106},
+	{"memclrHasPointers", funcTag, 106},
+	{"memequal", funcTag, 107},
+	{"memequal0", funcTag, 108},
+	{"memequal8", funcTag, 108},
+	{"memequal16", funcTag, 108},
+	{"memequal32", funcTag, 108},
+	{"memequal64", funcTag, 108},
+	{"memequal128", funcTag, 108},
+	{"f32equal", funcTag, 109},
+	{"f64equal", funcTag, 109},
+	{"c64equal", funcTag, 109},
+	{"c128equal", funcTag, 109},
+	{"strequal", funcTag, 109},
+	{"interequal", funcTag, 109},
+	{"nilinterequal", funcTag, 109},
+	{"memhash", funcTag, 110},
+	{"memhash0", funcTag, 111},
+	{"memhash8", funcTag, 111},
+	{"memhash16", funcTag, 111},
+	{"memhash32", funcTag, 111},
+	{"memhash64", funcTag, 111},
+	{"memhash128", funcTag, 111},
+	{"f32hash", funcTag, 111},
+	{"f64hash", funcTag, 111},
+	{"c64hash", funcTag, 111},
+	{"c128hash", funcTag, 111},
+	{"strhash", funcTag, 111},
+	{"interhash", funcTag, 111},
+	{"nilinterhash", funcTag, 111},
+	{"int64div", funcTag, 112},
+	{"uint64div", funcTag, 113},
+	{"int64mod", funcTag, 112},
+	{"uint64mod", funcTag, 113},
+	{"float64toint64", funcTag, 114},
+	{"float64touint64", funcTag, 115},
+	{"float64touint32", funcTag, 116},
+	{"int64tofloat64", funcTag, 117},
+	{"uint64tofloat64", funcTag, 118},
+	{"uint32tofloat64", funcTag, 119},
+	{"complex128div", funcTag, 120},
+	{"racefuncenter", funcTag, 121},
 	{"racefuncenterfp", funcTag, 9},
 	{"racefuncexit", funcTag, 9},
-	{"raceread", funcTag, 120},
-	{"racewrite", funcTag, 120},
-	{"racereadrange", funcTag, 121},
-	{"racewriterange", funcTag, 121},
-	{"msanread", funcTag, 121},
-	{"msanwrite", funcTag, 121},
-	{"checkptrAlignment", funcTag, 122},
-	{"checkptrArithmetic", funcTag, 124},
-	{"libfuzzerTraceCmp1", funcTag, 126},
-	{"libfuzzerTraceCmp2", funcTag, 128},
-	{"libfuzzerTraceCmp4", funcTag, 129},
-	{"libfuzzerTraceCmp8", funcTag, 130},
-	{"libfuzzerTraceConstCmp1", funcTag, 126},
-	{"libfuzzerTraceConstCmp2", funcTag, 128},
-	{"libfuzzerTraceConstCmp4", funcTag, 129},
-	{"libfuzzerTraceConstCmp8", funcTag, 130},
+	{"raceread", funcTag, 121},
+	{"racewrite", funcTag, 121},
+	{"racereadrange", funcTag, 122},
+	{"racewriterange", funcTag, 122},
+	{"msanread", funcTag, 122},
+	{"msanwrite", funcTag, 122},
+	{"checkptrAlignment", funcTag, 123},
+	{"checkptrArithmetic", funcTag, 125},
+	{"libfuzzerTraceCmp1", funcTag, 127},
+	{"libfuzzerTraceCmp2", funcTag, 129},
+	{"libfuzzerTraceCmp4", funcTag, 130},
+	{"libfuzzerTraceCmp8", funcTag, 131},
+	{"libfuzzerTraceConstCmp1", funcTag, 127},
+	{"libfuzzerTraceConstCmp2", funcTag, 129},
+	{"libfuzzerTraceConstCmp4", funcTag, 130},
+	{"libfuzzerTraceConstCmp8", funcTag, 131},
 	{"x86HasPOPCNT", varTag, 6},
 	{"x86HasSSE41", varTag, 6},
 	{"x86HasFMA", varTag, 6},
@@ -202,7 +202,7 @@ var runtimeDecls = [...]struct {
 }
 
 func runtimeTypes() []*types.Type {
-	var typs [131]*types.Type
+	var typs [132]*types.Type
 	typs[0] = types.Bytetype
 	typs[1] = types.NewPtr(typs[0])
 	typs[2] = types.Types[TANY]
@@ -301,38 +301,39 @@ func runtimeTypes() []*types.Type {
 	typs[95] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[84])}, []*Node{anonfield(typs[6])})
 	typs[96] = types.NewPtr(typs[6])
 	typs[97] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[96]), anonfield(typs[84])}, []*Node{anonfield(typs[6])})
-	typs[98] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[15])}, []*Node{anonfield(typs[15]), anonfield(typs[6])})
-	typs[99] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15])}, []*Node{anonfield(typs[7])})
-	typs[100] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[7])})
-	typs[101] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15]), anonfield(typs[7])}, []*Node{anonfield(typs[7])})
-	typs[102] = types.NewSlice(typs[2])
-	typs[103] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[102]), anonfield(typs[15])}, []*Node{anonfield(typs[102])})
-	typs[104] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[5])}, nil)
-	typs[105] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5])}, nil)
-	typs[106] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[5])}, []*Node{anonfield(typs[6])})
-	typs[107] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3])}, []*Node{anonfield(typs[6])})
-	typs[108] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[7])}, []*Node{anonfield(typs[6])})
-	typs[109] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5]), anonfield(typs[5])}, []*Node{anonfield(typs[5])})
-	typs[110] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5])}, []*Node{anonfield(typs[5])})
-	typs[111] = functype(nil, []*Node{anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[22])})
-	typs[112] = functype(nil, []*Node{anonfield(typs[24]), anonfield(typs[24])}, []*Node{anonfield(typs[24])})
-	typs[113] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[22])})
-	typs[114] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[24])})
-	typs[115] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[65])})
-	typs[116] = functype(nil, []*Node{anonfield(typs[22])}, []*Node{anonfield(typs[20])})
-	typs[117] = functype(nil, []*Node{anonfield(typs[24])}, []*Node{anonfield(typs[20])})
-	typs[118] = functype(nil, []*Node{anonfield(typs[65])}, []*Node{anonfield(typs[20])})
-	typs[119] = functype(nil, []*Node{anonfield(typs[26]), anonfield(typs[26])}, []*Node{anonfield(typs[26])})
-	typs[120] = functype(nil, []*Node{anonfield(typs[5])}, nil)
-	typs[121] = functype(nil, []*Node{anonfield(typs[5]), anonfield(typs[5])}, nil)
-	typs[122] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[1]), anonfield(typs[5])}, nil)
-	typs[123] = types.NewSlice(typs[7])
-	typs[124] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[123])}, nil)
-	typs[125] = types.Types[TUINT8]
-	typs[126] = functype(nil, []*Node{anonfield(typs[125]), anonfield(typs[125])}, nil)
-	typs[127] = types.Types[TUINT16]
-	typs[128] = functype(nil, []*Node{anonfield(typs[127]), anonfield(typs[127])}, nil)
-	typs[129] = functype(nil, []*Node{anonfield(typs[65]), anonfield(typs[65])}, nil)
-	typs[130] = functype(nil, []*Node{anonfield(typs[24]), anonfield(typs[24])}, nil)
+	typs[98] = functype(nil, []*Node{anonfield(typs[63])}, nil)
+	typs[99] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[15])}, []*Node{anonfield(typs[15]), anonfield(typs[6])})
+	typs[100] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15])}, []*Node{anonfield(typs[7])})
+	typs[101] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[7])})
+	typs[102] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15]), anonfield(typs[7])}, []*Node{anonfield(typs[7])})
+	typs[103] = types.NewSlice(typs[2])
+	typs[104] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[103]), anonfield(typs[15])}, []*Node{anonfield(typs[103])})
+	typs[105] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[5])}, nil)
+	typs[106] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5])}, nil)
+	typs[107] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[5])}, []*Node{anonfield(typs[6])})
+	typs[108] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3])}, []*Node{anonfield(typs[6])})
+	typs[109] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[7])}, []*Node{anonfield(typs[6])})
+	typs[110] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5]), anonfield(typs[5])}, []*Node{anonfield(typs[5])})
+	typs[111] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5])}, []*Node{anonfield(typs[5])})
+	typs[112] = functype(nil, []*Node{anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[22])})
+	typs[113] = functype(nil, []*Node{anonfield(typs[24]), anonfield(typs[24])}, []*Node{anonfield(typs[24])})
+	typs[114] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[22])})
+	typs[115] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[24])})
+	typs[116] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[65])})
+	typs[117] = functype(nil, []*Node{anonfield(typs[22])}, []*Node{anonfield(typs[20])})
+	typs[118] = functype(nil, []*Node{anonfield(typs[24])}, []*Node{anonfield(typs[20])})
+	typs[119] = functype(nil, []*Node{anonfield(typs[65])}, []*Node{anonfield(typs[20])})
+	typs[120] = functype(nil, []*Node{anonfield(typs[26]), anonfield(typs[26])}, []*Node{anonfield(typs[26])})
+	typs[121] = functype(nil, []*Node{anonfield(typs[5])}, nil)
+	typs[122] = functype(nil, []*Node{anonfield(typs[5]), anonfield(typs[5])}, nil)
+	typs[123] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[1]), anonfield(typs[5])}, nil)
+	typs[124] = types.NewSlice(typs[7])
+	typs[125] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[124])}, nil)
+	typs[126] = types.Types[TUINT8]
+	typs[127] = functype(nil, []*Node{anonfield(typs[126]), anonfield(typs[126])}, nil)
+	typs[128] = types.Types[TUINT16]
+	typs[129] = functype(nil, []*Node{anonfield(typs[128]), anonfield(typs[128])}, nil)
+	typs[130] = functype(nil, []*Node{anonfield(typs[65]), anonfield(typs[65])}, nil)
+	typs[131] = functype(nil, []*Node{anonfield(typs[24]), anonfield(typs[24])}, nil)
 	return typs[:]
 }
diff --git a/src/cmd/compile/internal/gc/builtin/runtime.go b/src/cmd/compile/internal/gc/builtin/runtime.go
index 00448272c5..25f86efdd6 100644
--- a/src/cmd/compile/internal/gc/builtin/runtime.go
+++ b/src/cmd/compile/internal/gc/builtin/runtime.go
@@ -169,8 +169,8 @@ func selectnbsend(hchan chan<- any, elem *any) bool
 func selectnbrecv(elem *any, hchan <-chan any) bool
 func selectnbrecv2(elem *any, received *bool, hchan <-chan any) bool
 
-func selectsetpc(cas *byte)
-func selectgo(cas0 *byte, order0 *byte, ncases int) (int, bool)
+func selectsetpc(pc *uintptr)
+func selectgo(cas0 *byte, order0 *byte, pc0 *uintptr, ncases int) (int, bool)
 func block()
 
 func makeslice(typ *byte, len int, cap int) unsafe.Pointer
diff --git a/src/cmd/compile/internal/gc/select.go b/src/cmd/compile/internal/gc/select.go
index eb5ff8469b..8eb31eb5c1 100644
--- a/src/cmd/compile/internal/gc/select.go
+++ b/src/cmd/compile/internal/gc/select.go
@@ -271,6 +271,14 @@ func walkselectcases(cases *Nodes) []*Node {
 	r = typecheck(r, ctxStmt)
 	init = append(init, r)
 
+	var pc0, pcs *Node
+	if flag_race {
+		pcs = temp(types.NewArray(types.Types[TUINTPTR], int64(n)))
+		pc0 = typecheck(nod(OADDR, nod(OINDEX, pcs, nodintconst(0)), nil), ctxExpr)
+	} else {
+		pc0 = nodnil()
+	}
+
 	// register cases
 	for i, cas := range cases.Slice() {
 		setlineno(cas)
@@ -324,8 +332,8 @@ func walkselectcases(cases *Nodes) []*Node {
 
 		// TODO(mdempsky): There should be a cleaner way to
 		// handle this.
-		if instrumenting {
-			r = mkcall("selectsetpc", nil, nil, bytePtrToIndex(selv, int64(i)))
+		if flag_race {
+			r = mkcall("selectsetpc", nil, nil, nod(OADDR, nod(OINDEX, pcs, nodintconst(int64(i))), nil))
 			init = append(init, r)
 		}
 	}
@@ -337,13 +345,16 @@ func walkselectcases(cases *Nodes) []*Node {
 	r = nod(OAS2, nil, nil)
 	r.List.Set2(chosen, recvOK)
 	fn := syslook("selectgo")
-	r.Rlist.Set1(mkcall1(fn, fn.Type.Results(), nil, bytePtrToIndex(selv, 0), bytePtrToIndex(order, 0), nodintconst(int64(n))))
+	r.Rlist.Set1(mkcall1(fn, fn.Type.Results(), nil, bytePtrToIndex(selv, 0), bytePtrToIndex(order, 0), pc0, nodintconst(int64(n))))
 	r = typecheck(r, ctxStmt)
 	init = append(init, r)
 
 	// selv and order are no longer alive after selectgo.
 	init = append(init, nod(OVARKILL, selv, nil))
 	init = append(init, nod(OVARKILL, order, nil))
+	if flag_race {
+		init = append(init, nod(OVARKILL, pcs, nil))
+	}
 
 	// dispatch cases
 	for i, cas := range cases.Slice() {
@@ -385,7 +396,6 @@ func scasetype() *types.Type {
 			namedfield("c", types.Types[TUNSAFEPTR]),
 			namedfield("elem", types.Types[TUNSAFEPTR]),
 			namedfield("kind", types.Types[TUINT16]),
-			namedfield("pc", types.Types[TUINTPTR]),
 		})
 		scase.SetNoalg(true)
 	}
diff --git a/src/runtime/select.go b/src/runtime/select.go
index d540dd2e69..d7c7d9f26f 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -29,7 +29,6 @@ type scase struct {
 	c    *hchan         // chan
 	elem unsafe.Pointer // data element
 	kind uint16
-	pc   uintptr // race pc (for race detector / msan)
 }
 
 var (
@@ -37,8 +36,8 @@ var (
 	chanrecvpc = funcPC(chanrecv)
 )
 
-func selectsetpc(cas *scase) {
-	cas.pc = getcallerpc()
+func selectsetpc(pc *uintptr) {
+	*pc = getcallerpc()
 }
 
 func sellock(scases []scase, lockorder []uint16) {
@@ -108,11 +107,15 @@ func block() {
 // Both reside on the goroutine's stack (regardless of any escaping in
 // selectgo).
 //
+// For race detector builds, pc0 points to an array of type
+// [ncases]uintptr (also on the stack); for other builds, it's set to
+// nil.
+//
 // selectgo returns the index of the chosen scase, which matches the
 // ordinal position of its respective select{recv,send,default} call.
 // Also, if the chosen scase was a receive operation, it reports whether
 // a value was received.
-func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
+func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool) {
 	if debugSelect {
 		print("select: cas0=", cas0, "\n")
 	}
@@ -126,6 +129,21 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 	pollorder := order1[:ncases:ncases]
 	lockorder := order1[ncases:][:ncases:ncases]
 
+	// Even when raceenabled is true, there might be select
+	// statements in packages compiled without -race (e.g.,
+	// ensureSigM in runtime/signal_unix.go).
+	var pcs []uintptr
+	if raceenabled && pc0 != nil {
+		pc1 := (*[1 << 16]uintptr)(unsafe.Pointer(pc0))
+		pcs = pc1[:ncases:ncases]
+	}
+	casePC := func(casi int) uintptr {
+		if pcs == nil {
+			return 0
+		}
+		return pcs[casi]
+	}
+
 	var t0 int64
 	if blockprofilerate > 0 {
 		t0 = cputicks()
@@ -247,7 +265,7 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 
 		case caseSend:
 			if raceenabled {
-				racereadpc(c.raceaddr(), cas.pc, chansendpc)
+				racereadpc(c.raceaddr(), casePC(casi), chansendpc)
 			}
 			if c.closed != 0 {
 				goto sclose
@@ -371,9 +389,9 @@ func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) {
 
 	if raceenabled {
 		if cas.kind == caseRecv && cas.elem != nil {
-			raceWriteObjectPC(c.elemtype, cas.elem, cas.pc, chanrecvpc)
+			raceWriteObjectPC(c.elemtype, cas.elem, casePC(casi), chanrecvpc)
 		} else if cas.kind == caseSend {
-			raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc)
+			raceReadObjectPC(c.elemtype, cas.elem, casePC(casi), chansendpc)
 		}
 	}
 	if msanenabled {
@@ -391,7 +409,7 @@ bufrecv:
 	// can receive from buffer
 	if raceenabled {
 		if cas.elem != nil {
-			raceWriteObjectPC(c.elemtype, cas.elem, cas.pc, chanrecvpc)
+			raceWriteObjectPC(c.elemtype, cas.elem, casePC(casi), chanrecvpc)
 		}
 		raceacquire(chanbuf(c, c.recvx))
 		racerelease(chanbuf(c, c.recvx))
@@ -418,7 +436,7 @@ bufsend:
 	if raceenabled {
 		raceacquire(chanbuf(c, c.sendx))
 		racerelease(chanbuf(c, c.sendx))
-		raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc)
+		raceReadObjectPC(c.elemtype, cas.elem, casePC(casi), chansendpc)
 	}
 	if msanenabled {
 		msanread(cas.elem, c.elemtype.size)
@@ -456,7 +474,7 @@ rclose:
 send:
 	// can send to a sleeping receiver (sg)
 	if raceenabled {
-		raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc)
+		raceReadObjectPC(c.elemtype, cas.elem, casePC(casi), chansendpc)
 	}
 	if msanenabled {
 		msanread(cas.elem, c.elemtype.size)
@@ -519,12 +537,18 @@ func reflect_rselect(cases []runtimeSelect) (int, bool) {
 		case selectRecv:
 			sel[i] = scase{kind: caseRecv, c: rc.ch, elem: rc.val}
 		}
-		if raceenabled || msanenabled {
-			selectsetpc(&sel[i])
+	}
+
+	var pc0 *uintptr
+	if raceenabled {
+		pcs := make([]uintptr, len(cases))
+		for i := range pcs {
+			selectsetpc(&pcs[i])
 		}
+		pc0 = &pcs[0]
 	}
 
-	return selectgo(&sel[0], &order[0], len(cases))
+	return selectgo(&sel[0], &order[0], pc0, len(cases))
 }
 
 func (q *waitq) dequeueSudoG(sgp *sudog) {
-- 
cgit v1.3


From fe23ba4a145ce8465d16ea2a92b9a7e96e15c28e Mon Sep 17 00:00:00 2001
From: Matthew Dempsky <mdempsky@google.com>
Date: Mon, 27 Jul 2020 16:19:15 -0700
Subject: runtime: eliminate scase.kind field

Currently, we include a "kind" field on scase to distinguish the three
kinds of cases in a select statement: sends, receives, and defaults.

This commit removes the kind field by instead arranging for the
compiler to always place sends before receives, and to provide their
counts separately. It also passes an explicit "block bool" parameter
to avoid needing to include a default case in the array.

It's safe to shuffle cases like this because the runtime will
randomize the order they're polled in anyway.

Fixes #40410.

Change-Id: Iaeaed4cf7bddd576d78f2c863bd91a03a5c82df2
Reviewed-on: https://go-review.googlesource.com/c/go/+/245125
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/cmd/compile/internal/gc/builtin.go         |   2 +-
 src/cmd/compile/internal/gc/builtin/runtime.go |   2 +-
 src/cmd/compile/internal/gc/select.go          | 108 +++++++++++++------------
 src/reflect/all_test.go                        |   8 ++
 src/runtime/select.go                          | 104 +++++++++++++-----------
 5 files changed, 123 insertions(+), 101 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/gc/builtin.go b/src/cmd/compile/internal/gc/builtin.go
index eafdb0ebe7..861ffaaa5b 100644
--- a/src/cmd/compile/internal/gc/builtin.go
+++ b/src/cmd/compile/internal/gc/builtin.go
@@ -302,7 +302,7 @@ func runtimeTypes() []*types.Type {
 	typs[96] = types.NewPtr(typs[6])
 	typs[97] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[96]), anonfield(typs[84])}, []*Node{anonfield(typs[6])})
 	typs[98] = functype(nil, []*Node{anonfield(typs[63])}, nil)
-	typs[99] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[15])}, []*Node{anonfield(typs[15]), anonfield(typs[6])})
+	typs[99] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[15]), anonfield(typs[15]), anonfield(typs[6])}, []*Node{anonfield(typs[15]), anonfield(typs[6])})
 	typs[100] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15])}, []*Node{anonfield(typs[7])})
 	typs[101] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[7])})
 	typs[102] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15]), anonfield(typs[7])}, []*Node{anonfield(typs[7])})
diff --git a/src/cmd/compile/internal/gc/builtin/runtime.go b/src/cmd/compile/internal/gc/builtin/runtime.go
index 25f86efdd6..635da80f7c 100644
--- a/src/cmd/compile/internal/gc/builtin/runtime.go
+++ b/src/cmd/compile/internal/gc/builtin/runtime.go
@@ -170,7 +170,7 @@ func selectnbrecv(elem *any, hchan <-chan any) bool
 func selectnbrecv2(elem *any, received *bool, hchan <-chan any) bool
 
 func selectsetpc(pc *uintptr)
-func selectgo(cas0 *byte, order0 *byte, pc0 *uintptr, ncases int) (int, bool)
+func selectgo(cas0 *byte, order0 *byte, pc0 *uintptr, nsends int, nrecvs int, block bool) (int, bool)
 func block()
 
 func makeslice(typ *byte, len int, cap int) unsafe.Pointer
diff --git a/src/cmd/compile/internal/gc/select.go b/src/cmd/compile/internal/gc/select.go
index 8eb31eb5c1..bae7ed30e2 100644
--- a/src/cmd/compile/internal/gc/select.go
+++ b/src/cmd/compile/internal/gc/select.go
@@ -106,18 +106,16 @@ func walkselect(sel *Node) {
 }
 
 func walkselectcases(cases *Nodes) []*Node {
-	n := cases.Len()
+	ncas := cases.Len()
 	sellineno := lineno
 
 	// optimization: zero-case select
-	if n == 0 {
+	if ncas == 0 {
 		return []*Node{mkcall("block", nil, nil)}
 	}
 
 	// optimization: one-case select: single op.
-	// TODO(rsc): Reenable optimization once order.go can handle it.
-	// golang.org/issue/7672.
-	if n == 1 {
+	if ncas == 1 {
 		cas := cases.First()
 		setlineno(cas)
 		l := cas.Ninit.Slice()
@@ -178,10 +176,12 @@ func walkselectcases(cases *Nodes) []*Node {
 
 	// convert case value arguments to addresses.
 	// this rewrite is used by both the general code and the next optimization.
+	var dflt *Node
 	for _, cas := range cases.Slice() {
 		setlineno(cas)
 		n := cas.Left
 		if n == nil {
+			dflt = cas
 			continue
 		}
 		switch n.Op {
@@ -202,15 +202,10 @@ func walkselectcases(cases *Nodes) []*Node {
 	}
 
 	// optimization: two-case select but one is default: single non-blocking op.
-	if n == 2 && (cases.First().Left == nil || cases.Second().Left == nil) {
-		var cas *Node
-		var dflt *Node
-		if cases.First().Left == nil {
+	if ncas == 2 && dflt != nil {
+		cas := cases.First()
+		if cas == dflt {
 			cas = cases.Second()
-			dflt = cases.First()
-		} else {
-			dflt = cases.Second()
-			cas = cases.First()
 		}
 
 		n := cas.Left
@@ -257,74 +252,73 @@ func walkselectcases(cases *Nodes) []*Node {
 		return []*Node{r, nod(OBREAK, nil, nil)}
 	}
 
+	if dflt != nil {
+		ncas--
+	}
+	casorder := make([]*Node, ncas)
+	nsends, nrecvs := 0, 0
+
 	var init []*Node
 
 	// generate sel-struct
 	lineno = sellineno
-	selv := temp(types.NewArray(scasetype(), int64(n)))
+	selv := temp(types.NewArray(scasetype(), int64(ncas)))
 	r := nod(OAS, selv, nil)
 	r = typecheck(r, ctxStmt)
 	init = append(init, r)
 
-	order := temp(types.NewArray(types.Types[TUINT16], 2*int64(n)))
+	order := temp(types.NewArray(types.Types[TUINT16], 2*int64(ncas)))
 	r = nod(OAS, order, nil)
 	r = typecheck(r, ctxStmt)
 	init = append(init, r)
 
 	var pc0, pcs *Node
 	if flag_race {
-		pcs = temp(types.NewArray(types.Types[TUINTPTR], int64(n)))
+		pcs = temp(types.NewArray(types.Types[TUINTPTR], int64(ncas)))
 		pc0 = typecheck(nod(OADDR, nod(OINDEX, pcs, nodintconst(0)), nil), ctxExpr)
 	} else {
 		pc0 = nodnil()
 	}
 
 	// register cases
-	for i, cas := range cases.Slice() {
+	for _, cas := range cases.Slice() {
 		setlineno(cas)
 
 		init = append(init, cas.Ninit.Slice()...)
 		cas.Ninit.Set(nil)
 
-		// Keep in sync with runtime/select.go.
-		const (
-			caseNil = iota
-			caseRecv
-			caseSend
-			caseDefault
-		)
+		n := cas.Left
+		if n == nil { // default:
+			continue
+		}
 
+		var i int
 		var c, elem *Node
-		var kind int64 = caseDefault
-
-		if n := cas.Left; n != nil {
-			init = append(init, n.Ninit.Slice()...)
-
-			switch n.Op {
-			default:
-				Fatalf("select %v", n.Op)
-			case OSEND:
-				kind = caseSend
-				c = n.Left
-				elem = n.Right
-			case OSELRECV, OSELRECV2:
-				kind = caseRecv
-				c = n.Right.Left
-				elem = n.Left
-			}
+		switch n.Op {
+		default:
+			Fatalf("select %v", n.Op)
+		case OSEND:
+			i = nsends
+			nsends++
+			c = n.Left
+			elem = n.Right
+		case OSELRECV, OSELRECV2:
+			nrecvs++
+			i = ncas - nrecvs
+			c = n.Right.Left
+			elem = n.Left
 		}
 
+		casorder[i] = cas
+
 		setField := func(f string, val *Node) {
 			r := nod(OAS, nodSym(ODOT, nod(OINDEX, selv, nodintconst(int64(i))), lookup(f)), val)
 			r = typecheck(r, ctxStmt)
 			init = append(init, r)
 		}
 
-		setField("kind", nodintconst(kind))
-		if c != nil {
-			c = convnop(c, types.Types[TUNSAFEPTR])
-			setField("c", c)
-		}
+		c = convnop(c, types.Types[TUNSAFEPTR])
+		setField("c", c)
 		if elem != nil {
 			elem = convnop(elem, types.Types[TUNSAFEPTR])
 			setField("elem", elem)
@@ -337,6 +331,9 @@ func walkselectcases(cases *Nodes) []*Node {
 			init = append(init, r)
 		}
 	}
+	if nsends+nrecvs != ncas {
+		Fatalf("walkselectcases: miscount: %v + %v != %v", nsends, nrecvs, ncas)
+	}
 
 	// run the select
 	lineno = sellineno
@@ -345,7 +342,7 @@ func walkselectcases(cases *Nodes) []*Node {
 	r = nod(OAS2, nil, nil)
 	r.List.Set2(chosen, recvOK)
 	fn := syslook("selectgo")
-	r.Rlist.Set1(mkcall1(fn, fn.Type.Results(), nil, bytePtrToIndex(selv, 0), bytePtrToIndex(order, 0), pc0, nodintconst(int64(n))))
+	r.Rlist.Set1(mkcall1(fn, fn.Type.Results(), nil, bytePtrToIndex(selv, 0), bytePtrToIndex(order, 0), pc0, nodintconst(int64(nsends)), nodintconst(int64(nrecvs)), nodbool(dflt == nil)))
 	r = typecheck(r, ctxStmt)
 	init = append(init, r)
 
@@ -357,14 +354,11 @@ func walkselectcases(cases *Nodes) []*Node {
 	}
 
 	// dispatch cases
-	for i, cas := range cases.Slice() {
-		setlineno(cas)
-
-		cond := nod(OEQ, chosen, nodintconst(int64(i)))
+	dispatch := func(cond, cas *Node) {
 		cond = typecheck(cond, ctxExpr)
 		cond = defaultlit(cond, nil)
 
-		r = nod(OIF, cond, nil)
+		r := nod(OIF, cond, nil)
 
 		if n := cas.Left; n != nil && n.Op == OSELRECV2 {
 			x := nod(OAS, n.List.First(), recvOK)
@@ -377,6 +371,15 @@ func walkselectcases(cases *Nodes) []*Node {
 		init = append(init, r)
 	}
 
+	if dflt != nil {
+		setlineno(dflt)
+		dispatch(nod(OLT, chosen, nodintconst(0)), dflt)
+	}
+	for i, cas := range casorder {
+		setlineno(cas)
+		dispatch(nod(OEQ, chosen, nodintconst(int64(i))), cas)
+	}
+
 	return init
 }
 
@@ -395,7 +398,6 @@ func scasetype() *types.Type {
 		scase = tostruct([]*Node{
 			namedfield("c", types.Types[TUNSAFEPTR]),
 			namedfield("elem", types.Types[TUNSAFEPTR]),
-			namedfield("kind", types.Types[TUINT16]),
 		})
 		scase.SetNoalg(true)
 	}
diff --git a/src/reflect/all_test.go b/src/reflect/all_test.go
index ed2f225077..5a12699472 100644
--- a/src/reflect/all_test.go
+++ b/src/reflect/all_test.go
@@ -1725,6 +1725,14 @@ func TestSelectMaxCases(t *testing.T) {
 	_, _, _ = Select(sCases)
 }
 
+func TestSelectNop(t *testing.T) {
+	// "select { default: }" should always return the default case.
+	chosen, _, _ := Select([]SelectCase{{Dir: SelectDefault}})
+	if chosen != 0 {
+		t.Fatalf("expected Select to return 0, but got %#v", chosen)
+	}
+}
+
 func BenchmarkSelect(b *testing.B) {
 	channel := make(chan int)
 	close(channel)
diff --git a/src/runtime/select.go b/src/runtime/select.go
index d7c7d9f26f..80768b285b 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -12,23 +12,12 @@ import (
 
 const debugSelect = false
 
-// scase.kind values.
-// Known to compiler.
-// Changes here must also be made in src/cmd/compile/internal/gc/select.go's walkselectcases.
-const (
-	caseNil = iota
-	caseRecv
-	caseSend
-	caseDefault
-)
-
 // Select case descriptor.
 // Known to compiler.
 // Changes here must also be made in src/cmd/internal/gc/select.go's scasetype.
 type scase struct {
 	c    *hchan         // chan
 	elem unsafe.Pointer // data element
-	kind uint16
 }
 
 var (
@@ -115,7 +104,7 @@ func block() {
 // ordinal position of its respective select{recv,send,default} call.
 // Also, if the chosen scase was a receive operation, it reports whether
 // a value was received.
-func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool) {
+func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, nsends, nrecvs int, block bool) (int, bool) {
 	if debugSelect {
 		print("select: cas0=", cas0, "\n")
 	}
@@ -125,6 +114,7 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool)
 	cas1 := (*[1 << 16]scase)(unsafe.Pointer(cas0))
 	order1 := (*[1 << 17]uint16)(unsafe.Pointer(order0))
 
+	ncases := nsends + nrecvs
 	scases := cas1[:ncases:ncases]
 	pollorder := order1[:ncases:ncases]
 	lockorder := order1[ncases:][:ncases:ncases]
@@ -158,16 +148,12 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool)
 	// optimizing (and needing to test).
 
 	// generate permuted order
-	dfli := -1
 	norder := 0
 	for i := range scases {
 		cas := &scases[i]
 
 		// Omit cases without channels from the poll and lock orders.
 		if cas.c == nil {
-			if cas.kind == caseDefault {
-				dfli = i
-			}
 			cas.elem = nil // allow GC
 			continue
 		}
@@ -250,8 +236,7 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool)
 		cas = &scases[casi]
 		c = cas.c
 
-		switch cas.kind {
-		case caseRecv:
+		if casi >= nsends {
 			sg = c.sendq.dequeue()
 			if sg != nil {
 				goto recv
@@ -262,8 +247,7 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool)
 			if c.closed != 0 {
 				goto rclose
 			}
-
-		case caseSend:
+		} else {
 			if raceenabled {
 				racereadpc(c.raceaddr(), casePC(casi), chansendpc)
 			}
@@ -280,9 +264,9 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool)
 		}
 	}
 
-	if dfli >= 0 {
+	if !block {
 		selunlock(scases, lockorder)
-		casi = dfli
+		casi = -1
 		goto retc
 	}
 
@@ -311,12 +295,10 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool)
 		*nextp = sg
 		nextp = &sg.waitlink
 
-		switch cas.kind {
-		case caseRecv:
-			c.recvq.enqueue(sg)
-
-		case caseSend:
+		if casi < nsends {
 			c.sendq.enqueue(sg)
+		} else {
+			c.recvq.enqueue(sg)
 		}
 	}
 
@@ -359,7 +341,7 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool)
 			}
 		} else {
 			c = k.c
-			if k.kind == caseSend {
+			if int(casei) < nsends {
 				c.sendq.dequeueSudoG(sglist)
 			} else {
 				c.recvq.dequeueSudoG(sglist)
@@ -378,27 +360,29 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, ncases int) (int, bool)
 	c = cas.c
 
 	if debugSelect {
-		print("wait-return: cas0=", cas0, " c=", c, " cas=", cas, " kind=", cas.kind, "\n")
+		print("wait-return: cas0=", cas0, " c=", c, " cas=", cas, " send=", casi < nsends, "\n")
 	}
 
-	if cas.kind == caseRecv {
+	if casi < nsends {
+		if !caseSuccess {
+			goto sclose
+		}
+	} else {
 		recvOK = caseSuccess
-	} else if cas.kind == caseSend && !caseSuccess {
-		goto sclose
 	}
 
 	if raceenabled {
-		if cas.kind == caseRecv && cas.elem != nil {
-			raceWriteObjectPC(c.elemtype, cas.elem, casePC(casi), chanrecvpc)
-		} else if cas.kind == caseSend {
+		if casi < nsends {
 			raceReadObjectPC(c.elemtype, cas.elem, casePC(casi), chansendpc)
+		} else if cas.elem != nil {
+			raceWriteObjectPC(c.elemtype, cas.elem, casePC(casi), chanrecvpc)
 		}
 	}
 	if msanenabled {
-		if cas.kind == caseRecv && cas.elem != nil {
-			msanwrite(cas.elem, c.elemtype.size)
-		} else if cas.kind == caseSend {
+		if casi < nsends {
 			msanread(cas.elem, c.elemtype.size)
+		} else if cas.elem != nil {
+			msanwrite(cas.elem, c.elemtype.size)
 		}
 	}
 
@@ -526,29 +510,57 @@ func reflect_rselect(cases []runtimeSelect) (int, bool) {
 		block()
 	}
 	sel := make([]scase, len(cases))
-	order := make([]uint16, 2*len(cases))
-	for i := range cases {
-		rc := &cases[i]
+	orig := make([]int, len(cases))
+	nsends, nrecvs := 0, 0
+	dflt := -1
+	for i, rc := range cases {
+		var j int
 		switch rc.dir {
 		case selectDefault:
-			sel[i] = scase{kind: caseDefault}
+			dflt = i
+			continue
 		case selectSend:
-			sel[i] = scase{kind: caseSend, c: rc.ch, elem: rc.val}
+			j = nsends
+			nsends++
 		case selectRecv:
-			sel[i] = scase{kind: caseRecv, c: rc.ch, elem: rc.val}
+			nrecvs++
+			j = len(cases) - nrecvs
 		}
+
+		sel[j] = scase{c: rc.ch, elem: rc.val}
+		orig[j] = i
 	}
 
+	// Only a default case.
+	if nsends+nrecvs == 0 {
+		return dflt, false
+	}
+
+	// Compact sel and orig if necessary.
+	if nsends+nrecvs < len(cases) {
+		copy(sel[nsends:], sel[len(cases)-nrecvs:])
+		copy(orig[nsends:], orig[len(cases)-nrecvs:])
+	}
+
+	order := make([]uint16, 2*(nsends+nrecvs))
 	var pc0 *uintptr
 	if raceenabled {
-		pcs := make([]uintptr, len(cases))
+		pcs := make([]uintptr, nsends+nrecvs)
 		for i := range pcs {
 			selectsetpc(&pcs[i])
 		}
 		pc0 = &pcs[0]
 	}
 
-	return selectgo(&sel[0], &order[0], pc0, len(cases))
+	chosen, recvOK := selectgo(&sel[0], &order[0], pc0, nsends, nrecvs, dflt == -1)
+
+	// Translate chosen back to caller's ordering.
+	if chosen < 0 {
+		chosen = dflt
+	} else {
+		chosen = orig[chosen]
+	}
+	return chosen, recvOK
 }
 
 func (q *waitq) dequeueSudoG(sgp *sudog) {
-- 
cgit v1.3


From 0941fc3f9ff43598d25fa6e964e7829a268102bf Mon Sep 17 00:00:00 2001
From: cui <cuiweixie@gmail.com>
Date: Wed, 12 Aug 2020 17:33:41 +0000
Subject: runtime: reduce syscalls when calling runtime.clone

Change-Id: I3ea398fd86aae4c86557dd6fff65d90a6f756890
GitHub-Last-Rev: 4c295388f7b5e6768ffd2530337f78b4c75a9310
GitHub-Pull-Request: golang/go#40392
Reviewed-on: https://go-review.googlesource.com/c/go/+/244626
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/runtime/sys_linux_amd64.s | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
index 621c01b365..8d90813589 100644
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -592,13 +592,25 @@ TEXT runtime·clone(SB),NOSPLIT,$0
 	MOVQ	stk+8(FP), SI
 	MOVQ	$0, DX
 	MOVQ	$0, R10
-
+	MOVQ    $0, R8
 	// Copy mp, gp, fn off parent stack for use by child.
 	// Careful: Linux system call clobbers CX and R11.
-	MOVQ	mp+16(FP), R8
+	MOVQ	mp+16(FP), R13
 	MOVQ	gp+24(FP), R9
 	MOVQ	fn+32(FP), R12
-
+	CMPQ	R13, $0    // m
+	JEQ	nog1
+	CMPQ	R9, $0    // g
+	JEQ	nog1
+	LEAQ	m_tls(R13), R8
+#ifdef GOOS_android
+	// Android stores the TLS offset in runtime·tls_g.
+	SUBQ	runtime·tls_g(SB), R8
+#else
+	ADDQ	$8, R8	// ELF wants to use -8(FS)
+#endif
+	ORQ 	$0x00080000, DI //add flag CLONE_SETTLS(0x00080000) to call clone
+nog1:
 	MOVL	$SYS_clone, AX
 	SYSCALL
 
@@ -612,27 +624,23 @@ TEXT runtime·clone(SB),NOSPLIT,$0
 	MOVQ	SI, SP
 
 	// If g or m are nil, skip Go-related setup.
-	CMPQ	R8, $0    // m
-	JEQ	nog
+	CMPQ	R13, $0    // m
+	JEQ	nog2
 	CMPQ	R9, $0    // g
-	JEQ	nog
+	JEQ	nog2
 
 	// Initialize m->procid to Linux tid
 	MOVL	$SYS_gettid, AX
 	SYSCALL
-	MOVQ	AX, m_procid(R8)
-
-	// Set FS to point at m->tls.
-	LEAQ	m_tls(R8), DI
-	CALL	runtime·settls(SB)
+	MOVQ	AX, m_procid(R13)
 
 	// In child, set up new stack
 	get_tls(CX)
-	MOVQ	R8, g_m(R9)
+	MOVQ	R13, g_m(R9)
 	MOVQ	R9, g(CX)
 	CALL	runtime·stackcheck(SB)
 
-nog:
+nog2:
 	// Call fn
 	CALL	R12
 
-- 
cgit v1.3


From bd519d0c8734c3e30cb1a8b8217dd9934cd61e25 Mon Sep 17 00:00:00 2001
From: Ian Lance Taylor <iant@golang.org>
Date: Thu, 25 Jun 2020 14:50:10 -0700
Subject: runtime: don't call setitimer for each thread

Previously, on Unix systems, when the profiler was enabled or disabled,
we called setitimer once per thread. With this change we instead call
it once per process.

Change-Id: I90f0189b562e11232816390dc7d55ed154bd836d
Reviewed-on: https://go-review.googlesource.com/c/go/+/240003
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/runtime/signal_unix.go | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go
index 6a11c91fb9..064a0ea100 100644
--- a/src/runtime/signal_unix.go
+++ b/src/runtime/signal_unix.go
@@ -272,6 +272,12 @@ func setProcessCPUProfiler(hz int32) {
 			atomic.Storeuintptr(&fwdSig[_SIGPROF], getsig(_SIGPROF))
 			setsig(_SIGPROF, funcPC(sighandler))
 		}
+
+		var it itimerval
+		it.it_interval.tv_sec = 0
+		it.it_interval.set_usec(1000000 / hz)
+		it.it_value = it.it_interval
+		setitimer(_ITIMER_PROF, &it, nil)
 	} else {
 		// If the Go signal handler should be disabled by default,
 		// switch back to the signal handler that was installed
@@ -296,23 +302,16 @@ func setProcessCPUProfiler(hz int32) {
 				setsig(_SIGPROF, h)
 			}
 		}
+
+		setitimer(_ITIMER_PROF, &itimerval{}, nil)
 	}
 }
 
 // setThreadCPUProfiler makes any thread-specific changes required to
 // implement profiling at a rate of hz.
+// No changes required on Unix systems.
 func setThreadCPUProfiler(hz int32) {
-	var it itimerval
-	if hz == 0 {
-		setitimer(_ITIMER_PROF, &it, nil)
-	} else {
-		it.it_interval.tv_sec = 0
-		it.it_interval.set_usec(1000000 / hz)
-		it.it_value = it.it_interval
-		setitimer(_ITIMER_PROF, &it, nil)
-	}
-	_g_ := getg()
-	_g_.m.profilehz = hz
+	getg().m.profilehz = hz
 }
 
 func sigpipe() {
-- 
cgit v1.3


From 6b420169d798c7ebe733487b56ea5c3fa4aab5ce Mon Sep 17 00:00:00 2001
From: Ian Lance Taylor <iant@golang.org>
Date: Tue, 18 Aug 2020 16:46:24 -0700
Subject: os, internal/poll: loop on EINTR for all file syscalls

When using a FUSE file system, any system call that touches the file
system can return EINTR.

Fixes #40846

Change-Id: I25d32da22cec08dea81ab297291a85ad72db2df7
Reviewed-on: https://go-review.googlesource.com/c/go/+/249178
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Bryan C. Mills <bcmills@google.com>
---
 src/internal/poll/fd_fsync_posix.go    |  4 +++-
 src/internal/poll/fd_opendir_darwin.go |  8 ++++++-
 src/internal/poll/fd_posix.go          | 28 +++++++++++++++++++++---
 src/internal/poll/fd_unix.go           | 22 ++++++++-----------
 src/os/dir_darwin.go                   |  3 +++
 src/os/file.go                         |  5 ++++-
 src/os/file_plan9.go                   |  4 ++++
 src/os/file_posix.go                   | 32 ++++++++++++++++++++++++---
 src/os/file_unix.go                    | 40 +++++++++++++++++++++++++++-------
 src/os/getwd.go                        | 11 +++++++++-
 src/os/stat_unix.go                    |  8 +++++--
 src/runtime/trace/trace_stack_test.go  |  2 +-
 12 files changed, 133 insertions(+), 34 deletions(-)

(limited to 'src/runtime')

diff --git a/src/internal/poll/fd_fsync_posix.go b/src/internal/poll/fd_fsync_posix.go
index 69358297f4..dd7956f14d 100644
--- a/src/internal/poll/fd_fsync_posix.go
+++ b/src/internal/poll/fd_fsync_posix.go
@@ -14,5 +14,7 @@ func (fd *FD) Fsync() error {
 		return err
 	}
 	defer fd.decref()
-	return syscall.Fsync(fd.Sysfd)
+	return ignoringEINTR(func() error {
+		return syscall.Fsync(fd.Sysfd)
+	})
 }
diff --git a/src/internal/poll/fd_opendir_darwin.go b/src/internal/poll/fd_opendir_darwin.go
index c7d3318c72..8eb770c358 100644
--- a/src/internal/poll/fd_opendir_darwin.go
+++ b/src/internal/poll/fd_opendir_darwin.go
@@ -19,7 +19,13 @@ func (fd *FD) OpenDir() (uintptr, string, error) {
 	if err != nil {
 		return 0, call, err
 	}
-	dir, err := fdopendir(fd2)
+	var dir uintptr
+	for {
+		dir, err = fdopendir(fd2)
+		if err != syscall.EINTR {
+			break
+		}
+	}
 	if err != nil {
 		syscall.Close(fd2)
 		return 0, "fdopendir", err
diff --git a/src/internal/poll/fd_posix.go b/src/internal/poll/fd_posix.go
index 54747b4c99..e5fb05c9c2 100644
--- a/src/internal/poll/fd_posix.go
+++ b/src/internal/poll/fd_posix.go
@@ -35,7 +35,9 @@ func (fd *FD) Fchmod(mode uint32) error {
 		return err
 	}
 	defer fd.decref()
-	return syscall.Fchmod(fd.Sysfd, mode)
+	return ignoringEINTR(func() error {
+		return syscall.Fchmod(fd.Sysfd, mode)
+	})
 }
 
 // Fchown wraps syscall.Fchown.
@@ -44,7 +46,9 @@ func (fd *FD) Fchown(uid, gid int) error {
 		return err
 	}
 	defer fd.decref()
-	return syscall.Fchown(fd.Sysfd, uid, gid)
+	return ignoringEINTR(func() error {
+		return syscall.Fchown(fd.Sysfd, uid, gid)
+	})
 }
 
 // Ftruncate wraps syscall.Ftruncate.
@@ -53,7 +57,9 @@ func (fd *FD) Ftruncate(size int64) error {
 		return err
 	}
 	defer fd.decref()
-	return syscall.Ftruncate(fd.Sysfd, size)
+	return ignoringEINTR(func() error {
+		return syscall.Ftruncate(fd.Sysfd, size)
+	})
 }
 
 // RawControl invokes the user-defined function f for a non-IO
@@ -66,3 +72,19 @@ func (fd *FD) RawControl(f func(uintptr)) error {
 	f(uintptr(fd.Sysfd))
 	return nil
 }
+
+// ignoringEINTR makes a function call and repeats it if it returns
+// an EINTR error. This appears to be required even though we install all
+// signal handlers with SA_RESTART: see #22838, #38033, #38836, #40846.
+// Also #20400 and #36644 are issues in which a signal handler is
+// installed without setting SA_RESTART. None of these are the common case,
+// but there are enough of them that it seems that we can't avoid
+// an EINTR loop.
+func ignoringEINTR(fn func() error) error {
+	for {
+		err := fn()
+		if err != syscall.EINTR {
+			return err
+		}
+	}
+}
diff --git a/src/internal/poll/fd_unix.go b/src/internal/poll/fd_unix.go
index 4872fa9851..1d5101eac3 100644
--- a/src/internal/poll/fd_unix.go
+++ b/src/internal/poll/fd_unix.go
@@ -152,7 +152,7 @@ func (fd *FD) Read(p []byte) (int, error) {
 		p = p[:maxRW]
 	}
 	for {
-		n, err := ignoringEINTR(syscall.Read, fd.Sysfd, p)
+		n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p)
 		if err != nil {
 			n = 0
 			if err == syscall.EAGAIN && fd.pd.pollable() {
@@ -264,7 +264,7 @@ func (fd *FD) Write(p []byte) (int, error) {
 		if fd.IsStream && max-nn > maxRW {
 			max = nn + maxRW
 		}
-		n, err := ignoringEINTR(syscall.Write, fd.Sysfd, p[nn:max])
+		n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max])
 		if n > 0 {
 			nn += n
 		}
@@ -423,7 +423,7 @@ func (fd *FD) ReadDirent(buf []byte) (int, error) {
 	}
 	defer fd.decref()
 	for {
-		n, err := ignoringEINTR(syscall.ReadDirent, fd.Sysfd, buf)
+		n, err := ignoringEINTRIO(syscall.ReadDirent, fd.Sysfd, buf)
 		if err != nil {
 			n = 0
 			if err == syscall.EAGAIN && fd.pd.pollable() {
@@ -452,7 +452,9 @@ func (fd *FD) Fstat(s *syscall.Stat_t) error {
 		return err
 	}
 	defer fd.decref()
-	return syscall.Fstat(fd.Sysfd, s)
+	return ignoringEINTR(func() error {
+		return syscall.Fstat(fd.Sysfd, s)
+	})
 }
 
 // tryDupCloexec indicates whether F_DUPFD_CLOEXEC should be used.
@@ -514,7 +516,7 @@ func (fd *FD) WriteOnce(p []byte) (int, error) {
 		return 0, err
 	}
 	defer fd.writeUnlock()
-	return ignoringEINTR(syscall.Write, fd.Sysfd, p)
+	return ignoringEINTRIO(syscall.Write, fd.Sysfd, p)
 }
 
 // RawRead invokes the user-defined function f for a read operation.
@@ -555,14 +557,8 @@ func (fd *FD) RawWrite(f func(uintptr) bool) error {
 	}
 }
 
-// ignoringEINTR makes a function call and repeats it if it returns
-// an EINTR error. This appears to be required even though we install
-// all signal handlers with SA_RESTART: see #22838, #38033, #38836.
-// Also #20400 and #36644 are issues in which a signal handler is
-// installed without setting SA_RESTART. None of these are the common case,
-// but there are enough of them that it seems that we can't avoid
-// an EINTR loop.
-func ignoringEINTR(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) {
+// ignoringEINTRIO is like ignoringEINTR, but just for IO calls.
+func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) {
 	for {
 		n, err := fn(fd, p)
 		if err != syscall.EINTR {
diff --git a/src/os/dir_darwin.go b/src/os/dir_darwin.go
index 2f9ba78d68..87797e2dda 100644
--- a/src/os/dir_darwin.go
+++ b/src/os/dir_darwin.go
@@ -47,6 +47,9 @@ func (f *File) readdirnames(n int) (names []string, err error) {
 	var entptr *syscall.Dirent
 	for len(names) < size || n == -1 {
 		if res := readdir_r(d.dir, &dirent, &entptr); res != 0 {
+			if syscall.Errno(res) == syscall.EINTR {
+				continue
+			}
 			return names, wrapSyscallError("readdir", syscall.Errno(res))
 		}
 		if entptr == nil { // EOF
diff --git a/src/os/file.go b/src/os/file.go
index a2b71cb61a..05d2f83283 100644
--- a/src/os/file.go
+++ b/src/os/file.go
@@ -255,7 +255,10 @@ func Mkdir(name string, perm FileMode) error {
 	if runtime.GOOS == "windows" && isWindowsNulName(name) {
 		return &PathError{"mkdir", name, syscall.ENOTDIR}
 	}
-	e := syscall.Mkdir(fixLongPath(name), syscallMode(perm))
+	longName := fixLongPath(name)
+	e := ignoringEINTR(func() error {
+		return syscall.Mkdir(longName, syscallMode(perm))
+	})
 
 	if e != nil {
 		return &PathError{"mkdir", name, e}
diff --git a/src/os/file_plan9.go b/src/os/file_plan9.go
index eb158905ab..043500744b 100644
--- a/src/os/file_plan9.go
+++ b/src/os/file_plan9.go
@@ -558,3 +558,7 @@ func (c *rawConn) Write(f func(uintptr) bool) error {
 func newRawConn(file *File) (*rawConn, error) {
 	return nil, syscall.EPLAN9
 }
+
+func ignoringEINTR(fn func() error) error {
+	return fn()
+}
diff --git a/src/os/file_posix.go b/src/os/file_posix.go
index 24ea554b62..ae23d22d0a 100644
--- a/src/os/file_posix.go
+++ b/src/os/file_posix.go
@@ -76,7 +76,11 @@ func syscallMode(i FileMode) (o uint32) {
 
 // See docs in file.go:Chmod.
 func chmod(name string, mode FileMode) error {
-	if e := syscall.Chmod(fixLongPath(name), syscallMode(mode)); e != nil {
+	longName := fixLongPath(name)
+	e := ignoringEINTR(func() error {
+		return syscall.Chmod(longName, syscallMode(mode))
+	})
+	if e != nil {
 		return &PathError{"chmod", name, e}
 	}
 	return nil
@@ -101,7 +105,10 @@ func (f *File) chmod(mode FileMode) error {
 // On Windows or Plan 9, Chown always returns the syscall.EWINDOWS or
 // EPLAN9 error, wrapped in *PathError.
 func Chown(name string, uid, gid int) error {
-	if e := syscall.Chown(name, uid, gid); e != nil {
+	e := ignoringEINTR(func() error {
+		return syscall.Chown(name, uid, gid)
+	})
+	if e != nil {
 		return &PathError{"chown", name, e}
 	}
 	return nil
@@ -114,7 +121,10 @@ func Chown(name string, uid, gid int) error {
 // On Windows, it always returns the syscall.EWINDOWS error, wrapped
 // in *PathError.
 func Lchown(name string, uid, gid int) error {
-	if e := syscall.Lchown(name, uid, gid); e != nil {
+	e := ignoringEINTR(func() error {
+		return syscall.Lchown(name, uid, gid)
+	})
+	if e != nil {
 		return &PathError{"lchown", name, e}
 	}
 	return nil
@@ -222,3 +232,19 @@ func (f *File) checkValid(op string) error {
 	}
 	return nil
 }
+
+// ignoringEINTR makes a function call and repeats it if it returns an
+// EINTR error. This appears to be required even though we install all
+// signal handlers with SA_RESTART: see #22838, #38033, #38836, #40846.
+// Also #20400 and #36644 are issues in which a signal handler is
+// installed without setting SA_RESTART. None of these are the common case,
+// but there are enough of them that it seems that we can't avoid
+// an EINTR loop.
+func ignoringEINTR(fn func() error) error {
+	for {
+		err := fn()
+		if err != syscall.EINTR {
+			return err
+		}
+	}
+}
diff --git a/src/os/file_unix.go b/src/os/file_unix.go
index f2c00ae0cb..5446dd5003 100644
--- a/src/os/file_unix.go
+++ b/src/os/file_unix.go
@@ -39,7 +39,9 @@ func rename(oldname, newname string) error {
 			return &LinkError{"rename", oldname, newname, syscall.EEXIST}
 		}
 	}
-	err = syscall.Rename(oldname, newname)
+	err = ignoringEINTR(func() error {
+		return syscall.Rename(oldname, newname)
+	})
 	if err != nil {
 		return &LinkError{"rename", oldname, newname, err}
 	}
@@ -129,7 +131,9 @@ func newFile(fd uintptr, name string, kind newFileKind) *File {
 		switch runtime.GOOS {
 		case "darwin", "dragonfly", "freebsd", "netbsd", "openbsd":
 			var st syscall.Stat_t
-			err := syscall.Fstat(fdi, &st)
+			err := ignoringEINTR(func() error {
+				return syscall.Fstat(fdi, &st)
+			})
 			typ := st.Mode & syscall.S_IFMT
 			// Don't try to use kqueue with regular files on *BSDs.
 			// On FreeBSD a regular file is always
@@ -264,7 +268,10 @@ func (f *File) seek(offset int64, whence int) (ret int64, err error) {
 // If the file is a symbolic link, it changes the size of the link's target.
 // If there is an error, it will be of type *PathError.
 func Truncate(name string, size int64) error {
-	if e := syscall.Truncate(name, size); e != nil {
+	e := ignoringEINTR(func() error {
+		return syscall.Truncate(name, size)
+	})
+	if e != nil {
 		return &PathError{"truncate", name, e}
 	}
 	return nil
@@ -277,11 +284,15 @@ func Remove(name string) error {
 	// whether name is a file or directory.
 	// Try both: it is cheaper on average than
 	// doing a Stat plus the right one.
-	e := syscall.Unlink(name)
+	e := ignoringEINTR(func() error {
+		return syscall.Unlink(name)
+	})
 	if e == nil {
 		return nil
 	}
-	e1 := syscall.Rmdir(name)
+	e1 := ignoringEINTR(func() error {
+		return syscall.Rmdir(name)
+	})
 	if e1 == nil {
 		return nil
 	}
@@ -316,7 +327,9 @@ func tempDir() string {
 // Link creates newname as a hard link to the oldname file.
 // If there is an error, it will be of type *LinkError.
 func Link(oldname, newname string) error {
-	e := syscall.Link(oldname, newname)
+	e := ignoringEINTR(func() error {
+		return syscall.Link(oldname, newname)
+	})
 	if e != nil {
 		return &LinkError{"link", oldname, newname, e}
 	}
@@ -326,7 +339,9 @@ func Link(oldname, newname string) error {
 // Symlink creates newname as a symbolic link to oldname.
 // If there is an error, it will be of type *LinkError.
 func Symlink(oldname, newname string) error {
-	e := syscall.Symlink(oldname, newname)
+	e := ignoringEINTR(func() error {
+		return syscall.Symlink(oldname, newname)
+	})
 	if e != nil {
 		return &LinkError{"symlink", oldname, newname, e}
 	}
@@ -365,7 +380,16 @@ func (f *File) readdir(n int) (fi []FileInfo, err error) {
 func Readlink(name string) (string, error) {
 	for len := 128; ; len *= 2 {
 		b := make([]byte, len)
-		n, e := fixCount(syscall.Readlink(name, b))
+		var (
+			n int
+			e error
+		)
+		for {
+			n, e = fixCount(syscall.Readlink(name, b))
+			if e != syscall.EINTR {
+				break
+			}
+		}
 		// buffer too small
 		if runtime.GOOS == "aix" && e == syscall.ERANGE {
 			continue
diff --git a/src/os/getwd.go b/src/os/getwd.go
index 6d25466bb4..f3afd8c06c 100644
--- a/src/os/getwd.go
+++ b/src/os/getwd.go
@@ -45,7 +45,16 @@ func Getwd() (dir string, err error) {
 	// If the operating system provides a Getwd call, use it.
 	// Otherwise, we're trying to find our way back to ".".
 	if syscall.ImplementsGetwd {
-		s, e := syscall.Getwd()
+		var (
+			s string
+			e error
+		)
+		for {
+			s, e = syscall.Getwd()
+			if e != syscall.EINTR {
+				break
+			}
+		}
 		if useSyscallwd(e) {
 			return s, NewSyscallError("getwd", e)
 		}
diff --git a/src/os/stat_unix.go b/src/os/stat_unix.go
index 0a7e6029ac..ef74a43758 100644
--- a/src/os/stat_unix.go
+++ b/src/os/stat_unix.go
@@ -28,7 +28,9 @@ func (f *File) Stat() (FileInfo, error) {
 // statNolog stats a file with no test logging.
 func statNolog(name string) (FileInfo, error) {
 	var fs fileStat
-	err := syscall.Stat(name, &fs.sys)
+	err := ignoringEINTR(func() error {
+		return syscall.Stat(name, &fs.sys)
+	})
 	if err != nil {
 		return nil, &PathError{"stat", name, err}
 	}
@@ -39,7 +41,9 @@ func statNolog(name string) (FileInfo, error) {
 // lstatNolog lstats a file with no test logging.
 func lstatNolog(name string) (FileInfo, error) {
 	var fs fileStat
-	err := syscall.Lstat(name, &fs.sys)
+	err := ignoringEINTR(func() error {
+		return syscall.Lstat(name, &fs.sys)
+	})
 	if err != nil {
 		return nil, &PathError{"lstat", name, err}
 	}
diff --git a/src/runtime/trace/trace_stack_test.go b/src/runtime/trace/trace_stack_test.go
index cfc0419b72..be3adc9801 100644
--- a/src/runtime/trace/trace_stack_test.go
+++ b/src/runtime/trace/trace_stack_test.go
@@ -252,7 +252,7 @@ func TestTraceSymbolize(t *testing.T) {
 			{trace.EvGoSysCall, []frame{
 				{"syscall.read", 0},
 				{"syscall.Read", 0},
-				{"internal/poll.ignoringEINTR", 0},
+				{"internal/poll.ignoringEINTRIO", 0},
 				{"internal/poll.(*FD).Read", 0},
 				{"os.(*File).read", 0},
 				{"os.(*File).Read", 0},
-- 
cgit v1.3


From e94544cf012535da6b3c9e735bc4026e2db1c99c Mon Sep 17 00:00:00 2001
From: Matthew Dempsky <mdempsky@google.com>
Date: Wed, 19 Aug 2020 21:39:12 -0700
Subject: cmd/compile: fix checkptr handling of &^

checkptr has code to recognize &^ expressions, but it didn't take into
account that "p &^ x" gets rewritten to "p & ^x" during walk, which
resulted in false positive diagnostics.

This CL changes walkexpr to mark OANDNOT expressions with Implicit
when they're rewritten to OAND, so that walkCheckPtrArithmetic can
still recognize them later.

It would be slightly more idiomatic to instead mark the OBITNOT
expression as Implicit (as it's a compiler-generated Node), but the
OBITNOT expression might get constant folded. It's not worth the extra
complexity/subtlety of relying on n.Right.Orig, so we set Implicit on
the OAND node instead.

To atone for this transgression, I add documentation for nodeImplicit.

Fixes #40917.

Change-Id: I386304171ad299c530e151e5924f179e9a5fd5b8
Reviewed-on: https://go-review.googlesource.com/c/go/+/249477
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cuong Manh Le <cuong.manhle.vn@gmail.com>
---
 src/cmd/compile/internal/gc/syntax.go     |  4 ++--
 src/cmd/compile/internal/gc/walk.go       |  7 ++++++-
 src/runtime/checkptr_test.go              |  1 +
 src/runtime/testdata/testprog/checkptr.go |  8 ++++++++
 test/fixedbugs/issue40917.go              | 23 +++++++++++++++++++++++
 5 files changed, 40 insertions(+), 3 deletions(-)
 create mode 100644 test/fixedbugs/issue40917.go

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/gc/syntax.go b/src/cmd/compile/internal/gc/syntax.go
index b658410c53..47e5e59156 100644
--- a/src/cmd/compile/internal/gc/syntax.go
+++ b/src/cmd/compile/internal/gc/syntax.go
@@ -141,8 +141,8 @@ const (
 	nodeInitorder, _                   // tracks state during init1; two bits
 	_, _                               // second nodeInitorder bit
 	_, nodeHasBreak
-	_, nodeNoInline // used internally by inliner to indicate that a function call should not be inlined; set for OCALLFUNC and OCALLMETH only
-	_, nodeImplicit
+	_, nodeNoInline  // used internally by inliner to indicate that a function call should not be inlined; set for OCALLFUNC and OCALLMETH only
+	_, nodeImplicit  // implicit OADDR or ODEREF; ++/-- statement represented as OASOP; or ANDNOT lowered to OAND
 	_, nodeIsDDD     // is the argument variadic
 	_, nodeDiag      // already printed error about this
 	_, nodeColas     // OAS resulting from :=
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index 8ae3d9a5c7..74ed0411bd 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -973,6 +973,7 @@ opswitch:
 	case OANDNOT:
 		n.Left = walkexpr(n.Left, init)
 		n.Op = OAND
+		n.SetImplicit(true) // for walkCheckPtrArithmetic
 		n.Right = nod(OBITNOT, n.Right, nil)
 		n.Right = typecheck(n.Right, ctxExpr)
 		n.Right = walkexpr(n.Right, init)
@@ -4003,8 +4004,12 @@ func walkCheckPtrArithmetic(n *Node, init *Nodes) *Node {
 		case OADD:
 			walk(n.Left)
 			walk(n.Right)
-		case OSUB, OANDNOT:
+		case OSUB:
 			walk(n.Left)
+		case OAND:
+			if n.Implicit() { // was OANDNOT
+				walk(n.Left)
+			}
 		case OCONVNOP:
 			if n.Left.Type.Etype == TUNSAFEPTR {
 				n.Left = cheapexpr(n.Left, init)
diff --git a/src/runtime/checkptr_test.go b/src/runtime/checkptr_test.go
index 8ab8a4937c..194cc1243a 100644
--- a/src/runtime/checkptr_test.go
+++ b/src/runtime/checkptr_test.go
@@ -27,6 +27,7 @@ func TestCheckPtr(t *testing.T) {
 		{"CheckPtrAlignmentPtr", "fatal error: checkptr: misaligned pointer conversion\n"},
 		{"CheckPtrAlignmentNoPtr", ""},
 		{"CheckPtrArithmetic", "fatal error: checkptr: pointer arithmetic result points to invalid allocation\n"},
+		{"CheckPtrArithmetic2", "fatal error: checkptr: pointer arithmetic result points to invalid allocation\n"},
 		{"CheckPtrSize", "fatal error: checkptr: converted pointer straddles multiple allocations\n"},
 		{"CheckPtrSmall", "fatal error: checkptr: pointer arithmetic computed bad pointer value\n"},
 	}
diff --git a/src/runtime/testdata/testprog/checkptr.go b/src/runtime/testdata/testprog/checkptr.go
index 45e6fb1aa5..e0a2794f4c 100644
--- a/src/runtime/testdata/testprog/checkptr.go
+++ b/src/runtime/testdata/testprog/checkptr.go
@@ -10,6 +10,7 @@ func init() {
 	register("CheckPtrAlignmentNoPtr", CheckPtrAlignmentNoPtr)
 	register("CheckPtrAlignmentPtr", CheckPtrAlignmentPtr)
 	register("CheckPtrArithmetic", CheckPtrArithmetic)
+	register("CheckPtrArithmetic2", CheckPtrArithmetic2)
 	register("CheckPtrSize", CheckPtrSize)
 	register("CheckPtrSmall", CheckPtrSmall)
 }
@@ -32,6 +33,13 @@ func CheckPtrArithmetic() {
 	sink2 = (*int)(unsafe.Pointer(i))
 }
 
+func CheckPtrArithmetic2() {
+	var x [2]int64
+	p := unsafe.Pointer(&x[1])
+	var one uintptr = 1
+	sink2 = unsafe.Pointer(uintptr(p) & ^one) // written as "& ^one", not "&^"; TestCheckPtr expects checkptr to report this
+}
+
 func CheckPtrSize() {
 	p := new(int64)
 	sink2 = p
diff --git a/test/fixedbugs/issue40917.go b/test/fixedbugs/issue40917.go
new file mode 100644
index 0000000000..2128be5eca
--- /dev/null
+++ b/test/fixedbugs/issue40917.go
@@ -0,0 +1,23 @@
+// run -gcflags=-d=checkptr
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import "unsafe"
+
+func main() {
+	var x [2]uint64
+	a := unsafe.Pointer(&x[1])
+
+	b := a
+	b = unsafe.Pointer(uintptr(b) + 2)  // a+2
+	b = unsafe.Pointer(uintptr(b) - 1)  // a+1
+	b = unsafe.Pointer(uintptr(b) &^ 1) // clear the low bit; expected to land back on a
+
+	if a != b {
+		panic("pointer arithmetic failed")
+	}
+}
-- 
cgit v1.3