[dev.cc] all: merge master (5f1efe7) into dev.cc

Conflicts: src/cmd/dist/build.go Change-Id: I98a4b5e010bee91507b85bb8efd9c74e1a1f649c
author: Russ Cox <rsc@golang.org> 2015-02-13 12:50:23 -0500
committer: Russ Cox <rsc@golang.org> 2015-02-13 12:51:56 -0500
commit: 87de9ce212988c8bdf0630750e772d8805091bcc (patch)
tree: 0bb1cc671417e9b851d35a4bbcd4d756e5aee4e9 /src/runtime
parent: 01925bd3f306c899cddfa59aa2ad41c9b77fcd74 (diff)
parent: 5f1efe738be296cdbc586348af92eab621d068f5 (diff)
download: go-87de9ce212988c8bdf0630750e772d8805091bcc.tar.xz
60 files changed, 1928 insertions, 454 deletions
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s
index 49bba32ebe..58a0d502bd 100644
--- a/src/runtime/asm_386.s
+++ b/src/runtime/asm_386.s
@@ -1298,26 +1298,22 @@ eq:
 	RET
 
 // eqstring tests whether two strings are equal.
+// The compiler guarantees that strings passed
+// to eqstring have equal length.
 // See runtime_test.go:eqstring_generic for
 // equivalent Go code.
 TEXT runtime·eqstring(SB),NOSPLIT,$0-17
-	MOVL	s1len+4(FP), AX
-	MOVL	s2len+12(FP), BX
-	CMPL	AX, BX
-	JNE	different
 	MOVL	s1str+0(FP), SI
 	MOVL	s2str+8(FP), DI
 	CMPL	SI, DI
 	JEQ	same
+	MOVL	s1len+4(FP), BX
 	CALL	runtime·memeqbody(SB)
 	MOVB	AX, v+16(FP)
 	RET
 same:
 	MOVB	$1, v+16(FP)
 	RET
-different:
-	MOVB	$0, v+16(FP)
-	RET
 
 TEXT bytes·Equal(SB),NOSPLIT,$0-25
 	MOVL	a_len+4(FP), BX
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index f09e5ae250..f6c1c5f6e6 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -1262,26 +1262,22 @@ eq:
 	RET
 
 // eqstring tests whether two strings are equal.
+// The compiler guarantees that strings passed
+// to eqstring have equal length.
 // See runtime_test.go:eqstring_generic for
 // equivalent Go code.
 TEXT runtime·eqstring(SB),NOSPLIT,$0-33
-	MOVQ	s1len+8(FP), AX
-	MOVQ	s2len+24(FP), BX
-	CMPQ	AX, BX
-	JNE	noteq
 	MOVQ	s1str+0(FP), SI
 	MOVQ	s2str+16(FP), DI
 	CMPQ	SI, DI
 	JEQ	eq
+	MOVQ	s1len+8(FP), BX
 	CALL	runtime·memeqbody(SB)
 	MOVB	AX, v+32(FP)
 	RET
 eq:
 	MOVB	$1, v+32(FP)
 	RET
-noteq:
-	MOVB	$0, v+32(FP)
-	RET
 
 // a in SI
 // b in DI
diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s
index 77355bb998..f2324285a5 100644
--- a/src/runtime/asm_amd64p32.s
+++ b/src/runtime/asm_amd64p32.s
@@ -704,26 +704,22 @@ eq:
 	RET
 
 // eqstring tests whether two strings are equal.
+// The compiler guarantees that strings passed
+// to eqstring have equal length.
 // See runtime_test.go:eqstring_generic for
 // equivalent Go code.
 TEXT runtime·eqstring(SB),NOSPLIT,$0-17
-	MOVL	s1len+4(FP), AX
-	MOVL	s2len+12(FP), BX
-	CMPL	AX, BX
-	JNE	different
 	MOVL	s1str+0(FP), SI
 	MOVL	s2str+8(FP), DI
 	CMPL	SI, DI
 	JEQ	same
+	MOVL	s1len+4(FP), BX
 	CALL	runtime·memeqbody(SB)
 	MOVB	AX, v+16(FP)
 	RET
 same:
 	MOVB	$1, v+16(FP)
 	RET
-different:
-	MOVB	$0, v+16(FP)
-	RET
 
 // a in SI
 // b in DI
diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s
index f3ce1a8f17..2efeaaa531 100644
--- a/src/runtime/asm_arm.s
+++ b/src/runtime/asm_arm.s
@@ -39,20 +39,8 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$-4
 
 	BL	runtime·emptyfunc(SB)	// fault if stack check is wrong
 
-#ifndef GOOS_nacl
-	// if there is an _cgo_init, call it.
-	MOVW	_cgo_init(SB), R4
-	CMP	$0, R4
-	B.EQ	nocgo
-	MRC     15, 0, R0, C13, C0, 3 	// load TLS base pointer
-	MOVW 	R0, R3 			// arg 3: TLS base pointer
-	MOVW 	$runtime·tlsg(SB), R2 	// arg 2: tlsg
-	MOVW	$setg_gcc<>(SB), R1 	// arg 1: setg
-	MOVW	g, R0 			// arg 0: G
-	BL	(R4) // will clobber R0-R3
-#endif
+	BL	runtime·_initcgo(SB)	// will clobber R0-R3
 
-nocgo:
 	// update stackguard after _cgo_init
 	MOVW	(g_stack+stack_lo)(g), R0
 	ADD	$const__StackGuard, R0
@@ -806,21 +794,18 @@ eq:
 	RET
 
 // eqstring tests whether two strings are equal.
+// The compiler guarantees that strings passed
+// to eqstring have equal length.
 // See runtime_test.go:eqstring_generic for
 // equivalent Go code.
 TEXT runtime·eqstring(SB),NOSPLIT,$-4-17
-	MOVW	s1len+4(FP), R0
-	MOVW	s2len+12(FP), R1
-	MOVW	$0, R7
-	CMP	R0, R1
-	MOVB.NE R7, v+16(FP)
-	RET.NE
 	MOVW	s1str+0(FP), R2
 	MOVW	s2str+8(FP), R3
 	MOVW	$1, R8
 	MOVB	R8, v+16(FP)
 	CMP	R2, R3
 	RET.EQ
+	MOVW	s1len+4(FP), R0
 	ADD	R2, R0, R6
 loop:
 	CMP	R2, R6
@@ -829,14 +814,10 @@ loop:
 	MOVBU.P	1(R3), R5
 	CMP	R4, R5
 	BEQ	loop
-	MOVB	R7, v+16(FP)
+	MOVW	$0, R8
+	MOVB	R8, v+16(FP)
 	RET
 
-// void setg_gcc(G*); set g called from gcc.
-TEXT setg_gcc<>(SB),NOSPLIT,$0
-	MOVW	R0, g
-	B		runtime·save_g(SB)
-
 // TODO: share code with memeq?
 TEXT bytes·Equal(SB),NOSPLIT,$0
 	MOVW	a_len+4(FP), R1
diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s
index 660c7cdfe5..694dfcc24e 100644
--- a/src/runtime/asm_ppc64x.s
+++ b/src/runtime/asm_ppc64x.s
@@ -969,30 +969,30 @@ eq:
 	RETURN
 
 // eqstring tests whether two strings are equal.
+// The compiler guarantees that strings passed
+// to eqstring have equal length.
 // See runtime_test.go:eqstring_generic for
 // equivalent Go code.
 TEXT runtime·eqstring(SB),NOSPLIT,$0-33
-	MOVD	s1len+8(FP), R4
-	MOVD	s2len+24(FP), R5
-	CMP	R4, R5
-	BNE	noteq
-
 	MOVD	s1str+0(FP), R3
 	MOVD	s2str+16(FP), R4
+	MOVD	$1, R5
+	MOVB	R5, ret+32(FP)
+	CMP	R3, R4
+	BNE	2(PC)
+	RETURN
+	MOVD	s1len+8(FP), R5
 	SUB	$1, R3
 	SUB	$1, R4
 	ADD	R3, R5, R8
 loop:
 	CMP	R3, R8
-	BNE	4(PC)
-	MOVD	$1, R3
-	MOVB	R3, ret+32(FP)
+	BNE	2(PC)
 	RETURN
 	MOVBZU	1(R3), R6
 	MOVBZU	1(R4), R7
 	CMP	R6, R7
 	BEQ	loop
-noteq:
 	MOVB	R0, ret+32(FP)
 	RETURN
 
diff --git a/src/runtime/cgo/cgo.go b/src/runtime/cgo/cgo.go
index 8528692f7b..9a41399cd6 100644
--- a/src/runtime/cgo/cgo.go
+++ b/src/runtime/cgo/cgo.go
@@ -11,7 +11,7 @@ package cgo
 
 /*
 
-#cgo darwin LDFLAGS: -lpthread
+#cgo darwin,!arm LDFLAGS: -lpthread
 #cgo dragonfly LDFLAGS: -lpthread
 #cgo freebsd LDFLAGS: -lpthread
 #cgo android LDFLAGS: -llog
diff --git a/src/runtime/cgo/gcc_arm.S b/src/runtime/cgo/gcc_arm.S
index d5833bfad0..980ab579e4 100644
--- a/src/runtime/cgo/gcc_arm.S
+++ b/src/runtime/cgo/gcc_arm.S
@@ -11,6 +11,10 @@
 #define EXT(s) s
 #endif
 
+// Apple's ld64 wants 4-byte alignment for ARM code sections.
+// .align in both Apple as and GNU as treats n as aligning to 2**n bytes.
+.align	2
+
 /*
  * void crosscall_arm1(void (*fn)(void), void (*setg_gcc)(void *g), void *g)
  *
diff --git a/src/runtime/cgo/gcc_darwin_arm.c b/src/runtime/cgo/gcc_darwin_arm.c
new file mode 100644
index 0000000000..d56c55777d
--- /dev/null
+++ b/src/runtime/cgo/gcc_darwin_arm.c
@@ -0,0 +1,99 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string.h> /* for strerror */
+#include <pthread.h>
+#include <signal.h>
+#include <limits.h>
+#include "libcgo.h"
+
+#define magic (0xe696c4f4U)
+
+// inittls allocates a thread-local storage slot for g.
+//
+// It finds the first available slot using pthread_key_create and uses
+// it as the offset value for runtime.tlsg.
+static void
+inittls(void **tlsg, void **tlsbase)
+{
+	pthread_key_t k;
+	int i, err;
+
+	err = pthread_key_create(&k, nil);
+	if(err != 0) {
+		fprintf(stderr, "runtime/cgo: pthread_key_create failed: %d\n", err);
+		abort();
+	}
+	//fprintf(stderr, "runtime/cgo: k = %d, tlsbase = %p\n", (int)k, tlsbase); // debug
+	pthread_setspecific(k, (void*)magic);
+	// The first key should be at 258.
+	for (i=0; i<PTHREAD_KEYS_MAX; i++) {
+		if (*(tlsbase+i) == (void*)magic) {
+			*tlsg = (void*)(i*sizeof(void *));
+			pthread_setspecific(k, 0);
+			return;
+		}
+	}
+	fprintf(stderr, "runtime/cgo: could not find pthread key.\n");
+	abort();
+}
+
+static void *threadentry(void*);
+void (*setg_gcc)(void*);
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+	pthread_attr_t attr;
+	sigset_t ign, oset;
+	pthread_t p;
+	size_t size;
+	int err;
+
+	sigfillset(&ign);
+	pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+	pthread_attr_init(&attr);
+	size = 0;
+	pthread_attr_getstacksize(&attr, &size);
+	// Leave stacklo=0 and set stackhi=size; mstack will do the rest.
+	ts->g->stackhi = size;
+	err = pthread_create(&p, &attr, threadentry, ts);
+
+	pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+	if (err != 0) {
+		fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+		abort();
+	}
+}
+
+extern void crosscall_arm1(void (*fn)(void), void (*setg_gcc)(void*), void *g);
+static void*
+threadentry(void *v)
+{
+	ThreadStart ts;
+
+	ts = *(ThreadStart*)v;
+	free(v);
+
+	crosscall_arm1(ts.fn, setg_gcc, (void*)ts.g);
+	return nil;
+}
+
+void
+x_cgo_init(G *g, void (*setg)(void*), void **tlsg, void **tlsbase)
+{
+	pthread_attr_t attr;
+	size_t size;
+
+	setg_gcc = setg;
+	pthread_attr_init(&attr);
+	pthread_attr_getstacksize(&attr, &size);
+	g->stacklo = (uintptr)&attr - size + 4096;
+	pthread_attr_destroy(&attr);
+
+	// yes, tlsbase from mrc might not be correctly aligned.
+	inittls(tlsg, (void**)((uintptr)tlsbase & ~3));
+}
diff --git a/src/runtime/compiler.go b/src/runtime/compiler.go
index 562a460226..f6edc95959 100644
--- a/src/runtime/compiler.go
+++ b/src/runtime/compiler.go
@@ -7,7 +7,7 @@ package runtime
 // Compiler is the name of the compiler toolchain that built the
 // running binary.  Known toolchains are:
 //
-//	gc      The 5g/6g/8g compiler suite at code.google.com/p/go.
+//	gc      The 5g/6g/8g compiler suite at go.googlesource.com/go.
 //	gccgo   The gccgo front end, part of the GCC compiler suite.
 //
 const Compiler = "gc"
diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go
index 43cea9008a..715b2da232 100644
--- a/src/runtime/crash_test.go
+++ b/src/runtime/crash_test.go
@@ -10,6 +10,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"regexp"
 	"runtime"
 	"strings"
 	"sync"
@@ -17,17 +18,20 @@ import (
 	"text/template"
 )
 
-// testEnv excludes GODEBUG from the environment
-// to prevent its output from breaking tests that
-// are trying to parse other command output.
 func testEnv(cmd *exec.Cmd) *exec.Cmd {
 	if cmd.Env != nil {
 		panic("environment already set")
 	}
 	for _, env := range os.Environ() {
+		// Exclude GODEBUG from the environment to prevent its output
+		// from breaking tests that are trying to parse other command output.
 		if strings.HasPrefix(env, "GODEBUG=") {
 			continue
 		}
+		// Exclude GOTRACEBACK for the same reason.
+		if strings.HasPrefix(env, "GOTRACEBACK=") {
+			continue
+		}
 		cmd.Env = append(cmd.Env, env)
 	}
 	return cmd
@@ -217,6 +221,14 @@ func TestMainGoroutineId(t *testing.T) {
 	}
 }
 
+func TestNoHelperGoroutines(t *testing.T) {
+	output := executeTest(t, noHelperGoroutinesSource, nil)
+	matches := regexp.MustCompile(`goroutine [0-9]+ \[`).FindAllStringSubmatch(output, -1)
+	if len(matches) != 1 || matches[0][0] != "goroutine 1 [" {
+		t.Fatalf("want to see only goroutine 1, see:\n%s", output)
+	}
+}
+
 func TestBreakpoint(t *testing.T) {
 	output := executeTest(t, breakpointSource, nil)
 	want := "runtime.Breakpoint()"
@@ -431,6 +443,22 @@ func main() {
 }
 `
 
+const noHelperGoroutinesSource = `
+package main
+import (
+	"runtime"
+	"time"
+)
+func init() {
+	i := 0
+	runtime.SetFinalizer(&i, func(p *int) {})
+	time.AfterFunc(time.Hour, func() {})
+	panic("oops")
+}
+func main() {
+}
+`
+
 const breakpointSource = `
 package main
 import "runtime"
diff --git a/src/runtime/defs_darwin_arm.go b/src/runtime/defs_darwin_arm.go
new file mode 100644
index 0000000000..92bab509fb
--- /dev/null
+++ b/src/runtime/defs_darwin_arm.go
@@ -0,0 +1,245 @@
+// Note: cgo can't handle some Darwin/ARM structures, so this file can't
+// be auto generated by cgo yet.
+// Created based on output of `cgo -cdefs defs_darwin.go` and Darwin/ARM
+// specific header (mainly mcontext and ucontext related stuff)
+
+package runtime
+
+import "unsafe"
+
+const (
+	_EINTR  = 0x4
+	_EFAULT = 0xe
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x1000
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_DONTNEED = 0x4
+	_MADV_FREE     = 0x5
+
+	_MACH_MSG_TYPE_MOVE_RECEIVE   = 0x10
+	_MACH_MSG_TYPE_MOVE_SEND      = 0x11
+	_MACH_MSG_TYPE_MOVE_SEND_ONCE = 0x12
+	_MACH_MSG_TYPE_COPY_SEND      = 0x13
+	_MACH_MSG_TYPE_MAKE_SEND      = 0x14
+	_MACH_MSG_TYPE_MAKE_SEND_ONCE = 0x15
+	_MACH_MSG_TYPE_COPY_RECEIVE   = 0x16
+
+	_MACH_MSG_PORT_DESCRIPTOR         = 0x0
+	_MACH_MSG_OOL_DESCRIPTOR          = 0x1
+	_MACH_MSG_OOL_PORTS_DESCRIPTOR    = 0x2
+	_MACH_MSG_OOL_VOLATILE_DESCRIPTOR = 0x3
+
+	_MACH_MSGH_BITS_COMPLEX = 0x80000000
+
+	_MACH_SEND_MSG  = 0x1
+	_MACH_RCV_MSG   = 0x2
+	_MACH_RCV_LARGE = 0x4
+
+	_MACH_SEND_TIMEOUT   = 0x10
+	_MACH_SEND_INTERRUPT = 0x40
+	_MACH_SEND_ALWAYS    = 0x10000
+	_MACH_SEND_TRAILER   = 0x20000
+	_MACH_RCV_TIMEOUT    = 0x100
+	_MACH_RCV_NOTIFY     = 0x200
+	_MACH_RCV_INTERRUPT  = 0x400
+	_MACH_RCV_OVERWRITE  = 0x1000
+
+	_NDR_PROTOCOL_2_0      = 0x0
+	_NDR_INT_BIG_ENDIAN    = 0x0
+	_NDR_INT_LITTLE_ENDIAN = 0x1
+	_NDR_FLOAT_IEEE        = 0x0
+	_NDR_CHAR_ASCII        = 0x0
+
+	_SA_SIGINFO   = 0x40
+	_SA_RESTART   = 0x2
+	_SA_ONSTACK   = 0x1
+	_SA_USERTRAMP = 0x100
+	_SA_64REGSET  = 0x200
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x10
+	_SIGSTOP   = 0x11
+	_SIGTSTP   = 0x12
+	_SIGCONT   = 0x13
+	_SIGCHLD   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGIO     = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGINFO   = 0x1d
+	_SIGUSR1   = 0x1e
+	_SIGUSR2   = 0x1f
+
+	_FPE_INTDIV = 0x7
+	_FPE_INTOVF = 0x8
+	_FPE_FLTDIV = 0x1
+	_FPE_FLTOVF = 0x2
+	_FPE_FLTUND = 0x3
+	_FPE_FLTRES = 0x4
+	_FPE_FLTINV = 0x5
+	_FPE_FLTSUB = 0x6
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EV_ADD       = 0x1
+	_EV_DELETE    = 0x2
+	_EV_CLEAR     = 0x20
+	_EV_RECEIPT   = 0x40
+	_EV_ERROR     = 0x4000
+	_EVFILT_READ  = -0x1
+	_EVFILT_WRITE = -0x2
+)
+
+type machbody struct {
+	msgh_descriptor_count uint32
+}
+
+type machheader struct {
+	msgh_bits        uint32
+	msgh_size        uint32
+	msgh_remote_port uint32
+	msgh_local_port  uint32
+	msgh_reserved    uint32
+	msgh_id          int32
+}
+
+type machndr struct {
+	mig_vers     uint8
+	if_vers      uint8
+	reserved1    uint8
+	mig_encoding uint8
+	int_rep      uint8
+	char_rep     uint8
+	float_rep    uint8
+	reserved2    uint8
+}
+
+type machport struct {
+	name        uint32
+	pad1        uint32
+	pad2        uint16
+	disposition uint8
+	_type       uint8
+}
+
+type stackt struct {
+	ss_sp    *byte
+	ss_size  uintptr
+	ss_flags int32
+}
+
+type sigactiont struct {
+	__sigaction_u [4]byte
+	sa_tramp      unsafe.Pointer
+	sa_mask       uint32
+	sa_flags      int32
+}
+
+type siginfo struct {
+	si_signo  int32
+	si_errno  int32
+	si_code   int32
+	si_pid    int32
+	si_uid    uint32
+	si_status int32
+	si_addr   *byte
+	si_value  [4]byte
+	si_band   int32
+	__pad     [7]uint32
+}
+
+type timeval struct {
+	tv_sec  int32
+	tv_usec int32
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = x
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type timespec struct {
+	tv_sec  int32
+	tv_nsec int32
+}
+
+type floatstate32 struct {
+	r     [32]uint32
+	fpscr uint32
+}
+
+type regs32 struct {
+	r    [13]uint32 // r0 to r12
+	sp   uint32     // r13
+	lr   uint32     // r14
+	pc   uint32     // r15
+	cpsr uint32
+}
+
+type exceptionstate32 struct {
+	trapno     uint32 // NOTE: on 386, the trapno field is split into trapno and cpu
+	err        uint32
+	faultvaddr uint32
+}
+
+type mcontext32 struct {
+	es exceptionstate32
+	ss regs32
+	fs floatstate32
+}
+
+type ucontext struct {
+	uc_onstack  int32
+	uc_sigmask  uint32
+	uc_stack    stackt
+	uc_link     *ucontext
+	uc_mcsize   uint32
+	uc_mcontext *mcontext32
+}
+
+type keventt struct {
+	ident  uint32
+	filter int16
+	flags  uint16
+	fflags uint32
+	data   int32
+	udata  *byte
+}
diff --git a/src/runtime/defs_windows_386.go b/src/runtime/defs_windows_386.go
index abec2d839f..c860f74a3f 100644
--- a/src/runtime/defs_windows_386.go
+++ b/src/runtime/defs_windows_386.go
@@ -101,6 +101,12 @@ type context struct {
 	extendedregisters [512]uint8
 }
 
+func (c *context) ip() uintptr { return uintptr(c.eip) }
+func (c *context) sp() uintptr { return uintptr(c.esp) }
+
+func (c *context) setip(x uintptr) { c.eip = uint32(x) }
+func (c *context) setsp(x uintptr) { c.esp = uint32(x) }
+
 type overlapped struct {
 	internal     uint32
 	internalhigh uint32
diff --git a/src/runtime/defs_windows_amd64.go b/src/runtime/defs_windows_amd64.go
index 81b13597b7..d1e55ec426 100644
--- a/src/runtime/defs_windows_amd64.go
+++ b/src/runtime/defs_windows_amd64.go
@@ -116,6 +116,12 @@ type context struct {
 	lastexceptionfromrip uint64
 }
 
+func (c *context) ip() uintptr { return uintptr(c.rip) }
+func (c *context) sp() uintptr { return uintptr(c.rsp) }
+
+func (c *context) setip(x uintptr) { c.rip = uint64(x) }
+func (c *context) setsp(x uintptr) { c.rsp = uint64(x) }
+
 type overlapped struct {
 	internal     uint64
 	internalhigh uint64
diff --git a/src/runtime/gcwork.go b/src/runtime/gcwork.go
new file mode 100644
index 0000000000..cf5a97957f
--- /dev/null
+++ b/src/runtime/gcwork.go
@@ -0,0 +1,338 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+const (
+	_Debugwbufs  = true    // if true check wbufs consistency
+	_WorkbufSize = 1 * 256 // in bytes - if small wbufs are passed to GC in a timely fashion.
+)
+
+type workbufhdr struct {
+	node  lfnode // must be first
+	nobj  uintptr
+	id    uintptr
+	inuse bool       // This workbuf is in use by some goroutine and is not on the work.empty/partial/full queues.
+	log   [4]uintptr // line numbers forming a history of ownership changes to workbuf
+}
+
+type workbuf struct {
+	workbufhdr
+	// account for the above fields
+	obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / ptrSize]uintptr
+}
+
+// workbuf factory routines. These funcs are used to manage the
+// workbufs. They cache workbuf in the m struct field currentwbuf.
+// If the GC asks for some work these are the only routines that
+// make partially full wbufs available to the GC.
+// Each of the gets and puts also takes a distinct integer that is used
+// to record a brief history of changes to ownership of the workbuf.
+// The convention is to use a unique line number but any encoding
+// is permissible. For example if you want to pass in 2 bits of information
+// you could simply add lineno1*100000+lineno2.
+
+// logget records the past few values of entry to aid in debugging.
+// logget checks the buffer b is not currently in use.
+func (b *workbuf) logget(entry uintptr) {
+	if !_Debugwbufs {
+		return
+	}
+	if b.inuse {
+		println("runtime: logget fails log entry=", entry,
+			"b.log[0]=", b.log[0], "b.log[1]=", b.log[1],
+			"b.log[2]=", b.log[2], "b.log[3]=", b.log[3])
+		throw("logget: get not legal")
+	}
+	b.inuse = true
+	copy(b.log[1:], b.log[:])
+	b.log[0] = entry
+}
+
+// logput records the past few values of entry to aid in debugging.
+// logput checks the buffer b is currently in use.
+func (b *workbuf) logput(entry uintptr) {
+	if !_Debugwbufs {
+		return
+	}
+	if !b.inuse {
+		println("runtime:logput fails log entry=", entry,
+			"b.log[0]=", b.log[0], "b.log[1]=", b.log[1],
+			"b.log[2]=", b.log[2], "b.log[3]=", b.log[3])
+		throw("logput: put not legal")
+	}
+	b.inuse = false
+	copy(b.log[1:], b.log[:])
+	b.log[0] = entry
+}
+
+func (b *workbuf) checknonempty() {
+	if b.nobj == 0 {
+		println("runtime: nonempty check fails",
+			"b.log[0]=", b.log[0], "b.log[1]=", b.log[1],
+			"b.log[2]=", b.log[2], "b.log[3]=", b.log[3])
+		throw("workbuf is empty")
+	}
+}
+
+func (b *workbuf) checkempty() {
+	if b.nobj != 0 {
+		println("runtime: empty check fails",
+			"b.log[0]=", b.log[0], "b.log[1]=", b.log[1],
+			"b.log[2]=", b.log[2], "b.log[3]=", b.log[3])
+		throw("workbuf is not empty")
+	}
+}
+
+// checknocurrentwbuf checks that the m's currentwbuf field is empty
+func checknocurrentwbuf() {
+	if getg().m.currentwbuf != 0 {
+		throw("unexpected currentwbuf")
+	}
+}
+
+// getempty pops an empty work buffer off the work.empty list,
+// allocating new buffers if none are available.
+// entry is used to record a brief history of ownership.
+//go:nowritebarrier
+func getempty(entry uintptr) *workbuf {
+	var b *workbuf
+	if work.empty != 0 {
+		b = (*workbuf)(lfstackpop(&work.empty))
+		if b != nil {
+			b.checkempty()
+		}
+	}
+	if b == nil {
+		b = (*workbuf)(persistentalloc(unsafe.Sizeof(*b), _CacheLineSize, &memstats.gc_sys))
+	}
+	b.logget(entry)
+	return b
+}
+
+// putempty puts a workbuf onto the work.empty list.
+// Upon entry this goroutine owns b. The lfstackpush relinquishes ownership.
+//go:nowritebarrier
+func putempty(b *workbuf, entry uintptr) {
+	b.checkempty()
+	b.logput(entry)
+	lfstackpush(&work.empty, &b.node)
+}
+
+// putfull puts the workbuf on the work.full list for the GC.
+// putfull accepts partially full buffers so the GC can avoid competing
+// with the mutators for ownership of partially full buffers.
+//go:nowritebarrier
+func putfull(b *workbuf, entry uintptr) {
+	b.checknonempty()
+	b.logput(entry)
+	lfstackpush(&work.full, &b.node)
+}
+
+// getpartialorempty tries to return a partially empty buffer
+// and if none are available returns an empty one.
+// entry is used to provide a brief history of ownership,
+// using entry + xxx00000 to
+// indicate that there are two line numbers in the call chain.
+//go:nowritebarrier
+func getpartialorempty(entry uintptr) *workbuf {
+	var b *workbuf
+	// If this m has a buf in currentwbuf then as an optimization
+	// simply return that buffer. If it turns out currentwbuf
+	// is full, put it on the work.full queue and get another
+	// workbuf off the partial or empty queue.
+	if getg().m.currentwbuf != 0 {
+		b = (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, 0)))
+		if b != nil {
+			if b.nobj <= uintptr(len(b.obj)) {
+				return b
+			}
+			putfull(b, entry+80100000)
+		}
+	}
+	b = (*workbuf)(lfstackpop(&work.partial))
+	if b != nil {
+		b.logget(entry)
+		return b
+	}
+	// Let getempty do the logget check but
+	// use the entry to encode that it passed
+	// through this routine.
+	b = getempty(entry + 80700000)
+	return b
+}
+
+// putpartial puts empty buffers on the work.empty queue,
+// full buffers on the work.full queue and
+// others on the work.partial queue.
+// entry is used to provide a brief history of ownership,
+// using entry + xxx00000 to
+// indicate that there are two call chain line numbers.
+//go:nowritebarrier
+func putpartial(b *workbuf, entry uintptr) {
+	if b.nobj == 0 {
+		putempty(b, entry+81500000)
+	} else if b.nobj < uintptr(len(b.obj)) {
+		b.logput(entry)
+		lfstackpush(&work.partial, &b.node)
+	} else if b.nobj == uintptr(len(b.obj)) {
+		b.logput(entry)
+		lfstackpush(&work.full, &b.node)
+	} else {
+		throw("putpartial: bad Workbuf b.nobj")
+	}
+}
+
+// trygetfull tries to get a full or partially empty workbuffer.
+// If one is not immediately available return nil
+//go:nowritebarrier
+func trygetfull(entry uintptr) *workbuf {
+	b := (*workbuf)(lfstackpop(&work.full))
+	if b == nil {
+		b = (*workbuf)(lfstackpop(&work.partial))
+	}
+	if b != nil {
+		b.logget(entry)
+		b.checknonempty()
+		return b
+	}
+	// full and partial are both empty so see if there
+	// is any work available on currentwbuf.
+	// This is an optimization to shift
+	// processing from the STW marktermination phase into
+	// the concurrent mark phase.
+	if getg().m.currentwbuf != 0 {
+		b = (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, 0)))
+		if b != nil {
+			if b.nobj != 0 {
+				return b
+			}
+			putempty(b, 839)
+			b = nil
+		}
+	}
+	return b
+}
+
+// Get a full work buffer off the work.full or a partially
+// filled one off the work.partial list. If nothing is available
+// wait until all the other gc helpers have finished and then
+// return nil.
+// getfull acts as a barrier for work.nproc helpers. As long as one
+// gchelper is actively marking objects it
+// may create a workbuffer that the other helpers can work on.
+// The for loop either exits when a work buffer is found
+// or when _all_ of the work.nproc GC helpers are in the loop
+// looking for work and thus not capable of creating new work.
+// This is in fact the termination condition for the STW mark
+// phase.
+//go:nowritebarrier
+func getfull(entry uintptr) *workbuf {
+	b := (*workbuf)(lfstackpop(&work.full))
+	if b != nil {
+		b.logget(entry)
+		b.checknonempty()
+		return b
+	}
+	b = (*workbuf)(lfstackpop(&work.partial))
+	if b != nil {
+		b.logget(entry)
+		return b
+	}
+	// Make sure that currentwbuf is also not a source for pointers to be
+	// processed. This is an optimization that shifts processing
+	// from the mark termination STW phase to the concurrent mark phase.
+	if getg().m.currentwbuf != 0 {
+		b = (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, 0)))
+		if b != nil {
+			if b.nobj != 0 {
+				return b
+			}
+			putempty(b, 877)
+			b = nil
+		}
+	}
+
+	xadd(&work.nwait, +1)
+	for i := 0; ; i++ {
+		if work.full != 0 {
+			xadd(&work.nwait, -1)
+			b = (*workbuf)(lfstackpop(&work.full))
+			if b == nil {
+				b = (*workbuf)(lfstackpop(&work.partial))
+			}
+			if b != nil {
+				b.logget(entry)
+				b.checknonempty()
+				return b
+			}
+			xadd(&work.nwait, +1)
+		}
+		if work.nwait == work.nproc {
+			return nil
+		}
+		_g_ := getg()
+		if i < 10 {
+			_g_.m.gcstats.nprocyield++
+			procyield(20)
+		} else if i < 20 {
+			_g_.m.gcstats.nosyield++
+			osyield()
+		} else {
+			_g_.m.gcstats.nsleep++
+			usleep(100)
+		}
+	}
+}
+
+//go:nowritebarrier
+func handoff(b *workbuf) *workbuf {
+	// Make new buffer with half of b's pointers.
+	b1 := getempty(915)
+	n := b.nobj / 2
+	b.nobj -= n
+	b1.nobj = n
+	memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), n*unsafe.Sizeof(b1.obj[0]))
+	_g_ := getg()
+	_g_.m.gcstats.nhandoff++
+	_g_.m.gcstats.nhandoffcnt += uint64(n)
+
+	// Put b on full list - let first half of b get stolen.
+	putfull(b, 942)
+	return b1
+}
+
+// 1 when you are harvesting so that the write buffer code shade can
+// detect calls during a presumable STW write barrier.
+var harvestingwbufs uint32
+
+// harvestwbufs moves non-empty workbufs to work.full from m.currentwbuf.
+// Must be in a STW phase.
+// xchguintptr is used since there are write barrier calls from the GC helper
+// routines even during a STW phase.
+// TODO: chase down write barrier calls in STW phase and understand and eliminate
+// them.
+//go:nowritebarrier
+func harvestwbufs() {
+	// announce to write buffer that you are harvesting the currentwbufs
+	atomicstore(&harvestingwbufs, 1)
+
+	for mp := allm; mp != nil; mp = mp.alllink {
+		wbuf := (*workbuf)(unsafe.Pointer(xchguintptr(&mp.currentwbuf, 0)))
+		// TODO: beat write barriers out of the mark termination and eliminate xchg
+		//		tempwbuf := (*workbuf)(unsafe.Pointer(tempm.currentwbuf))
+		//		tempm.currentwbuf = 0
+		if wbuf != nil {
+			if wbuf.nobj == 0 {
+				putempty(wbuf, 945)
+			} else {
+				putfull(wbuf, 947) //use full instead of partial so GC doesn't compete to get wbuf
+			}
+		}
+	}
+
+	atomicstore(&harvestingwbufs, 0)
+}
diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go
index 058d1c76c4..c7c1198259 100644
--- a/src/runtime/hashmap.go
+++ b/src/runtime/hashmap.go
@@ -182,8 +182,14 @@ func (h *hmap) createOverflow() {
 	}
 }
 
-func makemap(t *maptype, hint int64) *hmap {
+// makemap implements Go map creation for make(map[k]v, hint).
+// If the compiler has determined that the map or the first bucket
+// can be created on the stack, h and/or bucket may be non-nil.
+// If h != nil, the map can be created directly in h.
+// If bucket != nil, bucket can be used as the first bucket.
+func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap {
 	if sz := unsafe.Sizeof(hmap{}); sz > 48 || sz != uintptr(t.hmap.size) {
+		println("runtime: sizeof(hmap) =", sz, ", t.hmap.size =", t.hmap.size)
 		throw("bad hmap size")
 	}
 
@@ -238,7 +244,7 @@ func makemap(t *maptype, hint int64) *hmap {
 	// allocate initial hash table
 	// if B == 0, the buckets field is allocated lazily later (in mapassign)
 	// If hint is large zeroing this memory could take a while.
-	var buckets unsafe.Pointer
+	buckets := bucket
 	if B != 0 {
 		if checkgc {
 			memstats.next_gc = memstats.heap_alloc
@@ -250,7 +256,9 @@ func makemap(t *maptype, hint int64) *hmap {
 	if checkgc {
 		memstats.next_gc = memstats.heap_alloc
 	}
-	h := (*hmap)(newobject(t.hmap))
+	if h == nil {
+		h = (*hmap)(newobject(t.hmap))
+	}
 	h.count = 0
 	h.B = B
 	h.flags = 0
@@ -956,7 +964,7 @@ func ismapkey(t *_type) bool {
 
 //go:linkname reflect_makemap reflect.makemap
 func reflect_makemap(t *maptype) *hmap {
-	return makemap(t, 0)
+	return makemap(t, 0, nil, nil)
 }
 
 //go:linkname reflect_mapaccess reflect.mapaccess
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index 6cbb5f3775..7b4a846195 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -344,7 +344,7 @@ func dumpgoroutine(gp *g) {
 	dumpint(uint64(gp.goid))
 	dumpint(uint64(gp.gopc))
 	dumpint(uint64(readgstatus(gp)))
-	dumpbool(gp.issystem)
+	dumpbool(isSystemGoroutine(gp))
 	dumpbool(false) // isbackground
 	dumpint(uint64(gp.waitsince))
 	dumpstr(gp.waitreason)
diff --git a/src/runtime/iface_test.go b/src/runtime/iface_test.go
index bca0ea0ee7..bfeb94b8aa 100644
--- a/src/runtime/iface_test.go
+++ b/src/runtime/iface_test.go
@@ -5,6 +5,7 @@
 package runtime_test
 
 import (
+	"runtime"
 	"testing"
 )
 
@@ -38,6 +39,47 @@ var (
 	tl TL
 )
 
+// Issue 9370
+func TestCmpIfaceConcreteAlloc(t *testing.T) {
+	if runtime.Compiler != "gc" {
+		t.Skip("skipping on non-gc compiler")
+	}
+
+	n := testing.AllocsPerRun(1, func() {
+		_ = e == ts
+		_ = i1 == ts
+		_ = e == 1
+	})
+
+	if n > 0 {
+		t.Fatalf("iface cmp allocs=%v; want 0", n)
+	}
+}
+
+func BenchmarkEqEfaceConcrete(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		_ = e == ts
+	}
+}
+
+func BenchmarkEqIfaceConcrete(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		_ = i1 == ts
+	}
+}
+
+func BenchmarkNeEfaceConcrete(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		_ = e != ts
+	}
+}
+
+func BenchmarkNeIfaceConcrete(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		_ = i1 != ts
+	}
+}
+
 func BenchmarkConvT2ESmall(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		e = ts
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index b8b1f4ed36..06ba124473 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -312,6 +312,7 @@ func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
 
 // For now this must be bracketed with a stoptheworld and a starttheworld to ensure
 // all go routines see the new barrier.
+//go:nowritebarrier
 func gcinstallmarkwb() {
 	gcphase = _GCmark
 }
@@ -389,6 +390,7 @@ func gcwork(force int32) {
 		gctimer.cycle.installmarkwb = nanotime()
 		systemstack(stoptheworld)
 		systemstack(gcinstallmarkwb)
+		systemstack(harvestwbufs)
 		systemstack(starttheworld)
 		gctimer.cycle.mark = nanotime()
 		systemstack(gcmark_m)
diff --git a/src/runtime/malloc1.go b/src/runtime/malloc1.go
index 4d0754ba9d..18d998b554 100644
--- a/src/runtime/malloc1.go
+++ b/src/runtime/malloc1.go
@@ -99,7 +99,7 @@ func mallocinit() {
 	var reserved bool
 
 	// limit = runtime.memlimit();
-	// See https://code.google.com/p/go/issues/detail?id=5049
+	// See https://golang.org/issue/5049
 	// TODO(rsc): Fix after 1.1.
 	limit = 0
 
diff --git a/src/runtime/map_test.go b/src/runtime/map_test.go
index 92da2d8209..55f1f82625 100644
--- a/src/runtime/map_test.go
+++ b/src/runtime/map_test.go
@@ -535,3 +535,13 @@ func benchmarkMapPop(b *testing.B, n int) {
 func BenchmarkMapPop100(b *testing.B)   { benchmarkMapPop(b, 100) }
 func BenchmarkMapPop1000(b *testing.B)  { benchmarkMapPop(b, 1000) }
 func BenchmarkMapPop10000(b *testing.B) { benchmarkMapPop(b, 10000) }
+
+func TestNonEscapingMap(t *testing.T) {
+	n := testing.AllocsPerRun(1000, func() {
+		m := make(map[int]int)
+		m[0] = 0
+	})
+	if n != 0 {
+		t.Fatalf("want 0 allocs, got %v", n)
+	}
+}
diff --git a/src/runtime/mapspeed_test.go b/src/runtime/mapspeed_test.go
index 119eb3f39c..b036d2a3ab 100644
--- a/src/runtime/mapspeed_test.go
+++ b/src/runtime/mapspeed_test.go
@@ -234,6 +234,15 @@ func BenchmarkNewEmptyMap(b *testing.B) {
 	}
 }
 
+func BenchmarkNewSmallMap(b *testing.B) {
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		m := make(map[int]int)
+		m[0] = 0
+		m[1] = 1
+	}
+}
+
 func BenchmarkMapIter(b *testing.B) {
 	m := make(map[int]bool)
 	for i := 0; i < 8; i++ {
diff --git a/src/runtime/mbarrier.go b/src/runtime/mbarrier.go
index 33d67c4976..f6e9269858 100644
--- a/src/runtime/mbarrier.go
+++ b/src/runtime/mbarrier.go
@@ -328,6 +328,13 @@ func typedslicecopy(typ *_type, dst, src slice) int {
 	dstp := unsafe.Pointer(dst.array)
 	srcp := unsafe.Pointer(src.array)
 
+	if raceenabled {
+		callerpc := getcallerpc(unsafe.Pointer(&typ))
+		pc := funcPC(slicecopy)
+		racewriterangepc(dstp, uintptr(n)*typ.size, callerpc, pc)
+		racereadrangepc(srcp, uintptr(n)*typ.size, callerpc, pc)
+	}
+
 	if !needwb() {
 		memmove(dstp, srcp, uintptr(n)*typ.size)
 		return int(n)
diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go
index aa99b762bf..920fbcf6d6 100644
--- a/src/runtime/mem_linux.go
+++ b/src/runtime/mem_linux.go
@@ -54,7 +54,6 @@ func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer {
 	if uintptr(p) < 4096 {
 		if uintptr(p) == _EACCES {
 			print("runtime: mmap: access denied\n")
-			print("if you're running SELinux, enable execmem for this process.\n")
 			exit(2)
 		}
 		if uintptr(p) == _EAGAIN {
diff --git a/src/runtime/mem_plan9.go b/src/runtime/mem_plan9.go
index 477a52700e..6ceed25d87 100644
--- a/src/runtime/mem_plan9.go
+++ b/src/runtime/mem_plan9.go
@@ -48,6 +48,7 @@ func sysFree(v unsafe.Pointer, n uintptr, stat *uint64) {
 	n = memRound(n)
 	if bloc == uintptr(v)+n {
 		bloc -= n
+		memclr(unsafe.Pointer(bloc), n)
 	}
 	unlock(&memlock)
 }
diff --git a/src/runtime/mem_windows.go b/src/runtime/mem_windows.go
index d72d49b975..a800ccae1d 100644
--- a/src/runtime/mem_windows.go
+++ b/src/runtime/mem_windows.go
@@ -18,16 +18,6 @@ const (
 	_PAGE_NOACCESS  = 0x0001
 )
 
-//go:cgo_import_dynamic runtime._VirtualAlloc VirtualAlloc "kernel32.dll"
-//go:cgo_import_dynamic runtime._VirtualFree VirtualFree "kernel32.dll"
-//go:cgo_import_dynamic runtime._VirtualProtect VirtualProtect "kernel32.dll"
-
-var (
-	_VirtualAlloc,
-	_VirtualFree,
-	_VirtualProtect stdFunction
-)
-
 //go:nosplit
 func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer {
 	xadd64(stat, int64(n))
diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go
index 28afa0dfab..525aa0955a 100644
--- a/src/runtime/mfinal.go
+++ b/src/runtime/mfinal.go
@@ -102,7 +102,10 @@ func wakefing() *g {
 	return res
 }
 
-var fingCreate uint32
+var (
+	fingCreate  uint32
+	fingRunning bool
+)
 
 func createfing() {
 	// start the finalizer goroutine exactly once
@@ -126,9 +129,7 @@ func runfinq() {
 			gp := getg()
 			fing = gp
 			fingwait = true
-			gp.issystem = true
 			goparkunlock(&finlock, "finalizer wait", traceEvGoBlock)
-			gp.issystem = false
 			continue
 		}
 		unlock(&finlock)
@@ -169,7 +170,9 @@ func runfinq() {
 				default:
 					throw("bad kind in runfinq")
 				}
+				fingRunning = true
 				reflectcall(nil, unsafe.Pointer(f.fn), frame, uint32(framesz), uint32(framesz))
+				fingRunning = false
 
 				// drop finalizer queue references to finalized object
 				f.fn = nil
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 75b1e52916..1a3e70fcdd 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -122,8 +122,6 @@ const (
 	_DebugGC         = 0
 	_DebugGCPtrs     = false // if true, print trace of every pointer load during GC
 	_ConcurrentSweep = true
-
-	_WorkbufSize     = 4 * 256
 	_FinBlockSize    = 4 * 1024
 	_RootData        = 0
 	_RootBss         = 1
@@ -154,12 +152,6 @@ var gcpercent int32
 //
 var worldsema uint32 = 1
 
-type workbuf struct {
-	node lfnode // must be first
-	nobj uintptr
-	obj  [(_WorkbufSize - unsafe.Sizeof(lfnode{}) - ptrSize) / ptrSize]uintptr
-}
-
 var data, edata, bss, ebss, gcdata, gcbss struct{}
 
 var gcdatamask bitvector
@@ -347,7 +339,8 @@ func greyobject(obj, base, off uintptr, hbits heapBits, wbuf *workbuf) *workbuf
 
 	// If workbuf is full, obtain an empty one.
 	if wbuf.nobj >= uintptr(len(wbuf.obj)) {
-		wbuf = getempty(wbuf)
+		putfull(wbuf, 358)
+		wbuf = getempty(359)
 	}
 
 	wbuf.obj[wbuf.nobj] = obj
@@ -426,13 +419,10 @@ func scanobject(b, n uintptr, ptrmask *uint8, wbuf *workbuf) *workbuf {
 	return wbuf
 }
 
-// scanblock starts by scanning b as scanobject would.
-// If the gcphase is GCscan, that's all scanblock does.
-// Otherwise it traverses some fraction of the pointers it found in b, recursively.
-// As a special case, scanblock(nil, 0, nil) means to scan previously queued work,
-// stopping only when no work is left in the system.
+// scanblock scans b as scanobject would.
+// If the gcphase is GCscan, scanblock performs additional checks.
 //go:nowritebarrier
-func scanblock(b0, n0 uintptr, ptrmask *uint8) {
+func scanblock(b0, n0 uintptr, ptrmask *uint8, wbuf *workbuf) *workbuf {
 	// Use local copies of original parameters, so that a stack trace
 	// due to one of the throws below shows the original block
 	// base and extent.
@@ -443,48 +433,40 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8) {
 	// 1. nil - obtain pointer mask from GC bitmap.
 	// 2. pointer to a compact mask (for stacks and data).
 
-	wbuf := getpartialorempty()
-	if b != 0 {
-		wbuf = scanobject(b, n, ptrmask, wbuf)
-		if gcphase == _GCscan {
-			if inheap(b) && ptrmask == nil {
-				// b is in heap, we are in GCscan so there should be a ptrmask.
-				throw("scanblock: In GCscan phase and inheap is true.")
-			}
-			// GCscan only goes one level deep since mark wb not turned on.
-			putpartial(wbuf)
-			return
+	if wbuf == nil {
+		wbuf = getpartialorempty(460) // no wbuf passed in.
+	}
+	wbuf = scanobject(b, n, ptrmask, wbuf)
+	if gcphase == _GCscan {
+		if inheap(b) && ptrmask == nil {
+			// b is in heap, we are in GCscan so there should be a ptrmask.
+			throw("scanblock: In GCscan phase and inheap is true.")
 		}
 	}
-
-	drainallwbufs := b == 0
-	drainworkbuf(wbuf, drainallwbufs)
+	return wbuf
 }
 
-// Scan objects in wbuf until wbuf is empty.
-// If drainallwbufs is true find all other available workbufs and repeat the process.
+// gcDrain scans objects in work buffers (starting with wbuf), blackening grey
+// objects until all work buffers have been drained.
 //go:nowritebarrier
-func drainworkbuf(wbuf *workbuf, drainallwbufs bool) {
+func gcDrain(wbuf *workbuf) {
+	if wbuf == nil {
+		wbuf = getpartialorempty(472)
+	}
+	checknocurrentwbuf()
 	if gcphase != _GCmark && gcphase != _GCmarktermination {
-		println("gcphase", gcphase)
-		throw("scanblock phase")
+		throw("scanblock phase incorrect")
 	}
 
 	for {
 		if wbuf.nobj == 0 {
-			if !drainallwbufs {
-				putempty(wbuf)
-				return
-			}
+			putempty(wbuf, 496)
 			// Refill workbuf from global queue.
-			wbuf = getfull(wbuf)
+			wbuf = getfull(504)
 			if wbuf == nil { // nil means out of work barrier reached
-				return
-			}
-
-			if wbuf.nobj <= 0 {
-				throw("runtime:scanblock getfull returns empty buffer")
+				break
 			}
+			wbuf.checknonempty()
 		}
 
 		// If another proc wants a pointer, give it some.
@@ -493,52 +475,61 @@ func drainworkbuf(wbuf *workbuf, drainallwbufs bool) {
 		}
 
 		// This might be a good place to add prefetch code...
-		// if(wbuf->nobj > 4) {
-		//         PREFETCH(wbuf->obj[wbuf->nobj - 3];
+		// if(wbuf.nobj > 4) {
+		//         PREFETCH(wbuf.obj[wbuf.nobj - 3])
 		//  }
 		wbuf.nobj--
 		b := wbuf.obj[wbuf.nobj]
+		// If the current wbuf is filled by the scan a new wbuf might be
+		// returned that could possibly hold only a single object. This
+		// could result in each iteration draining only a single object
+		// out of the wbuf passed in + a single object placed
+		// into an empty wbuf in scanobject so there could be
+		// a performance hit as we keep fetching fresh wbufs.
 		wbuf = scanobject(b, 0, nil, wbuf)
 	}
+	checknocurrentwbuf()
 }
 
-// Scan count objects starting with those in wbuf.
+// gcDrainN scans n objects starting with those in wbuf, blackening
+// grey objects.
 //go:nowritebarrier
-func drainobjects(wbuf *workbuf, count uintptr) {
-	for i := uintptr(0); i < count; i++ {
+func gcDrainN(wbuf *workbuf, n uintptr) *workbuf {
+	checknocurrentwbuf()
+	for i := uintptr(0); i < n; i++ {
 		if wbuf.nobj == 0 {
-			putempty(wbuf)
-			wbuf = trygetfull()
+			putempty(wbuf, 544)
+			wbuf = trygetfull(545)
 			if wbuf == nil {
-				return
+				return nil
 			}
 		}
 
 		// This might be a good place to add prefetch code...
-		// if(wbuf->nobj > 4) {
-		//         PREFETCH(wbuf->obj[wbuf->nobj - 3];
+		// if(wbuf.nobj > 4) {
+		//         PREFETCH(wbuf.obj[wbuf.nobj - 3])
 		//  }
 		wbuf.nobj--
 		b := wbuf.obj[wbuf.nobj]
 		wbuf = scanobject(b, 0, nil, wbuf)
 	}
-	putpartial(wbuf)
-	return
+	return wbuf
 }
 
 //go:nowritebarrier
 func markroot(desc *parfor, i uint32) {
 	// Note: if you add a case here, please also update heapdump.c:dumproots.
+	wbuf := (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, 0)))
 	switch i {
 	case _RootData:
-		scanblock(uintptr(unsafe.Pointer(&data)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)), gcdatamask.bytedata)
+		wbuf = scanblock(uintptr(unsafe.Pointer(&data)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)), gcdatamask.bytedata, wbuf)
 
 	case _RootBss:
-		scanblock(uintptr(unsafe.Pointer(&bss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)), gcbssmask.bytedata)
+		wbuf = scanblock(uintptr(unsafe.Pointer(&bss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)), gcbssmask.bytedata, wbuf)
 
 	case _RootFinalizers:
 		for fb := allfin; fb != nil; fb = fb.alllink {
-			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0])
+			wbuf = scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], wbuf)
 		}
 
 	case _RootSpans:
@@ -564,9 +555,9 @@ func markroot(desc *parfor, i uint32) {
 				// A finalizer can be set for an inner byte of an object, find object beginning.
 				p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
 				if gcphase != _GCscan {
-					scanblock(p, s.elemsize, nil) // scanned during mark phase
+					wbuf = scanblock(p, s.elemsize, nil, wbuf) // scanned during mark phase
 				}
-				scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0])
+				wbuf = scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0], wbuf)
 			}
 		}
 
@@ -626,157 +617,14 @@ func markroot(desc *parfor, i uint32) {
 			restartg(gp)
 		}
 	}
-}
-
-// Get an empty work buffer off the work.empty list,
-// allocating new buffers as needed.
-//go:nowritebarrier
-func getempty(b *workbuf) *workbuf {
-	if b != nil {
-		putfull(b)
-		b = nil
-	}
-	if work.empty != 0 {
-		b = (*workbuf)(lfstackpop(&work.empty))
-	}
-	if b != nil && b.nobj != 0 {
-		_g_ := getg()
-		print("m", _g_.m.id, ": getempty: popped b=", b, " with non-zero b.nobj=", b.nobj, "\n")
-		throw("getempty: workbuffer not empty, b->nobj not 0")
-	}
-	if b == nil {
-		b = (*workbuf)(persistentalloc(unsafe.Sizeof(*b), _CacheLineSize, &memstats.gc_sys))
-		b.nobj = 0
-	}
-	return b
-}
-
-//go:nowritebarrier
-func putempty(b *workbuf) {
-	if b.nobj != 0 {
-		throw("putempty: b->nobj not 0")
-	}
-	lfstackpush(&work.empty, &b.node)
-}
-
-//go:nowritebarrier
-func putfull(b *workbuf) {
-	if b.nobj <= 0 {
-		throw("putfull: b->nobj <= 0")
-	}
-	lfstackpush(&work.full, &b.node)
-}
-
-// Get an partially empty work buffer
-// if none are available get an empty one.
-//go:nowritebarrier
-func getpartialorempty() *workbuf {
-	b := (*workbuf)(lfstackpop(&work.partial))
-	if b == nil {
-		b = getempty(nil)
-	}
-	return b
-}
-
-//go:nowritebarrier
-func putpartial(b *workbuf) {
-	if b.nobj == 0 {
-		lfstackpush(&work.empty, &b.node)
-	} else if b.nobj < uintptr(len(b.obj)) {
-		lfstackpush(&work.partial, &b.node)
-	} else if b.nobj == uintptr(len(b.obj)) {
-		lfstackpush(&work.full, &b.node)
-	} else {
-		print("b=", b, " b.nobj=", b.nobj, " len(b.obj)=", len(b.obj), "\n")
-		throw("putpartial: bad Workbuf b.nobj")
-	}
-}
-
-// trygetfull tries to get a full or partially empty workbuffer.
-// if one is not immediately available return nil
-//go:nowritebarrier
-func trygetfull() *workbuf {
-	wbuf := (*workbuf)(lfstackpop(&work.full))
 	if wbuf == nil {
-		wbuf = (*workbuf)(lfstackpop(&work.partial))
-	}
-	return wbuf
-}
-
-// Get a full work buffer off the work.full or a partially
-// filled one off the work.partial list. If nothing is available
-// wait until all the other gc helpers have finished and then
-// return nil.
-// getfull acts as a barrier for work.nproc helpers. As long as one
-// gchelper is actively marking objects it
-// may create a workbuffer that the other helpers can work on.
-// The for loop either exits when a work buffer is found
-// or when _all_ of the work.nproc GC helpers are in the loop
-// looking for work and thus not capable of creating new work.
-// This is in fact the termination condition for the STW mark
-// phase.
-//go:nowritebarrier
-func getfull(b *workbuf) *workbuf {
-	if b != nil {
-		putempty(b)
-	}
-
-	b = (*workbuf)(lfstackpop(&work.full))
-	if b == nil {
-		b = (*workbuf)(lfstackpop(&work.partial))
-	}
-	if b != nil {
-		return b
-	}
-
-	xadd(&work.nwait, +1)
-	for i := 0; ; i++ {
-		if work.full != 0 {
-			xadd(&work.nwait, -1)
-			b = (*workbuf)(lfstackpop(&work.full))
-			if b == nil {
-				b = (*workbuf)(lfstackpop(&work.partial))
-			}
-			if b != nil {
-				return b
-			}
-			xadd(&work.nwait, +1)
-		}
-		if work.nwait == work.nproc {
-			return nil
-		}
-		_g_ := getg()
-		if i < 10 {
-			_g_.m.gcstats.nprocyield++
-			procyield(20)
-		} else if i < 20 {
-			_g_.m.gcstats.nosyield++
-			osyield()
-		} else {
-			_g_.m.gcstats.nsleep++
-			usleep(100)
-		}
+		return
+	} else {
+		putpartial(wbuf, 670)
 	}
 }
 
 //go:nowritebarrier
-func handoff(b *workbuf) *workbuf {
-	// Make new buffer with half of b's pointers.
-	b1 := getempty(nil)
-	n := b.nobj / 2
-	b.nobj -= n
-	b1.nobj = n
-	memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), n*unsafe.Sizeof(b1.obj[0]))
-	_g_ := getg()
-	_g_.m.gcstats.nhandoff++
-	_g_.m.gcstats.nhandoffcnt += uint64(n)
-
-	// Put b on full list - let first half of b get stolen.
-	lfstackpush(&work.full, &b.node)
-	return b1
-}
-
-//go:nowritebarrier
 func stackmapdata(stkmap *stackmap, n int32) bitvector {
 	if n < 0 || n >= stkmap.n {
 		throw("stackmapdata: index out of range")
@@ -786,13 +634,13 @@ func stackmapdata(stkmap *stackmap, n int32) bitvector {
 
 // Scan a stack frame: local variables and function arguments/results.
 //go:nowritebarrier
-func scanframe(frame *stkframe, unused unsafe.Pointer) bool {
+func scanframeworker(frame *stkframe, unused unsafe.Pointer, wbuf *workbuf) *workbuf {
 
 	f := frame.fn
 	targetpc := frame.continpc
 	if targetpc == 0 {
 		// Frame is dead.
-		return true
+		return wbuf
 	}
 	if _DebugGC > 1 {
 		print("scanframe ", funcname(f), "\n")
@@ -831,7 +679,7 @@ func scanframe(frame *stkframe, unused unsafe.Pointer) bool {
 		}
 		bv := stackmapdata(stkmap, pcdata)
 		size = (uintptr(bv.n) / typeBitsWidth) * ptrSize
-		scanblock(frame.varp-size, size, bv.bytedata)
+		wbuf = scanblock(frame.varp-size, size, bv.bytedata, wbuf)
 	}
 
 	// Scan arguments.
@@ -852,9 +700,9 @@ func scanframe(frame *stkframe, unused unsafe.Pointer) bool {
 			}
 			bv = stackmapdata(stkmap, pcdata)
 		}
-		scanblock(frame.argp, uintptr(bv.n)/typeBitsWidth*ptrSize, bv.bytedata)
+		wbuf = scanblock(frame.argp, uintptr(bv.n)/typeBitsWidth*ptrSize, bv.bytedata, wbuf)
 	}
-	return true
+	return wbuf
 }
 
 //go:nowritebarrier
@@ -889,8 +737,19 @@ func scanstack(gp *g) {
 		throw("can't scan gchelper stack")
 	}
 
+	wbuf := (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, 0)))
+	scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
+		// Pick up wbuf as free variable so gentraceback and friends can
+		// keep the same signature.
+		wbuf = scanframeworker(frame, unused, wbuf)
+		return true
+	}
 	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
 	tracebackdefers(gp, scanframe, nil)
+	wbuf = (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, uintptr(unsafe.Pointer(wbuf)))))
+	if wbuf != nil {
+		throw("wbuf not nil after stack scans")
+	}
 	gp.gcscanvalid = true
 }
 
@@ -898,17 +757,34 @@ func scanstack(gp *g) {
 // The object is not nil and known to be in the heap.
 //go:nowritebarrier
 func shade(b uintptr) {
+	var wbuf *workbuf
+
 	if !inheap(b) {
 		throw("shade: passed an address not in the heap")
 	}
-
-	wbuf := getpartialorempty()
-
 	if obj, hbits := heapBitsForObject(b); obj != 0 {
-		wbuf = greyobject(obj, 0, 0, hbits, wbuf)
-	}
+		// TODO: this would be a great place to put a check to see
+		// if we are harvesting and if we are then we should
+		// figure out why there is a call to shade when the
+		// harvester thinks we are in a STW.
+		// if atomicload(&harvestingwbufs) == uint32(1) {
+		//	// Throw here to discover write barriers
+		//	// being executed during a STW.
+		// }
 
-	putpartial(wbuf)
+		wbuf = getpartialorempty(1181)
+		wbuf := greyobject(obj, 0, 0, hbits, wbuf)
+		checknocurrentwbuf()
+		// This is part of the write barrier so put the wbuf back.
+		if gcphase == _GCmarktermination {
+			putpartial(wbuf, 1191) // Put on full???
+		} else {
+			wbuf = (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, uintptr(unsafe.Pointer(wbuf)))))
+			if wbuf != nil {
+				throw("m.currentwbuf lost in shade")
+			}
+		}
+	}
 }
 
 // gchelpwork does a small bounded amount of gc work. The purpose is to
@@ -931,10 +807,24 @@ func gchelpwork() {
 		// scanstack(gp)
 	case _GCmark:
 		// Get a full work buffer and empty it.
-		var wbuf *workbuf
-		wbuf = trygetfull()
+		m := getg().m
+		// drain your own currentwbuf first in the hopes that it will
+		// be more cache friendly.
+		wbuf := (*workbuf)(unsafe.Pointer(xchguintptr(&m.currentwbuf, 0)))
+		//		wbuf := (*workbuf)(unsafe.Pointer(m.currentwbuf))
+		//		m.currentwbuf = 0
+		if wbuf == nil {
+			wbuf = trygetfull(1228)
+		}
 		if wbuf != nil {
-			drainobjects(wbuf, uintptr(len(wbuf.obj))) // drain upto one buffer's worth of objects
+			wbuf = gcDrainN(wbuf, uintptr(len(wbuf.obj))) // drain up to one buffer's worth of objects
+			if wbuf != nil {
+				if wbuf.nobj != 0 {
+					putfull(wbuf, 1175)
+				} else {
+					putempty(wbuf, 1177)
+				}
+			}
 		}
 	case _GCmarktermination:
 		// We should never be here since the world is stopped.
@@ -1249,7 +1139,7 @@ func gchelper() {
 	// parallel mark for over GC roots
 	parfordo(work.markfor)
 	if gcphase != _GCscan {
-		scanblock(0, 0, nil) // blocks in getfull
+		gcDrain(nil) // blocks in getfull
 	}
 
 	if trace.enabled {
@@ -1380,7 +1270,7 @@ var heapminimum = uint64(4 << 20)
 
 func gcinit() {
 	if unsafe.Sizeof(workbuf{}) != _WorkbufSize {
-		throw("runtime: size of Workbuf is suboptimal")
+		throw("size of Workbuf is suboptimal")
 	}
 
 	work.markfor = parforalloc(_MaxGcproc)
@@ -1508,16 +1398,12 @@ func gcscan_m() {
 }
 
 // Mark all objects that are known about.
+// This is the concurrent mark phase.
 //go:nowritebarrier
 func gcmark_m() {
-	scanblock(0, 0, nil)
-}
-
-// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
-// all go routines see the new barrier.
-//go:nowritebarrier
-func gcinstallmarkwb_m() {
-	gcphase = _GCmark
+	gcDrain(nil)
+	// TODO add another harvestwbufs call and reset work.nwait=0, work.ndone=0, and work.nproc=1
+	// and repeat the above gcDrain.
 }
 
 // For now this must be bracketed with a stoptheworld and a starttheworld to ensure
@@ -1527,12 +1413,14 @@ func gcinstalloffwb_m() {
 	gcphase = _GCoff
 }
 
+// STW is in effect at this point.
 //TODO go:nowritebarrier
 func gc(start_time int64, eagersweep bool) {
 	if _DebugGCPtrs {
 		print("GC start\n")
 	}
 
+	gcphase = _GCmarktermination
 	if debug.allocfreetrace > 0 {
 		tracegc()
 	}
@@ -1571,12 +1459,10 @@ func gc(start_time int64, eagersweep bool) {
 	mheap_.gcspans = mheap_.allspans
 	work.spans = h_allspans
 	unlock(&mheap_.lock)
-	oldphase := gcphase
 
 	work.nwait = 0
 	work.ndone = 0
 	work.nproc = uint32(gcprocs())
-	gcphase = _GCmarktermination
 
 	// World is stopped so allglen will not change.
 	for i := uintptr(0); i < allglen; i++ {
@@ -1599,9 +1485,10 @@ func gc(start_time int64, eagersweep bool) {
 		t2 = nanotime()
 	}
 
+	harvestwbufs() // move local workbufs onto global queues where the GC can find them
 	gchelperstart()
 	parfordo(work.markfor)
-	scanblock(0, 0, nil)
+	gcDrain(nil)
 
 	if work.full != 0 {
 		throw("work.full != 0")
@@ -1610,7 +1497,7 @@ func gc(start_time int64, eagersweep bool) {
 		throw("work.partial != 0")
 	}
 
-	gcphase = oldphase
+	gcphase = _GCoff
 	var t3 int64
 	if debug.gctrace > 0 {
 		t3 = nanotime()
@@ -1755,7 +1642,7 @@ func readGCStats_m(pauses *[]uint64) {
 	p := *pauses
 	// Calling code in runtime/debug should make the slice large enough.
 	if cap(p) < len(memstats.pause_ns)+3 {
-		throw("runtime: short slice passed to readGCStats")
+		throw("short slice passed to readGCStats")
 	}
 
 	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
diff --git a/src/runtime/mgc0.go b/src/runtime/mgc0.go
index bbd786d519..f54d93377d 100644
--- a/src/runtime/mgc0.go
+++ b/src/runtime/mgc0.go
@@ -62,7 +62,6 @@ func clearpools() {
 // bggc holds the state of the backgroundgc.
 func backgroundgc() {
 	bggc.g = getg()
-	bggc.g.issystem = true
 	for {
 		gcwork(0)
 		lock(&bggc.lock)
@@ -73,7 +72,6 @@ func backgroundgc() {
 
 func bgsweep() {
 	sweep.g = getg()
-	getg().issystem = true
 	for {
 		for gosweepone() != ^uintptr(0) {
 			sweep.nbgsweep++
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 11bc809ec1..13f1b363d0 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -318,8 +318,7 @@ HaveSpan:
 		t.needzero = s.needzero
 		s.state = _MSpanStack // prevent coalescing with s
 		t.state = _MSpanStack
-		mHeap_FreeSpanLocked(h, t, false, false)
-		t.unusedsince = s.unusedsince // preserve age (TODO: wrong: t is possibly merged and/or deallocated at this point)
+		mHeap_FreeSpanLocked(h, t, false, false, s.unusedsince)
 		s.state = _MSpanFree
 	}
 	s.unusedsince = 0
@@ -395,7 +394,7 @@ func mHeap_Grow(h *mheap, npage uintptr) bool {
 	h_spans[p+s.npages-1] = s
 	atomicstore(&s.sweepgen, h.sweepgen)
 	s.state = _MSpanInUse
-	mHeap_FreeSpanLocked(h, s, false, true)
+	mHeap_FreeSpanLocked(h, s, false, true, 0)
 	return true
 }
 
@@ -442,7 +441,7 @@ func mHeap_Free(h *mheap, s *mspan, acct int32) {
 			memstats.heap_alloc -= uint64(s.npages << _PageShift)
 			memstats.heap_objects--
 		}
-		mHeap_FreeSpanLocked(h, s, true, true)
+		mHeap_FreeSpanLocked(h, s, true, true, 0)
 		if trace.enabled {
 			traceHeapAlloc()
 		}
@@ -458,11 +457,11 @@ func mHeap_FreeStack(h *mheap, s *mspan) {
 	s.needzero = 1
 	lock(&h.lock)
 	memstats.stacks_inuse -= uint64(s.npages << _PageShift)
-	mHeap_FreeSpanLocked(h, s, true, true)
+	mHeap_FreeSpanLocked(h, s, true, true, 0)
 	unlock(&h.lock)
 }
 
-func mHeap_FreeSpanLocked(h *mheap, s *mspan, acctinuse, acctidle bool) {
+func mHeap_FreeSpanLocked(h *mheap, s *mspan, acctinuse, acctidle bool, unusedsince int64) {
 	switch s.state {
 	case _MSpanStack:
 		if s.ref != 0 {
@@ -488,7 +487,10 @@ func mHeap_FreeSpanLocked(h *mheap, s *mspan, acctinuse, acctidle bool) {
 
 	// Stamp newly unused spans. The scavenger will use that
 	// info to potentially give back some pages to the OS.
-	s.unusedsince = nanotime()
+	s.unusedsince = unusedsince
+	if unusedsince == 0 {
+		s.unusedsince = nanotime()
+	}
 	s.npreleased = 0
 
 	// Coalesce with earlier, later spans.
diff --git a/src/runtime/netpoll_windows.go b/src/runtime/netpoll_windows.go
index 8e0750d607..0861e20f0c 100644
--- a/src/runtime/netpoll_windows.go
+++ b/src/runtime/netpoll_windows.go
@@ -10,16 +10,6 @@ import (
 
 const _DWORD_MAX = 0xffffffff
 
-//go:cgo_import_dynamic runtime._CreateIoCompletionPort CreateIoCompletionPort "kernel32.dll"
-//go:cgo_import_dynamic runtime._GetQueuedCompletionStatus GetQueuedCompletionStatus "kernel32.dll"
-//go:cgo_import_dynamic runtime._WSAGetOverlappedResult WSAGetOverlappedResult "ws2_32.dll"
-
-var (
-	_CreateIoCompletionPort,
-	_GetQueuedCompletionStatus,
-	_WSAGetOverlappedResult stdFunction
-)
-
 const _INVALID_HANDLE_VALUE = ^uintptr(0)
 
 // net_op must be the same as beginning of net.operation. Keep these in sync.
diff --git a/src/runtime/os1_windows.go b/src/runtime/os1_windows.go
index 8655c083b2..2de6b09343 100644
--- a/src/runtime/os1_windows.go
+++ b/src/runtime/os1_windows.go
@@ -11,6 +11,7 @@ import (
 //go:cgo_import_dynamic runtime._AddVectoredExceptionHandler AddVectoredExceptionHandler "kernel32.dll"
 //go:cgo_import_dynamic runtime._CloseHandle CloseHandle "kernel32.dll"
 //go:cgo_import_dynamic runtime._CreateEventA CreateEventA "kernel32.dll"
+//go:cgo_import_dynamic runtime._CreateIoCompletionPort CreateIoCompletionPort "kernel32.dll"
 //go:cgo_import_dynamic runtime._CreateThread CreateThread "kernel32.dll"
 //go:cgo_import_dynamic runtime._CreateWaitableTimerA CreateWaitableTimerA "kernel32.dll"
 //go:cgo_import_dynamic runtime._CryptAcquireContextW CryptAcquireContextW "advapi32.dll"
@@ -21,6 +22,7 @@ import (
 //go:cgo_import_dynamic runtime._FreeEnvironmentStringsW FreeEnvironmentStringsW "kernel32.dll"
 //go:cgo_import_dynamic runtime._GetEnvironmentStringsW GetEnvironmentStringsW "kernel32.dll"
 //go:cgo_import_dynamic runtime._GetProcAddress GetProcAddress "kernel32.dll"
+//go:cgo_import_dynamic runtime._GetQueuedCompletionStatus GetQueuedCompletionStatus "kernel32.dll"
 //go:cgo_import_dynamic runtime._GetStdHandle GetStdHandle "kernel32.dll"
 //go:cgo_import_dynamic runtime._GetSystemInfo GetSystemInfo "kernel32.dll"
 //go:cgo_import_dynamic runtime._GetThreadContext GetThreadContext "kernel32.dll"
@@ -37,14 +39,22 @@ import (
 //go:cgo_import_dynamic runtime._SetWaitableTimer SetWaitableTimer "kernel32.dll"
 //go:cgo_import_dynamic runtime._Sleep Sleep "kernel32.dll"
 //go:cgo_import_dynamic runtime._SuspendThread SuspendThread "kernel32.dll"
+//go:cgo_import_dynamic runtime._VirtualAlloc VirtualAlloc "kernel32.dll"
+//go:cgo_import_dynamic runtime._VirtualFree VirtualFree "kernel32.dll"
+//go:cgo_import_dynamic runtime._VirtualProtect VirtualProtect "kernel32.dll"
+//go:cgo_import_dynamic runtime._WSAGetOverlappedResult WSAGetOverlappedResult "ws2_32.dll"
 //go:cgo_import_dynamic runtime._WaitForSingleObject WaitForSingleObject "kernel32.dll"
 //go:cgo_import_dynamic runtime._WriteFile WriteFile "kernel32.dll"
 //go:cgo_import_dynamic runtime._timeBeginPeriod timeBeginPeriod "winmm.dll"
 
 var (
+	// The following syscalls are available on every Windows PC.
+	// All these variables are set by the Windows executable
+	// loader before the Go program starts.
 	_AddVectoredExceptionHandler,
 	_CloseHandle,
 	_CreateEventA,
+	_CreateIoCompletionPort,
 	_CreateThread,
 	_CreateWaitableTimerA,
 	_CryptAcquireContextW,
@@ -55,6 +65,7 @@ var (
 	_FreeEnvironmentStringsW,
 	_GetEnvironmentStringsW,
 	_GetProcAddress,
+	_GetQueuedCompletionStatus,
 	_GetStdHandle,
 	_GetSystemInfo,
 	_GetThreadContext,
@@ -71,12 +82,36 @@ var (
 	_SetWaitableTimer,
 	_Sleep,
 	_SuspendThread,
+	_VirtualAlloc,
+	_VirtualFree,
+	_VirtualProtect,
+	_WSAGetOverlappedResult,
 	_WaitForSingleObject,
 	_WriteFile,
 	_timeBeginPeriod stdFunction
+
+	// The following syscalls are only available on some Windows PCs.
+	// We load them, if available, before using them.
+	_AddVectoredContinueHandler,
+	_GetQueuedCompletionStatusEx stdFunction
 )
 
-var _GetQueuedCompletionStatusEx stdFunction
+func loadOptionalSyscalls() {
+	var buf [50]byte // large enough for longest string
+	strtoptr := func(s string) uintptr {
+		buf[copy(buf[:], s)] = 0 // NUL-terminated for the OS
+		return uintptr(noescape(unsafe.Pointer(&buf[0])))
+	}
+	l := stdcall1(_LoadLibraryA, strtoptr("kernel32.dll"))
+	findfunc := func(name string) stdFunction {
+		f := stdcall2(_GetProcAddress, l, strtoptr(name))
+		return stdFunction(unsafe.Pointer(f))
+	}
+	if l != 0 {
+		_AddVectoredContinueHandler = findfunc("AddVectoredContinueHandler")
+		_GetQueuedCompletionStatusEx = findfunc("GetQueuedCompletionStatusEx")
+	}
+}
 
 // in sys_windows_386.s and sys_windows_amd64.s
 func externalthreadhandler()
@@ -117,34 +152,24 @@ func disableWER() {
 	stdcall1(_SetErrorMode, uintptr(errormode)|SEM_FAILCRITICALERRORS|SEM_NOGPFAULTERRORBOX|SEM_NOOPENFILEERRORBOX)
 }
 
-var (
-	kernel32Name                    = []byte("kernel32.dll\x00")
-	addVectoredContinueHandlerName  = []byte("AddVectoredContinueHandler\x00")
-	getQueuedCompletionStatusExName = []byte("GetQueuedCompletionStatusEx\x00")
-)
-
 func osinit() {
 	setBadSignalMsg()
 
-	kernel32 := stdcall1(_LoadLibraryA, uintptr(unsafe.Pointer(&kernel32Name[0])))
+	loadOptionalSyscalls()
 
 	disableWER()
 
 	externalthreadhandlerp = funcPC(externalthreadhandler)
 
 	stdcall2(_AddVectoredExceptionHandler, 1, funcPC(exceptiontramp))
-	addVectoredContinueHandler := uintptr(0)
-	if kernel32 != 0 {
-		addVectoredContinueHandler = stdcall2(_GetProcAddress, kernel32, uintptr(unsafe.Pointer(&addVectoredContinueHandlerName[0])))
-	}
-	if addVectoredContinueHandler == 0 || unsafe.Sizeof(&kernel32) == 4 {
+	if _AddVectoredContinueHandler == nil || unsafe.Sizeof(&_AddVectoredContinueHandler) == 4 {
 		// use SetUnhandledExceptionFilter for windows-386 or
 		// if VectoredContinueHandler is unavailable.
 		// note: SetUnhandledExceptionFilter handler won't be called, if debugging.
 		stdcall1(_SetUnhandledExceptionFilter, funcPC(lastcontinuetramp))
 	} else {
-		stdcall2(stdFunction(unsafe.Pointer(addVectoredContinueHandler)), 1, funcPC(firstcontinuetramp))
-		stdcall2(stdFunction(unsafe.Pointer(addVectoredContinueHandler)), 0, funcPC(lastcontinuetramp))
+		stdcall2(_AddVectoredContinueHandler, 1, funcPC(firstcontinuetramp))
+		stdcall2(_AddVectoredContinueHandler, 0, funcPC(lastcontinuetramp))
 	}
 
 	stdcall2(_SetConsoleCtrlHandler, funcPC(ctrlhandler), 1)
@@ -158,10 +183,6 @@ func osinit() {
 	// equivalent threads that all do a mix of GUI, IO, computations, etc.
 	// In such context dynamic priority boosting does nothing but harm, so we turn it off.
 	stdcall2(_SetProcessPriorityBoost, currentProcess, 1)
-
-	if kernel32 != 0 {
-		_GetQueuedCompletionStatusEx = stdFunction(unsafe.Pointer(stdcall2(_GetProcAddress, kernel32, uintptr(unsafe.Pointer(&getQueuedCompletionStatusExName[0])))))
-	}
 }
 
 //go:nosplit
@@ -506,7 +527,7 @@ func profilem(mp *m) {
 	r = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&rbuf[15]))) &^ 15))
 	r.contextflags = _CONTEXT_CONTROL
 	stdcall2(_GetThreadContext, mp.thread, uintptr(unsafe.Pointer(r)))
-	dosigprof(r, gp, mp)
+	sigprof((*byte)(unsafe.Pointer(r.ip())), (*byte)(unsafe.Pointer(r.sp())), nil, gp, mp)
 }
 
 func profileloop1() {
diff --git a/src/runtime/os1_windows_386.go b/src/runtime/os1_windows_386.go
index 7b4fdfe94a..b7eae204d1 100644
--- a/src/runtime/os1_windows_386.go
+++ b/src/runtime/os1_windows_386.go
@@ -118,7 +118,3 @@ func sigenable(sig uint32) {
 
 func sigdisable(sig uint32) {
 }
-
-func dosigprof(r *context, gp *g, mp *m) {
-	sigprof((*byte)(unsafe.Pointer(uintptr(r.eip))), (*byte)(unsafe.Pointer(uintptr(r.esp))), nil, gp, mp)
-}
diff --git a/src/runtime/os1_windows_amd64.go b/src/runtime/os1_windows_amd64.go
index c211f6fd91..4163fcf23d 100644
--- a/src/runtime/os1_windows_amd64.go
+++ b/src/runtime/os1_windows_amd64.go
@@ -137,7 +137,3 @@ func sigenable(sig uint32) {
 
 func sigdisable(sig uint32) {
 }
-
-func dosigprof(r *context, gp *g, mp *m) {
-	sigprof((*byte)(unsafe.Pointer(uintptr(r.rip))), (*byte)(unsafe.Pointer(uintptr(r.rsp))), nil, gp, mp)
-}
diff --git a/src/runtime/os3_plan9.go b/src/runtime/os3_plan9.go
index 58ca0e7b2f..facaab2546 100644
--- a/src/runtime/os3_plan9.go
+++ b/src/runtime/os3_plan9.go
@@ -72,9 +72,9 @@ func sighandler(_ureg *ureg, note *byte, gp *g) int {
 		return _NCONT
 	}
 	if flags&_SigNotify != 0 {
-		// TODO(ality): See if os/signal wants it.
-		//if(sigsend(...))
-		//	return _NCONT;
+		if sendNote(note) {
+			return _NCONT
+		}
 	}
 	if flags&_SigKill != 0 {
 		goto Exit
diff --git a/src/runtime/os_darwin_arm.go b/src/runtime/os_darwin_arm.go
new file mode 100644
index 0000000000..d3336c012a
--- /dev/null
+++ b/src/runtime/os_darwin_arm.go
@@ -0,0 +1,17 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+func checkgoarm() {
+	return // TODO(minux)
+}
+
+//go:nosplit
+func cputicks() int64 {
+	// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand1().
+	// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
+	// TODO: need more entropy to better seed fastrand1.
+	return nanotime()
+}
diff --git a/src/runtime/os_plan9.go b/src/runtime/os_plan9.go
index 679bf34519..870404b948 100644
--- a/src/runtime/os_plan9.go
+++ b/src/runtime/os_plan9.go
@@ -6,6 +6,11 @@ package runtime
 
 import "unsafe"
 
+func close(fd int32) int32
+
+//go:noescape
+func open(name *byte, mode, perm int32) int32
+
 //go:noescape
 func pread(fd int32, buf unsafe.Pointer, nbytes int32, offset int64) int32
 
diff --git a/src/runtime/os_windows_386.go b/src/runtime/os_windows_386.go
deleted file mode 100644
index 86a1906c0c..0000000000
--- a/src/runtime/os_windows_386.go
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2014 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-// contextPC returns the EIP (program counter) register from the context.
-func contextPC(r *context) uintptr { return uintptr(r.eip) }
-
-// contextSP returns the ESP (stack pointer) register from the context.
-func contextSP(r *context) uintptr { return uintptr(r.esp) }
diff --git a/src/runtime/os_windows_amd64.go b/src/runtime/os_windows_amd64.go
deleted file mode 100644
index 3f4d4d07cb..0000000000
--- a/src/runtime/os_windows_amd64.go
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2014 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-// contextPC returns the RIP (program counter) register from the context.
-func contextPC(r *context) uintptr { return uintptr(r.rip) }
-
-// contextSP returns the RSP (stack pointer) register from the context.
-func contextSP(r *context) uintptr { return uintptr(r.rsp) }
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index c8f6de1ac8..027416a9ec 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -32,7 +32,9 @@ func main() {
 		maxstacksize = 250000000
 	}
 
-	systemstack(newsysmon)
+	systemstack(func() {
+		newm(sysmon, nil)
+	})
 
 	// Lock the main goroutine onto this, the main OS thread,
 	// during initialization.  Most programs won't care, but a few
@@ -110,7 +112,6 @@ func init() {
 
 func forcegchelper() {
 	forcegc.g = getg()
-	forcegc.g.issystem = true
 	for {
 		lock(&forcegc.lock)
 		if forcegc.idle != 0 {
diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go
index 70addbffad..471ffc83a3 100644
--- a/src/runtime/proc1.go
+++ b/src/runtime/proc1.go
@@ -81,10 +81,6 @@ func schedinit() {
 	}
 }
 
-func newsysmon() {
-	_newm(sysmon, nil)
-}
-
 func dumpgstatus(gp *g) {
 	_g_ := getg()
 	print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
@@ -638,7 +634,7 @@ func starttheworld() {
 			notewakeup(&mp.park)
 		} else {
 			// Start M to run P.  Do not start another M below.
-			_newm(nil, p)
+			newm(nil, p)
 			add = false
 		}
 	}
@@ -658,7 +654,7 @@ func starttheworld() {
 		// coordinate.  This lazy approach works out in practice:
 		// we don't mind if the first couple gc rounds don't have quite
 		// the maximum number of procs.
-		_newm(mhelpgc, nil)
+		newm(mhelpgc, nil)
 	}
 	_g_.m.locks--
 	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
@@ -960,7 +956,7 @@ func unlockextra(mp *m) {
 }
 
 // Create a new m.  It will start off with a call to fn, or else the scheduler.
-func _newm(fn func(), _p_ *p) {
+func newm(fn func(), _p_ *p) {
 	mp := allocm(_p_)
 	mp.nextp = _p_
 	mp.mstartfn = *(*unsafe.Pointer)(unsafe.Pointer(&fn))
@@ -1037,7 +1033,7 @@ func startm(_p_ *p, spinning bool) {
 		if spinning {
 			fn = mspinning
 		}
-		_newm(fn, _p_)
+		newm(fn, _p_)
 		return
 	}
 	if mp.spinning {
@@ -2636,7 +2632,7 @@ func checkdead() {
 	lock(&allglock)
 	for i := 0; i < len(allgs); i++ {
 		gp := allgs[i]
-		if gp.issystem {
+		if isSystemGoroutine(gp) {
 			continue
 		}
 		s := readgstatus(gp)
@@ -2667,7 +2663,7 @@ func checkdead() {
 		}
 		mp := mget()
 		if mp == nil {
-			_newm(nil, _p_)
+			newm(nil, _p_)
 		} else {
 			mp.nextp = _p_
 			notewakeup(&mp.park)
diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go
index 3b78b01ca3..88cd48486a 100644
--- a/src/runtime/proc_test.go
+++ b/src/runtime/proc_test.go
@@ -435,6 +435,18 @@ func BenchmarkCreateGoroutinesCapture(b *testing.B) {
 	}
 }
 
+func BenchmarkClosureCall(b *testing.B) {
+	sum := 0
+	off1 := 1
+	for i := 0; i < b.N; i++ {
+		off2 := 2 // declared per iteration, unlike off1 and sum, which live outside the loop
+		func() {
+			sum += i + off1 + off2
+		}()
+	}
+	_ = sum // keeps sum used so the file compiles
+}
+
 type Matrix [][]float64
 
 func BenchmarkMatmult(b *testing.B) {
diff --git a/src/runtime/race/race_test.go b/src/runtime/race/race_test.go
index 7e0ee866a6..f4caff0ed4 100644
--- a/src/runtime/race/race_test.go
+++ b/src/runtime/race/race_test.go
@@ -152,7 +152,7 @@ func runTests() ([]byte, error) {
 		}
 		cmd.Env = append(cmd.Env, env)
 	}
-	cmd.Env = append(cmd.Env, `GORACE="suppress_equal_stacks=0 suppress_equal_addresses=0 exitcode=0"`)
+	cmd.Env = append(cmd.Env, `GORACE=suppress_equal_stacks=0 suppress_equal_addresses=0 exitcode=0`)
 	return cmd.CombinedOutput()
 }
 
diff --git a/src/runtime/race/testdata/slice_test.go b/src/runtime/race/testdata/slice_test.go
index 5702d1ac85..32ae878970 100644
--- a/src/runtime/race/testdata/slice_test.go
+++ b/src/runtime/race/testdata/slice_test.go
@@ -144,6 +144,54 @@ func TestNoRaceSliceCopyRead(t *testing.T) {
 	<-ch
 }
 
+func TestRacePointerSliceCopyRead(t *testing.T) {
+	ch := make(chan bool, 1)
+	a := make([]*int, 10)
+	b := make([]*int, 10)
+	go func() {
+		_ = a[5] // reads an element that the copy below overwrites
+		ch <- true
+	}()
+	copy(a, b) // writes every element of a, unsynchronized with the read above
+	<-ch
+}
+
+func TestNoRacePointerSliceWriteCopy(t *testing.T) {
+	ch := make(chan bool, 1)
+	a := make([]*int, 10)
+	b := make([]*int, 10)
+	go func() {
+		a[5] = new(int) // writes index 5, outside the range copied below
+		ch <- true
+	}()
+	copy(a[:5], b[:5]) // touches only a[0:5], so it does not overlap the write to a[5]
+	<-ch
+}
+
+func TestRacePointerSliceCopyWrite2(t *testing.T) {
+	ch := make(chan bool, 1)
+	a := make([]*int, 10)
+	b := make([]*int, 10)
+	go func() {
+		b[5] = new(int) // writes an element of the copy's source
+		ch <- true
+	}()
+	copy(a, b) // reads every element of b, unsynchronized with the write above
+	<-ch
+}
+
+func TestNoRacePointerSliceCopyRead(t *testing.T) {
+	ch := make(chan bool, 1)
+	a := make([]*int, 10)
+	b := make([]*int, 10)
+	go func() {
+		_ = b[5] // only reads the copy's source
+		ch <- true
+	}()
+	copy(a, b) // also only reads b; nothing here writes b
+	<-ch
+}
+
 func TestNoRaceSliceWriteSlice2(t *testing.T) {
 	ch := make(chan bool, 1)
 	a := make([]float64, 10)
@@ -395,6 +443,53 @@ func TestRaceSliceAppendString(t *testing.T) {
 	<-c
 }
 
+func TestRacePointerSliceAppend(t *testing.T) {
+	c := make(chan bool, 1)
+	s := make([]*int, 10, 20) // len 10, cap 20: appends fit in the existing backing array
+	go func() {
+		_ = append(s, new(int)) // stores into index 10 of the shared backing array
+		c <- true
+	}()
+	_ = append(s, new(int)) // stores into the same slot, unsynchronized with the goroutine
+	<-c
+}
+
+func TestRacePointerSliceAppendWrite(t *testing.T) {
+	c := make(chan bool, 1)
+	s := make([]*int, 10) // len == cap, so append has to copy the elements elsewhere
+	go func() {
+		_ = append(s, new(int)) // reads the existing elements of s while growing
+		c <- true
+	}()
+	s[0] = new(int) // writes an element the append above reads
+	<-c
+}
+
+func TestRacePointerSliceAppendSlice(t *testing.T) {
+	c := make(chan bool, 1)
+	s := make([]*int, 10) // len == cap, so append has to copy the elements elsewhere
+	go func() {
+		s2 := make([]*int, 10)
+		_ = append(s, s2...) // reads the existing elements of s while growing
+		c <- true
+	}()
+	s[0] = new(int) // writes an element the append above reads
+	<-c
+}
+
+func TestRacePointerSliceAppendSlice2(t *testing.T) {
+	c := make(chan bool, 1)
+	s := make([]*int, 10)
+	s2foobar := make([]*int, 10)
+	go func() {
+		_ = append(s, s2foobar...) // reads every element of s2foobar
+		c <- true
+	}()
+	println("WRITE:", &s2foobar[5]) // prints the address written on the next line
+	s2foobar[5] = nil               // writes an element the append above reads
+	<-c
+}
+
 func TestNoRaceSliceIndexAccess(t *testing.T) {
 	c := make(chan bool, 1)
 	s := make([]int, 10)
diff --git a/src/runtime/race_amd64.s b/src/runtime/race_amd64.s
index 972cbe3f8a..267cd6cec4 100644
--- a/src/runtime/race_amd64.s
+++ b/src/runtime/race_amd64.s
@@ -383,8 +383,9 @@ TEXT	runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
 	MOVQ	g_m(R13), R13
 	MOVQ	m_g0(R13), R14
 	MOVQ	R14, g(R12)	// g = m->g0
-	MOVQ	RARG0, 0(SP)	// func arg
+	PUSHQ	RARG0	// func arg
 	CALL	runtime·racesymbolize(SB)
+	POPQ	R12
 	// All registers are smashed after Go code, reload.
 	get_tls(R12)
 	MOVQ	g(R12), R13
diff --git a/src/runtime/rt0_darwin_arm.s b/src/runtime/rt0_darwin_arm.s
new file mode 100644
index 0000000000..4d31e3a78a
--- /dev/null
+++ b/src/runtime/rt0_darwin_arm.s
@@ -0,0 +1,18 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_arm_darwin(SB),7,$-4
+	// prepare arguments for main (_rt0_go)
+	MOVW	(R13), R0	// argc
+	MOVW	$4(R13), R1		// argv
+	MOVW	$main(SB), R4
+	B		(R4)
+
+TEXT main(SB),NOSPLIT,$-8
+	// save argc and argv onto stack
+	MOVM.DB.W [R0-R1], (R13)
+	MOVW	$runtime·rt0_go(SB), R4
+	B		(R4)
diff --git a/src/runtime/runtime-gdb_test.go b/src/runtime/runtime-gdb_test.go
index 4a74dd372a..4911dc000d 100644
--- a/src/runtime/runtime-gdb_test.go
+++ b/src/runtime/runtime-gdb_test.go
@@ -1,21 +1,23 @@
 package runtime_test
 
 import (
+	"fmt"
 	"io/ioutil"
 	"os"
 	"os/exec"
 	"path/filepath"
+	"runtime"
 	"testing"
 )
 
 func checkGdbPython(t *testing.T) {
-	cmd := exec.Command("gdb", "-nx", "-q", "--batch", "-ex", "python import sys; print('golang gdb python support')")
+	cmd := exec.Command("gdb", "-nx", "-q", "--batch", "-iex", "python import sys; print('go gdb python support')")
 	out, err := cmd.CombinedOutput()
 
 	if err != nil {
-		t.Skipf("skipping due to issue running gdb%v", err)
+		t.Skipf("skipping due to issue running gdb: %v", err)
 	}
-	if string(out) != "golang gdb python support\n" {
+	if string(out) != "go gdb python support\n" {
 		t.Skipf("skipping due to lack of python gdb support: %s", out)
 	}
 }
@@ -29,7 +31,6 @@ func main() {
 `
 
 func TestGdbLoadRuntimeSupport(t *testing.T) {
-
 	checkGdbPython(t)
 
 	dir, err := ioutil.TempDir("", "go-build")
@@ -51,7 +52,8 @@ func TestGdbLoadRuntimeSupport(t *testing.T) {
 		t.Fatalf("building source %v\n%s", err, out)
 	}
 
-	got, _ := exec.Command("gdb", "-nx", "-q", "--batch", "-ex", "source runtime-gdb.py",
+	got, _ := exec.Command("gdb", "-nx", "-q", "--batch", "-iex",
+		fmt.Sprintf("add-auto-load-safe-path %s/src/runtime", runtime.GOROOT()),
 		filepath.Join(dir, "a.exe")).CombinedOutput()
 	if string(got) != "Loading Go Runtime support.\n" {
 		t.Fatalf("%s", got)
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index e38d11a59d..ca3e7d564e 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -199,7 +199,6 @@ type g struct {
 	waitsince    int64  // approx time when the g become blocked
 	waitreason   string // if status==gwaiting
 	schedlink    *g
-	issystem     bool // do not output in stack dump, ignore in deadlock detector
 	preempt      bool // preemption signal, duplicates stackguard0 = stackpreempt
 	paniconfault bool // panic (instead of crash) on unexpected fault address
 	preemptscan  bool // preempted g does scan for gc
@@ -275,6 +274,7 @@ type m struct {
 	waitsemacount uint32
 	waitsemalock  uint32
 	gcstats       gcstats
+	currentwbuf   uintptr // use locks or atomic operations such as xchguintptr to access.
 	needextram    bool
 	traceback     uint8
 	waitunlockf   unsafe.Pointer // todo go func(*g, unsafe.pointer) bool
diff --git a/src/runtime/signal_darwin_arm.go b/src/runtime/signal_darwin_arm.go
new file mode 100644
index 0000000000..1441a655ef
--- /dev/null
+++ b/src/runtime/signal_darwin_arm.go
@@ -0,0 +1,44 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+	info *siginfo       // signal details; fault and sigcode read si_addr and si_code from it
+	ctxt unsafe.Pointer // machine context; regs interprets it as a *ucontext
+}
+
+func (c *sigctxt) regs() *regs32   { return &(*ucontext)(c.ctxt).uc_mcontext.ss }
+func (c *sigctxt) r0() uint32      { return c.regs().r[0] }
+func (c *sigctxt) r1() uint32      { return c.regs().r[1] }
+func (c *sigctxt) r2() uint32      { return c.regs().r[2] }
+func (c *sigctxt) r3() uint32      { return c.regs().r[3] }
+func (c *sigctxt) r4() uint32      { return c.regs().r[4] }
+func (c *sigctxt) r5() uint32      { return c.regs().r[5] }
+func (c *sigctxt) r6() uint32      { return c.regs().r[6] }
+func (c *sigctxt) r7() uint32      { return c.regs().r[7] }
+func (c *sigctxt) r8() uint32      { return c.regs().r[8] }
+func (c *sigctxt) r9() uint32      { return c.regs().r[9] }
+func (c *sigctxt) r10() uint32     { return c.regs().r[10] }
+func (c *sigctxt) fp() uint32      { return c.regs().r[11] }
+func (c *sigctxt) ip() uint32      { return c.regs().r[12] }
+func (c *sigctxt) sp() uint32      { return c.regs().sp }
+func (c *sigctxt) lr() uint32      { return c.regs().lr }
+func (c *sigctxt) pc() uint32      { return c.regs().pc }
+func (c *sigctxt) cpsr() uint32    { return c.regs().cpsr }
+func (c *sigctxt) fault() uint32   { return uint32(uintptr(unsafe.Pointer(c.info.si_addr))) }
+func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) }
+func (c *sigctxt) trap() uint32    { return 0 }
+func (c *sigctxt) error() uint32   { return 0 }
+func (c *sigctxt) oldmask() uint32 { return 0 }
+
+func (c *sigctxt) set_pc(x uint32)  { c.regs().pc = x }
+func (c *sigctxt) set_sp(x uint32)  { c.regs().sp = x }
+func (c *sigctxt) set_lr(x uint32)  { c.regs().lr = x }
+func (c *sigctxt) set_r10(x uint32) { c.regs().r[10] = x }
+
+func (c *sigctxt) set_sigcode(x uint32) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint32) { c.info.si_addr = (*byte)(unsafe.Pointer(uintptr(x))) }
diff --git a/src/runtime/signals_windows.h b/src/runtime/signals_windows.h
deleted file mode 100644
index 6943714b0f..0000000000
--- a/src/runtime/signals_windows.h
+++ /dev/null
@@ -1,3 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
diff --git a/src/runtime/sigqueue.go b/src/runtime/sigqueue.go
index fbe3425fa6..5cfc926f67 100644
--- a/src/runtime/sigqueue.go
+++ b/src/runtime/sigqueue.go
@@ -24,6 +24,8 @@
 // unnecessary rechecks of sig.mask, but it cannot lead to missed signals
 // nor deadlocks.
 
+// +build !plan9
+
 package runtime
 
 import "unsafe"
diff --git a/src/runtime/sigqueue_plan9.go b/src/runtime/sigqueue_plan9.go
new file mode 100644
index 0000000000..b029a300a8
--- /dev/null
+++ b/src/runtime/sigqueue_plan9.go
@@ -0,0 +1,115 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements runtime support for signal handling.
+
+package runtime
+
+const qsize = 64
+
+var sig struct {
+	q     noteQueue // notes queued by sendNote and drained by signal_recv
+	inuse bool      // set by the first signal_enable call; sendNote reports false until then
+
+	lock     mutex // held while sleeping is read or written
+	note     note  // signal_recv sleeps on this; sendNote wakes it
+	sleeping bool  // set by signal_recv before it sleeps; cleared by sendNote when it wakes it
+}
+
+type noteQueue struct {
+	lock mutex        // held by push and pop while they use the fields below
+	data [qsize]*byte // ring storage for queued notes
+	ri   int          // read index: next slot pop takes from
+	wi   int          // write index: next slot push stores into
+	full bool         // set by push when wi wraps around to ri; cleared by pop
+}
+
+// push stores item in the ring; it reports false, storing nothing, if the ring is full.
+func (q *noteQueue) push(item *byte) bool {
+	lock(&q.lock)
+	if q.full {
+		unlock(&q.lock)
+		return false
+	}
+	q.data[q.wi] = item
+	next := q.wi + 1
+	if next == qsize {
+		next = 0 // wrap to the start of the ring
+	}
+	q.wi = next
+	q.full = next == q.ri // full once the write index catches up with the read index
+	unlock(&q.lock)
+	return true
+}
+
+// pop removes and returns the oldest queued note, or nil if the queue
+// is empty. A full ring also has ri == wi, so the full flag is checked
+// before equal indices are treated as empty.
+func (q *noteQueue) pop() *byte {
+	lock(&q.lock)
+	if q.ri == q.wi && !q.full {
+		unlock(&q.lock)
+		return nil
+	}
+	q.full = false // taking an item always leaves room for a push
+	item := q.data[q.ri]
+	q.ri = (q.ri + 1) % qsize
+	unlock(&q.lock)
+	return item
+}
+
+// sendNote queues the note s for signal_recv and wakes the receiver if it
+// is sleeping. It is called from sighandler and reports whether the note
+// was queued. It reports false when reception has not been enabled by
+// signal_enable or when the queue is full.
+func sendNote(s *byte) bool {
+	// Nothing is queued unless reception is on and the ring has room.
+	if !sig.inuse || !sig.q.push(s) {
+		return false
+	}
+
+	// Wake the receiver only if it announced that it is about to sleep.
+	lock(&sig.lock)
+	wake := sig.sleeping
+	sig.sleeping = false
+	if wake {
+		notewakeup(&sig.note)
+	}
+	unlock(&sig.lock)
+
+	return true
+}
+
+// signal_recv blocks until a note is queued and returns it as a string.
+// Must only be called from a single goroutine at a time.
+func signal_recv() string {
+	for {
+		// Pop under sig.lock so sendNote cannot check sleeping in between.
+		lock(&sig.lock)
+		if note := sig.q.pop(); note != nil {
+			unlock(&sig.lock)
+			return gostring(note)
+		}
+		sig.sleeping = true
+		noteclear(&sig.note)
+		unlock(&sig.lock)
+		notetsleepg(&sig.note, -1)
+	}
+}
+
+// signal_enable turns on note reception the first time it is called;
+// later calls do nothing and the argument s is ignored.
+// Must only be called from a single goroutine at a time.
+func signal_enable(s uint32) {
+	if sig.inuse {
+		return
+	}
+	// First call: initialize. Reception cannot be disabled afterwards.
+	sig.inuse = true
+	noteclear(&sig.note)
+}
+
+// signal_disable is a no-op; s is ignored. Must only be called from a single goroutine at a time.
+func signal_disable(s uint32) {
+}
diff --git a/src/runtime/stack.h b/src/runtime/stack.h
index b790e70103..88c7e02f40 100644
--- a/src/runtime/stack.h
+++ b/src/runtime/stack.h
@@ -7,14 +7,22 @@
 
 enum {
 #ifdef GOOS_windows
-#define StackSystem (512*sizeof(uintptr))
-#else
+#define STACKSYSTEM (512 * sizeof(uintptr))
+#endif // GOOS_windows
 #ifdef GOOS_plan9
-#define StackSystem (512)
-#else
-	StackSystem = 0,
-#endif	// Plan 9
-#endif	// Windows
+#define STACKSYSTEM	512
+#endif // GOOS_plan9
+#ifdef GOOS_darwin
+#ifdef GOARCH_arm
+#define STACKSYSTEM 1024
+#endif // GOARCH_arm
+#endif // GOOS_darwin
+
+#ifndef STACKSYSTEM
+#define STACKSYSTEM 0
+#endif
+
+	StackSystem = STACKSYSTEM,
 
 	StackBig = 4096,
 	StackGuard = 640 + StackSystem,
diff --git a/src/runtime/stack2.go b/src/runtime/stack2.go
index 8a78b1ad96..07a7d38f0c 100644
--- a/src/runtime/stack2.go
+++ b/src/runtime/stack2.go
@@ -57,9 +57,9 @@ functions to make sure that this limit cannot be violated.
 const (
 	// StackSystem is a number of additional bytes to add
 	// to each stack below the usual guard area for OS-specific
-	// purposes like signal handling. Used on Windows and on
-	// Plan 9 because they do not use a separate stack.
-	_StackSystem = goos_windows*512*ptrSize + goos_plan9*512
+	// purposes like signal handling. Used on Windows, Plan 9,
+	// and Darwin/ARM because they do not use a separate stack.
+	_StackSystem = goos_windows*512*ptrSize + goos_plan9*512 + goos_darwin*goarch_arm*1024
 
 	// The minimum size of stack used by Go code
 	_StackMin = 2048
diff --git a/src/runtime/string.go b/src/runtime/string.go
index 46c3502f77..0ba309cf02 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -129,8 +129,13 @@ func slicebytetostringtmp(b []byte) string {
 	return *(*string)(unsafe.Pointer(&b))
 }
 
-func stringtoslicebyte(s string) []byte {
-	b := rawbyteslice(len(s))
+func stringtoslicebyte(buf *tmpBuf, s string) []byte {
+	var b []byte
+	if buf != nil && len(s) <= len(buf) {
+		b = buf[:len(s)]
+	} else {
+		b = rawbyteslice(len(s))
+	}
 	copy(b, s)
 	return b
 }
@@ -147,7 +152,7 @@ func stringtoslicebytetmp(s string) []byte {
 	return *(*[]byte)(unsafe.Pointer(&ret))
 }
 
-func stringtoslicerune(s string) []rune {
+func stringtoslicerune(buf *[tmpStringBufSize]rune, s string) []rune {
 	// two passes.
 	// unlike slicerunetostring, no race because strings are immutable.
 	n := 0
@@ -157,7 +162,12 @@ func stringtoslicerune(s string) []rune {
 		s = s[k:]
 		n++
 	}
-	a := rawruneslice(n)
+	var a []rune
+	if buf != nil && n <= len(buf) {
+		a = buf[:n]
+	} else {
+		a = rawruneslice(n)
+	}
 	n = 0
 	for len(t) > 0 {
 		r, k := charntorune(t)
@@ -168,7 +178,7 @@ func stringtoslicerune(s string) []rune {
 	return a
 }
 
-func slicerunetostring(a []rune) string {
+func slicerunetostring(buf *tmpBuf, a []rune) string {
 	if raceenabled && len(a) > 0 {
 		racereadrangepc(unsafe.Pointer(&a[0]),
 			uintptr(len(a))*unsafe.Sizeof(a[0]),
@@ -180,7 +190,7 @@ func slicerunetostring(a []rune) string {
 	for _, r := range a {
 		size1 += runetochar(dum[:], r)
 	}
-	s, b := rawstring(size1 + 3)
+	s, b := rawstringtmp(buf, size1+3)
 	size2 := 0
 	for _, r := range a {
 		// check for race
@@ -309,11 +319,6 @@ func gobytes(p *byte, n int) []byte {
 	return x
 }
 
-func gostringsize(n int) string {
-	s, _ := rawstring(n)
-	return s
-}
-
 func gostring(p *byte) string {
 	l := findnull(p)
 	if l == 0 {
diff --git a/src/runtime/stubs3.go b/src/runtime/stubs3.go
deleted file mode 100644
index ffaa28775d..0000000000
--- a/src/runtime/stubs3.go
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build plan9
-
-package runtime
-
-func close(fd int32) int32
-
-//go:noescape
-func open(name *byte, mode, perm int32) int32
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index db20ab11e1..3e46d428f7 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -44,8 +44,8 @@ type functab struct {
 	funcoff uintptr
 }
 
-const minfunc = 16 // minimum function size
-const pcbucketsize = 256*minfunc // size of bucket in the pc->func lookup table
+const minfunc = 16                 // minimum function size
+const pcbucketsize = 256 * minfunc // size of bucket in the pc->func lookup table
 
 // findfunctab is an array of these structures.
 // Each bucket represents 4096 bytes of the text segment.
@@ -56,7 +56,7 @@ const pcbucketsize = 256*minfunc // size of bucket in the pc->func lookup table
 // index to find the target function.
 // This table uses 20 bytes for every 4096 bytes of code, or ~0.5% overhead.
 type findfuncbucket struct {
-	idx uint32
+	idx        uint32
 	subbuckets [16]byte
 }
 
@@ -154,9 +154,9 @@ func findfunc(pc uintptr) *_func {
 
 	x := pc - minpc
 	b := x / pcbucketsize
-	i := x % pcbucketsize / (pcbucketsize/nsub)
+	i := x % pcbucketsize / (pcbucketsize / nsub)
 
-	ffb := (*findfuncbucket)(add(unsafe.Pointer(&findfunctab), b * unsafe.Sizeof(findfuncbucket{})))
+	ffb := (*findfuncbucket)(add(unsafe.Pointer(&findfunctab), b*unsafe.Sizeof(findfuncbucket{})))
 	idx := ffb.idx + uint32(ffb.subbuckets[i])
 	if pc < ftab[idx].entry {
 		throw("findfunc: bad findfunctab entry")
diff --git a/src/runtime/sys_darwin_arm.s b/src/runtime/sys_darwin_arm.s
new file mode 100644
index 0000000000..e1b2b664b6
--- /dev/null
+++ b/src/runtime/sys_darwin_arm.s
@@ -0,0 +1,481 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// System calls and other sys.stuff for ARM, Darwin
+// See http://fxr.watson.org/fxr/source/bsd/kern/syscalls.c?v=xnu-1228
+// or /usr/include/sys/syscall.h (on a Mac) for system call numbers.
+
+#include "go_asm.h"
+#include "go_tls.h"
+#include "textflag.h"
+
+// Copied from /usr/include/sys/syscall.h
+#define	SYS_exit           1
+#define	SYS_read           3
+#define	SYS_write          4
+#define	SYS_open           5
+#define	SYS_close          6
+#define	SYS_mmap           197
+#define	SYS_munmap         73
+#define	SYS_madvise        75
+#define	SYS_mincore        78
+#define	SYS_gettimeofday   116
+#define	SYS_kill           37
+#define	SYS_getpid         20
+#define	SYS___pthread_kill 328
+#define	SYS_setitimer      83
+#define	SYS___sysctl       202
+#define	SYS_sigprocmask    48
+#define	SYS_sigaction      46
+#define	SYS_sigreturn      184
+#define	SYS_select         93
+#define	SYS_bsdthread_register 366
+#define	SYS_bsdthread_create 360
+#define	SYS_bsdthread_terminate 361
+#define	SYS_kqueue         362
+#define	SYS_kevent         363
+#define	SYS_fcntl          92
+
+TEXT notok<>(SB),NOSPLIT,$0
+	MOVW	$0, R8
+	MOVW	R8, (R8)
+	B		0(PC)
+
+TEXT runtime·open(SB),NOSPLIT,$0
+	MOVW	name+0(FP), R0
+	MOVW	mode+4(FP), R1
+	MOVW	perm+8(FP), R2
+	MOVW	$SYS_open, R12
+	SWI	$0x80
+	MOVW	R0, ret+12(FP)
+	RET
+
+TEXT runtime·close(SB),NOSPLIT,$0
+	MOVW	fd+0(FP), R0
+	MOVW	$SYS_close, R12
+	SWI	$0x80
+	MOVW	R0, ret+4(FP)
+	RET
+
+TEXT runtime·write(SB),NOSPLIT,$0
+	MOVW	fd+0(FP), R0
+	MOVW	p+4(FP), R1
+	MOVW	n+8(FP), R2
+	MOVW	$SYS_write, R12
+	SWI	$0x80
+	MOVW	R0, ret+12(FP)
+	RET
+
+TEXT runtime·read(SB),NOSPLIT,$0
+	MOVW	fd+0(FP), R0
+	MOVW	p+4(FP), R1
+	MOVW	n+8(FP), R2
+	MOVW	$SYS_read, R12
+	SWI	$0x80
+	MOVW	R0, ret+12(FP)
+	RET
+
+TEXT runtime·exit(SB),NOSPLIT,$-4
+	MOVW	0(FP), R0
+	MOVW	$SYS_exit, R12
+	SWI	$0x80
+	MOVW	$1234, R0
+	MOVW	$1002, R1
+	MOVW	R0, (R1)	// fail hard
+
+// Exit this OS thread (like pthread_exit, which eventually
+// calls __bsdthread_terminate).
+TEXT runtime·exit1(SB),NOSPLIT,$0
+	MOVW	$SYS_bsdthread_terminate, R12
+	SWI	$0x80
+	MOVW	$1234, R0
+	MOVW	$1003, R1
+	MOVW	R0, (R1)	// fail hard
+
+TEXT runtime·raise(SB),NOSPLIT,$24
+	MOVW	$SYS_getpid, R12
+	SWI	$0x80
+	// arg 1 pid already in R0 from getpid
+	MOVW	sig+0(FP), R1	// arg 2 - signal
+	MOVW	$1, R2	// arg 3 - posix
+	MOVW	$SYS_kill, R12
+	SWI $0x80
+	RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$0
+	MOVW	addr+0(FP), R0
+	MOVW	n+4(FP), R1
+	MOVW	prot+8(FP), R2
+	MOVW	flags+12(FP), R3
+	MOVW	fd+16(FP), R4
+	MOVW	off+20(FP), R5
+	MOVW	$0, R6 // off_t is uint64_t
+	MOVW	$SYS_mmap, R12
+	SWI	$0x80
+	MOVW	R0, ret+24(FP)
+	RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$0
+	MOVW	0(FP), R0
+	MOVW	4(FP), R1
+	MOVW	$SYS_munmap, R12
+	SWI	$0x80
+	BL.CS	notok<>(SB)
+	RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$0
+	MOVW	0(FP), R0
+	MOVW	4(FP), R1
+	MOVW	8(FP), R2
+	MOVW	$SYS_madvise, R12
+	SWI	$0x80
+	BL.CS	notok<>(SB)
+	RET
+
+TEXT runtime·setitimer(SB),NOSPLIT,$0
+	MOVW	0(FP), R0
+	MOVW	4(FP), R1
+	MOVW	8(FP), R2
+	MOVW	$SYS_setitimer, R12
+	SWI	$0x80
+	RET
+
+TEXT runtime·mincore(SB),NOSPLIT,$0
+	MOVW	0(FP), R0
+	MOVW	4(FP), R1
+	MOVW	8(FP), R2
+	MOVW	$SYS_mincore, R12
+	SWI	$0x80
+	RET
+
+TEXT time·now(SB), 7, $32
+	MOVW	$8(R13), R0  // timeval
+	MOVW	$0, R1  // zone
+	MOVW	$SYS_gettimeofday, R12
+	SWI	$0x80 // Note: R0 is tv_sec, R1 is tv_usec
+
+	MOVW    R1, R2  // usec
+
+	MOVW	R0, 0(FP)
+	MOVW	$0, R1
+	MOVW	R1, 4(FP)
+	MOVW	$1000, R3
+	MUL	R3, R2
+	MOVW	R2, 8(FP)
+	RET
+
+TEXT runtime·nanotime(SB),NOSPLIT,$32
+	MOVW	$8(R13), R0  // timeval
+	MOVW	$0, R1  // zone
+	MOVW	$SYS_gettimeofday, R12
+	SWI	$0x80 // Note: R0 is tv_sec, R1 is tv_usec
+
+	MOVW    R1, R2
+	MOVW	$1000000000, R3
+	MULLU	R0, R3, (R1, R0)
+	MOVW	$1000, R3
+	MOVW	$0, R4
+	MUL	R3, R2
+	ADD.S	R2, R0
+	ADC	R4, R1
+
+	MOVW	R0, 0(FP)
+	MOVW	R1, 4(FP)
+	RET
+
+// Sigtramp's job is to call the actual signal handler.
+// It is called with the following arguments in registers and on the stack:
+//	 LR  	"return address" - ignored
+//	 R0  	actual handler
+//	 R1  	siginfo style - ignored
+//	 R2   	signal number
+//	 R3   	siginfo
+//	 -4(FP)	context, beware that 0(FP) is the saved LR
+TEXT runtime·sigtramp(SB),NOSPLIT,$0
+	// this might be called in external code context,
+	// where g is not set.
+	// first save R0, because runtime·load_g will clobber it
+	MOVM.DB.W [R0], (R13)
+	MOVB	runtime·iscgo(SB), R0
+	CMP 	$0, R0
+	BL.NE	runtime·load_g(SB)
+
+	CMP 	$0, g
+	BNE 	cont
+	// fake function call stack frame for badsignal
+	// we only need to pass R2 (signal number), but
+	// badsignal will expect R2 at 4(R13), so we also
+	// push R1 onto stack. turns out we do need R1
+	// to do sigreturn.
+	MOVM.DB.W [R1,R2], (R13)
+	MOVW  	$runtime·badsignal(SB), R11
+	BL	(R11)
+	MOVM.IA.W [R1], (R13) // saved infostyle
+	ADD		$(4+4), R13 // +4: also need to remove the pushed R0.
+	MOVW    -4(FP), R0 // load ucontext
+	B	ret
+
+cont:
+	// Restore R0
+	MOVM.IA.W (R13), [R0]
+
+	// NOTE: some Darwin/ARM kernels always use the main stack to run the
+	// signal handler. We need to switch to gsignal ourselves.
+	MOVW	g_m(g), R11
+	MOVW	m_gsignal(R11), R5
+	MOVW	(g_stack+stack_hi)(R5), R6
+	SUB		$28, R6
+
+	// copy arguments for call to sighandler
+	MOVW	R2, 4(R6) // signal num
+	MOVW	R3, 8(R6) // signal info
+	MOVW	g, 16(R6) // old_g
+	MOVW    -4(FP), R4
+	MOVW	R4, 12(R6) // context
+
+	// Backup ucontext and infostyle
+	MOVW    R4, 20(R6)
+	MOVW    R1, 24(R6)
+
+	// switch stack and g
+	MOVW	R6, R13 // sigtramp cannot be re-entered, so no need to back up R13.
+	MOVW	R5, g
+
+	BL	(R0)
+
+	// call sigreturn
+	MOVW	20(R13), R0	// saved ucontext
+	MOVW	24(R13), R1	// saved infostyle
+ret:
+	MOVW	$SYS_sigreturn, R12 // sigreturn(ucontext, infostyle)
+	SWI	$0x80
+
+	// if sigreturn fails, we can do nothing but exit
+	B	runtime·exit(SB)
+
+TEXT runtime·sigprocmask(SB),NOSPLIT,$0
+	MOVW	0(FP), R0
+	MOVW	4(FP), R1
+	MOVW	8(FP), R2
+	MOVW	$SYS_sigprocmask, R12
+	SWI	$0x80
+	BL.CS	notok<>(SB)
+	RET
+
+TEXT runtime·sigaction(SB),NOSPLIT,$0
+	MOVW	0(FP), R0
+	MOVW	4(FP), R1
+	MOVW	8(FP), R2
+	MOVW	$SYS_sigaction, R12
+	SWI	$0x80
+	RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$12
+	MOVW	usec+0(FP), R0
+	MOVW	R0, R1
+	MOVW	$1000000, R2
+	DIV     R2, R0
+	MOD     R2, R1
+	MOVW	R0, -12(SP)
+	MOVW	R1, -8(SP)
+
+	// select(0, 0, 0, 0, &tv)
+	MOVW	$0, R0
+	MOVW	$0, R1
+	MOVW	$0, R2
+	MOVW	$0, R3
+	MOVW	$-12(SP), R4
+	MOVW	$SYS_select, R12
+	SWI	$0x80
+	RET
+
+TEXT runtime·cas(SB),NOSPLIT,$0
+	B	runtime·armcas(SB)
+
+TEXT runtime·casp1(SB),NOSPLIT,$0
+	B	runtime·cas(SB)
+
+TEXT runtime·sysctl(SB),NOSPLIT,$0
+	MOVW	0(FP), R0
+	MOVW	4(FP), R1
+	MOVW	8(FP), R2
+	MOVW	12(FP), R3
+	MOVW	16(FP), R4
+	MOVW	20(FP), R5
+	MOVW	$SYS___sysctl, R12 // syscall entry
+	SWI	$0x80
+	BCC     sysctl_ret
+	RSB     $0, R0, R0
+	MOVW	R0, ret+24(FP)
+	RET
+sysctl_ret:
+	MOVW	$0, R0
+	MOVW	R0, ret+24(FP)
+	RET
+
+// Thread related functions
+// int32 bsdthread_create(void *stk, M *m, G *g, void (*fn)(void))
+TEXT runtime·bsdthread_create(SB),NOSPLIT,$0
+	// Set up arguments to bsdthread_create system call.
+	// The ones in quotes pass through to the thread callback
+	// uninterpreted, so we can put whatever we want there.
+	MOVW    fn+12(FP), R0   // "func"
+	MOVW    mm+4(FP), R1 // "arg"
+	MOVW    stk+0(FP), R2 // stack
+	MOVW    gg+8(FP), R3 // "pthread"
+	MOVW	$0x01000000, R4	// flags = PTHREAD_START_CUSTOM
+	MOVW	$0, R5	// paranoia
+	MOVW	$SYS_bsdthread_create, R12
+	SWI	$0x80
+	BCC		create_ret
+	RSB 	$0, R0, R0
+	MOVW	R0, ret+16(FP)
+	RET
+create_ret:
+	MOVW	$0, R0
+	MOVW	R0, ret+16(FP)
+	RET
+
+// The thread that bsdthread_create creates starts executing here,
+// because we registered this function using bsdthread_register
+// at startup.
+//	R0 = "pthread"
+//	R1 = mach thread port
+//	R2 = "func" (= fn)
+//	R3 = "arg" (= m)
+//	R4 = stack
+//	R5 = flags (= 0)
+// XXX: how to deal with R4/SP? ref: Libc-594.9.1/arm/pthreads/thread_start.s
+TEXT runtime·bsdthread_start(SB),NOSPLIT,$0
+	MOVW    R1, m_procid(R3) // thread port is m->procid
+	MOVW	m_g0(R3), g
+	MOVW	R3, g_m(g)
+	// ARM doesn't have runtime·stackcheck(SB)
+	// disable runfast mode of vfp
+	EOR     R12, R12
+	WORD    $0xeee1ca10 // fmxr	fpscr, ip
+	BL      (R2) // fn
+	BL      runtime·exit1(SB)
+	RET
+
+// int32 bsdthread_register(void)
+// registers callbacks for threadstart (see bsdthread_create above)
+// and wqthread and pthsize (not used).  returns 0 on success.
+TEXT runtime·bsdthread_register(SB),NOSPLIT,$0
+	MOVW	$runtime·bsdthread_start(SB), R0	// threadstart
+	MOVW	$0, R1	// wqthread, not used by us
+	MOVW	$0, R2	// pthsize, not used by us
+	MOVW	$0, R3 	// dummy_value [sic]
+	MOVW	$0, R4	// targetconc_ptr
+	MOVW	$0, R5	// dispatchqueue_offset
+	MOVW	$SYS_bsdthread_register, R12	// bsdthread_register
+	SWI	$0x80
+	MOVW	R0, ret+0(FP)
+	RET
+
+// uint32 mach_msg_trap(void*, uint32, uint32, uint32, uint32, uint32, uint32)
+TEXT runtime·mach_msg_trap(SB),NOSPLIT,$0
+	MOVW    0(FP), R0
+	MOVW    4(FP), R1
+	MOVW    8(FP), R2
+	MOVW    12(FP), R3
+	MOVW    16(FP), R4
+	MOVW    20(FP), R5
+	MOVW    24(FP), R6
+	MVN     $30, R12
+	SWI	$0x80
+	MOVW	R0, 28(FP)
+	RET
+
+TEXT runtime·mach_task_self(SB),NOSPLIT,$0
+	MVN     $27, R12 // task_self_trap
+	SWI	$0x80
+	MOVW	R0, 0(FP)
+	RET
+
+TEXT runtime·mach_thread_self(SB),NOSPLIT,$0
+	MVN 	$26, R12 // thread_self_trap
+	SWI	$0x80
+	MOVW	R0, 0(FP)
+	RET
+
+TEXT runtime·mach_reply_port(SB),NOSPLIT,$0
+	MVN 	$25, R12	// mach_reply_port
+	SWI	$0x80
+	MOVW	R0, 0(FP)
+	RET
+
+// Mach provides trap versions of the semaphore ops,
+// instead of requiring the use of RPC.
+
+// uint32 mach_semaphore_wait(uint32)
+TEXT runtime·mach_semaphore_wait(SB),NOSPLIT,$0
+	MOVW	0(FP), R0
+	MVN 	$35, R12	// semaphore_wait_trap
+	SWI	$0x80
+	MOVW	R0, ret+4(FP)
+	RET
+
+// uint32 mach_semaphore_timedwait(uint32, uint32, uint32)
+TEXT runtime·mach_semaphore_timedwait(SB),NOSPLIT,$0
+	MOVW	0(FP), R0
+	MOVW	4(FP), R1
+	MOVW	8(FP), R2
+	MVN 	$37, R12	// semaphore_timedwait_trap
+	SWI	$0x80
+	MOVW	R0, ret+12(FP)
+	RET
+
+// uint32 mach_semaphore_signal(uint32)
+TEXT runtime·mach_semaphore_signal(SB),NOSPLIT,$0
+	MOVW    0(FP), R0
+	MVN 	$32, R12	// semaphore_signal_trap
+	SWI	$0x80
+	MOVW	R0, ret+4(FP)
+	RET
+
+// uint32 mach_semaphore_signal_all(uint32)
+TEXT runtime·mach_semaphore_signal_all(SB),NOSPLIT,$0
+	MOVW	0(FP), R0
+	MVN 	$33, R12	// semaphore_signal_all_trap
+	SWI	$0x80
+	MOVW	R0, ret+4(FP)
+	RET
+
+// int32 runtime·kqueue(void)
+TEXT runtime·kqueue(SB),NOSPLIT,$0
+	MOVW	$SYS_kqueue, R12
+	SWI	$0x80
+	RSB.CS	$0, R0, R0
+	MOVW	R0, ret+0(FP)
+	RET
+
+// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout)
+TEXT runtime·kevent(SB),NOSPLIT,$0
+	MOVW	$SYS_kevent, R12
+	MOVW	kq+0(FP), R0
+	MOVW	changelist+4(FP), R1
+	MOVW	nchanges+8(FP), R2
+	MOVW	eventlist+12(FP), R3
+	MOVW	nevents+16(FP), R4
+	MOVW	timeout+20(FP), R5
+	SWI	$0x80
+	RSB.CS	$0, R0, R0
+	MOVW	R0, ret+24(FP)
+	RET
+
+// int32 runtime·closeonexec(int32 fd)
+TEXT runtime·closeonexec(SB),NOSPLIT,$0
+	MOVW	$SYS_fcntl, R12
+	MOVW	0(FP), R0
+	MOVW	$2, R1	// F_SETFD
+	MOVW	$1, R2	// FD_CLOEXEC
+	SWI	$0x80
+	RET
+
+// sigaltstack on some darwin/arm versions is buggy and will always
+// run the signal handler on the main stack, so our sigtramp has
+// to do the stack switch itself.
+TEXT runtime·sigaltstack(SB),NOSPLIT,$0
+	RET
diff --git a/src/runtime/time.go b/src/runtime/time.go
index 50895ca8ec..6a2cc2136c 100644
--- a/src/runtime/time.go
+++ b/src/runtime/time.go
@@ -153,7 +153,6 @@ func deltimer(t *timer) bool {
 // If addtimer inserts a new earlier event, addtimer1 wakes timerproc early.
 func timerproc() {
 	timers.gp = getg()
-	timers.gp.issystem = true
 	for {
 		lock(&timers.lock)
 		timers.sleeping = false
diff --git a/src/runtime/tls_arm.s b/src/runtime/tls_arm.s
index 7c5c0e215e..d130d42cf2 100644
--- a/src/runtime/tls_arm.s
+++ b/src/runtime/tls_arm.s
@@ -15,8 +15,14 @@
 // Note: both functions will clobber R0 and R11 and
 // can be called from 5c ABI code.
 
-// On android, runtime.tlsg is a normal variable.
+// On android and darwin, runtime.tlsg is a normal variable.
 // TLS offset is computed in x_cgo_inittls.
+#ifdef GOOS_android
+#define TLSG_IS_VARIABLE
+#endif
+#ifdef GOOS_darwin
+#define TLSG_IS_VARIABLE
+#endif
 
 // save_g saves the g register into pthread-provided
 // thread-local memory, so that we can call externally compiled
@@ -34,10 +40,11 @@ TEXT runtime·save_g(SB),NOSPLIT,$-4
 	// a call to runtime.read_tls_fallback which jumps to __kuser_get_tls.
 	// The replacement function saves LR in R11 over the call to read_tls_fallback.
 	MRC	15, 0, R0, C13, C0, 3 // fetch TLS base pointer
+	BIC $3, R0 // Darwin/ARM might return unaligned pointer
 	// $runtime.tlsg(SB) is a special linker symbol.
 	// It is the offset from the TLS base pointer to our
 	// thread-local storage for g.
-#ifdef GOOS_android
+#ifdef TLSG_IS_VARIABLE
 	MOVW	runtime·tlsg(SB), R11
 #else
 	MOVW	$runtime·tlsg(SB), R11
@@ -57,10 +64,11 @@ TEXT runtime·load_g(SB),NOSPLIT,$0
 #endif
 	// See save_g
 	MRC	15, 0, R0, C13, C0, 3 // fetch TLS base pointer
+	BIC $3, R0 // Darwin/ARM might return unaligned pointer
 	// $runtime.tlsg(SB) is a special linker symbol.
 	// It is the offset from the TLS base pointer to our
 	// thread-local storage for g.
-#ifdef GOOS_android
+#ifdef TLSG_IS_VARIABLE
 	MOVW	runtime·tlsg(SB), R11
 #else
 	MOVW	$runtime·tlsg(SB), R11
@@ -68,3 +76,28 @@ TEXT runtime·load_g(SB),NOSPLIT,$0
 	ADD	R11, R0
 	MOVW	0(R0), g
 	RET
+
+TEXT runtime·_initcgo(SB),NOSPLIT,$0
+#ifndef GOOS_nacl
+	// If _cgo_init is non-zero, call it with g, setg_gcc, tlsg and the TLS base pointer in R0-R3; otherwise just return.
+	MOVW	_cgo_init(SB), R4
+	CMP	$0, R4
+	B.EQ	nocgo
+	MRC     15, 0, R0, C13, C0, 3 	// load TLS base pointer (NOTE(review): not masked with BIC $3 as in save_g/load_g — confirm intended)
+	MOVW 	R0, R3 			// arg 3: TLS base pointer
+	MOVW 	$runtime·tlsg(SB), R2 	// arg 2: tlsg
+	MOVW	$setg_gcc<>(SB), R1 	// arg 1: setg
+	MOVW	g, R0 			// arg 0: G
+	BL	(R4) // will clobber R0-R3
+#endif
+nocgo:
+	RET
+
+// void setg_gcc(G*); set g called from gcc: copies R0 into g, then jumps to save_g.
+TEXT setg_gcc<>(SB),NOSPLIT,$0
+	MOVW	R0, g	// the G* argument is taken from R0
+	B		runtime·save_g(SB)	// plain branch, not BL: control does not come back here
+
+#ifdef TLSG_IS_VARIABLE
+GLOBL runtime·tlsg+0(SB), NOPTR, $4
+#endif
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 6c87d7e2e4..8c31c5abad 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -39,6 +39,11 @@ var (
 	mstartPC             uintptr
 	rt0_goPC             uintptr
 	sigpanicPC           uintptr
+	runfinqPC            uintptr
+	backgroundgcPC       uintptr
+	bgsweepPC            uintptr
+	forcegchelperPC      uintptr
+	timerprocPC          uintptr
 	systemstack_switchPC uintptr
 
 	externalthreadhandlerp uintptr // initialized elsewhere
@@ -56,6 +61,11 @@ func tracebackinit() {
 	mstartPC = funcPC(mstart)
 	rt0_goPC = funcPC(rt0_go)
 	sigpanicPC = funcPC(sigpanic)
+	runfinqPC = funcPC(runfinq)
+	backgroundgcPC = funcPC(backgroundgc)
+	bgsweepPC = funcPC(bgsweep)
+	forcegchelperPC = funcPC(forcegchelper)
+	timerprocPC = funcPC(timerproc)
 	systemstack_switchPC = funcPC(systemstack_switch)
 }
 
@@ -606,7 +616,7 @@ func tracebackothers(me *g) {
 
 	lock(&allglock)
 	for _, gp := range allgs {
-		if gp == me || gp == g.m.curg || readgstatus(gp) == _Gdead || gp.issystem && level < 2 {
+		if gp == me || gp == g.m.curg || readgstatus(gp) == _Gdead || isSystemGoroutine(gp) && level < 2 {
 			continue
 		}
 		print("\n")
@@ -631,3 +641,14 @@ func topofstack(f *_func) bool {
 		pc == rt0_goPC ||
 		externalthreadhandlerp != 0 && pc == externalthreadhandlerp
 }
+
+// isSystemGoroutine reports whether the goroutine gp must be omitted in
+// stack dumps and by the deadlock detector; it decides by comparing gp.startpc with the recorded entry PCs.
+func isSystemGoroutine(gp *g) bool {
+	pc := gp.startpc
+	return pc == runfinqPC && !fingRunning || // runfinq only counts while fingRunning is false
+		pc == backgroundgcPC ||
+		pc == bgsweepPC ||
+		pc == forcegchelperPC ||
+		pc == timerprocPC
+}
author	Russ Cox <rsc@golang.org>	2015-02-13 12:50:23 -0500
committer	Russ Cox <rsc@golang.org>	2015-02-13 12:51:56 -0500
commit	87de9ce212988c8bdf0630750e772d8805091bcc (patch)
tree	0bb1cc671417e9b851d35a4bbcd4d756e5aee4e9 /src/runtime
parent	01925bd3f306c899cddfa59aa2ad41c9b77fcd74 (diff)
parent	5f1efe738be296cdbc586348af92eab621d068f5 (diff)
download	go-87de9ce212988c8bdf0630750e772d8805091bcc.tar.xz