From 8556c76f88a6e80aafb535802be71cc79bd22c75 Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick <bradfitz@golang.org>
Date: Tue, 5 Apr 2016 18:22:23 +0000
Subject: runtime: minor Windows cleanup

Change-Id: I9a8081ef1109469e9577c642156aa635188d8954
Reviewed-on: https://go-review.googlesource.com/21538
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Alex Brainman <alex.brainman@gmail.com>
---
 src/runtime/os1_windows.go | 22 +++++++++-------------
 src/runtime/os_windows.go  |  2 +-
 2 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/os1_windows.go b/src/runtime/os1_windows.go
index 315dd9816a..a36def0ffe 100644
--- a/src/runtime/os1_windows.go
+++ b/src/runtime/os1_windows.go
@@ -110,28 +110,24 @@ func asmstdcall(fn unsafe.Pointer)
 
 var asmstdcallAddr unsafe.Pointer
 
-func windowsFindfunc(name []byte, lib uintptr) stdFunction {
+func windowsFindfunc(lib uintptr, name []byte) stdFunction {
+	if len(name) == 0 || name[len(name)-1] != 0 { // name must be a non-empty, NUL-terminated byte string; its address is passed to GetProcAddress
+		throw("usage")
+	}
 	f := stdcall2(_GetProcAddress, lib, uintptr(unsafe.Pointer(&name[0])))
 	return stdFunction(unsafe.Pointer(f))
 }
 
 func loadOptionalSyscalls() {
-	var (
-		kernel32dll                 = []byte("kernel32.dll\000")
-		addVectoredContinueHandler  = []byte("AddVectoredContinueHandler\000")
-		getQueuedCompletionStatusEx = []byte("GetQueuedCompletionStatusEx\000")
-		addDllDirectory             = []byte("AddDllDirectory\000")
-		loadLibraryExW              = []byte("LoadLibraryExW\000")
-	)
-
+	var kernel32dll = []byte("kernel32.dll\000")
 	k32 := stdcall1(_LoadLibraryA, uintptr(unsafe.Pointer(&kernel32dll[0])))
 	if k32 == 0 {
 		throw("kernel32.dll not found")
 	}
-	_AddDllDirectory = windowsFindfunc(addDllDirectory, k32)
-	_AddVectoredContinueHandler = windowsFindfunc(addVectoredContinueHandler, k32)
-	_GetQueuedCompletionStatusEx = windowsFindfunc(getQueuedCompletionStatusEx, k32)
-	_LoadLibraryExW = windowsFindfunc(loadLibraryExW, k32)
+	_AddDllDirectory = windowsFindfunc(k32, []byte("AddDllDirectory\000"))
+	_AddVectoredContinueHandler = windowsFindfunc(k32, []byte("AddVectoredContinueHandler\000"))
+	_GetQueuedCompletionStatusEx = windowsFindfunc(k32, []byte("GetQueuedCompletionStatusEx\000"))
+	_LoadLibraryExW = windowsFindfunc(k32, []byte("LoadLibraryExW\000"))
 }
 
 //go:nosplit
diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
index 8bdf5a271f..24b3b8cf29 100644
--- a/src/runtime/os_windows.go
+++ b/src/runtime/os_windows.go
@@ -10,7 +10,7 @@ type mOS struct {
 	waitsema uintptr // semaphore for parking on locks
 }
 
-type stdFunction *byte
+type stdFunction unsafe.Pointer
 
 //go:linkname os_sigpipe os.sigpipe
 func os_sigpipe() {
-- 
cgit v1.3


From 5103fbfdb29278533c666163a9d56f85408224d9 Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick <bradfitz@golang.org>
Date: Wed, 6 Apr 2016 02:51:55 +0000
Subject: runtime: merge os_linux.go into os1_linux.go

Change-Id: I791c47014fe69e8529c7b2f0b9a554e47902d46c
Reviewed-on: https://go-review.googlesource.com/21566
Reviewed-by: Minux Ma <minux@golang.org>
---
 src/runtime/os1_linux.go | 29 +++++++++++++++++++++++++++++
 src/runtime/os_linux.go  | 36 ------------------------------------
 2 files changed, 29 insertions(+), 36 deletions(-)
 delete mode 100644 src/runtime/os_linux.go

(limited to 'src/runtime')

diff --git a/src/runtime/os1_linux.go b/src/runtime/os1_linux.go
index 726dd649fe..7d8cc7e5c4 100644
--- a/src/runtime/os1_linux.go
+++ b/src/runtime/os1_linux.go
@@ -9,6 +9,11 @@ import (
 	"unsafe"
 )
 
+type mOS struct{}
+
+//go:noescape
+func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
+
 // Linux futex.
 //
 //	futexsleep(uint32 *addr, uint32 val)
@@ -127,6 +132,9 @@ const (
 		_CLONE_THREAD /* revisit - okay for now */
 )
 
+//go:noescape
+func clone(flags int32, stk, mm, gg, fn unsafe.Pointer) int32
+
 // May run with m.p==nil, so write barriers are not allowed.
 //go:nowritebarrier
 func newosproc(mp *m, stk unsafe.Pointer) {
@@ -307,6 +315,27 @@ func sigreturn()
 func sigtramp()
 func cgoSigtramp()
 
+//go:noescape
+func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
+
+//go:noescape
+func sigaltstack(new, old *sigaltstackt)
+
+//go:noescape
+func setitimer(mode int32, new, old *itimerval)
+
+//go:noescape
+func rtsigprocmask(sig uint32, new, old *sigset, size int32)
+
+//go:noescape
+func getrlimit(kind int32, limit unsafe.Pointer) int32
+func raise(sig int32)
+func raiseproc(sig int32)
+
+//go:noescape
+func sched_getaffinity(pid, len uintptr, buf *uintptr) int32
+func osyield()
+
 //go:nosplit
 //go:nowritebarrierrec
 func setsig(i int32, fn uintptr, restart bool) {
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
deleted file mode 100644
index dd69743e10..0000000000
--- a/src/runtime/os_linux.go
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-type mOS struct{}
-
-//go:noescape
-func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
-
-//go:noescape
-func clone(flags int32, stk, mm, gg, fn unsafe.Pointer) int32
-
-//go:noescape
-func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
-
-//go:noescape
-func sigaltstack(new, old *sigaltstackt)
-
-//go:noescape
-func setitimer(mode int32, new, old *itimerval)
-
-//go:noescape
-func rtsigprocmask(sig uint32, new, old *sigset, size int32)
-
-//go:noescape
-func getrlimit(kind int32, limit unsafe.Pointer) int32
-func raise(sig int32)
-func raiseproc(sig int32)
-
-//go:noescape
-func sched_getaffinity(pid, len uintptr, buf *uintptr) int32
-func osyield()
-- 
cgit v1.3


From 34c58065e54e0ac2d610b4a550bdba8f1db90ec6 Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick <bradfitz@golang.org>
Date: Wed, 6 Apr 2016 02:52:17 +0000
Subject: runtime: rename os1_linux.go to os_linux.go

Change-Id: I938f61763c3256a876d62aeb54ef8c25cc4fc90e
Reviewed-on: https://go-review.googlesource.com/21567
Reviewed-by: Minux Ma <minux@golang.org>
---
 src/runtime/os1_linux.go | 422 -----------------------------------------------
 src/runtime/os_linux.go  | 422 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 422 insertions(+), 422 deletions(-)
 delete mode 100644 src/runtime/os1_linux.go
 create mode 100644 src/runtime/os_linux.go

(limited to 'src/runtime')

diff --git a/src/runtime/os1_linux.go b/src/runtime/os1_linux.go
deleted file mode 100644
index 7d8cc7e5c4..0000000000
--- a/src/runtime/os1_linux.go
+++ /dev/null
@@ -1,422 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import (
-	"runtime/internal/sys"
-	"unsafe"
-)
-
-type mOS struct{}
-
-//go:noescape
-func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
-
-// Linux futex.
-//
-//	futexsleep(uint32 *addr, uint32 val)
-//	futexwakeup(uint32 *addr)
-//
-// Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
-// Futexwakeup wakes up threads sleeping on addr.
-// Futexsleep is allowed to wake up spuriously.
-
-const (
-	_FUTEX_WAIT = 0
-	_FUTEX_WAKE = 1
-)
-
-// Atomically,
-//	if(*addr == val) sleep
-// Might be woken up spuriously; that's allowed.
-// Don't sleep longer than ns; ns < 0 means forever.
-//go:nosplit
-func futexsleep(addr *uint32, val uint32, ns int64) {
-	var ts timespec
-
-	// Some Linux kernels have a bug where futex of
-	// FUTEX_WAIT returns an internal error code
-	// as an errno. Libpthread ignores the return value
-	// here, and so can we: as it says a few lines up,
-	// spurious wakeups are allowed.
-	if ns < 0 {
-		futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, nil, nil, 0)
-		return
-	}
-
-	// It's difficult to live within the no-split stack limits here.
-	// On ARM and 386, a 64-bit divide invokes a general software routine
-	// that needs more stack than we can afford. So we use timediv instead.
-	// But on real 64-bit systems, where words are larger but the stack limit
-	// is not, even timediv is too heavy, and we really need to use just an
-	// ordinary machine instruction.
-	if sys.PtrSize == 8 {
-		ts.set_sec(ns / 1000000000)
-		ts.set_nsec(int32(ns % 1000000000))
-	} else {
-		ts.tv_nsec = 0
-		ts.set_sec(int64(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ts.tv_nsec)))))
-	}
-	futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, unsafe.Pointer(&ts), nil, 0)
-}
-
-// If any procs are sleeping on addr, wake up at most cnt.
-//go:nosplit
-func futexwakeup(addr *uint32, cnt uint32) {
-	ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE, cnt, nil, nil, 0)
-	if ret >= 0 {
-		return
-	}
-
-	// I don't know that futex wakeup can return
-	// EAGAIN or EINTR, but if it does, it would be
-	// safe to loop and call futex again.
-	systemstack(func() {
-		print("futexwakeup addr=", addr, " returned ", ret, "\n")
-	})
-
-	*(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
-}
-
-func getproccount() int32 {
-	// This buffer is huge (8 kB) but we are on the system stack
-	// and there should be plenty of space (64 kB).
-	// Also this is a leaf, so we're not holding up the memory for long.
-	// See golang.org/issue/11823.
-	// The suggested behavior here is to keep trying with ever-larger
-	// buffers, but we don't have a dynamic memory allocator at the
-	// moment, so that's a bit tricky and seems like overkill.
-	const maxCPUs = 64 * 1024
-	var buf [maxCPUs / (sys.PtrSize * 8)]uintptr
-	r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
-	n := int32(0)
-	for _, v := range buf[:r/sys.PtrSize] {
-		for v != 0 {
-			n += int32(v & 1)
-			v >>= 1
-		}
-	}
-	if n == 0 {
-		n = 1
-	}
-	return n
-}
-
-// Clone, the Linux rfork.
-const (
-	_CLONE_VM             = 0x100
-	_CLONE_FS             = 0x200
-	_CLONE_FILES          = 0x400
-	_CLONE_SIGHAND        = 0x800
-	_CLONE_PTRACE         = 0x2000
-	_CLONE_VFORK          = 0x4000
-	_CLONE_PARENT         = 0x8000
-	_CLONE_THREAD         = 0x10000
-	_CLONE_NEWNS          = 0x20000
-	_CLONE_SYSVSEM        = 0x40000
-	_CLONE_SETTLS         = 0x80000
-	_CLONE_PARENT_SETTID  = 0x100000
-	_CLONE_CHILD_CLEARTID = 0x200000
-	_CLONE_UNTRACED       = 0x800000
-	_CLONE_CHILD_SETTID   = 0x1000000
-	_CLONE_STOPPED        = 0x2000000
-	_CLONE_NEWUTS         = 0x4000000
-	_CLONE_NEWIPC         = 0x8000000
-
-	cloneFlags = _CLONE_VM | /* share memory */
-		_CLONE_FS | /* share cwd, etc */
-		_CLONE_FILES | /* share fd table */
-		_CLONE_SIGHAND | /* share sig handler table */
-		_CLONE_THREAD /* revisit - okay for now */
-)
-
-//go:noescape
-func clone(flags int32, stk, mm, gg, fn unsafe.Pointer) int32
-
-// May run with m.p==nil, so write barriers are not allowed.
-//go:nowritebarrier
-func newosproc(mp *m, stk unsafe.Pointer) {
-	/*
-	 * note: strace gets confused if we use CLONE_PTRACE here.
-	 */
-	if false {
-		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", funcPC(clone), " id=", mp.id, " ostk=", &mp, "\n")
-	}
-
-	// Disable signals during clone, so that the new thread starts
-	// with signals disabled. It will enable them in minit.
-	var oset sigset
-	rtsigprocmask(_SIG_SETMASK, &sigset_all, &oset, int32(unsafe.Sizeof(oset)))
-	ret := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(funcPC(mstart)))
-	rtsigprocmask(_SIG_SETMASK, &oset, nil, int32(unsafe.Sizeof(oset)))
-
-	if ret < 0 {
-		print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", -ret, ")\n")
-		throw("newosproc")
-	}
-}
-
-// Version of newosproc that doesn't require a valid G.
-//go:nosplit
-func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
-	stack := sysAlloc(stacksize, &memstats.stacks_sys)
-	if stack == nil {
-		write(2, unsafe.Pointer(&failallocatestack[0]), int32(len(failallocatestack)))
-		exit(1)
-	}
-	ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
-	if ret < 0 {
-		write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
-		exit(1)
-	}
-}
-
-var failallocatestack = []byte("runtime: failed to allocate stack for the new OS thread\n")
-var failthreadcreate = []byte("runtime: failed to create new OS thread\n")
-
-func osinit() {
-	ncpu = getproccount()
-}
-
-var urandom_dev = []byte("/dev/urandom\x00")
-
-func getRandomData(r []byte) {
-	if startupRandomData != nil {
-		n := copy(r, startupRandomData)
-		extendRandom(r, n)
-		return
-	}
-	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
-	n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
-	closefd(fd)
-	extendRandom(r, int(n))
-}
-
-func goenvs() {
-	goenvs_unix()
-}
-
-// Called to do synchronous initialization of Go code built with
-// -buildmode=c-archive or -buildmode=c-shared.
-// None of the Go runtime is initialized.
-//go:nosplit
-//go:nowritebarrierrec
-func libpreinit() {
-	initsig(true)
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
-func mpreinit(mp *m) {
-	mp.gsignal = malg(32 * 1024) // Linux wants >= 2K
-	mp.gsignal.m = mp
-}
-
-//go:nosplit
-func msigsave(mp *m) {
-	smask := &mp.sigmask
-	rtsigprocmask(_SIG_SETMASK, nil, smask, int32(unsafe.Sizeof(*smask)))
-}
-
-//go:nosplit
-func msigrestore(sigmask sigset) {
-	rtsigprocmask(_SIG_SETMASK, &sigmask, nil, int32(unsafe.Sizeof(sigmask)))
-}
-
-//go:nosplit
-func sigblock() {
-	rtsigprocmask(_SIG_SETMASK, &sigset_all, nil, int32(unsafe.Sizeof(sigset_all)))
-}
-
-func gettid() uint32
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the new thread, cannot allocate memory.
-func minit() {
-	// Initialize signal handling.
-	_g_ := getg()
-
-	var st sigaltstackt
-	sigaltstack(nil, &st)
-	if st.ss_flags&_SS_DISABLE != 0 {
-		signalstack(&_g_.m.gsignal.stack)
-		_g_.m.newSigstack = true
-	} else {
-		// Use existing signal stack.
-		stsp := uintptr(unsafe.Pointer(st.ss_sp))
-		_g_.m.gsignal.stack.lo = stsp
-		_g_.m.gsignal.stack.hi = stsp + st.ss_size
-		_g_.m.gsignal.stackguard0 = stsp + _StackGuard
-		_g_.m.gsignal.stackguard1 = stsp + _StackGuard
-		_g_.m.gsignal.stackAlloc = st.ss_size
-		_g_.m.newSigstack = false
-	}
-
-	// for debuggers, in case cgo created the thread
-	_g_.m.procid = uint64(gettid())
-
-	// restore signal mask from m.sigmask and unblock essential signals
-	nmask := _g_.m.sigmask
-	for i := range sigtable {
-		if sigtable[i].flags&_SigUnblock != 0 {
-			sigdelset(&nmask, i)
-		}
-	}
-	rtsigprocmask(_SIG_SETMASK, &nmask, nil, int32(unsafe.Sizeof(nmask)))
-}
-
-// Called from dropm to undo the effect of an minit.
-//go:nosplit
-func unminit() {
-	if getg().m.newSigstack {
-		signalstack(nil)
-	}
-}
-
-func memlimit() uintptr {
-	/*
-		TODO: Convert to Go when something actually uses the result.
-
-		Rlimit rl;
-		extern byte runtime·text[], runtime·end[];
-		uintptr used;
-
-		if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
-			return 0;
-		if(rl.rlim_cur >= 0x7fffffff)
-			return 0;
-
-		// Estimate our VM footprint excluding the heap.
-		// Not an exact science: use size of binary plus
-		// some room for thread stacks.
-		used = runtime·end - runtime·text + (64<<20);
-		if(used >= rl.rlim_cur)
-			return 0;
-
-		// If there's not at least 16 MB left, we're probably
-		// not going to be able to do much. Treat as no limit.
-		rl.rlim_cur -= used;
-		if(rl.rlim_cur < (16<<20))
-			return 0;
-
-		return rl.rlim_cur - used;
-	*/
-
-	return 0
-}
-
-//#ifdef GOARCH_386
-//#define sa_handler k_sa_handler
-//#endif
-
-func sigreturn()
-func sigtramp()
-func cgoSigtramp()
-
-//go:noescape
-func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
-
-//go:noescape
-func sigaltstack(new, old *sigaltstackt)
-
-//go:noescape
-func setitimer(mode int32, new, old *itimerval)
-
-//go:noescape
-func rtsigprocmask(sig uint32, new, old *sigset, size int32)
-
-//go:noescape
-func getrlimit(kind int32, limit unsafe.Pointer) int32
-func raise(sig int32)
-func raiseproc(sig int32)
-
-//go:noescape
-func sched_getaffinity(pid, len uintptr, buf *uintptr) int32
-func osyield()
-
-//go:nosplit
-//go:nowritebarrierrec
-func setsig(i int32, fn uintptr, restart bool) {
-	var sa sigactiont
-	memclr(unsafe.Pointer(&sa), unsafe.Sizeof(sa))
-	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER
-	if restart {
-		sa.sa_flags |= _SA_RESTART
-	}
-	sigfillset(&sa.sa_mask)
-	// Although Linux manpage says "sa_restorer element is obsolete and
-	// should not be used". x86_64 kernel requires it. Only use it on
-	// x86.
-	if GOARCH == "386" || GOARCH == "amd64" {
-		sa.sa_restorer = funcPC(sigreturn)
-	}
-	if fn == funcPC(sighandler) {
-		if iscgo {
-			fn = funcPC(cgoSigtramp)
-		} else {
-			fn = funcPC(sigtramp)
-		}
-	}
-	sa.sa_handler = fn
-	rt_sigaction(uintptr(i), &sa, nil, unsafe.Sizeof(sa.sa_mask))
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func setsigstack(i int32) {
-	var sa sigactiont
-	if rt_sigaction(uintptr(i), nil, &sa, unsafe.Sizeof(sa.sa_mask)) != 0 {
-		throw("rt_sigaction failure")
-	}
-	if sa.sa_handler == 0 || sa.sa_handler == _SIG_DFL || sa.sa_handler == _SIG_IGN || sa.sa_flags&_SA_ONSTACK != 0 {
-		return
-	}
-	sa.sa_flags |= _SA_ONSTACK
-	if rt_sigaction(uintptr(i), &sa, nil, unsafe.Sizeof(sa.sa_mask)) != 0 {
-		throw("rt_sigaction failure")
-	}
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func getsig(i int32) uintptr {
-	var sa sigactiont
-
-	memclr(unsafe.Pointer(&sa), unsafe.Sizeof(sa))
-	if rt_sigaction(uintptr(i), nil, &sa, unsafe.Sizeof(sa.sa_mask)) != 0 {
-		throw("rt_sigaction read failure")
-	}
-	if sa.sa_handler == funcPC(sigtramp) || sa.sa_handler == funcPC(cgoSigtramp) {
-		return funcPC(sighandler)
-	}
-	return sa.sa_handler
-}
-
-//go:nosplit
-func signalstack(s *stack) {
-	var st sigaltstackt
-	if s == nil {
-		st.ss_flags = _SS_DISABLE
-	} else {
-		st.ss_sp = (*byte)(unsafe.Pointer(s.lo))
-		st.ss_size = s.hi - s.lo
-		st.ss_flags = 0
-	}
-	sigaltstack(&st, nil)
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func updatesigmask(m sigmask) {
-	var mask sigset
-	sigcopyset(&mask, m)
-	rtsigprocmask(_SIG_SETMASK, &mask, nil, int32(unsafe.Sizeof(mask)))
-}
-
-func unblocksig(sig int32) {
-	var mask sigset
-	sigaddset(&mask, int(sig))
-	rtsigprocmask(_SIG_UNBLOCK, &mask, nil, int32(unsafe.Sizeof(mask)))
-}
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
new file mode 100644
index 0000000000..7d8cc7e5c4
--- /dev/null
+++ b/src/runtime/os_linux.go
@@ -0,0 +1,422 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+	"runtime/internal/sys"
+	"unsafe"
+)
+
+type mOS struct{}
+
+//go:noescape
+func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
+
+// Linux futex.
+//
+//	futexsleep(uint32 *addr, uint32 val)
+//	futexwakeup(uint32 *addr)
+//
+// Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
+// Futexwakeup wakes up threads sleeping on addr.
+// Futexsleep is allowed to wake up spuriously.
+
+const (
+	_FUTEX_WAIT = 0
+	_FUTEX_WAKE = 1
+)
+
+// Atomically,
+//	if(*addr == val) sleep
+// Might be woken up spuriously; that's allowed.
+// Don't sleep longer than ns; ns < 0 means forever.
+//go:nosplit
+func futexsleep(addr *uint32, val uint32, ns int64) {
+	var ts timespec
+
+	// Some Linux kernels have a bug where futex of
+	// FUTEX_WAIT returns an internal error code
+	// as an errno. Libpthread ignores the return value
+	// here, and so can we: as it says a few lines up,
+	// spurious wakeups are allowed.
+	if ns < 0 {
+		futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, nil, nil, 0)
+		return
+	}
+
+	// It's difficult to live within the no-split stack limits here.
+	// On ARM and 386, a 64-bit divide invokes a general software routine
+	// that needs more stack than we can afford. So we use timediv instead.
+	// But on real 64-bit systems, where words are larger but the stack limit
+	// is not, even timediv is too heavy, and we really need to use just an
+	// ordinary machine instruction.
+	if sys.PtrSize == 8 {
+		ts.set_sec(ns / 1000000000)
+		ts.set_nsec(int32(ns % 1000000000))
+	} else {
+		ts.tv_nsec = 0
+		ts.set_sec(int64(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ts.tv_nsec)))))
+	}
+	futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, unsafe.Pointer(&ts), nil, 0)
+}
+
+// If any procs are sleeping on addr, wake up at most cnt.
+//go:nosplit
+func futexwakeup(addr *uint32, cnt uint32) {
+	ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE, cnt, nil, nil, 0)
+	if ret >= 0 {
+		return
+	}
+
+	// I don't know that futex wakeup can return
+	// EAGAIN or EINTR, but if it does, it would be
+	// safe to loop and call futex again.
+	systemstack(func() {
+		print("futexwakeup addr=", addr, " returned ", ret, "\n")
+	})
+
+	*(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
+}
+
+func getproccount() int32 {
+	// This buffer is huge (8 kB) but we are on the system stack
+	// and there should be plenty of space (64 kB).
+	// Also this is a leaf, so we're not holding up the memory for long.
+	// See golang.org/issue/11823.
+	// The suggested behavior here is to keep trying with ever-larger
+	// buffers, but we don't have a dynamic memory allocator at the
+	// moment, so that's a bit tricky and seems like overkill.
+	const maxCPUs = 64 * 1024
+	var buf [maxCPUs / (sys.PtrSize * 8)]uintptr
+	r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
+	n := int32(0)
+	for _, v := range buf[:r/sys.PtrSize] {
+		for v != 0 {
+			n += int32(v & 1)
+			v >>= 1
+		}
+	}
+	if n == 0 {
+		n = 1
+	}
+	return n
+}
+
+// Clone, the Linux rfork.
+const (
+	_CLONE_VM             = 0x100
+	_CLONE_FS             = 0x200
+	_CLONE_FILES          = 0x400
+	_CLONE_SIGHAND        = 0x800
+	_CLONE_PTRACE         = 0x2000
+	_CLONE_VFORK          = 0x4000
+	_CLONE_PARENT         = 0x8000
+	_CLONE_THREAD         = 0x10000
+	_CLONE_NEWNS          = 0x20000
+	_CLONE_SYSVSEM        = 0x40000
+	_CLONE_SETTLS         = 0x80000
+	_CLONE_PARENT_SETTID  = 0x100000
+	_CLONE_CHILD_CLEARTID = 0x200000
+	_CLONE_UNTRACED       = 0x800000
+	_CLONE_CHILD_SETTID   = 0x1000000
+	_CLONE_STOPPED        = 0x2000000
+	_CLONE_NEWUTS         = 0x4000000
+	_CLONE_NEWIPC         = 0x8000000
+
+	cloneFlags = _CLONE_VM | /* share memory */
+		_CLONE_FS | /* share cwd, etc */
+		_CLONE_FILES | /* share fd table */
+		_CLONE_SIGHAND | /* share sig handler table */
+		_CLONE_THREAD /* revisit - okay for now */
+)
+
+//go:noescape
+func clone(flags int32, stk, mm, gg, fn unsafe.Pointer) int32
+
+// May run with m.p==nil, so write barriers are not allowed.
+//go:nowritebarrier
+func newosproc(mp *m, stk unsafe.Pointer) {
+	/*
+	 * note: strace gets confused if we use CLONE_PTRACE here.
+	 */
+	if false {
+		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", funcPC(clone), " id=", mp.id, " ostk=", &mp, "\n")
+	}
+
+	// Disable signals during clone, so that the new thread starts
+	// with signals disabled. It will enable them in minit.
+	var oset sigset
+	rtsigprocmask(_SIG_SETMASK, &sigset_all, &oset, int32(unsafe.Sizeof(oset)))
+	ret := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(funcPC(mstart)))
+	rtsigprocmask(_SIG_SETMASK, &oset, nil, int32(unsafe.Sizeof(oset)))
+
+	if ret < 0 {
+		print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", -ret, ")\n")
+		throw("newosproc")
+	}
+}
+
+// Version of newosproc that doesn't require a valid G.
+//go:nosplit
+func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
+	stack := sysAlloc(stacksize, &memstats.stacks_sys)
+	if stack == nil {
+		write(2, unsafe.Pointer(&failallocatestack[0]), int32(len(failallocatestack)))
+		exit(1)
+	}
+	ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
+	if ret < 0 {
+		write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+		exit(1)
+	}
+}
+
+var failallocatestack = []byte("runtime: failed to allocate stack for the new OS thread\n")
+var failthreadcreate = []byte("runtime: failed to create new OS thread\n")
+
+func osinit() {
+	ncpu = getproccount()
+}
+
+var urandom_dev = []byte("/dev/urandom\x00")
+
+func getRandomData(r []byte) {
+	if startupRandomData != nil {
+		n := copy(r, startupRandomData)
+		extendRandom(r, n)
+		return
+	}
+	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
+	n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
+	closefd(fd)
+	extendRandom(r, int(n))
+}
+
+func goenvs() {
+	goenvs_unix()
+}
+
+// Called to do synchronous initialization of Go code built with
+// -buildmode=c-archive or -buildmode=c-shared.
+// None of the Go runtime is initialized.
+//go:nosplit
+//go:nowritebarrierrec
+func libpreinit() {
+	initsig(true)
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+func mpreinit(mp *m) {
+	mp.gsignal = malg(32 * 1024) // Linux wants >= 2K
+	mp.gsignal.m = mp
+}
+
+//go:nosplit
+func msigsave(mp *m) {
+	smask := &mp.sigmask
+	rtsigprocmask(_SIG_SETMASK, nil, smask, int32(unsafe.Sizeof(*smask)))
+}
+
+//go:nosplit
+func msigrestore(sigmask sigset) {
+	rtsigprocmask(_SIG_SETMASK, &sigmask, nil, int32(unsafe.Sizeof(sigmask)))
+}
+
+//go:nosplit
+func sigblock() {
+	rtsigprocmask(_SIG_SETMASK, &sigset_all, nil, int32(unsafe.Sizeof(sigset_all)))
+}
+
+func gettid() uint32
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, cannot allocate memory.
+func minit() {
+	// Initialize signal handling.
+	_g_ := getg()
+
+	var st sigaltstackt
+	sigaltstack(nil, &st)
+	if st.ss_flags&_SS_DISABLE != 0 {
+		signalstack(&_g_.m.gsignal.stack)
+		_g_.m.newSigstack = true
+	} else {
+		// Use existing signal stack.
+		stsp := uintptr(unsafe.Pointer(st.ss_sp))
+		_g_.m.gsignal.stack.lo = stsp
+		_g_.m.gsignal.stack.hi = stsp + st.ss_size
+		_g_.m.gsignal.stackguard0 = stsp + _StackGuard
+		_g_.m.gsignal.stackguard1 = stsp + _StackGuard
+		_g_.m.gsignal.stackAlloc = st.ss_size
+		_g_.m.newSigstack = false
+	}
+
+	// for debuggers, in case cgo created the thread
+	_g_.m.procid = uint64(gettid())
+
+	// restore signal mask from m.sigmask and unblock essential signals
+	nmask := _g_.m.sigmask
+	for i := range sigtable {
+		if sigtable[i].flags&_SigUnblock != 0 {
+			sigdelset(&nmask, i)
+		}
+	}
+	rtsigprocmask(_SIG_SETMASK, &nmask, nil, int32(unsafe.Sizeof(nmask)))
+}
+
+// Called from dropm to undo the effect of an minit.
+//go:nosplit
+func unminit() {
+	if getg().m.newSigstack {
+		signalstack(nil)
+	}
+}
+
+func memlimit() uintptr {
+	/*
+		TODO: Convert to Go when something actually uses the result.
+
+		Rlimit rl;
+		extern byte runtime·text[], runtime·end[];
+		uintptr used;
+
+		if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
+			return 0;
+		if(rl.rlim_cur >= 0x7fffffff)
+			return 0;
+
+		// Estimate our VM footprint excluding the heap.
+		// Not an exact science: use size of binary plus
+		// some room for thread stacks.
+		used = runtime·end - runtime·text + (64<<20);
+		if(used >= rl.rlim_cur)
+			return 0;
+
+		// If there's not at least 16 MB left, we're probably
+		// not going to be able to do much. Treat as no limit.
+		rl.rlim_cur -= used;
+		if(rl.rlim_cur < (16<<20))
+			return 0;
+
+		return rl.rlim_cur - used;
+	*/
+
+	return 0
+}
+
+//#ifdef GOARCH_386
+//#define sa_handler k_sa_handler
+//#endif
+
+func sigreturn()
+func sigtramp()
+func cgoSigtramp()
+
+//go:noescape
+func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
+
+//go:noescape
+func sigaltstack(new, old *sigaltstackt)
+
+//go:noescape
+func setitimer(mode int32, new, old *itimerval)
+
+//go:noescape
+func rtsigprocmask(sig uint32, new, old *sigset, size int32)
+
+//go:noescape
+func getrlimit(kind int32, limit unsafe.Pointer) int32
+func raise(sig int32)
+func raiseproc(sig int32)
+
+//go:noescape
+func sched_getaffinity(pid, len uintptr, buf *uintptr) int32
+func osyield()
+
+//go:nosplit
+//go:nowritebarrierrec
+func setsig(i int32, fn uintptr, restart bool) {
+	var sa sigactiont
+	memclr(unsafe.Pointer(&sa), unsafe.Sizeof(sa))
+	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER
+	if restart {
+		sa.sa_flags |= _SA_RESTART
+	}
+	sigfillset(&sa.sa_mask)
+	// Although Linux manpage says "sa_restorer element is obsolete and
+	// should not be used". x86_64 kernel requires it. Only use it on
+	// x86.
+	if GOARCH == "386" || GOARCH == "amd64" {
+		sa.sa_restorer = funcPC(sigreturn)
+	}
+	if fn == funcPC(sighandler) {
+		if iscgo {
+			fn = funcPC(cgoSigtramp)
+		} else {
+			fn = funcPC(sigtramp)
+		}
+	}
+	sa.sa_handler = fn
+	rt_sigaction(uintptr(i), &sa, nil, unsafe.Sizeof(sa.sa_mask))
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func setsigstack(i int32) {
+	var sa sigactiont
+	if rt_sigaction(uintptr(i), nil, &sa, unsafe.Sizeof(sa.sa_mask)) != 0 {
+		throw("rt_sigaction failure")
+	}
+	if sa.sa_handler == 0 || sa.sa_handler == _SIG_DFL || sa.sa_handler == _SIG_IGN || sa.sa_flags&_SA_ONSTACK != 0 {
+		return
+	}
+	sa.sa_flags |= _SA_ONSTACK
+	if rt_sigaction(uintptr(i), &sa, nil, unsafe.Sizeof(sa.sa_mask)) != 0 {
+		throw("rt_sigaction failure")
+	}
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func getsig(i int32) uintptr {
+	var sa sigactiont
+
+	memclr(unsafe.Pointer(&sa), unsafe.Sizeof(sa))
+	if rt_sigaction(uintptr(i), nil, &sa, unsafe.Sizeof(sa.sa_mask)) != 0 {
+		throw("rt_sigaction read failure")
+	}
+	if sa.sa_handler == funcPC(sigtramp) || sa.sa_handler == funcPC(cgoSigtramp) {
+		return funcPC(sighandler)
+	}
+	return sa.sa_handler
+}
+
+//go:nosplit
+func signalstack(s *stack) {
+	var st sigaltstackt
+	if s == nil {
+		st.ss_flags = _SS_DISABLE
+	} else {
+		st.ss_sp = (*byte)(unsafe.Pointer(s.lo))
+		st.ss_size = s.hi - s.lo
+		st.ss_flags = 0
+	}
+	sigaltstack(&st, nil)
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func updatesigmask(m sigmask) {
+	var mask sigset
+	sigcopyset(&mask, m)
+	rtsigprocmask(_SIG_SETMASK, &mask, nil, int32(unsafe.Sizeof(mask)))
+}
+
+func unblocksig(sig int32) {
+	var mask sigset
+	sigaddset(&mask, int(sig))
+	rtsigprocmask(_SIG_UNBLOCK, &mask, nil, int32(unsafe.Sizeof(mask)))
+}
-- 
cgit v1.3


From a2eded3421f144983c0ccb9e6c0a325fa1ba1f82 Mon Sep 17 00:00:00 2001
From: Shenghou Ma <minux@golang.org>
Date: Tue, 5 Apr 2016 23:09:39 -0400
Subject: runtime: get randomness from AT_RANDOM AUXV on linux/arm64

Fixes #15147.

Change-Id: Ibfe46c747dea987787a51eb0c95ccd8c5f24f366
Reviewed-on: https://go-review.googlesource.com/21580
Run-TryBot: Minux Ma <minux@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
---
 src/runtime/os_linux_arm64.go | 26 ++++++++++++++++++++++++++
 src/runtime/vdso_none.go      |  1 +
 2 files changed, 27 insertions(+)

(limited to 'src/runtime')

diff --git a/src/runtime/os_linux_arm64.go b/src/runtime/os_linux_arm64.go
index 3f994f128b..57184b0d3a 100644
--- a/src/runtime/os_linux_arm64.go
+++ b/src/runtime/os_linux_arm64.go
@@ -4,6 +4,11 @@
 
 package runtime
 
+import (
+	"runtime/internal/sys"
+	"unsafe"
+)
+
 const (
 	_AT_NULL   = 0
 	_AT_RANDOM = 25 // introduced in 2.6.29
@@ -11,6 +16,27 @@ const (
 
 var randomNumber uint32
 
+func sysargs(argc int32, argv **byte) {
+	// skip over argv (argc entries plus one terminator slot) and envv to get to auxv
+	n := argc + 1
+	for argv_index(argv, n) != nil {
+		n++
+	}
+	n++ // step past the nil entry that stopped the envv scan
+	auxv := (*[1 << 29]uint64)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
+
+	for i := 0; auxv[i] != _AT_NULL; i += 2 { // entries are (tag, value) pairs; an _AT_NULL tag ends the list
+		switch auxv[i] {
+		case _AT_RANDOM: // kernel provides a pointer to 16 bytes' worth of random data
+			startupRandomData = (*[16]byte)(unsafe.Pointer(uintptr(auxv[i+1])))[:]
+			// the pointer provided may not be word aligned, so we must read it
+			// byte by byte; randomNumber is bytes 4..7 assembled little-endian.
+			randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
+				uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
+		}
+	}
+}
+
 //go:nosplit
 func cputicks() int64 {
 	// Currently cputicks() is used in blocking profiler and to seed fastrand1().
diff --git a/src/runtime/vdso_none.go b/src/runtime/vdso_none.go
index b4e0a0e349..e14e1a4707 100644
--- a/src/runtime/vdso_none.go
+++ b/src/runtime/vdso_none.go
@@ -5,6 +5,7 @@
 // +build !linux !amd64
 // +build !linux !386
 // +build !linux !arm
+// +build !linux !arm64
 
 package runtime
 
-- 
cgit v1.3


From 0f08dd21831a71292dc306cfc05e28a3a9d73786 Mon Sep 17 00:00:00 2001
From: Michael Munday <munday@ca.ibm.com>
Date: Fri, 18 Mar 2016 19:02:52 -0400
Subject: runtime: add s390x support (modified files only)

Change-Id: Ib79ad4a890994ad64edb1feb79bd242d26b5b08a
Reviewed-on: https://go-review.googlesource.com/20945
Reviewed-by: Minux Ma <minux@golang.org>
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/cgocall.go           | 6 +++---
 src/runtime/extern.go            | 2 +-
 src/runtime/gcinfo_test.go       | 2 +-
 src/runtime/hash64.go            | 2 +-
 src/runtime/noasm.go             | 2 +-
 src/runtime/os1_linux_generic.go | 1 +
 src/runtime/os2_linux_generic.go | 1 +
 src/runtime/runtime-gdb_test.go  | 2 +-
 src/runtime/unaligned1.go        | 2 +-
 9 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go
index d5248803a4..c6000bf98f 100644
--- a/src/runtime/cgocall.go
+++ b/src/runtime/cgocall.go
@@ -246,8 +246,8 @@ func cgocallbackg1() {
 	case "386":
 		// On 386, stack frame is three words, plus caller PC.
 		cb = (*args)(unsafe.Pointer(sp + 4*sys.PtrSize))
-	case "ppc64", "ppc64le":
-		// On ppc64, the callback arguments are in the arguments area of
+	case "ppc64", "ppc64le", "s390x":
+		// On ppc64 and s390x, the callback arguments are in the arguments area of
 		// cgocallback's stack frame. The stack looks like this:
 		// +--------------------+------------------------------+
 		// |                    | ...                          |
@@ -300,7 +300,7 @@ func unwindm(restore *bool) {
 	switch GOARCH {
 	default:
 		throw("unwindm not implemented")
-	case "386", "amd64", "arm", "ppc64", "ppc64le":
+	case "386", "amd64", "arm", "ppc64", "ppc64le", "s390x":
 		sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + sys.MinFrameSize))
 	case "arm64":
 		sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + 16))
diff --git a/src/runtime/extern.go b/src/runtime/extern.go
index 984b0ca817..1d8304f4fc 100644
--- a/src/runtime/extern.go
+++ b/src/runtime/extern.go
@@ -227,5 +227,5 @@ func Version() string {
 const GOOS string = sys.TheGoos
 
 // GOARCH is the running program's architecture target:
-// 386, amd64, or arm.
+// 386, amd64, arm, or s390x.
 const GOARCH string = sys.TheGoarch
diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go
index edb6361642..c1c2354bf9 100644
--- a/src/runtime/gcinfo_test.go
+++ b/src/runtime/gcinfo_test.go
@@ -144,7 +144,7 @@ func infoBigStruct() []byte {
 			typeScalar, typeScalar, typeScalar, typeScalar, // t int; y uint16; u uint64
 			typePointer, typeScalar, // i string
 		}
-	case "arm64", "amd64", "mips64", "mips64le", "ppc64", "ppc64le":
+	case "arm64", "amd64", "mips64", "mips64le", "ppc64", "ppc64le", "s390x":
 		return []byte{
 			typePointer,                        // q *int
 			typeScalar, typeScalar, typeScalar, // w byte; e [17]byte
diff --git a/src/runtime/hash64.go b/src/runtime/hash64.go
index fb3dba4000..d61f114475 100644
--- a/src/runtime/hash64.go
+++ b/src/runtime/hash64.go
@@ -6,7 +6,7 @@
 //   xxhash: https://code.google.com/p/xxhash/
 // cityhash: https://code.google.com/p/cityhash/
 
-// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le
+// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le s390x
 
 package runtime
 
diff --git a/src/runtime/noasm.go b/src/runtime/noasm.go
index 351e325f4f..0a8f9e6f52 100644
--- a/src/runtime/noasm.go
+++ b/src/runtime/noasm.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Routines that are implemented in assembly in asm_{amd64,386,arm,arm64,ppc64x}.s
+// Routines that are implemented in assembly in asm_{amd64,386,arm,arm64,ppc64x,s390x}.s
 
 // +build mips64 mips64le
 
diff --git a/src/runtime/os1_linux_generic.go b/src/runtime/os1_linux_generic.go
index 2c8b743aeb..50d6d6afb4 100644
--- a/src/runtime/os1_linux_generic.go
+++ b/src/runtime/os1_linux_generic.go
@@ -4,6 +4,7 @@
 
 // +build !mips64
 // +build !mips64le
+// +build !s390x
 // +build linux
 
 package runtime
diff --git a/src/runtime/os2_linux_generic.go b/src/runtime/os2_linux_generic.go
index 01e6c8a5ec..f1a2dd5130 100644
--- a/src/runtime/os2_linux_generic.go
+++ b/src/runtime/os2_linux_generic.go
@@ -4,6 +4,7 @@
 
 // +build !mips64
 // +build !mips64le
+// +build !s390x
 // +build linux
 
 package runtime
diff --git a/src/runtime/runtime-gdb_test.go b/src/runtime/runtime-gdb_test.go
index 110d99064f..7cfcefc2c2 100644
--- a/src/runtime/runtime-gdb_test.go
+++ b/src/runtime/runtime-gdb_test.go
@@ -104,7 +104,7 @@ func TestGdbPython(t *testing.T) {
 	// stack frames on RISC architectures.
 	canBackTrace := false
 	switch runtime.GOARCH {
-	case "amd64", "386", "ppc64", "ppc64le", "arm", "arm64", "mips64", "mips64le":
+	case "amd64", "386", "ppc64", "ppc64le", "arm", "arm64", "mips64", "mips64le", "s390x":
 		canBackTrace = true
 		args = append(args,
 			"-ex", "echo BEGIN goroutine 2 bt\n",
diff --git a/src/runtime/unaligned1.go b/src/runtime/unaligned1.go
index 6bd9018352..754d63b417 100644
--- a/src/runtime/unaligned1.go
+++ b/src/runtime/unaligned1.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build 386 amd64 amd64p32 arm64 ppc64 ppc64le
+// +build 386 amd64 amd64p32 arm64 ppc64 ppc64le s390x
 
 package runtime
 
-- 
cgit v1.3


From e095f53e9be7aadc74fddf5532296a438410df40 Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick <bradfitz@golang.org>
Date: Wed, 6 Apr 2016 02:38:45 +0000
Subject: runtime: merge os{,2}_windows.go into os1_windows.go.

A future CL will rename os1_windows.go to os_windows.go.

Change-Id: I223e76002dd1e9c9d1798fb0beac02c7d3bf4812
Reviewed-on: https://go-review.googlesource.com/21564
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Minux Ma <minux@golang.org>
---
 src/runtime/os1_windows.go | 40 +++++++++++++++++++++++++++++++++++++++-
 src/runtime/os2_windows.go | 19 -------------------
 src/runtime/os_windows.go  | 32 --------------------------------
 3 files changed, 39 insertions(+), 52 deletions(-)
 delete mode 100644 src/runtime/os2_windows.go
 delete mode 100644 src/runtime/os_windows.go

(limited to 'src/runtime')

diff --git a/src/runtime/os1_windows.go b/src/runtime/os1_windows.go
index a36def0ffe..7244706b92 100644
--- a/src/runtime/os1_windows.go
+++ b/src/runtime/os1_windows.go
@@ -9,6 +9,11 @@ import (
 	"unsafe"
 )
 
+// TODO(brainman): should not need those
+const (
+	_NSIG = 65
+)
+
 //go:cgo_import_dynamic runtime._AddVectoredExceptionHandler AddVectoredExceptionHandler%2 "kernel32.dll"
 //go:cgo_import_dynamic runtime._CloseHandle CloseHandle%1 "kernel32.dll"
 //go:cgo_import_dynamic runtime._CreateEventA CreateEventA%4 "kernel32.dll"
@@ -49,6 +54,8 @@ import (
 //go:cgo_import_dynamic runtime._WriteConsoleW WriteConsoleW%5 "kernel32.dll"
 //go:cgo_import_dynamic runtime._WriteFile WriteFile%5 "kernel32.dll"
 
+type stdFunction unsafe.Pointer
+
 var (
 	// Following syscalls are available on every Windows PC.
 	// All these variables are set by the Windows executable
@@ -102,6 +109,35 @@ var (
 	_ stdFunction
 )
 
+// Function to be called by windows CreateThread
+// to start new os thread.
+func tstart_stdcall(newm *m) uint32
+
+func ctrlhandler(_type uint32) uint32
+
+type mOS struct {
+	waitsema uintptr // semaphore for parking on locks
+}
+
+//go:linkname os_sigpipe os.sigpipe
+func os_sigpipe() {
+	throw("too many writes on closed pipe")
+}
+
+// Stubs so tests can link correctly. These should never be called.
+func open(name *byte, mode, perm int32) int32 {
+	throw("unimplemented")
+	return -1
+}
+func closefd(fd int32) int32 {
+	throw("unimplemented")
+	return -1
+}
+func read(fd int32, p unsafe.Pointer, n int32) int32 {
+	throw("unimplemented")
+	return -1
+}
+
 type sigset struct{}
 
 // Call a Windows function with stdcall conventions,
@@ -171,8 +207,10 @@ const (
 	currentThread  = ^uintptr(1) // -2 = current thread
 )
 
-// in sys_windows_386.s and sys_windows_amd64.s
+// in sys_windows_386.s and sys_windows_amd64.s:
 func externalthreadhandler()
+func getlasterror() uint32
+func setlasterror(err uint32)
 
 // When loading DLLs, we prefer to use LoadLibraryEx with
 // LOAD_LIBRARY_SEARCH_* flags, if available. LoadLibraryEx is not
diff --git a/src/runtime/os2_windows.go b/src/runtime/os2_windows.go
deleted file mode 100644
index a867dfeb64..0000000000
--- a/src/runtime/os2_windows.go
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-func getlasterror() uint32
-func setlasterror(err uint32)
-
-// Function to be called by windows CreateThread
-// to start new os thread.
-func tstart_stdcall(newm *m) uint32
-
-func ctrlhandler(_type uint32) uint32
-
-// TODO(brainman): should not need those
-const (
-	_NSIG = 65
-)
diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
deleted file mode 100644
index 24b3b8cf29..0000000000
--- a/src/runtime/os_windows.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-type mOS struct {
-	waitsema uintptr // semaphore for parking on locks
-}
-
-type stdFunction unsafe.Pointer
-
-//go:linkname os_sigpipe os.sigpipe
-func os_sigpipe() {
-	throw("too many writes on closed pipe")
-}
-
-// Stubs so tests can link correctly. These should never be called.
-func open(name *byte, mode, perm int32) int32 {
-	throw("unimplemented")
-	return -1
-}
-func closefd(fd int32) int32 {
-	throw("unimplemented")
-	return -1
-}
-func read(fd int32, p unsafe.Pointer, n int32) int32 {
-	throw("unimplemented")
-	return -1
-}
-- 
cgit v1.3


From fd2bb1e30ae2f489e05ec022a8457a680663c27d Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick <bradfitz@golang.org>
Date: Wed, 6 Apr 2016 02:39:47 +0000
Subject: runtime: rename os1_windows.go to os_windows.go

Change-Id: I11172f3d0e28f17b812e67a4db9cfe513b8e1974
Reviewed-on: https://go-review.googlesource.com/21565
Reviewed-by: Minux Ma <minux@golang.org>
---
 src/runtime/os1_windows.go | 737 ---------------------------------------------
 src/runtime/os_windows.go  | 737 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 737 insertions(+), 737 deletions(-)
 delete mode 100644 src/runtime/os1_windows.go
 create mode 100644 src/runtime/os_windows.go

(limited to 'src/runtime')

diff --git a/src/runtime/os1_windows.go b/src/runtime/os1_windows.go
deleted file mode 100644
index 7244706b92..0000000000
--- a/src/runtime/os1_windows.go
+++ /dev/null
@@ -1,737 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import (
-	"runtime/internal/atomic"
-	"unsafe"
-)
-
-// TODO(brainman): should not need those
-const (
-	_NSIG = 65
-)
-
-//go:cgo_import_dynamic runtime._AddVectoredExceptionHandler AddVectoredExceptionHandler%2 "kernel32.dll"
-//go:cgo_import_dynamic runtime._CloseHandle CloseHandle%1 "kernel32.dll"
-//go:cgo_import_dynamic runtime._CreateEventA CreateEventA%4 "kernel32.dll"
-//go:cgo_import_dynamic runtime._CreateIoCompletionPort CreateIoCompletionPort%4 "kernel32.dll"
-//go:cgo_import_dynamic runtime._CreateThread CreateThread%6 "kernel32.dll"
-//go:cgo_import_dynamic runtime._CreateWaitableTimerA CreateWaitableTimerA%3 "kernel32.dll"
-//go:cgo_import_dynamic runtime._CryptAcquireContextW CryptAcquireContextW%5 "advapi32.dll"
-//go:cgo_import_dynamic runtime._CryptGenRandom CryptGenRandom%3 "advapi32.dll"
-//go:cgo_import_dynamic runtime._CryptReleaseContext CryptReleaseContext%2 "advapi32.dll"
-//go:cgo_import_dynamic runtime._DuplicateHandle DuplicateHandle%7 "kernel32.dll"
-//go:cgo_import_dynamic runtime._ExitProcess ExitProcess%1 "kernel32.dll"
-//go:cgo_import_dynamic runtime._FreeEnvironmentStringsW FreeEnvironmentStringsW%1 "kernel32.dll"
-//go:cgo_import_dynamic runtime._GetConsoleMode GetConsoleMode%2 "kernel32.dll"
-//go:cgo_import_dynamic runtime._GetEnvironmentStringsW GetEnvironmentStringsW%0 "kernel32.dll"
-//go:cgo_import_dynamic runtime._GetProcAddress GetProcAddress%2 "kernel32.dll"
-//go:cgo_import_dynamic runtime._GetProcessAffinityMask GetProcessAffinityMask%3 "kernel32.dll"
-//go:cgo_import_dynamic runtime._GetQueuedCompletionStatus GetQueuedCompletionStatus%5 "kernel32.dll"
-//go:cgo_import_dynamic runtime._GetStdHandle GetStdHandle%1 "kernel32.dll"
-//go:cgo_import_dynamic runtime._GetSystemInfo GetSystemInfo%1 "kernel32.dll"
-//go:cgo_import_dynamic runtime._GetThreadContext GetThreadContext%2 "kernel32.dll"
-//go:cgo_import_dynamic runtime._LoadLibraryW LoadLibraryW%1 "kernel32.dll"
-//go:cgo_import_dynamic runtime._LoadLibraryA LoadLibraryA%1 "kernel32.dll"
-//go:cgo_import_dynamic runtime._NtWaitForSingleObject NtWaitForSingleObject%3 "ntdll.dll"
-//go:cgo_import_dynamic runtime._ResumeThread ResumeThread%1 "kernel32.dll"
-//go:cgo_import_dynamic runtime._SetConsoleCtrlHandler SetConsoleCtrlHandler%2 "kernel32.dll"
-//go:cgo_import_dynamic runtime._SetErrorMode SetErrorMode%1 "kernel32.dll"
-//go:cgo_import_dynamic runtime._SetEvent SetEvent%1 "kernel32.dll"
-//go:cgo_import_dynamic runtime._SetProcessPriorityBoost SetProcessPriorityBoost%2 "kernel32.dll"
-//go:cgo_import_dynamic runtime._SetThreadPriority SetThreadPriority%2 "kernel32.dll"
-//go:cgo_import_dynamic runtime._SetUnhandledExceptionFilter SetUnhandledExceptionFilter%1 "kernel32.dll"
-//go:cgo_import_dynamic runtime._SetWaitableTimer SetWaitableTimer%6 "kernel32.dll"
-//go:cgo_import_dynamic runtime._SuspendThread SuspendThread%1 "kernel32.dll"
-//go:cgo_import_dynamic runtime._SwitchToThread SwitchToThread%0 "kernel32.dll"
-//go:cgo_import_dynamic runtime._VirtualAlloc VirtualAlloc%4 "kernel32.dll"
-//go:cgo_import_dynamic runtime._VirtualFree VirtualFree%3 "kernel32.dll"
-//go:cgo_import_dynamic runtime._WSAGetOverlappedResult WSAGetOverlappedResult%5 "ws2_32.dll"
-//go:cgo_import_dynamic runtime._WaitForSingleObject WaitForSingleObject%2 "kernel32.dll"
-//go:cgo_import_dynamic runtime._WriteConsoleW WriteConsoleW%5 "kernel32.dll"
-//go:cgo_import_dynamic runtime._WriteFile WriteFile%5 "kernel32.dll"
-
-type stdFunction unsafe.Pointer
-
-var (
-	// Following syscalls are available on every Windows PC.
-	// All these variables are set by the Windows executable
-	// loader before the Go program starts.
-	_AddVectoredExceptionHandler,
-	_CloseHandle,
-	_CreateEventA,
-	_CreateIoCompletionPort,
-	_CreateThread,
-	_CreateWaitableTimerA,
-	_CryptAcquireContextW,
-	_CryptGenRandom,
-	_CryptReleaseContext,
-	_DuplicateHandle,
-	_ExitProcess,
-	_FreeEnvironmentStringsW,
-	_GetConsoleMode,
-	_GetEnvironmentStringsW,
-	_GetProcAddress,
-	_GetProcessAffinityMask,
-	_GetQueuedCompletionStatus,
-	_GetStdHandle,
-	_GetSystemInfo,
-	_GetThreadContext,
-	_LoadLibraryW,
-	_LoadLibraryA,
-	_NtWaitForSingleObject,
-	_ResumeThread,
-	_SetConsoleCtrlHandler,
-	_SetErrorMode,
-	_SetEvent,
-	_SetProcessPriorityBoost,
-	_SetThreadPriority,
-	_SetUnhandledExceptionFilter,
-	_SetWaitableTimer,
-	_SuspendThread,
-	_SwitchToThread,
-	_VirtualAlloc,
-	_VirtualFree,
-	_WSAGetOverlappedResult,
-	_WaitForSingleObject,
-	_WriteConsoleW,
-	_WriteFile stdFunction
-
-	// Following syscalls are only available on some Windows PCs.
-	// We will load syscalls, if available, before using them.
-	_AddDllDirectory,
-	_AddVectoredContinueHandler,
-	_GetQueuedCompletionStatusEx,
-	_LoadLibraryExW,
-	_ stdFunction
-)
-
-// Function to be called by windows CreateThread
-// to start new os thread.
-func tstart_stdcall(newm *m) uint32
-
-func ctrlhandler(_type uint32) uint32
-
-type mOS struct {
-	waitsema uintptr // semaphore for parking on locks
-}
-
-//go:linkname os_sigpipe os.sigpipe
-func os_sigpipe() {
-	throw("too many writes on closed pipe")
-}
-
-// Stubs so tests can link correctly. These should never be called.
-func open(name *byte, mode, perm int32) int32 {
-	throw("unimplemented")
-	return -1
-}
-func closefd(fd int32) int32 {
-	throw("unimplemented")
-	return -1
-}
-func read(fd int32, p unsafe.Pointer, n int32) int32 {
-	throw("unimplemented")
-	return -1
-}
-
-type sigset struct{}
-
-// Call a Windows function with stdcall conventions,
-// and switch to os stack during the call.
-func asmstdcall(fn unsafe.Pointer)
-
-var asmstdcallAddr unsafe.Pointer
-
-func windowsFindfunc(lib uintptr, name []byte) stdFunction {
-	if name[len(name)-1] != 0 {
-		throw("usage")
-	}
-	f := stdcall2(_GetProcAddress, lib, uintptr(unsafe.Pointer(&name[0])))
-	return stdFunction(unsafe.Pointer(f))
-}
-
-func loadOptionalSyscalls() {
-	var kernel32dll = []byte("kernel32.dll\000")
-	k32 := stdcall1(_LoadLibraryA, uintptr(unsafe.Pointer(&kernel32dll[0])))
-	if k32 == 0 {
-		throw("kernel32.dll not found")
-	}
-	_AddDllDirectory = windowsFindfunc(k32, []byte("AddDllDirectory\000"))
-	_AddVectoredContinueHandler = windowsFindfunc(k32, []byte("AddVectoredContinueHandler\000"))
-	_GetQueuedCompletionStatusEx = windowsFindfunc(k32, []byte("GetQueuedCompletionStatusEx\000"))
-	_LoadLibraryExW = windowsFindfunc(k32, []byte("LoadLibraryExW\000"))
-}
-
-//go:nosplit
-func getLoadLibrary() uintptr {
-	return uintptr(unsafe.Pointer(_LoadLibraryW))
-}
-
-//go:nosplit
-func getLoadLibraryEx() uintptr {
-	return uintptr(unsafe.Pointer(_LoadLibraryExW))
-}
-
-//go:nosplit
-func getGetProcAddress() uintptr {
-	return uintptr(unsafe.Pointer(_GetProcAddress))
-}
-
-func getproccount() int32 {
-	var mask, sysmask uintptr
-	ret := stdcall3(_GetProcessAffinityMask, currentProcess, uintptr(unsafe.Pointer(&mask)), uintptr(unsafe.Pointer(&sysmask)))
-	if ret != 0 {
-		n := 0
-		maskbits := int(unsafe.Sizeof(mask) * 8)
-		for i := 0; i < maskbits; i++ {
-			if mask&(1<<uint(i)) != 0 {
-				n++
-			}
-		}
-		if n != 0 {
-			return int32(n)
-		}
-	}
-	// use GetSystemInfo if GetProcessAffinityMask fails
-	var info systeminfo
-	stdcall1(_GetSystemInfo, uintptr(unsafe.Pointer(&info)))
-	return int32(info.dwnumberofprocessors)
-}
-
-const (
-	currentProcess = ^uintptr(0) // -1 = current process
-	currentThread  = ^uintptr(1) // -2 = current thread
-)
-
-// in sys_windows_386.s and sys_windows_amd64.s:
-func externalthreadhandler()
-func getlasterror() uint32
-func setlasterror(err uint32)
-
-// When loading DLLs, we prefer to use LoadLibraryEx with
-// LOAD_LIBRARY_SEARCH_* flags, if available. LoadLibraryEx is not
-// available on old Windows, though, and the LOAD_LIBRARY_SEARCH_*
-// flags are not available on some versions of Windows without a
-// security patch.
-//
-// https://msdn.microsoft.com/en-us/library/ms684179(v=vs.85).aspx says:
-// "Windows 7, Windows Server 2008 R2, Windows Vista, and Windows
-// Server 2008: The LOAD_LIBRARY_SEARCH_* flags are available on
-// systems that have KB2533623 installed. To determine whether the
-// flags are available, use GetProcAddress to get the address of the
-// AddDllDirectory, RemoveDllDirectory, or SetDefaultDllDirectories
-// function. If GetProcAddress succeeds, the LOAD_LIBRARY_SEARCH_*
-// flags can be used with LoadLibraryEx."
-var useLoadLibraryEx bool
-
-func osinit() {
-	asmstdcallAddr = unsafe.Pointer(funcPC(asmstdcall))
-	usleep2Addr = unsafe.Pointer(funcPC(usleep2))
-	switchtothreadAddr = unsafe.Pointer(funcPC(switchtothread))
-
-	setBadSignalMsg()
-
-	loadOptionalSyscalls()
-
-	useLoadLibraryEx = (_LoadLibraryExW != nil && _AddDllDirectory != nil)
-
-	disableWER()
-
-	externalthreadhandlerp = funcPC(externalthreadhandler)
-
-	initExceptionHandler()
-
-	stdcall2(_SetConsoleCtrlHandler, funcPC(ctrlhandler), 1)
-
-	ncpu = getproccount()
-
-	// Windows dynamic priority boosting assumes that a process has different types
-	// of dedicated threads -- GUI, IO, computational, etc. Go processes use
-	// equivalent threads that all do a mix of GUI, IO, computations, etc.
-	// In such context dynamic priority boosting does nothing but harm, so we turn it off.
-	stdcall2(_SetProcessPriorityBoost, currentProcess, 1)
-}
-
-//go:nosplit
-func getRandomData(r []byte) {
-	const (
-		prov_rsa_full       = 1
-		crypt_verifycontext = 0xF0000000
-	)
-	var handle uintptr
-	n := 0
-	if stdcall5(_CryptAcquireContextW, uintptr(unsafe.Pointer(&handle)), 0, 0, prov_rsa_full, crypt_verifycontext) != 0 {
-		if stdcall3(_CryptGenRandom, handle, uintptr(len(r)), uintptr(unsafe.Pointer(&r[0]))) != 0 {
-			n = len(r)
-		}
-		stdcall2(_CryptReleaseContext, handle, 0)
-	}
-	extendRandom(r, n)
-}
-
-func goenvs() {
-	// strings is a pointer to environment variable pairs in the form:
-	//     "envA=valA\x00envB=valB\x00\x00" (in UTF-16)
-	// Two consecutive zero bytes end the list.
-	strings := unsafe.Pointer(stdcall0(_GetEnvironmentStringsW))
-	p := (*[1 << 24]uint16)(strings)[:]
-
-	n := 0
-	for from, i := 0, 0; true; i++ {
-		if p[i] == 0 {
-			// empty string marks the end
-			if i == from {
-				break
-			}
-			from = i + 1
-			n++
-		}
-	}
-	envs = make([]string, n)
-
-	for i := range envs {
-		envs[i] = gostringw(&p[0])
-		for p[0] != 0 {
-			p = p[1:]
-		}
-		p = p[1:] // skip nil byte
-	}
-
-	stdcall1(_FreeEnvironmentStringsW, uintptr(strings))
-}
-
-//go:nosplit
-func exit(code int32) {
-	stdcall1(_ExitProcess, uintptr(code))
-}
-
-//go:nosplit
-func write(fd uintptr, buf unsafe.Pointer, n int32) int32 {
-	const (
-		_STD_OUTPUT_HANDLE = ^uintptr(10) // -11
-		_STD_ERROR_HANDLE  = ^uintptr(11) // -12
-	)
-	var handle uintptr
-	switch fd {
-	case 1:
-		handle = stdcall1(_GetStdHandle, _STD_OUTPUT_HANDLE)
-	case 2:
-		handle = stdcall1(_GetStdHandle, _STD_ERROR_HANDLE)
-	default:
-		// assume fd is real windows handle.
-		handle = fd
-	}
-	isASCII := true
-	b := (*[1 << 30]byte)(buf)[:n]
-	for _, x := range b {
-		if x >= 0x80 {
-			isASCII = false
-			break
-		}
-	}
-
-	if !isASCII {
-		var m uint32
-		isConsole := stdcall2(_GetConsoleMode, handle, uintptr(unsafe.Pointer(&m))) != 0
-		// If this is a console output, various non-unicode code pages can be in use.
-		// Use the dedicated WriteConsole call to ensure unicode is printed correctly.
-		if isConsole {
-			return int32(writeConsole(handle, buf, n))
-		}
-	}
-	var written uint32
-	stdcall5(_WriteFile, handle, uintptr(buf), uintptr(n), uintptr(unsafe.Pointer(&written)), 0)
-	return int32(written)
-}
-
-var (
-	utf16ConsoleBack     [1000]uint16
-	utf16ConsoleBackLock mutex
-)
-
-// writeConsole writes bufLen bytes from buf to the console File.
-// It returns the number of bytes written.
-func writeConsole(handle uintptr, buf unsafe.Pointer, bufLen int32) int {
-	const surr2 = (surrogateMin + surrogateMax + 1) / 2
-
-	// Do not use defer for unlock. May cause issues when printing a panic.
-	lock(&utf16ConsoleBackLock)
-
-	b := (*[1 << 30]byte)(buf)[:bufLen]
-	s := *(*string)(unsafe.Pointer(&b))
-
-	utf16tmp := utf16ConsoleBack[:]
-
-	total := len(s)
-	w := 0
-	for len(s) > 0 {
-		if w >= len(utf16tmp)-2 {
-			writeConsoleUTF16(handle, utf16tmp[:w])
-			w = 0
-		}
-		r, n := charntorune(s)
-		s = s[n:]
-		if r < 0x10000 {
-			utf16tmp[w] = uint16(r)
-			w++
-		} else {
-			r -= 0x10000
-			utf16tmp[w] = surrogateMin + uint16(r>>10)&0x3ff
-			utf16tmp[w+1] = surr2 + uint16(r)&0x3ff
-			w += 2
-		}
-	}
-	writeConsoleUTF16(handle, utf16tmp[:w])
-	unlock(&utf16ConsoleBackLock)
-	return total
-}
-
-// writeConsoleUTF16 is the dedicated windows calls that correctly prints
-// to the console regardless of the current code page. Input is utf-16 code points.
-// The handle must be a console handle.
-func writeConsoleUTF16(handle uintptr, b []uint16) {
-	l := uint32(len(b))
-	if l == 0 {
-		return
-	}
-	var written uint32
-	stdcall5(_WriteConsoleW,
-		handle,
-		uintptr(unsafe.Pointer(&b[0])),
-		uintptr(l),
-		uintptr(unsafe.Pointer(&written)),
-		0,
-	)
-	return
-}
-
-//go:nosplit
-func semasleep(ns int64) int32 {
-	// store ms in ns to save stack space
-	if ns < 0 {
-		ns = _INFINITE
-	} else {
-		ns = int64(timediv(ns, 1000000, nil))
-		if ns == 0 {
-			ns = 1
-		}
-	}
-	if stdcall2(_WaitForSingleObject, getg().m.waitsema, uintptr(ns)) != 0 {
-		return -1 // timeout
-	}
-	return 0
-}
-
-//go:nosplit
-func semawakeup(mp *m) {
-	stdcall1(_SetEvent, mp.waitsema)
-}
-
-//go:nosplit
-func semacreate(mp *m) {
-	if mp.waitsema != 0 {
-		return
-	}
-	mp.waitsema = stdcall4(_CreateEventA, 0, 0, 0, 0)
-}
-
-// May run with m.p==nil, so write barriers are not allowed. This
-// function is called by newosproc0, so it is also required to
-// operate without stack guards.
-//go:nowritebarrierc
-//go:nosplit
-func newosproc(mp *m, stk unsafe.Pointer) {
-	const _STACK_SIZE_PARAM_IS_A_RESERVATION = 0x00010000
-	thandle := stdcall6(_CreateThread, 0, 0x20000,
-		funcPC(tstart_stdcall), uintptr(unsafe.Pointer(mp)),
-		_STACK_SIZE_PARAM_IS_A_RESERVATION, 0)
-	if thandle == 0 {
-		print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", getlasterror(), ")\n")
-		throw("runtime.newosproc")
-	}
-}
-
-// Used by the C library build mode. On Linux this function would allocate a
-// stack, but that's not necessary for Windows. No stack guards are present
-// and the GC has not been initialized, so write barriers will fail.
-//go:nowritebarrierc
-//go:nosplit
-func newosproc0(mp *m, stk unsafe.Pointer) {
-	newosproc(mp, stk)
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
-func mpreinit(mp *m) {
-}
-
-//go:nosplit
-func msigsave(mp *m) {
-}
-
-//go:nosplit
-func msigrestore(sigmask sigset) {
-}
-
-//go:nosplit
-func sigblock() {
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the new thread, cannot allocate memory.
-func minit() {
-	var thandle uintptr
-	stdcall7(_DuplicateHandle, currentProcess, currentThread, currentProcess, uintptr(unsafe.Pointer(&thandle)), 0, 0, _DUPLICATE_SAME_ACCESS)
-	atomic.Storeuintptr(&getg().m.thread, thandle)
-}
-
-// Called from dropm to undo the effect of an minit.
-//go:nosplit
-func unminit() {
-	tp := &getg().m.thread
-	stdcall1(_CloseHandle, *tp)
-	*tp = 0
-}
-
-// Described in http://www.dcl.hpi.uni-potsdam.de/research/WRK/2007/08/getting-os-information-the-kuser_shared_data-structure/
-type _KSYSTEM_TIME struct {
-	LowPart   uint32
-	High1Time int32
-	High2Time int32
-}
-
-const (
-	_INTERRUPT_TIME = 0x7ffe0008
-	_SYSTEM_TIME    = 0x7ffe0014
-)
-
-//go:nosplit
-func systime(addr uintptr) int64 {
-	timeaddr := (*_KSYSTEM_TIME)(unsafe.Pointer(addr))
-
-	var t _KSYSTEM_TIME
-	for i := 1; i < 10000; i++ {
-		// these fields must be read in that order (see URL above)
-		t.High1Time = timeaddr.High1Time
-		t.LowPart = timeaddr.LowPart
-		t.High2Time = timeaddr.High2Time
-		if t.High1Time == t.High2Time {
-			return int64(t.High1Time)<<32 | int64(t.LowPart)
-		}
-		if (i % 100) == 0 {
-			osyield()
-		}
-	}
-	systemstack(func() {
-		throw("interrupt/system time is changing too fast")
-	})
-	return 0
-}
-
-//go:nosplit
-func unixnano() int64 {
-	return (systime(_SYSTEM_TIME) - 116444736000000000) * 100
-}
-
-//go:nosplit
-func nanotime() int64 {
-	return systime(_INTERRUPT_TIME) * 100
-}
-
-// Calling stdcall on os stack.
-// May run during STW, so write barriers are not allowed.
-//go:nowritebarrier
-//go:nosplit
-func stdcall(fn stdFunction) uintptr {
-	gp := getg()
-	mp := gp.m
-	mp.libcall.fn = uintptr(unsafe.Pointer(fn))
-
-	if mp.profilehz != 0 {
-		// leave pc/sp for cpu profiler
-		mp.libcallg.set(gp)
-		mp.libcallpc = getcallerpc(unsafe.Pointer(&fn))
-		// sp must be the last, because once async cpu profiler finds
-		// all three values to be non-zero, it will use them
-		mp.libcallsp = getcallersp(unsafe.Pointer(&fn))
-	}
-	asmcgocall(asmstdcallAddr, unsafe.Pointer(&mp.libcall))
-	mp.libcallsp = 0
-	return mp.libcall.r1
-}
-
-//go:nosplit
-func stdcall0(fn stdFunction) uintptr {
-	mp := getg().m
-	mp.libcall.n = 0
-	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&fn))) // it's unused but must be non-nil, otherwise crashes
-	return stdcall(fn)
-}
-
-//go:nosplit
-func stdcall1(fn stdFunction, a0 uintptr) uintptr {
-	mp := getg().m
-	mp.libcall.n = 1
-	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
-	return stdcall(fn)
-}
-
-//go:nosplit
-func stdcall2(fn stdFunction, a0, a1 uintptr) uintptr {
-	mp := getg().m
-	mp.libcall.n = 2
-	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
-	return stdcall(fn)
-}
-
-//go:nosplit
-func stdcall3(fn stdFunction, a0, a1, a2 uintptr) uintptr {
-	mp := getg().m
-	mp.libcall.n = 3
-	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
-	return stdcall(fn)
-}
-
-//go:nosplit
-func stdcall4(fn stdFunction, a0, a1, a2, a3 uintptr) uintptr {
-	mp := getg().m
-	mp.libcall.n = 4
-	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
-	return stdcall(fn)
-}
-
-//go:nosplit
-func stdcall5(fn stdFunction, a0, a1, a2, a3, a4 uintptr) uintptr {
-	mp := getg().m
-	mp.libcall.n = 5
-	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
-	return stdcall(fn)
-}
-
-//go:nosplit
-func stdcall6(fn stdFunction, a0, a1, a2, a3, a4, a5 uintptr) uintptr {
-	mp := getg().m
-	mp.libcall.n = 6
-	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
-	return stdcall(fn)
-}
-
-//go:nosplit
-func stdcall7(fn stdFunction, a0, a1, a2, a3, a4, a5, a6 uintptr) uintptr {
-	mp := getg().m
-	mp.libcall.n = 7
-	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
-	return stdcall(fn)
-}
-
-// in sys_windows_386.s and sys_windows_amd64.s
-func onosstack(fn unsafe.Pointer, arg uint32)
-func usleep2(usec uint32)
-func switchtothread()
-
-var usleep2Addr unsafe.Pointer
-var switchtothreadAddr unsafe.Pointer
-
-//go:nosplit
-func osyield() {
-	onosstack(switchtothreadAddr, 0)
-}
-
-//go:nosplit
-func usleep(us uint32) {
-	// Have 1us units; want 100ns units.
-	onosstack(usleep2Addr, 10*us)
-}
-
-func ctrlhandler1(_type uint32) uint32 {
-	var s uint32
-
-	switch _type {
-	case _CTRL_C_EVENT, _CTRL_BREAK_EVENT:
-		s = _SIGINT
-	default:
-		return 0
-	}
-
-	if sigsend(s) {
-		return 1
-	}
-	exit(2) // SIGINT, SIGTERM, etc
-	return 0
-}
-
-// in sys_windows_386.s and sys_windows_amd64.s
-func profileloop()
-
-var profiletimer uintptr
-
-func profilem(mp *m) {
-	var r *context
-	rbuf := make([]byte, unsafe.Sizeof(*r)+15)
-
-	tls := &mp.tls[0]
-	gp := *((**g)(unsafe.Pointer(tls)))
-
-	// align Context to 16 bytes
-	r = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&rbuf[15]))) &^ 15))
-	r.contextflags = _CONTEXT_CONTROL
-	stdcall2(_GetThreadContext, mp.thread, uintptr(unsafe.Pointer(r)))
-	sigprof(r.ip(), r.sp(), 0, gp, mp)
-}
-
-func profileloop1(param uintptr) uint32 {
-	stdcall2(_SetThreadPriority, currentThread, _THREAD_PRIORITY_HIGHEST)
-
-	for {
-		stdcall2(_WaitForSingleObject, profiletimer, _INFINITE)
-		first := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
-		for mp := first; mp != nil; mp = mp.alllink {
-			thread := atomic.Loaduintptr(&mp.thread)
-			// Do not profile threads blocked on Notes,
-			// this includes idle worker threads,
-			// idle timer thread, idle heap scavenger, etc.
-			if thread == 0 || mp.profilehz == 0 || mp.blocked {
-				continue
-			}
-			stdcall1(_SuspendThread, thread)
-			if mp.profilehz != 0 && !mp.blocked {
-				profilem(mp)
-			}
-			stdcall1(_ResumeThread, thread)
-		}
-	}
-}
-
-var cpuprofilerlock mutex
-
-func resetcpuprofiler(hz int32) {
-	lock(&cpuprofilerlock)
-	if profiletimer == 0 {
-		timer := stdcall3(_CreateWaitableTimerA, 0, 0, 0)
-		atomic.Storeuintptr(&profiletimer, timer)
-		thread := stdcall6(_CreateThread, 0, 0, funcPC(profileloop), 0, 0, 0)
-		stdcall2(_SetThreadPriority, thread, _THREAD_PRIORITY_HIGHEST)
-		stdcall1(_CloseHandle, thread)
-	}
-	unlock(&cpuprofilerlock)
-
-	ms := int32(0)
-	due := ^int64(^uint64(1 << 63))
-	if hz > 0 {
-		ms = 1000 / hz
-		if ms == 0 {
-			ms = 1
-		}
-		due = int64(ms) * -10000
-	}
-	stdcall6(_SetWaitableTimer, profiletimer, uintptr(unsafe.Pointer(&due)), uintptr(ms), 0, 0, 0)
-	atomic.Store((*uint32)(unsafe.Pointer(&getg().m.profilehz)), uint32(hz))
-}
-
-func memlimit() uintptr {
-	return 0
-}
diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
new file mode 100644
index 0000000000..7244706b92
--- /dev/null
+++ b/src/runtime/os_windows.go
@@ -0,0 +1,737 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+	"runtime/internal/atomic"
+	"unsafe"
+)
+
+// TODO(brainman): should not need those
+const (
+	_NSIG = 65
+)
+
+//go:cgo_import_dynamic runtime._AddVectoredExceptionHandler AddVectoredExceptionHandler%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._CloseHandle CloseHandle%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._CreateEventA CreateEventA%4 "kernel32.dll"
+//go:cgo_import_dynamic runtime._CreateIoCompletionPort CreateIoCompletionPort%4 "kernel32.dll"
+//go:cgo_import_dynamic runtime._CreateThread CreateThread%6 "kernel32.dll"
+//go:cgo_import_dynamic runtime._CreateWaitableTimerA CreateWaitableTimerA%3 "kernel32.dll"
+//go:cgo_import_dynamic runtime._CryptAcquireContextW CryptAcquireContextW%5 "advapi32.dll"
+//go:cgo_import_dynamic runtime._CryptGenRandom CryptGenRandom%3 "advapi32.dll"
+//go:cgo_import_dynamic runtime._CryptReleaseContext CryptReleaseContext%2 "advapi32.dll"
+//go:cgo_import_dynamic runtime._DuplicateHandle DuplicateHandle%7 "kernel32.dll"
+//go:cgo_import_dynamic runtime._ExitProcess ExitProcess%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._FreeEnvironmentStringsW FreeEnvironmentStringsW%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._GetConsoleMode GetConsoleMode%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._GetEnvironmentStringsW GetEnvironmentStringsW%0 "kernel32.dll"
+//go:cgo_import_dynamic runtime._GetProcAddress GetProcAddress%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._GetProcessAffinityMask GetProcessAffinityMask%3 "kernel32.dll"
+//go:cgo_import_dynamic runtime._GetQueuedCompletionStatus GetQueuedCompletionStatus%5 "kernel32.dll"
+//go:cgo_import_dynamic runtime._GetStdHandle GetStdHandle%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._GetSystemInfo GetSystemInfo%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._GetThreadContext GetThreadContext%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._LoadLibraryW LoadLibraryW%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._LoadLibraryA LoadLibraryA%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._NtWaitForSingleObject NtWaitForSingleObject%3 "ntdll.dll"
+//go:cgo_import_dynamic runtime._ResumeThread ResumeThread%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SetConsoleCtrlHandler SetConsoleCtrlHandler%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SetErrorMode SetErrorMode%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SetEvent SetEvent%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SetProcessPriorityBoost SetProcessPriorityBoost%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SetThreadPriority SetThreadPriority%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SetUnhandledExceptionFilter SetUnhandledExceptionFilter%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SetWaitableTimer SetWaitableTimer%6 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SuspendThread SuspendThread%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SwitchToThread SwitchToThread%0 "kernel32.dll"
+//go:cgo_import_dynamic runtime._VirtualAlloc VirtualAlloc%4 "kernel32.dll"
+//go:cgo_import_dynamic runtime._VirtualFree VirtualFree%3 "kernel32.dll"
+//go:cgo_import_dynamic runtime._WSAGetOverlappedResult WSAGetOverlappedResult%5 "ws2_32.dll"
+//go:cgo_import_dynamic runtime._WaitForSingleObject WaitForSingleObject%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._WriteConsoleW WriteConsoleW%5 "kernel32.dll"
+//go:cgo_import_dynamic runtime._WriteFile WriteFile%5 "kernel32.dll"
+
+type stdFunction unsafe.Pointer
+
+var (
+	// Following syscalls are available on every Windows PC.
+	// All these variables are set by the Windows executable
+	// loader before the Go program starts.
+	_AddVectoredExceptionHandler,
+	_CloseHandle,
+	_CreateEventA,
+	_CreateIoCompletionPort,
+	_CreateThread,
+	_CreateWaitableTimerA,
+	_CryptAcquireContextW,
+	_CryptGenRandom,
+	_CryptReleaseContext,
+	_DuplicateHandle,
+	_ExitProcess,
+	_FreeEnvironmentStringsW,
+	_GetConsoleMode,
+	_GetEnvironmentStringsW,
+	_GetProcAddress,
+	_GetProcessAffinityMask,
+	_GetQueuedCompletionStatus,
+	_GetStdHandle,
+	_GetSystemInfo,
+	_GetThreadContext,
+	_LoadLibraryW,
+	_LoadLibraryA,
+	_NtWaitForSingleObject,
+	_ResumeThread,
+	_SetConsoleCtrlHandler,
+	_SetErrorMode,
+	_SetEvent,
+	_SetProcessPriorityBoost,
+	_SetThreadPriority,
+	_SetUnhandledExceptionFilter,
+	_SetWaitableTimer,
+	_SuspendThread,
+	_SwitchToThread,
+	_VirtualAlloc,
+	_VirtualFree,
+	_WSAGetOverlappedResult,
+	_WaitForSingleObject,
+	_WriteConsoleW,
+	_WriteFile stdFunction
+
+	// Following syscalls are only available on some Windows PCs.
+	// We will load syscalls, if available, before using them.
+	_AddDllDirectory,
+	_AddVectoredContinueHandler,
+	_GetQueuedCompletionStatusEx,
+	_LoadLibraryExW,
+	_ stdFunction
+)
+
+// tstart_stdcall is the function called by Windows CreateThread
+// to start a new OS thread.
+func tstart_stdcall(newm *m) uint32
+
+func ctrlhandler(_type uint32) uint32
+
+type mOS struct {
+	waitsema uintptr // semaphore for parking on locks
+}
+
+//go:linkname os_sigpipe os.sigpipe
+func os_sigpipe() {
+	throw("too many writes on closed pipe")
+}
+
+// Stubs so tests can link correctly. These should never be called.
+func open(name *byte, mode, perm int32) int32 {
+	throw("unimplemented")
+	return -1
+}
+func closefd(fd int32) int32 {
+	throw("unimplemented")
+	return -1
+}
+func read(fd int32, p unsafe.Pointer, n int32) int32 {
+	throw("unimplemented")
+	return -1
+}
+
+type sigset struct{}
+
+// Call a Windows function with stdcall conventions,
+// and switch to os stack during the call.
+func asmstdcall(fn unsafe.Pointer)
+
+var asmstdcallAddr unsafe.Pointer
+
+func windowsFindfunc(lib uintptr, name []byte) stdFunction {
+	if name[len(name)-1] != 0 {
+		throw("usage") // name must end in a NUL byte; its address is handed to GetProcAddress
+	}
+	f := stdcall2(_GetProcAddress, lib, uintptr(unsafe.Pointer(&name[0])))
+	return stdFunction(unsafe.Pointer(f))
+}
+
+func loadOptionalSyscalls() {
+	var kernel32dll = []byte("kernel32.dll\000")
+	k32 := stdcall1(_LoadLibraryA, uintptr(unsafe.Pointer(&kernel32dll[0])))
+	if k32 == 0 {
+		throw("kernel32.dll not found")
+	}
+	_AddDllDirectory = windowsFindfunc(k32, []byte("AddDllDirectory\000"))
+	_AddVectoredContinueHandler = windowsFindfunc(k32, []byte("AddVectoredContinueHandler\000"))
+	_GetQueuedCompletionStatusEx = windowsFindfunc(k32, []byte("GetQueuedCompletionStatusEx\000"))
+	_LoadLibraryExW = windowsFindfunc(k32, []byte("LoadLibraryExW\000"))
+}
+
+//go:nosplit
+func getLoadLibrary() uintptr {
+	return uintptr(unsafe.Pointer(_LoadLibraryW))
+}
+
+//go:nosplit
+func getLoadLibraryEx() uintptr {
+	return uintptr(unsafe.Pointer(_LoadLibraryExW))
+}
+
+//go:nosplit
+func getGetProcAddress() uintptr {
+	return uintptr(unsafe.Pointer(_GetProcAddress))
+}
+
+func getproccount() int32 {
+	var mask, sysmask uintptr
+	ret := stdcall3(_GetProcessAffinityMask, currentProcess, uintptr(unsafe.Pointer(&mask)), uintptr(unsafe.Pointer(&sysmask)))
+	if ret != 0 {
+		n := 0 // number of bits set in the process affinity mask
+		maskbits := int(unsafe.Sizeof(mask) * 8)
+		for i := 0; i < maskbits; i++ {
+			if mask&(1<<uint(i)) != 0 {
+				n++
+			}
+		}
+		if n != 0 {
+			return int32(n)
+		}
+	}
+	// use GetSystemInfo if GetProcessAffinityMask fails or reports no set bits
+	var info systeminfo
+	stdcall1(_GetSystemInfo, uintptr(unsafe.Pointer(&info)))
+	return int32(info.dwnumberofprocessors)
+}
+
+const (
+	currentProcess = ^uintptr(0) // -1 = current process
+	currentThread  = ^uintptr(1) // -2 = current thread
+)
+
+// in sys_windows_386.s and sys_windows_amd64.s:
+func externalthreadhandler()
+func getlasterror() uint32
+func setlasterror(err uint32)
+
+// When loading DLLs, we prefer to use LoadLibraryEx with
+// LOAD_LIBRARY_SEARCH_* flags, if available. LoadLibraryEx is not
+// available on old Windows, though, and the LOAD_LIBRARY_SEARCH_*
+// flags are not available on some versions of Windows without a
+// security patch.
+//
+// https://msdn.microsoft.com/en-us/library/ms684179(v=vs.85).aspx says:
+// "Windows 7, Windows Server 2008 R2, Windows Vista, and Windows
+// Server 2008: The LOAD_LIBRARY_SEARCH_* flags are available on
+// systems that have KB2533623 installed. To determine whether the
+// flags are available, use GetProcAddress to get the address of the
+// AddDllDirectory, RemoveDllDirectory, or SetDefaultDllDirectories
+// function. If GetProcAddress succeeds, the LOAD_LIBRARY_SEARCH_*
+// flags can be used with LoadLibraryEx."
+var useLoadLibraryEx bool
+
+func osinit() {
+	asmstdcallAddr = unsafe.Pointer(funcPC(asmstdcall))
+	usleep2Addr = unsafe.Pointer(funcPC(usleep2))
+	switchtothreadAddr = unsafe.Pointer(funcPC(switchtothread))
+
+	setBadSignalMsg()
+
+	loadOptionalSyscalls()
+
+	useLoadLibraryEx = (_LoadLibraryExW != nil && _AddDllDirectory != nil)
+
+	disableWER()
+
+	externalthreadhandlerp = funcPC(externalthreadhandler)
+
+	initExceptionHandler()
+
+	stdcall2(_SetConsoleCtrlHandler, funcPC(ctrlhandler), 1)
+
+	ncpu = getproccount()
+
+	// Windows dynamic priority boosting assumes that a process has different types
+	// of dedicated threads -- GUI, IO, computational, etc. Go processes use
+	// equivalent threads that all do a mix of GUI, IO, computations, etc.
+	// In such context dynamic priority boosting does nothing but harm, so we turn it off.
+	stdcall2(_SetProcessPriorityBoost, currentProcess, 1)
+}
+
+//go:nosplit
+func getRandomData(r []byte) {
+	const (
+		prov_rsa_full       = 1
+		crypt_verifycontext = 0xF0000000
+	)
+	var handle uintptr
+	n := 0 // bytes of r filled by CryptGenRandom; stays 0 if either crypto call fails
+	if stdcall5(_CryptAcquireContextW, uintptr(unsafe.Pointer(&handle)), 0, 0, prov_rsa_full, crypt_verifycontext) != 0 {
+		if stdcall3(_CryptGenRandom, handle, uintptr(len(r)), uintptr(unsafe.Pointer(&r[0]))) != 0 {
+			n = len(r)
+		}
+		stdcall2(_CryptReleaseContext, handle, 0)
+	}
+	extendRandom(r, n)
+}
+
+func goenvs() {
+	// strings is a pointer to environment variable pairs in the form:
+	//     "envA=valA\x00envB=valB\x00\x00" (in UTF-16)
+	// An empty string (two consecutive zero uint16s) ends the list.
+	strings := unsafe.Pointer(stdcall0(_GetEnvironmentStringsW))
+	p := (*[1 << 24]uint16)(strings)[:]
+
+	n := 0
+	for from, i := 0, 0; true; i++ {
+		if p[i] == 0 {
+			// empty string marks the end
+			if i == from {
+				break
+			}
+			from = i + 1
+			n++
+		}
+	}
+	envs = make([]string, n)
+
+	for i := range envs {
+		envs[i] = gostringw(&p[0])
+		for p[0] != 0 {
+			p = p[1:]
+		}
+		p = p[1:] // skip the terminating zero uint16
+	}
+
+	stdcall1(_FreeEnvironmentStringsW, uintptr(strings))
+}
+
+//go:nosplit
+func exit(code int32) {
+	stdcall1(_ExitProcess, uintptr(code))
+}
+
+//go:nosplit
+func write(fd uintptr, buf unsafe.Pointer, n int32) int32 {
+	const (
+		_STD_OUTPUT_HANDLE = ^uintptr(10) // -11
+		_STD_ERROR_HANDLE  = ^uintptr(11) // -12
+	)
+	var handle uintptr
+	switch fd {
+	case 1:
+		handle = stdcall1(_GetStdHandle, _STD_OUTPUT_HANDLE)
+	case 2:
+		handle = stdcall1(_GetStdHandle, _STD_ERROR_HANDLE)
+	default:
+		// assume fd is real windows handle.
+		handle = fd
+	}
+	isASCII := true
+	b := (*[1 << 30]byte)(buf)[:n]
+	for _, x := range b {
+		if x >= 0x80 {
+			isASCII = false
+			break
+		}
+	}
+
+	if !isASCII {
+		var m uint32
+		isConsole := stdcall2(_GetConsoleMode, handle, uintptr(unsafe.Pointer(&m))) != 0
+		// If this is a console output, various non-unicode code pages can be in use.
+		// Use the dedicated WriteConsole call to ensure unicode is printed correctly.
+		if isConsole {
+			return int32(writeConsole(handle, buf, n))
+		}
+	}
+	var written uint32
+	stdcall5(_WriteFile, handle, uintptr(buf), uintptr(n), uintptr(unsafe.Pointer(&written)), 0)
+	return int32(written)
+}
+
+var (
+	utf16ConsoleBack     [1000]uint16
+	utf16ConsoleBackLock mutex
+)
+
+// writeConsole decodes bufLen bytes from buf into runes and writes them to the console
+// handle as UTF-16. It always returns bufLen; console write failures are not reported.
+func writeConsole(handle uintptr, buf unsafe.Pointer, bufLen int32) int {
+	const surr2 = (surrogateMin + surrogateMax + 1) / 2
+
+	// Do not use defer for unlock. May cause issues when printing a panic.
+	lock(&utf16ConsoleBackLock)
+
+	b := (*[1 << 30]byte)(buf)[:bufLen]
+	s := *(*string)(unsafe.Pointer(&b))
+
+	utf16tmp := utf16ConsoleBack[:]
+
+	total := len(s)
+	w := 0
+	for len(s) > 0 {
+		if w >= len(utf16tmp)-2 { // flush early so the next rune always has room for two uint16s
+			writeConsoleUTF16(handle, utf16tmp[:w])
+			w = 0
+		}
+		r, n := charntorune(s)
+		s = s[n:]
+		if r < 0x10000 {
+			utf16tmp[w] = uint16(r)
+			w++
+		} else {
+			r -= 0x10000 // encode as a surrogate pair: high 10 bits, then low 10 bits
+			utf16tmp[w] = surrogateMin + uint16(r>>10)&0x3ff
+			utf16tmp[w+1] = surr2 + uint16(r)&0x3ff
+			w += 2
+		}
+	}
+	writeConsoleUTF16(handle, utf16tmp[:w])
+	unlock(&utf16ConsoleBackLock)
+	return total
+}
+
+// writeConsoleUTF16 is the dedicated Windows call that correctly prints
+// to the console regardless of the current code page. Input is UTF-16 code units.
+// The handle must be a console handle.
+func writeConsoleUTF16(handle uintptr, b []uint16) {
+	l := uint32(len(b))
+	if l == 0 {
+		return
+	}
+	var written uint32 // receives the count from WriteConsoleW; not inspected afterwards
+	stdcall5(_WriteConsoleW,
+		handle,
+		uintptr(unsafe.Pointer(&b[0])),
+		uintptr(l),
+		uintptr(unsafe.Pointer(&written)),
+		0,
+	)
+	return
+}
+
+//go:nosplit
+func semasleep(ns int64) int32 {
+	// store ms in ns to save stack space
+	if ns < 0 {
+		ns = _INFINITE
+	} else {
+		ns = int64(timediv(ns, 1000000, nil))
+		if ns == 0 {
+			ns = 1 // round sub-millisecond sleeps up to 1ms rather than passing 0
+		}
+	}
+	if stdcall2(_WaitForSingleObject, getg().m.waitsema, uintptr(ns)) != 0 {
+		return -1 // timeout, or any other non-zero wait result
+	}
+	return 0
+}
+
+//go:nosplit
+func semawakeup(mp *m) {
+	stdcall1(_SetEvent, mp.waitsema)
+}
+
+//go:nosplit
+func semacreate(mp *m) {
+	if mp.waitsema != 0 {
+		return
+	}
+	mp.waitsema = stdcall4(_CreateEventA, 0, 0, 0, 0)
+}
+
+// newosproc starts a new OS thread running tstart_stdcall with mp as its argument.
+// May run with m.p==nil, so write barriers are not allowed; it is also called
+// by newosproc0, so it is required to operate without stack guards.
+//go:nowritebarrierrec
+//go:nosplit
+func newosproc(mp *m, stk unsafe.Pointer) {
+	const _STACK_SIZE_PARAM_IS_A_RESERVATION = 0x00010000
+	thandle := stdcall6(_CreateThread, 0, 0x20000,
+		funcPC(tstart_stdcall), uintptr(unsafe.Pointer(mp)),
+		_STACK_SIZE_PARAM_IS_A_RESERVATION, 0)
+	if thandle == 0 { // CreateThread failed: report the thread count and last error, then abort
+		print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", getlasterror(), ")\n")
+		throw("runtime.newosproc")
+	}
+}
+
+// Used by the C library build mode. On Linux this function would allocate a
+// stack, but that's not necessary for Windows. No stack guards are present
+// and the GC has not been initialized, so write barriers will fail.
+//go:nowritebarrierrec
+//go:nosplit
+func newosproc0(mp *m, stk unsafe.Pointer) {
+	newosproc(mp, stk) // stk is passed through unchanged; newosproc does not read it
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+func mpreinit(mp *m) {
+}
+
+//go:nosplit
+func msigsave(mp *m) {
+}
+
+//go:nosplit
+func msigrestore(sigmask sigset) {
+}
+
+//go:nosplit
+func sigblock() {
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, cannot allocate memory.
+func minit() {
+	var thandle uintptr
+	stdcall7(_DuplicateHandle, currentProcess, currentThread, currentProcess, uintptr(unsafe.Pointer(&thandle)), 0, 0, _DUPLICATE_SAME_ACCESS)
+	atomic.Storeuintptr(&getg().m.thread, thandle) // atomic store: profileloop1 reads m.thread with atomic.Loaduintptr
+}
+
+// Called from dropm to undo the effect of an minit.
+//go:nosplit
+func unminit() {
+	tp := &getg().m.thread
+	stdcall1(_CloseHandle, *tp)
+	*tp = 0
+}
+
+// Described in http://www.dcl.hpi.uni-potsdam.de/research/WRK/2007/08/getting-os-information-the-kuser_shared_data-structure/
+type _KSYSTEM_TIME struct {
+	LowPart   uint32
+	High1Time int32
+	High2Time int32
+}
+
+const (
+	_INTERRUPT_TIME = 0x7ffe0008
+	_SYSTEM_TIME    = 0x7ffe0014
+)
+
+//go:nosplit
+func systime(addr uintptr) int64 {
+	timeaddr := (*_KSYSTEM_TIME)(unsafe.Pointer(addr))
+
+	var t _KSYSTEM_TIME
+	for i := 1; i < 10000; i++ {
+		// these fields must be read in that order (see URL above)
+		t.High1Time = timeaddr.High1Time
+		t.LowPart = timeaddr.LowPart
+		t.High2Time = timeaddr.High2Time
+		if t.High1Time == t.High2Time {
+			return int64(t.High1Time)<<32 | int64(t.LowPart)
+		}
+		if (i % 100) == 0 {
+			osyield()
+		}
+	}
+	systemstack(func() {
+		throw("interrupt/system time is changing too fast")
+	})
+	return 0
+}
+
+//go:nosplit
+func unixnano() int64 {
+	return (systime(_SYSTEM_TIME) - 116444736000000000) * 100
+}
+
+//go:nosplit
+func nanotime() int64 {
+	return systime(_INTERRUPT_TIME) * 100
+}
+
+// Calling stdcall on os stack.
+// May run during STW, so write barriers are not allowed.
+//go:nowritebarrier
+//go:nosplit
+func stdcall(fn stdFunction) uintptr {
+	gp := getg()
+	mp := gp.m
+	mp.libcall.fn = uintptr(unsafe.Pointer(fn))
+
+	if mp.profilehz != 0 {
+		// leave pc/sp for cpu profiler
+		mp.libcallg.set(gp)
+		mp.libcallpc = getcallerpc(unsafe.Pointer(&fn))
+		// sp must be the last, because once async cpu profiler finds
+		// all three values to be non-zero, it will use them
+		mp.libcallsp = getcallersp(unsafe.Pointer(&fn))
+	}
+	asmcgocall(asmstdcallAddr, unsafe.Pointer(&mp.libcall))
+	mp.libcallsp = 0
+	return mp.libcall.r1
+}
+
+//go:nosplit
+func stdcall0(fn stdFunction) uintptr {
+	mp := getg().m
+	mp.libcall.n = 0
+	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&fn))) // it's unused but must be non-nil, otherwise crashes
+	return stdcall(fn)
+}
+
+//go:nosplit
+func stdcall1(fn stdFunction, a0 uintptr) uintptr {
+	mp := getg().m
+	mp.libcall.n = 1
+	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
+	return stdcall(fn)
+}
+
+//go:nosplit
+func stdcall2(fn stdFunction, a0, a1 uintptr) uintptr {
+	mp := getg().m
+	mp.libcall.n = 2
+	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
+	return stdcall(fn)
+}
+
+//go:nosplit
+func stdcall3(fn stdFunction, a0, a1, a2 uintptr) uintptr {
+	mp := getg().m
+	mp.libcall.n = 3
+	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
+	return stdcall(fn)
+}
+
+//go:nosplit
+func stdcall4(fn stdFunction, a0, a1, a2, a3 uintptr) uintptr {
+	mp := getg().m
+	mp.libcall.n = 4
+	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
+	return stdcall(fn)
+}
+
+//go:nosplit
+func stdcall5(fn stdFunction, a0, a1, a2, a3, a4 uintptr) uintptr {
+	mp := getg().m
+	mp.libcall.n = 5
+	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
+	return stdcall(fn)
+}
+
+//go:nosplit
+func stdcall6(fn stdFunction, a0, a1, a2, a3, a4, a5 uintptr) uintptr {
+	mp := getg().m
+	mp.libcall.n = 6
+	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
+	return stdcall(fn)
+}
+
+//go:nosplit
+func stdcall7(fn stdFunction, a0, a1, a2, a3, a4, a5, a6 uintptr) uintptr {
+	mp := getg().m
+	mp.libcall.n = 7
+	mp.libcall.args = uintptr(noescape(unsafe.Pointer(&a0)))
+	return stdcall(fn)
+}
+
+// in sys_windows_386.s and sys_windows_amd64.s
+func onosstack(fn unsafe.Pointer, arg uint32)
+func usleep2(usec uint32)
+func switchtothread()
+
+var usleep2Addr unsafe.Pointer
+var switchtothreadAddr unsafe.Pointer
+
+//go:nosplit
+func osyield() {
+	onosstack(switchtothreadAddr, 0)
+}
+
+//go:nosplit
+func usleep(us uint32) {
+	// Have 1us units; want 100ns units.
+	onosstack(usleep2Addr, 10*us)
+}
+
+func ctrlhandler1(_type uint32) uint32 {
+	var s uint32
+
+	switch _type {
+	case _CTRL_C_EVENT, _CTRL_BREAK_EVENT:
+		s = _SIGINT
+	default:
+		return 0
+	}
+
+	if sigsend(s) {
+		return 1
+	}
+	exit(2) // SIGINT, SIGTERM, etc
+	return 0
+}
+
+// in sys_windows_386.s and sys_windows_amd64.s
+func profileloop()
+
+var profiletimer uintptr
+
+func profilem(mp *m) {
+	var r *context
+	rbuf := make([]byte, unsafe.Sizeof(*r)+15)
+
+	tls := &mp.tls[0]
+	gp := *((**g)(unsafe.Pointer(tls)))
+
+	// align Context to 16 bytes
+	r = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&rbuf[15]))) &^ 15))
+	r.contextflags = _CONTEXT_CONTROL
+	stdcall2(_GetThreadContext, mp.thread, uintptr(unsafe.Pointer(r)))
+	sigprof(r.ip(), r.sp(), 0, gp, mp)
+}
+
+func profileloop1(param uintptr) uint32 {
+	stdcall2(_SetThreadPriority, currentThread, _THREAD_PRIORITY_HIGHEST)
+
+	for {
+		stdcall2(_WaitForSingleObject, profiletimer, _INFINITE)
+		first := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
+		for mp := first; mp != nil; mp = mp.alllink {
+			thread := atomic.Loaduintptr(&mp.thread)
+			// Do not profile threads blocked on Notes,
+			// this includes idle worker threads,
+			// idle timer thread, idle heap scavenger, etc.
+			if thread == 0 || mp.profilehz == 0 || mp.blocked {
+				continue
+			}
+			stdcall1(_SuspendThread, thread)
+			if mp.profilehz != 0 && !mp.blocked { // same checks again, now with the thread suspended
+				profilem(mp)
+			}
+			stdcall1(_ResumeThread, thread)
+		}
+	}
+}
+
+var cpuprofilerlock mutex
+
+func resetcpuprofiler(hz int32) {
+	lock(&cpuprofilerlock)
+	if profiletimer == 0 { // first call: create the timer and start the profileloop thread
+		timer := stdcall3(_CreateWaitableTimerA, 0, 0, 0)
+		atomic.Storeuintptr(&profiletimer, timer)
+		thread := stdcall6(_CreateThread, 0, 0, funcPC(profileloop), 0, 0, 0)
+		stdcall2(_SetThreadPriority, thread, _THREAD_PRIORITY_HIGHEST)
+		stdcall1(_CloseHandle, thread)
+	}
+	unlock(&cpuprofilerlock)
+
+	ms := int32(0)
+	due := ^int64(^uint64(1 << 63)) // minimum int64; the due time used when hz <= 0
+	if hz > 0 {
+		ms = 1000 / hz
+		if ms == 0 {
+			ms = 1
+		}
+		due = int64(ms) * -10000 // ms expressed in 100ns units, negated
+	}
+	stdcall6(_SetWaitableTimer, profiletimer, uintptr(unsafe.Pointer(&due)), uintptr(ms), 0, 0, 0)
+	atomic.Store((*uint32)(unsafe.Pointer(&getg().m.profilehz)), uint32(hz))
+}
+
+func memlimit() uintptr {
+	return 0
+}
-- 
cgit v1.3


From 8455f3a3d5f2879e8574882978e7646db1ebabb5 Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick <bradfitz@golang.org>
Date: Wed, 6 Apr 2016 04:38:00 +0000
Subject: os: consolidate os{1,2}_*.go files

Change-Id: I463ca59f486b2842f67f151a55f530ee10663830
Reviewed-on: https://go-review.googlesource.com/21568
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Minux Ma <minux@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/os1_dragonfly.go     | 270 -------------------------------------
 src/runtime/os1_linux_generic.go |  28 ----
 src/runtime/os1_linux_mips64x.go |  26 ----
 src/runtime/os1_netbsd.go        | 275 -------------------------------------
 src/runtime/os1_netbsd_386.go    |  16 ---
 src/runtime/os1_netbsd_amd64.go  |  16 ---
 src/runtime/os2_dragonfly.go     |  15 ---
 src/runtime/os2_linux_generic.go |  30 -----
 src/runtime/os2_linux_mips64x.go |  25 ----
 src/runtime/os2_netbsd.go        |  18 ---
 src/runtime/os_dragonfly.go      | 273 +++++++++++++++++++++++++++++++++++++
 src/runtime/os_linux_generic.go  |  48 +++++++
 src/runtime/os_linux_mips64x.go  |  37 ++++-
 src/runtime/os_netbsd.go         | 283 ++++++++++++++++++++++++++++++++++++++-
 src/runtime/os_netbsd_386.go     |  16 +++
 src/runtime/os_netbsd_amd64.go   |  16 +++
 16 files changed, 671 insertions(+), 721 deletions(-)
 delete mode 100644 src/runtime/os1_dragonfly.go
 delete mode 100644 src/runtime/os1_linux_generic.go
 delete mode 100644 src/runtime/os1_linux_mips64x.go
 delete mode 100644 src/runtime/os1_netbsd.go
 delete mode 100644 src/runtime/os1_netbsd_386.go
 delete mode 100644 src/runtime/os1_netbsd_amd64.go
 delete mode 100644 src/runtime/os2_dragonfly.go
 delete mode 100644 src/runtime/os2_linux_generic.go
 delete mode 100644 src/runtime/os2_linux_mips64x.go
 delete mode 100644 src/runtime/os2_netbsd.go
 create mode 100644 src/runtime/os_linux_generic.go
 create mode 100644 src/runtime/os_netbsd_386.go
 create mode 100644 src/runtime/os_netbsd_amd64.go

(limited to 'src/runtime')

diff --git a/src/runtime/os1_dragonfly.go b/src/runtime/os1_dragonfly.go
deleted file mode 100644
index d7044ae4b0..0000000000
--- a/src/runtime/os1_dragonfly.go
+++ /dev/null
@@ -1,270 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-// From DragonFly's <sys/sysctl.h>
-const (
-	_CTL_HW  = 6
-	_HW_NCPU = 3
-)
-
-var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}}
-
-func getncpu() int32 {
-	mib := [2]uint32{_CTL_HW, _HW_NCPU}
-	out := uint32(0)
-	nout := unsafe.Sizeof(out)
-	ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
-	if ret >= 0 {
-		return int32(out)
-	}
-	return 1
-}
-
-//go:nosplit
-func futexsleep(addr *uint32, val uint32, ns int64) {
-	systemstack(func() {
-		futexsleep1(addr, val, ns)
-	})
-}
-
-func futexsleep1(addr *uint32, val uint32, ns int64) {
-	var timeout int32
-	if ns >= 0 {
-		// The timeout is specified in microseconds - ensure that we
-		// do not end up dividing to zero, which would put us to sleep
-		// indefinitely...
-		timeout = timediv(ns, 1000, nil)
-		if timeout == 0 {
-			timeout = 1
-		}
-	}
-
-	// sys_umtx_sleep will return EWOULDBLOCK (EAGAIN) when the timeout
-	// expires or EBUSY if the mutex value does not match.
-	ret := sys_umtx_sleep(addr, int32(val), timeout)
-	if ret >= 0 || ret == -_EINTR || ret == -_EAGAIN || ret == -_EBUSY {
-		return
-	}
-
-	print("umtx_sleep addr=", addr, " val=", val, " ret=", ret, "\n")
-	*(*int32)(unsafe.Pointer(uintptr(0x1005))) = 0x1005
-}
-
-//go:nosplit
-func futexwakeup(addr *uint32, cnt uint32) {
-	ret := sys_umtx_wakeup(addr, int32(cnt))
-	if ret >= 0 {
-		return
-	}
-
-	systemstack(func() {
-		print("umtx_wake_addr=", addr, " ret=", ret, "\n")
-		*(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
-	})
-}
-
-func lwp_start(uintptr)
-
-// May run with m.p==nil, so write barriers are not allowed.
-//go:nowritebarrier
-func newosproc(mp *m, stk unsafe.Pointer) {
-	if false {
-		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " lwp_start=", funcPC(lwp_start), " id=", mp.id, " ostk=", &mp, "\n")
-	}
-
-	var oset sigset
-	sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
-
-	params := lwpparams{
-		start_func: funcPC(lwp_start),
-		arg:        unsafe.Pointer(mp),
-		stack:      uintptr(stk),
-		tid1:       unsafe.Pointer(&mp.procid),
-		tid2:       nil,
-	}
-
-	lwp_create(&params)
-	sigprocmask(_SIG_SETMASK, &oset, nil)
-}
-
-func osinit() {
-	ncpu = getncpu()
-}
-
-var urandom_dev = []byte("/dev/urandom\x00")
-
-//go:nosplit
-func getRandomData(r []byte) {
-	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
-	n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
-	closefd(fd)
-	extendRandom(r, int(n))
-}
-
-func goenvs() {
-	goenvs_unix()
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
-func mpreinit(mp *m) {
-	mp.gsignal = malg(32 * 1024)
-	mp.gsignal.m = mp
-}
-
-//go:nosplit
-func msigsave(mp *m) {
-	sigprocmask(_SIG_SETMASK, nil, &mp.sigmask)
-}
-
-//go:nosplit
-func msigrestore(sigmask sigset) {
-	sigprocmask(_SIG_SETMASK, &sigmask, nil)
-}
-
-//go:nosplit
-func sigblock() {
-	sigprocmask(_SIG_SETMASK, &sigset_all, nil)
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the new thread, cannot allocate memory.
-func minit() {
-	_g_ := getg()
-
-	// m.procid is a uint64, but lwp_start writes an int32. Fix it up.
-	_g_.m.procid = uint64(*(*int32)(unsafe.Pointer(&_g_.m.procid)))
-
-	// Initialize signal handling.
-
-	// On DragonFly a thread created by pthread_create inherits
-	// the signal stack of the creating thread. We always create
-	// a new signal stack here, to avoid having two Go threads
-	// using the same signal stack. This breaks the case of a
-	// thread created in C that calls sigaltstack and then calls a
-	// Go function, because we will lose track of the C code's
-	// sigaltstack, but it's the best we can do.
-	signalstack(&_g_.m.gsignal.stack)
-	_g_.m.newSigstack = true
-
-	// restore signal mask from m.sigmask and unblock essential signals
-	nmask := _g_.m.sigmask
-	for i := range sigtable {
-		if sigtable[i].flags&_SigUnblock != 0 {
-			nmask.__bits[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31)
-		}
-	}
-	sigprocmask(_SIG_SETMASK, &nmask, nil)
-}
-
-// Called from dropm to undo the effect of an minit.
-//go:nosplit
-func unminit() {
-	if getg().m.newSigstack {
-		signalstack(nil)
-	}
-}
-
-func memlimit() uintptr {
-	/*
-		                TODO: Convert to Go when something actually uses the result.
-
-				Rlimit rl;
-				extern byte runtime·text[], runtime·end[];
-				uintptr used;
-
-				if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
-					return 0;
-				if(rl.rlim_cur >= 0x7fffffff)
-					return 0;
-
-				// Estimate our VM footprint excluding the heap.
-				// Not an exact science: use size of binary plus
-				// some room for thread stacks.
-				used = runtime·end - runtime·text + (64<<20);
-				if(used >= rl.rlim_cur)
-					return 0;
-
-				// If there's not at least 16 MB left, we're probably
-				// not going to be able to do much. Treat as no limit.
-				rl.rlim_cur -= used;
-				if(rl.rlim_cur < (16<<20))
-					return 0;
-
-				return rl.rlim_cur - used;
-	*/
-	return 0
-}
-
-func sigtramp()
-
-type sigactiont struct {
-	sa_sigaction uintptr
-	sa_flags     int32
-	sa_mask      sigset
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func setsig(i int32, fn uintptr, restart bool) {
-	var sa sigactiont
-	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK
-	if restart {
-		sa.sa_flags |= _SA_RESTART
-	}
-	sa.sa_mask = sigset_all
-	if fn == funcPC(sighandler) {
-		fn = funcPC(sigtramp)
-	}
-	sa.sa_sigaction = fn
-	sigaction(i, &sa, nil)
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func setsigstack(i int32) {
-	throw("setsigstack")
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func getsig(i int32) uintptr {
-	var sa sigactiont
-	sigaction(i, nil, &sa)
-	if sa.sa_sigaction == funcPC(sigtramp) {
-		return funcPC(sighandler)
-	}
-	return sa.sa_sigaction
-}
-
-//go:nosplit
-func signalstack(s *stack) {
-	var st sigaltstackt
-	if s == nil {
-		st.ss_flags = _SS_DISABLE
-	} else {
-		st.ss_sp = s.lo
-		st.ss_size = s.hi - s.lo
-		st.ss_flags = 0
-	}
-	sigaltstack(&st, nil)
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func updatesigmask(m sigmask) {
-	var mask sigset
-	copy(mask.__bits[:], m[:])
-	sigprocmask(_SIG_SETMASK, &mask, nil)
-}
-
-func unblocksig(sig int32) {
-	var mask sigset
-	mask.__bits[(sig-1)/32] |= 1 << ((uint32(sig) - 1) & 31)
-	sigprocmask(_SIG_UNBLOCK, &mask, nil)
-}
diff --git a/src/runtime/os1_linux_generic.go b/src/runtime/os1_linux_generic.go
deleted file mode 100644
index 50d6d6afb4..0000000000
--- a/src/runtime/os1_linux_generic.go
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !mips64
-// +build !mips64le
-// +build !s390x
-// +build linux
-
-package runtime
-
-var sigset_all = sigset{^uint32(0), ^uint32(0)}
-
-func sigaddset(mask *sigset, i int) {
-	(*mask)[(i-1)/32] |= 1 << ((uint32(i) - 1) & 31)
-}
-
-func sigdelset(mask *sigset, i int) {
-	(*mask)[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31)
-}
-
-func sigfillset(mask *uint64) {
-	*mask = ^uint64(0)
-}
-
-func sigcopyset(mask *sigset, m sigmask) {
-	copy((*mask)[:], m[:])
-}
diff --git a/src/runtime/os1_linux_mips64x.go b/src/runtime/os1_linux_mips64x.go
deleted file mode 100644
index 701e979102..0000000000
--- a/src/runtime/os1_linux_mips64x.go
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build mips64 mips64le
-// +build linux
-
-package runtime
-
-var sigset_all = sigset{^uint64(0), ^uint64(0)}
-
-func sigaddset(mask *sigset, i int) {
-	(*mask)[(i-1)/64] |= 1 << ((uint32(i) - 1) & 63)
-}
-
-func sigdelset(mask *sigset, i int) {
-	(*mask)[(i-1)/64] &^= 1 << ((uint32(i) - 1) & 63)
-}
-
-func sigfillset(mask *[2]uint64) {
-	(*mask)[0], (*mask)[1] = ^uint64(0), ^uint64(0)
-}
-
-func sigcopyset(mask *sigset, m sigmask) {
-	(*mask)[0] = uint64(m[0]) | uint64(m[1])<<32
-}
diff --git a/src/runtime/os1_netbsd.go b/src/runtime/os1_netbsd.go
deleted file mode 100644
index 3c3b64186d..0000000000
--- a/src/runtime/os1_netbsd.go
+++ /dev/null
@@ -1,275 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import (
-	"runtime/internal/atomic"
-	"unsafe"
-)
-
-const (
-	_ESRCH     = 3
-	_ETIMEDOUT = 60
-
-	// From NetBSD's <sys/time.h>
-	_CLOCK_REALTIME  = 0
-	_CLOCK_VIRTUAL   = 1
-	_CLOCK_PROF      = 2
-	_CLOCK_MONOTONIC = 3
-)
-
-var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}}
-
-// From NetBSD's <sys/sysctl.h>
-const (
-	_CTL_HW  = 6
-	_HW_NCPU = 3
-)
-
-func getncpu() int32 {
-	mib := [2]uint32{_CTL_HW, _HW_NCPU}
-	out := uint32(0)
-	nout := unsafe.Sizeof(out)
-	ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
-	if ret >= 0 {
-		return int32(out)
-	}
-	return 1
-}
-
-//go:nosplit
-func semacreate(mp *m) {
-}
-
-//go:nosplit
-func semasleep(ns int64) int32 {
-	_g_ := getg()
-
-	// Compute sleep deadline.
-	var tsp *timespec
-	if ns >= 0 {
-		var ts timespec
-		var nsec int32
-		ns += nanotime()
-		ts.set_sec(timediv(ns, 1000000000, &nsec))
-		ts.set_nsec(nsec)
-		tsp = &ts
-	}
-
-	for {
-		v := atomic.Load(&_g_.m.waitsemacount)
-		if v > 0 {
-			if atomic.Cas(&_g_.m.waitsemacount, v, v-1) {
-				return 0 // semaphore acquired
-			}
-			continue
-		}
-
-		// Sleep until unparked by semawakeup or timeout.
-		ret := lwp_park(tsp, 0, unsafe.Pointer(&_g_.m.waitsemacount), nil)
-		if ret == _ETIMEDOUT {
-			return -1
-		}
-	}
-}
-
-//go:nosplit
-func semawakeup(mp *m) {
-	atomic.Xadd(&mp.waitsemacount, 1)
-	// From NetBSD's _lwp_unpark(2) manual:
-	// "If the target LWP is not currently waiting, it will return
-	// immediately upon the next call to _lwp_park()."
-	ret := lwp_unpark(int32(mp.procid), unsafe.Pointer(&mp.waitsemacount))
-	if ret != 0 && ret != _ESRCH {
-		// semawakeup can be called on signal stack.
-		systemstack(func() {
-			print("thrwakeup addr=", &mp.waitsemacount, " sem=", mp.waitsemacount, " ret=", ret, "\n")
-		})
-	}
-}
-
-// May run with m.p==nil, so write barriers are not allowed.
-//go:nowritebarrier
-func newosproc(mp *m, stk unsafe.Pointer) {
-	if false {
-		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " id=", mp.id, " ostk=", &mp, "\n")
-	}
-
-	var uc ucontextt
-	getcontext(unsafe.Pointer(&uc))
-
-	uc.uc_flags = _UC_SIGMASK | _UC_CPU
-	uc.uc_link = nil
-	uc.uc_sigmask = sigset_all
-
-	lwp_mcontext_init(&uc.uc_mcontext, stk, mp, mp.g0, funcPC(netbsdMstart))
-
-	ret := lwp_create(unsafe.Pointer(&uc), 0, unsafe.Pointer(&mp.procid))
-	if ret < 0 {
-		print("runtime: failed to create new OS thread (have ", mcount()-1, " already; errno=", -ret, ")\n")
-		throw("runtime.newosproc")
-	}
-}
-
-// netbsdMStart is the function call that starts executing a newly
-// created thread. On NetBSD, a new thread inherits the signal stack
-// of the creating thread. That confuses minit, so we remove that
-// signal stack here before calling the regular mstart. It's a bit
-// baroque to remove a signal stack here only to add one in minit, but
-// it's a simple change that keeps NetBSD working like other OS's.
-// At this point all signals are blocked, so there is no race.
-//go:nosplit
-func netbsdMstart() {
-	signalstack(nil)
-	mstart()
-}
-
-func osinit() {
-	ncpu = getncpu()
-}
-
-var urandom_dev = []byte("/dev/urandom\x00")
-
-//go:nosplit
-func getRandomData(r []byte) {
-	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
-	n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
-	closefd(fd)
-	extendRandom(r, int(n))
-}
-
-func goenvs() {
-	goenvs_unix()
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
-func mpreinit(mp *m) {
-	mp.gsignal = malg(32 * 1024)
-	mp.gsignal.m = mp
-}
-
-//go:nosplit
-func msigsave(mp *m) {
-	sigprocmask(_SIG_SETMASK, nil, &mp.sigmask)
-}
-
-//go:nosplit
-func msigrestore(sigmask sigset) {
-	sigprocmask(_SIG_SETMASK, &sigmask, nil)
-}
-
-//go:nosplit
-func sigblock() {
-	sigprocmask(_SIG_SETMASK, &sigset_all, nil)
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the new thread, cannot allocate memory.
-func minit() {
-	_g_ := getg()
-	_g_.m.procid = uint64(lwp_self())
-
-	// Initialize signal handling.
-
-	// On NetBSD a thread created by pthread_create inherits the
-	// signal stack of the creating thread. We always create a
-	// new signal stack here, to avoid having two Go threads using
-	// the same signal stack. This breaks the case of a thread
-	// created in C that calls sigaltstack and then calls a Go
-	// function, because we will lose track of the C code's
-	// sigaltstack, but it's the best we can do.
-	signalstack(&_g_.m.gsignal.stack)
-	_g_.m.newSigstack = true
-
-	// restore signal mask from m.sigmask and unblock essential signals
-	nmask := _g_.m.sigmask
-	for i := range sigtable {
-		if sigtable[i].flags&_SigUnblock != 0 {
-			nmask.__bits[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31)
-		}
-	}
-	sigprocmask(_SIG_SETMASK, &nmask, nil)
-}
-
-// Called from dropm to undo the effect of an minit.
-//go:nosplit
-func unminit() {
-	if getg().m.newSigstack {
-		signalstack(nil)
-	}
-}
-
-func memlimit() uintptr {
-	return 0
-}
-
-func sigtramp()
-
-type sigactiont struct {
-	sa_sigaction uintptr
-	sa_mask      sigset
-	sa_flags     int32
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func setsig(i int32, fn uintptr, restart bool) {
-	var sa sigactiont
-	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK
-	if restart {
-		sa.sa_flags |= _SA_RESTART
-	}
-	sa.sa_mask = sigset_all
-	if fn == funcPC(sighandler) {
-		fn = funcPC(sigtramp)
-	}
-	sa.sa_sigaction = fn
-	sigaction(i, &sa, nil)
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func setsigstack(i int32) {
-	throw("setsigstack")
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func getsig(i int32) uintptr {
-	var sa sigactiont
-	sigaction(i, nil, &sa)
-	if sa.sa_sigaction == funcPC(sigtramp) {
-		return funcPC(sighandler)
-	}
-	return sa.sa_sigaction
-}
-
-//go:nosplit
-func signalstack(s *stack) {
-	var st sigaltstackt
-	if s == nil {
-		st.ss_flags = _SS_DISABLE
-	} else {
-		st.ss_sp = s.lo
-		st.ss_size = s.hi - s.lo
-		st.ss_flags = 0
-	}
-	sigaltstack(&st, nil)
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func updatesigmask(m sigmask) {
-	var mask sigset
-	copy(mask.__bits[:], m[:])
-	sigprocmask(_SIG_SETMASK, &mask, nil)
-}
-
-func unblocksig(sig int32) {
-	var mask sigset
-	mask.__bits[(sig-1)/32] |= 1 << ((uint32(sig) - 1) & 31)
-	sigprocmask(_SIG_UNBLOCK, &mask, nil)
-}
diff --git a/src/runtime/os1_netbsd_386.go b/src/runtime/os1_netbsd_386.go
deleted file mode 100644
index 037f7e36dc..0000000000
--- a/src/runtime/os1_netbsd_386.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-func lwp_mcontext_init(mc *mcontextt, stk unsafe.Pointer, mp *m, gp *g, fn uintptr) {
-	// Machine dependent mcontext initialisation for LWP.
-	mc.__gregs[_REG_EIP] = uint32(funcPC(lwp_tramp))
-	mc.__gregs[_REG_UESP] = uint32(uintptr(stk))
-	mc.__gregs[_REG_EBX] = uint32(uintptr(unsafe.Pointer(mp)))
-	mc.__gregs[_REG_EDX] = uint32(uintptr(unsafe.Pointer(gp)))
-	mc.__gregs[_REG_ESI] = uint32(fn)
-}
diff --git a/src/runtime/os1_netbsd_amd64.go b/src/runtime/os1_netbsd_amd64.go
deleted file mode 100644
index 5118b0c4ff..0000000000
--- a/src/runtime/os1_netbsd_amd64.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-func lwp_mcontext_init(mc *mcontextt, stk unsafe.Pointer, mp *m, gp *g, fn uintptr) {
-	// Machine dependent mcontext initialisation for LWP.
-	mc.__gregs[_REG_RIP] = uint64(funcPC(lwp_tramp))
-	mc.__gregs[_REG_RSP] = uint64(uintptr(stk))
-	mc.__gregs[_REG_R8] = uint64(uintptr(unsafe.Pointer(mp)))
-	mc.__gregs[_REG_R9] = uint64(uintptr(unsafe.Pointer(gp)))
-	mc.__gregs[_REG_R12] = uint64(fn)
-}
diff --git a/src/runtime/os2_dragonfly.go b/src/runtime/os2_dragonfly.go
deleted file mode 100644
index 6ea2da0393..0000000000
--- a/src/runtime/os2_dragonfly.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
-	_NSIG        = 33
-	_SI_USER     = 0
-	_SS_DISABLE  = 4
-	_RLIMIT_AS   = 10
-	_SIG_BLOCK   = 1
-	_SIG_UNBLOCK = 2
-	_SIG_SETMASK = 3
-)
diff --git a/src/runtime/os2_linux_generic.go b/src/runtime/os2_linux_generic.go
deleted file mode 100644
index f1a2dd5130..0000000000
--- a/src/runtime/os2_linux_generic.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !mips64
-// +build !mips64le
-// +build !s390x
-// +build linux
-
-package runtime
-
-const (
-	_SS_DISABLE  = 2
-	_NSIG        = 65
-	_SI_USER     = 0
-	_SIG_BLOCK   = 0
-	_SIG_UNBLOCK = 1
-	_SIG_SETMASK = 2
-	_RLIMIT_AS   = 9
-)
-
-// It's hard to tease out exactly how big a Sigset is, but
-// rt_sigprocmask crashes if we get it wrong, so if binaries
-// are running, this is right.
-type sigset [2]uint32
-
-type rlimit struct {
-	rlim_cur uintptr
-	rlim_max uintptr
-}
diff --git a/src/runtime/os2_linux_mips64x.go b/src/runtime/os2_linux_mips64x.go
deleted file mode 100644
index 9a6a92a87d..0000000000
--- a/src/runtime/os2_linux_mips64x.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build linux
-// +build mips64 mips64le
-
-package runtime
-
-const (
-	_SS_DISABLE  = 2
-	_NSIG        = 65
-	_SI_USER     = 0
-	_SIG_BLOCK   = 1
-	_SIG_UNBLOCK = 2
-	_SIG_SETMASK = 3
-	_RLIMIT_AS   = 6
-)
-
-type sigset [2]uint64
-
-type rlimit struct {
-	rlim_cur uintptr
-	rlim_max uintptr
-}
diff --git a/src/runtime/os2_netbsd.go b/src/runtime/os2_netbsd.go
deleted file mode 100644
index 405dd5e727..0000000000
--- a/src/runtime/os2_netbsd.go
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
-	_SS_DISABLE  = 4
-	_SIG_BLOCK   = 1
-	_SIG_UNBLOCK = 2
-	_SIG_SETMASK = 3
-	_NSIG        = 33
-	_SI_USER     = 0
-
-	// From NetBSD's <sys/ucontext.h>
-	_UC_SIGMASK = 0x01
-	_UC_CPU     = 0x04
-)
diff --git a/src/runtime/os_dragonfly.go b/src/runtime/os_dragonfly.go
index c3833a397a..78a150eee5 100644
--- a/src/runtime/os_dragonfly.go
+++ b/src/runtime/os_dragonfly.go
@@ -6,6 +6,16 @@ package runtime
 
 import "unsafe"
 
+const (
+	_NSIG        = 33
+	_SI_USER     = 0
+	_SS_DISABLE  = 4
+	_RLIMIT_AS   = 10
+	_SIG_BLOCK   = 1
+	_SIG_UNBLOCK = 2
+	_SIG_SETMASK = 3
+)
+
 type mOS struct{}
 
 //go:noescape
@@ -41,3 +51,266 @@ func sys_umtx_wakeup(addr *uint32, val int32) int32
 func osyield()
 
 const stackSystem = 0
+
+// From DragonFly's <sys/sysctl.h>
+const (
+	_CTL_HW  = 6
+	_HW_NCPU = 3
+)
+
+var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}}
+
+func getncpu() int32 {
+	mib := [2]uint32{_CTL_HW, _HW_NCPU}
+	out := uint32(0)
+	nout := unsafe.Sizeof(out)
+	ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
+	if ret >= 0 {
+		return int32(out)
+	}
+	return 1
+}
+
+//go:nosplit
+func futexsleep(addr *uint32, val uint32, ns int64) {
+	systemstack(func() {
+		futexsleep1(addr, val, ns)
+	})
+}
+
+func futexsleep1(addr *uint32, val uint32, ns int64) {
+	var timeout int32
+	if ns >= 0 {
+		// The timeout is specified in microseconds - ensure that the
+		// division does not round down to zero, which would put us to
+		// sleep indefinitely...
+		timeout = timediv(ns, 1000, nil)
+		if timeout == 0 {
+			timeout = 1
+		}
+	}
+
+	// sys_umtx_sleep will return EWOULDBLOCK (EAGAIN) when the timeout
+	// expires or EBUSY if the mutex value does not match.
+	ret := sys_umtx_sleep(addr, int32(val), timeout)
+	if ret >= 0 || ret == -_EINTR || ret == -_EAGAIN || ret == -_EBUSY {
+		return
+	}
+
+	print("umtx_sleep addr=", addr, " val=", val, " ret=", ret, "\n")
+	*(*int32)(unsafe.Pointer(uintptr(0x1005))) = 0x1005
+}
+
+//go:nosplit
+func futexwakeup(addr *uint32, cnt uint32) {
+	ret := sys_umtx_wakeup(addr, int32(cnt))
+	if ret >= 0 {
+		return
+	}
+
+	systemstack(func() {
+		print("umtx_wake_addr=", addr, " ret=", ret, "\n")
+		*(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
+	})
+}
+
+func lwp_start(uintptr)
+
+// May run with m.p==nil, so write barriers are not allowed.
+//go:nowritebarrier
+func newosproc(mp *m, stk unsafe.Pointer) {
+	if false {
+		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " lwp_start=", funcPC(lwp_start), " id=", mp.id, " ostk=", &mp, "\n")
+	}
+
+	var oset sigset
+	sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
+
+	params := lwpparams{
+		start_func: funcPC(lwp_start),
+		arg:        unsafe.Pointer(mp),
+		stack:      uintptr(stk),
+		tid1:       unsafe.Pointer(&mp.procid),
+		tid2:       nil,
+	}
+
+	lwp_create(&params)
+	sigprocmask(_SIG_SETMASK, &oset, nil)
+}
+
+func osinit() {
+	ncpu = getncpu()
+}
+
+var urandom_dev = []byte("/dev/urandom\x00")
+
+//go:nosplit
+func getRandomData(r []byte) {
+	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
+	n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
+	closefd(fd)
+	extendRandom(r, int(n))
+}
+
+func goenvs() {
+	goenvs_unix()
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+func mpreinit(mp *m) {
+	mp.gsignal = malg(32 * 1024)
+	mp.gsignal.m = mp
+}
+
+//go:nosplit
+func msigsave(mp *m) {
+	sigprocmask(_SIG_SETMASK, nil, &mp.sigmask)
+}
+
+//go:nosplit
+func msigrestore(sigmask sigset) {
+	sigprocmask(_SIG_SETMASK, &sigmask, nil)
+}
+
+//go:nosplit
+func sigblock() {
+	sigprocmask(_SIG_SETMASK, &sigset_all, nil)
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, cannot allocate memory.
+func minit() {
+	_g_ := getg()
+
+	// m.procid is a uint64, but lwp_start writes an int32. Fix it up.
+	_g_.m.procid = uint64(*(*int32)(unsafe.Pointer(&_g_.m.procid)))
+
+	// Initialize signal handling.
+
+	// On DragonFly a thread created by pthread_create inherits
+	// the signal stack of the creating thread. We always create
+	// a new signal stack here, to avoid having two Go threads
+	// using the same signal stack. This breaks the case of a
+	// thread created in C that calls sigaltstack and then calls a
+	// Go function, because we will lose track of the C code's
+	// sigaltstack, but it's the best we can do.
+	signalstack(&_g_.m.gsignal.stack)
+	_g_.m.newSigstack = true
+
+	// restore signal mask from m.sigmask and unblock essential signals
+	nmask := _g_.m.sigmask
+	for i := range sigtable {
+		if sigtable[i].flags&_SigUnblock != 0 {
+			nmask.__bits[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31)
+		}
+	}
+	sigprocmask(_SIG_SETMASK, &nmask, nil)
+}
+
+// Called from dropm to undo the effect of an minit.
+//go:nosplit
+func unminit() {
+	if getg().m.newSigstack {
+		signalstack(nil)
+	}
+}
+
+func memlimit() uintptr {
+	/*
+		                TODO: Convert to Go when something actually uses the result.
+
+				Rlimit rl;
+				extern byte runtime·text[], runtime·end[];
+				uintptr used;
+
+				if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
+					return 0;
+				if(rl.rlim_cur >= 0x7fffffff)
+					return 0;
+
+				// Estimate our VM footprint excluding the heap.
+				// Not an exact science: use size of binary plus
+				// some room for thread stacks.
+				used = runtime·end - runtime·text + (64<<20);
+				if(used >= rl.rlim_cur)
+					return 0;
+
+				// If there's not at least 16 MB left, we're probably
+				// not going to be able to do much. Treat as no limit.
+				rl.rlim_cur -= used;
+				if(rl.rlim_cur < (16<<20))
+					return 0;
+
+				return rl.rlim_cur - used;
+	*/
+	return 0
+}
+
+func sigtramp()
+
+type sigactiont struct {
+	sa_sigaction uintptr
+	sa_flags     int32
+	sa_mask      sigset
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func setsig(i int32, fn uintptr, restart bool) {
+	var sa sigactiont
+	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK
+	if restart {
+		sa.sa_flags |= _SA_RESTART
+	}
+	sa.sa_mask = sigset_all
+	if fn == funcPC(sighandler) {
+		fn = funcPC(sigtramp)
+	}
+	sa.sa_sigaction = fn
+	sigaction(i, &sa, nil)
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func setsigstack(i int32) {
+	throw("setsigstack")
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func getsig(i int32) uintptr {
+	var sa sigactiont
+	sigaction(i, nil, &sa)
+	if sa.sa_sigaction == funcPC(sigtramp) {
+		return funcPC(sighandler)
+	}
+	return sa.sa_sigaction
+}
+
+//go:nosplit
+func signalstack(s *stack) {
+	var st sigaltstackt
+	if s == nil {
+		st.ss_flags = _SS_DISABLE
+	} else {
+		st.ss_sp = s.lo
+		st.ss_size = s.hi - s.lo
+		st.ss_flags = 0
+	}
+	sigaltstack(&st, nil)
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func updatesigmask(m sigmask) {
+	var mask sigset
+	copy(mask.__bits[:], m[:])
+	sigprocmask(_SIG_SETMASK, &mask, nil)
+}
+
+func unblocksig(sig int32) {
+	var mask sigset
+	mask.__bits[(sig-1)/32] |= 1 << ((uint32(sig) - 1) & 31)
+	sigprocmask(_SIG_UNBLOCK, &mask, nil)
+}
diff --git a/src/runtime/os_linux_generic.go b/src/runtime/os_linux_generic.go
new file mode 100644
index 0000000000..a16d140776
--- /dev/null
+++ b/src/runtime/os_linux_generic.go
@@ -0,0 +1,48 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !mips64
+// +build !mips64le
+// +build !s390x
+// +build linux
+
+package runtime
+
+const (
+	_SS_DISABLE  = 2
+	_NSIG        = 65
+	_SI_USER     = 0
+	_SIG_BLOCK   = 0
+	_SIG_UNBLOCK = 1
+	_SIG_SETMASK = 2
+	_RLIMIT_AS   = 9
+)
+
+// It's hard to tease out exactly how big a Sigset is, but
+// rt_sigprocmask crashes if we get it wrong, so if binaries
+// are running, this is right.
+type sigset [2]uint32
+
+type rlimit struct {
+	rlim_cur uintptr
+	rlim_max uintptr
+}
+
+var sigset_all = sigset{^uint32(0), ^uint32(0)}
+
+func sigaddset(mask *sigset, i int) {
+	(*mask)[(i-1)/32] |= 1 << ((uint32(i) - 1) & 31)
+}
+
+func sigdelset(mask *sigset, i int) {
+	(*mask)[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31)
+}
+
+func sigfillset(mask *uint64) {
+	*mask = ^uint64(0)
+}
+
+func sigcopyset(mask *sigset, m sigmask) {
+	copy((*mask)[:], m[:])
+}
diff --git a/src/runtime/os_linux_mips64x.go b/src/runtime/os_linux_mips64x.go
index 4d2e9e8a20..92b5c82af7 100644
--- a/src/runtime/os_linux_mips64x.go
+++ b/src/runtime/os_linux_mips64x.go
@@ -2,8 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build mips64 mips64le
 // +build linux
+// +build mips64 mips64le
 
 package runtime
 
@@ -16,3 +16,38 @@ func cputicks() int64 {
 	// randomNumber provides better seeding of fastrand1.
 	return nanotime() + int64(randomNumber)
 }
+
+const (
+	_SS_DISABLE  = 2
+	_NSIG        = 65
+	_SI_USER     = 0
+	_SIG_BLOCK   = 1
+	_SIG_UNBLOCK = 2
+	_SIG_SETMASK = 3
+	_RLIMIT_AS   = 6
+)
+
+type sigset [2]uint64
+
+type rlimit struct {
+	rlim_cur uintptr
+	rlim_max uintptr
+}
+
+var sigset_all = sigset{^uint64(0), ^uint64(0)}
+
+func sigaddset(mask *sigset, i int) {
+	(*mask)[(i-1)/64] |= 1 << ((uint32(i) - 1) & 63)
+}
+
+func sigdelset(mask *sigset, i int) {
+	(*mask)[(i-1)/64] &^= 1 << ((uint32(i) - 1) & 63)
+}
+
+func sigfillset(mask *[2]uint64) {
+	(*mask)[0], (*mask)[1] = ^uint64(0), ^uint64(0)
+}
+
+func sigcopyset(mask *sigset, m sigmask) {
+	(*mask)[0] = uint64(m[0]) | uint64(m[1])<<32
+}
diff --git a/src/runtime/os_netbsd.go b/src/runtime/os_netbsd.go
index 0fba16d4f4..41f34f7132 100644
--- a/src/runtime/os_netbsd.go
+++ b/src/runtime/os_netbsd.go
@@ -4,7 +4,23 @@
 
 package runtime
 
-import "unsafe"
+import (
+	"runtime/internal/atomic"
+	"unsafe"
+)
+
+const (
+	_SS_DISABLE  = 4
+	_SIG_BLOCK   = 1
+	_SIG_UNBLOCK = 2
+	_SIG_SETMASK = 3
+	_NSIG        = 33
+	_SI_USER     = 0
+
+	// From NetBSD's <sys/ucontext.h>
+	_UC_SIGMASK = 0x01
+	_UC_CPU     = 0x04
+)
 
 type mOS struct {
 	waitsemacount uint32
@@ -45,3 +61,268 @@ func lwp_unpark(lwp int32, hint unsafe.Pointer) int32
 func lwp_self() int32
 
 func osyield()
+
+const (
+	_ESRCH     = 3
+	_ETIMEDOUT = 60
+
+	// From NetBSD's <sys/time.h>
+	_CLOCK_REALTIME  = 0
+	_CLOCK_VIRTUAL   = 1
+	_CLOCK_PROF      = 2
+	_CLOCK_MONOTONIC = 3
+)
+
+var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}}
+
+// From NetBSD's <sys/sysctl.h>
+const (
+	_CTL_HW  = 6
+	_HW_NCPU = 3
+)
+
+func getncpu() int32 {
+	mib := [2]uint32{_CTL_HW, _HW_NCPU}
+	out := uint32(0)
+	nout := unsafe.Sizeof(out)
+	ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
+	if ret >= 0 {
+		return int32(out)
+	}
+	return 1
+}
+
+//go:nosplit
+func semacreate(mp *m) {
+}
+
+//go:nosplit
+func semasleep(ns int64) int32 {
+	_g_ := getg()
+
+	// Compute sleep deadline.
+	var tsp *timespec
+	if ns >= 0 {
+		var ts timespec
+		var nsec int32
+		ns += nanotime()
+		ts.set_sec(timediv(ns, 1000000000, &nsec))
+		ts.set_nsec(nsec)
+		tsp = &ts
+	}
+
+	for {
+		v := atomic.Load(&_g_.m.waitsemacount)
+		if v > 0 {
+			if atomic.Cas(&_g_.m.waitsemacount, v, v-1) {
+				return 0 // semaphore acquired
+			}
+			continue
+		}
+
+		// Sleep until unparked by semawakeup or timeout.
+		ret := lwp_park(tsp, 0, unsafe.Pointer(&_g_.m.waitsemacount), nil)
+		if ret == _ETIMEDOUT {
+			return -1
+		}
+	}
+}
+
+//go:nosplit
+func semawakeup(mp *m) {
+	atomic.Xadd(&mp.waitsemacount, 1)
+	// From NetBSD's _lwp_unpark(2) manual:
+	// "If the target LWP is not currently waiting, it will return
+	// immediately upon the next call to _lwp_park()."
+	ret := lwp_unpark(int32(mp.procid), unsafe.Pointer(&mp.waitsemacount))
+	if ret != 0 && ret != _ESRCH {
+		// semawakeup can be called on signal stack.
+		systemstack(func() {
+			print("thrwakeup addr=", &mp.waitsemacount, " sem=", mp.waitsemacount, " ret=", ret, "\n")
+		})
+	}
+}
+
+// May run with m.p==nil, so write barriers are not allowed.
+//go:nowritebarrier
+func newosproc(mp *m, stk unsafe.Pointer) {
+	if false {
+		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " id=", mp.id, " ostk=", &mp, "\n")
+	}
+
+	var uc ucontextt
+	getcontext(unsafe.Pointer(&uc))
+
+	uc.uc_flags = _UC_SIGMASK | _UC_CPU
+	uc.uc_link = nil
+	uc.uc_sigmask = sigset_all
+
+	lwp_mcontext_init(&uc.uc_mcontext, stk, mp, mp.g0, funcPC(netbsdMstart))
+
+	ret := lwp_create(unsafe.Pointer(&uc), 0, unsafe.Pointer(&mp.procid))
+	if ret < 0 {
+		print("runtime: failed to create new OS thread (have ", mcount()-1, " already; errno=", -ret, ")\n")
+		throw("runtime.newosproc")
+	}
+}
+
+// netbsdMstart is the function call that starts executing a newly
+// created thread. On NetBSD, a new thread inherits the signal stack
+// of the creating thread. That confuses minit, so we remove that
+// signal stack here before calling the regular mstart. It's a bit
+// baroque to remove a signal stack here only to add one in minit, but
+// it's a simple change that keeps NetBSD working like other OS's.
+// At this point all signals are blocked, so there is no race.
+//go:nosplit
+func netbsdMstart() {
+	signalstack(nil)
+	mstart()
+}
+
+func osinit() {
+	ncpu = getncpu()
+}
+
+var urandom_dev = []byte("/dev/urandom\x00")
+
+//go:nosplit
+func getRandomData(r []byte) {
+	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
+	n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
+	closefd(fd)
+	extendRandom(r, int(n))
+}
+
+func goenvs() {
+	goenvs_unix()
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+func mpreinit(mp *m) {
+	mp.gsignal = malg(32 * 1024)
+	mp.gsignal.m = mp
+}
+
+//go:nosplit
+func msigsave(mp *m) {
+	sigprocmask(_SIG_SETMASK, nil, &mp.sigmask)
+}
+
+//go:nosplit
+func msigrestore(sigmask sigset) {
+	sigprocmask(_SIG_SETMASK, &sigmask, nil)
+}
+
+//go:nosplit
+func sigblock() {
+	sigprocmask(_SIG_SETMASK, &sigset_all, nil)
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, cannot allocate memory.
+func minit() {
+	_g_ := getg()
+	_g_.m.procid = uint64(lwp_self())
+
+	// Initialize signal handling.
+
+	// On NetBSD a thread created by pthread_create inherits the
+	// signal stack of the creating thread. We always create a
+	// new signal stack here, to avoid having two Go threads using
+	// the same signal stack. This breaks the case of a thread
+	// created in C that calls sigaltstack and then calls a Go
+	// function, because we will lose track of the C code's
+	// sigaltstack, but it's the best we can do.
+	signalstack(&_g_.m.gsignal.stack)
+	_g_.m.newSigstack = true
+
+	// restore signal mask from m.sigmask and unblock essential signals
+	nmask := _g_.m.sigmask
+	for i := range sigtable {
+		if sigtable[i].flags&_SigUnblock != 0 {
+			nmask.__bits[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31)
+		}
+	}
+	sigprocmask(_SIG_SETMASK, &nmask, nil)
+}
+
+// Called from dropm to undo the effect of an minit.
+//go:nosplit
+func unminit() {
+	if getg().m.newSigstack {
+		signalstack(nil)
+	}
+}
+
+func memlimit() uintptr {
+	return 0
+}
+
+func sigtramp()
+
+type sigactiont struct {
+	sa_sigaction uintptr
+	sa_mask      sigset
+	sa_flags     int32
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func setsig(i int32, fn uintptr, restart bool) {
+	var sa sigactiont
+	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK
+	if restart {
+		sa.sa_flags |= _SA_RESTART
+	}
+	sa.sa_mask = sigset_all
+	if fn == funcPC(sighandler) {
+		fn = funcPC(sigtramp)
+	}
+	sa.sa_sigaction = fn
+	sigaction(i, &sa, nil)
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func setsigstack(i int32) {
+	throw("setsigstack")
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func getsig(i int32) uintptr {
+	var sa sigactiont
+	sigaction(i, nil, &sa)
+	if sa.sa_sigaction == funcPC(sigtramp) {
+		return funcPC(sighandler)
+	}
+	return sa.sa_sigaction
+}
+
+//go:nosplit
+func signalstack(s *stack) {
+	var st sigaltstackt
+	if s == nil {
+		st.ss_flags = _SS_DISABLE
+	} else {
+		st.ss_sp = s.lo
+		st.ss_size = s.hi - s.lo
+		st.ss_flags = 0
+	}
+	sigaltstack(&st, nil)
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func updatesigmask(m sigmask) {
+	var mask sigset
+	copy(mask.__bits[:], m[:])
+	sigprocmask(_SIG_SETMASK, &mask, nil)
+}
+
+func unblocksig(sig int32) {
+	var mask sigset
+	mask.__bits[(sig-1)/32] |= 1 << ((uint32(sig) - 1) & 31)
+	sigprocmask(_SIG_UNBLOCK, &mask, nil)
+}
diff --git a/src/runtime/os_netbsd_386.go b/src/runtime/os_netbsd_386.go
new file mode 100644
index 0000000000..037f7e36dc
--- /dev/null
+++ b/src/runtime/os_netbsd_386.go
@@ -0,0 +1,16 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func lwp_mcontext_init(mc *mcontextt, stk unsafe.Pointer, mp *m, gp *g, fn uintptr) {
+	// Machine dependent mcontext initialisation for LWP.
+	mc.__gregs[_REG_EIP] = uint32(funcPC(lwp_tramp))
+	mc.__gregs[_REG_UESP] = uint32(uintptr(stk))
+	mc.__gregs[_REG_EBX] = uint32(uintptr(unsafe.Pointer(mp)))
+	mc.__gregs[_REG_EDX] = uint32(uintptr(unsafe.Pointer(gp)))
+	mc.__gregs[_REG_ESI] = uint32(fn)
+}
diff --git a/src/runtime/os_netbsd_amd64.go b/src/runtime/os_netbsd_amd64.go
new file mode 100644
index 0000000000..5118b0c4ff
--- /dev/null
+++ b/src/runtime/os_netbsd_amd64.go
@@ -0,0 +1,16 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func lwp_mcontext_init(mc *mcontextt, stk unsafe.Pointer, mp *m, gp *g, fn uintptr) {
+	// Machine dependent mcontext initialisation for LWP.
+	mc.__gregs[_REG_RIP] = uint64(funcPC(lwp_tramp))
+	mc.__gregs[_REG_RSP] = uint64(uintptr(stk))
+	mc.__gregs[_REG_R8] = uint64(uintptr(unsafe.Pointer(mp)))
+	mc.__gregs[_REG_R9] = uint64(uintptr(unsafe.Pointer(gp)))
+	mc.__gregs[_REG_R12] = uint64(fn)
+}
-- 
cgit v1.3


From 5c7ae10f66eae34b8a786fc2fdf753bf48a3d116 Mon Sep 17 00:00:00 2001
From: Dave Cheney <dave@cheney.net>
Date: Wed, 6 Apr 2016 15:19:12 +1000
Subject: runtime: merge 64bit lfstack impls

Merge all the 64bit lfstack impls into one file, adjust build tags to
match.

Merge all the comments on the various lfstack implementations for
posterity.

lfstack_amd64.go can probably be merged, but it is slightly different so
that will happen in a followup.

Change-Id: I5362d5e127daa81c9cb9d4fa8a0cc5c5e5c2707c
Reviewed-on: https://go-review.googlesource.com/21591
Run-TryBot: Dave Cheney <dave@cheney.net>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Minux Ma <minux@golang.org>
---
 src/runtime/lfstack_64bit.go         | 35 +++++++++++++++++++++++++++++++++++
 src/runtime/lfstack_darwin_arm64.go  | 25 -------------------------
 src/runtime/lfstack_linux_arm64.go   | 25 -------------------------
 src/runtime/lfstack_linux_mips64x.go | 32 --------------------------------
 src/runtime/lfstack_linux_ppc64x.go  | 32 --------------------------------
 5 files changed, 35 insertions(+), 114 deletions(-)
 create mode 100644 src/runtime/lfstack_64bit.go
 delete mode 100644 src/runtime/lfstack_darwin_arm64.go
 delete mode 100644 src/runtime/lfstack_linux_arm64.go
 delete mode 100644 src/runtime/lfstack_linux_mips64x.go
 delete mode 100644 src/runtime/lfstack_linux_ppc64x.go

(limited to 'src/runtime')

diff --git a/src/runtime/lfstack_64bit.go b/src/runtime/lfstack_64bit.go
new file mode 100644
index 0000000000..27a058c763
--- /dev/null
+++ b/src/runtime/lfstack_64bit.go
@@ -0,0 +1,35 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build arm64 mips64 mips64le ppc64 ppc64le
+
+package runtime
+
+import "unsafe"
+
+// On ppc64, Linux limits the user address space to 46 bits (see
+// TASK_SIZE_USER64 in the Linux kernel).  This has grown over time,
+// so here we allow 48 bit addresses.
+//
+// On mips64, Linux limits the user address space to 40 bits (see
+// TASK_SIZE64 in the Linux kernel).  This has grown over time,
+// so here we allow 48 bit addresses.
+//
+// In addition to the 16 bits taken from the top, we can take 3 from the
+// bottom, because node must be pointer-aligned, giving a total of 19 bits
+// of count.
+const (
+	addrBits = 48
+	cntBits  = 64 - addrBits + 3
+)
+
+func lfstackPack(node *lfnode, cnt uintptr) uint64 {
+	return uint64(uintptr(unsafe.Pointer(node)))<<(64-addrBits) | uint64(cnt&(1<<cntBits-1))
+}
+
+func lfstackUnpack(val uint64) (node *lfnode, cnt uintptr) {
+	node = (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3)))
+	cnt = uintptr(val & (1<<cntBits - 1))
+	return
+}
diff --git a/src/runtime/lfstack_darwin_arm64.go b/src/runtime/lfstack_darwin_arm64.go
deleted file mode 100644
index f48d76382b..0000000000
--- a/src/runtime/lfstack_darwin_arm64.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-// In addition to the 16 bits taken from the top, we can take 3 from the
-// bottom, because node must be pointer-aligned, giving a total of 19 bits
-// of count.
-const (
-	addrBits = 48
-	cntBits  = 64 - addrBits + 3
-)
-
-func lfstackPack(node *lfnode, cnt uintptr) uint64 {
-	return uint64(uintptr(unsafe.Pointer(node)))<<(64-addrBits) | uint64(cnt&(1<<cntBits-1))
-}
-
-func lfstackUnpack(val uint64) (node *lfnode, cnt uintptr) {
-	node = (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3)))
-	cnt = uintptr(val & (1<<cntBits - 1))
-	return
-}
diff --git a/src/runtime/lfstack_linux_arm64.go b/src/runtime/lfstack_linux_arm64.go
deleted file mode 100644
index f48d76382b..0000000000
--- a/src/runtime/lfstack_linux_arm64.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-// In addition to the 16 bits taken from the top, we can take 3 from the
-// bottom, because node must be pointer-aligned, giving a total of 19 bits
-// of count.
-const (
-	addrBits = 48
-	cntBits  = 64 - addrBits + 3
-)
-
-func lfstackPack(node *lfnode, cnt uintptr) uint64 {
-	return uint64(uintptr(unsafe.Pointer(node)))<<(64-addrBits) | uint64(cnt&(1<<cntBits-1))
-}
-
-func lfstackUnpack(val uint64) (node *lfnode, cnt uintptr) {
-	node = (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3)))
-	cnt = uintptr(val & (1<<cntBits - 1))
-	return
-}
diff --git a/src/runtime/lfstack_linux_mips64x.go b/src/runtime/lfstack_linux_mips64x.go
deleted file mode 100644
index 7ff95f77ae..0000000000
--- a/src/runtime/lfstack_linux_mips64x.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build mips64 mips64le
-// +build linux
-
-package runtime
-
-import "unsafe"
-
-// On mips64, Linux limits the user address space to 40 bits (see
-// TASK_SIZE64 in the Linux kernel).  This has grown over time,
-// so here we allow 48 bit addresses.
-//
-// In addition to the 16 bits taken from the top, we can take 3 from the
-// bottom, because node must be pointer-aligned, giving a total of 19 bits
-// of count.
-const (
-	addrBits = 48
-	cntBits  = 64 - addrBits + 3
-)
-
-func lfstackPack(node *lfnode, cnt uintptr) uint64 {
-	return uint64(uintptr(unsafe.Pointer(node)))<<(64-addrBits) | uint64(cnt&(1<<cntBits-1))
-}
-
-func lfstackUnpack(val uint64) (node *lfnode, cnt uintptr) {
-	node = (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3)))
-	cnt = uintptr(val & (1<<cntBits - 1))
-	return
-}
diff --git a/src/runtime/lfstack_linux_ppc64x.go b/src/runtime/lfstack_linux_ppc64x.go
deleted file mode 100644
index 83b7cf4f58..0000000000
--- a/src/runtime/lfstack_linux_ppc64x.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ppc64 ppc64le
-// +build linux
-
-package runtime
-
-import "unsafe"
-
-// On ppc64, Linux limits the user address space to 46 bits (see
-// TASK_SIZE_USER64 in the Linux kernel).  This has grown over time,
-// so here we allow 48 bit addresses.
-//
-// In addition to the 16 bits taken from the top, we can take 3 from the
-// bottom, because node must be pointer-aligned, giving a total of 19 bits
-// of count.
-const (
-	addrBits = 48
-	cntBits  = 64 - addrBits + 3
-)
-
-func lfstackPack(node *lfnode, cnt uintptr) uint64 {
-	return uint64(uintptr(unsafe.Pointer(node)))<<(64-addrBits) | uint64(cnt&(1<<cntBits-1))
-}
-
-func lfstackUnpack(val uint64) (node *lfnode, cnt uintptr) {
-	node = (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3)))
-	cnt = uintptr(val & (1<<cntBits - 1))
-	return
-}
-- 
cgit v1.3


From 2cefd12a1bf7ee1d1aad03e17c4680d4b611d6da Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick <bradfitz@golang.org>
Date: Wed, 6 Apr 2016 19:02:27 +0000
Subject: net, runtime: skip flaky tests on OpenBSD

Flaky tests are a distraction and cover up real problems.

File bugs instead and mark them as flaky.

This moves the net/http flaky test flagging mechanism to internal/testenv.

Updates #15156
Updates #15157
Updates #15158

Change-Id: I0e561cd2a09c0dec369cd4ed93bc5a2b40233dfe
Reviewed-on: https://go-review.googlesource.com/21614
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
---
 src/context/context_test.go     | 4 ++++
 src/go/build/deps_test.go       | 2 +-
 src/internal/testenv/testenv.go | 9 +++++++++
 src/net/dial_test.go            | 4 ++++
 src/net/http/main_test.go       | 9 ---------
 src/net/http/transport_test.go  | 3 ++-
 src/net/timeout_test.go         | 4 ++++
 src/net/unixsock_test.go        | 4 ++++
 src/runtime/pprof/pprof_test.go | 3 +++
 9 files changed, 31 insertions(+), 11 deletions(-)

(limited to 'src/runtime')

diff --git a/src/context/context_test.go b/src/context/context_test.go
index 05345fc5e5..60020303c7 100644
--- a/src/context/context_test.go
+++ b/src/context/context_test.go
@@ -6,6 +6,7 @@ package context
 
 import (
 	"fmt"
+	"internal/testenv"
 	"math/rand"
 	"runtime"
 	"strings"
@@ -258,6 +259,9 @@ func TestDeadline(t *testing.T) {
 }
 
 func TestTimeout(t *testing.T) {
+	if runtime.GOOS == "openbsd" {
+		testenv.SkipFlaky(t, 15158)
+	}
 	c, _ := WithTimeout(Background(), 100*time.Millisecond)
 	if got, prefix := fmt.Sprint(c), "context.Background.WithDeadline("; !strings.HasPrefix(got, prefix) {
 		t.Errorf("c.String() = %q want prefix %q", got, prefix)
diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go
index c066048630..8e2fd6e584 100644
--- a/src/go/build/deps_test.go
+++ b/src/go/build/deps_test.go
@@ -168,7 +168,7 @@ var pkgDeps = map[string][]string{
 	"testing":          {"L2", "flag", "fmt", "os", "runtime/debug", "runtime/pprof", "runtime/trace", "time"},
 	"testing/iotest":   {"L2", "log"},
 	"testing/quick":    {"L2", "flag", "fmt", "reflect"},
-	"internal/testenv": {"L2", "OS", "testing"},
+	"internal/testenv": {"L2", "OS", "flag", "testing"},
 
 	// L4 is defined as L3+fmt+log+time, because in general once
 	// you're using L3 packages, use of fmt, log, or time is not a big deal.
diff --git a/src/internal/testenv/testenv.go b/src/internal/testenv/testenv.go
index e751e0cf11..9e684e3034 100644
--- a/src/internal/testenv/testenv.go
+++ b/src/internal/testenv/testenv.go
@@ -11,6 +11,7 @@
 package testenv
 
 import (
+	"flag"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -124,3 +125,11 @@ func MustHaveExternalNetwork(t *testing.T) {
 		t.Skipf("skipping test: no external network in -short mode")
 	}
 }
+
+var flaky = flag.Bool("flaky", false, "run known-flaky tests too")
+
+func SkipFlaky(t *testing.T, issue int) {
+	if !*flaky {
+		t.Skipf("skipping known flaky test without the -flaky flag; see golang.org/issue/%d", issue)
+	}
+}
diff --git a/src/net/dial_test.go b/src/net/dial_test.go
index 2fc75c6356..f8e90abb48 100644
--- a/src/net/dial_test.go
+++ b/src/net/dial_test.go
@@ -59,6 +59,8 @@ func TestDialTimeoutFDLeak(t *testing.T) {
 	switch runtime.GOOS {
 	case "plan9":
 		t.Skipf("%s does not have full support of socktest", runtime.GOOS)
+	case "openbsd":
+		testenv.SkipFlaky(t, 15157)
 	}
 
 	const T = 100 * time.Millisecond
@@ -126,6 +128,8 @@ func TestDialerDualStackFDLeak(t *testing.T) {
 		t.Skipf("%s does not have full support of socktest", runtime.GOOS)
 	case "windows":
 		t.Skipf("not implemented a way to cancel dial racers in TCP SYN-SENT state on %s", runtime.GOOS)
+	case "openbsd":
+		testenv.SkipFlaky(t, 15157)
 	}
 	if !supportsIPv4 || !supportsIPv6 {
 		t.Skip("both IPv4 and IPv6 are required")
diff --git a/src/net/http/main_test.go b/src/net/http/main_test.go
index 299cd7b2d2..1163874ac2 100644
--- a/src/net/http/main_test.go
+++ b/src/net/http/main_test.go
@@ -5,7 +5,6 @@
 package http_test
 
 import (
-	"flag"
 	"fmt"
 	"net/http"
 	"os"
@@ -16,8 +15,6 @@ import (
 	"time"
 )
 
-var flaky = flag.Bool("flaky", false, "run known-flaky tests too")
-
 func TestMain(m *testing.M) {
 	v := m.Run()
 	if v == 0 && goroutineLeaked() {
@@ -91,12 +88,6 @@ func setParallel(t *testing.T) {
 	}
 }
 
-func setFlaky(t *testing.T, issue int) {
-	if !*flaky {
-		t.Skipf("skipping known flaky test; see golang.org/issue/%d", issue)
-	}
-}
-
 func afterTest(t testing.TB) {
 	http.DefaultTransport.(*http.Transport).CloseIdleConnections()
 	if testing.Short() {
diff --git a/src/net/http/transport_test.go b/src/net/http/transport_test.go
index 7a01dca394..1aa26610b0 100644
--- a/src/net/http/transport_test.go
+++ b/src/net/http/transport_test.go
@@ -18,6 +18,7 @@ import (
 	"crypto/tls"
 	"errors"
 	"fmt"
+	"internal/testenv"
 	"io"
 	"io/ioutil"
 	"log"
@@ -2229,7 +2230,7 @@ func TestTransportTLSHandshakeTimeout(t *testing.T) {
 // Trying to repro golang.org/issue/3514
 func TestTLSServerClosesConnection(t *testing.T) {
 	defer afterTest(t)
-	setFlaky(t, 7634)
+	testenv.SkipFlaky(t, 7634)
 
 	closedc := make(chan bool, 1)
 	ts := httptest.NewTLSServer(HandlerFunc(func(w ResponseWriter, r *Request) {
diff --git a/src/net/timeout_test.go b/src/net/timeout_test.go
index d80e478c77..3ea0ec1ebd 100644
--- a/src/net/timeout_test.go
+++ b/src/net/timeout_test.go
@@ -6,6 +6,7 @@ package net
 
 import (
 	"fmt"
+	"internal/testenv"
 	"io"
 	"io/ioutil"
 	"net/internal/socktest"
@@ -112,6 +113,9 @@ var dialTimeoutMaxDurationTests = []struct {
 
 func TestDialTimeoutMaxDuration(t *testing.T) {
 	t.Parallel()
+	if runtime.GOOS == "openbsd" {
+		testenv.SkipFlaky(t, 15157)
+	}
 
 	ln, err := newLocalListener("tcp")
 	if err != nil {
diff --git a/src/net/unixsock_test.go b/src/net/unixsock_test.go
index d70c0d1953..f0f88ed37b 100644
--- a/src/net/unixsock_test.go
+++ b/src/net/unixsock_test.go
@@ -8,6 +8,7 @@ package net
 
 import (
 	"bytes"
+	"internal/testenv"
 	"os"
 	"reflect"
 	"runtime"
@@ -20,6 +21,9 @@ func TestReadUnixgramWithUnnamedSocket(t *testing.T) {
 	if !testableNetwork("unixgram") {
 		t.Skip("unixgram test")
 	}
+	if runtime.GOOS == "openbsd" {
+		testenv.SkipFlaky(t, 15157)
+	}
 
 	addr := testUnixAddr()
 	la, err := ResolveUnixAddr("unixgram", addr)
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index fa0af59b37..23bc72c1e4 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -585,6 +585,9 @@ func func3(c chan int) { <-c }
 func func4(c chan int) { <-c }
 
 func TestGoroutineCounts(t *testing.T) {
+	if runtime.GOOS == "openbsd" {
+		testenv.SkipFlaky(t, 15156)
+	}
 	c := make(chan int)
 	for i := 0; i < 100; i++ {
 		if i%10 == 0 {
-- 
cgit v1.3


From 0c81248bf46f611b56e3ab38b4d83e449b3c8636 Mon Sep 17 00:00:00 2001
From: Dave Cheney <dave@cheney.net>
Date: Wed, 6 Apr 2016 18:43:23 +1000
Subject: runtime: remove unused return value from lfstackUnpack

Neither of the two places that call lfstackUnpack uses the second return value.
This simplifies a followup CL that merges the lfstack{Pack,Unpack}
implementations.

Change-Id: I3c93f6259da99e113d94f8c8027584da79c1ac2c
Reviewed-on: https://go-review.googlesource.com/21595
Run-TryBot: Dave Cheney <dave@cheney.net>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
---
 src/runtime/lfstack.go       | 4 ++--
 src/runtime/lfstack_32bit.go | 6 ++----
 src/runtime/lfstack_64bit.go | 6 ++----
 src/runtime/lfstack_amd64.go | 6 ++----
 4 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/lfstack.go b/src/runtime/lfstack.go
index ea640eb12f..1261f54d97 100644
--- a/src/runtime/lfstack.go
+++ b/src/runtime/lfstack.go
@@ -15,7 +15,7 @@ import (
 func lfstackpush(head *uint64, node *lfnode) {
 	node.pushcnt++
 	new := lfstackPack(node, node.pushcnt)
-	if node1, _ := lfstackUnpack(new); node1 != node {
+	if node1 := lfstackUnpack(new); node1 != node {
 		print("runtime: lfstackpush invalid packing: node=", node, " cnt=", hex(node.pushcnt), " packed=", hex(new), " -> node=", node1, "\n")
 		throw("lfstackpush")
 	}
@@ -34,7 +34,7 @@ func lfstackpop(head *uint64) unsafe.Pointer {
 		if old == 0 {
 			return nil
 		}
-		node, _ := lfstackUnpack(old)
+		node := lfstackUnpack(old)
 		next := atomic.Load64(&node.next)
 		if atomic.Cas64(head, old, next) {
 			return unsafe.Pointer(node)
diff --git a/src/runtime/lfstack_32bit.go b/src/runtime/lfstack_32bit.go
index 36811c1e47..2f59e0212e 100644
--- a/src/runtime/lfstack_32bit.go
+++ b/src/runtime/lfstack_32bit.go
@@ -14,8 +14,6 @@ func lfstackPack(node *lfnode, cnt uintptr) uint64 {
 	return uint64(uintptr(unsafe.Pointer(node)))<<32 | uint64(cnt)
 }
 
-func lfstackUnpack(val uint64) (node *lfnode, cnt uintptr) {
-	node = (*lfnode)(unsafe.Pointer(uintptr(val >> 32)))
-	cnt = uintptr(val)
-	return
+func lfstackUnpack(val uint64) *lfnode {
+	return (*lfnode)(unsafe.Pointer(uintptr(val >> 32)))
 }
diff --git a/src/runtime/lfstack_64bit.go b/src/runtime/lfstack_64bit.go
index 27a058c763..07c2a141f0 100644
--- a/src/runtime/lfstack_64bit.go
+++ b/src/runtime/lfstack_64bit.go
@@ -28,8 +28,6 @@ func lfstackPack(node *lfnode, cnt uintptr) uint64 {
 	return uint64(uintptr(unsafe.Pointer(node)))<<(64-addrBits) | uint64(cnt&(1<<cntBits-1))
 }
 
-func lfstackUnpack(val uint64) (node *lfnode, cnt uintptr) {
-	node = (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3)))
-	cnt = uintptr(val & (1<<cntBits - 1))
-	return
+func lfstackUnpack(val uint64) *lfnode {
+	return (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3)))
 }
diff --git a/src/runtime/lfstack_amd64.go b/src/runtime/lfstack_amd64.go
index 0a71455c6b..6397e1d47f 100644
--- a/src/runtime/lfstack_amd64.go
+++ b/src/runtime/lfstack_amd64.go
@@ -17,8 +17,6 @@ func lfstackPack(node *lfnode, cnt uintptr) uint64 {
 	return uint64(uintptr(unsafe.Pointer(node)))<<16 | uint64(cnt&(1<<19-1))
 }
 
-func lfstackUnpack(val uint64) (node *lfnode, cnt uintptr) {
-	node = (*lfnode)(unsafe.Pointer(uintptr(int64(val) >> 19 << 3)))
-	cnt = uintptr(val & (1<<19 - 1))
-	return
+func lfstackUnpack(val uint64) *lfnode {
+	return (*lfnode)(unsafe.Pointer(uintptr(int64(val) >> 19 << 3)))
 }
-- 
cgit v1.3


From 31cf1c17792d4da9dae2504c703633a0db8072c7 Mon Sep 17 00:00:00 2001
From: Michael Hudson-Doyle <michael.hudson@canonical.com>
Date: Thu, 7 Apr 2016 11:47:32 +1200
Subject: runtime: clamp OS-reported number of processors to _MaxGomaxprocs

So that all Go processes do not die on startup on a system with >256 CPUs.

I tested this by hacking osinit to set ncpu to 1000.

Updates #15131

Change-Id: I52e061a0de97be41d684dd8b748fa9087d6f1aef
Reviewed-on: https://go-review.googlesource.com/21599
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
---
 src/runtime/proc.go | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/runtime')

diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 5145c84aea..1f55b0fa21 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -449,6 +449,9 @@ func schedinit() {
 
 	sched.lastpoll = uint64(nanotime())
 	procs := int(ncpu)
+	if procs > _MaxGomaxprocs {
+		procs = _MaxGomaxprocs
+	}
 	if n := atoi(gogetenv("GOMAXPROCS")); n > 0 {
 		if n > _MaxGomaxprocs {
 			n = _MaxGomaxprocs
-- 
cgit v1.3


From 3b02c5b1b66df9cdb23d5a3243bb37b2c312ea1b Mon Sep 17 00:00:00 2001
From: Dave Cheney <dave@cheney.net>
Date: Thu, 7 Apr 2016 07:29:22 +1000
Subject: runtime: merge lfstack{Pack,Unpack} into one file

Merge the remaining lfstack{Pack,Unpack} implementations into one file.

unsafe.Sizeof(uintptr(0)) == 4 is a constant comparison so this branch
folds away at compile time.

Dmitry confirmed that the upper 17 bits of an address will be zero for a
user mode pointer, so there is no need to sign extend on amd64 during
unpack, so we can reuse the same implementation as all other 64 bit
archs.

Change-Id: I99f589416d8b181ccde5364c9c2e78e4a5efc7f1
Reviewed-on: https://go-review.googlesource.com/21597
Run-TryBot: Dave Cheney <dave@cheney.net>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Minux Ma <minux@golang.org>
---
 src/runtime/lfstack.go       | 35 +++++++++++++++++++++++++++++++++++
 src/runtime/lfstack_32bit.go | 19 -------------------
 src/runtime/lfstack_64bit.go | 33 ---------------------------------
 src/runtime/lfstack_amd64.go | 22 ----------------------
 4 files changed, 35 insertions(+), 74 deletions(-)
 delete mode 100644 src/runtime/lfstack_32bit.go
 delete mode 100644 src/runtime/lfstack_64bit.go
 delete mode 100644 src/runtime/lfstack_amd64.go

(limited to 'src/runtime')

diff --git a/src/runtime/lfstack.go b/src/runtime/lfstack.go
index 1261f54d97..8a2d519402 100644
--- a/src/runtime/lfstack.go
+++ b/src/runtime/lfstack.go
@@ -41,3 +41,38 @@ func lfstackpop(head *uint64) unsafe.Pointer {
 		}
 	}
 }
+
+const (
+	addrBits = 48
+	cntBits  = 64 - addrBits + 3
+)
+
+func lfstackPack(node *lfnode, cnt uintptr) uint64 {
+	if unsafe.Sizeof(uintptr(0)) == 4 {
+		// On 32-bit systems, the stored uint64 has a 32-bit pointer and 32-bit count.
+		return uint64(uintptr(unsafe.Pointer(node)))<<32 | uint64(cnt)
+	}
+	// On ppc64, Linux limits the user address space to 46 bits (see
+	// TASK_SIZE_USER64 in the Linux kernel).  This has grown over time,
+	// so here we allow 48 bit addresses.
+	//
+	// On mips64, Linux limits the user address space to 40 bits (see
+	// TASK_SIZE64 in the Linux kernel).  This has grown over time,
+	// so here we allow 48 bit addresses.
+	//
+	// On AMD64, virtual addresses are 48-bit numbers sign extended to 64.
+	// We shift the address left 16 to eliminate the sign extended part and make
+	// room in the bottom for the count.
+	//
+	// In addition to the 16 bits taken from the top, we can take 3 from the
+	// bottom, because node must be pointer-aligned, giving a total of 19 bits
+	// of count.
+	return uint64(uintptr(unsafe.Pointer(node)))<<(64-addrBits) | uint64(cnt&(1<<cntBits-1))
+}
+
+func lfstackUnpack(val uint64) *lfnode {
+	if unsafe.Sizeof(uintptr(0)) == 4 {
+		return (*lfnode)(unsafe.Pointer(uintptr(val >> 32)))
+	}
+	return (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3)))
+}
diff --git a/src/runtime/lfstack_32bit.go b/src/runtime/lfstack_32bit.go
deleted file mode 100644
index 2f59e0212e..0000000000
--- a/src/runtime/lfstack_32bit.go
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build 386 arm nacl
-
-package runtime
-
-import "unsafe"
-
-// On 32-bit systems, the stored uint64 has a 32-bit pointer and 32-bit count.
-
-func lfstackPack(node *lfnode, cnt uintptr) uint64 {
-	return uint64(uintptr(unsafe.Pointer(node)))<<32 | uint64(cnt)
-}
-
-func lfstackUnpack(val uint64) *lfnode {
-	return (*lfnode)(unsafe.Pointer(uintptr(val >> 32)))
-}
diff --git a/src/runtime/lfstack_64bit.go b/src/runtime/lfstack_64bit.go
deleted file mode 100644
index 07c2a141f0..0000000000
--- a/src/runtime/lfstack_64bit.go
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build arm64 mips64 mips64le ppc64 ppc64le
-
-package runtime
-
-import "unsafe"
-
-// On ppc64, Linux limits the user address space to 46 bits (see
-// TASK_SIZE_USER64 in the Linux kernel).  This has grown over time,
-// so here we allow 48 bit addresses.
-//
-// On mips64, Linux limits the user address space to 40 bits (see
-// TASK_SIZE64 in the Linux kernel).  This has grown over time,
-// so here we allow 48 bit addresses.
-//
-// In addition to the 16 bits taken from the top, we can take 3 from the
-// bottom, because node must be pointer-aligned, giving a total of 19 bits
-// of count.
-const (
-	addrBits = 48
-	cntBits  = 64 - addrBits + 3
-)
-
-func lfstackPack(node *lfnode, cnt uintptr) uint64 {
-	return uint64(uintptr(unsafe.Pointer(node)))<<(64-addrBits) | uint64(cnt&(1<<cntBits-1))
-}
-
-func lfstackUnpack(val uint64) *lfnode {
-	return (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3)))
-}
diff --git a/src/runtime/lfstack_amd64.go b/src/runtime/lfstack_amd64.go
deleted file mode 100644
index 6397e1d47f..0000000000
--- a/src/runtime/lfstack_amd64.go
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-// On AMD64, virtual addresses are 48-bit numbers sign extended to 64.
-// We shift the address left 16 to eliminate the sign extended part and make
-// room in the bottom for the count.
-// In addition to the 16 bits taken from the top, we can take 3 from the
-// bottom, because node must be pointer-aligned, giving a total of 19 bits
-// of count.
-
-func lfstackPack(node *lfnode, cnt uintptr) uint64 {
-	return uint64(uintptr(unsafe.Pointer(node)))<<16 | uint64(cnt&(1<<19-1))
-}
-
-func lfstackUnpack(val uint64) *lfnode {
-	return (*lfnode)(unsafe.Pointer(uintptr(int64(val) >> 19 << 3)))
-}
-- 
cgit v1.3


From 121c434f7add815c3147b01a097a8998018bcc6b Mon Sep 17 00:00:00 2001
From: Richard Miller <miller.research@gmail.com>
Date: Wed, 6 Apr 2016 18:58:22 +0100
Subject: runtime/pprof: make TestBlockProfile less timing dependent

The test for profiling of channel blocking is timing dependent,
and in particular the blockSelectRecvAsync case can fail on a
slow builder (plan9_arm) when many tests are run in parallel.
The child goroutine sleeps for a fixed period so the parent
can be observed to block in a select call reading from the
child; but if the OS process running the parent goroutine is
delayed long enough, the child may wake again before the
parent has reached the blocking point.  By repeating the test
three times, the likelihood of a blocking event is increased.

Fixes #15096

Change-Id: I2ddb9576a83408d06b51ded682bf8e71e53ce59e
Reviewed-on: https://go-review.googlesource.com/21604
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/pprof/pprof_test.go | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index 23bc72c1e4..8b2f3d5291 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -530,15 +530,20 @@ func blockChanClose() {
 }
 
 func blockSelectRecvAsync() {
+	const numTries = 3
 	c := make(chan bool, 1)
 	c2 := make(chan bool, 1)
 	go func() {
-		time.Sleep(blockDelay)
-		c <- true
+		for i := 0; i < numTries; i++ {
+			time.Sleep(blockDelay)
+			c <- true
+		}
 	}()
-	select {
-	case <-c:
-	case <-c2:
+	for i := 0; i < numTries; i++ {
+		select {
+		case <-c:
+		case <-c2:
+		}
 	}
 }
 
-- 
cgit v1.3


From 9cc9e95b288648d796d92f0b92cb713b35f20062 Mon Sep 17 00:00:00 2001
From: Dave Cheney <dave@cheney.net>
Date: Thu, 7 Apr 2016 14:05:06 +0000
Subject: Revert "runtime: merge lfstack{Pack,Unpack} into one file"

This broke solaris, which apparently does use the upper 17 bits of the address space.

This reverts commit 3b02c5b1b66df9cdb23d5a3243bb37b2c312ea1b.

Change-Id: Iedfe54abd0384960845468205f20191a97751c0b
Reviewed-on: https://go-review.googlesource.com/21652
Reviewed-by: Dave Cheney <dave@cheney.net>
---
 src/runtime/lfstack.go       | 35 -----------------------------------
 src/runtime/lfstack_32bit.go | 19 +++++++++++++++++++
 src/runtime/lfstack_64bit.go | 33 +++++++++++++++++++++++++++++++++
 src/runtime/lfstack_amd64.go | 22 ++++++++++++++++++++++
 4 files changed, 74 insertions(+), 35 deletions(-)
 create mode 100644 src/runtime/lfstack_32bit.go
 create mode 100644 src/runtime/lfstack_64bit.go
 create mode 100644 src/runtime/lfstack_amd64.go

(limited to 'src/runtime')

diff --git a/src/runtime/lfstack.go b/src/runtime/lfstack.go
index 8a2d519402..1261f54d97 100644
--- a/src/runtime/lfstack.go
+++ b/src/runtime/lfstack.go
@@ -41,38 +41,3 @@ func lfstackpop(head *uint64) unsafe.Pointer {
 		}
 	}
 }
-
-const (
-	addrBits = 48
-	cntBits  = 64 - addrBits + 3
-)
-
-func lfstackPack(node *lfnode, cnt uintptr) uint64 {
-	if unsafe.Sizeof(uintptr(0)) == 4 {
-		// On 32-bit systems, the stored uint64 has a 32-bit pointer and 32-bit count.
-		return uint64(uintptr(unsafe.Pointer(node)))<<32 | uint64(cnt)
-	}
-	// On ppc64, Linux limits the user address space to 46 bits (see
-	// TASK_SIZE_USER64 in the Linux kernel).  This has grown over time,
-	// so here we allow 48 bit addresses.
-	//
-	// On mips64, Linux limits the user address space to 40 bits (see
-	// TASK_SIZE64 in the Linux kernel).  This has grown over time,
-	// so here we allow 48 bit addresses.
-	//
-	// On AMD64, virtual addresses are 48-bit numbers sign extended to 64.
-	// We shift the address left 16 to eliminate the sign extended part and make
-	// room in the bottom for the count.
-	//
-	// In addition to the 16 bits taken from the top, we can take 3 from the
-	// bottom, because node must be pointer-aligned, giving a total of 19 bits
-	// of count.
-	return uint64(uintptr(unsafe.Pointer(node)))<<(64-addrBits) | uint64(cnt&(1<<cntBits-1))
-}
-
-func lfstackUnpack(val uint64) *lfnode {
-	if unsafe.Sizeof(uintptr(0)) == 4 {
-		return (*lfnode)(unsafe.Pointer(uintptr(val >> 32)))
-	}
-	return (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3)))
-}
diff --git a/src/runtime/lfstack_32bit.go b/src/runtime/lfstack_32bit.go
new file mode 100644
index 0000000000..2f59e0212e
--- /dev/null
+++ b/src/runtime/lfstack_32bit.go
@@ -0,0 +1,19 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build 386 arm nacl
+
+package runtime
+
+import "unsafe"
+
+// On 32-bit systems, the stored uint64 has a 32-bit pointer and 32-bit count.
+
+func lfstackPack(node *lfnode, cnt uintptr) uint64 {
+	return uint64(uintptr(unsafe.Pointer(node)))<<32 | uint64(cnt)
+}
+
+func lfstackUnpack(val uint64) *lfnode {
+	return (*lfnode)(unsafe.Pointer(uintptr(val >> 32)))
+}
diff --git a/src/runtime/lfstack_64bit.go b/src/runtime/lfstack_64bit.go
new file mode 100644
index 0000000000..07c2a141f0
--- /dev/null
+++ b/src/runtime/lfstack_64bit.go
@@ -0,0 +1,33 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build arm64 mips64 mips64le ppc64 ppc64le
+
+package runtime
+
+import "unsafe"
+
+// On ppc64, Linux limits the user address space to 46 bits (see
+// TASK_SIZE_USER64 in the Linux kernel).  This has grown over time,
+// so here we allow 48 bit addresses.
+//
+// On mips64, Linux limits the user address space to 40 bits (see
+// TASK_SIZE64 in the Linux kernel).  This has grown over time,
+// so here we allow 48 bit addresses.
+//
+// In addition to the 16 bits taken from the top, we can take 3 from the
+// bottom, because node must be pointer-aligned, giving a total of 19 bits
+// of count.
+const (
+	addrBits = 48
+	cntBits  = 64 - addrBits + 3
+)
+
+func lfstackPack(node *lfnode, cnt uintptr) uint64 {
+	return uint64(uintptr(unsafe.Pointer(node)))<<(64-addrBits) | uint64(cnt&(1<<cntBits-1))
+}
+
+func lfstackUnpack(val uint64) *lfnode {
+	return (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3)))
+}
diff --git a/src/runtime/lfstack_amd64.go b/src/runtime/lfstack_amd64.go
new file mode 100644
index 0000000000..6397e1d47f
--- /dev/null
+++ b/src/runtime/lfstack_amd64.go
@@ -0,0 +1,22 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// On AMD64, virtual addresses are 48-bit numbers sign extended to 64.
+// We shift the address left 16 to eliminate the sign extended part and make
+// room in the bottom for the count.
+// In addition to the 16 bits taken from the top, we can take 3 from the
+// bottom, because node must be pointer-aligned, giving a total of 19 bits
+// of count.
+
+func lfstackPack(node *lfnode, cnt uintptr) uint64 {
+	return uint64(uintptr(unsafe.Pointer(node)))<<16 | uint64(cnt&(1<<19-1))
+}
+
+func lfstackUnpack(val uint64) *lfnode {
+	return (*lfnode)(unsafe.Pointer(uintptr(int64(val) >> 19 << 3)))
+}
-- 
cgit v1.3


From e6f36f0cd5b45b9ce7809a34c45aeb66a5ca64a4 Mon Sep 17 00:00:00 2001
From: Michael Munday <munday@ca.ibm.com>
Date: Fri, 18 Mar 2016 19:09:39 -0400
Subject: runtime: add s390x support (new files and lfstack_64bit.go
 modifications)

Change-Id: I51c0a332e3cbdab348564e5dcd27583e75e4b881
Reviewed-on: https://go-review.googlesource.com/20946
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/asm_s390x.s           | 1130 +++++++++++++++++++++++++++++++++++++
 src/runtime/defs_linux_s390x.go   |  167 ++++++
 src/runtime/lfstack_64bit.go      |   32 +-
 src/runtime/memclr_s390x.s        |  122 ++++
 src/runtime/memmove_s390x.s       |  189 +++++++
 src/runtime/os_linux_s390x.go     |   46 ++
 src/runtime/rt0_linux_s390x.s     |   20 +
 src/runtime/signal_linux_s390x.go |  208 +++++++
 src/runtime/sys_linux_s390x.s     |  440 +++++++++++++++
 src/runtime/sys_s390x.go          |   45 ++
 src/runtime/tls_s390x.s           |   51 ++
 11 files changed, 2437 insertions(+), 13 deletions(-)
 create mode 100644 src/runtime/asm_s390x.s
 create mode 100644 src/runtime/defs_linux_s390x.go
 create mode 100644 src/runtime/memclr_s390x.s
 create mode 100644 src/runtime/memmove_s390x.s
 create mode 100644 src/runtime/os_linux_s390x.go
 create mode 100644 src/runtime/rt0_linux_s390x.s
 create mode 100644 src/runtime/signal_linux_s390x.go
 create mode 100644 src/runtime/sys_linux_s390x.s
 create mode 100644 src/runtime/sys_s390x.go
 create mode 100644 src/runtime/tls_s390x.s

(limited to 'src/runtime')

diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s
new file mode 100644
index 0000000000..fc74b0ddf9
--- /dev/null
+++ b/src/runtime/asm_s390x.s
@@ -0,0 +1,1130 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "go_tls.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+// runtime·vectorfacility records the status of the vector facility:
+// -1: 	init value
+// 0:	vector not installed
+// 1:	vector installed and enabled
+// 2:	vector installed but not enabled
+
+DATA runtime·vectorfacility+0x00(SB)/4, $-1
+GLOBL runtime·vectorfacility(SB), NOPTR, $4
+
+TEXT runtime·checkvectorfacility(SB),NOSPLIT,$32-0
+	MOVD    $2, R0
+	MOVD	R1, tmp-32(SP)
+	MOVD    $x-24(SP), R1
+//      STFLE   0(R1)
+	WORD    $0xB2B01000
+	MOVBZ   z-8(SP), R1
+	AND     $0x40, R1
+	BNE     vectorinstalled
+	MOVB    $0, runtime·vectorfacility(SB) //Vector not installed
+	MOVD	tmp-32(SP), R1
+	MOVD    $0, R0
+	RET
+vectorinstalled:
+	// check if the vector instruction has been enabled
+	VLEIB   $0, $0xF, V16
+	VLGVB   $0, V16, R0
+	CMPBEQ  R0, $0xF, vectorenabled
+	MOVB    $2, runtime·vectorfacility(SB) //Vector installed but not enabled
+	MOVD    tmp-32(SP), R1
+	MOVD    $0, R0
+	RET
+vectorenabled:
+	MOVB    $1, runtime·vectorfacility(SB) //Vector installed and enabled
+	MOVD    tmp-32(SP), R1
+	MOVD    $0, R0
+	RET
+
+TEXT runtime·rt0_go(SB),NOSPLIT,$0
+	// R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer
+	// C TLS base pointer in AR0:AR1
+
+	// initialize essential registers
+	XOR	R0, R0
+
+	SUB	$24, R15
+	MOVW	R2, 8(R15) // argc
+	MOVD	R3, 16(R15) // argv
+
+	// create istack out of the given (operating system) stack.
+	// _cgo_init may update stackguard.
+	MOVD	$runtime·g0(SB), g
+	MOVD	R15, R11
+	SUB	$(64*1024), R11
+	MOVD	R11, g_stackguard0(g)
+	MOVD	R11, g_stackguard1(g)
+	MOVD	R11, (g_stack+stack_lo)(g)
+	MOVD	R15, (g_stack+stack_hi)(g)
+
+	// if there is a _cgo_init, call it using the gcc ABI.
+	MOVD	_cgo_init(SB), R11
+	CMPBEQ	R11, $0, nocgo
+	MOVW	AR0, R4			// (AR0 << 32 | AR1) is the TLS base pointer; MOVW is translated to EAR
+	SLD	$32, R4, R4
+	MOVW	AR1, R4			// arg 2: TLS base pointer
+	MOVD	$setg_gcc<>(SB), R3 	// arg 1: setg
+	MOVD	g, R2			// arg 0: G
+	// C functions expect 160 bytes of space on caller stack frame
+	// and an 8-byte aligned stack pointer
+	MOVD	R15, R9			// save current stack (R9 is preserved in the Linux ABI)
+	SUB	$160, R15		// reserve 160 bytes
+	MOVD    $~7, R6
+	AND 	R6, R15			// 8-byte align
+	BL	R11			// this call clobbers volatile registers according to Linux ABI (R0-R5, R14)
+	MOVD	R9, R15			// restore stack
+	XOR	R0, R0			// zero R0
+
+nocgo:
+	// update stackguard after _cgo_init
+	MOVD	(g_stack+stack_lo)(g), R2
+	ADD	$const__StackGuard, R2
+	MOVD	R2, g_stackguard0(g)
+	MOVD	R2, g_stackguard1(g)
+
+	// set the per-goroutine and per-mach "registers"
+	MOVD	$runtime·m0(SB), R2
+
+	// save m->g0 = g0
+	MOVD	g, m_g0(R2)
+	// save m0 to g0->m
+	MOVD	R2, g_m(g)
+
+	BL	runtime·check(SB)
+
+	// argc/argv are already prepared on stack
+	BL	runtime·args(SB)
+	BL	runtime·osinit(SB)
+	BL	runtime·schedinit(SB)
+
+	// create a new goroutine to start program
+	MOVD	$runtime·mainPC(SB), R2		// entry
+	SUB     $24, R15
+	MOVD 	R2, 16(R15)
+	MOVD 	R0, 8(R15)
+	MOVD 	R0, 0(R15)
+	BL	runtime·newproc(SB)
+	ADD	$24, R15
+
+	// start this M
+	BL	runtime·mstart(SB)
+
+	MOVD	R0, 1(R0)
+	RET
+
+DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
+GLOBL	runtime·mainPC(SB),RODATA,$8
+
+TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
+	MOVD	R0, 2(R0)
+	RET
+
+TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
+	RET
+
+/*
+ *  go-routine
+ */
+
+// void gosave(Gobuf*)
+// save state in Gobuf; setjmp
+TEXT runtime·gosave(SB), NOSPLIT, $-8-8
+	MOVD	buf+0(FP), R3
+	MOVD	R15, gobuf_sp(R3)
+	MOVD	LR, gobuf_pc(R3)
+	MOVD	g, gobuf_g(R3)
+	MOVD	$0, gobuf_lr(R3)
+	MOVD	$0, gobuf_ret(R3)
+	MOVD	$0, gobuf_ctxt(R3)
+	RET
+
+// void gogo(Gobuf*)
+// restore state from Gobuf; longjmp
+TEXT runtime·gogo(SB), NOSPLIT, $-8-8
+	MOVD	buf+0(FP), R5
+	MOVD	gobuf_g(R5), g	// make sure g is not nil
+	BL	runtime·save_g(SB)
+
+	MOVD	0(g), R4
+	MOVD	gobuf_sp(R5), R15
+	MOVD	gobuf_lr(R5), LR
+	MOVD	gobuf_ret(R5), R3
+	MOVD	gobuf_ctxt(R5), R12
+	MOVD	$0, gobuf_sp(R5)
+	MOVD	$0, gobuf_ret(R5)
+	MOVD	$0, gobuf_lr(R5)
+	MOVD	$0, gobuf_ctxt(R5)
+	CMP	R0, R0 // set condition codes for == test, needed by stack split
+	MOVD	gobuf_pc(R5), R6
+	BR	(R6)
+
+// void mcall(fn func(*g))
+// Switch to m->g0's stack, call fn(g).
+// Fn must never return.  It should gogo(&g->sched)
+// to keep running g.
+TEXT runtime·mcall(SB), NOSPLIT, $-8-8
+	// Save caller state in g->sched
+	MOVD	R15, (g_sched+gobuf_sp)(g)
+	MOVD	LR, (g_sched+gobuf_pc)(g)
+	MOVD	R0, (g_sched+gobuf_lr)(g)
+	MOVD	g, (g_sched+gobuf_g)(g)
+
+	// Switch to m->g0 & its stack, call fn.
+	MOVD	g, R3
+	MOVD	g_m(g), R8
+	MOVD	m_g0(R8), g
+	BL	runtime·save_g(SB)
+	CMP	g, R3
+	BNE	2(PC)
+	BR	runtime·badmcall(SB)
+	MOVD	fn+0(FP), R12			// context
+	MOVD	0(R12), R4			// code pointer
+	MOVD	(g_sched+gobuf_sp)(g), R15	// sp = m->g0->sched.sp
+	SUB	$16, R15
+	MOVD	R3, 8(R15)
+	MOVD	$0, 0(R15)
+	BL	(R4)
+	BR	runtime·badmcall2(SB)
+
+// systemstack_switch is a dummy routine that systemstack leaves at the bottom
+// of the G stack.  We need to distinguish the routine that
+// lives at the bottom of the G stack from the one that lives
+// at the top of the system stack because the one at the top of
+// the system stack terminates the stack walk (see topofstack()).
+TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
+	UNDEF
+	BL	(LR)	// make sure this function is not leaf
+	RET
+
+// func systemstack(fn func())
+TEXT runtime·systemstack(SB), NOSPLIT, $0-8
+	MOVD	fn+0(FP), R3	// R3 = fn
+	MOVD	R3, R12		// context
+	MOVD	g_m(g), R4	// R4 = m
+
+	MOVD	m_gsignal(R4), R5	// R5 = gsignal
+	CMPBEQ	g, R5, noswitch
+
+	MOVD	m_g0(R4), R5	// R5 = g0
+	CMPBEQ	g, R5, noswitch
+
+	MOVD	m_curg(R4), R6
+	CMPBEQ	g, R6, switch
+
+	// Bad: g is not gsignal, not g0, not curg. What is it?
+	// Hide call from linker nosplit analysis.
+	MOVD	$runtime·badsystemstack(SB), R3
+	BL	(R3)
+
+switch:
+	// save our state in g->sched.  Pretend to
+	// be systemstack_switch if the G stack is scanned.
+	MOVD	$runtime·systemstack_switch(SB), R6
+	ADD	$16, R6	// get past prologue
+	MOVD	R6, (g_sched+gobuf_pc)(g)
+	MOVD	R15, (g_sched+gobuf_sp)(g)
+	MOVD	R0, (g_sched+gobuf_lr)(g)
+	MOVD	g, (g_sched+gobuf_g)(g)
+
+	// switch to g0
+	MOVD	R5, g
+	BL	runtime·save_g(SB)
+	MOVD	(g_sched+gobuf_sp)(g), R3
+	// make it look like mstart called systemstack on g0, to stop traceback
+	SUB	$8, R3
+	MOVD	$runtime·mstart(SB), R4
+	MOVD	R4, 0(R3)
+	MOVD	R3, R15
+
+	// call target function
+	MOVD	0(R12), R3	// code pointer
+	BL	(R3)
+
+	// switch back to g
+	MOVD	g_m(g), R3
+	MOVD	m_curg(R3), g
+	BL	runtime·save_g(SB)
+	MOVD	(g_sched+gobuf_sp)(g), R15
+	MOVD	$0, (g_sched+gobuf_sp)(g)
+	RET
+
+noswitch:
+	// already on m stack, just call directly
+	MOVD	0(R12), R3	// code pointer
+	BL	(R3)
+	RET
+
+/*
+ * support for morestack
+ */
+
+// Called during function prolog when more stack is needed.
+// Caller has already loaded:
+// R3: framesize, R4: argsize, R5: LR
+//
+// The traceback routines see morestack on a g0 as being
+// the top of a stack (for example, morestack calling newstack
+// calling the scheduler calling newm calling gc), so we must
+// record an argument size. For that purpose, it has no arguments.
+TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
+	// Cannot grow scheduler stack (m->g0).
+	MOVD	g_m(g), R7
+	MOVD	m_g0(R7), R8
+	CMPBNE	g, R8, 2(PC)
+	BL	runtime·abort(SB)
+
+	// Cannot grow signal stack (m->gsignal).
+	MOVD	m_gsignal(R7), R8
+	CMP	g, R8
+	BNE	2(PC)
+	BL	runtime·abort(SB)
+
+	// Called from f.
+	// Set g->sched to context in f.
+	MOVD	R12, (g_sched+gobuf_ctxt)(g)
+	MOVD	R15, (g_sched+gobuf_sp)(g)
+	MOVD	LR, R8
+	MOVD	R8, (g_sched+gobuf_pc)(g)
+	MOVD	R5, (g_sched+gobuf_lr)(g)
+
+	// Called from f.
+	// Set m->morebuf to f's caller.
+	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
+	MOVD	R15, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
+	MOVD	g, (m_morebuf+gobuf_g)(R7)
+
+	// Call newstack on m->g0's stack.
+	MOVD	m_g0(R7), g
+	BL	runtime·save_g(SB)
+	MOVD	(g_sched+gobuf_sp)(g), R15
+	BL	runtime·newstack(SB)
+
+	// Not reached, but make sure the return PC from the call to newstack
+	// is still in this function, and not the beginning of the next.
+	UNDEF
+
+TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
+	MOVD	$0, R12
+	BR	runtime·morestack(SB)
+
+TEXT runtime·stackBarrier(SB),NOSPLIT,$0
+	// We came here via a RET to an overwritten LR.
+	// R3 may be live. Other registers are available.
+
+	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
+	MOVD	(g_stkbar+slice_array)(g), R4
+	MOVD	g_stkbarPos(g), R5
+	MOVD	$stkbar__size, R6
+	MULLD	R5, R6
+	ADD	R4, R6
+	MOVD	stkbar_savedLRVal(R6), R6
+	// Record that this stack barrier was hit.
+	ADD	$1, R5
+	MOVD	R5, g_stkbarPos(g)
+	// Jump to the original return PC.
+	BR	(R6)
+
+// reflectcall: call a function with the given argument list
+// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
+// we don't have variable-sized frames, so we use a small number
+// of constant-sized-frame functions to encode a few bits of size in the pc.
+// Caution: ugly multiline assembly macros in your future!
+
+#define DISPATCH(NAME,MAXSIZE)		\
+	MOVD	$MAXSIZE, R4;		\
+	CMP	R3, R4;		\
+	BGT	3(PC);			\
+	MOVD	$NAME(SB), R5;	\
+	BR	(R5)
+// Note: can't just "BR NAME(SB)" - bad inlining results.
+
+TEXT reflect·call(SB), NOSPLIT, $0-0
+	BR	·reflectcall(SB)
+
+TEXT ·reflectcall(SB), NOSPLIT, $-8-32
+	MOVWZ argsize+24(FP), R3
+	// NOTE(rsc): No call16, because CALLFN needs four words
+	// of argument space to invoke callwritebarrier.
+	DISPATCH(runtime·call32, 32)
+	DISPATCH(runtime·call64, 64)
+	DISPATCH(runtime·call128, 128)
+	DISPATCH(runtime·call256, 256)
+	DISPATCH(runtime·call512, 512)
+	DISPATCH(runtime·call1024, 1024)
+	DISPATCH(runtime·call2048, 2048)
+	DISPATCH(runtime·call4096, 4096)
+	DISPATCH(runtime·call8192, 8192)
+	DISPATCH(runtime·call16384, 16384)
+	DISPATCH(runtime·call32768, 32768)
+	DISPATCH(runtime·call65536, 65536)
+	DISPATCH(runtime·call131072, 131072)
+	DISPATCH(runtime·call262144, 262144)
+	DISPATCH(runtime·call524288, 524288)
+	DISPATCH(runtime·call1048576, 1048576)
+	DISPATCH(runtime·call2097152, 2097152)
+	DISPATCH(runtime·call4194304, 4194304)
+	DISPATCH(runtime·call8388608, 8388608)
+	DISPATCH(runtime·call16777216, 16777216)
+	DISPATCH(runtime·call33554432, 33554432)
+	DISPATCH(runtime·call67108864, 67108864)
+	DISPATCH(runtime·call134217728, 134217728)
+	DISPATCH(runtime·call268435456, 268435456)
+	DISPATCH(runtime·call536870912, 536870912)
+	DISPATCH(runtime·call1073741824, 1073741824)
+	MOVD	$runtime·badreflectcall(SB), R5
+	BR	(R5)
+
+#define CALLFN(NAME,MAXSIZE)			\
+TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
+	NO_LOCAL_POINTERS;			\
+	/* copy arguments to stack */		\
+	MOVD	arg+16(FP), R3;			\
+	MOVWZ	argsize+24(FP), R4;			\
+	MOVD	R15, R5;				\
+	ADD	$(8-1), R5;			\
+	SUB	$1, R3;				\
+	ADD	R5, R4;				\
+	CMP	R5, R4;				\
+	BEQ	6(PC);				\
+	ADD	$1, R3;				\
+	ADD	$1, R5;				\
+	MOVBZ	0(R3), R6;			\
+	MOVBZ	R6, 0(R5);			\
+	BR	-6(PC);				\
+	/* call function */			\
+	MOVD	f+8(FP), R12;			\
+	MOVD	(R12), R8;			\
+	PCDATA  $PCDATA_StackMapIndex, $0;	\
+	BL	(R8);				\
+	/* copy return values back */		\
+	MOVD	arg+16(FP), R3;			\
+	MOVWZ	n+24(FP), R4;			\
+	MOVWZ	retoffset+28(FP), R6;		\
+	MOVD	R15, R5;				\
+	ADD	R6, R5; 			\
+	ADD	R6, R3;				\
+	SUB	R6, R4;				\
+	ADD	$(8-1), R5;			\
+	SUB	$1, R3;				\
+	ADD	R5, R4;				\
+loop:						\
+	CMP	R5, R4;				\
+	BEQ	end;				\
+	ADD	$1, R5;				\
+	ADD	$1, R3;				\
+	MOVBZ	0(R5), R6;			\
+	MOVBZ	R6, 0(R3);			\
+	BR	loop;				\
+end:						\
+	/* execute write barrier updates */	\
+	MOVD	argtype+0(FP), R7;		\
+	MOVD	arg+16(FP), R3;			\
+	MOVWZ	n+24(FP), R4;			\
+	MOVWZ	retoffset+28(FP), R6;		\
+	MOVD	R7, 8(R15);			\
+	MOVD	R3, 16(R15);			\
+	MOVD	R4, 24(R15);			\
+	MOVD	R6, 32(R15);			\
+	BL	runtime·callwritebarrier(SB);	\
+	RET
+
+CALLFN(·call32, 32)
+CALLFN(·call64, 64)
+CALLFN(·call128, 128)
+CALLFN(·call256, 256)
+CALLFN(·call512, 512)
+CALLFN(·call1024, 1024)
+CALLFN(·call2048, 2048)
+CALLFN(·call4096, 4096)
+CALLFN(·call8192, 8192)
+CALLFN(·call16384, 16384)
+CALLFN(·call32768, 32768)
+CALLFN(·call65536, 65536)
+CALLFN(·call131072, 131072)
+CALLFN(·call262144, 262144)
+CALLFN(·call524288, 524288)
+CALLFN(·call1048576, 1048576)
+CALLFN(·call2097152, 2097152)
+CALLFN(·call4194304, 4194304)
+CALLFN(·call8388608, 8388608)
+CALLFN(·call16777216, 16777216)
+CALLFN(·call33554432, 33554432)
+CALLFN(·call67108864, 67108864)
+CALLFN(·call134217728, 134217728)
+CALLFN(·call268435456, 268435456)
+CALLFN(·call536870912, 536870912)
+CALLFN(·call1073741824, 1073741824)
+
+TEXT runtime·procyield(SB),NOSPLIT,$0-0
+	RET
+
+// void jmpdefer(fv, sp);
+// called from deferreturn.
+// 1. grab stored LR for caller
+// 2. sub 6 bytes to get back to BL deferreturn (size of BRASL instruction)
+// 3. BR to fn
+TEXT runtime·jmpdefer(SB),NOSPLIT|NOFRAME,$0-16
+	MOVD	0(R15), R1
+	SUB	$6, R1, LR
+
+	MOVD	fv+0(FP), R12
+	MOVD	argp+8(FP), R15
+	SUB	$8, R15
+	MOVD	0(R12), R3
+	BR	(R3)
+
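+// Save state of caller into g->sched. NOTE(review): the original "Smashes R31" cannot apply here (this file only uses R0-R15); confirm which scratch register, if any, is clobbered.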
+TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
+	MOVD	LR, (g_sched+gobuf_pc)(g)
+	MOVD	R15, (g_sched+gobuf_sp)(g)
+	MOVD	$0, (g_sched+gobuf_lr)(g)
+	MOVD	$0, (g_sched+gobuf_ret)(g)
+	MOVD	$0, (g_sched+gobuf_ctxt)(g)
+	RET
+
+// func asmcgocall(fn, arg unsafe.Pointer) int32
+// Call fn(arg) on the scheduler stack,
+// aligned appropriately for the gcc ABI.
+// See cgocall.go for more details.
+TEXT ·asmcgocall(SB),NOSPLIT,$0-20
+	// R3 = fn; R4 = arg; R2 = original stack pointer; R5 = original g; R13 = g; R15 = stack pointer
+	// C TLS base pointer in AR0:AR1
+	MOVD	fn+0(FP), R3
+	MOVD	arg+8(FP), R4
+
+	MOVD	R15, R2		// save original stack pointer
+	MOVD	g, R5
+
+	// Figure out if we need to switch to m->g0 stack.
+	// We get called to create new OS threads too, and those
+	// come in on the m->g0 stack already.
+	MOVD	g_m(g), R6
+	MOVD	m_g0(R6), R6
+	CMPBEQ	R6, g, g0
+	BL	gosave<>(SB)
+	MOVD	R6, g
+	BL	runtime·save_g(SB)
+	MOVD	(g_sched+gobuf_sp)(g), R15
+
+	// Now on a scheduling stack (a pthread-created stack).
+g0:
+	// Save room for two of our pointers, plus 160 bytes of callee
+	// save area that lives on the caller stack.
+	SUB	$176, R15
+	MOVD	$~7, R6
+	AND	R6, R15                 // 8-byte alignment for gcc ABI
+	MOVD	R5, 168(R15)             // save old g on stack
+	MOVD	(g_stack+stack_hi)(R5), R5
+	SUB	R2, R5
+	MOVD	R5, 160(R15)             // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
+	MOVD	R0, 0(R15)              // clear back chain pointer (TODO can we give it real back trace information?)
+	MOVD	R4, R2                  // arg in R2
+	BL	R3                      // can clobber: R0-R5, R14, F0-F3, F5, F7-F15
+
+	XOR	R0, R0                  // set R0 back to 0.
+	// Restore g, stack pointer.
+	MOVD	168(R15), g
+	BL	runtime·save_g(SB)
+	MOVD	(g_stack+stack_hi)(g), R5
+	MOVD	160(R15), R6
+	SUB	R6, R5
+	MOVD	R5, R15
+
+	MOVW	R2, ret+16(FP)
+	RET
+
+// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
+// Turn the fn into a Go func (by taking its address) and call
+// cgocallback_gofunc.
+TEXT runtime·cgocallback(SB),NOSPLIT,$24-24
+	MOVD	$fn+0(FP), R3
+	MOVD	R3, 8(R15)
+	MOVD	frame+8(FP), R3
+	MOVD	R3, 16(R15)
+	MOVD	framesize+16(FP), R3
+	MOVD	R3, 24(R15)
+	MOVD	$runtime·cgocallback_gofunc(SB), R3
+	BL	(R3)
+	RET
+
+// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
+// See cgocall.go for more details.
+TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-24
+	NO_LOCAL_POINTERS
+
+	// Load m and g from thread-local storage.
+	MOVB	runtime·iscgo(SB), R3
+	CMPBEQ	R3, $0, nocgo
+	BL	runtime·load_g(SB)
+
+nocgo:
+	// If g is nil, Go did not create the current thread.
+	// Call needm to obtain one for temporary use.
+	// In this case, we're running on the thread stack, so there's
+	// lots of space, but the linker doesn't know. Hide the call from
+	// the linker analysis by using an indirect call.
+	CMPBEQ	g, $0, needm
+
+	MOVD	g_m(g), R8
+	MOVD	R8, savedm-8(SP)
+	BR	havem
+
+needm:
+	MOVD	g, savedm-8(SP) // g is zero, so is m.
+	MOVD	$runtime·needm(SB), R3
+	BL	(R3)
+
+	// Set m->sched.sp = SP, so that if a panic happens
+	// during the function we are about to execute, it will
+	// have a valid SP to run on the g0 stack.
+	// The next few lines (after the havem label)
+	// will save this SP onto the stack and then write
+	// the same SP back to m->sched.sp. That seems redundant,
+	// but if an unrecovered panic happens, unwindm will
+	// restore the g->sched.sp from the stack location
+	// and then systemstack will try to use it. If we don't set it here,
+	// that restored SP will be uninitialized (typically 0) and
+	// will not be usable.
+	MOVD	g_m(g), R8
+	MOVD	m_g0(R8), R3
+	MOVD	R15, (g_sched+gobuf_sp)(R3)
+
+havem:
+	// Now there's a valid m, and we're running on its m->g0.
+	// Save current m->g0->sched.sp on stack and then set it to SP.
+	// Save current sp in m->g0->sched.sp in preparation for
+	// switch back to m->curg stack.
+	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R15) aka savedsp-16(SP).
+	MOVD	m_g0(R8), R3
+	MOVD	(g_sched+gobuf_sp)(R3), R4
+	MOVD	R4, savedsp-16(SP)
+	MOVD	R15, (g_sched+gobuf_sp)(R3)
+
+	// Switch to m->curg stack and call runtime.cgocallbackg.
+	// Because we are taking over the execution of m->curg
+	// but *not* resuming what had been running, we need to
+	// save that information (m->curg->sched) so we can restore it.
+	// We can restore m->curg->sched.sp easily, because calling
+	// runtime.cgocallbackg leaves SP unchanged upon return.
+	// To save m->curg->sched.pc, we push it onto the stack.
+	// This has the added benefit that it looks to the traceback
+	// routine like cgocallbackg is going to return to that
+	// PC (because the frame we allocate below has the same
+	// size as cgocallback_gofunc's frame declared above)
+	// so that the traceback will seamlessly trace back into
+	// the earlier calls.
+	//
+	// In the new goroutine, -16(SP) and -8(SP) are unused.
+	MOVD	m_curg(R8), g
+	BL	runtime·save_g(SB)
+	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
+	MOVD	(g_sched+gobuf_pc)(g), R5
+	MOVD	R5, -24(R4)
+	MOVD	$-24(R4), R15
+	BL	runtime·cgocallbackg(SB)
+
+	// Restore g->sched (== m->curg->sched) from saved values.
+	MOVD	0(R15), R5
+	MOVD	R5, (g_sched+gobuf_pc)(g)
+	MOVD	$24(R15), R4
+	MOVD	R4, (g_sched+gobuf_sp)(g)
+
+	// Switch back to m->g0's stack and restore m->g0->sched.sp.
+	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
+	// so we do not have to restore it.)
+	MOVD	g_m(g), R8
+	MOVD	m_g0(R8), g
+	BL	runtime·save_g(SB)
+	MOVD	(g_sched+gobuf_sp)(g), R15
+	MOVD	savedsp-16(SP), R4
+	MOVD	R4, (g_sched+gobuf_sp)(g)
+
+	// If the m on entry was nil, we called needm above to borrow an m
+	// for the duration of the call. Since the call is over, return it with dropm.
+	MOVD	savedm-8(SP), R6
+	CMPBNE	R6, $0, droppedm
+	MOVD	$runtime·dropm(SB), R3
+	BL	(R3)
+droppedm:
+
+	// Done!
+	RET
+
+// void setg(G*); set g. for use by needm.
+TEXT runtime·setg(SB), NOSPLIT, $0-8
+	MOVD	gg+0(FP), g
+	// This only happens if iscgo, so jump straight to save_g
+	BL	runtime·save_g(SB)
+	RET
+
+// void setg_gcc(G*); set g in C TLS.
+// Must obey the gcc calling convention.
+TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
+	// The standard prologue clobbers LR (R14), which is callee-save in
+	// the C ABI, so we have to use NOFRAME and save LR ourselves.
+	MOVD	LR, R1
+	// Also save g, R10, and R11 since they're callee-save in C ABI
+	MOVD	R10, R3
+	MOVD	g, R4
+	MOVD	R11, R5
+
+	MOVD	R2, g
+	BL	runtime·save_g(SB)
+
+	MOVD	R5, R11
+	MOVD	R4, g
+	MOVD	R3, R10
+	MOVD	R1, LR
+	RET
+
+TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
+	MOVD	16(R15), R3		// LR saved by caller
+	MOVD	runtime·stackBarrierPC(SB), R4
+	CMPBNE	R3, R4, nobar
+	// Get original return PC.
+	BL	runtime·nextBarrierPC(SB)
+	MOVD	8(R15), R3
+nobar:
+	MOVD	R3, ret+8(FP)
+	RET
+
+TEXT runtime·setcallerpc(SB),NOSPLIT,$8-16
+	MOVD	pc+8(FP), R3
+	MOVD	16(R15), R4
+	MOVD	runtime·stackBarrierPC(SB), R5
+	CMPBEQ	R4, R5, setbar
+	MOVD	R3, 16(R15)		// set LR in caller
+	RET
+setbar:
+	// Set the stack barrier return PC.
+	MOVD	R3, 8(R15)
+	BL	runtime·setNextBarrierPC(SB)
+	RET
+
+TEXT runtime·getcallersp(SB),NOSPLIT,$0-16
+	MOVD	argp+0(FP), R3
+	SUB	$8, R3
+	MOVD	R3, ret+8(FP)
+	RET
+
+TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
+	MOVW	(R0), R0
+	UNDEF
+
+// int64 runtime·cputicks(void)
+TEXT runtime·cputicks(SB),NOSPLIT,$0-8
+	// The TOD clock on s390 counts from the year 1900 in ~250ps intervals.
+	// This means that since about 1972 the msb has been set, making the
+	// result of a call to STORE CLOCK (stck) a negative number.
+	// We clear the msb to make it positive.
+	STCK	ret+0(FP)      // serialises before and after call
+	MOVD	ret+0(FP), R3  // R3 will wrap to 0 in the year 2043
+	SLD	$1, R3
+	SRD	$1, R3
+	MOVD	R3, ret+0(FP)
+	RET
+
+// memhash_varlen(p unsafe.Pointer, h seed) uintptr
+// redirects to memhash(p, h, size) using the size
+// stored in the closure.
+TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24
+	GO_ARGS
+	NO_LOCAL_POINTERS
+	MOVD	p+0(FP), R3
+	MOVD	h+8(FP), R4
+	MOVD	8(R12), R5
+	MOVD	R3, 8(R15)
+	MOVD	R4, 16(R15)
+	MOVD	R5, 24(R15)
+	BL	runtime·memhash(SB)
+	MOVD	32(R15), R3
+	MOVD	R3, ret+16(FP)
+	RET
+
+// AES hashing not implemented for s390x
+TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
+	MOVW	(R0), R15
+TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
+	MOVW	(R0), R15
+TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
+	MOVW	(R0), R15
+TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
+	MOVW	(R0), R15
+
+// memequal(p, q unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
+	MOVD	p+0(FP), R3
+	MOVD	q+8(FP), R5
+	MOVD	size+16(FP), R6
+	LA	ret+24(FP), R7
+	BR	runtime·memeqbody(SB)
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
+	MOVD	a+0(FP), R3
+	MOVD	b+8(FP), R5
+	MOVD	8(R12), R6    // compiler stores size at offset 8 in the closure
+	LA	ret+16(FP), R7
+	BR	runtime·memeqbody(SB)
+
+// eqstring tests whether two strings are equal.
+// The compiler guarantees that strings passed
+// to eqstring have equal length.
+// See runtime_test.go:eqstring_generic for
+// equivalent Go code.
+TEXT runtime·eqstring(SB),NOSPLIT|NOFRAME,$0-33
+	MOVD	s1str+0(FP), R3
+	MOVD	s1len+8(FP), R6
+	MOVD	s2str+16(FP), R5
+	LA	ret+32(FP), R7
+	BR	runtime·memeqbody(SB)
+
+TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49
+	MOVD	a_len+8(FP), R2
+	MOVD	b_len+32(FP), R6
+	MOVD	a+0(FP), R3
+	MOVD	b+24(FP), R5
+	LA	ret+48(FP), R7
+	CMPBNE	R2, R6, notequal
+	BR	runtime·memeqbody(SB)
+notequal:
+	MOVB	$0, ret+48(FP)
+	RET
+
+// input:
+//   R3 = a
+//   R5 = b
+//   R6 = len
+//   R7 = address of output byte (stores 0 or 1 here)
+//   a and b have the same length
+TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
+	CMPBEQ	R3, R5, equal
+loop:
+	CMPBEQ	R6, $0, equal
+	CMPBLT	R6, $32, tiny
+	CMP	R6, $256
+	BLT	tail
+	CLC	$256, 0(R3), 0(R5)
+	BNE	notequal
+	SUB	$256, R6
+	LA	256(R3), R3
+	LA	256(R5), R5
+	BR	loop
+tail:
+	SUB	$1, R6, R8
+	EXRL	$runtime·memeqbodyclc(SB), R8
+	BEQ	equal
+notequal:
+	MOVB	$0, 0(R7)
+	RET
+equal:
+	MOVB	$1, 0(R7)
+	RET
+tiny:
+	MOVD	$0, R2
+	CMPBLT	R6, $16, lt16
+	MOVD	0(R3), R8
+	MOVD	0(R5), R9
+	CMPBNE	R8, R9, notequal
+	MOVD	8(R3), R8
+	MOVD	8(R5), R9
+	CMPBNE	R8, R9, notequal
+	LA	16(R2), R2
+	SUB	$16, R6
+lt16:
+	CMPBLT	R6, $8, lt8
+	MOVD	0(R3)(R2*1), R8
+	MOVD	0(R5)(R2*1), R9
+	CMPBNE	R8, R9, notequal
+	LA	8(R2), R2
+	SUB	$8, R6
+lt8:
+	CMPBLT	R6, $4, lt4
+	MOVWZ	0(R3)(R2*1), R8
+	MOVWZ	0(R5)(R2*1), R9
+	CMPBNE	R8, R9, notequal
+	LA	4(R2), R2
+	SUB	$4, R6
+lt4:
+#define CHECK(n) \
+	CMPBEQ	R6, $n, equal \
+	MOVB	n(R3)(R2*1), R8 \
+	MOVB	n(R5)(R2*1), R9 \
+	CMPBNE	R8, R9, notequal
+	CHECK(0)
+	CHECK(1)
+	CHECK(2)
+	CHECK(3)
+	BR	equal
+
+TEXT runtime·memeqbodyclc(SB),NOSPLIT|NOFRAME,$0-0
+	CLC	$1, 0(R3), 0(R5)	// compares memory at 0(R3) with 0(R5); this is the instruction the EXRL in memeqbody's tail refers to, with R8 = remaining length - 1
+	RET
+
+TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
+	MOVD	g_m(g), R4	// R4 = m of the current g
+	MOVWZ	m_fastrand(R4), R3	// R3 = m.fastrand, zero-extended 32-bit state
+	ADD	R3, R3	// x += x
+	CMPW	R3, $0	// test the doubled value as a signed 32-bit number
+	BGE	2(PC)	// non-negative: skip the XOR
+	XOR	$0x88888eef, R3	// x ^= 0x88888eef
+	MOVW	R3, m_fastrand(R4)	// store the new state back into m
+	MOVW	R3, ret+0(FP)	// and return the same 32-bit value
+	RET
+
+TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
+	MOVD	s+0(FP), R3     // s => R3
+	MOVD	s_len+8(FP), R4 // s_len => R4
+	MOVBZ	c+24(FP), R5    // c => R5
+	MOVD	$ret+32(FP), R2 // &ret => R2
+	BR	runtime·indexbytebody(SB) // tail call; indexbytebody stores the index (or -1) through R2
+
+TEXT strings·IndexByte(SB),NOSPLIT,$0-32
+	MOVD	s+0(FP), R3     // s => R3
+	MOVD	s_len+8(FP), R4 // s_len => R4
+	MOVBZ	c+16(FP), R5    // c => R5
+	MOVD	$ret+24(FP), R2 // &ret => R2
+	BR	runtime·indexbytebody(SB) // tail call; indexbytebody stores the index (or -1) through R2
+
+// input:
+// R3: s
+// R4: s_len
+// R5: c -- byte sought
+// R2: &ret -- address to put index into
+TEXT runtime·indexbytebody(SB),NOSPLIT,$0
+	CMPBEQ	R4, $0, notfound
+	MOVD	R3, R6          // store base for later
+	ADD	R3, R4, R8      // the address after the end of the string
+	//if the length is small, use loop; otherwise, use vector or srst search
+	CMPBGE	R4, $16, large
+
+residual:
+	CMPBEQ	R3, R8, notfound
+	MOVBZ	0(R3), R7
+	LA	1(R3), R3
+	CMPBNE	R7, R5, residual
+
+found:
+	SUB	R6, R3
+	SUB	$1, R3
+	MOVD	R3, 0(R2)
+	RET
+
+notfound:
+	MOVD	$-1, 0(R2)
+	RET
+
+large:
+	MOVB	runtime·vectorfacility(SB), R1
+	CMPBEQ	R1, $-1, checkvector	// vectorfacility = -1, vector not checked yet
+vectorchecked:
+	CMPBEQ	R1, $1, vectorimpl      // vectorfacility = 1, vector supported
+
+srstimpl:                       // vectorfacility != 1: vector facility not installed or not enabled
+	MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
+srstloop:
+	WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
+	BVS	srstloop        // interrupted - continue
+	BGT	notfoundr0
+foundr0:
+	XOR	R0, R0          // reset R0
+	SUB	R6, R8          // remove base
+	MOVD	R8, 0(R2)
+	RET
+notfoundr0:
+	XOR	R0, R0          // reset R0
+	MOVD	$-1, 0(R2)
+	RET
+
+vectorimpl:
+	//if the address is not 16byte aligned, use loop for the header
+	AND	$15, R3, R8
+	CMPBGT	R8, $0, notaligned
+
+aligned:
+	ADD	R6, R4, R8
+	AND	$-16, R8, R7
+	// replicate c across V17
+	VLVGB	$0, R5, V19
+	VREPB	$0, V19, V17
+
+vectorloop:
+	CMPBGE	R3, R7, residual
+	VL	0(R3), V16    // load string to be searched into V16
+	ADD	$16, R3
+	VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
+	BVS	vectorloop
+
+	// when vector search found c in the string
+	VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
+	SUB	$16, R3
+	SUB	R6, R3
+	ADD	R3, R7
+	MOVD	R7, 0(R2)
+	RET
+
+notaligned:
+	AND	$-16, R3, R8
+	ADD     $16, R8
+notalignedloop:
+	CMPBEQ	R3, R8, aligned
+	MOVBZ	0(R3), R7
+	LA	1(R3), R3
+	CMPBNE	R7, R5, notalignedloop
+	BR	found
+
+checkvector:
+	CALL	runtime·checkvectorfacility(SB)
+	MOVB    runtime·vectorfacility(SB), R1
+	BR	vectorchecked
+
+TEXT runtime·return0(SB), NOSPLIT, $0
+	MOVW	$0, R3
+	RET
+
+// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
+// Must obey the gcc calling convention.
+TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
+	// g (R13), R10, R11 and LR (R14) are callee-save in the C ABI, so save them
+	MOVD	g, R1
+	MOVD	R10, R3
+	MOVD	LR, R4
+	MOVD	R11, R5
+
+	BL	runtime·load_g(SB)	// clobbers g (R13), R10, R11
+	MOVD	g_m(g), R2
+	MOVD	m_curg(R2), R2
+	MOVD	(g_stack+stack_hi)(R2), R2
+
+	MOVD	R1, g
+	MOVD	R3, R10
+	MOVD	R4, LR
+	MOVD	R5, R11
+	RET
+
+// The top-most function running on a goroutine
+// returns to goexit+PCQuantum.
+TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
+	BYTE $0x07; BYTE $0x00; // 2-byte nop
+	BL	runtime·goexit1(SB)	// does not return
+	// traceback from goexit1 must hit code range of goexit
+	BYTE $0x07; BYTE $0x00; // 2-byte nop
+
+TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
+	RET
+
+TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
+	RET
+
+TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
+	RET
+
+TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
+	RET
+
+TEXT runtime·sigreturn(SB),NOSPLIT,$0-8
+	RET
+
+TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
+	SYNC
+	RET
+
+TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
+	MOVD	s1_base+0(FP), R3
+	MOVD	s1_len+8(FP), R4
+	MOVD	s2_base+16(FP), R5
+	MOVD	s2_len+24(FP), R6
+	LA	ret+32(FP), R7
+	BR	runtime·cmpbody(SB)
+
+TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
+	MOVD	s1+0(FP), R3	// R3 = a: word at offset 0 of the first argument
+	MOVD	s1+8(FP), R4	// R4 = alen: word at offset 8 of the first argument
+	MOVD	s2+24(FP), R5	// R5 = b: word at offset 24, start of the second argument
+	MOVD	s2+32(FP), R6	// R6 = blen: word at offset 32
+	LA	res+48(FP), R7	// R7 = address of the result slot that cmpbody writes -1/0/1 to
+	BR	runtime·cmpbody(SB)	// tail call; cmpbody stores the result and returns to our caller
+
+// cmpbody compares min(alen, blen) bytes of a and b, then the lengths. Input:
+//   R3 = a
+//   R4 = alen
+//   R5 = b
+//   R6 = blen
+//   R7 = address of output word (stores -1/0/1 here)
+TEXT runtime·cmpbody(SB),NOSPLIT|NOFRAME,$0-0
+	CMPBEQ	R3, R5, cmplengths	// same start address: only the lengths can differ
+	MOVD	R4, R8
+	CMPBLE	R4, R6, amin
+	MOVD	R6, R8
+amin:					// R8 = min(alen, blen) = bytes still to compare
+	CMPBEQ	R8, $0, cmplengths
+loop:
+	CMP	R8, $256
+	BLE	tail			// 1..256 bytes left: finish with one variable-length CLC
+	CLC	$256, 0(R3), 0(R5)
+	BGT	gt
+	BLT	lt
+	SUB	$256, R8
+	LA	256(R3), R3		// advance both operands past the chunk just compared;
+	LA	256(R5), R5		// without this every iteration re-reads the first 256 bytes
+	BR	loop
+tail:
+	SUB	$1, R8			// the executed CLC takes its length as (bytes - 1)
+	EXRL	$runtime·cmpbodyclc(SB), R8
+	BGT	gt
+	BLT	lt
+cmplengths:				// common prefix is equal: the shorter operand sorts first
+	CMPBEQ	R4, R6, eq
+	CMPBLT	R4, R6, lt
+gt:
+	MOVD	$1, 0(R7)
+	RET
+lt:
+	MOVD	$-1, 0(R7)
+	RET
+eq:
+	MOVD	$0, 0(R7)
+	RET
+
+TEXT runtime·cmpbodyclc(SB),NOSPLIT|NOFRAME,$0-0
+	CLC	$1, 0(R3), 0(R5)	// compares memory at 0(R3) with 0(R5); this is the instruction the EXRL in cmpbody's tail refers to, with R8 = remaining length - 1
+	RET
+
+// This is called from .init_array and follows the platform, not Go, ABI.
+// We are overly conservative. We could only save the registers we use.
+// However, since this function is only called once per loaded module
+// performance is unimportant.
+TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
+	// Save R6-R15, F0, F2, F4 and F6 in the
+	// register save area of the calling function
+	STMG	R6, R15, 48(R15)
+	FMOVD	F0, 128(R15)
+	FMOVD	F2, 136(R15)
+	FMOVD	F4, 144(R15)
+	FMOVD	F6, 152(R15)
+
+	// append the argument (passed in R2, as per the ELF ABI) to the
+	// moduledata linked list.
+	MOVD	runtime·lastmoduledatap(SB), R1
+	MOVD	R2, moduledata_next(R1)
+	MOVD	R2, runtime·lastmoduledatap(SB)
+
+	// Restore R6-R15, F0, F2, F4 and F6
+	LMG	48(R15), R6, R15	// R15 is reloaded with the value it already holds; nothing above changes it
+	FMOVD	128(R15), F0		// load back from the save area (memory operand first);
+	FMOVD	136(R15), F2		// the store form "FMOVD Fn, off(R15)" would only
+	FMOVD	144(R15), F4		// rewrite the slots and leave the registers unrestored
+	FMOVD	152(R15), F6
+	RET
+
+TEXT ·checkASM(SB),NOSPLIT,$0-1
+	MOVB	$1, ret+0(FP)
+	RET
diff --git a/src/runtime/defs_linux_s390x.go b/src/runtime/defs_linux_s390x.go
new file mode 100644
index 0000000000..5f55d5a889
--- /dev/null
+++ b/src/runtime/defs_linux_s390x.go
@@ -0,0 +1,167 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+	_EINTR  = 0x4
+	_EAGAIN = 0xb
+	_ENOMEM = 0xc
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x20
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_DONTNEED   = 0x4
+	_MADV_HUGEPAGE   = 0xe
+	_MADV_NOHUGEPAGE = 0xf
+
+	_SA_RESTART = 0x10000000
+	_SA_ONSTACK = 0x8000000
+	_SA_SIGINFO = 0x4
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGBUS    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGUSR1   = 0xa
+	_SIGSEGV   = 0xb
+	_SIGUSR2   = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGSTKFLT = 0x10
+	_SIGCHLD   = 0x11
+	_SIGCONT   = 0x12
+	_SIGSTOP   = 0x13
+	_SIGTSTP   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGURG    = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGIO     = 0x1d
+	_SIGPWR    = 0x1e
+	_SIGSYS    = 0x1f
+
+	_FPE_INTDIV = 0x1
+	_FPE_INTOVF = 0x2
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EPOLLIN       = 0x1
+	_EPOLLOUT      = 0x4
+	_EPOLLERR      = 0x8
+	_EPOLLHUP      = 0x10
+	_EPOLLRDHUP    = 0x2000
+	_EPOLLET       = 0x80000000
+	_EPOLL_CLOEXEC = 0x80000
+	_EPOLL_CTL_ADD = 0x1
+	_EPOLL_CTL_DEL = 0x2
+	_EPOLL_CTL_MOD = 0x3
+)
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int64
+}
+
+func (ts *timespec) set_sec(x int64) {
+	ts.tv_sec = x
+}
+
+func (ts *timespec) set_nsec(x int32) {
+	ts.tv_nsec = int64(x) // widen: tv_nsec is declared int64 in timespec
+}
+
+type timeval struct {
+	tv_sec  int64
+	tv_usec int64
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = int64(x) // widen: tv_usec is declared int64 in timeval
+}
+
+type sigactiont struct {
+	sa_handler  uintptr
+	sa_flags    uint64
+	sa_restorer uintptr
+	sa_mask     uint64
+}
+
+type siginfo struct {
+	si_signo int32
+	si_errno int32
+	si_code  int32
+	// below here is a union; si_addr is the only field we use
+	si_addr uint64
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type epollevent struct {
+	events    uint32
+	pad_cgo_0 [4]byte
+	data      [8]byte // unaligned uintptr
+}
+
+const (
+	_O_RDONLY    = 0x0
+	_O_CLOEXEC   = 0x80000
+	_SA_RESTORER = 0
+)
+
+type sigaltstackt struct {
+	ss_sp    *byte
+	ss_flags int32
+	ss_size  uintptr
+}
+
+type sigcontext struct {
+	psw_mask uint64
+	psw_addr uint64
+	gregs    [16]uint64
+	aregs    [16]uint32
+	fpc      uint32
+	fpregs   [16]uint64
+}
+
+type ucontext struct {
+	uc_flags    uint64
+	uc_link     *ucontext
+	uc_stack    sigaltstackt
+	uc_mcontext sigcontext
+	uc_sigmask  uint64
+}
diff --git a/src/runtime/lfstack_64bit.go b/src/runtime/lfstack_64bit.go
index 07c2a141f0..8180b0a248 100644
--- a/src/runtime/lfstack_64bit.go
+++ b/src/runtime/lfstack_64bit.go
@@ -2,26 +2,32 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build arm64 mips64 mips64le ppc64 ppc64le
+// +build arm64 mips64 mips64le ppc64 ppc64le s390x
 
 package runtime
 
 import "unsafe"
 
-// On ppc64, Linux limits the user address space to 46 bits (see
-// TASK_SIZE_USER64 in the Linux kernel).  This has grown over time,
-// so here we allow 48 bit addresses.
-//
-// On mips64, Linux limits the user address space to 40 bits (see
-// TASK_SIZE64 in the Linux kernel).  This has grown over time,
-// so here we allow 48 bit addresses.
-//
-// In addition to the 16 bits taken from the top, we can take 3 from the
-// bottom, because node must be pointer-aligned, giving a total of 19 bits
-// of count.
 const (
+	// addrBits is the number of bits needed to represent a virtual address.
+	//
+	// In Linux the user address space for each architecture is limited as
+	// follows (taken from the processor.h file for the architecture):
+	//
+	// Architecture  Name              Maximum Value (exclusive)
+	// ---------------------------------------------------------------------
+	// arm64         TASK_SIZE_64      Depends on configuration.
+	// ppc64{,le}    TASK_SIZE_USER64  0x400000000000UL (46 bit addresses)
+	// mips64{,le}   TASK_SIZE64       0x010000000000UL (40 bit addresses)
+	// s390x         TASK_SIZE         0x020000000000UL (41 bit addresses)
+	//
+	// These values may increase over time.
 	addrBits = 48
-	cntBits  = 64 - addrBits + 3
+
+	// In addition to the 16 bits taken from the top, we can take 3 from the
+	// bottom, because node must be pointer-aligned, giving a total of 19 bits
+	// of count.
+	cntBits = 64 - addrBits + 3
 )
 
 func lfstackPack(node *lfnode, cnt uintptr) uint64 {
diff --git a/src/runtime/memclr_s390x.s b/src/runtime/memclr_s390x.s
new file mode 100644
index 0000000000..86eafec0a9
--- /dev/null
+++ b/src/runtime/memclr_s390x.s
@@ -0,0 +1,122 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// void runtime·memclr(void*, uintptr)
+TEXT runtime·memclr(SB),NOSPLIT|NOFRAME,$0-16
+	MOVD	ptr+0(FP), R4
+	MOVD	n+8(FP), R5
+
+start:
+	CMPBLE	R5, $3, clear0to3
+	CMPBLE	R5, $7, clear4to7
+	CMPBLE	R5, $11, clear8to11
+	CMPBLE	R5, $15, clear12to15
+	CMP	R5, $32
+	BGE	clearmt32
+	MOVD	R0, 0(R4)
+	MOVD	R0, 8(R4)
+	ADD	$16, R4
+	SUB	$16, R5
+	BR	start
+
+clear0to3:
+	CMPBEQ	R5, $0, done
+	CMPBNE	R5, $1, clear2
+	MOVB	R0, 0(R4)
+	RET
+clear2:
+	CMPBNE	R5, $2, clear3
+	MOVH	R0, 0(R4)
+	RET
+clear3:
+	MOVH	R0, 0(R4)
+	MOVB	R0, 2(R4)
+	RET
+
+clear4to7:
+	CMPBNE	R5, $4, clear5
+	MOVW	R0, 0(R4)
+	RET
+clear5:
+	CMPBNE	R5, $5, clear6
+	MOVW	R0, 0(R4)
+	MOVB	R0, 4(R4)
+	RET
+clear6:
+	CMPBNE	R5, $6, clear7
+	MOVW	R0, 0(R4)
+	MOVH	R0, 4(R4)
+	RET
+clear7:
+	MOVW	R0, 0(R4)
+	MOVH	R0, 4(R4)
+	MOVB	R0, 6(R4)
+	RET
+
+clear8to11:
+	CMPBNE	R5, $8, clear9
+	MOVD	R0, 0(R4)
+	RET
+clear9:
+	CMPBNE	R5, $9, clear10
+	MOVD	R0, 0(R4)
+	MOVB	R0, 8(R4)
+	RET
+clear10:
+	CMPBNE	R5, $10, clear11
+	MOVD	R0, 0(R4)
+	MOVH	R0, 8(R4)
+	RET
+clear11:
+	MOVD	R0, 0(R4)
+	MOVH	R0, 8(R4)
+	MOVB	R0, 10(R4)
+	RET
+
+clear12to15:
+	CMPBNE	R5, $12, clear13
+	MOVD	R0, 0(R4)
+	MOVW	R0, 8(R4)
+	RET
+clear13:
+	CMPBNE	R5, $13, clear14
+	MOVD	R0, 0(R4)
+	MOVW	R0, 8(R4)
+	MOVB	R0, 12(R4)
+	RET
+clear14:
+	CMPBNE	R5, $14, clear15
+	MOVD	R0, 0(R4)
+	MOVW	R0, 8(R4)
+	MOVH	R0, 12(R4)
+	RET
+clear15:
+	MOVD	R0, 0(R4)
+	MOVW	R0, 8(R4)
+	MOVH	R0, 12(R4)
+	MOVB	R0, 14(R4)
+	RET
+
+clearmt32:
+	CMP	R5, $256
+	BLT	clearlt256
+	XC	$256, 0(R4), 0(R4)
+	ADD	$256, R4
+	ADD	$-256, R5
+	BR	clearmt32
+clearlt256:
+	CMPBEQ	R5, $0, done
+	ADD	$-1, R5
+	EXRL	$runtime·memclr_s390x_exrl_xc(SB), R5
+done:
+	RET
+
+// DO NOT CALL - target for exrl (execute relative long) instruction.
+//
+// runtime·memclr reaches the XC below only through EXRL, with R4 holding the
+// address still to be cleared and R5 holding the remaining byte count minus
+// one. XC XORs the bytes at 0(R4) with themselves, which zeroes them.
+// NOTE(review): the $1 length is presumably a placeholder that EXRL replaces
+// with the value taken from R5 - confirm against the z/Architecture
+// definition of EXECUTE RELATIVE LONG.
+TEXT runtime·memclr_s390x_exrl_xc(SB),NOSPLIT|NOFRAME,$0-0
+	XC	$1, 0(R4), 0(R4)
+	MOVD	R0, 0(R0)	// presumably a deliberate fault if this symbol is ever entered directly - TODO confirm
+	RET
+
diff --git a/src/runtime/memmove_s390x.s b/src/runtime/memmove_s390x.s
new file mode 100644
index 0000000000..238f30891d
--- /dev/null
+++ b/src/runtime/memmove_s390x.s
@@ -0,0 +1,189 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// void runtime·memmove(void*, void*, uintptr)
+TEXT runtime·memmove(SB),NOSPLIT|NOFRAME,$0-24
+	MOVD	to+0(FP), R6
+	MOVD	from+8(FP), R4
+	MOVD	n+16(FP), R5
+
+	CMPBEQ	R6, R4, done
+
+start:
+	CMPBLE	R5, $3, move0to3
+	CMPBLE	R5, $7, move4to7
+	CMPBLE	R5, $11, move8to11
+	CMPBLE	R5, $15, move12to15
+	CMPBNE	R5, $16, movemt16
+	MOVD	0(R4), R7
+	MOVD	8(R4), R8
+	MOVD	R7, 0(R6)
+	MOVD	R8, 8(R6)
+	RET
+
+movemt16:
+	CMPBGT	R4, R6, forwards
+	ADD	R5, R4, R7
+	CMPBLE	R7, R6, forwards
+	ADD	R5, R6, R8
+backwards:
+	MOVD	-8(R7), R3
+	MOVD	R3, -8(R8)
+	MOVD	-16(R7), R3
+	MOVD	R3, -16(R8)
+	ADD	$-16, R5
+	ADD	$-16, R7
+	ADD	$-16, R8
+	CMP	R5, $16
+	BGE	backwards
+	BR	start
+
+forwards:
+	CMPBGT	R5, $64, forwards_fast
+	MOVD	0(R4), R3
+	MOVD	R3, 0(R6)
+	MOVD	8(R4), R3
+	MOVD	R3, 8(R6)
+	ADD	$16, R4
+	ADD	$16, R6
+	ADD	$-16, R5
+	CMP	R5, $16
+	BGE	forwards
+	BR	start
+
+forwards_fast:
+	CMP	R5, $256
+	BLE	forwards_small
+	MVC	$256, 0(R4), 0(R6)
+	ADD	$256, R4
+	ADD	$256, R6
+	ADD	$-256, R5
+	BR	forwards_fast
+
+forwards_small:
+	CMPBEQ	R5, $0, done
+	ADD	$-1, R5
+	EXRL	$runtime·memmove_s390x_exrl_mvc(SB), R5
+	RET
+
+move0to3:
+	CMPBEQ	R5, $0, done
+move1:
+	CMPBNE	R5, $1, move2
+	MOVB	0(R4), R3
+	MOVB	R3, 0(R6)
+	RET
+move2:
+	CMPBNE	R5, $2, move3
+	MOVH	0(R4), R3
+	MOVH	R3, 0(R6)
+	RET
+move3:
+	MOVH	0(R4), R3
+	MOVB	2(R4), R7
+	MOVH	R3, 0(R6)
+	MOVB	R7, 2(R6)
+	RET
+
+move4to7:
+	CMPBNE	R5, $4, move5
+	MOVW	0(R4), R3
+	MOVW	R3, 0(R6)
+	RET
+move5:
+	CMPBNE	R5, $5, move6
+	MOVW	0(R4), R3
+	MOVB	4(R4), R7
+	MOVW	R3, 0(R6)
+	MOVB	R7, 4(R6)
+	RET
+move6:
+	CMPBNE	R5, $6, move7
+	MOVW	0(R4), R3
+	MOVH	4(R4), R7
+	MOVW	R3, 0(R6)
+	MOVH	R7, 4(R6)
+	RET
+move7:
+	MOVW	0(R4), R3
+	MOVH	4(R4), R7
+	MOVB	6(R4), R8
+	MOVW	R3, 0(R6)
+	MOVH	R7, 4(R6)
+	MOVB	R8, 6(R6)
+	RET
+
+move8to11:
+	CMPBNE	R5, $8, move9
+	MOVD	0(R4), R3
+	MOVD	R3, 0(R6)
+	RET
+move9:
+	CMPBNE	R5, $9, move10
+	MOVD	0(R4), R3
+	MOVB	8(R4), R7
+	MOVD	R3, 0(R6)
+	MOVB	R7, 8(R6)
+	RET
+move10:
+	CMPBNE	R5, $10, move11
+	MOVD	0(R4), R3
+	MOVH	8(R4), R7
+	MOVD	R3, 0(R6)
+	MOVH	R7, 8(R6)
+	RET
+move11:
+	MOVD	0(R4), R3
+	MOVH	8(R4), R7
+	MOVB	10(R4), R8
+	MOVD	R3, 0(R6)
+	MOVH	R7, 8(R6)
+	MOVB	R8, 10(R6)
+	RET
+
+move12to15:
+	CMPBNE	R5, $12, move13
+	MOVD	0(R4), R3
+	MOVW	8(R4), R7
+	MOVD	R3, 0(R6)
+	MOVW	R7, 8(R6)
+	RET
+move13:
+	CMPBNE	R5, $13, move14
+	MOVD	0(R4), R3
+	MOVW	8(R4), R7
+	MOVB	12(R4), R8
+	MOVD	R3, 0(R6)
+	MOVW	R7, 8(R6)
+	MOVB	R8, 12(R6)
+	RET
+move14:
+	CMPBNE	R5, $14, move15
+	MOVD	0(R4), R3
+	MOVW	8(R4), R7
+	MOVH	12(R4), R8
+	MOVD	R3, 0(R6)
+	MOVW	R7, 8(R6)
+	MOVH	R8, 12(R6)
+	RET
+move15:
+	MOVD	0(R4), R3
+	MOVW	8(R4), R7
+	MOVH	12(R4), R8
+	MOVB	14(R4), R10
+	MOVD	R3, 0(R6)
+	MOVW	R7, 8(R6)
+	MOVH	R8, 12(R6)
+	MOVB	R10, 14(R6)
+done:
+	RET
+
+// DO NOT CALL - target for exrl (execute relative long) instruction.
+//
+// runtime·memmove reaches the MVC below only through EXRL, with R4 holding
+// the source address, R6 the destination address and R5 the remaining byte
+// count minus one.
+// NOTE(review): the $1 length is presumably a placeholder that EXRL replaces
+// with the value taken from R5 - confirm against the z/Architecture
+// definition of EXECUTE RELATIVE LONG.
+TEXT runtime·memmove_s390x_exrl_mvc(SB),NOSPLIT|NOFRAME,$0-0
+	MVC	$1, 0(R4), 0(R6)
+	MOVD	R0, 0(R0)	// presumably a deliberate fault if this symbol is ever entered directly - TODO confirm
+	RET
+
diff --git a/src/runtime/os_linux_s390x.go b/src/runtime/os_linux_s390x.go
new file mode 100644
index 0000000000..e659dff716
--- /dev/null
+++ b/src/runtime/os_linux_s390x.go
@@ -0,0 +1,46 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+	_SS_DISABLE  = 2
+	_NSIG        = 65
+	_SI_USER     = 0
+	_SIG_BLOCK   = 0
+	_SIG_UNBLOCK = 1
+	_SIG_SETMASK = 2
+	_RLIMIT_AS   = 9
+)
+
+type sigset uint64
+
+type rlimit struct {
+	rlim_cur uintptr
+	rlim_max uintptr
+}
+
+var sigset_all = sigset(^uint64(0))
+
+// sigaddset turns on the bit for signal number i in *mask; signal i lives
+// at bit position i-1. It throws when i is greater than 64.
+func sigaddset(mask *sigset, i int) {
+	if i > 64 {
+		throw("unexpected signal greater than 64")
+	}
+	bit := sigset(1) << (uint(i) - 1)
+	*mask = *mask | bit
+}
+
+// sigdelset turns off the bit for signal number i in *mask; signal i lives
+// at bit position i-1. It throws when i is greater than 64.
+func sigdelset(mask *sigset, i int) {
+	if i > 64 {
+		throw("unexpected signal greater than 64")
+	}
+	bit := sigset(1) << (uint(i) - 1)
+	*mask = *mask &^ bit
+}
+
+func sigfillset(mask *uint64) {
+	*mask = ^uint64(0)
+}
+
+// sigcopyset stores into *mask the set built from the first two words of m:
+// m[0] supplies the low bits and m[1], shifted left by 32, the high bits.
+func sigcopyset(mask *sigset, m sigmask) {
+	low := uint64(m[0])
+	high := uint64(m[1]) << 32
+	*mask = sigset(low | high)
+}
diff --git a/src/runtime/rt0_linux_s390x.s b/src/runtime/rt0_linux_s390x.s
new file mode 100644
index 0000000000..aedd6c7ef2
--- /dev/null
+++ b/src/runtime/rt0_linux_s390x.s
@@ -0,0 +1,20 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_s390x_linux(SB),NOSPLIT|NOFRAME,$0
+	// In a statically linked binary, the stack contains argc,
+	// argv as argc string pointers followed by a NULL, envv as a
+	// sequence of string pointers followed by a NULL, and auxv.
+	// There is no TLS base pointer.
+	//
+	// TODO: Support dynamic linking entry point
+	MOVD 0(R15), R2 // argc
+	ADD $8, R15, R3 // argv
+	BR main(SB)
+
+TEXT main(SB),NOSPLIT|NOFRAME,$0
+	MOVD	$runtime·rt0_go(SB), R11
+	BR	R11
diff --git a/src/runtime/signal_linux_s390x.go b/src/runtime/signal_linux_s390x.go
new file mode 100644
index 0000000000..155d3a326f
--- /dev/null
+++ b/src/runtime/signal_linux_s390x.go
@@ -0,0 +1,208 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+	"runtime/internal/sys"
+	"unsafe"
+)
+
+type sigctxt struct {
+	info *siginfo
+	ctxt unsafe.Pointer
+}
+
+func (c *sigctxt) regs() *sigcontext {
+	return (*sigcontext)(unsafe.Pointer(&(*ucontext)(c.ctxt).uc_mcontext))
+}
+func (c *sigctxt) r0() uint64      { return c.regs().gregs[0] }
+func (c *sigctxt) r1() uint64      { return c.regs().gregs[1] }
+func (c *sigctxt) r2() uint64      { return c.regs().gregs[2] }
+func (c *sigctxt) r3() uint64      { return c.regs().gregs[3] }
+func (c *sigctxt) r4() uint64      { return c.regs().gregs[4] }
+func (c *sigctxt) r5() uint64      { return c.regs().gregs[5] }
+func (c *sigctxt) r6() uint64      { return c.regs().gregs[6] }
+func (c *sigctxt) r7() uint64      { return c.regs().gregs[7] }
+func (c *sigctxt) r8() uint64      { return c.regs().gregs[8] }
+func (c *sigctxt) r9() uint64      { return c.regs().gregs[9] }
+func (c *sigctxt) r10() uint64     { return c.regs().gregs[10] }
+func (c *sigctxt) r11() uint64     { return c.regs().gregs[11] }
+func (c *sigctxt) r12() uint64     { return c.regs().gregs[12] }
+func (c *sigctxt) r13() uint64     { return c.regs().gregs[13] }
+func (c *sigctxt) r14() uint64     { return c.regs().gregs[14] }
+func (c *sigctxt) r15() uint64     { return c.regs().gregs[15] }
+func (c *sigctxt) link() uint64    { return c.regs().gregs[14] }
+func (c *sigctxt) sp() uint64      { return c.regs().gregs[15] }
+func (c *sigctxt) pc() uint64      { return c.regs().psw_addr }
+func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) }
+func (c *sigctxt) sigaddr() uint64 { return c.info.si_addr }
+
+func (c *sigctxt) set_r0(x uint64)      { c.regs().gregs[0] = x }
+func (c *sigctxt) set_r13(x uint64)     { c.regs().gregs[13] = x }
+func (c *sigctxt) set_link(x uint64)    { c.regs().gregs[14] = x }
+func (c *sigctxt) set_sp(x uint64)      { c.regs().gregs[15] = x }
+func (c *sigctxt) set_pc(x uint64)      { c.regs().psw_addr = x }
+func (c *sigctxt) set_sigcode(x uint32) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint64) {
+	*(*uintptr)(add(unsafe.Pointer(c.info), 2*sys.PtrSize)) = uintptr(x)
+}
+
+func dumpregs(c *sigctxt) {
+	print("r0   ", hex(c.r0()), "\t")
+	print("r1   ", hex(c.r1()), "\n")
+	print("r2   ", hex(c.r2()), "\t")
+	print("r3   ", hex(c.r3()), "\n")
+	print("r4   ", hex(c.r4()), "\t")
+	print("r5   ", hex(c.r5()), "\n")
+	print("r6   ", hex(c.r6()), "\t")
+	print("r7   ", hex(c.r7()), "\n")
+	print("r8   ", hex(c.r8()), "\t")
+	print("r9   ", hex(c.r9()), "\n")
+	print("r10  ", hex(c.r10()), "\t")
+	print("r11  ", hex(c.r11()), "\n")
+	print("r12  ", hex(c.r12()), "\t")
+	print("r13  ", hex(c.r13()), "\n")
+	print("r14  ", hex(c.r14()), "\t")
+	print("r15  ", hex(c.r15()), "\n")
+	print("pc   ", hex(c.pc()), "\t")
+	print("link ", hex(c.link()), "\n")
+}
+
+var crashing int32
+
+// sighandler is the body of the s390x Linux signal handler. It receives the
+// signal number, the siginfo and context pointers delivered with the signal,
+// and gp, the goroutine that records the fault details and is traced first.
+//
+// In order, it:
+//   - hands _SIGPROF to sigprof and returns;
+//   - for a _SigPanic signal whose si_code is not _SI_USER, rewrites the
+//     saved register context so that execution resumes in sigpanic as if
+//     the interrupted function had called it, then returns;
+//   - offers user-sent or _SigNotify signals to sigsend, and drops
+//     user-sent signals that signal_ignored reports as ignored;
+//   - calls dieFromSignal for _SigKill signals and returns for signals
+//     that are not _SigThrow;
+//   - otherwise prints the signal, tracebacks and registers, and finally
+//     crashes (when gotraceback asks for it) or exits with status 2.
+//
+// May run during STW, so write barriers are not allowed.
+//
+//go:nowritebarrierrec
+func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
+	_g_ := getg()
+	c := &sigctxt{info, ctxt}
+
+	if sig == _SIGPROF {
+		sigprof(uintptr(c.pc()), uintptr(c.sp()), uintptr(c.link()), gp, _g_.m)
+		return
+	}
+	// Signals outside sigtable are treated as fatal (_SigThrow).
+	flags := int32(_SigThrow)
+	if sig < uint32(len(sigtable)) {
+		flags = sigtable[sig].flags
+	}
+	if c.sigcode() != _SI_USER && flags&_SigPanic != 0 {
+		// Make it look like a call to the signal func.
+		// Have to pass arguments out of band since
+		// augmenting the stack frame would break
+		// the unwinding code.
+		gp.sig = sig
+		gp.sigcode0 = uintptr(c.sigcode())
+		gp.sigcode1 = uintptr(c.sigaddr())
+		gp.sigpc = uintptr(c.pc())
+
+		// We arrange link, and pc to pretend the panicking
+		// function calls sigpanic directly.
+		// Always save LINK to stack so that panics in leaf
+		// functions are correctly handled. This smashes
+		// the stack frame but we're not going back there
+		// anyway.
+		sp := c.sp() - sys.MinFrameSize
+		c.set_sp(sp)
+		*(*uint64)(unsafe.Pointer(uintptr(sp))) = c.link()
+
+		pc := uintptr(gp.sigpc)
+
+		// If we don't recognize the PC as code
+		// but we do recognize the link register as code,
+		// then assume this was a call to non-code and treat like
+		// pc == 0, to make unwinding show the context.
+		if pc != 0 && findfunc(pc) == nil && findfunc(uintptr(c.link())) != nil {
+			pc = 0
+		}
+
+		// Don't bother saving PC if it's zero, which is
+		// probably a call to a nil func: the old link register
+		// is more useful in the stack trace.
+		if pc != 0 {
+			c.set_link(uint64(pc))
+		}
+
+		// In case we are panicking from external C code
+		c.set_r0(0)
+		c.set_r13(uint64(uintptr(unsafe.Pointer(gp))))
+		c.set_pc(uint64(funcPC(sigpanic)))
+		return
+	}
+
+	if c.sigcode() == _SI_USER || flags&_SigNotify != 0 {
+		if sigsend(sig) {
+			return
+		}
+	}
+
+	if c.sigcode() == _SI_USER && signal_ignored(sig) {
+		return
+	}
+
+	if flags&_SigKill != 0 {
+		dieFromSignal(int32(sig))
+	}
+
+	if flags&_SigThrow == 0 {
+		return
+	}
+
+	_g_.m.throwing = 1
+	_g_.m.caughtsig.set(gp)
+
+	if crashing == 0 {
+		startpanic()
+	}
+
+	if sig < uint32(len(sigtable)) {
+		print(sigtable[sig].name, "\n")
+	} else {
+		print("Signal ", sig, "\n")
+	}
+
+	print("PC=", hex(c.pc()), " m=", _g_.m.id, "\n")
+	if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
+		print("signal arrived during cgo execution\n")
+		gp = _g_.m.lockedg
+	}
+	print("\n")
+
+	level, _, docrash := gotraceback()
+	if level > 0 {
+		goroutineheader(gp)
+		tracebacktrap(uintptr(c.pc()), uintptr(c.sp()), uintptr(c.link()), gp)
+		if crashing > 0 && gp != _g_.m.curg && _g_.m.curg != nil && readgstatus(_g_.m.curg)&^_Gscan == _Grunning {
+			// tracebackothers on original m skipped this one; trace it now.
+			// The traceback must be of curg, matching the header just
+			// printed; gp itself was already traced by tracebacktrap above.
+			goroutineheader(_g_.m.curg)
+			traceback(^uintptr(0), ^uintptr(0), 0, _g_.m.curg)
+		} else if crashing == 0 {
+			tracebackothers(gp)
+			print("\n")
+		}
+		dumpregs(c)
+	}
+
+	if docrash {
+		crashing++
+		if crashing < sched.mcount {
+			// There are other m's that need to dump their stacks.
+			// Relay SIGQUIT to the next m by sending it to the current process.
+			// All m's that have already received SIGQUIT have signal masks blocking
+			// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
+			// When the last m receives the SIGQUIT, it will fall through to the call to
+			// crash below. Just in case the relaying gets botched, each m involved in
+			// the relay sleeps for 5 seconds and then does the crash/exit itself.
+			// In expected operation, the last m has received the SIGQUIT and run
+			// crash/exit and the process is gone, all long before any of the
+			// 5-second sleeps have finished.
+			print("\n-----\n\n")
+			raiseproc(_SIGQUIT)
+			usleep(5 * 1000 * 1000)
+		}
+		crash()
+	}
+
+	exit(2)
+}
diff --git a/src/runtime/sys_linux_s390x.s b/src/runtime/sys_linux_s390x.s
new file mode 100644
index 0000000000..f43792bd51
--- /dev/null
+++ b/src/runtime/sys_linux_s390x.s
@@ -0,0 +1,440 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// System calls and other system stuff for Linux s390x; see
+// /usr/include/asm/unistd.h for the syscall number definitions.
+
+#include "go_asm.h"
+#include "go_tls.h"
+#include "textflag.h"
+
+#define SYS_exit                  1
+#define SYS_read                  3
+#define SYS_write                 4
+#define SYS_open                  5
+#define SYS_close                 6
+#define SYS_getpid               20
+#define SYS_kill                 37
+#define SYS_fcntl                55
+#define SYS_gettimeofday         78
+#define SYS_mmap                 90
+#define SYS_munmap               91
+#define SYS_setitimer           104
+#define SYS_clone               120
+#define SYS_select              142
+#define SYS_sched_yield         158
+#define SYS_rt_sigreturn        173
+#define SYS_rt_sigaction        174
+#define SYS_rt_sigprocmask      175
+#define SYS_sigaltstack         186
+#define SYS_ugetrlimit          191
+#define SYS_madvise             219
+#define SYS_mincore             218
+#define SYS_gettid              236
+#define SYS_tkill               237
+#define SYS_futex               238
+#define SYS_sched_getaffinity   240
+#define SYS_exit_group          248
+#define SYS_epoll_create        249
+#define SYS_epoll_ctl           250
+#define SYS_epoll_wait          251
+#define SYS_clock_gettime       260
+#define SYS_epoll_create1       327
+
+TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4
+	MOVW	code+0(FP), R2
+	MOVW	$SYS_exit_group, R1
+	SYSCALL
+	RET
+
+TEXT runtime·exit1(SB),NOSPLIT|NOFRAME,$0-4
+	MOVW	code+0(FP), R2
+	MOVW	$SYS_exit, R1
+	SYSCALL
+	RET
+
+TEXT runtime·open(SB),NOSPLIT|NOFRAME,$0-20
+	MOVD	name+0(FP), R2
+	MOVW	mode+8(FP), R3
+	MOVW	perm+12(FP), R4
+	MOVW	$SYS_open, R1
+	SYSCALL
+	MOVD	$-4095, R3
+	CMPUBLT	R2, R3, 2(PC)
+	MOVW	$-1, R2
+	MOVW	R2, ret+16(FP)
+	RET
+
+TEXT runtime·closefd(SB),NOSPLIT|NOFRAME,$0-12
+	MOVW	fd+0(FP), R2
+	MOVW	$SYS_close, R1
+	SYSCALL
+	MOVD	$-4095, R3
+	CMPUBLT	R2, R3, 2(PC)
+	MOVW	$-1, R2
+	MOVW	R2, ret+8(FP)
+	RET
+
+TEXT runtime·write(SB),NOSPLIT|NOFRAME,$0-28
+	MOVD	fd+0(FP), R2
+	MOVD	p+8(FP), R3
+	MOVW	n+16(FP), R4
+	MOVW	$SYS_write, R1
+	SYSCALL
+	MOVD	$-4095, R3
+	CMPUBLT	R2, R3, 2(PC)
+	MOVW	$-1, R2
+	MOVW	R2, ret+24(FP)
+	RET
+
+TEXT runtime·read(SB),NOSPLIT|NOFRAME,$0-28
+	MOVW	fd+0(FP), R2
+	MOVD	p+8(FP), R3
+	MOVW	n+16(FP), R4
+	MOVW	$SYS_read, R1
+	SYSCALL
+	MOVD	$-4095, R3
+	CMPUBLT	R2, R3, 2(PC)
+	MOVW	$-1, R2
+	MOVW	R2, ret+24(FP)
+	RET
+
+TEXT runtime·getrlimit(SB),NOSPLIT|NOFRAME,$0-20
+	MOVW	kind+0(FP), R2
+	MOVD	limit+8(FP), R3
+	MOVW	$SYS_ugetrlimit, R1
+	SYSCALL
+	MOVW	R2, ret+16(FP)
+	RET
+
+// usleep waits for usec microseconds by calling select with no file
+// descriptors and only a timeout. The timeout is built in the local frame as
+// two 64-bit words at 8(R15) and 16(R15): whole seconds first, then the
+// leftover microseconds.
+TEXT runtime·usleep(SB),NOSPLIT,$16-4
+	MOVW	usec+0(FP), R2
+	MOVD	R2, R4	// keep a copy of usec for the remainder
+	MOVW	$1000000, R3
+	DIVD	R3, R2	// R2 = usec / 1000000
+	MOVD	R2, 8(R15)	// timeout seconds
+	MULLD	R2, R3	// R3 = seconds * 1000000
+	SUB	R3, R4	// R4 = usec - seconds*1000000
+	MOVD	R4, 16(R15)	// timeout microseconds
+
+	// select(0, 0, 0, 0, &tv)
+	MOVW	$0, R2
+	MOVW	$0, R3
+	MOVW	$0, R4
+	MOVW	$0, R5
+	ADD	$8, R15, R6	// R6 = address of the two words stored above
+	MOVW	$SYS_select, R1
+	SYSCALL
+	RET
+
+TEXT runtime·gettid(SB),NOSPLIT,$0-4
+	MOVW	$SYS_gettid, R1
+	SYSCALL
+	MOVW	R2, ret+0(FP)
+	RET
+
+TEXT runtime·raise(SB),NOSPLIT|NOFRAME,$0
+	MOVW	$SYS_gettid, R1
+	SYSCALL
+	MOVW	R2, R2	// arg 1 tid
+	MOVW	sig+0(FP), R3	// arg 2
+	MOVW	$SYS_tkill, R1
+	SYSCALL
+	RET
+
+TEXT runtime·raiseproc(SB),NOSPLIT|NOFRAME,$0
+	MOVW	$SYS_getpid, R1
+	SYSCALL
+	MOVW	R2, R2	// arg 1 pid
+	MOVW	sig+0(FP), R3	// arg 2
+	MOVW	$SYS_kill, R1
+	SYSCALL
+	RET
+
+TEXT runtime·setitimer(SB),NOSPLIT|NOFRAME,$0-24
+	MOVW	mode+0(FP), R2
+	MOVD	new+8(FP), R3
+	MOVD	old+16(FP), R4
+	MOVW	$SYS_setitimer, R1
+	SYSCALL
+	RET
+
+TEXT runtime·mincore(SB),NOSPLIT|NOFRAME,$0-28
+	MOVD	addr+0(FP), R2
+	MOVD	n+8(FP), R3
+	MOVD	dst+16(FP), R4
+	MOVW	$SYS_mincore, R1
+	SYSCALL
+	MOVW	R2, ret+24(FP)
+	RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB),NOSPLIT,$16
+	MOVD	$0(R15), R2
+	MOVD	$0, R3
+	MOVW	$SYS_gettimeofday, R1
+	SYSCALL
+	MOVD	0(R15), R2	// sec
+	MOVD	8(R15), R4	// usec
+	MOVD	$1000, R3
+	MULLD	R3, R4
+	MOVD	R2, sec+0(FP)
+	MOVW	R4, nsec+8(FP)
+	RET
+
+TEXT runtime·nanotime(SB),NOSPLIT,$16
+	MOVW	$1, R2 // CLOCK_MONOTONIC
+	MOVD	$0(R15), R3
+	MOVW	$SYS_clock_gettime, R1
+	SYSCALL
+	MOVD	0(R15), R2	// sec
+	MOVD	8(R15), R4	// nsec
+	// sec is in R2, nsec in R4
+	// return nsec in R2
+	MOVD	$1000000000, R3
+	MULLD	R3, R2
+	ADD	R4, R2
+	MOVD	R2, ret+0(FP)
+	RET
+
+TEXT runtime·rtsigprocmask(SB),NOSPLIT|NOFRAME,$0-28
+	MOVW	sig+0(FP), R2
+	MOVD	new+8(FP), R3
+	MOVD	old+16(FP), R4
+	MOVW	size+24(FP), R5
+	MOVW	$SYS_rt_sigprocmask, R1
+	SYSCALL
+	MOVD	$-4095, R3
+	CMPUBLT	R2, R3, 2(PC)
+	MOVD	R0, 0(R0) // crash
+	RET
+
+TEXT runtime·rt_sigaction(SB),NOSPLIT|NOFRAME,$0-36
+	MOVD	sig+0(FP), R2
+	MOVD	new+8(FP), R3
+	MOVD	old+16(FP), R4
+	MOVD	size+24(FP), R5
+	MOVW	$SYS_rt_sigaction, R1
+	SYSCALL
+	MOVW	R2, ret+32(FP)
+	RET
+
+TEXT runtime·sigfwd(SB),NOSPLIT,$0-32
+	MOVW	sig+8(FP), R2
+	MOVD	info+16(FP), R3
+	MOVD	ctx+24(FP), R4
+	MOVD	fn+0(FP), R5
+	BL	R5
+	RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$64
+	// initialize essential registers (just in case)
+	XOR	R0, R0
+
+	// this might be called in external code context,
+	// where g is not set.
+	MOVB	runtime·iscgo(SB), R6
+	CMPBEQ	R6, $0, 2(PC)
+	BL	runtime·load_g(SB)
+
+	MOVW	R2, 8(R15)
+	MOVD	R3, 16(R15)
+	MOVD	R4, 24(R15)
+	MOVD	$runtime·sigtrampgo(SB), R5
+	BL	R5
+	RET
+
+TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
+	BR	runtime·sigtramp(SB)
+
+// func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
+TEXT runtime·mmap(SB),NOSPLIT,$48-40
+	MOVD	addr+0(FP), R2
+	MOVD	n+8(FP), R3
+	MOVW	prot+16(FP), R4
+	MOVW	flags+20(FP), R5
+	MOVW	fd+24(FP), R6
+	MOVWZ	off+28(FP), R7
+
+	// s390x uses old_mmap, so the arguments need to be placed into
+	// a struct and a pointer to the struct passed to mmap.
+	MOVD	R2, addr-48(SP)
+	MOVD	R3, n-40(SP)
+	MOVD	R4, prot-32(SP)
+	MOVD	R5, flags-24(SP)
+	MOVD	R6, fd-16(SP)
+	MOVD	R7, off-8(SP)
+
+	MOVD	$addr-48(SP), R2
+	MOVW	$SYS_mmap, R1
+	SYSCALL
+	MOVD	$-4095, R3
+	CMPUBLT	R2, R3, 2(PC)
+	NEG	R2
+	MOVD	R2, ret+32(FP)
+	RET
+
+TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0
+	MOVD	addr+0(FP), R2
+	MOVD	n+8(FP), R3
+	MOVW	$SYS_munmap, R1
+	SYSCALL
+	MOVD	$-4095, R3
+	CMPUBLT	R2, R3, 2(PC)
+	MOVD	R0, 0(R0) // crash
+	RET
+
+TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
+	MOVD	addr+0(FP), R2
+	MOVD	n+8(FP), R3
+	MOVW	flags+16(FP), R4
+	MOVW	$SYS_madvise, R1
+	SYSCALL
+	// ignore failure - maybe pages are locked
+	RET
+
+// int64 futex(int32 *uaddr, int32 op, int32 val,
+//	struct timespec *timeout, int32 *uaddr2, int32 val2);
+TEXT runtime·futex(SB),NOSPLIT|NOFRAME,$0
+	MOVD	addr+0(FP), R2
+	MOVW	op+8(FP), R3
+	MOVW	val+12(FP), R4
+	MOVD	ts+16(FP), R5
+	MOVD	addr2+24(FP), R6
+	MOVW	val3+32(FP),  R7
+	MOVW	$SYS_futex, R1
+	SYSCALL
+	MOVW	R2, ret+40(FP)
+	RET
+
+// int32 clone(int32 flags, void *stk, M *mp, G *gp, void (*fn)(void));
+TEXT runtime·clone(SB),NOSPLIT|NOFRAME,$0
+	MOVW	flags+0(FP), R3
+	MOVD	stk+8(FP), R2
+
+	// Copy mp, gp, fn off parent stack for use by child.
+	// Careful: Linux system call clobbers ???.
+	MOVD	mm+16(FP), R7
+	MOVD	gg+24(FP), R8
+	MOVD	fn+32(FP), R9
+
+	MOVD	R7, -8(R2)
+	MOVD	R8, -16(R2)
+	MOVD	R9, -24(R2)
+	MOVD	$1234, R7
+	MOVD	R7, -32(R2)
+
+	SYSCALL $SYS_clone
+
+	// In parent, return.
+	CMPBEQ	R2, $0, 3(PC)
+	MOVW	R2, ret+40(FP)
+	RET
+
+	// In child, on new stack.
+	// initialize essential registers
+	XOR	R0, R0
+	MOVD	-32(R15), R7
+	CMP	R7, $1234
+	BEQ	2(PC)
+	MOVD	R0, 0(R0)
+
+	// Initialize m->procid to Linux tid
+	SYSCALL $SYS_gettid
+
+	MOVD	-24(R15), R9        // fn
+	MOVD	-16(R15), R8        // g
+	MOVD	-8(R15), R7         // m
+
+	CMPBEQ	R7, $0, nog
+	CMP	R8, $0
+	BEQ	nog
+
+	MOVD	R2, m_procid(R7)
+
+	// In child, set up new stack
+	MOVD	R7, g_m(R8)
+	MOVD	R8, g
+	//CALL	runtime·stackcheck(SB)
+
+nog:
+	// Call fn
+	BL	R9
+
+	// It shouldn't return.	 If it does, exit that thread.
+	MOVW	$111, R2
+	MOVW	$SYS_exit, R1
+	SYSCALL
+	BR	-2(PC)	// keep exiting
+
+TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0
+	MOVD	new+0(FP), R2
+	MOVD	old+8(FP), R3
+	MOVW	$SYS_sigaltstack, R1
+	SYSCALL
+	MOVD	$-4095, R3
+	CMPUBLT	R2, R3, 2(PC)
+	MOVD	R0, 0(R0) // crash
+	RET
+
+TEXT runtime·osyield(SB),NOSPLIT|NOFRAME,$0
+	MOVW	$SYS_sched_yield, R1
+	SYSCALL
+	RET
+
+TEXT runtime·sched_getaffinity(SB),NOSPLIT|NOFRAME,$0
+	MOVD	pid+0(FP), R2
+	MOVD	len+8(FP), R3
+	MOVD	buf+16(FP), R4
+	MOVW	$SYS_sched_getaffinity, R1
+	SYSCALL
+	MOVW	R2, ret+24(FP)
+	RET
+
+// int32 runtime·epollcreate(int32 size);
+TEXT runtime·epollcreate(SB),NOSPLIT|NOFRAME,$0
+	MOVW    size+0(FP), R2
+	MOVW	$SYS_epoll_create, R1
+	SYSCALL
+	MOVW	R2, ret+8(FP)
+	RET
+
+// int32 runtime·epollcreate1(int32 flags);
+TEXT runtime·epollcreate1(SB),NOSPLIT|NOFRAME,$0
+	MOVW	flags+0(FP), R2
+	MOVW	$SYS_epoll_create1, R1
+	SYSCALL
+	MOVW	R2, ret+8(FP)
+	RET
+
+// func epollctl(epfd, op, fd int32, ev *epollevent) int32
+TEXT runtime·epollctl(SB),NOSPLIT|NOFRAME,$0
+	MOVW	epfd+0(FP), R2
+	MOVW	op+4(FP), R3
+	MOVW	fd+8(FP), R4
+	MOVD	ev+16(FP), R5
+	MOVW	$SYS_epoll_ctl, R1
+	SYSCALL
+	MOVW	R2, ret+24(FP)
+	RET
+
+// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
+TEXT runtime·epollwait(SB),NOSPLIT|NOFRAME,$0
+	MOVW	epfd+0(FP), R2
+	MOVD	ev+8(FP), R3
+	MOVW	nev+16(FP), R4
+	MOVW	timeout+20(FP), R5
+	MOVW	$SYS_epoll_wait, R1
+	SYSCALL
+	MOVW	R2, ret+24(FP)
+	RET
+
+// void runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),NOSPLIT|NOFRAME,$0
+	MOVW    fd+0(FP), R2  // fd
+	MOVD    $2, R3  // F_SETFD
+	MOVD    $1, R4  // FD_CLOEXEC
+	MOVW	$SYS_fcntl, R1
+	SYSCALL
+	RET
diff --git a/src/runtime/sys_s390x.go b/src/runtime/sys_s390x.go
new file mode 100644
index 0000000000..2aa81e75c0
--- /dev/null
+++ b/src/runtime/sys_s390x.go
@@ -0,0 +1,45 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// gostartcall edits buf so that it looks as though the saved context had
+// just called fn with context ctxt and then immediately done a Gosave:
+// the previous resume PC becomes the saved link register and fn becomes
+// the new resume PC. It throws if buf already has a saved link register.
+func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) {
+	if buf.lr != 0 {
+		throw("invalid use of gostartcall")
+	}
+	buf.lr, buf.pc = buf.pc, uintptr(fn)
+	buf.ctxt = ctxt
+}
+
+// Called to rewind context saved during morestack back to beginning of function.
+// To help us, the linker emits a jmp back to the beginning right after the
+// call to morestack. We just have to decode and apply that jump.
+//
+// The 8 bytes at buf.pc are loaded as one uint64 and the top 16 bits select
+// the instruction form. Both recognised forms carry a signed displacement
+// counted in halfwords, so it is sign-extended first and then doubled to get
+// the byte offset added to buf.pc. An odd or zero pc, or any other
+// instruction, is reported and causes a throw.
+func rewindmorestack(buf *gobuf) {
+	var inst uint64
+	if buf.pc&1 == 0 && buf.pc != 0 {
+		inst = *(*uint64)(unsafe.Pointer(buf.pc))
+		switch inst >> 48 {
+		case 0xa7f4: // BRC (branch relative on condition) instruction.
+			// 16-bit displacement sits in bits 32..47 of the loaded word.
+			inst >>= 32
+			inst &= 0xFFFF
+			offset := int64(int16(inst))
+			offset <<= 1
+			buf.pc += uintptr(offset)
+			return
+		case 0xc0f4: // BRCL (branch relative on condition long) instruction.
+			// 32-bit displacement sits in bits 16..47 of the loaded word.
+			// Sign-extend before doubling: doubling inside 32 bits would
+			// drop the top bit of the byte offset for large displacements.
+			inst >>= 16
+			inst &= 0xFFFFFFFF
+			offset := int64(int32(inst))
+			offset <<= 1
+			buf.pc += uintptr(offset)
+			return
+		}
+	}
+	print("runtime: pc=", hex(buf.pc), " ", hex(inst), "\n")
+	throw("runtime: misuse of rewindmorestack")
+}
diff --git a/src/runtime/tls_s390x.s b/src/runtime/tls_s390x.s
new file mode 100644
index 0000000000..cb6a21c114
--- /dev/null
+++ b/src/runtime/tls_s390x.s
@@ -0,0 +1,51 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "go_tls.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+// We have to resort to a TLS variable to save g (R13).
+// One reason is that external code might trigger
+// SIGSEGV, and our runtime.sigtramp doesn't even know we
+// are in external code and will continue to use R13;
+// this might well result in another SIGSEGV.
+
+// save_g saves the g register into pthread-provided
+// thread-local memory, so that we can call externally compiled
+// s390x code that will overwrite this register.
+//
+// If !iscgo, this is a no-op.
+//
+// NOTE: setg_gcc<> assumes this clobbers only R10 and R11.
+TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0
+	MOVB	runtime·iscgo(SB),  R10
+	CMPBEQ	R10, $0, nocgo
+	MOVW	AR0, R11
+	SLD	$32, R11
+	MOVW	AR1, R11
+	MOVD	runtime·tls_g(SB), R10
+	MOVD	g, 0(R10)(R11*1)
+nocgo:
+	RET
+
+// load_g loads the g register from pthread-provided
+// thread-local memory, for use after calling externally compiled
+// s390x code that overwrote those registers.
+//
+// This is never called directly from C code (it doesn't have to
+// follow the C ABI), but it may be called from a C context, where the
+// usual Go registers aren't set up.
+//
+// NOTE: _cgo_topofstack assumes this only clobbers g (R13), R10 and R11.
+TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0-0
+	MOVW	AR0, R11
+	SLD	$32, R11
+	MOVW	AR1, R11
+	MOVD	runtime·tls_g(SB), R10
+	MOVD	0(R10)(R11*1), g
+	RET
+
+GLOBL runtime·tls_g+0(SB),TLSBSS,$8
-- 
cgit v1.3


From 0fb7b4cccd02df10f239ed77d6d85566b6388b83 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Fri, 8 Apr 2016 15:14:37 +0200
Subject: runtime: emit file:line info into traces

This makes traces self-contained and simplifies trace workflow
in modern cloud environments where it is simpler to reach
a service via HTTP than to obtain the binary.

Change-Id: I6ff3ca694dc698270f1e29da37d5efaf4e843a0d
Reviewed-on: https://go-review.googlesource.com/21732
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
---
 src/cmd/trace/main.go                 |   5 -
 src/internal/trace/parser.go          | 174 +++++++++++++---------------------
 src/runtime/trace.go                  |  98 ++++++++++++++++---
 src/runtime/trace/trace_stack_test.go |   7 +-
 4 files changed, 156 insertions(+), 128 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/trace/main.go b/src/cmd/trace/main.go
index e493be91b7..12bf8c3c16 100644
--- a/src/cmd/trace/main.go
+++ b/src/cmd/trace/main.go
@@ -104,11 +104,6 @@ func parseEvents() ([]*trace.Event, error) {
 			loader.err = fmt.Errorf("failed to parse trace: %v", err)
 			return
 		}
-		err = trace.Symbolize(events, programBinary)
-		if err != nil {
-			loader.err = fmt.Errorf("failed to symbolize trace: %v", err)
-			return
-		}
 		loader.events = events
 	})
 	return loader.events, loader.err
diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go
index e325678733..5db3fc317e 100644
--- a/src/internal/trace/parser.go
+++ b/src/internal/trace/parser.go
@@ -5,15 +5,10 @@
 package trace
 
 import (
-	"bufio"
 	"bytes"
 	"fmt"
 	"io"
-	"os"
-	"os/exec"
 	"sort"
-	"strconv"
-	"strings"
 )
 
 // Event describes one event in the trace.
@@ -59,11 +54,11 @@ const (
 
 // Parse parses, post-processes and verifies the trace.
 func Parse(r io.Reader) ([]*Event, error) {
-	rawEvents, err := readTrace(r)
+	rawEvents, strings, err := readTrace(r)
 	if err != nil {
 		return nil, err
 	}
-	events, err := parseEvents(rawEvents)
+	events, stacks, err := parseEvents(rawEvents, strings)
 	if err != nil {
 		return nil, err
 	}
@@ -75,6 +70,12 @@ func Parse(r io.Reader) ([]*Event, error) {
 	if err != nil {
 		return nil, err
 	}
+	// Attach stack traces.
+	for _, ev := range events {
+		if ev.StkID != 0 {
+			ev.Stk = stacks[ev.StkID]
+		}
+	}
 	return events, nil
 }
 
@@ -87,19 +88,20 @@ type rawEvent struct {
 
 // readTrace does wire-format parsing and verification.
 // It does not care about specific event types and argument meaning.
-func readTrace(r io.Reader) ([]rawEvent, error) {
+func readTrace(r io.Reader) ([]rawEvent, map[uint64]string, error) {
 	// Read and validate trace header.
 	var buf [16]byte
 	off, err := r.Read(buf[:])
 	if off != 16 || err != nil {
-		return nil, fmt.Errorf("failed to read header: read %v, err %v", off, err)
+		return nil, nil, fmt.Errorf("failed to read header: read %v, err %v", off, err)
 	}
 	if !bytes.Equal(buf[:], []byte("go 1.5 trace\x00\x00\x00\x00")) {
-		return nil, fmt.Errorf("not a trace file")
+		return nil, nil, fmt.Errorf("not a trace file")
 	}
 
 	// Read events.
 	var events []rawEvent
+	strings := make(map[uint64]string)
 	for {
 		// Read event type and number of arguments (1 byte).
 		off0 := off
@@ -108,18 +110,51 @@ func readTrace(r io.Reader) ([]rawEvent, error) {
 			break
 		}
 		if err != nil || n != 1 {
-			return nil, fmt.Errorf("failed to read trace at offset 0x%x: n=%v err=%v", off0, n, err)
+			return nil, nil, fmt.Errorf("failed to read trace at offset 0x%x: n=%v err=%v", off0, n, err)
 		}
 		off += n
 		typ := buf[0] << 2 >> 2
 		narg := buf[0] >> 6
+		if typ == EvString {
+			// String dictionary entry [ID, length, string].
+			var id uint64
+			id, off, err = readVal(r, off)
+			if err != nil {
+				return nil, nil, err
+			}
+			if id == 0 {
+				return nil, nil, fmt.Errorf("string at offset %d has invalid id 0", off)
+			}
+			if strings[id] != "" {
+				return nil, nil, fmt.Errorf("string at offset %d has duplicate id %v", off, id)
+			}
+			var ln uint64
+			ln, off, err = readVal(r, off)
+			if err != nil {
+				return nil, nil, err
+			}
+			if ln == 0 {
+				return nil, nil, fmt.Errorf("string at offset %d has invalie length 0", off)
+			}
+			if ln > 1e6 {
+				return nil, nil, fmt.Errorf("string at offset %d has too large length %v", off, ln)
+			}
+			buf := make([]byte, ln)
+			n, err := io.ReadFull(r, buf)
+			if err != nil {
+				return nil, nil, fmt.Errorf("failed to read trace at offset %d: read %v, want %v, error %v", off, n, ln, err)
+			}
+			off += n
+			strings[id] = string(buf)
+			continue
+		}
 		ev := rawEvent{typ: typ, off: off0}
 		if narg < 3 {
 			for i := 0; i < int(narg)+2; i++ { // sequence number and time stamp are present but not counted in narg
 				var v uint64
 				v, off, err = readVal(r, off)
 				if err != nil {
-					return nil, err
+					return nil, nil, err
 				}
 				ev.args = append(ev.args, v)
 			}
@@ -128,34 +163,34 @@ func readTrace(r io.Reader) ([]rawEvent, error) {
 			var v uint64
 			v, off, err = readVal(r, off)
 			if err != nil {
-				return nil, err
+				return nil, nil, err
 			}
 			evLen := v
 			off1 := off
 			for evLen > uint64(off-off1) {
 				v, off, err = readVal(r, off)
 				if err != nil {
-					return nil, err
+					return nil, nil, err
 				}
 				ev.args = append(ev.args, v)
 			}
 			if evLen != uint64(off-off1) {
-				return nil, fmt.Errorf("event has wrong length at offset 0x%x: want %v, got %v", off0, evLen, off-off1)
+				return nil, nil, fmt.Errorf("event has wrong length at offset 0x%x: want %v, got %v", off0, evLen, off-off1)
 			}
 		}
 		events = append(events, ev)
 	}
-	return events, nil
+	return events, strings, nil
 }
 
 // Parse events transforms raw events into events.
 // It does analyze and verify per-event-type arguments.
-func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
+func parseEvents(rawEvents []rawEvent, strings map[uint64]string) (events []*Event, stacks map[uint64][]*Frame, err error) {
 	var ticksPerSec, lastSeq, lastTs int64
 	var lastG, timerGoid uint64
 	var lastP int
 	lastGs := make(map[int]uint64) // last goroutine running on P
-	stacks := make(map[uint64][]*Frame)
+	stacks = make(map[uint64][]*Frame)
 	for _, raw := range rawEvents {
 		if raw.typ == EvNone || raw.typ >= EvCount {
 			err = fmt.Errorf("unknown event type %v at offset 0x%x", raw.typ, raw.off)
@@ -211,16 +246,20 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
 					raw.off, size)
 				return
 			}
-			if uint64(len(raw.args)) != size+2 {
+			if want := 2 + 4*size; uint64(len(raw.args)) != want {
 				err = fmt.Errorf("EvStack has wrong number of arguments at offset 0x%x: want %v, got %v",
-					raw.off, size+2, len(raw.args))
+					raw.off, want, len(raw.args))
 				return
 			}
 			id := raw.args[0]
 			if id != 0 && size > 0 {
 				stk := make([]*Frame, size)
 				for i := 0; i < int(size); i++ {
-					stk[i] = &Frame{PC: raw.args[i+2]}
+					pc := raw.args[2+i*4+0]
+					fn := raw.args[2+i*4+1]
+					file := raw.args[2+i*4+2]
+					line := raw.args[2+i*4+3]
+					stk[i] = &Frame{PC: pc, Fn: strings[fn], File: strings[file], Line: int(line)}
 				}
 				stacks[id] = stk
 			}
@@ -263,13 +302,6 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
 		return
 	}
 
-	// Attach stack traces.
-	for _, ev := range events {
-		if ev.StkID != 0 {
-			ev.Stk = stacks[ev.StkID]
-		}
-	}
-
 	// Sort by sequence number and translate cpu ticks to real time.
 	sort.Sort(eventList(events))
 	if ticksPerSec == 0 {
@@ -478,8 +510,7 @@ func postProcessTrace(events []*Event) error {
 			g.evStart = ev
 			p.g = ev.G
 			if g.evCreate != nil {
-				// +1 because symbolizer expects return pc.
-				ev.Stk = []*Frame{{PC: g.evCreate.Args[1] + 1}}
+				ev.StkID = g.evCreate.Args[1]
 				g.evCreate = nil
 			}
 
@@ -580,79 +611,6 @@ func postProcessTrace(events []*Event) error {
 	return nil
 }
 
-// symbolizeTrace attaches func/file/line info to stack traces.
-func Symbolize(events []*Event, bin string) error {
-	// First, collect and dedup all pcs.
-	pcs := make(map[uint64]*Frame)
-	for _, ev := range events {
-		for _, f := range ev.Stk {
-			pcs[f.PC] = nil
-		}
-	}
-
-	// Start addr2line.
-	cmd := exec.Command("go", "tool", "addr2line", bin)
-	in, err := cmd.StdinPipe()
-	if err != nil {
-		return fmt.Errorf("failed to pipe addr2line stdin: %v", err)
-	}
-	cmd.Stderr = os.Stderr
-	out, err := cmd.StdoutPipe()
-	if err != nil {
-		return fmt.Errorf("failed to pipe addr2line stdout: %v", err)
-	}
-	err = cmd.Start()
-	if err != nil {
-		return fmt.Errorf("failed to start addr2line: %v", err)
-	}
-	outb := bufio.NewReader(out)
-
-	// Write all pcs to addr2line.
-	// Need to copy pcs to an array, because map iteration order is non-deterministic.
-	var pcArray []uint64
-	for pc := range pcs {
-		pcArray = append(pcArray, pc)
-		_, err := fmt.Fprintf(in, "0x%x\n", pc-1)
-		if err != nil {
-			return fmt.Errorf("failed to write to addr2line: %v", err)
-		}
-	}
-	in.Close()
-
-	// Read in answers.
-	for _, pc := range pcArray {
-		fn, err := outb.ReadString('\n')
-		if err != nil {
-			return fmt.Errorf("failed to read from addr2line: %v", err)
-		}
-		file, err := outb.ReadString('\n')
-		if err != nil {
-			return fmt.Errorf("failed to read from addr2line: %v", err)
-		}
-		f := &Frame{PC: pc}
-		f.Fn = fn[:len(fn)-1]
-		f.File = file[:len(file)-1]
-		if colon := strings.LastIndex(f.File, ":"); colon != -1 {
-			ln, err := strconv.Atoi(f.File[colon+1:])
-			if err == nil {
-				f.File = f.File[:colon]
-				f.Line = ln
-			}
-		}
-		pcs[pc] = f
-	}
-	cmd.Wait()
-
-	// Replace frames in events array.
-	for _, ev := range events {
-		for i, f := range ev.Stk {
-			ev.Stk[i] = pcs[f.PC]
-		}
-	}
-
-	return nil
-}
-
 // readVal reads unsigned base-128 value from r.
 func readVal(r io.Reader, off0 int) (v uint64, off int, err error) {
 	off = off0
@@ -704,7 +662,7 @@ const (
 	EvNone           = 0  // unused
 	EvBatch          = 1  // start of per-P batch of events [pid, timestamp]
 	EvFrequency      = 2  // contains tracer timer frequency [frequency (ticks per second)]
-	EvStack          = 3  // stack [stack id, number of PCs, array of PCs]
+	EvStack          = 3  // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}]
 	EvGomaxprocs     = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
 	EvProcStart      = 5  // start of P [timestamp, thread id]
 	EvProcStop       = 6  // stop of P [timestamp]
@@ -714,7 +672,7 @@ const (
 	EvGCScanDone     = 10 // GC scan done [timestamp]
 	EvGCSweepStart   = 11 // GC sweep start [timestamp, stack id]
 	EvGCSweepDone    = 12 // GC sweep done [timestamp]
-	EvGoCreate       = 13 // goroutine creation [timestamp, new goroutine id, start PC, stack id]
+	EvGoCreate       = 13 // goroutine creation [timestamp, new goroutine id, new start id, stack id]
 	EvGoStart        = 14 // goroutine starts running [timestamp, goroutine id]
 	EvGoEnd          = 15 // goroutine ends [timestamp]
 	EvGoStop         = 16 // goroutine stops (like in select{}) [timestamp, stack]
@@ -738,7 +696,8 @@ const (
 	EvNextGC         = 34 // memstats.next_gc change [timestamp, next_gc]
 	EvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
 	EvFutileWakeup   = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
-	EvCount          = 37
+	EvString         = 37 // string dictionary entry [ID, length, string]
+	EvCount          = 38
 )
 
 var EventDescriptions = [EvCount]struct {
@@ -759,7 +718,7 @@ var EventDescriptions = [EvCount]struct {
 	EvGCScanDone:     {"GCScanDone", false, []string{}},
 	EvGCSweepStart:   {"GCSweepStart", true, []string{}},
 	EvGCSweepDone:    {"GCSweepDone", false, []string{}},
-	EvGoCreate:       {"GoCreate", true, []string{"g", "pc"}},
+	EvGoCreate:       {"GoCreate", true, []string{"g", "stack"}},
 	EvGoStart:        {"GoStart", false, []string{"g"}},
 	EvGoEnd:          {"GoEnd", false, []string{}},
 	EvGoStop:         {"GoStop", true, []string{}},
@@ -783,4 +742,5 @@ var EventDescriptions = [EvCount]struct {
 	EvNextGC:         {"NextGC", false, []string{"mem"}},
 	EvTimerGoroutine: {"TimerGoroutine", false, []string{"g", "unused"}},
 	EvFutileWakeup:   {"FutileWakeup", false, []string{}},
+	EvString:         {"String", false, []string{}},
 }
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 805c34f483..f54e5e0a7e 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -23,7 +23,7 @@ const (
 	traceEvNone           = 0  // unused
 	traceEvBatch          = 1  // start of per-P batch of events [pid, timestamp]
 	traceEvFrequency      = 2  // contains tracer timer frequency [frequency (ticks per second)]
-	traceEvStack          = 3  // stack [stack id, number of PCs, array of PCs]
+	traceEvStack          = 3  // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}]
 	traceEvGomaxprocs     = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
 	traceEvProcStart      = 5  // start of P [timestamp, thread id]
 	traceEvProcStop       = 6  // stop of P [timestamp]
@@ -33,7 +33,7 @@ const (
 	traceEvGCScanDone     = 10 // GC scan done [timestamp]
 	traceEvGCSweepStart   = 11 // GC sweep start [timestamp, stack id]
 	traceEvGCSweepDone    = 12 // GC sweep done [timestamp]
-	traceEvGoCreate       = 13 // goroutine creation [timestamp, new goroutine id, start PC, stack id]
+	traceEvGoCreate       = 13 // goroutine creation [timestamp, new goroutine id, new start id, stack id]
 	traceEvGoStart        = 14 // goroutine starts running [timestamp, goroutine id]
 	traceEvGoEnd          = 15 // goroutine ends [timestamp]
 	traceEvGoStop         = 16 // goroutine stops (like in select{}) [timestamp, stack]
@@ -57,7 +57,8 @@ const (
 	traceEvNextGC         = 34 // memstats.next_gc change [timestamp, next_gc]
 	traceEvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
 	traceEvFutileWakeup   = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
-	traceEvCount          = 37
+	traceEvString         = 37 // string dictionary entry [ID, length, string]
+	traceEvCount          = 38
 )
 
 const (
@@ -111,6 +112,12 @@ var trace struct {
 	reader        *g              // goroutine that called ReadTrace, or nil
 	stackTab      traceStackTable // maps stack traces to unique ids
 
+	// Dictionary for traceEvString.
+	// Currently this is used only for func/file:line info after the tracing session,
+	// so we assume single-threaded access.
+	strings   map[string]uint64
+	stringSeq uint64
+
 	bufLock mutex       // protects buf
 	buf     traceBufPtr // global trace buffer, used when running without a p
 }
@@ -191,6 +198,8 @@ func StartTrace() error {
 	trace.timeStart = nanotime()
 	trace.headerWritten = false
 	trace.footerWritten = false
+	trace.strings = make(map[string]uint64)
+	trace.stringSeq = 0
 
 	// Can't set trace.enabled yet. While the world is stopped, exitsyscall could
 	// already emit a delayed event (see exitTicks in exitsyscall) if we set trace.enabled here.
@@ -272,8 +281,6 @@ func StopTrace() {
 
 	trace.enabled = false
 	trace.shutdown = true
-	trace.stackTab.dump()
-
 	unlock(&trace.bufLock)
 
 	startTheWorld()
@@ -309,6 +316,7 @@ func StopTrace() {
 		trace.empty = buf.ptr().link
 		sysFree(unsafe.Pointer(buf), unsafe.Sizeof(*buf.ptr()), &memstats.other_sys)
 	}
+	trace.strings = nil
 	trace.shutdown = false
 	unlock(&trace.lock)
 }
@@ -380,6 +388,9 @@ func ReadTrace() []byte {
 			data = traceAppend(data, uint64(timers.gp.goid))
 			data = traceAppend(data, 0)
 		}
+		// This will emit a bunch of full buffers, we will pick them up
+		// on the next iteration.
+		trace.stackTab.dump()
 		return data
 	}
 	// Done.
@@ -603,6 +614,29 @@ func traceFlush(buf traceBufPtr) traceBufPtr {
 	return buf
 }
 
+func traceString(buf *traceBuf, s string) (uint64, *traceBuf) {
+	if s == "" {
+		return 0, buf
+	}
+	if id, ok := trace.strings[s]; ok {
+		return id, buf
+	}
+
+	trace.stringSeq++
+	id := trace.stringSeq
+	trace.strings[s] = id
+
+	size := 1 + 2*traceBytesPerNumber + len(s)
+	if len(buf.arr)-buf.pos < size {
+		buf = traceFlush(traceBufPtrOf(buf)).ptr()
+	}
+	buf.byte(traceEvString)
+	buf.varint(id)
+	buf.varint(uint64(len(s)))
+	buf.pos += copy(buf.arr[buf.pos:], s)
+	return id, buf
+}
+
 // traceAppend appends v to buf in little-endian-base-128 encoding.
 func traceAppend(buf []byte, v uint64) []byte {
 	for ; v >= 0x80; v >>= 7 {
@@ -716,23 +750,28 @@ func (tab *traceStackTable) newStack(n int) *traceStack {
 // dump writes all previously cached stacks to trace buffers,
 // releases all memory and resets state.
 func (tab *traceStackTable) dump() {
-	var tmp [(2 + traceStackSize) * traceBytesPerNumber]byte
+	frames := make(map[uintptr]traceFrame)
+	var tmp [(2 + 4*traceStackSize) * traceBytesPerNumber]byte
 	buf := traceFlush(0).ptr()
 	for _, stk := range tab.tab {
 		stk := stk.ptr()
 		for ; stk != nil; stk = stk.link.ptr() {
-			maxSize := 1 + (3+stk.n)*traceBytesPerNumber
-			if len(buf.arr)-buf.pos < maxSize {
-				buf = traceFlush(traceBufPtrOf(buf)).ptr()
-			}
-			// Form the event in the temp buffer, we need to know the actual length.
 			tmpbuf := tmp[:0]
 			tmpbuf = traceAppend(tmpbuf, uint64(stk.id))
 			tmpbuf = traceAppend(tmpbuf, uint64(stk.n))
 			for _, pc := range stk.stack() {
+				var frame traceFrame
+				frame, buf = traceFrameForPC(buf, frames, pc)
 				tmpbuf = traceAppend(tmpbuf, uint64(pc))
+				tmpbuf = traceAppend(tmpbuf, uint64(frame.funcID))
+				tmpbuf = traceAppend(tmpbuf, uint64(frame.fileID))
+				tmpbuf = traceAppend(tmpbuf, uint64(frame.line))
 			}
 			// Now copy to the buffer.
+			size := 1 + traceBytesPerNumber + len(tmpbuf)
+			if len(buf.arr)-buf.pos < size {
+				buf = traceFlush(traceBufPtrOf(buf)).ptr()
+			}
 			buf.byte(traceEvStack | 3<<traceArgCountShift)
 			buf.varint(uint64(len(tmpbuf)))
 			buf.pos += copy(buf.arr[buf.pos:], tmpbuf)
@@ -747,6 +786,39 @@ func (tab *traceStackTable) dump() {
 	*tab = traceStackTable{}
 }
 
+type traceFrame struct {
+	funcID uint64
+	fileID uint64
+	line   uint64
+}
+
+func traceFrameForPC(buf *traceBuf, frames map[uintptr]traceFrame, pc uintptr) (traceFrame, *traceBuf) {
+	if frame, ok := frames[pc]; ok {
+		return frame, buf
+	}
+
+	var frame traceFrame
+	f := findfunc(pc)
+	if f == nil {
+		frames[pc] = frame
+		return frame, buf
+	}
+
+	fn := funcname(f)
+	const maxLen = 1 << 10
+	if len(fn) > maxLen {
+		fn = fn[len(fn)-maxLen:]
+	}
+	frame.funcID, buf = traceString(buf, fn)
+	file, line := funcline(f, pc-sys.PCQuantum)
+	frame.line = uint64(line)
+	if len(file) > maxLen {
+		file = file[len(file)-maxLen:]
+	}
+	frame.fileID, buf = traceString(buf, file)
+	return frame, buf
+}
+
 // traceAlloc is a non-thread-safe region allocator.
 // It holds a linked list of traceAllocBlock.
 type traceAlloc struct {
@@ -844,7 +916,9 @@ func traceGCSweepDone() {
 }
 
 func traceGoCreate(newg *g, pc uintptr) {
-	traceEvent(traceEvGoCreate, 2, uint64(newg.goid), uint64(pc))
+	// +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
+	id := trace.stackTab.put([]uintptr{pc + sys.PCQuantum})
+	traceEvent(traceEvGoCreate, 2, uint64(newg.goid), uint64(id))
 }
 
 func traceGoStart() {
diff --git a/src/runtime/trace/trace_stack_test.go b/src/runtime/trace/trace_stack_test.go
index b99ec687d5..c3fb0f6fee 100644
--- a/src/runtime/trace/trace_stack_test.go
+++ b/src/runtime/trace/trace_stack_test.go
@@ -129,10 +129,6 @@ func TestTraceSymbolize(t *testing.T) {
 	if err != nil {
 		t.Fatalf("failed to parse trace: %v", err)
 	}
-	err = trace.Symbolize(events, os.Args[0])
-	if err != nil {
-		t.Fatalf("failed to symbolize trace: %v", err)
-	}
 
 	// Now check that the stacks are correct.
 	type frame struct {
@@ -149,6 +145,9 @@ func TestTraceSymbolize(t *testing.T) {
 			{"runtime/trace_test.TestTraceSymbolize", 106},
 			{"testing.tRunner", 0},
 		}},
+		{trace.EvGoStart, []frame{
+			{"runtime/trace_test.TestTraceSymbolize.func1", 37},
+		}},
 		{trace.EvGoSched, []frame{
 			{"runtime/trace_test.TestTraceSymbolize", 107},
 			{"testing.tRunner", 0},
-- 
cgit v1.3


From 0435e88a119fd057aa7209591ba3dff122c9f24c Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Fri, 18 Mar 2016 10:56:23 +0100
Subject: runtime: revert "do not call timeBeginPeriod on windows"

This reverts commit ab4c9298b8185a056ff1152f2c7bd9b38d3d06f3.

Sysmon critically depends on system timer resolution for retaking
of Ps blocked in system calls. See #14790 for an example
of a program where execution time goes from 2ms to 30ms if
timeBeginPeriod(1) is not used.

We can remove timeBeginPeriod(1) when we support UMS (#7876).

Update #14790

Change-Id: I362b56154359b2c52d47f9f2468fe012b481cf6d
Reviewed-on: https://go-review.googlesource.com/20834
Reviewed-by: Austin Clements <austin@google.com>
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Alex Brainman <alex.brainman@gmail.com>
---
 misc/cgo/testcarchive/carchive_test.go |  2 +-
 src/runtime/export_windows_test.go     |  7 ++--
 src/runtime/os_windows.go              |  9 ++++-
 src/runtime/syscall_windows_test.go    | 66 ++++++----------------------------
 4 files changed, 25 insertions(+), 59 deletions(-)

(limited to 'src/runtime')

diff --git a/misc/cgo/testcarchive/carchive_test.go b/misc/cgo/testcarchive/carchive_test.go
index 47e0ceb5c9..72e9ef1d59 100644
--- a/misc/cgo/testcarchive/carchive_test.go
+++ b/misc/cgo/testcarchive/carchive_test.go
@@ -120,7 +120,7 @@ func goEnv(key string) string {
 func compilemain(t *testing.T, libgo string) {
 	ccArgs := append(cc, "-o", "testp"+exeSuffix, "main.c")
 	if GOOS == "windows" {
-		ccArgs = append(ccArgs, "main_windows.c", libgo, "-lntdll", "-lws2_32")
+		ccArgs = append(ccArgs, "main_windows.c", libgo, "-lntdll", "-lws2_32", "-lwinmm")
 	} else {
 		ccArgs = append(ccArgs, "main_unix.c", libgo)
 	}
diff --git a/src/runtime/export_windows_test.go b/src/runtime/export_windows_test.go
index 66c103709c..536b398fd7 100644
--- a/src/runtime/export_windows_test.go
+++ b/src/runtime/export_windows_test.go
@@ -8,8 +8,11 @@ package runtime
 
 import "unsafe"
 
-var TestingWER = &testingWER
-var OsYield = osyield
+var (
+	TestingWER              = &testingWER
+	OsYield                 = osyield
+	TimeBeginPeriodRetValue = &timeBeginPeriodRetValue
+)
 
 func NumberOfProcessors() int32 {
 	var info systeminfo
diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
index 7244706b92..9147091a49 100644
--- a/src/runtime/os_windows.go
+++ b/src/runtime/os_windows.go
@@ -53,6 +53,7 @@ const (
 //go:cgo_import_dynamic runtime._WaitForSingleObject WaitForSingleObject%2 "kernel32.dll"
 //go:cgo_import_dynamic runtime._WriteConsoleW WriteConsoleW%5 "kernel32.dll"
 //go:cgo_import_dynamic runtime._WriteFile WriteFile%5 "kernel32.dll"
+//go:cgo_import_dynamic runtime._timeBeginPeriod timeBeginPeriod%1 "winmm.dll"
 
 type stdFunction unsafe.Pointer
 
@@ -98,7 +99,9 @@ var (
 	_WSAGetOverlappedResult,
 	_WaitForSingleObject,
 	_WriteConsoleW,
-	_WriteFile stdFunction
+	_WriteFile,
+	_timeBeginPeriod,
+	_ stdFunction
 
 	// Following syscalls are only available on some Windows PCs.
 	// We will load syscalls, if available, before using them.
@@ -228,6 +231,8 @@ func setlasterror(err uint32)
 // flags can be used with LoadLibraryEx."
 var useLoadLibraryEx bool
 
+var timeBeginPeriodRetValue uint32
+
 func osinit() {
 	asmstdcallAddr = unsafe.Pointer(funcPC(asmstdcall))
 	usleep2Addr = unsafe.Pointer(funcPC(usleep2))
@@ -247,6 +252,8 @@ func osinit() {
 
 	stdcall2(_SetConsoleCtrlHandler, funcPC(ctrlhandler), 1)
 
+	timeBeginPeriodRetValue = uint32(stdcall1(_timeBeginPeriod, 1))
+
 	ncpu = getproccount()
 
 	// Windows dynamic priority boosting assumes that a process has different types
diff --git a/src/runtime/syscall_windows_test.go b/src/runtime/syscall_windows_test.go
index ff045338c1..4a10749682 100644
--- a/src/runtime/syscall_windows_test.go
+++ b/src/runtime/syscall_windows_test.go
@@ -622,6 +622,13 @@ uintptr_t cfunc(callback f, uintptr_t n) {
 	}
 }
 
+func TestTimeBeginPeriod(t *testing.T) {
+	const TIMERR_NOERROR = 0
+	if *runtime.TimeBeginPeriodRetValue != TIMERR_NOERROR {
+		t.Fatalf("timeBeginPeriod failed: it returned %d", *runtime.TimeBeginPeriodRetValue)
+	}
+}
+
 // removeOneCPU removes one (any) cpu from affinity mask.
 // It returns new affinity mask.
 func removeOneCPU(mask uintptr) (uintptr, error) {
@@ -874,21 +881,10 @@ var (
 	modwinmm    = syscall.NewLazyDLL("winmm.dll")
 	modkernel32 = syscall.NewLazyDLL("kernel32.dll")
 
-	proctimeBeginPeriod = modwinmm.NewProc("timeBeginPeriod")
-	proctimeEndPeriod   = modwinmm.NewProc("timeEndPeriod")
-
 	procCreateEvent = modkernel32.NewProc("CreateEventW")
 	procSetEvent    = modkernel32.NewProc("SetEvent")
 )
 
-func timeBeginPeriod(period uint32) {
-	syscall.Syscall(proctimeBeginPeriod.Addr(), 1, uintptr(period), 0, 0)
-}
-
-func timeEndPeriod(period uint32) {
-	syscall.Syscall(proctimeEndPeriod.Addr(), 1, uintptr(period), 0, 0)
-}
-
 func createEvent() (syscall.Handle, error) {
 	r0, _, e0 := syscall.Syscall6(procCreateEvent.Addr(), 4, 0, 0, 0, 0, 0, 0)
 	if r0 == 0 {
@@ -905,7 +901,7 @@ func setEvent(h syscall.Handle) error {
 	return nil
 }
 
-func benchChanToSyscallPing(b *testing.B) {
+func BenchmarkChanToSyscallPing(b *testing.B) {
 	n := b.N
 	ch := make(chan int)
 	event, err := createEvent()
@@ -927,17 +923,7 @@ func benchChanToSyscallPing(b *testing.B) {
 	}
 }
 
-func BenchmarkChanToSyscallPing1ms(b *testing.B) {
-	timeBeginPeriod(1)
-	benchChanToSyscallPing(b)
-	timeEndPeriod(1)
-}
-
-func BenchmarkChanToSyscallPing15ms(b *testing.B) {
-	benchChanToSyscallPing(b)
-}
-
-func benchSyscallToSyscallPing(b *testing.B) {
+func BenchmarkSyscallToSyscallPing(b *testing.B) {
 	n := b.N
 	event1, err := createEvent()
 	if err != nil {
@@ -965,17 +951,7 @@ func benchSyscallToSyscallPing(b *testing.B) {
 	}
 }
 
-func BenchmarkSyscallToSyscallPing1ms(b *testing.B) {
-	timeBeginPeriod(1)
-	benchSyscallToSyscallPing(b)
-	timeEndPeriod(1)
-}
-
-func BenchmarkSyscallToSyscallPing15ms(b *testing.B) {
-	benchSyscallToSyscallPing(b)
-}
-
-func benchChanToChanPing(b *testing.B) {
+func BenchmarkChanToChanPing(b *testing.B) {
 	n := b.N
 	ch1 := make(chan int)
 	ch2 := make(chan int)
@@ -991,28 +967,8 @@ func benchChanToChanPing(b *testing.B) {
 	}
 }
 
-func BenchmarkChanToChanPing1ms(b *testing.B) {
-	timeBeginPeriod(1)
-	benchChanToChanPing(b)
-	timeEndPeriod(1)
-}
-
-func BenchmarkChanToChanPing15ms(b *testing.B) {
-	benchChanToChanPing(b)
-}
-
-func benchOsYield(b *testing.B) {
+func BenchmarkOsYield(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		runtime.OsYield()
 	}
 }
-
-func BenchmarkOsYield1ms(b *testing.B) {
-	timeBeginPeriod(1)
-	benchOsYield(b)
-	timeEndPeriod(1)
-}
-
-func BenchmarkOsYield15ms(b *testing.B) {
-	benchOsYield(b)
-}
-- 
cgit v1.3


From e4f1d9cf2e948eb0f0bb91d7c253ab61dfff3a59 Mon Sep 17 00:00:00 2001
From: Emmanuel Odeke <emm.odeke@gmail.com>
Date: Sun, 27 Mar 2016 17:29:53 -0700
Subject: runtime: make execution error panic values implement the Error
 interface

Make run-time panic values implement runtime.Error, as
mandated by https://golang.org/ref/spec#Run_time_panics,
instead of panicking with plain strings.

Fixes #14965

Change-Id: I7827f898b9b9c08af541db922cc24fa0800ff18a
Reviewed-on: https://go-review.googlesource.com/21214
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/chan.go       | 10 +++++-----
 src/runtime/crash_test.go | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 src/runtime/error.go      | 13 ++++++++++++-
 src/runtime/hashmap.go    |  4 ++--
 src/runtime/malloc.go     |  2 +-
 src/runtime/proc.go       |  2 +-
 src/runtime/select.go     |  2 +-
 7 files changed, 68 insertions(+), 11 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index 954b389f47..8543cb4c9c 100644
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -64,7 +64,7 @@ func makechan(t *chantype, size int64) *hchan {
 		throw("makechan: bad alignment")
 	}
 	if size < 0 || int64(uintptr(size)) != size || (elem.size > 0 && uintptr(size) > (_MaxMem-hchanSize)/elem.size) {
-		panic("makechan: size out of range")
+		panic(plainError("makechan: size out of range"))
 	}
 
 	var c *hchan
@@ -171,7 +171,7 @@ func chansend(t *chantype, c *hchan, ep unsafe.Pointer, block bool, callerpc uin
 
 	if c.closed != 0 {
 		unlock(&c.lock)
-		panic("send on closed channel")
+		panic(plainError("send on closed channel"))
 	}
 
 	if sg := c.recvq.dequeue(); sg != nil {
@@ -231,7 +231,7 @@ func chansend(t *chantype, c *hchan, ep unsafe.Pointer, block bool, callerpc uin
 		if c.closed == 0 {
 			throw("chansend: spurious wakeup")
 		}
-		panic("send on closed channel")
+		panic(plainError("send on closed channel"))
 	}
 	gp.param = nil
 	if mysg.releasetime > 0 {
@@ -302,13 +302,13 @@ func sendDirect(t *_type, sg *sudog, src unsafe.Pointer) {
 
 func closechan(c *hchan) {
 	if c == nil {
-		panic("close of nil channel")
+		panic(plainError("close of nil channel"))
 	}
 
 	lock(&c.lock)
 	if c.closed != 0 {
 		unlock(&c.lock)
-		panic("close of closed channel")
+		panic(plainError("close of closed channel"))
 	}
 
 	if raceenabled {
diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go
index 85fcc69fed..2941b8e8f8 100644
--- a/src/runtime/crash_test.go
+++ b/src/runtime/crash_test.go
@@ -273,6 +273,52 @@ func TestGoexitInPanic(t *testing.T) {
 	}
 }
 
+// Issue 14965: Runtime panics should be of type runtime.Error
+func TestRuntimePanicWithRuntimeError(t *testing.T) {
+	testCases := [...]func(){
+		0: func() {
+			var m map[uint64]bool
+			m[1234] = true
+		},
+		1: func() {
+			ch := make(chan struct{})
+			close(ch)
+			close(ch)
+		},
+		2: func() {
+			var ch = make(chan struct{})
+			close(ch)
+			ch <- struct{}{}
+		},
+		3: func() {
+			var s = make([]int, 2)
+			_ = s[2]
+		},
+		4: func() {
+			n := -1
+			_ = make(chan bool, n)
+		},
+		5: func() {
+			close((chan bool)(nil))
+		},
+	}
+
+	for i, fn := range testCases {
+		got := panicValue(fn)
+		if _, ok := got.(runtime.Error); !ok {
+			t.Errorf("test #%d: recovered value %v(type %T) does not implement runtime.Error", i, got, got)
+		}
+	}
+}
+
+func panicValue(fn func()) (recovered interface{}) {
+	defer func() {
+		recovered = recover()
+	}()
+	fn()
+	return
+}
+
 func TestPanicAfterGoexit(t *testing.T) {
 	// an uncaught panic should still work after goexit
 	output := runTestProg(t, "testprog", "PanicAfterGoexit")
diff --git a/src/runtime/error.go b/src/runtime/error.go
index 3e1ec4bc5a..15f6bdf014 100644
--- a/src/runtime/error.go
+++ b/src/runtime/error.go
@@ -50,6 +50,17 @@ func (e errorString) Error() string {
 	return "runtime error: " + string(e)
 }
 
+// plainError represents a runtime error described by a string without
+// the prefix "runtime error: " after invoking errorString.Error().
+// See Issue #14965.
+type plainError string
+
+func (e plainError) RuntimeError() {}
+
+func (e plainError) Error() string {
+	return string(e)
+}
+
 type stringer interface {
 	String() string
 }
@@ -82,5 +93,5 @@ func printany(i interface{}) {
 
 // called from generated code
 func panicwrap(pkg, typ, meth string) {
-	panic("value method " + pkg + "." + typ + "." + meth + " called using nil *" + typ + " pointer")
+	panic(plainError("value method " + pkg + "." + typ + "." + meth + " called using nil *" + typ + " pointer"))
 }
diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go
index 80b2b5338c..9e18192cd8 100644
--- a/src/runtime/hashmap.go
+++ b/src/runtime/hashmap.go
@@ -194,7 +194,7 @@ func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap {
 	}
 
 	if hint < 0 || int64(int32(hint)) != hint {
-		panic("makemap: size out of range")
+		panic(plainError("makemap: size out of range"))
 		// TODO: make hint an int, then none of this nonsense
 	}
 
@@ -428,7 +428,7 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe
 
 func mapassign1(t *maptype, h *hmap, key unsafe.Pointer, val unsafe.Pointer) {
 	if h == nil {
-		panic("assignment to entry in nil map")
+		panic(plainError("assignment to entry in nil map"))
 	}
 	if raceenabled {
 		callerpc := getcallerpc(unsafe.Pointer(&t))
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 5f1e2f64c0..ee4728c9a5 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -793,7 +793,7 @@ func newarray(typ *_type, n uintptr) unsafe.Pointer {
 		flags |= flagNoScan
 	}
 	if int(n) < 0 || (typ.size > 0 && n > _MaxMem/typ.size) {
-		panic("runtime: allocation size out of range")
+		panic(plainError("runtime: allocation size out of range"))
 	}
 	return mallocgc(typ.size*n, typ, flags)
 }
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 1f55b0fa21..1a9dbd6c53 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -381,7 +381,7 @@ func badmcall2(fn func(*g)) {
 }
 
 func badreflectcall() {
-	panic("runtime: arg size to reflect.call more than 1GB")
+	panic(plainError("arg size to reflect.call more than 1GB"))
 }
 
 func lockedOSThread() bool {
diff --git a/src/runtime/select.go b/src/runtime/select.go
index c80c833b15..9810db5453 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -594,7 +594,7 @@ retc:
 sclose:
 	// send on closed channel
 	selunlock(scases, lockorder)
-	panic("send on closed channel")
+	panic(plainError("send on closed channel"))
 }
 
 func (c *hchan) sortkey() uintptr {
-- 
cgit v1.3


From 974c201f74f730737964e5239da473fc548b408e Mon Sep 17 00:00:00 2001
From: Josh Bleecher Snyder <josharian@gmail.com>
Date: Sun, 10 Apr 2016 10:43:04 -0700
Subject: runtime: avoid unnecessary map iteration write barrier

Update #14921

Change-Id: I5c5816d0193757bf7465b1e09c27ca06897df4bf
Reviewed-on: https://go-review.googlesource.com/21814
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/runtime/hashmap.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/runtime')

diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go
index 9e18192cd8..d549ce4194 100644
--- a/src/runtime/hashmap.go
+++ b/src/runtime/hashmap.go
@@ -790,7 +790,9 @@ next:
 				}
 			}
 			it.bucket = bucket
-			it.bptr = b
+			if it.bptr != b { // avoid unnecessary write barrier; see issue 14921
+				it.bptr = b
+			}
 			it.i = i + 1
 			it.checkBucket = checkBucket
 			return
-- 
cgit v1.3


From 6b33b0e98e9be77d98b026ae2adf10dd71be5a1b Mon Sep 17 00:00:00 2001
From: Josh Bleecher Snyder <josharian@gmail.com>
Date: Sun, 10 Apr 2016 09:08:00 -0700
Subject: cmd/compile: avoid a spill in append fast path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of spilling newlen, recalculate it.
This removes a spill from the fast path,
at the cost of a cheap recalculation
on the (rare) growth path.
This uses 8 bytes less of stack space.
It generates two more bytes of code,
but that is due to suboptimal register allocation;
see far below.

Runtime append microbenchmarks are all over the map,
presumably due to incidental code movement.

Sample code:

func s(b []byte) []byte {
	b = append(b, 1, 2, 3)
	return b
}

Before:

"".s t=1 size=160 args=0x30 locals=0x48
	0x0000 00000 (append.go:8)	TEXT	"".s(SB), $72-48
	0x0000 00000 (append.go:8)	MOVQ	(TLS), CX
	0x0009 00009 (append.go:8)	CMPQ	SP, 16(CX)
	0x000d 00013 (append.go:8)	JLS	149
	0x0013 00019 (append.go:8)	SUBQ	$72, SP
	0x0017 00023 (append.go:8)	FUNCDATA	$0, gclocals·6432f8c6a0d23fa7bee6c5d96f21a92a(SB)
	0x0017 00023 (append.go:8)	FUNCDATA	$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
	0x0017 00023 (append.go:9)	MOVQ	"".b+88(FP), CX
	0x001c 00028 (append.go:9)	LEAQ	3(CX), DX
	0x0020 00032 (append.go:9)	MOVQ	DX, "".autotmp_0+64(SP)
	0x0025 00037 (append.go:9)	MOVQ	"".b+96(FP), BX
	0x002a 00042 (append.go:9)	CMPQ	DX, BX
	0x002d 00045 (append.go:9)	JGT	$0, 86
	0x002f 00047 (append.go:8)	MOVQ	"".b+80(FP), AX
	0x0034 00052 (append.go:9)	MOVB	$1, (AX)(CX*1)
	0x0038 00056 (append.go:9)	MOVB	$2, 1(AX)(CX*1)
	0x003d 00061 (append.go:9)	MOVB	$3, 2(AX)(CX*1)
	0x0042 00066 (append.go:10)	MOVQ	AX, "".~r1+104(FP)
	0x0047 00071 (append.go:10)	MOVQ	DX, "".~r1+112(FP)
	0x004c 00076 (append.go:10)	MOVQ	BX, "".~r1+120(FP)
	0x0051 00081 (append.go:10)	ADDQ	$72, SP
	0x0055 00085 (append.go:10)	RET
	0x0056 00086 (append.go:9)	LEAQ	type.[]uint8(SB), AX
	0x005d 00093 (append.go:9)	MOVQ	AX, (SP)
	0x0061 00097 (append.go:9)	MOVQ	"".b+80(FP), BP
	0x0066 00102 (append.go:9)	MOVQ	BP, 8(SP)
	0x006b 00107 (append.go:9)	MOVQ	CX, 16(SP)
	0x0070 00112 (append.go:9)	MOVQ	BX, 24(SP)
	0x0075 00117 (append.go:9)	MOVQ	DX, 32(SP)
	0x007a 00122 (append.go:9)	PCDATA	$0, $0
	0x007a 00122 (append.go:9)	CALL	runtime.growslice(SB)
	0x007f 00127 (append.go:9)	MOVQ	40(SP), AX
	0x0084 00132 (append.go:9)	MOVQ	56(SP), BX
	0x0089 00137 (append.go:8)	MOVQ	"".b+88(FP), CX
	0x008e 00142 (append.go:9)	MOVQ	"".autotmp_0+64(SP), DX
	0x0093 00147 (append.go:9)	JMP	52
	0x0095 00149 (append.go:9)	NOP
	0x0095 00149 (append.go:8)	CALL	runtime.morestack_noctxt(SB)
	0x009a 00154 (append.go:8)	JMP	0

After:

"".s t=1 size=176 args=0x30 locals=0x40
	0x0000 00000 (append.go:8)	TEXT	"".s(SB), $64-48
	0x0000 00000 (append.go:8)	MOVQ	(TLS), CX
	0x0009 00009 (append.go:8)	CMPQ	SP, 16(CX)
	0x000d 00013 (append.go:8)	JLS	151
	0x0013 00019 (append.go:8)	SUBQ	$64, SP
	0x0017 00023 (append.go:8)	FUNCDATA	$0, gclocals·6432f8c6a0d23fa7bee6c5d96f21a92a(SB)
	0x0017 00023 (append.go:8)	FUNCDATA	$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
	0x0017 00023 (append.go:9)	MOVQ	"".b+80(FP), CX
	0x001c 00028 (append.go:9)	LEAQ	3(CX), DX
	0x0020 00032 (append.go:9)	MOVQ	"".b+88(FP), BX
	0x0025 00037 (append.go:9)	CMPQ	DX, BX
	0x0028 00040 (append.go:9)	JGT	$0, 81
	0x002a 00042 (append.go:8)	MOVQ	"".b+72(FP), AX
	0x002f 00047 (append.go:9)	MOVB	$1, (AX)(CX*1)
	0x0033 00051 (append.go:9)	MOVB	$2, 1(AX)(CX*1)
	0x0038 00056 (append.go:9)	MOVB	$3, 2(AX)(CX*1)
	0x003d 00061 (append.go:10)	MOVQ	AX, "".~r1+96(FP)
	0x0042 00066 (append.go:10)	MOVQ	DX, "".~r1+104(FP)
	0x0047 00071 (append.go:10)	MOVQ	BX, "".~r1+112(FP)
	0x004c 00076 (append.go:10)	ADDQ	$64, SP
	0x0050 00080 (append.go:10)	RET
	0x0051 00081 (append.go:9)	LEAQ	type.[]uint8(SB), AX
	0x0058 00088 (append.go:9)	MOVQ	AX, (SP)
	0x005c 00092 (append.go:9)	MOVQ	"".b+72(FP), BP
	0x0061 00097 (append.go:9)	MOVQ	BP, 8(SP)
	0x0066 00102 (append.go:9)	MOVQ	CX, 16(SP)
	0x006b 00107 (append.go:9)	MOVQ	BX, 24(SP)
	0x0070 00112 (append.go:9)	MOVQ	DX, 32(SP)
	0x0075 00117 (append.go:9)	PCDATA	$0, $0
	0x0075 00117 (append.go:9)	CALL	runtime.growslice(SB)
	0x007a 00122 (append.go:9)	MOVQ	40(SP), AX
	0x007f 00127 (append.go:9)	MOVQ	48(SP), CX
	0x0084 00132 (append.go:9)	MOVQ	56(SP), BX
	0x0089 00137 (append.go:9)	ADDQ	$3, CX
	0x008d 00141 (append.go:9)	MOVQ	CX, DX
	0x0090 00144 (append.go:8)	MOVQ	"".b+80(FP), CX
	0x0095 00149 (append.go:9)	JMP	47
	0x0097 00151 (append.go:9)	NOP
	0x0097 00151 (append.go:8)	CALL	runtime.morestack_noctxt(SB)
	0x009c 00156 (append.go:8)	JMP	0

Observe that in the following sequence,
we should use DX directly instead of using
CX as a temporary register, which would make
the new code a strict improvement on the old:

	0x007f 00127 (append.go:9)	MOVQ	48(SP), CX
	0x0084 00132 (append.go:9)	MOVQ	56(SP), BX
	0x0089 00137 (append.go:9)	ADDQ	$3, CX
	0x008d 00141 (append.go:9)	MOVQ	CX, DX
	0x0090 00144 (append.go:8)	MOVQ	"".b+80(FP), CX

Change-Id: I4ee50b18fa53865901d2d7f86c2cbb54c6fa6924
Reviewed-on: https://go-review.googlesource.com/21812
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/cmd/compile/internal/gc/ssa.go | 30 +++++++++++++++++-------------
 src/runtime/slice.go               |  6 ++++++
 2 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 7c5f906d76..d69559d945 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -337,12 +337,13 @@ var (
 	memVar = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "mem"}}
 
 	// dummy nodes for temporary variables
-	ptrVar   = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "ptr"}}
-	capVar   = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "cap"}}
-	typVar   = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "typ"}}
-	idataVar = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "idata"}}
-	okVar    = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "ok"}}
-	deltaVar = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "delta"}}
+	ptrVar    = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "ptr"}}
+	newlenVar = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "newlen"}}
+	capVar    = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "cap"}}
+	typVar    = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "typ"}}
+	idataVar  = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "idata"}}
+	okVar     = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "ok"}}
+	deltaVar  = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "delta"}}
 )
 
 // startBlock sets the current block we're generating code in to b.
@@ -2089,15 +2090,16 @@ func (s *state) expr(n *Node) *ssa.Value {
 // exprAppend converts an OAPPEND node n to an ssa.Value, adds it to s, and returns the Value.
 func (s *state) exprAppend(n *Node) *ssa.Value {
 	// append(s, e1, e2, e3).  Compile like:
-	// ptr,len,cap := s
+	// ptr, len, cap := s
 	// newlen := len + 3
 	// if newlen > s.cap {
-	//     ptr,_,cap = growslice(s, newlen)
+	//     ptr, len, cap = growslice(s, newlen)
+	//     newlen = len + 3 // recalculate to avoid a spill
 	// }
 	// *(ptr+len) = e1
 	// *(ptr+len+1) = e2
 	// *(ptr+len+2) = e3
-	// makeslice(ptr,newlen,cap)
+	// makeslice(ptr, newlen, cap)
 
 	et := n.Type.Elem()
 	pt := Ptrto(et)
@@ -2117,6 +2119,7 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
 	nl := s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], l, s.constInt(Types[TINT], nargs))
 	cmp := s.newValue2(s.ssaOp(OGT, Types[TINT]), Types[TBOOL], nl, c)
 	s.vars[&ptrVar] = p
+	s.vars[&newlenVar] = nl
 	s.vars[&capVar] = c
 	b := s.endBlock()
 	b.Kind = ssa.BlockIf
@@ -2132,8 +2135,7 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
 	r := s.rtcall(growslice, true, []*Type{pt, Types[TINT], Types[TINT]}, taddr, p, l, c, nl)
 
 	s.vars[&ptrVar] = r[0]
-	// Note: we don't need to read r[1], the result's length. It will be nl.
-	// (or maybe we should, we just have to spill/restore nl otherwise?)
+	s.vars[&newlenVar] = s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], r[1], s.constInt(Types[TINT], nargs))
 	s.vars[&capVar] = r[2]
 	b = s.endBlock()
 	b.AddEdgeTo(assign)
@@ -2154,8 +2156,9 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
 		}
 	}
 
-	p = s.variable(&ptrVar, pt)          // generates phi for ptr
-	c = s.variable(&capVar, Types[TINT]) // generates phi for cap
+	p = s.variable(&ptrVar, pt)              // generates phi for ptr
+	nl = s.variable(&newlenVar, Types[TINT]) // generates phi for nl
+	c = s.variable(&capVar, Types[TINT])     // generates phi for cap
 	p2 := s.newValue2(ssa.OpPtrIndex, pt, p, l)
 	// TODO: just one write barrier call for all of these writes?
 	// TODO: maybe just one writeBarrier.enabled check?
@@ -2178,6 +2181,7 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
 
 	// make result
 	delete(s.vars, &ptrVar)
+	delete(s.vars, &newlenVar)
 	delete(s.vars, &capVar)
 	return s.newValue3(ssa.OpSliceMake, n.Type, p, nl, c)
 }
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index 0bc0299f72..4ab221056c 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -37,6 +37,12 @@ func makeslice(t *slicetype, len64, cap64 int64) slice {
 // It is passed the slice type, the old slice, and the desired new minimum capacity,
 // and it returns a new slice with at least that capacity, with the old data
 // copied into it.
+// The new slice's length is set to the old slice's length,
+// NOT to the new requested capacity.
+// This is for codegen convenience. The old slice's length is used immediately
+// to calculate where to write new values during an append.
+// TODO: When the old backend is gone, reconsider this decision.
+// The SSA backend might prefer the new length or to return only ptr/cap and save stack space.
 func growslice(t *slicetype, old slice, cap int) slice {
 	if raceenabled {
 		callerpc := getcallerpc(unsafe.Pointer(&t))
-- 
cgit v1.3


From ad7448fe982d83de15deec9c55c56d0cd9261c6c Mon Sep 17 00:00:00 2001
From: Martin Möhrmann <martisch@uos.de>
Date: Sun, 10 Apr 2016 17:32:35 +0200
Subject: runtime: speed up makeslice by avoiding divisions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Compute the maximum number of elements allowed per slice only once.

name         old time/op  new time/op  delta
MakeSlice-2  55.5ns ± 1%  45.6ns ± 2%  -17.88%  (p=0.000 n=99+100)

Change-Id: I951feffda5d11910a75e55d7e978d306d14da2c5
Reviewed-on: https://go-review.googlesource.com/21801
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/runtime/append_test.go |  8 ++++++++
 src/runtime/slice.go       | 14 +++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/append_test.go b/src/runtime/append_test.go
index 3170870b0e..6d7836a351 100644
--- a/src/runtime/append_test.go
+++ b/src/runtime/append_test.go
@@ -7,6 +7,14 @@ import "testing"
 
 const N = 20
 
+func BenchmarkMakeSlice(b *testing.B) {
+	var x []byte
+	for i := 0; i < b.N; i++ {
+		x = make([]byte, 32)
+		_ = x
+	}
+}
+
 func BenchmarkGrowSliceBytes(b *testing.B) {
 	b.StopTimer()
 	var x = make([]byte, 9)
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index 4ab221056c..f36ec0b466 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -16,19 +16,27 @@ type slice struct {
 
 // TODO: take uintptrs instead of int64s?
 func makeslice(t *slicetype, len64, cap64 int64) slice {
-	// NOTE: The len > MaxMem/elemsize check here is not strictly necessary,
+	// NOTE: The len > maxElements check here is not strictly necessary,
 	// but it produces a 'len out of range' error instead of a 'cap out of range' error
 	// when someone does make([]T, bignumber). 'cap out of range' is true too,
 	// but since the cap is only being supplied implicitly, saying len is clearer.
 	// See issue 4085.
+
+	maxElements := ^uintptr(0)
+	if t.elem.size > 0 {
+		maxElements = _MaxMem / t.elem.size
+	}
+
 	len := int(len64)
-	if len64 < 0 || int64(len) != len64 || t.elem.size > 0 && uintptr(len) > _MaxMem/t.elem.size {
+	if len64 < 0 || int64(len) != len64 || uintptr(len) > maxElements {
 		panic(errorString("makeslice: len out of range"))
 	}
+
 	cap := int(cap64)
-	if cap < len || int64(cap) != cap64 || t.elem.size > 0 && uintptr(cap) > _MaxMem/t.elem.size {
+	if cap < len || int64(cap) != cap64 || uintptr(cap) > maxElements {
 		panic(errorString("makeslice: cap out of range"))
 	}
+
 	p := newarray(t.elem, uintptr(cap))
 	return slice{p, len, cap}
 }
-- 
cgit v1.3


From ba09d06e166a06b4405b2ffd92df6acf222d281f Mon Sep 17 00:00:00 2001
From: Jeremy Jackins <jeremyjackins@gmail.com>
Date: Thu, 7 Apr 2016 15:42:35 +0900
Subject: runtime: remove remaining references to TheChar

After mdempsky's recent changes, these are the only references to
"TheChar" left in the Go tree. Without the context, and without
knowing the history, this is confusing.

Also rename sys.TheGoos and sys.TheGoarch to sys.GOOS
and sys.GOARCH.

Also change the heap dump format to include sys.GOARCH
rather than TheChar, which is no longer a concept.

Updates #15169 (changes heapdump format)

Change-Id: I3e99eeeae00ed55d7d01e6ed503d958c6e931dca
Reviewed-on: https://go-review.googlesource.com/21647
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
---
 src/runtime/extern.go                        |  4 ++--
 src/runtime/heapdump.go                      |  2 +-
 src/runtime/internal/sys/arch.go             | 17 +++++++++++++++++
 src/runtime/internal/sys/arch_386.go         |  2 +-
 src/runtime/internal/sys/arch_amd64.go       |  2 +-
 src/runtime/internal/sys/arch_amd64p32.go    |  2 +-
 src/runtime/internal/sys/arch_arm.go         |  2 +-
 src/runtime/internal/sys/arch_arm64.go       |  2 +-
 src/runtime/internal/sys/arch_mips64.go      |  2 +-
 src/runtime/internal/sys/arch_mips64le.go    |  2 +-
 src/runtime/internal/sys/arch_ppc64.go       |  2 +-
 src/runtime/internal/sys/arch_ppc64le.go     |  2 +-
 src/runtime/internal/sys/arch_s390x.go       |  2 +-
 src/runtime/internal/sys/gengoos.go          |  4 ++--
 src/runtime/internal/sys/zgoarch_386.go      |  2 +-
 src/runtime/internal/sys/zgoarch_amd64.go    |  2 +-
 src/runtime/internal/sys/zgoarch_amd64p32.go |  2 +-
 src/runtime/internal/sys/zgoarch_arm.go      |  2 +-
 src/runtime/internal/sys/zgoarch_arm64.go    |  2 +-
 src/runtime/internal/sys/zgoarch_mips64.go   |  2 +-
 src/runtime/internal/sys/zgoarch_mips64le.go |  2 +-
 src/runtime/internal/sys/zgoarch_ppc64.go    |  2 +-
 src/runtime/internal/sys/zgoarch_ppc64le.go  |  2 +-
 src/runtime/internal/sys/zgoarch_s390x.go    |  2 +-
 src/runtime/internal/sys/zgoos_android.go    |  2 +-
 src/runtime/internal/sys/zgoos_darwin.go     |  2 +-
 src/runtime/internal/sys/zgoos_dragonfly.go  |  2 +-
 src/runtime/internal/sys/zgoos_freebsd.go    |  2 +-
 src/runtime/internal/sys/zgoos_linux.go      |  2 +-
 src/runtime/internal/sys/zgoos_nacl.go       |  2 +-
 src/runtime/internal/sys/zgoos_netbsd.go     |  2 +-
 src/runtime/internal/sys/zgoos_openbsd.go    |  2 +-
 src/runtime/internal/sys/zgoos_plan9.go      |  2 +-
 src/runtime/internal/sys/zgoos_solaris.go    |  2 +-
 src/runtime/internal/sys/zgoos_windows.go    |  2 +-
 src/runtime/mgcmark.go                       |  4 ++--
 src/runtime/stack.go                         |  8 ++++----
 37 files changed, 59 insertions(+), 42 deletions(-)
 create mode 100644 src/runtime/internal/sys/arch.go

(limited to 'src/runtime')

diff --git a/src/runtime/extern.go b/src/runtime/extern.go
index 1d8304f4fc..1df8691cfc 100644
--- a/src/runtime/extern.go
+++ b/src/runtime/extern.go
@@ -224,8 +224,8 @@ func Version() string {
 
 // GOOS is the running program's operating system target:
 // one of darwin, freebsd, linux, and so on.
-const GOOS string = sys.TheGoos
+const GOOS string = sys.GOOS
 
 // GOARCH is the running program's architecture target:
 // 386, amd64, arm, or s390x.
-const GOARCH string = sys.TheGoarch
+const GOARCH string = sys.GOARCH
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index e6a41f7f97..2410b1954a 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -496,7 +496,7 @@ func dumpparams() {
 	dumpint(sys.PtrSize)
 	dumpint(uint64(mheap_.arena_start))
 	dumpint(uint64(mheap_.arena_used))
-	dumpint(sys.TheChar)
+	dumpstr(sys.GOARCH)
 	dumpstr(sys.Goexperiment)
 	dumpint(uint64(ncpu))
 }
diff --git a/src/runtime/internal/sys/arch.go b/src/runtime/internal/sys/arch.go
new file mode 100644
index 0000000000..c1757041d8
--- /dev/null
+++ b/src/runtime/internal/sys/arch.go
@@ -0,0 +1,17 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sys
+
+type ArchFamilyType int
+
+const (
+	AMD64 ArchFamilyType = iota
+	ARM
+	ARM64
+	I386
+	MIPS64
+	PPC64
+	S390X
+)
diff --git a/src/runtime/internal/sys/arch_386.go b/src/runtime/internal/sys/arch_386.go
index 1f1c704f9a..48c42f7584 100644
--- a/src/runtime/internal/sys/arch_386.go
+++ b/src/runtime/internal/sys/arch_386.go
@@ -5,7 +5,7 @@
 package sys
 
 const (
-	TheChar       = '8'
+	ArchFamily    = I386
 	BigEndian     = 0
 	CacheLineSize = 64
 	PhysPageSize  = GoosNacl*65536 + (1-GoosNacl)*4096 // 4k normally; 64k on NaCl
diff --git a/src/runtime/internal/sys/arch_amd64.go b/src/runtime/internal/sys/arch_amd64.go
index 80fff557f2..1bbdb99e07 100644
--- a/src/runtime/internal/sys/arch_amd64.go
+++ b/src/runtime/internal/sys/arch_amd64.go
@@ -5,7 +5,7 @@
 package sys
 
 const (
-	TheChar       = '6'
+	ArchFamily    = AMD64
 	BigEndian     = 0
 	CacheLineSize = 64
 	PhysPageSize  = 4096
diff --git a/src/runtime/internal/sys/arch_amd64p32.go b/src/runtime/internal/sys/arch_amd64p32.go
index ca29f698a2..b7011a4ff2 100644
--- a/src/runtime/internal/sys/arch_amd64p32.go
+++ b/src/runtime/internal/sys/arch_amd64p32.go
@@ -5,7 +5,7 @@
 package sys
 
 const (
-	TheChar       = '6'
+	ArchFamily    = AMD64
 	BigEndian     = 0
 	CacheLineSize = 64
 	PhysPageSize  = 65536*GoosNacl + 4096*(1-GoosNacl)
diff --git a/src/runtime/internal/sys/arch_arm.go b/src/runtime/internal/sys/arch_arm.go
index b185e8fb69..f90f52da7f 100644
--- a/src/runtime/internal/sys/arch_arm.go
+++ b/src/runtime/internal/sys/arch_arm.go
@@ -5,7 +5,7 @@
 package sys
 
 const (
-	TheChar       = '5'
+	ArchFamily    = ARM
 	BigEndian     = 0
 	CacheLineSize = 32
 	PhysPageSize  = 65536*GoosNacl + 4096*(1-GoosNacl)
diff --git a/src/runtime/internal/sys/arch_arm64.go b/src/runtime/internal/sys/arch_arm64.go
index b63a7a6f9a..aaaa4b0947 100644
--- a/src/runtime/internal/sys/arch_arm64.go
+++ b/src/runtime/internal/sys/arch_arm64.go
@@ -5,7 +5,7 @@
 package sys
 
 const (
-	TheChar       = '7'
+	ArchFamily    = ARM64
 	BigEndian     = 0
 	CacheLineSize = 32
 	PhysPageSize  = 65536
diff --git a/src/runtime/internal/sys/arch_mips64.go b/src/runtime/internal/sys/arch_mips64.go
index 5b933d4e1a..d5672599d2 100644
--- a/src/runtime/internal/sys/arch_mips64.go
+++ b/src/runtime/internal/sys/arch_mips64.go
@@ -5,7 +5,7 @@
 package sys
 
 const (
-	TheChar       = '0'
+	ArchFamily    = MIPS64
 	BigEndian     = 1
 	CacheLineSize = 32
 	PhysPageSize  = 16384
diff --git a/src/runtime/internal/sys/arch_mips64le.go b/src/runtime/internal/sys/arch_mips64le.go
index ce2e98b19f..f8cdf2b2d2 100644
--- a/src/runtime/internal/sys/arch_mips64le.go
+++ b/src/runtime/internal/sys/arch_mips64le.go
@@ -5,7 +5,7 @@
 package sys
 
 const (
-	TheChar       = '0'
+	ArchFamily    = MIPS64
 	BigEndian     = 0
 	CacheLineSize = 32
 	PhysPageSize  = 16384
diff --git a/src/runtime/internal/sys/arch_ppc64.go b/src/runtime/internal/sys/arch_ppc64.go
index 3aa07e1f56..cdec63ff71 100644
--- a/src/runtime/internal/sys/arch_ppc64.go
+++ b/src/runtime/internal/sys/arch_ppc64.go
@@ -5,7 +5,7 @@
 package sys
 
 const (
-	TheChar       = '9'
+	ArchFamily    = PPC64
 	BigEndian     = 1
 	CacheLineSize = 64
 	PhysPageSize  = 65536
diff --git a/src/runtime/internal/sys/arch_ppc64le.go b/src/runtime/internal/sys/arch_ppc64le.go
index 0f02f0bf3c..4fd68f9ce3 100644
--- a/src/runtime/internal/sys/arch_ppc64le.go
+++ b/src/runtime/internal/sys/arch_ppc64le.go
@@ -5,7 +5,7 @@
 package sys
 
 const (
-	TheChar       = '9'
+	ArchFamily    = PPC64
 	BigEndian     = 0
 	CacheLineSize = 64
 	PhysPageSize  = 65536
diff --git a/src/runtime/internal/sys/arch_s390x.go b/src/runtime/internal/sys/arch_s390x.go
index 8690571c81..ca1cb8646e 100644
--- a/src/runtime/internal/sys/arch_s390x.go
+++ b/src/runtime/internal/sys/arch_s390x.go
@@ -5,7 +5,7 @@
 package sys
 
 const (
-	TheChar       = 'z'
+	ArchFamily    = S390X
 	BigEndian     = 1
 	CacheLineSize = 256
 	PhysPageSize  = 4096
diff --git a/src/runtime/internal/sys/gengoos.go b/src/runtime/internal/sys/gengoos.go
index e2bd87de4e..4c45c0af02 100644
--- a/src/runtime/internal/sys/gengoos.go
+++ b/src/runtime/internal/sys/gengoos.go
@@ -50,7 +50,7 @@ func main() {
 			fmt.Fprintf(&buf, "// +build !android\n\n") // must explicitly exclude android for linux
 		}
 		fmt.Fprintf(&buf, "package sys\n\n")
-		fmt.Fprintf(&buf, "const TheGoos = `%s`\n\n", target)
+		fmt.Fprintf(&buf, "const GOOS = `%s`\n\n", target)
 		for _, goos := range gooses {
 			value := 0
 			if goos == target {
@@ -68,7 +68,7 @@ func main() {
 		var buf bytes.Buffer
 		fmt.Fprintf(&buf, "// generated by gengoos.go using 'go generate'\n\n")
 		fmt.Fprintf(&buf, "package sys\n\n")
-		fmt.Fprintf(&buf, "const TheGoarch = `%s`\n\n", target)
+		fmt.Fprintf(&buf, "const GOARCH = `%s`\n\n", target)
 		for _, goarch := range goarches {
 			value := 0
 			if goarch == target {
diff --git a/src/runtime/internal/sys/zgoarch_386.go b/src/runtime/internal/sys/zgoarch_386.go
index 3ad244509d..3bcf83b8e3 100644
--- a/src/runtime/internal/sys/zgoarch_386.go
+++ b/src/runtime/internal/sys/zgoarch_386.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoarch = `386`
+const GOARCH = `386`
 
 const Goarch386 = 1
 const GoarchAmd64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_amd64.go b/src/runtime/internal/sys/zgoarch_amd64.go
index 7c858e3f5d..699f191fba 100644
--- a/src/runtime/internal/sys/zgoarch_amd64.go
+++ b/src/runtime/internal/sys/zgoarch_amd64.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoarch = `amd64`
+const GOARCH = `amd64`
 
 const Goarch386 = 0
 const GoarchAmd64 = 1
diff --git a/src/runtime/internal/sys/zgoarch_amd64p32.go b/src/runtime/internal/sys/zgoarch_amd64p32.go
index 772031c090..cc2d658406 100644
--- a/src/runtime/internal/sys/zgoarch_amd64p32.go
+++ b/src/runtime/internal/sys/zgoarch_amd64p32.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoarch = `amd64p32`
+const GOARCH = `amd64p32`
 
 const Goarch386 = 0
 const GoarchAmd64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_arm.go b/src/runtime/internal/sys/zgoarch_arm.go
index 276e8a869b..a5fd789f13 100644
--- a/src/runtime/internal/sys/zgoarch_arm.go
+++ b/src/runtime/internal/sys/zgoarch_arm.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoarch = `arm`
+const GOARCH = `arm`
 
 const Goarch386 = 0
 const GoarchAmd64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_arm64.go b/src/runtime/internal/sys/zgoarch_arm64.go
index d124ec0343..084d2c7330 100644
--- a/src/runtime/internal/sys/zgoarch_arm64.go
+++ b/src/runtime/internal/sys/zgoarch_arm64.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoarch = `arm64`
+const GOARCH = `arm64`
 
 const Goarch386 = 0
 const GoarchAmd64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_mips64.go b/src/runtime/internal/sys/zgoarch_mips64.go
index b4a97d6da9..2ad62bd68c 100644
--- a/src/runtime/internal/sys/zgoarch_mips64.go
+++ b/src/runtime/internal/sys/zgoarch_mips64.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoarch = `mips64`
+const GOARCH = `mips64`
 
 const Goarch386 = 0
 const GoarchAmd64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_mips64le.go b/src/runtime/internal/sys/zgoarch_mips64le.go
index 3328a35bd2..047c8b425a 100644
--- a/src/runtime/internal/sys/zgoarch_mips64le.go
+++ b/src/runtime/internal/sys/zgoarch_mips64le.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoarch = `mips64le`
+const GOARCH = `mips64le`
 
 const Goarch386 = 0
 const GoarchAmd64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_ppc64.go b/src/runtime/internal/sys/zgoarch_ppc64.go
index 06f78b2023..748b5b562c 100644
--- a/src/runtime/internal/sys/zgoarch_ppc64.go
+++ b/src/runtime/internal/sys/zgoarch_ppc64.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoarch = `ppc64`
+const GOARCH = `ppc64`
 
 const Goarch386 = 0
 const GoarchAmd64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_ppc64le.go b/src/runtime/internal/sys/zgoarch_ppc64le.go
index 50b56dbe3f..d3dcba467d 100644
--- a/src/runtime/internal/sys/zgoarch_ppc64le.go
+++ b/src/runtime/internal/sys/zgoarch_ppc64le.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoarch = `ppc64le`
+const GOARCH = `ppc64le`
 
 const Goarch386 = 0
 const GoarchAmd64 = 0
diff --git a/src/runtime/internal/sys/zgoarch_s390x.go b/src/runtime/internal/sys/zgoarch_s390x.go
index ce85f20e0a..1ead5d573c 100644
--- a/src/runtime/internal/sys/zgoarch_s390x.go
+++ b/src/runtime/internal/sys/zgoarch_s390x.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoarch = `s390x`
+const GOARCH = `s390x`
 
 const Goarch386 = 0
 const GoarchAmd64 = 0
diff --git a/src/runtime/internal/sys/zgoos_android.go b/src/runtime/internal/sys/zgoos_android.go
index 03d91760ed..6503b15246 100644
--- a/src/runtime/internal/sys/zgoos_android.go
+++ b/src/runtime/internal/sys/zgoos_android.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoos = `android`
+const GOOS = `android`
 
 const GoosAndroid = 1
 const GoosDarwin = 0
diff --git a/src/runtime/internal/sys/zgoos_darwin.go b/src/runtime/internal/sys/zgoos_darwin.go
index eb2efeb7af..6a285984bd 100644
--- a/src/runtime/internal/sys/zgoos_darwin.go
+++ b/src/runtime/internal/sys/zgoos_darwin.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoos = `darwin`
+const GOOS = `darwin`
 
 const GoosAndroid = 0
 const GoosDarwin = 1
diff --git a/src/runtime/internal/sys/zgoos_dragonfly.go b/src/runtime/internal/sys/zgoos_dragonfly.go
index 403cf65311..886ac2698f 100644
--- a/src/runtime/internal/sys/zgoos_dragonfly.go
+++ b/src/runtime/internal/sys/zgoos_dragonfly.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoos = `dragonfly`
+const GOOS = `dragonfly`
 
 const GoosAndroid = 0
 const GoosDarwin = 0
diff --git a/src/runtime/internal/sys/zgoos_freebsd.go b/src/runtime/internal/sys/zgoos_freebsd.go
index 632d5db9db..0bf2403eab 100644
--- a/src/runtime/internal/sys/zgoos_freebsd.go
+++ b/src/runtime/internal/sys/zgoos_freebsd.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoos = `freebsd`
+const GOOS = `freebsd`
 
 const GoosAndroid = 0
 const GoosDarwin = 0
diff --git a/src/runtime/internal/sys/zgoos_linux.go b/src/runtime/internal/sys/zgoos_linux.go
index 2d43869a84..c8664db15d 100644
--- a/src/runtime/internal/sys/zgoos_linux.go
+++ b/src/runtime/internal/sys/zgoos_linux.go
@@ -4,7 +4,7 @@
 
 package sys
 
-const TheGoos = `linux`
+const GOOS = `linux`
 
 const GoosAndroid = 0
 const GoosDarwin = 0
diff --git a/src/runtime/internal/sys/zgoos_nacl.go b/src/runtime/internal/sys/zgoos_nacl.go
index a56b6ef3c9..054122638a 100644
--- a/src/runtime/internal/sys/zgoos_nacl.go
+++ b/src/runtime/internal/sys/zgoos_nacl.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoos = `nacl`
+const GOOS = `nacl`
 
 const GoosAndroid = 0
 const GoosDarwin = 0
diff --git a/src/runtime/internal/sys/zgoos_netbsd.go b/src/runtime/internal/sys/zgoos_netbsd.go
index 46fd0a7cd5..5c509a1250 100644
--- a/src/runtime/internal/sys/zgoos_netbsd.go
+++ b/src/runtime/internal/sys/zgoos_netbsd.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoos = `netbsd`
+const GOOS = `netbsd`
 
 const GoosAndroid = 0
 const GoosDarwin = 0
diff --git a/src/runtime/internal/sys/zgoos_openbsd.go b/src/runtime/internal/sys/zgoos_openbsd.go
index 7ee650afbb..dc43157d49 100644
--- a/src/runtime/internal/sys/zgoos_openbsd.go
+++ b/src/runtime/internal/sys/zgoos_openbsd.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoos = `openbsd`
+const GOOS = `openbsd`
 
 const GoosAndroid = 0
 const GoosDarwin = 0
diff --git a/src/runtime/internal/sys/zgoos_plan9.go b/src/runtime/internal/sys/zgoos_plan9.go
index 162e7f6260..4b0934f77a 100644
--- a/src/runtime/internal/sys/zgoos_plan9.go
+++ b/src/runtime/internal/sys/zgoos_plan9.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoos = `plan9`
+const GOOS = `plan9`
 
 const GoosAndroid = 0
 const GoosDarwin = 0
diff --git a/src/runtime/internal/sys/zgoos_solaris.go b/src/runtime/internal/sys/zgoos_solaris.go
index b2a8f98504..42511a36ad 100644
--- a/src/runtime/internal/sys/zgoos_solaris.go
+++ b/src/runtime/internal/sys/zgoos_solaris.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoos = `solaris`
+const GOOS = `solaris`
 
 const GoosAndroid = 0
 const GoosDarwin = 0
diff --git a/src/runtime/internal/sys/zgoos_windows.go b/src/runtime/internal/sys/zgoos_windows.go
index 817ec79e4c..d77f62c396 100644
--- a/src/runtime/internal/sys/zgoos_windows.go
+++ b/src/runtime/internal/sys/zgoos_windows.go
@@ -2,7 +2,7 @@
 
 package sys
 
-const TheGoos = `windows`
+const GOOS = `windows`
 
 const GoosAndroid = 0
 const GoosDarwin = 0
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 66d61bae1e..1ab8315a29 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -719,8 +719,8 @@ func scanframeworker(frame *stkframe, cache *pcvalueCache, gcw *gcWork) {
 	// Scan local variables if stack frame has been allocated.
 	size := frame.varp - frame.sp
 	var minsize uintptr
-	switch sys.TheChar {
-	case '7':
+	switch sys.ArchFamily {
+	case sys.ARM64:
 		minsize = sys.SpAlign
 	default:
 		minsize = sys.MinFrameSize
diff --git a/src/runtime/stack.go b/src/runtime/stack.go
index fdd6710bad..dcb1b06dbd 100644
--- a/src/runtime/stack.go
+++ b/src/runtime/stack.go
@@ -634,8 +634,8 @@ func adjustframe(frame *stkframe, arg unsafe.Pointer) bool {
 	// Adjust local variables if stack frame has been allocated.
 	size := frame.varp - frame.sp
 	var minsize uintptr
-	switch sys.TheChar {
-	case '7':
+	switch sys.ArchFamily {
+	case sys.ARM64:
 		minsize = sys.SpAlign
 	default:
 		minsize = sys.MinFrameSize
@@ -662,7 +662,7 @@ func adjustframe(frame *stkframe, arg unsafe.Pointer) bool {
 	}
 
 	// Adjust saved base pointer if there is one.
-	if sys.TheChar == '6' && frame.argp-frame.varp == 2*sys.RegSize {
+	if sys.ArchFamily == sys.AMD64 && frame.argp-frame.varp == 2*sys.RegSize {
 		if !framepointer_enabled {
 			print("runtime: found space for saved base pointer, but no framepointer experiment\n")
 			print("argp=", hex(frame.argp), " varp=", hex(frame.varp), "\n")
@@ -969,7 +969,7 @@ func newstack() {
 		throw("missing stack in newstack")
 	}
 	sp := gp.sched.sp
-	if sys.TheChar == '6' || sys.TheChar == '8' {
+	if sys.ArchFamily == sys.AMD64 || sys.ArchFamily == sys.I386 {
 		// The call to morestack cost a word.
 		sp -= sys.PtrSize
 	}
-- 
cgit v1.3


From 720c4c016c75d37d14e0621696127819c8a73b0b Mon Sep 17 00:00:00 2001
From: Dave Cheney <dave@cheney.net>
Date: Fri, 8 Apr 2016 17:50:40 +1000
Subject: runtime: merge lfstack_amd64.go into lfstack_64bit.go

Merge the amd64 lfstack implementation into the general 64-bit
implementation.

Change-Id: Id9ed61b90d2e3bc3b0246294c03eb2c92803b6ca
Reviewed-on: https://go-review.googlesource.com/21707
Run-TryBot: Dave Cheney <dave@cheney.net>
Reviewed-by: Minux Ma <minux@golang.org>
---
 src/runtime/lfstack_64bit.go | 11 ++++++++++-
 src/runtime/lfstack_amd64.go | 22 ----------------------
 2 files changed, 10 insertions(+), 23 deletions(-)
 delete mode 100644 src/runtime/lfstack_amd64.go

(limited to 'src/runtime')

diff --git a/src/runtime/lfstack_64bit.go b/src/runtime/lfstack_64bit.go
index 8180b0a248..5367f08c56 100644
--- a/src/runtime/lfstack_64bit.go
+++ b/src/runtime/lfstack_64bit.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build arm64 mips64 mips64le ppc64 ppc64le s390x
+// +build amd64 arm64 mips64 mips64le ppc64 ppc64le s390x
 
 package runtime
 
@@ -22,6 +22,10 @@ const (
 	// s390x         TASK_SIZE         0x020000000000UL (41 bit addresses)
 	//
 	// These values may increase over time.
+	//
+	// On AMD64, virtual addresses are 48-bit numbers sign extended to 64.
+	// We shift the address left 16 to eliminate the sign extended part and make
+	// room in the bottom for the count.
 	addrBits = 48
 
 	// In addition to the 16 bits taken from the top, we can take 3 from the
@@ -35,5 +39,10 @@ func lfstackPack(node *lfnode, cnt uintptr) uint64 {
 }
 
 func lfstackUnpack(val uint64) *lfnode {
+	if GOARCH == "amd64" {
+		// amd64 systems can place the stack above the VA hole, so we need to sign extend
+		// val before unpacking.
+		return (*lfnode)(unsafe.Pointer(uintptr(int64(val) >> cntBits << 3)))
+	}
 	return (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3)))
 }
diff --git a/src/runtime/lfstack_amd64.go b/src/runtime/lfstack_amd64.go
deleted file mode 100644
index 6397e1d47f..0000000000
--- a/src/runtime/lfstack_amd64.go
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-// On AMD64, virtual addresses are 48-bit numbers sign extended to 64.
-// We shift the address left 16 to eliminate the sign extended part and make
-// room in the bottom for the count.
-// In addition to the 16 bits taken from the top, we can take 3 from the
-// bottom, because node must be pointer-aligned, giving a total of 19 bits
-// of count.
-
-func lfstackPack(node *lfnode, cnt uintptr) uint64 {
-	return uint64(uintptr(unsafe.Pointer(node)))<<16 | uint64(cnt&(1<<19-1))
-}
-
-func lfstackUnpack(val uint64) *lfnode {
-	return (*lfnode)(unsafe.Pointer(uintptr(int64(val) >> 19 << 3)))
-}
-- 
cgit v1.3


From 683917a72154e3409e1ab5ef5b26030388312d0b Mon Sep 17 00:00:00 2001
From: Dominik Honnef <dominik@honnef.co>
Date: Fri, 1 Apr 2016 07:34:18 +0200
Subject: all: use bytes.Equal, bytes.Contains and strings.Contains, again

The previous cleanup was done with a buggy tool, missing some potential
rewrites.

Change-Id: I333467036e355f999a6a493e8de87e084f374e26
Reviewed-on: https://go-review.googlesource.com/21378
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
---
 src/cmd/go/go_test.go          | 4 ++--
 src/html/template/url.go       | 2 +-
 src/net/http/serve_test.go     | 2 +-
 src/path/filepath/path_test.go | 2 +-
 src/runtime/gcinfo_test.go     | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/go/go_test.go b/src/cmd/go/go_test.go
index 411fd1e322..42efa9f312 100644
--- a/src/cmd/go/go_test.go
+++ b/src/cmd/go/go_test.go
@@ -1466,7 +1466,7 @@ func TestGoTestWithPackageListedMultipleTimes(t *testing.T) {
 	defer tg.cleanup()
 	tg.parallel()
 	tg.run("test", "errors", "errors", "errors", "errors", "errors")
-	if strings.Index(strings.TrimSpace(tg.getStdout()), "\n") != -1 {
+	if strings.Contains(strings.TrimSpace(tg.getStdout()), "\n") {
 		t.Error("go test errors errors errors errors errors tested the same package multiple times")
 	}
 }
@@ -1495,7 +1495,7 @@ func TestGoListCmdOnlyShowsCommands(t *testing.T) {
 	tg.run("list", "cmd")
 	out := strings.TrimSpace(tg.getStdout())
 	for _, line := range strings.Split(out, "\n") {
-		if strings.Index(line, "cmd/") == -1 {
+		if !strings.Contains(line, "cmd/") {
 			t.Error("go list cmd shows non-commands")
 			break
 		}
diff --git a/src/html/template/url.go b/src/html/template/url.go
index 2ca76bf389..246bfd32cd 100644
--- a/src/html/template/url.go
+++ b/src/html/template/url.go
@@ -17,7 +17,7 @@ func urlFilter(args ...interface{}) string {
 	if t == contentTypeURL {
 		return s
 	}
-	if i := strings.IndexRune(s, ':'); i >= 0 && strings.IndexRune(s[:i], '/') < 0 {
+	if i := strings.IndexRune(s, ':'); i >= 0 && !strings.ContainsRune(s[:i], '/') {
 		protocol := strings.ToLower(s[:i])
 		if protocol != "http" && protocol != "https" && protocol != "mailto" {
 			return "#" + filterFailsafe
diff --git a/src/net/http/serve_test.go b/src/net/http/serve_test.go
index 4cd6ed077f..e0094234de 100644
--- a/src/net/http/serve_test.go
+++ b/src/net/http/serve_test.go
@@ -4267,7 +4267,7 @@ func BenchmarkClient(b *testing.B) {
 		if err != nil {
 			b.Fatalf("ReadAll: %v", err)
 		}
-		if bytes.Compare(body, data) != 0 {
+		if !bytes.Equal(body, data) {
 			b.Fatalf("Got body: %q", body)
 		}
 	}
diff --git a/src/path/filepath/path_test.go b/src/path/filepath/path_test.go
index 3622f9178e..1a4a9d2a1a 100644
--- a/src/path/filepath/path_test.go
+++ b/src/path/filepath/path_test.go
@@ -1015,7 +1015,7 @@ func TestAbs(t *testing.T) {
 		vol := filepath.VolumeName(root)
 		var extra []string
 		for _, path := range absTests {
-			if strings.Index(path, "$") != -1 {
+			if strings.Contains(path, "$") {
 				continue
 			}
 			path = vol + path
diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go
index c1c2354bf9..9a61b4f2b2 100644
--- a/src/runtime/gcinfo_test.go
+++ b/src/runtime/gcinfo_test.go
@@ -59,7 +59,7 @@ func TestGCInfo(t *testing.T) {
 
 func verifyGCInfo(t *testing.T, name string, p interface{}, mask0 []byte) {
 	mask := runtime.GCMask(p)
-	if bytes.Compare(mask, mask0) != 0 {
+	if !bytes.Equal(mask, mask0) {
 		t.Errorf("bad GC program for %v:\nwant %+v\ngot  %+v", name, mask0, mask)
 		return
 	}
-- 
cgit v1.3


From 3fafe2e8888dadb6877fa1e7569f5bd1f688dd3a Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Mon, 11 Apr 2016 08:57:52 +0200
Subject: internal/trace: support parsing of 1.5 traces

1. Parse out version from trace header.
2. Restore handling of 1.5 traces.
3. Restore optional symbolization of traces.
4. Add some canned 1.5 traces for regression testing
   (http benchmark trace, runtime/trace stress traces,
    plus one with broken timestamps).

Change-Id: Idb18a001d03ded8e13c2730eeeb37c5836e31256
Reviewed-on: https://go-review.googlesource.com/21803
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
---
 misc/nacl/testzip.proto                            |   4 +
 src/cmd/trace/main.go                              |   2 +-
 src/internal/trace/parser.go                       | 314 +++++++++++++++------
 src/internal/trace/parser_test.go                  |  61 +++-
 src/internal/trace/testdata/http_1_5_good          | Bin 0 -> 42218 bytes
 src/internal/trace/testdata/stress_1_5_good        | Bin 0 -> 7446 bytes
 src/internal/trace/testdata/stress_1_5_unordered   | Bin 0 -> 8194 bytes
 .../trace/testdata/stress_start_stop_1_5_good      | Bin 0 -> 6997 bytes
 src/runtime/trace.go                               |   2 +-
 src/runtime/trace/trace_test.go                    |   4 +-
 10 files changed, 295 insertions(+), 92 deletions(-)
 create mode 100644 src/internal/trace/testdata/http_1_5_good
 create mode 100644 src/internal/trace/testdata/stress_1_5_good
 create mode 100644 src/internal/trace/testdata/stress_1_5_unordered
 create mode 100644 src/internal/trace/testdata/stress_start_stop_1_5_good

(limited to 'src/runtime')

diff --git a/misc/nacl/testzip.proto b/misc/nacl/testzip.proto
index 42db92f327..8c14b87f0a 100644
--- a/misc/nacl/testzip.proto
+++ b/misc/nacl/testzip.proto
@@ -109,6 +109,10 @@ go	src=..
 			png
 				testdata
 					+
+		internal
+			trace
+				testdata
+					+
 		io
 			+
 		mime
diff --git a/src/cmd/trace/main.go b/src/cmd/trace/main.go
index 12bf8c3c16..cfd222e132 100644
--- a/src/cmd/trace/main.go
+++ b/src/cmd/trace/main.go
@@ -99,7 +99,7 @@ func parseEvents() ([]*trace.Event, error) {
 		defer tracef.Close()
 
 		// Parse and symbolize.
-		events, err := trace.Parse(bufio.NewReader(tracef))
+		events, err := trace.Parse(bufio.NewReader(tracef), programBinary)
 		if err != nil {
 			loader.err = fmt.Errorf("failed to parse trace: %v", err)
 			return
diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go
index 65530b15c3..82ddb8b6c8 100644
--- a/src/internal/trace/parser.go
+++ b/src/internal/trace/parser.go
@@ -5,17 +5,22 @@
 package trace
 
 import (
+	"bufio"
 	"bytes"
 	"fmt"
 	"io"
+	"os"
+	"os/exec"
 	"sort"
+	"strconv"
+	"strings"
 )
 
 // Event describes one event in the trace.
 type Event struct {
 	Off   int       // offset in input file (for debugging and error reporting)
 	Type  byte      // one of Ev*
-	Seq   int64     // sequence number
+	seq   int64     // sequence number
 	Ts    int64     // timestamp in nanoseconds
 	P     int       // P on which the event happened (can be one of TimerP, NetpollP, SyscallP)
 	G     uint64    // G on which the event happened
@@ -53,12 +58,12 @@ const (
 )
 
 // Parse parses, post-processes and verifies the trace.
-func Parse(r io.Reader) ([]*Event, error) {
-	rawEvents, strings, err := readTrace(r)
+func Parse(r io.Reader, bin string) ([]*Event, error) {
+	ver, rawEvents, strings, err := readTrace(r)
 	if err != nil {
 		return nil, err
 	}
-	events, stacks, err := parseEvents(rawEvents, strings)
+	events, stacks, err := parseEvents(ver, rawEvents, strings)
 	if err != nil {
 		return nil, err
 	}
@@ -66,7 +71,7 @@ func Parse(r io.Reader) ([]*Event, error) {
 	if err != nil {
 		return nil, err
 	}
-	err = postProcessTrace(events)
+	err = postProcessTrace(ver, events)
 	if err != nil {
 		return nil, err
 	}
@@ -76,6 +81,11 @@ func Parse(r io.Reader) ([]*Event, error) {
 			ev.Stk = stacks[ev.StkID]
 		}
 	}
+	if ver < 1007 && bin != "" {
+		if err := symbolize(events, bin); err != nil {
+			return nil, err
+		}
+	}
 	return events, nil
 }
 
@@ -88,61 +98,82 @@ type rawEvent struct {
 
 // readTrace does wire-format parsing and verification.
 // It does not care about specific event types and argument meaning.
-func readTrace(r io.Reader) ([]rawEvent, map[uint64]string, error) {
+func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]string, err error) {
 	// Read and validate trace header.
 	var buf [16]byte
-	off, err := r.Read(buf[:])
-	if off != 16 || err != nil {
-		return nil, nil, fmt.Errorf("failed to read header: read %v, err %v", off, err)
+	off, err := io.ReadFull(r, buf[:])
+	if err != nil {
+		err = fmt.Errorf("failed to read header: read %v, err %v", off, err)
+		return
 	}
-	if !bytes.Equal(buf[:], []byte("go 1.5 trace\x00\x00\x00\x00")) {
-		return nil, nil, fmt.Errorf("not a trace file")
+	ver, err = parseHeader(buf[:])
+	if err != nil {
+		return
+	}
+	switch ver {
+	case 1005, 1007:
+		break
+	default:
+		err = fmt.Errorf("unsupported trace file version %v.%v (update Go toolchain) %v", ver/1000, ver%1000, ver)
+		return
 	}
 
 	// Read events.
-	var events []rawEvent
-	strings := make(map[uint64]string)
+	strings = make(map[uint64]string)
 	for {
 		// Read event type and number of arguments (1 byte).
 		off0 := off
-		n, err := r.Read(buf[:1])
+		var n int
+		n, err = r.Read(buf[:1])
 		if err == io.EOF {
+			err = nil
 			break
 		}
 		if err != nil || n != 1 {
-			return nil, nil, fmt.Errorf("failed to read trace at offset 0x%x: n=%v err=%v", off0, n, err)
+			err = fmt.Errorf("failed to read trace at offset 0x%x: n=%v err=%v", off0, n, err)
+			return
 		}
 		off += n
 		typ := buf[0] << 2 >> 2
 		narg := buf[0] >> 6
+		if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > ver {
+			err = fmt.Errorf("unknown event type %v at offset 0x%x", typ, off0)
+			return
+		}
 		if typ == EvString {
 			// String dictionary entry [ID, length, string].
 			var id uint64
 			id, off, err = readVal(r, off)
 			if err != nil {
-				return nil, nil, err
+				return
 			}
 			if id == 0 {
-				return nil, nil, fmt.Errorf("string at offset %d has invalid id 0", off)
+				err = fmt.Errorf("string at offset %d has invalid id 0", off)
+				return
 			}
 			if strings[id] != "" {
-				return nil, nil, fmt.Errorf("string at offset %d has duplicate id %v", off, id)
+				err = fmt.Errorf("string at offset %d has duplicate id %v", off, id)
+				return
 			}
 			var ln uint64
 			ln, off, err = readVal(r, off)
 			if err != nil {
-				return nil, nil, err
+				return
 			}
 			if ln == 0 {
-				return nil, nil, fmt.Errorf("string at offset %d has invalid length 0", off)
+				err = fmt.Errorf("string at offset %d has invalid length 0", off)
+				return
 			}
 			if ln > 1e6 {
-				return nil, nil, fmt.Errorf("string at offset %d has too large length %v", off, ln)
+				err = fmt.Errorf("string at offset %d has too large length %v", off, ln)
+				return
 			}
 			buf := make([]byte, ln)
-			n, err := io.ReadFull(r, buf)
+			var n int
+			n, err = io.ReadFull(r, buf)
 			if err != nil {
-				return nil, nil, fmt.Errorf("failed to read trace at offset %d: read %v, want %v, error %v", off, n, ln, err)
+				err = fmt.Errorf("failed to read trace at offset %d: read %v, want %v, error %v", off, n, ln, err)
+				return
 			}
 			off += n
 			strings[id] = string(buf)
@@ -154,7 +185,8 @@ func readTrace(r io.Reader) ([]rawEvent, map[uint64]string, error) {
 				var v uint64
 				v, off, err = readVal(r, off)
 				if err != nil {
-					return nil, nil, err
+					err = fmt.Errorf("failed to read event %v argument at offset %v (%v)", typ, off, err)
+					return
 				}
 				ev.args = append(ev.args, v)
 			}
@@ -163,39 +195,62 @@ func readTrace(r io.Reader) ([]rawEvent, map[uint64]string, error) {
 			var v uint64
 			v, off, err = readVal(r, off)
 			if err != nil {
-				return nil, nil, err
+				err = fmt.Errorf("failed to read event %v argument at offset %v (%v)", typ, off, err)
+				return
 			}
 			evLen := v
 			off1 := off
 			for evLen > uint64(off-off1) {
 				v, off, err = readVal(r, off)
 				if err != nil {
-					return nil, nil, err
+					err = fmt.Errorf("failed to read event %v argument at offset %v (%v)", typ, off, err)
+					return
 				}
 				ev.args = append(ev.args, v)
 			}
 			if evLen != uint64(off-off1) {
-				return nil, nil, fmt.Errorf("event has wrong length at offset 0x%x: want %v, got %v", off0, evLen, off-off1)
+				err = fmt.Errorf("event has wrong length at offset 0x%x: want %v, got %v", off0, evLen, off-off1)
+				return
 			}
 		}
 		events = append(events, ev)
 	}
-	return events, strings, nil
+	return
+}
+
+// parseHeader parses trace header of the form "go 1.7 trace\x00\x00\x00\x00"
+// and returns parsed version as 1007.
+func parseHeader(buf []byte) (int, error) {
+	if len(buf) != 16 {
+		return 0, fmt.Errorf("bad header length")
+	}
+	if buf[0] != 'g' || buf[1] != 'o' || buf[2] != ' ' ||
+		buf[3] < '1' || buf[3] > '9' ||
+		buf[4] != '.' ||
+		buf[5] < '1' || buf[5] > '9' {
+		return 0, fmt.Errorf("not a trace file")
+	}
+	ver := int(buf[5] - '0')
+	i := 0
+	for ; buf[6+i] >= '0' && buf[6+i] <= '9' && i < 2; i++ {
+		ver = ver*10 + int(buf[6+i]-'0')
+	}
+	ver += int(buf[3]-'0') * 1000
+	if !bytes.Equal(buf[6+i:], []byte(" trace\x00\x00\x00\x00")[:10-i]) {
+		return 0, fmt.Errorf("not a trace file")
+	}
+	return ver, nil
 }
 
 // Parse events transforms raw events into events.
 // It does analyze and verify per-event-type arguments.
-func parseEvents(rawEvents []rawEvent, strings map[uint64]string) (events []*Event, stacks map[uint64][]*Frame, err error) {
+func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (events []*Event, stacks map[uint64][]*Frame, err error) {
 	var ticksPerSec, lastSeq, lastTs int64
 	var lastG, timerGoid uint64
 	var lastP int
 	lastGs := make(map[int]uint64) // last goroutine running on P
 	stacks = make(map[uint64][]*Frame)
 	for _, raw := range rawEvents {
-		if raw.typ == EvNone || raw.typ >= EvCount {
-			err = fmt.Errorf("unknown event type %v at offset 0x%x", raw.typ, raw.off)
-			return
-		}
 		desc := EventDescriptions[raw.typ]
 		if desc.Name == "" {
 			err = fmt.Errorf("missing description for event type %v", raw.typ)
@@ -246,7 +301,11 @@ func parseEvents(rawEvents []rawEvent, strings map[uint64]string) (events []*Eve
 					raw.off, size)
 				return
 			}
-			if want := 2 + 4*size; uint64(len(raw.args)) != want {
+			want := 2 + 4*size
+			if ver < 1007 {
+				want = 2 + size
+			}
+			if uint64(len(raw.args)) != want {
 				err = fmt.Errorf("EvStack has wrong number of arguments at offset 0x%x: want %v, got %v",
 					raw.off, want, len(raw.args))
 				return
@@ -255,19 +314,23 @@ func parseEvents(rawEvents []rawEvent, strings map[uint64]string) (events []*Eve
 			if id != 0 && size > 0 {
 				stk := make([]*Frame, size)
 				for i := 0; i < int(size); i++ {
-					pc := raw.args[2+i*4+0]
-					fn := raw.args[2+i*4+1]
-					file := raw.args[2+i*4+2]
-					line := raw.args[2+i*4+3]
-					stk[i] = &Frame{PC: pc, Fn: strings[fn], File: strings[file], Line: int(line)}
+					if ver < 1007 {
+						stk[i] = &Frame{PC: raw.args[2+i]}
+					} else {
+						pc := raw.args[2+i*4+0]
+						fn := raw.args[2+i*4+1]
+						file := raw.args[2+i*4+2]
+						line := raw.args[2+i*4+3]
+						stk[i] = &Frame{PC: pc, Fn: strings[fn], File: strings[file], Line: int(line)}
+					}
 				}
 				stacks[id] = stk
 			}
 		default:
 			e := &Event{Off: raw.off, Type: raw.typ, P: lastP, G: lastG}
-			e.Seq = lastSeq + int64(raw.args[0])
+			e.seq = lastSeq + int64(raw.args[0])
 			e.Ts = lastTs + int64(raw.args[1])
-			lastSeq = e.Seq
+			lastSeq = e.seq
 			lastTs = e.Ts
 			for i := range desc.Args {
 				e.Args[i] = raw.args[i+2]
@@ -289,7 +352,7 @@ func parseEvents(rawEvents []rawEvent, strings map[uint64]string) (events []*Eve
 			case EvGoSysExit:
 				// EvGoSysExit emission is delayed until the thread has a P.
 				// Give it the real sequence number and time stamp.
-				e.Seq = int64(e.Args[1])
+				e.seq = int64(e.Args[1])
 				if e.Args[2] != 0 {
 					e.Ts = int64(e.Args[2])
 				}
@@ -387,7 +450,7 @@ var ErrTimeOrder = fmt.Errorf("time stamps out of order")
 // The resulting trace is guaranteed to be consistent
 // (for example, a P does not run two Gs at the same time, or a G is indeed
 // blocked before an unblock event).
-func postProcessTrace(events []*Event) error {
+func postProcessTrace(ver int, events []*Event) error {
 	const (
 		gDead = iota
 		gRunnable
@@ -510,7 +573,12 @@ func postProcessTrace(events []*Event) error {
 			g.evStart = ev
 			p.g = ev.G
 			if g.evCreate != nil {
-				ev.StkID = g.evCreate.Args[1]
+				if ver < 1007 {
+					// +1 because symbolizer expects return pc.
+					ev.Stk = []*Frame{{PC: g.evCreate.Args[1] + 1}}
+				} else {
+					ev.StkID = g.evCreate.Args[1]
+				}
 				g.evCreate = nil
 			}
 
@@ -611,6 +679,79 @@ func postProcessTrace(events []*Event) error {
 	return nil
 }
 
+// symbolize attaches func/file/line info to stack traces.
+func symbolize(events []*Event, bin string) error {
+	// First, collect and dedup all pcs.
+	pcs := make(map[uint64]*Frame)
+	for _, ev := range events {
+		for _, f := range ev.Stk {
+			pcs[f.PC] = nil
+		}
+	}
+
+	// Start addr2line.
+	cmd := exec.Command("go", "tool", "addr2line", bin)
+	in, err := cmd.StdinPipe()
+	if err != nil {
+		return fmt.Errorf("failed to pipe addr2line stdin: %v", err)
+	}
+	cmd.Stderr = os.Stderr
+	out, err := cmd.StdoutPipe()
+	if err != nil {
+		return fmt.Errorf("failed to pipe addr2line stdout: %v", err)
+	}
+	err = cmd.Start()
+	if err != nil {
+		return fmt.Errorf("failed to start addr2line: %v", err)
+	}
+	outb := bufio.NewReader(out)
+
+	// Write all pcs to addr2line.
+	// Need to copy pcs to an array, because map iteration order is non-deterministic.
+	var pcArray []uint64
+	for pc := range pcs {
+		pcArray = append(pcArray, pc)
+		_, err := fmt.Fprintf(in, "0x%x\n", pc-1)
+		if err != nil {
+			return fmt.Errorf("failed to write to addr2line: %v", err)
+		}
+	}
+	in.Close()
+
+	// Read in answers.
+	for _, pc := range pcArray {
+		fn, err := outb.ReadString('\n')
+		if err != nil {
+			return fmt.Errorf("failed to read from addr2line: %v", err)
+		}
+		file, err := outb.ReadString('\n')
+		if err != nil {
+			return fmt.Errorf("failed to read from addr2line: %v", err)
+		}
+		f := &Frame{PC: pc}
+		f.Fn = fn[:len(fn)-1]
+		f.File = file[:len(file)-1]
+		if colon := strings.LastIndex(f.File, ":"); colon != -1 {
+			ln, err := strconv.Atoi(f.File[colon+1:])
+			if err == nil {
+				f.File = f.File[:colon]
+				f.Line = ln
+			}
+		}
+		pcs[pc] = f
+	}
+	cmd.Wait()
+
+	// Replace frames in events array.
+	for _, ev := range events {
+		for i, f := range ev.Stk {
+			ev.Stk[i] = pcs[f.PC]
+		}
+	}
+
+	return nil
+}
+
 // readVal reads unsigned base-128 value from r.
 func readVal(r io.Reader, off0 int) (v uint64, off int, err error) {
 	off = off0
@@ -637,7 +778,7 @@ func (l eventList) Len() int {
 }
 
 func (l eventList) Less(i, j int) bool {
-	return l[i].Seq < l[j].Seq
+	return l[i].seq < l[j].seq
 }
 
 func (l eventList) Swap(i, j int) {
@@ -701,46 +842,47 @@ const (
 )
 
 var EventDescriptions = [EvCount]struct {
-	Name  string
-	Stack bool
-	Args  []string
+	Name       string
+	minVersion int
+	Stack      bool
+	Args       []string
 }{
-	EvNone:           {"None", false, []string{}},
-	EvBatch:          {"Batch", false, []string{"p", "seq", "ticks"}},
-	EvFrequency:      {"Frequency", false, []string{"freq", "unused"}},
-	EvStack:          {"Stack", false, []string{"id", "siz"}},
-	EvGomaxprocs:     {"Gomaxprocs", true, []string{"procs"}},
-	EvProcStart:      {"ProcStart", false, []string{"thread"}},
-	EvProcStop:       {"ProcStop", false, []string{}},
-	EvGCStart:        {"GCStart", true, []string{}},
-	EvGCDone:         {"GCDone", false, []string{}},
-	EvGCScanStart:    {"GCScanStart", false, []string{}},
-	EvGCScanDone:     {"GCScanDone", false, []string{}},
-	EvGCSweepStart:   {"GCSweepStart", true, []string{}},
-	EvGCSweepDone:    {"GCSweepDone", false, []string{}},
-	EvGoCreate:       {"GoCreate", true, []string{"g", "stack"}},
-	EvGoStart:        {"GoStart", false, []string{"g"}},
-	EvGoEnd:          {"GoEnd", false, []string{}},
-	EvGoStop:         {"GoStop", true, []string{}},
-	EvGoSched:        {"GoSched", true, []string{}},
-	EvGoPreempt:      {"GoPreempt", true, []string{}},
-	EvGoSleep:        {"GoSleep", true, []string{}},
-	EvGoBlock:        {"GoBlock", true, []string{}},
-	EvGoUnblock:      {"GoUnblock", true, []string{"g"}},
-	EvGoBlockSend:    {"GoBlockSend", true, []string{}},
-	EvGoBlockRecv:    {"GoBlockRecv", true, []string{}},
-	EvGoBlockSelect:  {"GoBlockSelect", true, []string{}},
-	EvGoBlockSync:    {"GoBlockSync", true, []string{}},
-	EvGoBlockCond:    {"GoBlockCond", true, []string{}},
-	EvGoBlockNet:     {"GoBlockNet", true, []string{}},
-	EvGoSysCall:      {"GoSysCall", true, []string{}},
-	EvGoSysExit:      {"GoSysExit", false, []string{"g", "seq", "ts"}},
-	EvGoSysBlock:     {"GoSysBlock", false, []string{}},
-	EvGoWaiting:      {"GoWaiting", false, []string{"g"}},
-	EvGoInSyscall:    {"GoInSyscall", false, []string{"g"}},
-	EvHeapAlloc:      {"HeapAlloc", false, []string{"mem"}},
-	EvNextGC:         {"NextGC", false, []string{"mem"}},
-	EvTimerGoroutine: {"TimerGoroutine", false, []string{"g", "unused"}},
-	EvFutileWakeup:   {"FutileWakeup", false, []string{}},
-	EvString:         {"String", false, []string{}},
+	EvNone:           {"None", 1005, false, []string{}},
+	EvBatch:          {"Batch", 1005, false, []string{"p", "seq", "ticks"}},
+	EvFrequency:      {"Frequency", 1005, false, []string{"freq", "unused"}},
+	EvStack:          {"Stack", 1005, false, []string{"id", "siz"}},
+	EvGomaxprocs:     {"Gomaxprocs", 1005, true, []string{"procs"}},
+	EvProcStart:      {"ProcStart", 1005, false, []string{"thread"}},
+	EvProcStop:       {"ProcStop", 1005, false, []string{}},
+	EvGCStart:        {"GCStart", 1005, true, []string{}},
+	EvGCDone:         {"GCDone", 1005, false, []string{}},
+	EvGCScanStart:    {"GCScanStart", 1005, false, []string{}},
+	EvGCScanDone:     {"GCScanDone", 1005, false, []string{}},
+	EvGCSweepStart:   {"GCSweepStart", 1005, true, []string{}},
+	EvGCSweepDone:    {"GCSweepDone", 1005, false, []string{}},
+	EvGoCreate:       {"GoCreate", 1005, true, []string{"g", "stack"}},
+	EvGoStart:        {"GoStart", 1005, false, []string{"g"}},
+	EvGoEnd:          {"GoEnd", 1005, false, []string{}},
+	EvGoStop:         {"GoStop", 1005, true, []string{}},
+	EvGoSched:        {"GoSched", 1005, true, []string{}},
+	EvGoPreempt:      {"GoPreempt", 1005, true, []string{}},
+	EvGoSleep:        {"GoSleep", 1005, true, []string{}},
+	EvGoBlock:        {"GoBlock", 1005, true, []string{}},
+	EvGoUnblock:      {"GoUnblock", 1005, true, []string{"g"}},
+	EvGoBlockSend:    {"GoBlockSend", 1005, true, []string{}},
+	EvGoBlockRecv:    {"GoBlockRecv", 1005, true, []string{}},
+	EvGoBlockSelect:  {"GoBlockSelect", 1005, true, []string{}},
+	EvGoBlockSync:    {"GoBlockSync", 1005, true, []string{}},
+	EvGoBlockCond:    {"GoBlockCond", 1005, true, []string{}},
+	EvGoBlockNet:     {"GoBlockNet", 1005, true, []string{}},
+	EvGoSysCall:      {"GoSysCall", 1005, true, []string{}},
+	EvGoSysExit:      {"GoSysExit", 1005, false, []string{"g", "seq", "ts"}},
+	EvGoSysBlock:     {"GoSysBlock", 1005, false, []string{}},
+	EvGoWaiting:      {"GoWaiting", 1005, false, []string{"g"}},
+	EvGoInSyscall:    {"GoInSyscall", 1005, false, []string{"g"}},
+	EvHeapAlloc:      {"HeapAlloc", 1005, false, []string{"mem"}},
+	EvNextGC:         {"NextGC", 1005, false, []string{"mem"}},
+	EvTimerGoroutine: {"TimerGoroutine", 1005, false, []string{"g", "unused"}},
+	EvFutileWakeup:   {"FutileWakeup", 1005, false, []string{}},
+	EvString:         {"String", 1007, false, []string{}},
 }
diff --git a/src/internal/trace/parser_test.go b/src/internal/trace/parser_test.go
index fecefc4053..db8d2a30ce 100644
--- a/src/internal/trace/parser_test.go
+++ b/src/internal/trace/parser_test.go
@@ -5,6 +5,9 @@
 package trace
 
 import (
+	"bytes"
+	"io/ioutil"
+	"path/filepath"
 	"strings"
 	"testing"
 )
@@ -22,9 +25,63 @@ func TestCorruptedInputs(t *testing.T) {
 		"go 1.5 trace\x00\x00\x00\x00\xc3\x0200",
 	}
 	for _, data := range tests {
-		events, err := Parse(strings.NewReader(data))
+		events, err := Parse(strings.NewReader(data), "")
 		if err == nil || events != nil {
-			t.Fatalf("no error on input: %q\n", data)
+			t.Fatalf("no error on input: %q", data)
+		}
+	}
+}
+
+func TestParseCanned(t *testing.T) {
+	files, err := ioutil.ReadDir("./testdata")
+	if err != nil {
+		t.Fatalf("failed to read ./testdata: %v", err)
+	}
+	for _, f := range files {
+		data, err := ioutil.ReadFile(filepath.Join("./testdata", f.Name()))
+		if err != nil {
+			t.Fatalf("failed to read input file: %v", err)
+		}
+		_, err = Parse(bytes.NewReader(data), "")
+		switch {
+		case strings.HasSuffix(f.Name(), "_good"):
+			if err != nil {
+				t.Errorf("failed to parse good trace %v: %v", f.Name(), err)
+			}
+		case strings.HasSuffix(f.Name(), "_unordered"):
+			if err != ErrTimeOrder {
+				t.Errorf("unordered trace is not detected %v: %v", f.Name(), err)
+			}
+		default:
+			t.Errorf("unknown input file suffix: %v", f.Name())
+		}
+	}
+}
+
+func TestParseVersion(t *testing.T) {
+	tests := map[string]int{
+		"go 1.5 trace\x00\x00\x00\x00": 1005,
+		"go 1.7 trace\x00\x00\x00\x00": 1007,
+		"go 1.10 trace\x00\x00\x00":    1010,
+		"go 1.25 trace\x00\x00\x00":    1025,
+		"go 1.234 trace\x00\x00":       1234,
+		"go 1.2345 trace\x00":          -1,
+		"go 0.0 trace\x00\x00\x00\x00": -1,
+		"go a.b trace\x00\x00\x00\x00": -1,
+	}
+	for header, ver := range tests {
+		ver1, err := parseHeader([]byte(header))
+		if ver == -1 {
+			if err == nil {
+				t.Fatalf("no error on input: %q, version %v", header, ver1)
+			}
+		} else {
+			if err != nil {
+				t.Fatalf("failed to parse: %q (%v)", header, err)
+			}
+			if ver != ver1 {
+				t.Fatalf("wrong version: %v, want %v, input: %q", ver1, ver, header)
+			}
 		}
 	}
 }
diff --git a/src/internal/trace/testdata/http_1_5_good b/src/internal/trace/testdata/http_1_5_good
new file mode 100644
index 0000000000..0736cae674
Binary files /dev/null and b/src/internal/trace/testdata/http_1_5_good differ
diff --git a/src/internal/trace/testdata/stress_1_5_good b/src/internal/trace/testdata/stress_1_5_good
new file mode 100644
index 0000000000..c5055ebd19
Binary files /dev/null and b/src/internal/trace/testdata/stress_1_5_good differ
diff --git a/src/internal/trace/testdata/stress_1_5_unordered b/src/internal/trace/testdata/stress_1_5_unordered
new file mode 100644
index 0000000000..11f7d745ca
Binary files /dev/null and b/src/internal/trace/testdata/stress_1_5_unordered differ
diff --git a/src/internal/trace/testdata/stress_start_stop_1_5_good b/src/internal/trace/testdata/stress_start_stop_1_5_good
new file mode 100644
index 0000000000..72a887b844
Binary files /dev/null and b/src/internal/trace/testdata/stress_start_stop_1_5_good differ
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index f54e5e0a7e..dcf534549a 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -356,7 +356,7 @@ func ReadTrace() []byte {
 		trace.headerWritten = true
 		trace.lockOwner = nil
 		unlock(&trace.lock)
-		return []byte("go 1.5 trace\x00\x00\x00\x00")
+		return []byte("go 1.7 trace\x00\x00\x00\x00")
 	}
 	// Wait for new data.
 	if trace.fullHead == 0 && !trace.shutdown {
diff --git a/src/runtime/trace/trace_test.go b/src/runtime/trace/trace_test.go
index b787a2fc27..d10e928a66 100644
--- a/src/runtime/trace/trace_test.go
+++ b/src/runtime/trace/trace_test.go
@@ -52,7 +52,7 @@ func TestTrace(t *testing.T) {
 		t.Fatalf("failed to start tracing: %v", err)
 	}
 	Stop()
-	_, err := trace.Parse(buf)
+	_, err := trace.Parse(buf, "")
 	if err == trace.ErrTimeOrder {
 		t.Skipf("skipping trace: %v", err)
 	}
@@ -62,7 +62,7 @@ func TestTrace(t *testing.T) {
 }
 
 func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc, error) {
-	events, err := trace.Parse(r)
+	events, err := trace.Parse(r, "")
 	if err == trace.ErrTimeOrder {
 		t.Skipf("skipping trace: %v", err)
 	}
-- 
cgit v1.3


From 7cbe7b1e867db9001db35ca41ee3e4a3b0de31c7 Mon Sep 17 00:00:00 2001
From: Michael Munday <munday@ca.ibm.com>
Date: Fri, 18 Mar 2016 19:13:59 -0400
Subject: runtime/internal/atomic: add s390x atomic operations

Load and store instructions are atomic on the s390x.

Change-Id: I0031ed2fba43f33863bca114d0fdec2e7d1ce807
Reviewed-on: https://go-review.googlesource.com/20938
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/internal/atomic/asm_s390x.s     | 174 ++++++++++++++++++++++++++++
 src/runtime/internal/atomic/atomic_s390x.go |  73 ++++++++++++
 2 files changed, 247 insertions(+)
 create mode 100644 src/runtime/internal/atomic/asm_s390x.s
 create mode 100644 src/runtime/internal/atomic/atomic_s390x.go

(limited to 'src/runtime')

diff --git a/src/runtime/internal/atomic/asm_s390x.s b/src/runtime/internal/atomic/asm_s390x.s
new file mode 100644
index 0000000000..c84718cb8f
--- /dev/null
+++ b/src/runtime/internal/atomic/asm_s390x.s
@@ -0,0 +1,174 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func Cas(ptr *uint32, old, new uint32) bool
+// Atomically:
+//	if *ptr == old {
+//		*ptr = new
+//		return 1
+//	} else {
+//		return 0
+//	}
+TEXT ·Cas(SB), NOSPLIT, $0-17
+	MOVD	ptr+0(FP), R3
+	MOVWZ	old+8(FP), R4
+	MOVWZ	new+12(FP), R5
+	CS	R4, R5, 0(R3)    //  if (R4 == 0(R3)) then 0(R3)= R5
+	BNE	cas_fail
+	MOVB	$1, ret+16(FP)
+	RET
+cas_fail:
+	MOVB	$0, ret+16(FP)
+	RET
+
+// func Cas64(ptr *uint64, old, new uint64) bool
+// Atomically:
+//	if *ptr == old {
+//		*ptr = new
+//		return 1
+//	} else {
+//		return 0
+//	}
+TEXT ·Cas64(SB), NOSPLIT, $0-25
+	MOVD	ptr+0(FP), R3
+	MOVD	old+8(FP), R4
+	MOVD	new+16(FP), R5
+	CSG	R4, R5, 0(R3)    //  if (R4 == 0(R3)) then 0(R3)= R5
+	BNE	cas64_fail
+	MOVB	$1, ret+24(FP)
+	RET
+cas64_fail:
+	MOVB	$0, ret+24(FP)
+	RET
+
+// func Casuintptr(ptr *uintptr, old, new uintptr) bool
+TEXT ·Casuintptr(SB), NOSPLIT, $0-25
+	BR	·Cas64(SB)
+
+// func Loaduintptr(ptr *uintptr) uintptr
+TEXT ·Loaduintptr(SB), NOSPLIT, $0-16
+	BR	·Load64(SB)
+
+// func Loaduint(ptr *uint) uint
+TEXT ·Loaduint(SB), NOSPLIT, $0-16
+	BR	·Load64(SB)
+
+// func Storeuintptr(ptr *uintptr, new uintptr)
+TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
+	BR	·Store64(SB)
+
+// func Loadint64(ptr *int64) int64
+TEXT ·Loadint64(SB), NOSPLIT, $0-16
+	BR	·Load64(SB)
+
+// func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
+	BR	·Xadd64(SB)
+
+// func Xaddint64(ptr *int64, delta int64) int64
+TEXT ·Xaddint64(SB), NOSPLIT, $0-24
+	BR	·Xadd64(SB) // same frame as Xadd64: 8-byte ptr, 8-byte delta, 8-byte result
+
+// func Casp1(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool
+// Atomically:
+//	if *ptr == old {
+//		*ptr = new
+//		return 1
+//	} else {
+//		return 0
+//	}
+TEXT ·Casp1(SB), NOSPLIT, $0-25
+	BR ·Cas64(SB)
+
+// func Xadd(ptr *uint32, delta int32) uint32
+// Atomically:
+//	*ptr += delta
+//	return *ptr
+TEXT ·Xadd(SB), NOSPLIT, $0-20
+	MOVD	ptr+0(FP), R4
+	MOVW	delta+8(FP), R5
+	MOVW	(R4), R3
+repeat:
+	ADD	R5, R3, R6
+	CS	R3, R6, (R4) // if R3==(R4) then (R4)=R6 else R3=(R4)
+	BNE	repeat
+	MOVW	R6, ret+16(FP)
+	RET
+
+// func Xadd64(ptr *uint64, delta int64) uint64
+TEXT ·Xadd64(SB), NOSPLIT, $0-24
+	MOVD	ptr+0(FP), R4
+	MOVD	delta+8(FP), R5
+	MOVD	(R4), R3
+repeat:
+	ADD	R5, R3, R6
+	CSG	R3, R6, (R4) // if R3==(R4) then (R4)=R6 else R3=(R4)
+	BNE	repeat
+	MOVD	R6, ret+16(FP)
+	RET
+
+// func Xchg(ptr *uint32, new uint32) uint32
+TEXT ·Xchg(SB), NOSPLIT, $0-20
+	MOVD	ptr+0(FP), R4
+	MOVW	new+8(FP), R3
+	MOVW	(R4), R6
+repeat:
+	CS	R6, R3, (R4) // if R6==(R4) then (R4)=R3 else R6=(R4)
+	BNE	repeat
+	MOVW	R6, ret+16(FP)
+	RET
+
+// func Xchg64(ptr *uint64, new uint64) uint64
+TEXT ·Xchg64(SB), NOSPLIT, $0-24
+	MOVD	ptr+0(FP), R4
+	MOVD	new+8(FP), R3
+	MOVD	(R4), R6
+repeat:
+	CSG	R6, R3, (R4) // if R6==(R4) then (R4)=R3 else R6=(R4)
+	BNE	repeat
+	MOVD	R6, ret+16(FP)
+	RET
+
+// func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
+	BR	·Xchg64(SB)
+
+// func Or8(ptr *uint8, val uint8)
+TEXT ·Or8(SB), NOSPLIT, $0-9
+	MOVD    ptr+0(FP), R3
+	MOVBZ   val+8(FP), R4
+	// Calculate shift.
+	AND	$3, R3, R5
+	XOR	$3, R5 // big endian - flip direction
+	SLD	$3, R5 // MUL $8, R5
+	SLD	R5, R4
+	// Align ptr down to 4 bytes so we can use 32-bit load/store.
+	AND	$-4, R3
+	MOVWZ	0(R3), R6
+again:
+	OR	R4, R6, R7
+	CS	R6, R7, 0(R3) // if R6==(R3) then (R3)=R7 else R6=(R3)
+	BNE	again
+	RET
+
+// func And8(ptr *uint8, val uint8)
+TEXT ·And8(SB), NOSPLIT, $0-9
+	MOVD    ptr+0(FP), R3
+	MOVBZ   val+8(FP), R4
+	// Calculate shift.
+	AND	$3, R3, R5
+	XOR	$3, R5 // big endian - flip direction
+	SLD	$3, R5 // MUL $8, R5
+	OR	$-256, R4 // create 0xffffffffffffffxx
+	RLLG	R5, R4
+	// Align ptr down to 4 bytes so we can use 32-bit load/store.
+	AND	$-4, R3
+	MOVWZ	0(R3), R6
+again:
+	AND	R4, R6, R7
+	CS	R6, R7, 0(R3) // if R6==(R3) then (R3)=R7 else R6=(R3)
+	BNE	again
+	RET
diff --git a/src/runtime/internal/atomic/atomic_s390x.go b/src/runtime/internal/atomic/atomic_s390x.go
new file mode 100644
index 0000000000..f31f1af444
--- /dev/null
+++ b/src/runtime/internal/atomic/atomic_s390x.go
@@ -0,0 +1,73 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic
+
+import "unsafe"
+
+//go:nosplit
+//go:noinline
+func Load(ptr *uint32) uint32 {
+	return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func Loadp(ptr unsafe.Pointer) unsafe.Pointer {
+	return *(*unsafe.Pointer)(ptr)
+}
+
+//go:nosplit
+//go:noinline
+func Load64(ptr *uint64) uint64 {
+	return *ptr
+}
+
+//go:noinline
+//go:nosplit
+func Store(ptr *uint32, val uint32) {
+	*ptr = val
+}
+
+//go:noinline
+//go:nosplit
+func Store64(ptr *uint64, val uint64) {
+	*ptr = val
+}
+
+// NO go:noescape annotation; see atomic_pointer.go.
+//go:noinline
+//go:nosplit
+func Storep1(ptr unsafe.Pointer, val unsafe.Pointer) {
+	*(*unsafe.Pointer)(ptr) = val
+}
+
+//go:noescape
+func And8(ptr *uint8, val uint8)
+
+//go:noescape
+func Or8(ptr *uint8, val uint8)
+
+// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+
+//go:noescape
+func Xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func Xadd64(ptr *uint64, delta int64) uint64
+
+//go:noescape
+func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
+
+//go:noescape
+func Xchg(ptr *uint32, new uint32) uint32
+
+//go:noescape
+func Xchg64(ptr *uint64, new uint64) uint64
+
+//go:noescape
+func Xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:noescape
+func Cas64(ptr *uint64, old, new uint64) bool
-- 
cgit v1.3


From 78ecd61f6245197f701629f5f511be7f2bc1ff58 Mon Sep 17 00:00:00 2001
From: Michael Munday <munday@ca.ibm.com>
Date: Fri, 18 Mar 2016 19:20:34 -0400
Subject: runtime/cgo: add s390x support

Change-Id: I64ada9fe34c3cfc4bd514ec5d8c8f4d4c99074fb
Reviewed-on: https://go-review.googlesource.com/20950
Reviewed-by: Bill O'Farrell <billotosyr@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/cgo/asm_s390x.s       | 44 +++++++++++++++++++++++++
 src/runtime/cgo/gcc_linux_s390x.c | 68 +++++++++++++++++++++++++++++++++++++++
 src/runtime/cgo/gcc_s390x.S       | 43 +++++++++++++++++++++++++
 3 files changed, 155 insertions(+)
 create mode 100644 src/runtime/cgo/asm_s390x.s
 create mode 100644 src/runtime/cgo/gcc_linux_s390x.c
 create mode 100644 src/runtime/cgo/gcc_s390x.S

(limited to 'src/runtime')

diff --git a/src/runtime/cgo/asm_s390x.s b/src/runtime/cgo/asm_s390x.s
new file mode 100644
index 0000000000..5ed13cfe1e
--- /dev/null
+++ b/src/runtime/cgo/asm_s390x.s
@@ -0,0 +1,44 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+/*
+ * void crosscall2(void (*fn)(void*, int32), void*, int32)
+ * Save registers, call fn with the two remaining arguments, then restore them.
+ * crosscall2 obeys the C ABI (fn in R2, args in R3/R4); fn obeys the Go ABI.
+ */
+TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0
+	// Start with standard C stack frame layout and linkage
+
+	// Store R6-R15, F0, F2, F4 and F6 into the
+	// register save area of the calling function
+	STMG	R6, R15, 48(R15)
+	FMOVD	F0, 128(R15)
+	FMOVD	F2, 136(R15)
+	FMOVD	F4, 144(R15)
+	FMOVD	F6, 152(R15)
+
+	// Initialize Go ABI environment: zero R0 and load g
+	XOR	R0, R0
+	BL	runtime·load_g(SB)
+
+	// Allocate 24 bytes on the stack for fn's argument frame
+	SUB	$24, R15
+
+	MOVD	R3, 8(R15)  // arg1
+	MOVW	R4, 16(R15) // arg2
+	BL	(R2)        // fn(arg1, arg2)
+
+	ADD	$24, R15
+
+	// Reload R6-R15, F0, F2, F4 and F6 from the save area (loads, not stores)
+	LMG	48(R15), R6, R15
+	FMOVD	128(R15), F0
+	FMOVD	136(R15), F2
+	FMOVD	144(R15), F4
+	FMOVD	152(R15), F6
+
+	RET
+
diff --git a/src/runtime/cgo/gcc_linux_s390x.c b/src/runtime/cgo/gcc_linux_s390x.c
new file mode 100644
index 0000000000..81e3b339b0
--- /dev/null
+++ b/src/runtime/cgo/gcc_linux_s390x.c
@@ -0,0 +1,68 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <pthread.h>
+#include <string.h>
+#include <signal.h>
+#include "libcgo.h"
+
+static void *threadentry(void*);
+
+void (*x_cgo_inittls)(void **tlsg, void **tlsbase);
+static void (*setg_gcc)(void*);
+
+void
+x_cgo_init(G *g, void (*setg)(void*), void **tlsbase)
+{
+	pthread_attr_t attr;
+	size_t size;
+
+	setg_gcc = setg;
+	pthread_attr_init(&attr);
+	pthread_attr_getstacksize(&attr, &size);
+	g->stacklo = (uintptr)&attr - size + 4096;
+	pthread_attr_destroy(&attr);
+}
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+	pthread_attr_t attr;
+	sigset_t ign, oset;
+	pthread_t p;
+	size_t size;
+	int err;
+
+	sigfillset(&ign);
+	pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+	pthread_attr_init(&attr);
+	pthread_attr_getstacksize(&attr, &size);
+	// Leave stacklo=0 and set stackhi=size; mstack will do the rest.
+	ts->g->stackhi = size;
+	err = pthread_create(&p, &attr, threadentry, ts);
+
+	pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+	if (err != 0) {
+		fatalf("pthread_create failed: %s", strerror(err));
+	}
+}
+
+extern void crosscall_s390x(void (*fn)(void), void *g);
+
+static void*
+threadentry(void *v)
+{
+	ThreadStart ts;
+
+	ts = *(ThreadStart*)v;
+	free(v);
+
+	// Save g for this thread in C TLS
+	setg_gcc((void*)ts.g);
+
+	crosscall_s390x(ts.fn, (void*)ts.g);
+	return nil;
+}
diff --git a/src/runtime/cgo/gcc_s390x.S b/src/runtime/cgo/gcc_s390x.S
new file mode 100644
index 0000000000..6b163d0d21
--- /dev/null
+++ b/src/runtime/cgo/gcc_s390x.S
@@ -0,0 +1,43 @@
+// Copyright 2016 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+ * void crosscall_s390x(void (*fn)(void), void *g)
+ *
+ * Calling into the go tool chain, where all registers are caller save.
+ * Called from standard s390x C ABI, where r6-r13, r15, and f0, f2, f4 and f6 are
+ * callee-save, so they must be saved explicitly.
+ */
+.globl crosscall_s390x
+crosscall_s390x:
+	/*
+	 * save r6-r15, f0, f2, f4 and f6 in the
+	 * register save area of the calling function
+	 */
+	stmg	%r6, %r15, 48(%r15)
+	stdy	%f0, 128(%r15)
+	stdy	%f2, 136(%r15)
+	stdy	%f4, 144(%r15)
+	stdy	%f6, 152(%r15)
+
+	/* clear r0 before calling fn; r2 (fn) and r15 (stack pointer) are untouched */
+	xgr	%r0, %r0
+
+	/* grow stack 8 bytes and call fn */
+	agfi    %r15, -8
+	basr    %r14, %r2
+	agfi	%r15, 8
+
+	/* restore registers */
+	lmg	%r6, %r15, 48(%r15)
+	ldy	%f0, 128(%r15)
+	ldy	%f2, 136(%r15)
+	ldy	%f4, 144(%r15)
+	ldy	%f6, 152(%r15)
+
+	br      %r14 /* restored by lmg */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
-- 
cgit v1.3


From f028b9f9e2433662502283850d06e9e07e72a6bb Mon Sep 17 00:00:00 2001
From: David Crawshaw <crawshaw@golang.org>
Date: Sun, 27 Mar 2016 10:21:48 -0400
Subject: cmd/link, etc: store typelinks as offsets

This is the first in a series of CLs to replace the use of pointers
in binary read-only data with offsets.

In standard Go binaries these CLs have a small effect, shrinking
8-byte pointers to 4-bytes. In position-independent code, it also
saves the dynamic relocation for the pointer. This has a significant
effect on the binary size when building as PIE, c-archive, or
c-shared.

darwin/amd64:
	cmd/go: -12KB (0.1%)
	jujud:  -82KB (0.1%)

linux/amd64 PIE:
	cmd/go:  -86KB (0.7%)
	jujud:  -569KB (0.7%)

For #6853.

Change-Id: Iad5625bbeba58dabfd4d334dbee3fcbfe04b2dcf
Reviewed-on: https://go-review.googlesource.com/21284
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: David Crawshaw <crawshaw@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/cmd/compile/internal/gc/go.go      |  2 --
 src/cmd/compile/internal/gc/main.go    |  4 ---
 src/cmd/compile/internal/gc/obj.go     |  6 +++++
 src/cmd/compile/internal/gc/reflect.go | 17 +++++-------
 src/cmd/internal/obj/data.go           | 21 ++++++++++++++-
 src/cmd/internal/obj/link.go           |  3 +++
 src/cmd/link/internal/ld/data.go       | 18 +++++++++++++
 src/cmd/link/internal/ld/symtab.go     |  4 +++
 src/reflect/export_test.go             |  8 +++---
 src/reflect/type.go                    | 47 ++++++++++++++++++++++------------
 src/runtime/runtime1.go                |  8 +++---
 src/runtime/symtab.go                  |  3 ++-
 12 files changed, 99 insertions(+), 42 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go
index d9b28ff8e6..5df49b56d6 100644
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -171,8 +171,6 @@ var msanpkg *Pkg // package runtime/msan
 
 var typepkg *Pkg // fake package for runtime type info (headers)
 
-var typelinkpkg *Pkg // fake package for runtime type info (data)
-
 var unsafepkg *Pkg // package unsafe
 
 var trackpkg *Pkg // fake package for field tracking
diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go
index 26acf8861f..45a510d577 100644
--- a/src/cmd/compile/internal/gc/main.go
+++ b/src/cmd/compile/internal/gc/main.go
@@ -126,10 +126,6 @@ func Main() {
 	itabpkg.Name = "go.itab"
 	itabpkg.Prefix = "go.itab" // not go%2eitab
 
-	typelinkpkg = mkpkg("go.typelink")
-	typelinkpkg.Name = "go.typelink"
-	typelinkpkg.Prefix = "go.typelink" // not go%2etypelink
-
 	itablinkpkg = mkpkg("go.itablink")
 	itablinkpkg.Name = "go.itablink"
 	itablinkpkg.Prefix = "go.itablink" // not go%2eitablink
diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go
index 23c8be645c..eed0ed6e24 100644
--- a/src/cmd/compile/internal/gc/obj.go
+++ b/src/cmd/compile/internal/gc/obj.go
@@ -321,6 +321,12 @@ func dsymptrLSym(s *obj.LSym, off int, x *obj.LSym, xoff int) int {
 	return off
 }
 
+func dsymptrOffLSym(s *obj.LSym, off int, x *obj.LSym, xoff int) int {
+	s.WriteOff(Ctxt, int64(off), x, int64(xoff))
+	off += 4
+	return off
+}
+
 func gdata(nam *Node, nr *Node, wid int) {
 	if nam.Op != ONAME {
 		Fatalf("gdata nam op %v", opnames[nam.Op])
diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go
index df9ef27b7a..ea67634260 100644
--- a/src/cmd/compile/internal/gc/reflect.go
+++ b/src/cmd/compile/internal/gc/reflect.go
@@ -879,7 +879,7 @@ func tracksym(t *Type, f *Field) *Sym {
 	return Pkglookup(Tconv(t, FmtLeft)+"."+f.Sym.Name, trackpkg)
 }
 
-func typelinksym(t *Type) *Sym {
+func typelinkLSym(t *Type) *obj.LSym {
 	// %-uT is what the generated Type's string field says.
 	// It uses (ambiguous) package names instead of import paths.
 	// %-T is the complete, unambiguous type name.
@@ -889,13 +889,8 @@ func typelinksym(t *Type) *Sym {
 	// ensure the types appear sorted by their string field. The
 	// names are a little long but they are discarded by the linker
 	// and do not end up in the symbol table of the final binary.
-	p := Tconv(t, FmtLeft|FmtUnsigned) + "\t" + Tconv(t, FmtLeft)
-
-	s := Pkglookup(p, typelinkpkg)
-
-	//print("typelinksym: %s -> %+S\n", p, s);
-
-	return s
+	name := "go.typelink." + Tconv(t, FmtLeft|FmtUnsigned) + "\t" + Tconv(t, FmtLeft)
+	return obj.Linklookup(Ctxt, name, 0)
 }
 
 func typesymprefix(prefix string, t *Type) *Sym {
@@ -1298,9 +1293,9 @@ ok:
 	if t.Sym == nil {
 		switch t.Etype {
 		case TPTR32, TPTR64, TARRAY, TCHAN, TFUNC, TMAP, TSTRUCT:
-			slink := typelinksym(t)
-			dsymptr(slink, 0, s, 0)
-			ggloblsym(slink, int32(Widthptr), int16(dupok|obj.RODATA))
+			slink := typelinkLSym(t)
+			dsymptrOffLSym(slink, 0, Linksym(s), 0)
+			ggloblLSym(slink, 4, int16(dupok|obj.RODATA))
 		}
 	}
 
diff --git a/src/cmd/internal/obj/data.go b/src/cmd/internal/obj/data.go
index 37ab70bb0e..546ff37269 100644
--- a/src/cmd/internal/obj/data.go
+++ b/src/cmd/internal/obj/data.go
@@ -111,17 +111,36 @@ func (s *LSym) WriteInt(ctxt *Link, off int64, siz int, i int64) {
 // rsym and roff specify the relocation for the address.
 func (s *LSym) WriteAddr(ctxt *Link, off int64, siz int, rsym *LSym, roff int64) {
 	if siz != ctxt.Arch.PtrSize {
-		ctxt.Diag("WriteAddr: bad address size: %d", siz)
+		ctxt.Diag("WriteAddr: bad address size %d in %s", siz, s.Name)
 	}
 	s.prepwrite(ctxt, off, siz)
 	r := Addrel(s)
 	r.Off = int32(off)
+	if int64(r.Off) != off {
+		ctxt.Diag("WriteAddr: off overflow %d in %s", off, s.Name)
+	}
 	r.Siz = uint8(siz)
 	r.Sym = rsym
 	r.Type = R_ADDR
 	r.Add = roff
 }
 
+// WriteOff writes a 4 byte offset to rsym+roff into s at offset off.
+// After linking the 4 bytes stored at s+off will be
+// rsym+roff-(start of section that s is in).
+func (s *LSym) WriteOff(ctxt *Link, off int64, rsym *LSym, roff int64) {
+	s.prepwrite(ctxt, off, 4)
+	r := Addrel(s)
+	r.Off = int32(off)
+	if int64(r.Off) != off {
+		ctxt.Diag("WriteOff: off overflow %d in %s", off, s.Name)
+	}
+	r.Siz = 4
+	r.Sym = rsym
+	r.Type = R_ADDROFF
+	r.Add = roff
+}
+
 // WriteString writes a string of size siz into s at offset off.
 func (s *LSym) WriteString(ctxt *Link, off int64, siz int, str string) {
 	if siz < len(str) {
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go
index 62175f9ed8..d44d4398b1 100644
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -457,6 +457,9 @@ const (
 	// R_ADDRMIPS (only used on mips64) resolves to a 32-bit external address,
 	// by loading the address into a register with two instructions (lui, ori).
 	R_ADDRMIPS
+	// R_ADDROFF resolves to an offset from the beginning of the section holding
+	// the data being relocated to the referenced symbol.
+	R_ADDROFF
 	R_SIZE
 	R_CALL
 	R_CALLARM
diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go
index ae7c287f59..cf51b0a908 100644
--- a/src/cmd/link/internal/ld/data.go
+++ b/src/cmd/link/internal/ld/data.go
@@ -525,6 +525,9 @@ func relocsym(s *LSym) {
 			}
 			o = Symaddr(r.Sym) + r.Add - int64(r.Sym.Sect.Vaddr)
 
+		case obj.R_ADDROFF:
+			o = Symaddr(r.Sym) - int64(r.Sym.Sect.Vaddr) + r.Add
+
 			// r->sym can be null when CALL $(constant) is transformed from absolute PC to relative PC call.
 		case obj.R_CALL, obj.R_GOTPCREL, obj.R_PCREL:
 			if Linkmode == LinkExternal && r.Sym != nil && r.Sym.Type != obj.SCONST && (r.Sym.Sect != Ctxt.Cursym.Sect || r.Type == obj.R_GOTPCREL) {
@@ -1599,6 +1602,10 @@ func dodata() {
 	sect.Vaddr = 0
 	Linklookup(Ctxt, "runtime.rodata", 0).Sect = sect
 	Linklookup(Ctxt, "runtime.erodata", 0).Sect = sect
+	if !UseRelro() {
+		Linklookup(Ctxt, "runtime.types", 0).Sect = sect
+		Linklookup(Ctxt, "runtime.etypes", 0).Sect = sect
+	}
 	for ; s != nil && s.Type < obj.STYPERELRO; s = s.Next {
 		datsize = aligndatsize(datsize, s)
 		s.Sect = sect
@@ -1631,6 +1638,8 @@ func dodata() {
 		sect.Align = maxalign(s, obj.STYPELINK-1)
 		datsize = Rnd(datsize, int64(sect.Align))
 		sect.Vaddr = 0
+		Linklookup(Ctxt, "runtime.types", 0).Sect = sect
+		Linklookup(Ctxt, "runtime.etypes", 0).Sect = sect
 		for ; s != nil && s.Type < obj.STYPELINK; s = s.Next {
 			datsize = aligndatsize(datsize, s)
 			if s.Outer != nil && s.Outer.Sect != nil && s.Outer.Sect != sect {
@@ -1970,10 +1979,12 @@ func address() {
 	} else {
 		rodata = text.Next
 	}
+	var relrodata *Section
 	typelink := rodata.Next
 	if UseRelro() {
 		// There is another section (.data.rel.ro) when building a shared
 		// object on elf systems.
+		relrodata = typelink
 		typelink = typelink.Next
 	}
 	itablink := typelink.Next
@@ -2007,6 +2018,11 @@ func address() {
 		s.Value = int64(sectSym.Sect.Vaddr + 16)
 	}
 
+	types := relrodata
+	if types == nil {
+		types = rodata
+	}
+
 	xdefine("runtime.text", obj.STEXT, int64(text.Vaddr))
 	xdefine("runtime.etext", obj.STEXT, int64(text.Vaddr+text.Length))
 	if HEADTYPE == obj.Hwindows {
@@ -2014,6 +2030,8 @@ func address() {
 	}
 	xdefine("runtime.rodata", obj.SRODATA, int64(rodata.Vaddr))
 	xdefine("runtime.erodata", obj.SRODATA, int64(rodata.Vaddr+rodata.Length))
+	xdefine("runtime.types", obj.SRODATA, int64(types.Vaddr))
+	xdefine("runtime.etypes", obj.SRODATA, int64(types.Vaddr+types.Length))
 	xdefine("runtime.typelink", obj.SRODATA, int64(typelink.Vaddr))
 	xdefine("runtime.etypelink", obj.SRODATA, int64(typelink.Vaddr+typelink.Length))
 	xdefine("runtime.itablink", obj.SRODATA, int64(itablink.Vaddr))
diff --git a/src/cmd/link/internal/ld/symtab.go b/src/cmd/link/internal/ld/symtab.go
index ae0b17c259..678ed38730 100644
--- a/src/cmd/link/internal/ld/symtab.go
+++ b/src/cmd/link/internal/ld/symtab.go
@@ -329,6 +329,8 @@ func symtab() {
 	xdefine("runtime.eitablink", obj.SRODATA, 0)
 	xdefine("runtime.rodata", obj.SRODATA, 0)
 	xdefine("runtime.erodata", obj.SRODATA, 0)
+	xdefine("runtime.types", obj.SRODATA, 0)
+	xdefine("runtime.etypes", obj.SRODATA, 0)
 	xdefine("runtime.noptrdata", obj.SNOPTRDATA, 0)
 	xdefine("runtime.enoptrdata", obj.SNOPTRDATA, 0)
 	xdefine("runtime.data", obj.SDATA, 0)
@@ -537,6 +539,8 @@ func symtab() {
 	Addaddr(Ctxt, moduledata, Linklookup(Ctxt, "runtime.end", 0))
 	Addaddr(Ctxt, moduledata, Linklookup(Ctxt, "runtime.gcdata", 0))
 	Addaddr(Ctxt, moduledata, Linklookup(Ctxt, "runtime.gcbss", 0))
+	Addaddr(Ctxt, moduledata, Linklookup(Ctxt, "runtime.types", 0))
+	Addaddr(Ctxt, moduledata, Linklookup(Ctxt, "runtime.etypes", 0))
 	// The typelinks slice
 	Addaddr(Ctxt, moduledata, Linklookup(Ctxt, "runtime.typelink", 0))
 	adduint(Ctxt, moduledata, uint64(ntypelinks))
diff --git a/src/reflect/export_test.go b/src/reflect/export_test.go
index ddc64b46be..037c953718 100644
--- a/src/reflect/export_test.go
+++ b/src/reflect/export_test.go
@@ -46,9 +46,11 @@ func FuncLayout(t Type, rcvr Type) (frametype Type, argSize, retOffset uintptr,
 
 func TypeLinks() []string {
 	var r []string
-	for _, m := range typelinks() {
-		for _, t := range m {
-			r = append(r, t.string)
+	sections, offset := typelinks()
+	for i, offs := range offset {
+		rodata := sections[i]
+		for _, off := range offs {
+			r = append(r, rtypeOff(rodata, off).string)
 		}
 	}
 	return r
diff --git a/src/reflect/type.go b/src/reflect/type.go
index 8f13acf26e..7104fde60a 100644
--- a/src/reflect/type.go
+++ b/src/reflect/type.go
@@ -1558,30 +1558,48 @@ func haveIdenticalUnderlyingType(T, V *rtype) bool {
 }
 
 // typelinks is implemented in package runtime.
-// It returns a slice of all the 'typelink' information in the binary,
-// which is to say a slice of known types, sorted by string.
+// It returns a slice of the sections in each module,
+// and a slice of *rtype offsets in each module.
+//
+// The types in each module are sorted by string. That is, the first
+// two linked types of the first module are:
+//
+//	d0 := sections[0]
+//	t1 := (*rtype)(add(d0, offset[0][0]))
+//	t2 := (*rtype)(add(d0, offset[0][1]))
+//
+// and
+//
+//	t1.string < t2.string
+//
 // Note that strings are not unique identifiers for types:
 // there can be more than one with a given string.
 // Only types we might want to look up are included:
 // pointers, channels, maps, slices, and arrays.
-func typelinks() [][]*rtype
+func typelinks() (sections []unsafe.Pointer, offset [][]int32)
+
+func rtypeOff(section unsafe.Pointer, off int32) *rtype {
+	return (*rtype)(add(section, uintptr(off)))
+}
 
 // typesByString returns the subslice of typelinks() whose elements have
 // the given string representation.
 // It may be empty (no known types with that string) or may have
 // multiple elements (multiple types with that string).
 func typesByString(s string) []*rtype {
-	typs := typelinks()
+	sections, offset := typelinks()
 	var ret []*rtype
 
-	for _, typ := range typs {
+	for offsI, offs := range offset {
+		section := sections[offsI]
+
 		// We are looking for the first index i where the string becomes >= s.
 		// This is a copy of sort.Search, with f(h) replaced by (*typ[h].string >= s).
-		i, j := 0, len(typ)
+		i, j := 0, len(offs)
 		for i < j {
 			h := i + (j-i)/2 // avoid overflow when computing h
 			// i ≤ h < j
-			if !(typ[h].string >= s) {
+			if !(rtypeOff(section, offs[h]).string >= s) {
 				i = h + 1 // preserves f(i-1) == false
 			} else {
 				j = h // preserves f(j) == true
@@ -1592,17 +1610,12 @@ func typesByString(s string) []*rtype {
 		// Having found the first, linear scan forward to find the last.
 		// We could do a second binary search, but the caller is going
 		// to do a linear scan anyway.
-		j = i
-		for j < len(typ) && typ[j].string == s {
-			j++
-		}
-
-		if j > i {
-			if ret == nil {
-				ret = typ[i:j:j]
-			} else {
-				ret = append(ret, typ[i:j]...)
+		for j := i; j < len(offs); j++ {
+			typ := rtypeOff(section, offs[j])
+			if typ.string != s {
+				break
 			}
+			ret = append(ret, typ)
 		}
 	}
 	return ret
diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go
index 95bebac593..e1956569fd 100644
--- a/src/runtime/runtime1.go
+++ b/src/runtime/runtime1.go
@@ -477,10 +477,12 @@ func gomcache() *mcache {
 }
 
 //go:linkname reflect_typelinks reflect.typelinks
-func reflect_typelinks() [][]*_type {
-	ret := [][]*_type{firstmoduledata.typelinks}
+func reflect_typelinks() ([]unsafe.Pointer, [][]int32) {
+	sections := []unsafe.Pointer{unsafe.Pointer(firstmoduledata.types)}
+	ret := [][]int32{firstmoduledata.typelinks}
 	for datap := firstmoduledata.next; datap != nil; datap = datap.next {
+		sections = append(sections, unsafe.Pointer(datap.types))
 		ret = append(ret, datap.typelinks)
 	}
-	return ret
+	return sections, ret
 }
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index 158bdcea0d..8c70f22c1f 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -127,8 +127,9 @@ type moduledata struct {
 	bss, ebss             uintptr
 	noptrbss, enoptrbss   uintptr
 	end, gcdata, gcbss    uintptr
+	types, etypes         uintptr
 
-	typelinks []*_type
+	typelinks []int32 // offsets from types
 	itablinks []*itab
 
 	modulename   string
-- 
cgit v1.3


From 260b7daf0a3fa1548d976f2484325240d4bdb73a Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Tue, 12 Apr 2016 16:25:48 -0700
Subject: cmd/compile: fix arg to getcallerpc

getcallerpc's arg needs to point to the first argument slot.
I believe this bug was introduced by Michel's itab changes
(specifically https://go-review.googlesource.com/c/20902).

Fixes #15145

Change-Id: Ifb2e17f3658e2136c7950dfc789b4d5706320683
Reviewed-on: https://go-review.googlesource.com/21931
Reviewed-by: Michel Lespinasse <walken@google.com>
---
 src/runtime/iface.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/runtime')

diff --git a/src/runtime/iface.go b/src/runtime/iface.go
index 3ce1e237d3..a4c962fb7a 100644
--- a/src/runtime/iface.go
+++ b/src/runtime/iface.go
@@ -173,7 +173,7 @@ func convT2E(t *_type, elem unsafe.Pointer, x unsafe.Pointer) (e eface) {
 func convT2I(tab *itab, elem unsafe.Pointer, x unsafe.Pointer) (i iface) {
 	t := tab._type
 	if raceenabled {
-		raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2I))
+		raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2I))
 	}
 	if msanenabled {
 		msanread(elem, t.size)
-- 
cgit v1.3


From 6af4e996e2f0408f159a8553d11122b9fe052ffb Mon Sep 17 00:00:00 2001
From: Matthew Dempsky <mdempsky@google.com>
Date: Tue, 12 Apr 2016 15:51:24 -0700
Subject: runtime: simplify setPanicOnFault slightly

No need to acquire the M just to change G's paniconfault flag, and the
original C implementation of SetPanicOnFault did not. The M
acquisition logic is an artifact of golang.org/cl/131010044, which was
started before golang.org/cl/123640043 (which introduced the current
"getg" function) was submitted.

Change-Id: I6d1939008660210be46904395cf5f5bbc2c8f754
Reviewed-on: https://go-review.googlesource.com/21935
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/runtime/rdebug.go | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/rdebug.go b/src/runtime/rdebug.go
index d966734813..1b213f1934 100644
--- a/src/runtime/rdebug.go
+++ b/src/runtime/rdebug.go
@@ -15,9 +15,8 @@ func setMaxStack(in int) (out int) {
 
 //go:linkname setPanicOnFault runtime/debug.setPanicOnFault
 func setPanicOnFault(new bool) (old bool) {
-	mp := acquirem()
-	old = mp.curg.paniconfault
-	mp.curg.paniconfault = new
-	releasem(mp)
+	_g_ := getg()
+	old = _g_.paniconfault
+	_g_.paniconfault = new
 	return old
 }
-- 
cgit v1.3


From 7d469179e6e3dafe16700b7fc1cf8683ad9453fa Mon Sep 17 00:00:00 2001
From: David Crawshaw <crawshaw@golang.org>
Date: Mon, 28 Mar 2016 10:32:27 -0400
Subject: cmd/compile, etc: store method tables as offsets

This CL introduces the typeOff type and a lookup method of the same
name that can turn a typeOff offset into an *rtype.

In a typical Go binary (built with buildmode=exe, pie, c-archive, or
c-shared), there is one moduledata and all typeOff values are offsets
relative to firstmoduledata.types. This makes computing the pointer
cheap in typical programs.

With buildmode=shared (and one day, buildmode=plugin) there are
multiple modules whose relative offset is determined at runtime.
We identify a type in the general case by the pair of the original
*rtype that references it and its typeOff value. We determine
the module from the original pointer, and then use the typeOff from
there to compute the final *rtype.

To ensure there is only one *rtype representing each type, the
runtime initializes a typemap for each module, using any identical
type from an earlier module when resolving that offset. This means
that types computed from an offset match the type mapped by the
pointer dynamic relocations.

A series of followup CLs will replace other *rtype values with typeOff
(and name/*string with nameOff).

For types created at runtime by reflect, type offsets are treated as
global IDs and reference into a reflect offset map kept by the runtime.

darwin/amd64:
	cmd/go:  -57KB (0.6%)
	jujud:  -557KB (0.8%)

linux/amd64 PIE:
	cmd/go: -361KB (3.0%)
	jujud:  -3.5MB (4.2%)

For #6853.

Change-Id: Icf096fd884a0a0cb9f280f46f7a26c70a9006c96
Reviewed-on: https://go-review.googlesource.com/21285
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: David Crawshaw <crawshaw@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/cmd/compile/internal/gc/reflect.go |  75 +++++---
 src/cmd/internal/obj/link.go           |  15 +-
 src/cmd/link/internal/ld/deadcode.go   |  14 +-
 src/cmd/link/internal/ld/decodesym.go  |  22 +--
 src/reflect/export_test.go             |   2 +-
 src/reflect/type.go                    | 267 +++++++++++++++++++++-------
 src/reflect/value.go                   |  15 +-
 src/runtime/iface.go                   |  10 +-
 src/runtime/proc.go                    |   3 +-
 src/runtime/runtime1.go                |  33 ++++
 src/runtime/symtab.go                  |   2 +
 src/runtime/type.go                    | 307 ++++++++++++++++++++++++++++++++-
 12 files changed, 637 insertions(+), 128 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go
index ea67634260..2bd50b4665 100644
--- a/src/cmd/compile/internal/gc/reflect.go
+++ b/src/cmd/compile/internal/gc/reflect.go
@@ -75,7 +75,7 @@ func uncommonSize(t *Type) int { // Sizeof(runtime.uncommontype{})
 	if t.Sym == nil && len(methods(t)) == 0 {
 		return 0
 	}
-	return 2*Widthptr + 2*Widthint
+	return 2 * Widthptr
 }
 
 func makefield(name string, t *Type) *Field {
@@ -580,13 +580,23 @@ func dextratype(s *Sym, ot int, t *Type, dataAdd int) int {
 
 	ot = dgopkgpath(s, ot, typePkg(t))
 
-	// slice header
-	ot = dsymptr(s, ot, s, ot+Widthptr+2*Widthint+dataAdd)
-
-	n := len(m)
-	ot = duintxx(s, ot, uint64(n), Widthint)
-	ot = duintxx(s, ot, uint64(n), Widthint)
+	dataAdd += Widthptr + 2 + 2
+	if Widthptr == 8 {
+		dataAdd += 4
+	}
+	mcount := len(m)
+	if mcount != int(uint16(mcount)) {
+		Fatalf("too many methods on %s: %d", t, mcount)
+	}
+	if dataAdd != int(uint16(dataAdd)) {
+		Fatalf("methods are too far away on %s: %d", t, dataAdd)
+	}
 
+	ot = duint16(s, ot, uint16(mcount))
+	ot = duint16(s, ot, uint16(dataAdd))
+	if Widthptr == 8 {
+		ot = duint32(s, ot, 0) // align for following pointers
+	}
 	return ot
 }
 
@@ -609,6 +619,7 @@ func typePkg(t *Type) *Pkg {
 // dextratypeData dumps the backing array for the []method field of
 // runtime.uncommontype.
 func dextratypeData(s *Sym, ot int, t *Type) int {
+	lsym := Linksym(s)
 	for _, a := range methods(t) {
 		// ../../../../runtime/type.go:/method
 		exported := exportname(a.name)
@@ -617,21 +628,24 @@ func dextratypeData(s *Sym, ot int, t *Type) int {
 			pkg = a.pkg
 		}
 		ot = dname(s, ot, a.name, "", pkg, exported)
-		ot = dmethodptr(s, ot, dtypesym(a.mtype))
-		ot = dmethodptr(s, ot, a.isym)
-		ot = dmethodptr(s, ot, a.tsym)
+		ot = dmethodptrOffLSym(lsym, ot, Linksym(dtypesym(a.mtype)))
+		ot = dmethodptrOffLSym(lsym, ot, Linksym(a.isym))
+		ot = dmethodptrOffLSym(lsym, ot, Linksym(a.tsym))
+		if Widthptr == 8 {
+			ot = duintxxLSym(lsym, ot, 0, 4) // pad to reflect.method size
+		}
 	}
 	return ot
 }
 
-func dmethodptr(s *Sym, off int, x *Sym) int {
-	duintptr(s, off, 0)
-	r := obj.Addrel(Linksym(s))
-	r.Off = int32(off)
-	r.Siz = uint8(Widthptr)
-	r.Sym = Linksym(x)
-	r.Type = obj.R_METHOD
-	return off + Widthptr
+func dmethodptrOffLSym(s *obj.LSym, ot int, x *obj.LSym) int {
+	duintxxLSym(s, ot, 0, 4)
+	r := obj.Addrel(s)
+	r.Off = int32(ot)
+	r.Siz = 4
+	r.Sym = x
+	r.Type = obj.R_METHODOFF
+	return ot + 4
 }
 
 var kinds = []int{
@@ -1286,18 +1300,29 @@ ok:
 	ggloblsym(s, int32(ot), int16(dupok|obj.RODATA))
 
 	// generate typelink.foo pointing at s = type.foo.
+	//
 	// The linker will leave a table of all the typelinks for
-	// types in the binary, so reflect can find them.
-	// We only need the link for unnamed composites that
-	// we want be able to find.
-	if t.Sym == nil {
+	// types in the binary, so the runtime can find them.
+	//
+	// When buildmode=shared, all types are in typelinks so the
+	// runtime can deduplicate type pointers.
+	keep := Ctxt.Flag_dynlink
+	if !keep && t.Sym == nil {
+		// For an unnamed type, we only need the link if the type can
+		// be created at run time by reflect.PtrTo and similar
+		// functions. If the type exists in the program, those
+		// functions must return the existing type structure rather
+		// than creating a new one.
 		switch t.Etype {
 		case TPTR32, TPTR64, TARRAY, TCHAN, TFUNC, TMAP, TSTRUCT:
-			slink := typelinkLSym(t)
-			dsymptrOffLSym(slink, 0, Linksym(s), 0)
-			ggloblLSym(slink, 4, int16(dupok|obj.RODATA))
+			keep = true
 		}
 	}
+	if keep {
+		slink := typelinkLSym(t)
+		dsymptrOffLSym(slink, 0, Linksym(s), 0)
+		ggloblLSym(slink, 4, int16(dupok|obj.RODATA))
+	}
 
 	return s
 }
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go
index 42aaa5f4f0..55c9f4f9e2 100644
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -457,8 +457,8 @@ const (
 	// R_ADDRMIPS (only used on mips64) resolves to a 32-bit external address,
 	// by loading the address into a register with two instructions (lui, ori).
 	R_ADDRMIPS
-	// R_ADDROFF resolves to an offset from the beginning of the section holding
-	// the data being relocated to the referenced symbol.
+	// R_ADDROFF resolves to a 32-bit offset from the beginning of the section
+	// holding the data being relocated to the referenced symbol.
 	R_ADDROFF
 	R_SIZE
 	R_CALL
@@ -492,11 +492,12 @@ const (
 	// should be linked into the final binary, even if there are no other
 	// direct references. (This is used for types reachable by reflection.)
 	R_USETYPE
-	// R_METHOD resolves to an *rtype for a method.
-	// It is used when linking from the uncommonType of another *rtype, and
-	// may be set to zero by the linker if it determines the method text is
-	// unreachable by the linked program.
-	R_METHOD
+	// R_METHODOFF resolves to a 32-bit offset from the beginning of the section
+	// holding the data being relocated to the referenced symbol.
+	// It is a variant of R_ADDROFF used when linking from the uncommonType of a
+	// *rtype, and may be set to zero by the linker if it determines the method
+	// text is unreachable by the linked program.
+	R_METHODOFF
 	R_POWER_TOC
 	R_GOTPCREL
 	// R_JMPMIPS (only used on mips64) resolves to non-PC-relative target address
diff --git a/src/cmd/link/internal/ld/deadcode.go b/src/cmd/link/internal/ld/deadcode.go
index 51fae02ef0..c83a104a54 100644
--- a/src/cmd/link/internal/ld/deadcode.go
+++ b/src/cmd/link/internal/ld/deadcode.go
@@ -19,7 +19,7 @@ import (
 //
 // This flood fill is wrapped in logic for pruning unused methods.
 // All methods are mentioned by relocations on their receiver's *rtype.
-// These relocations are specially defined as R_METHOD by the compiler
+// These relocations are specially defined as R_METHODOFF by the compiler
 // so we can detect and manipulated them here.
 //
 // There are three ways a method of a reachable type can be invoked:
@@ -100,7 +100,7 @@ func deadcode(ctxt *Link) {
 		d.flood()
 	}
 
-	// Remove all remaining unreached R_METHOD relocations.
+	// Remove all remaining unreached R_METHODOFF relocations.
 	for _, m := range d.markableMethods {
 		for _, r := range m.r {
 			d.cleanupReloc(r)
@@ -167,7 +167,7 @@ var markextra = []string{
 type methodref struct {
 	m   methodsig
 	src *LSym     // receiver type symbol
-	r   [3]*Reloc // R_METHOD relocations to fields of runtime.method
+	r   [3]*Reloc // R_METHODOFF relocations to fields of runtime.method
 }
 
 func (m methodref) ifn() *LSym { return m.r[1].Sym }
@@ -190,7 +190,7 @@ type deadcodepass struct {
 
 func (d *deadcodepass) cleanupReloc(r *Reloc) {
 	if r.Sym.Attr.Reachable() {
-		r.Type = obj.R_ADDR
+		r.Type = obj.R_ADDROFF
 	} else {
 		if Debug['v'] > 1 {
 			fmt.Fprintf(d.ctxt.Bso, "removing method %s\n", r.Sym.Name)
@@ -217,7 +217,7 @@ func (d *deadcodepass) mark(s, parent *LSym) {
 func (d *deadcodepass) markMethod(m methodref) {
 	for _, r := range m.r {
 		d.mark(r.Sym, m.src)
-		r.Type = obj.R_ADDR
+		r.Type = obj.R_ADDROFF
 	}
 }
 
@@ -291,14 +291,14 @@ func (d *deadcodepass) flood() {
 			}
 		}
 
-		mpos := 0 // 0-3, the R_METHOD relocs of runtime.uncommontype
+		mpos := 0 // 0-3, the R_METHODOFF relocs of runtime.uncommontype
 		var methods []methodref
 		for i := 0; i < len(s.R); i++ {
 			r := &s.R[i]
 			if r.Sym == nil {
 				continue
 			}
-			if r.Type != obj.R_METHOD {
+			if r.Type != obj.R_METHODOFF {
 				d.mark(r.Sym, s)
 				continue
 			}
diff --git a/src/cmd/link/internal/ld/decodesym.go b/src/cmd/link/internal/ld/decodesym.go
index 7daa8bc812..5fa8b4c81f 100644
--- a/src/cmd/link/internal/ld/decodesym.go
+++ b/src/cmd/link/internal/ld/decodesym.go
@@ -47,9 +47,9 @@ func decode_inuxi(p []byte, sz int) uint64 {
 	}
 }
 
-func commonsize() int      { return 6*SysArch.PtrSize + 8 }                 // runtime._type
-func structfieldSize() int { return 3 * SysArch.PtrSize }                   // runtime.structfield
-func uncommonSize() int    { return 2*SysArch.PtrSize + 2*SysArch.IntSize } // runtime.uncommontype
+func commonsize() int      { return 6*SysArch.PtrSize + 8 } // runtime._type
+func structfieldSize() int { return 3 * SysArch.PtrSize }   // runtime.structfield
+func uncommonSize() int    { return 2 * SysArch.PtrSize }   // runtime.uncommontype
 
 // Type.commonType.kind
 func decodetype_kind(s *LSym) uint8 {
@@ -341,12 +341,14 @@ func decodetype_methods(s *LSym) []methodsig {
 		// just Sizeof(rtype)
 	}
 
-	numMethods := int(decode_inuxi(s.P[off+2*SysArch.PtrSize:], SysArch.IntSize))
-	r := decode_reloc(s, int32(off+SysArch.PtrSize))
-	if r.Sym != s {
-		panic(fmt.Sprintf("method slice pointer in %s leads to a different symbol %s", s, r.Sym))
+	mcount := int(decode_inuxi(s.P[off+SysArch.PtrSize:], 2))
+	moff := int(decode_inuxi(s.P[off+SysArch.PtrSize+2:], 2))
+	off += moff          // offset to array of reflect.method values
+	var sizeofMethod int // sizeof reflect.method in program
+	if SysArch.PtrSize == 4 {
+		sizeofMethod = 4 * SysArch.PtrSize
+	} else {
+		sizeofMethod = 3 * SysArch.PtrSize
 	}
-	off = int(r.Add)                    // array of reflect.method values
-	sizeofMethod := 4 * SysArch.PtrSize // sizeof reflect.method in program
-	return decode_methodsig(s, off, sizeofMethod, numMethods)
+	return decode_methodsig(s, off, sizeofMethod, mcount)
 }
diff --git a/src/reflect/export_test.go b/src/reflect/export_test.go
index 037c953718..2769e0db40 100644
--- a/src/reflect/export_test.go
+++ b/src/reflect/export_test.go
@@ -90,7 +90,7 @@ func FirstMethodNameBytes(t Type) *byte {
 	if ut == nil {
 		panic("type has no methods")
 	}
-	m := ut.methods[0]
+	m := ut.methods()[0]
 	if *m.name.data(0)&(1<<2) == 0 {
 		panic("method name does not have pkgPath *string")
 	}
diff --git a/src/reflect/type.go b/src/reflect/type.go
index 7104fde60a..c7ed402be2 100644
--- a/src/reflect/type.go
+++ b/src/reflect/type.go
@@ -288,10 +288,10 @@ type typeAlg struct {
 
 // Method on non-interface type
 type method struct {
-	name name           // name of method
-	mtyp *rtype         // method type (without receiver)
-	ifn  unsafe.Pointer // fn used in interface call (one-word receiver)
-	tfn  unsafe.Pointer // fn used for normal method call
+	name name    // name of method
+	mtyp typeOff // method type (without receiver)
+	ifn  textOff // fn used in interface call (one-word receiver)
+	tfn  textOff // fn used for normal method call
 }
 
 // uncommonType is present only for types with names or methods
@@ -299,8 +299,9 @@ type method struct {
 // Using a pointer to this struct reduces the overall size required
 // to describe an unnamed type with no methods.
 type uncommonType struct {
-	pkgPath *string  // import path; nil for built-in types like int, string
-	methods []method // methods associated with type
+	pkgPath *string // import path; nil for built-in types like int, string
+	mcount  uint16  // number of methods
+	moff    uint16  // offset from this uncommontype to [mcount]method
 }
 
 // ChanDir represents a channel type's direction.
@@ -589,6 +590,10 @@ var kindNames = []string{
 	UnsafePointer: "unsafe.Pointer",
 }
 
+func (t *uncommonType) methods() []method {
+	return (*[1 << 16]method)(add(unsafe.Pointer(t), uintptr(t.moff)))[:t.mcount:t.mcount]
+}
+
 func (t *uncommonType) PkgPath() string {
 	if t == nil || t.pkgPath == nil {
 		return ""
@@ -596,13 +601,55 @@ func (t *uncommonType) PkgPath() string {
 	return *t.pkgPath
 }
 
+// resolveTypeOff resolves an *rtype offset from a base type.
+// The (*rtype).typeOff method is a convenience wrapper for this function.
+// Implemented in the runtime package.
+func resolveTypeOff(rtype unsafe.Pointer, off int32) unsafe.Pointer
+
+// resolveTextOff resolves a function pointer offset from a base type.
+// The (*rtype).textOff method is a convenience wrapper for this function.
+// Implemented in the runtime package.
+func resolveTextOff(rtype unsafe.Pointer, off int32) unsafe.Pointer
+
+// addReflectOff adds a pointer to the reflection lookup map in the runtime.
+// It returns a new ID that can be used as a typeOff or textOff, and will
+// be resolved correctly. Implemented in the runtime package.
+func addReflectOff(ptr unsafe.Pointer) int32
+
+// resolveReflectType adds a *rtype to the reflection lookup map in the runtime.
+// It returns a new typeOff that can be used to refer to the pointer.
+func resolveReflectType(t *rtype) typeOff {
+	return typeOff(addReflectOff(unsafe.Pointer(t)))
+}
+
+// resolveReflectText adds a function pointer to the reflection lookup map in
+// the runtime. It returns a new textOff that can be used to refer to the
+// pointer.
+func resolveReflectText(ptr unsafe.Pointer) textOff {
+	return textOff(addReflectOff(ptr))
+}
+
+type typeOff int32 // offset to an *rtype
+type textOff int32 // offset from top of text section
+
+func (t *rtype) typeOff(off typeOff) *rtype {
+	if off == 0 {
+		return nil
+	}
+	return (*rtype)(resolveTypeOff(unsafe.Pointer(t), int32(off)))
+}
+
+func (t *rtype) textOff(off textOff) unsafe.Pointer {
+	return resolveTextOff(unsafe.Pointer(t), int32(off))
+}
+
 func (t *rtype) uncommon() *uncommonType {
 	if t.tflag&tflagUncommon == 0 {
 		return nil
 	}
 	switch t.Kind() {
 	case Struct:
-		return &(*structTypeWithMethods)(unsafe.Pointer(t)).u
+		return &(*structTypeUncommon)(unsafe.Pointer(t)).u
 	case Ptr:
 		type u struct {
 			ptrType
@@ -688,7 +735,7 @@ func (t *rtype) NumMethod() int {
 	if ut == nil {
 		return 0
 	}
-	return len(ut.methods)
+	return int(ut.mcount)
 }
 
 func (t *rtype) Method(i int) (m Method) {
@@ -698,10 +745,10 @@ func (t *rtype) Method(i int) (m Method) {
 	}
 	ut := t.uncommon()
 
-	if ut == nil || i < 0 || i >= len(ut.methods) {
+	if ut == nil || i < 0 || i >= int(ut.mcount) {
 		panic("reflect: Method index out of range")
 	}
-	p := &ut.methods[i]
+	p := ut.methods()[i]
 	m.Name = p.name.name()
 	fl := flag(Func)
 	if !p.name.isExported() {
@@ -712,8 +759,9 @@ func (t *rtype) Method(i int) (m Method) {
 		m.PkgPath = *pkgPath
 		fl |= flagStickyRO
 	}
-	if p.mtyp != nil {
-		ft := (*funcType)(unsafe.Pointer(p.mtyp))
+	if p.mtyp != 0 {
+		mtyp := t.typeOff(p.mtyp)
+		ft := (*funcType)(unsafe.Pointer(mtyp))
 		in := make([]Type, 0, 1+len(ft.in()))
 		in = append(in, t)
 		for _, arg := range ft.in() {
@@ -723,9 +771,10 @@ func (t *rtype) Method(i int) (m Method) {
 		for _, ret := range ft.out() {
 			out = append(out, ret)
 		}
-		mt := FuncOf(in, out, p.mtyp.IsVariadic())
+		mt := FuncOf(in, out, ft.IsVariadic())
 		m.Type = mt
-		fn := unsafe.Pointer(&p.tfn)
+		tfn := t.textOff(p.tfn)
+		fn := unsafe.Pointer(&tfn)
 		m.Func = Value{mt.(*rtype), fn, fl}
 	}
 	m.Index = i
@@ -741,8 +790,9 @@ func (t *rtype) MethodByName(name string) (m Method, ok bool) {
 	if ut == nil {
 		return Method{}, false
 	}
-	for i := range ut.methods {
-		p := &ut.methods[i]
+	utmethods := ut.methods()
+	for i := 0; i < int(ut.mcount); i++ {
+		p := utmethods[i]
 		if p.name.name() == name {
 			return t.Method(i), true
 		}
@@ -1430,10 +1480,11 @@ func implements(T, V *rtype) bool {
 		return false
 	}
 	i := 0
-	for j := 0; j < len(v.methods); j++ {
+	vmethods := v.methods()
+	for j := 0; j < int(v.mcount); j++ {
 		tm := &t.methods[i]
-		vm := &v.methods[j]
-		if vm.name.name() == tm.name.name() && vm.mtyp == tm.typ {
+		vm := vmethods[j]
+		if vm.name.name() == tm.name.name() && V.typeOff(vm.mtyp) == tm.typ {
 			if i++; i >= len(t.methods) {
 				return true
 			}
@@ -2161,21 +2212,55 @@ func SliceOf(t Type) Type {
 	return cachePut(ckey, &slice.rtype)
 }
 
-// structTypeWithMethods is a structType created at runtime with StructOf.
-// It is needed to pin the []method slice from its associated uncommonType struct.
-// Keep in sync with the memory layout of structType.
-type structTypeWithMethods struct {
-	structType
-	u uncommonType
-}
-
 // The structLookupCache caches StructOf lookups.
 // StructOf does not share the common lookupCache since we need to pin
-// the *structType and its associated *uncommonType (especially the
-// []method slice field of that uncommonType.)
+// the memory associated with *structTypeFixedN.
 var structLookupCache struct {
 	sync.RWMutex
-	m map[uint32][]*structTypeWithMethods // keyed by hash calculated in StructOf
+	m map[uint32][]interface {
+		common() *rtype
+	} // keyed by hash calculated in StructOf
+}
+
+type structTypeUncommon struct {
+	structType
+	u uncommonType
+}
+
+// A *rtype representing a struct is followed directly in memory by an
+// array of method objects representing the methods attached to the
+// struct. To get the same layout for a run time generated type, we
+// need an array directly following the uncommonType memory. The types
+// structTypeFixed4, ...structTypeFixedN are used to do this.
+//
+// A similar strategy is used for funcTypeFixed4, ...funcTypeFixedN.
+
+// TODO(crawshaw): as these structTypeFixedN and funcTypeFixedN structs
+// have no methods, they could be defined at runtime using the StructOf
+// function.
+
+type structTypeFixed4 struct {
+	structType
+	u uncommonType
+	m [4]method
+}
+
+type structTypeFixed8 struct {
+	structType
+	u uncommonType
+	m [8]method
+}
+
+type structTypeFixed16 struct {
+	structType
+	u uncommonType
+	m [16]method
+}
+
+type structTypeFixed32 struct {
+	structType
+	u uncommonType
+	m [32]method
 }
 
 // StructOf returns the struct type containing fields.
@@ -2192,7 +2277,7 @@ func StructOf(fields []StructField) Type {
 		typalign   uint8
 		comparable = true
 		hashable   = true
-		typ        = new(structTypeWithMethods)
+		methods    []method
 
 		fs   = make([]structField, len(fields))
 		repr = make([]byte, 0, 64)
@@ -2269,7 +2354,6 @@ func StructOf(fields []StructField) Type {
 							}
 							return recv.Field(ifield).Method(imethod).Call(args)
 						})
-
 					} else {
 						tfn = MakeFunc(m.typ, func(in []Value) []Value {
 							var args []Value
@@ -2287,47 +2371,59 @@ func StructOf(fields []StructField) Type {
 							}
 							return recv.Field(ifield).Method(imethod).Call(args)
 						})
-
 					}
 
-					typ.u.methods = append(
-						typ.u.methods,
-						method{
-							name: m.name,
-							mtyp: m.typ,
-							ifn:  unsafe.Pointer(&ifn),
-							tfn:  unsafe.Pointer(&tfn),
-						},
-					)
+					methods = append(methods, method{
+						name: m.name,
+						mtyp: resolveReflectType(m.typ),
+						ifn:  resolveReflectText(unsafe.Pointer(&ifn)),
+						tfn:  resolveReflectText(unsafe.Pointer(&tfn)),
+					})
 				}
 			case Ptr:
 				ptr := (*ptrType)(unsafe.Pointer(ft))
 				if unt := ptr.uncommon(); unt != nil {
-					for _, m := range unt.methods {
+					for _, m := range unt.methods() {
 						if m.name.pkgPath() != nil {
 							// TODO(sbinet)
 							panic("reflect: embedded interface with unexported method(s) not implemented")
 						}
-						typ.u.methods = append(typ.u.methods, m)
+						methods = append(methods, method{
+							name: m.name,
+							mtyp: resolveReflectType(ptr.typeOff(m.mtyp)),
+							ifn:  resolveReflectText(ptr.textOff(m.ifn)),
+							tfn:  resolveReflectText(ptr.textOff(m.tfn)),
+						})
 					}
 				}
 				if unt := ptr.elem.uncommon(); unt != nil {
-					for _, m := range unt.methods {
+					for _, m := range unt.methods() {
 						if m.name.pkgPath() != nil {
 							// TODO(sbinet)
 							panic("reflect: embedded interface with unexported method(s) not implemented")
 						}
-						typ.u.methods = append(typ.u.methods, m)
+						methods = append(methods, method{
+							name: m.name,
+							mtyp: resolveReflectType(ptr.elem.typeOff(m.mtyp)),
+							ifn:  resolveReflectText(ptr.elem.textOff(m.ifn)),
+							tfn:  resolveReflectText(ptr.elem.textOff(m.tfn)),
+						})
 					}
 				}
 			default:
 				if unt := ft.uncommon(); unt != nil {
-					for _, m := range unt.methods {
+					for _, m := range unt.methods() {
 						if m.name.pkgPath() != nil {
 							// TODO(sbinet)
 							panic("reflect: embedded interface with unexported method(s) not implemented")
 						}
-						typ.u.methods = append(typ.u.methods, m)
+						methods = append(methods, method{
+							name: m.name,
+							mtyp: resolveReflectType(ft.typeOff(m.mtyp)),
+							ifn:  resolveReflectText(ft.textOff(m.ifn)),
+							tfn:  resolveReflectText(ft.textOff(m.tfn)),
+						})
+
 					}
 				}
 			}
@@ -2359,6 +2455,49 @@ func StructOf(fields []StructField) Type {
 
 		fs[i] = f
 	}
+
+	var typ *structType
+	var ut *uncommonType
+	var typPin interface {
+		common() *rtype
+	} // structTypeFixedN
+
+	switch {
+	case len(methods) == 0:
+		t := new(structTypeUncommon)
+		typ = &t.structType
+		ut = &t.u
+		typPin = t
+	case len(methods) <= 4:
+		t := new(structTypeFixed4)
+		typ = &t.structType
+		ut = &t.u
+		copy(t.m[:], methods)
+		typPin = t
+	case len(methods) <= 8:
+		t := new(structTypeFixed8)
+		typ = &t.structType
+		ut = &t.u
+		copy(t.m[:], methods)
+		typPin = t
+	case len(methods) <= 16:
+		t := new(structTypeFixed16)
+		typ = &t.structType
+		ut = &t.u
+		copy(t.m[:], methods)
+		typPin = t
+	case len(methods) <= 32:
+		t := new(structTypeFixed32)
+		typ = &t.structType
+		ut = &t.u
+		copy(t.m[:], methods)
+		typPin = t
+	default:
+		panic("reflect.StructOf: too many methods")
+	}
+	ut.mcount = uint16(len(methods))
+	ut.moff = uint16(unsafe.Sizeof(uncommonType{}))
+
 	if len(fs) > 0 {
 		repr = append(repr, ' ')
 	}
@@ -2372,15 +2511,16 @@ func StructOf(fields []StructField) Type {
 	// Make the struct type.
 	var istruct interface{} = struct{}{}
 	prototype := *(**structType)(unsafe.Pointer(&istruct))
-	typ.structType = *prototype
-	typ.structType.fields = fs
+	*typ = *prototype
+	typ.fields = fs
 
 	// Look in cache
 	structLookupCache.RLock()
-	for _, t := range structLookupCache.m[hash] {
-		if haveIdenticalUnderlyingType(&typ.rtype, &t.rtype) {
+	for _, st := range structLookupCache.m[hash] {
+		t := st.common()
+		if haveIdenticalUnderlyingType(&typ.rtype, t) {
 			structLookupCache.RUnlock()
-			return &t.rtype
+			return t
 		}
 	}
 	structLookupCache.RUnlock()
@@ -2389,11 +2529,14 @@ func StructOf(fields []StructField) Type {
 	structLookupCache.Lock()
 	defer structLookupCache.Unlock()
 	if structLookupCache.m == nil {
-		structLookupCache.m = make(map[uint32][]*structTypeWithMethods)
+		structLookupCache.m = make(map[uint32][]interface {
+			common() *rtype
+		})
 	}
-	for _, t := range structLookupCache.m[hash] {
-		if haveIdenticalUnderlyingType(&typ.rtype, &t.rtype) {
-			return &t.rtype
+	for _, st := range structLookupCache.m[hash] {
+		t := st.common()
+		if haveIdenticalUnderlyingType(&typ.rtype, t) {
+			return t
 		}
 	}
 
@@ -2403,9 +2546,8 @@ func StructOf(fields []StructField) Type {
 			// even if 't' wasn't a structType with methods, we should be ok
 			// as the 'u uncommonType' field won't be accessed except when
 			// tflag&tflagUncommon is set.
-			tt := (*structTypeWithMethods)(unsafe.Pointer(t))
-			structLookupCache.m[hash] = append(structLookupCache.m[hash], tt)
-			return &tt.rtype
+			structLookupCache.m[hash] = append(structLookupCache.m[hash], t)
+			return t
 		}
 	}
 
@@ -2414,7 +2556,7 @@ func StructOf(fields []StructField) Type {
 	typ.size = size
 	typ.align = typalign
 	typ.fieldAlign = typalign
-	if len(typ.u.methods) > 0 {
+	if len(methods) > 0 {
 		typ.tflag |= tflagUncommon
 	}
 	if !hasPtr {
@@ -2514,7 +2656,7 @@ func StructOf(fields []StructField) Type {
 		typ.kind &^= kindDirectIface
 	}
 
-	structLookupCache.m[hash] = append(structLookupCache.m[hash], typ)
+	structLookupCache.m[hash] = append(structLookupCache.m[hash], typPin)
 	return &typ.rtype
 }
 
@@ -2533,6 +2675,7 @@ func runtimeStructField(field StructField) structField {
 		}
 	}
 
+	_ = resolveReflectType(field.Type.common())
 	return structField{
 		name:   newName(field.Name, string(field.Tag), field.PkgPath, exported),
 		typ:    field.Type.common(),
diff --git a/src/reflect/value.go b/src/reflect/value.go
index 262545d973..d72c14e9e1 100644
--- a/src/reflect/value.go
+++ b/src/reflect/value.go
@@ -566,15 +566,16 @@ func methodReceiver(op string, v Value, methodIndex int) (rcvrtype, t *rtype, fn
 	} else {
 		rcvrtype = v.typ
 		ut := v.typ.uncommon()
-		if ut == nil || uint(i) >= uint(len(ut.methods)) {
+		if ut == nil || uint(i) >= uint(ut.mcount) {
 			panic("reflect: internal error: invalid method index")
 		}
-		m := &ut.methods[i]
+		m := ut.methods()[i]
 		if !m.name.isExported() {
 			panic("reflect: " + op + " of unexported method")
 		}
-		fn = unsafe.Pointer(&m.ifn)
-		t = m.mtyp
+		ifn := v.typ.textOff(m.ifn)
+		fn = unsafe.Pointer(&ifn)
+		t = v.typ.typeOff(m.mtyp)
 	}
 	return
 }
@@ -1687,11 +1688,11 @@ func (v Value) Type() Type {
 	}
 	// Method on concrete type.
 	ut := v.typ.uncommon()
-	if ut == nil || uint(i) >= uint(len(ut.methods)) {
+	if ut == nil || uint(i) >= uint(ut.mcount) {
 		panic("reflect: internal error: invalid method index")
 	}
-	m := &ut.methods[i]
-	return m.mtyp
+	m := ut.methods()[i]
+	return v.typ.typeOff(m.mtyp)
 }
 
 // Uint returns v's underlying value, as a uint64.
diff --git a/src/runtime/iface.go b/src/runtime/iface.go
index a4c962fb7a..700bdc2f48 100644
--- a/src/runtime/iface.go
+++ b/src/runtime/iface.go
@@ -93,7 +93,8 @@ func additab(m *itab, locked, canfail bool) {
 	// so can iterate over both in lock step;
 	// the loop is O(ni+nt) not O(ni*nt).
 	ni := len(inter.mhdr)
-	nt := len(x.mhdr)
+	nt := int(x.mcount)
+	xmhdr := (*[1 << 16]method)(add(unsafe.Pointer(x), uintptr(x.moff)))[:nt:nt]
 	j := 0
 	for k := 0; k < ni; k++ {
 		i := &inter.mhdr[k]
@@ -104,15 +105,16 @@ func additab(m *itab, locked, canfail bool) {
 			ipkg = inter.pkgpath
 		}
 		for ; j < nt; j++ {
-			t := &x.mhdr[j]
-			if t.mtyp == itype && t.name.name() == iname {
+			t := &xmhdr[j]
+			if typ.typeOff(t.mtyp) == itype && t.name.name() == iname {
 				pkgPath := t.name.pkgPath()
 				if pkgPath == nil {
 					pkgPath = x.pkgpath
 				}
 				if t.name.isExported() || pkgPath == ipkg {
 					if m != nil {
-						*(*unsafe.Pointer)(add(unsafe.Pointer(&m.fun[0]), uintptr(k)*sys.PtrSize)) = t.ifn
+						ifn := typ.textOff(t.ifn)
+						*(*unsafe.Pointer)(add(unsafe.Pointer(&m.fun[0]), uintptr(k)*sys.PtrSize)) = ifn
 					}
 					goto nextimethod
 				}
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 1a9dbd6c53..98a986cd63 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -435,9 +435,10 @@ func schedinit() {
 	tracebackinit()
 	moduledataverify()
 	stackinit()
-	itabsinit()
 	mallocinit()
 	mcommoninit(_g_.m)
+	typelinksinit()
+	itabsinit()
 
 	msigsave(_g_.m)
 	initSigmask = _g_.m.sigmask
diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go
index e1956569fd..02aeedaf75 100644
--- a/src/runtime/runtime1.go
+++ b/src/runtime/runtime1.go
@@ -486,3 +486,36 @@ func reflect_typelinks() ([]unsafe.Pointer, [][]int32) {
 	}
 	return sections, ret
 }
+
+// reflect_resolveTypeOff resolves an *rtype offset from a base type.
+//go:linkname reflect_resolveTypeOff reflect.resolveTypeOff
+func reflect_resolveTypeOff(rtype unsafe.Pointer, off int32) unsafe.Pointer {
+	return unsafe.Pointer((*_type)(rtype).typeOff(typeOff(off)))
+}
+
+// reflect_resolveTextOff resolves a function pointer offset from a base type.
+//go:linkname reflect_resolveTextOff reflect.resolveTextOff
+func reflect_resolveTextOff(rtype unsafe.Pointer, off int32) unsafe.Pointer {
+	return (*_type)(rtype).textOff(textOff(off))
+
+}
+
+// reflect_addReflectOff adds a pointer to the reflection offset lookup map.
+//go:linkname reflect_addReflectOff reflect.addReflectOff
+func reflect_addReflectOff(ptr unsafe.Pointer) int32 {
+	lock(&reflectOffs.lock)
+	if reflectOffs.m == nil {
+		reflectOffs.m = make(map[int32]unsafe.Pointer)
+		reflectOffs.minv = make(map[unsafe.Pointer]int32)
+		reflectOffs.next = -1
+	}
+	id, found := reflectOffs.minv[ptr]
+	if !found {
+		id = reflectOffs.next
+		reflectOffs.next-- // use negative offsets as IDs to aid debugging
+		reflectOffs.m[id] = ptr
+		reflectOffs.minv[ptr] = id
+	}
+	unlock(&reflectOffs.lock)
+	return id
+}
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index 8c70f22c1f..2df390253a 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -137,6 +137,8 @@ type moduledata struct {
 
 	gcdatamask, gcbssmask bitvector
 
+	typemap map[typeOff]*_type // offset to *_rtype in previous module
+
 	next *moduledata
 }
 
diff --git a/src/runtime/type.go b/src/runtime/type.go
index fbf6f9973c..86131d3ff3 100644
--- a/src/runtime/type.go
+++ b/src/runtime/type.go
@@ -131,6 +131,92 @@ func (t *_type) name() string {
 	return t._string[i+1:]
 }
 
+// reflectOffs holds type offsets defined at run time by the reflect package.
+//
+// When a type is defined at run time, its *rtype data lives on the heap.
+// There are a wide range of possible addresses the heap may use, that
+// may not be representable as a 32-bit offset. Moreover the GC may
+// one day start moving heap memory, in which case there is no stable
+// offset that can be defined.
+//
+// To provide stable offsets, we pin *rtype objects in a global map
+// and treat the offset as an identifier. We use negative offsets that
+// do not overlap with any compile-time module offsets.
+//
+// Entries are created by reflect.addReflectOff.
+var reflectOffs struct {
+	lock mutex
+	next int32
+	m    map[int32]unsafe.Pointer
+	minv map[unsafe.Pointer]int32
+}
+
+func (t *_type) typeOff(off typeOff) *_type {
+	if off == 0 {
+		return nil
+	}
+	base := uintptr(unsafe.Pointer(t))
+	var md *moduledata
+	for next := &firstmoduledata; next != nil; next = next.next {
+		if base >= next.types && base < next.etypes {
+			md = next
+			break
+		}
+	}
+	if md == nil {
+		lock(&reflectOffs.lock)
+		res := reflectOffs.m[int32(off)]
+		unlock(&reflectOffs.lock)
+		if res == nil {
+			println("runtime: typeOff", hex(off), "base", hex(base), "not in ranges:")
+			for next := &firstmoduledata; next != nil; next = next.next {
+				println("\ttypes", hex(next.types), "etypes", hex(next.etypes))
+			}
+			throw("runtime: type offset base pointer out of range")
+		}
+		return (*_type)(res)
+	}
+	if t := md.typemap[off]; t != nil {
+		return t
+	}
+	res := md.types + uintptr(off)
+	if res > md.etypes {
+		println("runtime: typeOff", hex(off), "out of range", hex(md.types), "-", hex(md.etypes))
+		throw("runtime: type offset out of range")
+	}
+	return (*_type)(unsafe.Pointer(res))
+}
+
+func (t *_type) textOff(off textOff) unsafe.Pointer {
+	base := uintptr(unsafe.Pointer(t))
+	var md *moduledata
+	for next := &firstmoduledata; next != nil; next = next.next {
+		if base >= next.types && base < next.etypes {
+			md = next
+			break
+		}
+	}
+	if md == nil {
+		lock(&reflectOffs.lock)
+		res := reflectOffs.m[int32(off)]
+		unlock(&reflectOffs.lock)
+		if res == nil {
+			println("runtime: textOff", hex(off), "base", hex(base), "not in ranges:")
+			for next := &firstmoduledata; next != nil; next = next.next {
+				println("\ttypes", hex(next.types), "etypes", hex(next.etypes))
+			}
+			throw("runtime: text offset base pointer out of range")
+		}
+		return res
+	}
+	res := md.text + uintptr(off)
+	if res > md.etext {
+		println("runtime: textOff", hex(off), "out of range", hex(md.text), "-", hex(md.etext))
+		throw("runtime: text offset out of range")
+	}
+	return unsafe.Pointer(res)
+}
+
 func (t *functype) in() []*_type {
 	// See funcType in reflect/type.go for details on data layout.
 	uadd := uintptr(unsafe.Sizeof(functype{}))
@@ -154,16 +240,20 @@ func (t *functype) dotdotdot() bool {
 	return t.outCount&(1<<15) != 0
 }
 
+type typeOff int32
+type textOff int32
+
 type method struct {
 	name name
-	mtyp *_type
-	ifn  unsafe.Pointer
-	tfn  unsafe.Pointer
+	mtyp typeOff
+	ifn  textOff
+	tfn  textOff
 }
 
 type uncommontype struct {
 	pkgpath *string
-	mhdr    []method
+	mcount  uint16 // number of methods
+	moff    uint16 // offset from this uncommontype to [mcount]method
 }
 
 type imethod struct {
@@ -270,6 +360,18 @@ func (n *name) name() (s string) {
 	return s
 }
 
+func (n *name) tag() (s string) {
+	tl := n.tagLen()
+	if tl == 0 {
+		return ""
+	}
+	nl := n.nameLen()
+	hdr := (*stringStruct)(unsafe.Pointer(&s))
+	hdr.str = unsafe.Pointer(n.data(3 + nl + 2))
+	hdr.len = tl
+	return s
+}
+
 func (n *name) pkgPath() *string {
 	if *n.data(0)&(1<<2) == 0 {
 		return nil
@@ -281,3 +383,200 @@ func (n *name) pkgPath() *string {
 	off = int(round(uintptr(off), sys.PtrSize))
 	return *(**string)(unsafe.Pointer(n.data(off)))
 }
+
+// typelinksinit scans the types from extra modules and builds the
+// moduledata typemap used to de-duplicate type pointers.
+func typelinksinit() {
+	if firstmoduledata.next == nil {
+		return
+	}
+	typehash := make(map[uint32][]*_type)
+
+	modules := []*moduledata{}
+	for md := &firstmoduledata; md != nil; md = md.next {
+		modules = append(modules, md)
+	}
+	prev, modules := modules[len(modules)-1], modules[:len(modules)-1]
+	for len(modules) > 0 {
+		// Collect types from the previous module into typehash.
+	collect:
+		for _, tl := range prev.typelinks {
+			var t *_type
+			if prev.typemap == nil {
+				t = (*_type)(unsafe.Pointer(prev.types + uintptr(tl)))
+			} else {
+				t = prev.typemap[typeOff(tl)]
+			}
+			// Add to typehash if not seen before.
+			tlist := typehash[t.hash]
+			for _, tcur := range tlist {
+				if tcur == t {
+					continue collect
+				}
+			}
+			typehash[t.hash] = append(tlist, t)
+		}
+
+		// If any of this module's typelinks match a type from a
+		// prior module, prefer that prior type by adding the offset
+		// to this module's typemap.
+		md := modules[len(modules)-1]
+		md.typemap = make(map[typeOff]*_type, len(md.typelinks))
+		for _, tl := range md.typelinks {
+			t := (*_type)(unsafe.Pointer(md.types + uintptr(tl)))
+			for _, candidate := range typehash[t.hash] {
+				if typesEqual(t, candidate) {
+					t = candidate
+					break
+				}
+			}
+			md.typemap[typeOff(tl)] = t
+		}
+
+		prev, modules = md, modules[:len(modules)-1]
+	}
+}
+
+// typesEqual reports whether two types are equal.
+//
+// Everywhere in the runtime and reflect packages, it is assumed that
+// there is exactly one *_type per Go type, so that pointer equality
+// can be used to test if types are equal. There is one place that
+// breaks this assumption: buildmode=shared. In this case a type can
+// appear as two different pieces of memory. This is hidden from the
+// runtime and reflect package by the per-module typemap built in
+// typelinksinit. It uses typesEqual to map types from later modules
+// back into earlier ones.
+//
+// Only typelinksinit needs this function.
+func typesEqual(t, v *_type) bool {
+	if t == v {
+		return true
+	}
+	kind := t.kind & kindMask
+	if kind != v.kind&kindMask {
+		return false
+	}
+	if t._string != v._string {
+		return false
+	}
+	ut := t.uncommon()
+	uv := v.uncommon()
+	if ut != nil || uv != nil {
+		if ut == nil || uv == nil {
+			return false
+		}
+		if !pkgPathEqual(ut.pkgpath, uv.pkgpath) {
+			return false
+		}
+	}
+	if kindBool <= kind && kind <= kindComplex128 {
+		return true
+	}
+	switch kind {
+	case kindString, kindUnsafePointer:
+		return true
+	case kindArray:
+		at := (*arraytype)(unsafe.Pointer(t))
+		av := (*arraytype)(unsafe.Pointer(v))
+		return typesEqual(at.elem, av.elem) && at.len == av.len
+	case kindChan:
+		ct := (*chantype)(unsafe.Pointer(t))
+		cv := (*chantype)(unsafe.Pointer(v))
+		return ct.dir == cv.dir && typesEqual(ct.elem, cv.elem)
+	case kindFunc:
+		ft := (*functype)(unsafe.Pointer(t))
+		fv := (*functype)(unsafe.Pointer(v))
+		if ft.outCount != fv.outCount || ft.inCount != fv.inCount {
+			return false
+		}
+		tin, vin := ft.in(), fv.in()
+		for i := 0; i < len(tin); i++ {
+			if !typesEqual(tin[i], vin[i]) {
+				return false
+			}
+		}
+		tout, vout := ft.out(), fv.out()
+		for i := 0; i < len(tout); i++ {
+			if !typesEqual(tout[i], vout[i]) {
+				return false
+			}
+		}
+		return true
+	case kindInterface:
+		it := (*interfacetype)(unsafe.Pointer(t))
+		iv := (*interfacetype)(unsafe.Pointer(v))
+		if !pkgPathEqual(it.pkgpath, iv.pkgpath) {
+			return false
+		}
+		if len(it.mhdr) != len(iv.mhdr) {
+			return false
+		}
+		for i := range it.mhdr {
+			tm := &it.mhdr[i]
+			vm := &iv.mhdr[i]
+			if tm.name.name() != vm.name.name() {
+				return false
+			}
+			if !pkgPathEqual(tm.name.pkgPath(), vm.name.pkgPath()) {
+				return false
+			}
+			if !typesEqual(tm._type, vm._type) {
+				return false
+			}
+		}
+		return true
+	case kindMap:
+		mt := (*maptype)(unsafe.Pointer(t))
+		mv := (*maptype)(unsafe.Pointer(v))
+		return typesEqual(mt.key, mv.key) && typesEqual(mt.elem, mv.elem)
+	case kindPtr:
+		pt := (*ptrtype)(unsafe.Pointer(t))
+		pv := (*ptrtype)(unsafe.Pointer(v))
+		return typesEqual(pt.elem, pv.elem)
+	case kindSlice:
+		st := (*slicetype)(unsafe.Pointer(t))
+		sv := (*slicetype)(unsafe.Pointer(v))
+		return typesEqual(st.elem, sv.elem)
+	case kindStruct:
+		st := (*structtype)(unsafe.Pointer(t))
+		sv := (*structtype)(unsafe.Pointer(v))
+		if len(st.fields) != len(sv.fields) {
+			return false
+		}
+		for i := range st.fields {
+			tf := &st.fields[i]
+			vf := &sv.fields[i]
+			if tf.name.name() != vf.name.name() {
+				return false
+			}
+			if !pkgPathEqual(tf.name.pkgPath(), vf.name.pkgPath()) {
+				return false
+			}
+			if !typesEqual(tf.typ, vf.typ) {
+				return false
+			}
+			if tf.name.tag() != vf.name.tag() {
+				return false
+			}
+			if tf.offset != vf.offset {
+				return false
+			}
+		}
+		return true
+	default:
+		println("runtime: impossible type kind", kind)
+		throw("runtime: impossible type kind")
+		return false
+	}
+}
+
+func pkgPathEqual(p, q *string) bool {
+	if p == q {
+		return true
+	}
+	if p == nil || q == nil {
+		return false
+	}
+	return *p == *q
+}
-- 
cgit v1.3


From c4807d4cc759025854e354fee99ac20d125f0d79 Mon Sep 17 00:00:00 2001
From: Lynn Boger <laboger@linux.vnet.ibm.com>
Date: Wed, 13 Apr 2016 08:58:10 -0500
Subject: runtime: improve memmove performance ppc64,ppc64le

This change improves the performance of memmove
on ppc64 & ppc64le mainly for moves >=32 bytes.
In addition, the test to detect backward moves
was enhanced to avoid backward moves if source
and dest were in different types of storage, since
backward moves might not always be efficient.

Fixes #14507

The following shows some of the improvements from the test
in the runtime package:

BenchmarkMemmove32                   4229.56      4717.13      1.12x
BenchmarkMemmove64                   6156.03      7810.42      1.27x
BenchmarkMemmove128                  7521.69      12468.54     1.66x
BenchmarkMemmove256                  6729.90      18260.33     2.71x
BenchmarkMemmove512                  8521.59      18033.81     2.12x
BenchmarkMemmove1024                 9760.92      25762.61     2.64x
BenchmarkMemmove2048                 10241.00     29584.94     2.89x
BenchmarkMemmove4096                 10399.37     31882.31     3.07x

BenchmarkMemmoveUnalignedDst16       1943.69      2258.33      1.16x
BenchmarkMemmoveUnalignedDst32       3885.08      3965.81      1.02x
BenchmarkMemmoveUnalignedDst64       5121.63      6965.54      1.36x
BenchmarkMemmoveUnalignedDst128      7212.34      11372.68     1.58x
BenchmarkMemmoveUnalignedDst256      6564.52      16913.59     2.58x
BenchmarkMemmoveUnalignedDst512      8364.35      17782.57     2.13x
BenchmarkMemmoveUnalignedDst1024     9539.87      24914.72     2.61x
BenchmarkMemmoveUnalignedDst2048     9199.23      21235.11     2.31x
BenchmarkMemmoveUnalignedDst4096     10077.39     25231.99     2.50x

BenchmarkMemmoveUnalignedSrc32       3249.83      3742.52      1.15x
BenchmarkMemmoveUnalignedSrc64       5562.35      6627.96      1.19x
BenchmarkMemmoveUnalignedSrc128      6023.98      10200.84     1.69x
BenchmarkMemmoveUnalignedSrc256      6921.83      15258.43     2.20x
BenchmarkMemmoveUnalignedSrc512      8593.13      16541.97     1.93x
BenchmarkMemmoveUnalignedSrc1024     9730.95      22927.84     2.36x
BenchmarkMemmoveUnalignedSrc2048     9793.28      21537.73     2.20x
BenchmarkMemmoveUnalignedSrc4096     10132.96     26295.06     2.60x

Change-Id: I73af59970d4c97c728deabb9708b31ec7e01bdf2
Reviewed-on: https://go-review.googlesource.com/21990
Reviewed-by: Bill O'Farrell <billotosyr@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
---
 src/runtime/memmove_ppc64x.s | 117 +++++++++++++++++++++++++++----------------
 1 file changed, 74 insertions(+), 43 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/memmove_ppc64x.s b/src/runtime/memmove_ppc64x.s
index ea73b455b4..26dabd9e69 100644
--- a/src/runtime/memmove_ppc64x.s
+++ b/src/runtime/memmove_ppc64x.s
@@ -11,78 +11,109 @@ TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
 	MOVD	to+0(FP), R3
 	MOVD	from+8(FP), R4
 	MOVD	n+16(FP), R5
-	CMP	R5, $0
-	BNE	check
-	RET
 
+	// Determine if there are doublewords to
+	// copy so a more efficient move can be done
 check:
-	ANDCC	$7, R5, R7	// R7 is the number of bytes to copy and CR0[EQ] is set if there are none.
-	SRAD	$3, R5, R6	// R6 is the number of words to copy
-	CMP	R6, $0, CR1	// CR1[EQ] is set if there are no words to copy.
-
-	CMP	R3, R4, CR2
-	BC	12, 9, backward	// I think you should be able to write this as "BGT CR2, backward"
+	ANDCC	$7, R5, R7	// R7: bytes to copy
+	SRAD	$3, R5, R6	// R6: double words to copy
+	CMP	R6, $0, CR1	// CR1[EQ] set if no double words to copy
 
-	// Copying forward proceeds by copying R6 words then copying R7 bytes.
-	// R3 and R4 are advanced as we copy. Because PPC64 lacks post-increment
-	// load/store, R3 and R4 point before the bytes that are to be copied.
+	// Determine overlap by subtracting dest - src and comparing against the
+	// length.  The catches the cases where src and dest are in different types
+	// of storage such as stack and static to avoid doing backward move when not
+	// necessary.
 
-	BC	12, 6, noforwardlarge	// "BEQ CR1, noforwardlarge"
-
-	MOVD	R6, CTR
+	SUB	R4, R3, R8	// dest - src
+	CMPU	R8, R5, CR2	// < len?
+	BC	12, 8, backward // BLT CR2 backward
 
-	SUB	$8, R3
-	SUB	$8, R4
+	// Copying forward if no overlap.
 
-forwardlargeloop:
-	MOVDU	8(R4), R8
-	MOVDU	R8, 8(R3)
-	BC	16, 0, forwardlargeloop // "BDNZ"
-
-	ADD	$8, R3
-	ADD	$8, R4
+	BC	12, 6, noforwardlarge	// "BEQ CR1, noforwardlarge"
+	MOVD	R6,CTR			// R6 = number of double words
+	SRADCC	$2,R6,R8		// 32 byte chunks?
+	BNE	forward32setup		//
+
+	// Move double words
+
+forward8:
+	MOVD    0(R4), R8		// double word
+	ADD     $8,R4
+	MOVD    R8, 0(R3)		//
+	ADD     $8,R3
+	BC      16, 0, forward8
+	BR	noforwardlarge		// handle remainder
+
+	// Prepare for moves of 32 bytes at a time.
+
+forward32setup:
+	DCBTST	(R3)			// prepare data cache
+	DCBT	(R4)
+	MOVD	R8, CTR			// double work count
+
+forward32:
+	MOVD	0(R4), R8		// load 4 double words
+	MOVD	8(R4), R9
+	MOVD	16(R4), R14
+	MOVD	24(R4), R15
+	ADD	$32,R4
+	MOVD	R8, 0(R3)		// store those 4
+	MOVD	R9, 8(R3)
+	MOVD	R14,16(R3)
+	MOVD	R15,24(R3)
+	ADD	$32,R3			// bump up for next set
+	BC	16, 0, forward32	// continue
+	RLDCLCC	$61,R5,$3,R6		// remaining doublewords
+	BEQ	noforwardlarge
+	MOVD	R6,CTR			// set up the CTR
+	BR	forward8
 
 noforwardlarge:
-	BNE	forwardtail	// Tests the bit set by ANDCC above
-	RET
+	CMP	R7,$0			// any remaining bytes
+	BC	4, 1, LR
 
 forwardtail:
-	SUB	$1, R3
-	SUB	$1, R4
-	MOVD	R7, CTR
+	MOVD	R7, CTR			// move tail bytes
 
 forwardtailloop:
-	MOVBZU	1(R4), R8
-	MOVBZU	R8, 1(R3)
+	MOVBZ	0(R4), R8		// move single bytes
+	ADD	$1,R4
+	MOVBZ	R8, 0(R3)
+	ADD	$1,R3
 	BC	16, 0, forwardtailloop
 	RET
 
 backward:
-	// Copying backwards proceeds by copying R7 bytes then copying R6 words.
+	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
 	// R3 and R4 are advanced to the end of the destination/source buffers
 	// respectively and moved back as we copy.
 
-	ADD	R5, R4, R4
-	ADD	R3, R5, R3
+	ADD	R5, R4, R4		// end of source
+	ADD	R3, R5, R3		// end of dest
 
-	BEQ	nobackwardtail
+	BEQ	nobackwardtail		// earlier condition
 
-	MOVD	R7, CTR
+	MOVD	R7, CTR			// bytes to move
 
 backwardtailloop:
-	MOVBZU	-1(R4), R8
-	MOVBZU	R8, -1(R3)
+	MOVBZ 	-1(R4), R8		// point to last byte
+	SUB	$1,R4
+	MOVBZ 	R8, -1(R3)
+	SUB	$1,R3
 	BC	16, 0, backwardtailloop
 
 nobackwardtail:
-	BC	4, 6, backwardlarge		// "BNE CR1"
-	RET
+	CMP	R6,$0
+	BC	4, 5, LR
 
 backwardlarge:
 	MOVD	R6, CTR
 
 backwardlargeloop:
-	MOVDU	-8(R4), R8
-	MOVDU	R8, -8(R3)
-	BC	16, 0, backwardlargeloop	// "BDNZ"
+	MOVD 	-8(R4), R8
+	SUB	$8,R4
+	MOVD 	R8, -8(R3)
+	SUB	$8,R3
+	BC	16, 0, backwardlargeloop	//
 	RET
-- 
cgit v1.3


From d8e8fc292ace5ae59a0da44dfca1dd5b1a71ecf1 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Wed, 13 Apr 2016 11:13:39 -0400
Subject: runtime/internal/atomic: remove write barrier from Storep1 on s390x

atomic.Storep1 is not supposed to invoke a write barrier (that's what
atomicstorep is for), but currently does on s390x. This causes a panic
in runtime.mapzero when it tries to use atomic.Storep1 to store what's
actually a scalar.

Fix this by eliminating the write barrier from atomic.Storep1 on
s390x. Also add some documentation to atomicstorep to explain the
difference between these.

Fixes #15270.

Change-Id: I291846732d82f090a218df3ef6351180aff54e81
Reviewed-on: https://go-review.googlesource.com/21993
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: Michael Munday <munday@ca.ibm.com>
---
 src/runtime/atomic_pointer.go               | 5 ++---
 src/runtime/internal/atomic/atomic_s390x.go | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/atomic_pointer.go b/src/runtime/atomic_pointer.go
index bd21b49945..d54f1d6eef 100644
--- a/src/runtime/atomic_pointer.go
+++ b/src/runtime/atomic_pointer.go
@@ -15,10 +15,9 @@ import (
 // escape analysis decisions about the pointer value being stored.
 // Instead, these are wrappers around the actual atomics (casp1 and so on)
 // that use noescape to convey which arguments do not escape.
-//
-// Additionally, these functions must update the shadow heap for
-// write barrier checking.
 
+// atomicstorep performs *ptr = new atomically and invokes a write barrier.
+//
 //go:nosplit
 func atomicstorep(ptr unsafe.Pointer, new unsafe.Pointer) {
 	atomic.Storep1(noescape(ptr), new)
diff --git a/src/runtime/internal/atomic/atomic_s390x.go b/src/runtime/internal/atomic/atomic_s390x.go
index f31f1af444..b6d3d84bdf 100644
--- a/src/runtime/internal/atomic/atomic_s390x.go
+++ b/src/runtime/internal/atomic/atomic_s390x.go
@@ -40,7 +40,7 @@ func Store64(ptr *uint64, val uint64) {
 //go:noinline
 //go:nosplit
 func Storep1(ptr unsafe.Pointer, val unsafe.Pointer) {
-	*(*unsafe.Pointer)(ptr) = val
+	*(*uintptr)(ptr) = uintptr(val)
 }
 
 //go:noescape
-- 
cgit v1.3


From 4721ea6abcde318a2f5d61ec249cde5e9c57ebea Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Wed, 13 Apr 2016 11:22:42 -0400
Subject: runtime/internal/atomic: rename Storep1 to StorepNoWB

Make it clear that the point of this function is to store a pointer
*without* a write barrier.

sed -i -e 's/Storep1/StorepNoWB/' $(git grep -l Storep1)

Updates #15270.

Change-Id: Ifad7e17815e51a738070655fe3b178afdadaecf6
Reviewed-on: https://go-review.googlesource.com/21994
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Michael Matloob <matloob@golang.org>
---
 src/runtime/atomic_pointer.go                 | 4 ++--
 src/runtime/hashmap.go                        | 4 ++--
 src/runtime/internal/atomic/asm_386.s         | 2 +-
 src/runtime/internal/atomic/asm_amd64.s       | 2 +-
 src/runtime/internal/atomic/asm_amd64p32.s    | 2 +-
 src/runtime/internal/atomic/asm_mips64x.s     | 2 +-
 src/runtime/internal/atomic/asm_ppc64x.s      | 2 +-
 src/runtime/internal/atomic/atomic_386.go     | 2 +-
 src/runtime/internal/atomic/atomic_amd64x.go  | 5 ++++-
 src/runtime/internal/atomic/atomic_arm.go     | 2 +-
 src/runtime/internal/atomic/atomic_arm64.go   | 2 +-
 src/runtime/internal/atomic/atomic_arm64.s    | 2 +-
 src/runtime/internal/atomic/atomic_mips64x.go | 2 +-
 src/runtime/internal/atomic/atomic_ppc64x.go  | 2 +-
 src/runtime/internal/atomic/atomic_s390x.go   | 2 +-
 15 files changed, 20 insertions(+), 17 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/atomic_pointer.go b/src/runtime/atomic_pointer.go
index d54f1d6eef..e9f5d03b2b 100644
--- a/src/runtime/atomic_pointer.go
+++ b/src/runtime/atomic_pointer.go
@@ -20,7 +20,7 @@ import (
 //
 //go:nosplit
 func atomicstorep(ptr unsafe.Pointer, new unsafe.Pointer) {
-	atomic.Storep1(noescape(ptr), new)
+	atomic.StorepNoWB(noescape(ptr), new)
 	writebarrierptr_nostore((*uintptr)(ptr), uintptr(new))
 }
 
@@ -44,7 +44,7 @@ func sync_atomic_StoreUintptr(ptr *uintptr, new uintptr)
 //go:nosplit
 func sync_atomic_StorePointer(ptr *unsafe.Pointer, new unsafe.Pointer) {
 	sync_atomic_StoreUintptr((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
-	atomic.Storep1(noescape(unsafe.Pointer(ptr)), new)
+	atomic.StorepNoWB(noescape(unsafe.Pointer(ptr)), new)
 	writebarrierptr_nostore((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
 }
 
diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go
index d549ce4194..4f5d03d983 100644
--- a/src/runtime/hashmap.go
+++ b/src/runtime/hashmap.go
@@ -1075,8 +1075,8 @@ func mapzero(t *_type) {
 				throw("map element too large")
 			}
 		}
-		atomic.Storep1(unsafe.Pointer(&zeroptr), persistentalloc(cursize, 64, &memstats.other_sys))
-		atomic.Storep1(unsafe.Pointer(&zerosize), unsafe.Pointer(zerosize))
+		atomic.StorepNoWB(unsafe.Pointer(&zeroptr), persistentalloc(cursize, 64, &memstats.other_sys))
+		atomic.StorepNoWB(unsafe.Pointer(&zerosize), unsafe.Pointer(zerosize))
 	}
 	unlock(&zerolock)
 }
diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s
index ce84fd83d1..ebecd0b4cb 100644
--- a/src/runtime/internal/atomic/asm_386.s
+++ b/src/runtime/internal/atomic/asm_386.s
@@ -102,7 +102,7 @@ TEXT runtime∕internal∕atomic·Xchguintptr(SB), NOSPLIT, $0-12
 	JMP	runtime∕internal∕atomic·Xchg(SB)
 
 
-TEXT runtime∕internal∕atomic·Storep1(SB), NOSPLIT, $0-8
+TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-8
 	MOVL	ptr+0(FP), BX
 	MOVL	val+4(FP), AX
 	XCHGL	AX, 0(BX)
diff --git a/src/runtime/internal/atomic/asm_amd64.s b/src/runtime/internal/atomic/asm_amd64.s
index 7463fec4a1..94d4ac2698 100644
--- a/src/runtime/internal/atomic/asm_amd64.s
+++ b/src/runtime/internal/atomic/asm_amd64.s
@@ -115,7 +115,7 @@ TEXT runtime∕internal∕atomic·Xchg64(SB), NOSPLIT, $0-24
 TEXT runtime∕internal∕atomic·Xchguintptr(SB), NOSPLIT, $0-24
 	JMP	runtime∕internal∕atomic·Xchg64(SB)
 
-TEXT runtime∕internal∕atomic·Storep1(SB), NOSPLIT, $0-16
+TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-16
 	MOVQ	ptr+0(FP), BX
 	MOVQ	val+8(FP), AX
 	XCHGQ	AX, 0(BX)
diff --git a/src/runtime/internal/atomic/asm_amd64p32.s b/src/runtime/internal/atomic/asm_amd64p32.s
index f1e2c3aca6..74c79d08fd 100644
--- a/src/runtime/internal/atomic/asm_amd64p32.s
+++ b/src/runtime/internal/atomic/asm_amd64p32.s
@@ -115,7 +115,7 @@ TEXT runtime∕internal∕atomic·Xchg64(SB), NOSPLIT, $0-24
 TEXT runtime∕internal∕atomic·Xchguintptr(SB), NOSPLIT, $0-12
 	JMP	runtime∕internal∕atomic·Xchg(SB)
 
-TEXT runtime∕internal∕atomic·Storep1(SB), NOSPLIT, $0-8
+TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-8
 	MOVL	ptr+0(FP), BX
 	MOVL	val+4(FP), AX
 	XCHGL	AX, 0(BX)
diff --git a/src/runtime/internal/atomic/asm_mips64x.s b/src/runtime/internal/atomic/asm_mips64x.s
index a454f284ab..d0f5c7bdd3 100644
--- a/src/runtime/internal/atomic/asm_mips64x.s
+++ b/src/runtime/internal/atomic/asm_mips64x.s
@@ -155,7 +155,7 @@ TEXT ·Xchg64(SB), NOSPLIT, $0-24
 TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
 	JMP	·Xchg64(SB)
 
-TEXT ·Storep1(SB), NOSPLIT, $0-16
+TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
 	JMP	·Store64(SB)
 
 TEXT ·Store(SB), NOSPLIT, $0-12
diff --git a/src/runtime/internal/atomic/asm_ppc64x.s b/src/runtime/internal/atomic/asm_ppc64x.s
index 45a48b6203..4a776787a2 100644
--- a/src/runtime/internal/atomic/asm_ppc64x.s
+++ b/src/runtime/internal/atomic/asm_ppc64x.s
@@ -150,7 +150,7 @@ TEXT runtime∕internal∕atomic·Xchguintptr(SB), NOSPLIT, $0-24
 	BR	runtime∕internal∕atomic·Xchg64(SB)
 
 
-TEXT runtime∕internal∕atomic·Storep1(SB), NOSPLIT, $0-16
+TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-16
 	BR	runtime∕internal∕atomic·Store64(SB)
 
 TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-12
diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go
index f4c50b0be1..23a8479515 100644
--- a/src/runtime/internal/atomic/atomic_386.go
+++ b/src/runtime/internal/atomic/atomic_386.go
@@ -73,4 +73,4 @@ func Store(ptr *uint32, val uint32)
 func Store64(ptr *uint64, val uint64)
 
 // NO go:noescape annotation; see atomic_pointer.go.
-func Storep1(ptr unsafe.Pointer, val unsafe.Pointer)
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_amd64x.go b/src/runtime/internal/atomic/atomic_amd64x.go
index bd40fb3ea2..54851d30f4 100644
--- a/src/runtime/internal/atomic/atomic_amd64x.go
+++ b/src/runtime/internal/atomic/atomic_amd64x.go
@@ -61,5 +61,8 @@ func Store(ptr *uint32, val uint32)
 //go:noescape
 func Store64(ptr *uint64, val uint64)
 
+// StorepNoWB performs *ptr = val atomically and without a write
+// barrier.
+//
 // NO go:noescape annotation; see atomic_pointer.go.
-func Storep1(ptr unsafe.Pointer, val unsafe.Pointer)
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_arm.go b/src/runtime/internal/atomic/atomic_arm.go
index c361aef382..244237df4d 100644
--- a/src/runtime/internal/atomic/atomic_arm.go
+++ b/src/runtime/internal/atomic/atomic_arm.go
@@ -85,7 +85,7 @@ func Loadp(addr unsafe.Pointer) unsafe.Pointer {
 }
 
 //go:nosplit
-func Storep1(addr unsafe.Pointer, v unsafe.Pointer) {
+func StorepNoWB(addr unsafe.Pointer, v unsafe.Pointer) {
 	for {
 		old := *(*unsafe.Pointer)(addr)
 		if Casp1((*unsafe.Pointer)(addr), old, v) {
diff --git a/src/runtime/internal/atomic/atomic_arm64.go b/src/runtime/internal/atomic/atomic_arm64.go
index 6b32346656..dc82c3396d 100644
--- a/src/runtime/internal/atomic/atomic_arm64.go
+++ b/src/runtime/internal/atomic/atomic_arm64.go
@@ -77,4 +77,4 @@ func Store(ptr *uint32, val uint32)
 func Store64(ptr *uint64, val uint64)
 
 // NO go:noescape annotation; see atomic_pointer.go.
-func Storep1(ptr unsafe.Pointer, val unsafe.Pointer)
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_arm64.s b/src/runtime/internal/atomic/atomic_arm64.s
index 7b1b0efaf6..eb32f378aa 100644
--- a/src/runtime/internal/atomic/atomic_arm64.s
+++ b/src/runtime/internal/atomic/atomic_arm64.s
@@ -25,7 +25,7 @@ TEXT ·Loadp(SB),NOSPLIT,$-8-16
 	MOVD	R0, ret+8(FP)
 	RET
 
-TEXT runtime∕internal∕atomic·Storep1(SB), NOSPLIT, $0-16
+TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-16
 	B	runtime∕internal∕atomic·Store64(SB)
 
 TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-12
diff --git a/src/runtime/internal/atomic/atomic_mips64x.go b/src/runtime/internal/atomic/atomic_mips64x.go
index 8094db58a0..d06ea4809a 100644
--- a/src/runtime/internal/atomic/atomic_mips64x.go
+++ b/src/runtime/internal/atomic/atomic_mips64x.go
@@ -53,4 +53,4 @@ func Store(ptr *uint32, val uint32)
 func Store64(ptr *uint64, val uint64)
 
 // NO go:noescape annotation; see atomic_pointer.go.
-func Storep1(ptr unsafe.Pointer, val unsafe.Pointer)
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_ppc64x.go b/src/runtime/internal/atomic/atomic_ppc64x.go
index bf82b82643..72c98eb0c5 100644
--- a/src/runtime/internal/atomic/atomic_ppc64x.go
+++ b/src/runtime/internal/atomic/atomic_ppc64x.go
@@ -53,4 +53,4 @@ func Store(ptr *uint32, val uint32)
 func Store64(ptr *uint64, val uint64)
 
 // NO go:noescape annotation; see atomic_pointer.go.
-func Storep1(ptr unsafe.Pointer, val unsafe.Pointer)
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_s390x.go b/src/runtime/internal/atomic/atomic_s390x.go
index b6d3d84bdf..9343853485 100644
--- a/src/runtime/internal/atomic/atomic_s390x.go
+++ b/src/runtime/internal/atomic/atomic_s390x.go
@@ -39,7 +39,7 @@ func Store64(ptr *uint64, val uint64) {
 // NO go:noescape annotation; see atomic_pointer.go.
 //go:noinline
 //go:nosplit
-func Storep1(ptr unsafe.Pointer, val unsafe.Pointer) {
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer) {
 	*(*uintptr)(ptr) = uintptr(val)
 }
 
-- 
cgit v1.3


From d9712aa82af7192469d75802c6dc1734ea9858b2 Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick <bradfitz@golang.org>
Date: Wed, 13 Apr 2016 11:31:24 -0700
Subject: runtime: merge the darwin os*.go files together

Merge them together into os1_darwin.go. A future CL will rename it.

Change-Id: Ia4380d3296ebd5ce210908ce3582ff184566f692
Reviewed-on: https://go-review.googlesource.com/22004
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/runtime/os1_darwin.go | 50 ++++++++++++++++++++++++++++++++++++++++++++---
 src/runtime/os2_darwin.go | 14 -------------
 src/runtime/os_darwin.go  | 42 ---------------------------------------
 3 files changed, 47 insertions(+), 59 deletions(-)
 delete mode 100644 src/runtime/os2_darwin.go
 delete mode 100644 src/runtime/os_darwin.go

(limited to 'src/runtime')

diff --git a/src/runtime/os1_darwin.go b/src/runtime/os1_darwin.go
index 01dc90f97c..a0e3d8ed6b 100644
--- a/src/runtime/os1_darwin.go
+++ b/src/runtime/os1_darwin.go
@@ -6,11 +6,23 @@ package runtime
 
 import "unsafe"
 
-//extern SigTabTT runtime·sigtab[];
+type mOS struct {
+	machport uint32 // return address for mach ipc
+	waitsema uint32 // semaphore for parking on locks
+}
 
-type sigset uint32
+func bsdthread_create(stk, arg unsafe.Pointer, fn uintptr) int32
+func bsdthread_register() int32
 
-var sigset_all = ^sigset(0)
+//go:noescape
+func mach_msg_trap(h unsafe.Pointer, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32
+
+func mach_reply_port() uint32
+func mach_task_self() uint32
+func mach_thread_self() uint32
+
+//go:noescape
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
 
 func unimplemented(name string) {
 	println(name, "not implemented")
@@ -473,6 +485,38 @@ func memlimit() uintptr {
 	return 0
 }
 
+const (
+	_NSIG        = 32
+	_SI_USER     = 0 /* empirically true, but not what headers say */
+	_SIG_BLOCK   = 1
+	_SIG_UNBLOCK = 2
+	_SIG_SETMASK = 3
+	_SS_DISABLE  = 4
+)
+
+//go:noescape
+func sigprocmask(how uint32, new, old *sigset)
+
+//go:noescape
+func sigaction(mode uint32, new *sigactiont, old *usigactiont)
+
+//go:noescape
+func sigaltstack(new, old *stackt)
+
+func sigtramp()
+
+//go:noescape
+func setitimer(mode int32, new, old *itimerval)
+
+func raise(sig int32)
+func raiseproc(int32)
+
+//extern SigTabTT runtime·sigtab[];
+
+type sigset uint32
+
+var sigset_all = ^sigset(0)
+
 //go:nosplit
 //go:nowritebarrierrec
 func setsig(i int32, fn uintptr, restart bool) {
diff --git a/src/runtime/os2_darwin.go b/src/runtime/os2_darwin.go
deleted file mode 100644
index 542bd74219..0000000000
--- a/src/runtime/os2_darwin.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
-	_NSIG        = 32
-	_SI_USER     = 0 /* empirically true, but not what headers say */
-	_SIG_BLOCK   = 1
-	_SIG_UNBLOCK = 2
-	_SIG_SETMASK = 3
-	_SS_DISABLE  = 4
-)
diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go
deleted file mode 100644
index e9b8933fb9..0000000000
--- a/src/runtime/os_darwin.go
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-type mOS struct {
-	machport uint32 // return address for mach ipc
-	waitsema uint32 // semaphore for parking on locks
-}
-
-func bsdthread_create(stk, arg unsafe.Pointer, fn uintptr) int32
-func bsdthread_register() int32
-
-//go:noescape
-func mach_msg_trap(h unsafe.Pointer, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32
-
-func mach_reply_port() uint32
-func mach_task_self() uint32
-func mach_thread_self() uint32
-
-//go:noescape
-func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
-
-//go:noescape
-func sigprocmask(how uint32, new, old *sigset)
-
-//go:noescape
-func sigaction(mode uint32, new *sigactiont, old *usigactiont)
-
-//go:noescape
-func sigaltstack(new, old *stackt)
-
-func sigtramp()
-
-//go:noescape
-func setitimer(mode int32, new, old *itimerval)
-
-func raise(sig int32)
-func raiseproc(int32)
-- 
cgit v1.3


From 73e2ad20220050f88b1ea79bf5a2e4c4fbee0533 Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick <bradfitz@golang.org>
Date: Wed, 13 Apr 2016 11:33:42 -0700
Subject: runtime: rename os1_darwin.go to os_darwin.go

Change-Id: If0e0bc5a85101db1e70faaab168fc2d12024eb93
Reviewed-on: https://go-review.googlesource.com/22005
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/runtime/os1_darwin.go | 582 ----------------------------------------------
 src/runtime/os_darwin.go  | 582 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 582 insertions(+), 582 deletions(-)
 delete mode 100644 src/runtime/os1_darwin.go
 create mode 100644 src/runtime/os_darwin.go

(limited to 'src/runtime')

diff --git a/src/runtime/os1_darwin.go b/src/runtime/os1_darwin.go
deleted file mode 100644
index a0e3d8ed6b..0000000000
--- a/src/runtime/os1_darwin.go
+++ /dev/null
@@ -1,582 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-type mOS struct {
-	machport uint32 // return address for mach ipc
-	waitsema uint32 // semaphore for parking on locks
-}
-
-func bsdthread_create(stk, arg unsafe.Pointer, fn uintptr) int32
-func bsdthread_register() int32
-
-//go:noescape
-func mach_msg_trap(h unsafe.Pointer, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32
-
-func mach_reply_port() uint32
-func mach_task_self() uint32
-func mach_thread_self() uint32
-
-//go:noescape
-func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
-
-func unimplemented(name string) {
-	println(name, "not implemented")
-	*(*int)(unsafe.Pointer(uintptr(1231))) = 1231
-}
-
-//go:nosplit
-func semawakeup(mp *m) {
-	mach_semrelease(mp.waitsema)
-}
-
-//go:nosplit
-func semacreate(mp *m) {
-	if mp.waitsema != 0 {
-		return
-	}
-	systemstack(func() {
-		mp.waitsema = mach_semcreate()
-	})
-}
-
-// BSD interface for threading.
-func osinit() {
-	// bsdthread_register delayed until end of goenvs so that we
-	// can look at the environment first.
-
-	ncpu = getncpu()
-}
-
-func getncpu() int32 {
-	// Use sysctl to fetch hw.ncpu.
-	mib := [2]uint32{6, 3}
-	out := uint32(0)
-	nout := unsafe.Sizeof(out)
-	ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
-	if ret >= 0 && int32(out) > 0 {
-		return int32(out)
-	}
-	return 1
-}
-
-var urandom_dev = []byte("/dev/urandom\x00")
-
-//go:nosplit
-func getRandomData(r []byte) {
-	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
-	n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
-	closefd(fd)
-	extendRandom(r, int(n))
-}
-
-func goenvs() {
-	goenvs_unix()
-
-	// Register our thread-creation callback (see sys_darwin_{amd64,386}.s)
-	// but only if we're not using cgo. If we are using cgo we need
-	// to let the C pthread library install its own thread-creation callback.
-	if !iscgo {
-		if bsdthread_register() != 0 {
-			if gogetenv("DYLD_INSERT_LIBRARIES") != "" {
-				throw("runtime: bsdthread_register error (unset DYLD_INSERT_LIBRARIES)")
-			}
-			throw("runtime: bsdthread_register error")
-		}
-	}
-}
-
-// May run with m.p==nil, so write barriers are not allowed.
-//go:nowritebarrier
-func newosproc(mp *m, stk unsafe.Pointer) {
-	if false {
-		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " id=", mp.id, " ostk=", &mp, "\n")
-	}
-
-	var oset sigset
-	sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
-	errno := bsdthread_create(stk, unsafe.Pointer(mp), funcPC(mstart))
-	sigprocmask(_SIG_SETMASK, &oset, nil)
-
-	if errno < 0 {
-		print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", -errno, ")\n")
-		throw("runtime.newosproc")
-	}
-}
-
-// newosproc0 is a version of newosproc that can be called before the runtime
-// is initialized.
-//
-// As Go uses bsdthread_register when running without cgo, this function is
-// not safe to use after initialization as it does not pass an M as fnarg.
-//
-//go:nosplit
-func newosproc0(stacksize uintptr, fn unsafe.Pointer, fnarg uintptr) {
-	stack := sysAlloc(stacksize, &memstats.stacks_sys)
-	if stack == nil {
-		write(2, unsafe.Pointer(&failallocatestack[0]), int32(len(failallocatestack)))
-		exit(1)
-	}
-	stk := unsafe.Pointer(uintptr(stack) + stacksize)
-
-	var oset sigset
-	sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
-	errno := bsdthread_create(stk, fn, fnarg)
-	sigprocmask(_SIG_SETMASK, &oset, nil)
-
-	if errno < 0 {
-		write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
-		exit(1)
-	}
-}
-
-var failallocatestack = []byte("runtime: failed to allocate stack for the new OS thread\n")
-var failthreadcreate = []byte("runtime: failed to create new OS thread\n")
-
-// Called to do synchronous initialization of Go code built with
-// -buildmode=c-archive or -buildmode=c-shared.
-// None of the Go runtime is initialized.
-//go:nosplit
-//go:nowritebarrierrec
-func libpreinit() {
-	initsig(true)
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
-func mpreinit(mp *m) {
-	mp.gsignal = malg(32 * 1024) // OS X wants >= 8K
-	mp.gsignal.m = mp
-}
-
-//go:nosplit
-func msigsave(mp *m) {
-	sigprocmask(_SIG_SETMASK, nil, &mp.sigmask)
-}
-
-//go:nosplit
-func msigrestore(sigmask sigset) {
-	sigprocmask(_SIG_SETMASK, &sigmask, nil)
-}
-
-//go:nosplit
-func sigblock() {
-	sigprocmask(_SIG_SETMASK, &sigset_all, nil)
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the new thread, cannot allocate memory.
-func minit() {
-	// Initialize signal handling.
-	_g_ := getg()
-
-	// The alternate signal stack is buggy on arm and arm64.
-	// The signal handler handles it directly.
-	// The sigaltstack assembly function does nothing.
-	if GOARCH != "arm" && GOARCH != "arm64" {
-		var st stackt
-		sigaltstack(nil, &st)
-		if st.ss_flags&_SS_DISABLE != 0 {
-			signalstack(&_g_.m.gsignal.stack)
-			_g_.m.newSigstack = true
-		} else {
-			// Use existing signal stack.
-			stsp := uintptr(unsafe.Pointer(st.ss_sp))
-			_g_.m.gsignal.stack.lo = stsp
-			_g_.m.gsignal.stack.hi = stsp + st.ss_size
-			_g_.m.gsignal.stackguard0 = stsp + _StackGuard
-			_g_.m.gsignal.stackguard1 = stsp + _StackGuard
-			_g_.m.gsignal.stackAlloc = st.ss_size
-			_g_.m.newSigstack = false
-		}
-	}
-
-	// restore signal mask from m.sigmask and unblock essential signals
-	nmask := _g_.m.sigmask
-	for i := range sigtable {
-		if sigtable[i].flags&_SigUnblock != 0 {
-			nmask &^= 1 << (uint32(i) - 1)
-		}
-	}
-	sigprocmask(_SIG_SETMASK, &nmask, nil)
-}
-
-// Called from dropm to undo the effect of an minit.
-//go:nosplit
-func unminit() {
-	if getg().m.newSigstack {
-		signalstack(nil)
-	}
-}
-
-// Mach IPC, to get at semaphores
-// Definitions are in /usr/include/mach on a Mac.
-
-func macherror(r int32, fn string) {
-	print("mach error ", fn, ": ", r, "\n")
-	throw("mach error")
-}
-
-const _DebugMach = false
-
-var zerondr machndr
-
-func mach_msgh_bits(a, b uint32) uint32 {
-	return a | b<<8
-}
-
-func mach_msg(h *machheader, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32 {
-	// TODO: Loop on interrupt.
-	return mach_msg_trap(unsafe.Pointer(h), op, send_size, rcv_size, rcv_name, timeout, notify)
-}
-
-// Mach RPC (MIG)
-const (
-	_MinMachMsg = 48
-	_MachReply  = 100
-)
-
-type codemsg struct {
-	h    machheader
-	ndr  machndr
-	code int32
-}
-
-func machcall(h *machheader, maxsize int32, rxsize int32) int32 {
-	_g_ := getg()
-	port := _g_.m.machport
-	if port == 0 {
-		port = mach_reply_port()
-		_g_.m.machport = port
-	}
-
-	h.msgh_bits |= mach_msgh_bits(_MACH_MSG_TYPE_COPY_SEND, _MACH_MSG_TYPE_MAKE_SEND_ONCE)
-	h.msgh_local_port = port
-	h.msgh_reserved = 0
-	id := h.msgh_id
-
-	if _DebugMach {
-		p := (*[10000]unsafe.Pointer)(unsafe.Pointer(h))
-		print("send:\t")
-		var i uint32
-		for i = 0; i < h.msgh_size/uint32(unsafe.Sizeof(p[0])); i++ {
-			print(" ", p[i])
-			if i%8 == 7 {
-				print("\n\t")
-			}
-		}
-		if i%8 != 0 {
-			print("\n")
-		}
-	}
-	ret := mach_msg(h, _MACH_SEND_MSG|_MACH_RCV_MSG, h.msgh_size, uint32(maxsize), port, 0, 0)
-	if ret != 0 {
-		if _DebugMach {
-			print("mach_msg error ", ret, "\n")
-		}
-		return ret
-	}
-	if _DebugMach {
-		p := (*[10000]unsafe.Pointer)(unsafe.Pointer(h))
-		var i uint32
-		for i = 0; i < h.msgh_size/uint32(unsafe.Sizeof(p[0])); i++ {
-			print(" ", p[i])
-			if i%8 == 7 {
-				print("\n\t")
-			}
-		}
-		if i%8 != 0 {
-			print("\n")
-		}
-	}
-	if h.msgh_id != id+_MachReply {
-		if _DebugMach {
-			print("mach_msg _MachReply id mismatch ", h.msgh_id, " != ", id+_MachReply, "\n")
-		}
-		return -303 // MIG_REPLY_MISMATCH
-	}
-	// Look for a response giving the return value.
-	// Any call can send this back with an error,
-	// and some calls only have return values so they
-	// send it back on success too. I don't quite see how
-	// you know it's one of these and not the full response
-	// format, so just look if the message is right.
-	c := (*codemsg)(unsafe.Pointer(h))
-	if uintptr(h.msgh_size) == unsafe.Sizeof(*c) && h.msgh_bits&_MACH_MSGH_BITS_COMPLEX == 0 {
-		if _DebugMach {
-			print("mig result ", c.code, "\n")
-		}
-		return c.code
-	}
-	if h.msgh_size != uint32(rxsize) {
-		if _DebugMach {
-			print("mach_msg _MachReply size mismatch ", h.msgh_size, " != ", rxsize, "\n")
-		}
-		return -307 // MIG_ARRAY_TOO_LARGE
-	}
-	return 0
-}
-
-// Semaphores!
-
-const (
-	tmach_semcreate = 3418
-	rmach_semcreate = tmach_semcreate + _MachReply
-
-	tmach_semdestroy = 3419
-	rmach_semdestroy = tmach_semdestroy + _MachReply
-
-	_KERN_ABORTED             = 14
-	_KERN_OPERATION_TIMED_OUT = 49
-)
-
-type tmach_semcreatemsg struct {
-	h      machheader
-	ndr    machndr
-	policy int32
-	value  int32
-}
-
-type rmach_semcreatemsg struct {
-	h         machheader
-	body      machbody
-	semaphore machport
-}
-
-type tmach_semdestroymsg struct {
-	h         machheader
-	body      machbody
-	semaphore machport
-}
-
-func mach_semcreate() uint32 {
-	var m [256]uint8
-	tx := (*tmach_semcreatemsg)(unsafe.Pointer(&m))
-	rx := (*rmach_semcreatemsg)(unsafe.Pointer(&m))
-
-	tx.h.msgh_bits = 0
-	tx.h.msgh_size = uint32(unsafe.Sizeof(*tx))
-	tx.h.msgh_remote_port = mach_task_self()
-	tx.h.msgh_id = tmach_semcreate
-	tx.ndr = zerondr
-
-	tx.policy = 0 // 0 = SYNC_POLICY_FIFO
-	tx.value = 0
-
-	for {
-		r := machcall(&tx.h, int32(unsafe.Sizeof(m)), int32(unsafe.Sizeof(*rx)))
-		if r == 0 {
-			break
-		}
-		if r == _KERN_ABORTED { // interrupted
-			continue
-		}
-		macherror(r, "semaphore_create")
-	}
-	if rx.body.msgh_descriptor_count != 1 {
-		unimplemented("mach_semcreate desc count")
-	}
-	return rx.semaphore.name
-}
-
-func mach_semdestroy(sem uint32) {
-	var m [256]uint8
-	tx := (*tmach_semdestroymsg)(unsafe.Pointer(&m))
-
-	tx.h.msgh_bits = _MACH_MSGH_BITS_COMPLEX
-	tx.h.msgh_size = uint32(unsafe.Sizeof(*tx))
-	tx.h.msgh_remote_port = mach_task_self()
-	tx.h.msgh_id = tmach_semdestroy
-	tx.body.msgh_descriptor_count = 1
-	tx.semaphore.name = sem
-	tx.semaphore.disposition = _MACH_MSG_TYPE_MOVE_SEND
-	tx.semaphore._type = 0
-
-	for {
-		r := machcall(&tx.h, int32(unsafe.Sizeof(m)), 0)
-		if r == 0 {
-			break
-		}
-		if r == _KERN_ABORTED { // interrupted
-			continue
-		}
-		macherror(r, "semaphore_destroy")
-	}
-}
-
-// The other calls have simple system call traps in sys_darwin_{amd64,386}.s
-
-func mach_semaphore_wait(sema uint32) int32
-func mach_semaphore_timedwait(sema, sec, nsec uint32) int32
-func mach_semaphore_signal(sema uint32) int32
-func mach_semaphore_signal_all(sema uint32) int32
-
-func semasleep1(ns int64) int32 {
-	_g_ := getg()
-
-	if ns >= 0 {
-		var nsecs int32
-		secs := timediv(ns, 1000000000, &nsecs)
-		r := mach_semaphore_timedwait(_g_.m.waitsema, uint32(secs), uint32(nsecs))
-		if r == _KERN_ABORTED || r == _KERN_OPERATION_TIMED_OUT {
-			return -1
-		}
-		if r != 0 {
-			macherror(r, "semaphore_wait")
-		}
-		return 0
-	}
-
-	for {
-		r := mach_semaphore_wait(_g_.m.waitsema)
-		if r == 0 {
-			break
-		}
-		if r == _KERN_ABORTED { // interrupted
-			continue
-		}
-		macherror(r, "semaphore_wait")
-	}
-	return 0
-}
-
-//go:nosplit
-func semasleep(ns int64) int32 {
-	var r int32
-	systemstack(func() {
-		r = semasleep1(ns)
-	})
-	return r
-}
-
-//go:nosplit
-func mach_semrelease(sem uint32) {
-	for {
-		r := mach_semaphore_signal(sem)
-		if r == 0 {
-			break
-		}
-		if r == _KERN_ABORTED { // interrupted
-			continue
-		}
-
-		// mach_semrelease must be completely nosplit,
-		// because it is called from Go code.
-		// If we're going to die, start that process on the system stack
-		// to avoid a Go stack split.
-		systemstack(func() { macherror(r, "semaphore_signal") })
-	}
-}
-
-//go:nosplit
-func osyield() {
-	usleep(1)
-}
-
-func memlimit() uintptr {
-	// NOTE(rsc): Could use getrlimit here,
-	// like on FreeBSD or Linux, but Darwin doesn't enforce
-	// ulimit -v, so it's unclear why we'd try to stay within
-	// the limit.
-	return 0
-}
-
-const (
-	_NSIG        = 32
-	_SI_USER     = 0 /* empirically true, but not what headers say */
-	_SIG_BLOCK   = 1
-	_SIG_UNBLOCK = 2
-	_SIG_SETMASK = 3
-	_SS_DISABLE  = 4
-)
-
-//go:noescape
-func sigprocmask(how uint32, new, old *sigset)
-
-//go:noescape
-func sigaction(mode uint32, new *sigactiont, old *usigactiont)
-
-//go:noescape
-func sigaltstack(new, old *stackt)
-
-func sigtramp()
-
-//go:noescape
-func setitimer(mode int32, new, old *itimerval)
-
-func raise(sig int32)
-func raiseproc(int32)
-
-//extern SigTabTT runtime·sigtab[];
-
-type sigset uint32
-
-var sigset_all = ^sigset(0)
-
-//go:nosplit
-//go:nowritebarrierrec
-func setsig(i int32, fn uintptr, restart bool) {
-	var sa sigactiont
-	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK
-	if restart {
-		sa.sa_flags |= _SA_RESTART
-	}
-	sa.sa_mask = ^uint32(0)
-	sa.sa_tramp = unsafe.Pointer(funcPC(sigtramp)) // runtime·sigtramp's job is to call into real handler
-	*(*uintptr)(unsafe.Pointer(&sa.__sigaction_u)) = fn
-	sigaction(uint32(i), &sa, nil)
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func setsigstack(i int32) {
-	var osa usigactiont
-	sigaction(uint32(i), nil, &osa)
-	handler := *(*uintptr)(unsafe.Pointer(&osa.__sigaction_u))
-	if handler == 0 || handler == _SIG_DFL || handler == _SIG_IGN || osa.sa_flags&_SA_ONSTACK != 0 {
-		return
-	}
-	var sa sigactiont
-	*(*uintptr)(unsafe.Pointer(&sa.__sigaction_u)) = handler
-	sa.sa_tramp = unsafe.Pointer(funcPC(sigtramp))
-	sa.sa_mask = osa.sa_mask
-	sa.sa_flags = osa.sa_flags | _SA_ONSTACK
-	sigaction(uint32(i), &sa, nil)
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func getsig(i int32) uintptr {
-	var sa usigactiont
-	sigaction(uint32(i), nil, &sa)
-	return *(*uintptr)(unsafe.Pointer(&sa.__sigaction_u))
-}
-
-//go:nosplit
-func signalstack(s *stack) {
-	var st stackt
-	if s == nil {
-		st.ss_flags = _SS_DISABLE
-	} else {
-		st.ss_sp = (*byte)(unsafe.Pointer(s.lo))
-		st.ss_size = s.hi - s.lo
-		st.ss_flags = 0
-	}
-	sigaltstack(&st, nil)
-}
-
-//go:nosplit
-//go:nowritebarrierrec
-func updatesigmask(m sigmask) {
-	s := sigset(m[0])
-	sigprocmask(_SIG_SETMASK, &s, nil)
-}
-
-func unblocksig(sig int32) {
-	mask := sigset(1) << (uint32(sig) - 1)
-	sigprocmask(_SIG_UNBLOCK, &mask, nil)
-}
diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go
new file mode 100644
index 0000000000..a0e3d8ed6b
--- /dev/null
+++ b/src/runtime/os_darwin.go
@@ -0,0 +1,582 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type mOS struct {
+	machport uint32 // return address for mach ipc
+	waitsema uint32 // semaphore for parking on locks
+}
+
+func bsdthread_create(stk, arg unsafe.Pointer, fn uintptr) int32
+func bsdthread_register() int32
+
+//go:noescape
+func mach_msg_trap(h unsafe.Pointer, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32
+
+func mach_reply_port() uint32
+func mach_task_self() uint32
+func mach_thread_self() uint32
+
+//go:noescape
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+
+func unimplemented(name string) {
+	println(name, "not implemented")
+	*(*int)(unsafe.Pointer(uintptr(1231))) = 1231
+}
+
+//go:nosplit
+func semawakeup(mp *m) {
+	mach_semrelease(mp.waitsema)
+}
+
+//go:nosplit
+func semacreate(mp *m) {
+	if mp.waitsema != 0 {
+		return
+	}
+	systemstack(func() {
+		mp.waitsema = mach_semcreate()
+	})
+}
+
+// BSD interface for threading.
+func osinit() {
+	// bsdthread_register delayed until end of goenvs so that we
+	// can look at the environment first.
+
+	ncpu = getncpu()
+}
+
+func getncpu() int32 {
+	// Use sysctl to fetch hw.ncpu.
+	mib := [2]uint32{6, 3}
+	out := uint32(0)
+	nout := unsafe.Sizeof(out)
+	ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
+	if ret >= 0 && int32(out) > 0 {
+		return int32(out)
+	}
+	return 1
+}
+
+var urandom_dev = []byte("/dev/urandom\x00")
+
+//go:nosplit
+func getRandomData(r []byte) {
+	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
+	n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
+	closefd(fd)
+	extendRandom(r, int(n))
+}
+
+func goenvs() {
+	goenvs_unix()
+
+	// Register our thread-creation callback (see sys_darwin_{amd64,386}.s)
+	// but only if we're not using cgo. If we are using cgo we need
+	// to let the C pthread library install its own thread-creation callback.
+	if !iscgo {
+		if bsdthread_register() != 0 {
+			if gogetenv("DYLD_INSERT_LIBRARIES") != "" {
+				throw("runtime: bsdthread_register error (unset DYLD_INSERT_LIBRARIES)")
+			}
+			throw("runtime: bsdthread_register error")
+		}
+	}
+}
+
+// May run with m.p==nil, so write barriers are not allowed.
+//go:nowritebarrier
+func newosproc(mp *m, stk unsafe.Pointer) {
+	if false {
+		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " id=", mp.id, " ostk=", &mp, "\n")
+	}
+
+	var oset sigset
+	sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
+	errno := bsdthread_create(stk, unsafe.Pointer(mp), funcPC(mstart))
+	sigprocmask(_SIG_SETMASK, &oset, nil)
+
+	if errno < 0 {
+		print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", -errno, ")\n")
+		throw("runtime.newosproc")
+	}
+}
+
+// newosproc0 is a version of newosproc that can be called before the runtime
+// is initialized.
+//
+// As Go uses bsdthread_register when running without cgo, this function is
+// not safe to use after initialization as it does not pass an M as fnarg.
+//
+//go:nosplit
+func newosproc0(stacksize uintptr, fn unsafe.Pointer, fnarg uintptr) {
+	stack := sysAlloc(stacksize, &memstats.stacks_sys)
+	if stack == nil {
+		write(2, unsafe.Pointer(&failallocatestack[0]), int32(len(failallocatestack)))
+		exit(1)
+	}
+	stk := unsafe.Pointer(uintptr(stack) + stacksize)
+
+	var oset sigset
+	sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
+	errno := bsdthread_create(stk, fn, fnarg)
+	sigprocmask(_SIG_SETMASK, &oset, nil)
+
+	if errno < 0 {
+		write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
+		exit(1)
+	}
+}
+
+var failallocatestack = []byte("runtime: failed to allocate stack for the new OS thread\n")
+var failthreadcreate = []byte("runtime: failed to create new OS thread\n")
+
+// Called to do synchronous initialization of Go code built with
+// -buildmode=c-archive or -buildmode=c-shared.
+// None of the Go runtime is initialized.
+//go:nosplit
+//go:nowritebarrierrec
+func libpreinit() {
+	initsig(true)
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+func mpreinit(mp *m) {
+	mp.gsignal = malg(32 * 1024) // OS X wants >= 8K
+	mp.gsignal.m = mp
+}
+
+//go:nosplit
+func msigsave(mp *m) {
+	sigprocmask(_SIG_SETMASK, nil, &mp.sigmask)
+}
+
+//go:nosplit
+func msigrestore(sigmask sigset) {
+	sigprocmask(_SIG_SETMASK, &sigmask, nil)
+}
+
+//go:nosplit
+func sigblock() {
+	sigprocmask(_SIG_SETMASK, &sigset_all, nil)
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, cannot allocate memory.
+func minit() {
+	// Initialize signal handling.
+	_g_ := getg()
+
+	// The alternate signal stack is buggy on arm and arm64.
+	// The signal handler handles it directly.
+	// The sigaltstack assembly function does nothing.
+	if GOARCH != "arm" && GOARCH != "arm64" {
+		var st stackt
+		sigaltstack(nil, &st)
+		if st.ss_flags&_SS_DISABLE != 0 {
+			signalstack(&_g_.m.gsignal.stack)
+			_g_.m.newSigstack = true
+		} else {
+			// Use existing signal stack.
+			stsp := uintptr(unsafe.Pointer(st.ss_sp))
+			_g_.m.gsignal.stack.lo = stsp
+			_g_.m.gsignal.stack.hi = stsp + st.ss_size
+			_g_.m.gsignal.stackguard0 = stsp + _StackGuard
+			_g_.m.gsignal.stackguard1 = stsp + _StackGuard
+			_g_.m.gsignal.stackAlloc = st.ss_size
+			_g_.m.newSigstack = false
+		}
+	}
+
+	// restore signal mask from m.sigmask and unblock essential signals
+	nmask := _g_.m.sigmask
+	for i := range sigtable {
+		if sigtable[i].flags&_SigUnblock != 0 {
+			nmask &^= 1 << (uint32(i) - 1)
+		}
+	}
+	sigprocmask(_SIG_SETMASK, &nmask, nil)
+}
+
+// Called from dropm to undo the effect of an minit.
+//go:nosplit
+func unminit() {
+	if getg().m.newSigstack {
+		signalstack(nil)
+	}
+}
+
+// Mach IPC, to get at semaphores
+// Definitions are in /usr/include/mach on a Mac.
+
+func macherror(r int32, fn string) {
+	print("mach error ", fn, ": ", r, "\n")
+	throw("mach error")
+}
+
+const _DebugMach = false
+
+var zerondr machndr
+
+func mach_msgh_bits(a, b uint32) uint32 {
+	return a | b<<8
+}
+
+func mach_msg(h *machheader, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32 {
+	// TODO: Loop on interrupt.
+	return mach_msg_trap(unsafe.Pointer(h), op, send_size, rcv_size, rcv_name, timeout, notify)
+}
+
+// Mach RPC (MIG)
+const (
+	_MinMachMsg = 48
+	_MachReply  = 100
+)
+
+type codemsg struct {
+	h    machheader
+	ndr  machndr
+	code int32
+}
+
+func machcall(h *machheader, maxsize int32, rxsize int32) int32 {
+	_g_ := getg()
+	port := _g_.m.machport
+	if port == 0 {
+		port = mach_reply_port()
+		_g_.m.machport = port
+	}
+
+	h.msgh_bits |= mach_msgh_bits(_MACH_MSG_TYPE_COPY_SEND, _MACH_MSG_TYPE_MAKE_SEND_ONCE)
+	h.msgh_local_port = port
+	h.msgh_reserved = 0
+	id := h.msgh_id
+
+	if _DebugMach {
+		p := (*[10000]unsafe.Pointer)(unsafe.Pointer(h))
+		print("send:\t")
+		var i uint32
+		for i = 0; i < h.msgh_size/uint32(unsafe.Sizeof(p[0])); i++ {
+			print(" ", p[i])
+			if i%8 == 7 {
+				print("\n\t")
+			}
+		}
+		if i%8 != 0 {
+			print("\n")
+		}
+	}
+	ret := mach_msg(h, _MACH_SEND_MSG|_MACH_RCV_MSG, h.msgh_size, uint32(maxsize), port, 0, 0)
+	if ret != 0 {
+		if _DebugMach {
+			print("mach_msg error ", ret, "\n")
+		}
+		return ret
+	}
+	if _DebugMach {
+		p := (*[10000]unsafe.Pointer)(unsafe.Pointer(h))
+		var i uint32
+		for i = 0; i < h.msgh_size/uint32(unsafe.Sizeof(p[0])); i++ {
+			print(" ", p[i])
+			if i%8 == 7 {
+				print("\n\t")
+			}
+		}
+		if i%8 != 0 {
+			print("\n")
+		}
+	}
+	if h.msgh_id != id+_MachReply {
+		if _DebugMach {
+			print("mach_msg _MachReply id mismatch ", h.msgh_id, " != ", id+_MachReply, "\n")
+		}
+		return -303 // MIG_REPLY_MISMATCH
+	}
+	// Look for a response giving the return value.
+	// Any call can send this back with an error,
+	// and some calls only have return values so they
+	// send it back on success too. I don't quite see how
+	// you know it's one of these and not the full response
+	// format, so just look if the message is right.
+	c := (*codemsg)(unsafe.Pointer(h))
+	if uintptr(h.msgh_size) == unsafe.Sizeof(*c) && h.msgh_bits&_MACH_MSGH_BITS_COMPLEX == 0 {
+		if _DebugMach {
+			print("mig result ", c.code, "\n")
+		}
+		return c.code
+	}
+	if h.msgh_size != uint32(rxsize) {
+		if _DebugMach {
+			print("mach_msg _MachReply size mismatch ", h.msgh_size, " != ", rxsize, "\n")
+		}
+		return -307 // MIG_ARRAY_TOO_LARGE
+	}
+	return 0
+}
+
+// Semaphores!
+
+const (
+	tmach_semcreate = 3418
+	rmach_semcreate = tmach_semcreate + _MachReply
+
+	tmach_semdestroy = 3419
+	rmach_semdestroy = tmach_semdestroy + _MachReply
+
+	_KERN_ABORTED             = 14
+	_KERN_OPERATION_TIMED_OUT = 49
+)
+
+type tmach_semcreatemsg struct {
+	h      machheader
+	ndr    machndr
+	policy int32
+	value  int32
+}
+
+type rmach_semcreatemsg struct {
+	h         machheader
+	body      machbody
+	semaphore machport
+}
+
+type tmach_semdestroymsg struct {
+	h         machheader
+	body      machbody
+	semaphore machport
+}
+
+func mach_semcreate() uint32 {
+	var m [256]uint8
+	tx := (*tmach_semcreatemsg)(unsafe.Pointer(&m))
+	rx := (*rmach_semcreatemsg)(unsafe.Pointer(&m))
+
+	tx.h.msgh_bits = 0
+	tx.h.msgh_size = uint32(unsafe.Sizeof(*tx))
+	tx.h.msgh_remote_port = mach_task_self()
+	tx.h.msgh_id = tmach_semcreate
+	tx.ndr = zerondr
+
+	tx.policy = 0 // 0 = SYNC_POLICY_FIFO
+	tx.value = 0
+
+	for {
+		r := machcall(&tx.h, int32(unsafe.Sizeof(m)), int32(unsafe.Sizeof(*rx)))
+		if r == 0 {
+			break
+		}
+		if r == _KERN_ABORTED { // interrupted
+			continue
+		}
+		macherror(r, "semaphore_create")
+	}
+	if rx.body.msgh_descriptor_count != 1 {
+		unimplemented("mach_semcreate desc count")
+	}
+	return rx.semaphore.name
+}
+
+func mach_semdestroy(sem uint32) {
+	var m [256]uint8
+	tx := (*tmach_semdestroymsg)(unsafe.Pointer(&m))
+
+	tx.h.msgh_bits = _MACH_MSGH_BITS_COMPLEX
+	tx.h.msgh_size = uint32(unsafe.Sizeof(*tx))
+	tx.h.msgh_remote_port = mach_task_self()
+	tx.h.msgh_id = tmach_semdestroy
+	tx.body.msgh_descriptor_count = 1
+	tx.semaphore.name = sem
+	tx.semaphore.disposition = _MACH_MSG_TYPE_MOVE_SEND
+	tx.semaphore._type = 0
+
+	for {
+		r := machcall(&tx.h, int32(unsafe.Sizeof(m)), 0)
+		if r == 0 {
+			break
+		}
+		if r == _KERN_ABORTED { // interrupted
+			continue
+		}
+		macherror(r, "semaphore_destroy")
+	}
+}
+
+// The other calls have simple system call traps in sys_darwin_{amd64,386}.s
+
+func mach_semaphore_wait(sema uint32) int32
+func mach_semaphore_timedwait(sema, sec, nsec uint32) int32
+func mach_semaphore_signal(sema uint32) int32
+func mach_semaphore_signal_all(sema uint32) int32
+
+func semasleep1(ns int64) int32 {
+	_g_ := getg()
+
+	if ns >= 0 {
+		var nsecs int32
+		secs := timediv(ns, 1000000000, &nsecs)
+		r := mach_semaphore_timedwait(_g_.m.waitsema, uint32(secs), uint32(nsecs))
+		if r == _KERN_ABORTED || r == _KERN_OPERATION_TIMED_OUT {
+			return -1
+		}
+		if r != 0 {
+			macherror(r, "semaphore_wait")
+		}
+		return 0
+	}
+
+	for {
+		r := mach_semaphore_wait(_g_.m.waitsema)
+		if r == 0 {
+			break
+		}
+		if r == _KERN_ABORTED { // interrupted
+			continue
+		}
+		macherror(r, "semaphore_wait")
+	}
+	return 0
+}
+
+//go:nosplit
+func semasleep(ns int64) int32 {
+	var r int32
+	systemstack(func() {
+		r = semasleep1(ns)
+	})
+	return r
+}
+
+//go:nosplit
+func mach_semrelease(sem uint32) {
+	for {
+		r := mach_semaphore_signal(sem)
+		if r == 0 {
+			break
+		}
+		if r == _KERN_ABORTED { // interrupted
+			continue
+		}
+
+		// mach_semrelease must be completely nosplit,
+		// because it is called from Go code.
+		// If we're going to die, start that process on the system stack
+		// to avoid a Go stack split.
+		systemstack(func() { macherror(r, "semaphore_signal") })
+	}
+}
+
+//go:nosplit
+func osyield() {
+	usleep(1)
+}
+
+func memlimit() uintptr {
+	// NOTE(rsc): Could use getrlimit here,
+	// like on FreeBSD or Linux, but Darwin doesn't enforce
+	// ulimit -v, so it's unclear why we'd try to stay within
+	// the limit.
+	return 0
+}
+
+const (
+	_NSIG        = 32
+	_SI_USER     = 0 /* empirically true, but not what headers say */
+	_SIG_BLOCK   = 1
+	_SIG_UNBLOCK = 2
+	_SIG_SETMASK = 3
+	_SS_DISABLE  = 4
+)
+
+//go:noescape
+func sigprocmask(how uint32, new, old *sigset)
+
+//go:noescape
+func sigaction(mode uint32, new *sigactiont, old *usigactiont)
+
+//go:noescape
+func sigaltstack(new, old *stackt)
+
+func sigtramp()
+
+//go:noescape
+func setitimer(mode int32, new, old *itimerval)
+
+func raise(sig int32)
+func raiseproc(int32)
+
+//extern SigTabTT runtime·sigtab[];
+
+type sigset uint32
+
+var sigset_all = ^sigset(0)
+
+//go:nosplit
+//go:nowritebarrierrec
+func setsig(i int32, fn uintptr, restart bool) {
+	var sa sigactiont
+	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK
+	if restart {
+		sa.sa_flags |= _SA_RESTART
+	}
+	sa.sa_mask = ^uint32(0)
+	sa.sa_tramp = unsafe.Pointer(funcPC(sigtramp)) // runtime·sigtramp's job is to call into real handler
+	*(*uintptr)(unsafe.Pointer(&sa.__sigaction_u)) = fn
+	sigaction(uint32(i), &sa, nil)
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func setsigstack(i int32) {
+	var osa usigactiont
+	sigaction(uint32(i), nil, &osa)
+	handler := *(*uintptr)(unsafe.Pointer(&osa.__sigaction_u))
+	if handler == 0 || handler == _SIG_DFL || handler == _SIG_IGN || osa.sa_flags&_SA_ONSTACK != 0 {
+		return
+	}
+	var sa sigactiont
+	*(*uintptr)(unsafe.Pointer(&sa.__sigaction_u)) = handler
+	sa.sa_tramp = unsafe.Pointer(funcPC(sigtramp))
+	sa.sa_mask = osa.sa_mask
+	sa.sa_flags = osa.sa_flags | _SA_ONSTACK
+	sigaction(uint32(i), &sa, nil)
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func getsig(i int32) uintptr {
+	var sa usigactiont
+	sigaction(uint32(i), nil, &sa)
+	return *(*uintptr)(unsafe.Pointer(&sa.__sigaction_u))
+}
+
+//go:nosplit
+func signalstack(s *stack) {
+	var st stackt
+	if s == nil {
+		st.ss_flags = _SS_DISABLE
+	} else {
+		st.ss_sp = (*byte)(unsafe.Pointer(s.lo))
+		st.ss_size = s.hi - s.lo
+		st.ss_flags = 0
+	}
+	sigaltstack(&st, nil)
+}
+
+//go:nosplit
+//go:nowritebarrierrec
+func updatesigmask(m sigmask) {
+	s := sigset(m[0])
+	sigprocmask(_SIG_SETMASK, &s, nil)
+}
+
+func unblocksig(sig int32) {
+	mask := sigset(1) << (uint32(sig) - 1)
+	sigprocmask(_SIG_UNBLOCK, &mask, nil)
+}
-- 
cgit v1.3


From f120936dfffa3ac935730699587e6957f2d5ea61 Mon Sep 17 00:00:00 2001
From: David Crawshaw <crawshaw@golang.org>
Date: Thu, 31 Mar 2016 10:02:10 -0400
Subject: cmd/compile, etc: use name for type pkgPath

By replacing the *string used to represent pkgPath with a
reflect.name everywhere, the embedded *string for package paths
inside the reflect.name can be replaced by an offset, nameOff.
This reduces the number of pointers in the type information.

This also moves all reflect.name types into the same section, making
it possible to use nameOff more widely in later CLs.

No significant binary size change for normal binaries, but:

linux/amd64 PIE:
	cmd/go: -440KB (3.7%)
	jujud:  -2.6MB (3.2%)

For #6853.

Change-Id: I3890b132a784a1090b1b72b32febfe0bea77eaee
Reviewed-on: https://go-review.googlesource.com/21395
Run-TryBot: David Crawshaw <crawshaw@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/cmd/compile/internal/gc/go.go      |   2 +-
 src/cmd/compile/internal/gc/reflect.go | 117 +++++++++++++++++++--------------
 src/cmd/internal/obj/data.go           |  13 +++-
 src/reflect/type.go                    |  64 +++++++++---------
 src/runtime/heapdump.go                |   5 +-
 src/runtime/iface.go                   |   8 +--
 src/runtime/type.go                    |  72 ++++++++++++--------
 7 files changed, 168 insertions(+), 113 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go
index 5df49b56d6..8411d2d0ac 100644
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -20,7 +20,7 @@ const (
 type Pkg struct {
 	Name     string // package name, e.g. "sys"
 	Path     string // string literal used in import statement, e.g. "runtime/internal/sys"
-	Pathsym  *Sym
+	Pathsym  *obj.LSym
 	Prefix   string // escaped path for use in symbol table
 	Imported bool   // export data of this package was parsed
 	Exported bool   // import line written in export data
diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go
index 2bd50b4665..70a75f9324 100644
--- a/src/cmd/compile/internal/gc/reflect.go
+++ b/src/cmd/compile/internal/gc/reflect.go
@@ -412,8 +412,6 @@ func imethods(t *Type) []*Sig {
 	return methods
 }
 
-var dimportpath_gopkg *Pkg
-
 func dimportpath(p *Pkg) {
 	if p.Pathsym != nil {
 		return
@@ -426,27 +424,18 @@ func dimportpath(p *Pkg) {
 		return
 	}
 
-	if dimportpath_gopkg == nil {
-		dimportpath_gopkg = mkpkg("go")
-		dimportpath_gopkg.Name = "go"
-	}
-
-	nam := "importpath." + p.Prefix + "."
-
-	n := Nod(ONAME, nil, nil)
-	n.Sym = Pkglookup(nam, dimportpath_gopkg)
-
-	n.Class = PEXTERN
-	n.Xoffset = 0
-	p.Pathsym = n.Sym
-
+	var str string
 	if p == localpkg {
 		// Note: myimportpath != "", or else dgopkgpath won't call dimportpath.
-		gdatastring(n, myimportpath)
+		str = myimportpath
 	} else {
-		gdatastring(n, p.Path)
+		str = p.Path
 	}
-	ggloblsym(n.Sym, int32(Types[TSTRING].Width), obj.DUPOK|obj.RODATA)
+
+	s := obj.Linklookup(Ctxt, "go.importpath."+p.Prefix+".", 0)
+	ot := dnameData(s, 0, str, "", nil, false)
+	ggloblLSym(s, int32(ot), obj.DUPOK|obj.RODATA)
+	p.Pathsym = s
 }
 
 func dgopkgpath(s *Sym, ot int, pkg *Pkg) int {
@@ -469,7 +458,23 @@ func dgopkgpathLSym(s *obj.LSym, ot int, pkg *Pkg) int {
 	}
 
 	dimportpath(pkg)
-	return dsymptrLSym(s, ot, Linksym(pkg.Pathsym), 0)
+	return dsymptrLSym(s, ot, pkg.Pathsym, 0)
+}
+
+// dgopkgpathOffLSym writes an offset relocation in s at offset ot to the pkg path symbol.
+func dgopkgpathOffLSym(s *obj.LSym, ot int, pkg *Pkg) int {
+	if pkg == localpkg && myimportpath == "" {
+		// If we don't know the full import path of the package being compiled
+		// (i.e. -p was not passed on the compiler command line), emit a reference to
+		// go.importpath.""., which the linker will rewrite using the correct import path.
+		// Every package that imports this one directly defines the symbol.
+		// See also https://groups.google.com/forum/#!topic/golang-dev/myb9s53HxGQ.
+		ns := obj.Linklookup(Ctxt, `go.importpath."".`, 0)
+		return dsymptrOffLSym(s, ot, ns, 0)
+	}
+
+	dimportpath(pkg)
+	return dsymptrOffLSym(s, ot, pkg.Pathsym, 0)
 }
 
 // isExportedField reports whether a struct field is exported.
@@ -495,13 +500,12 @@ func dnameField(s *Sym, ot int, ft *Field) int {
 	if ft.Note != nil {
 		tag = *ft.Note
 	}
-	return dname(s, ot, name, tag, nil, isExportedField(ft))
+	nsym := dname(name, tag, nil, isExportedField(ft))
+	return dsymptrLSym(Linksym(s), ot, nsym, 0)
 }
 
-var dnameCount int
-
-// dname dumps a reflect.name for a struct field or method.
-func dname(s *Sym, ot int, name, tag string, pkg *Pkg, exported bool) int {
+// dnameData writes the contents of a reflect.name into s at offset ot.
+func dnameData(s *obj.LSym, ot int, name, tag string, pkg *Pkg, exported bool) int {
 	if len(name) > 1<<16-1 {
 		Fatalf("name too long: %s", name)
 	}
@@ -534,31 +538,46 @@ func dname(s *Sym, ot int, name, tag string, pkg *Pkg, exported bool) int {
 		copy(tb[2:], tag)
 	}
 
-	// Very few names require a pkgPath *string (only those
-	// defined in a different package than their type). So if
-	// there is no pkgPath, we treat the name contents as string
-	// data that duplicates across packages.
-	var bsym *obj.LSym
+	ot = int(s.WriteBytes(Ctxt, int64(ot), b))
+
+	if pkg != nil {
+		ot = dgopkgpathOffLSym(s, ot, pkg)
+	}
+
+	return ot
+}
+
+var dnameCount int
+
+// dname creates a reflect.name for a struct field or method.
+func dname(name, tag string, pkg *Pkg, exported bool) *obj.LSym {
+	// Write out data as "type.." to signal two things to the
+	// linker, first that when dynamically linking, the symbol
+	// should be moved to a relro section, and second that the
+	// contents should not be decoded as a type.
+	sname := "type..namedata."
 	if pkg == nil {
-		_, bsym = stringsym(string(b))
+		// In the common case, share data with other packages.
+		if name == "" {
+			if exported {
+				sname += "-noname-exported." + tag
+			} else {
+				sname += "-noname-unexported." + tag
+			}
+		} else {
+			sname += name + "." + tag
+		}
 	} else {
-		// Write out data as "type.." to signal two things to the
-		// linker, first that when dynamically linking, the symbol
-		// should be moved to a relro section, and second that the
-		// contents should not be decoded as a type.
-		bsymname := fmt.Sprintf(`type..methodname."".%d`, dnameCount)
+		sname = fmt.Sprintf(`%s"".%d`, sname, dnameCount)
 		dnameCount++
-		bsym = obj.Linklookup(Ctxt, bsymname, 0)
-		bsym.P = b
-		boff := len(b)
-		boff = int(Rnd(int64(boff), int64(Widthptr)))
-		boff = dgopkgpathLSym(bsym, boff, pkg)
-		ggloblLSym(bsym, int32(boff), obj.RODATA|obj.LOCAL)
 	}
-
-	ot = dsymptrLSym(Linksym(s), ot, bsym, 0)
-
-	return ot
+	s := obj.Linklookup(Ctxt, sname, 0)
+	if len(s.P) > 0 {
+		return s
+	}
+	ot := dnameData(s, 0, name, tag, pkg, exported)
+	ggloblLSym(s, int32(ot), obj.DUPOK|obj.RODATA)
+	return s
 }
 
 // dextratype dumps the fields of a runtime.uncommontype.
@@ -627,7 +646,8 @@ func dextratypeData(s *Sym, ot int, t *Type) int {
 		if !exported && a.pkg != typePkg(t) {
 			pkg = a.pkg
 		}
-		ot = dname(s, ot, a.name, "", pkg, exported)
+		nsym := dname(a.name, "", pkg, exported)
+		ot = dsymptrLSym(lsym, ot, nsym, 0)
 		ot = dmethodptrOffLSym(lsym, ot, Linksym(dtypesym(a.mtype)))
 		ot = dmethodptrOffLSym(lsym, ot, Linksym(a.isym))
 		ot = dmethodptrOffLSym(lsym, ot, Linksym(a.tsym))
@@ -1213,7 +1233,8 @@ ok:
 			if !exported && a.pkg != tpkg {
 				pkg = a.pkg
 			}
-			ot = dname(s, ot, a.name, "", pkg, exported)
+			nsym := dname(a.name, "", pkg, exported)
+			ot = dsymptrLSym(Linksym(s), ot, nsym, 0)
 			ot = dsymptr(s, ot, dtypesym(a.type_), 0)
 		}
 
diff --git a/src/cmd/internal/obj/data.go b/src/cmd/internal/obj/data.go
index 546ff37269..d7f0840bc1 100644
--- a/src/cmd/internal/obj/data.go
+++ b/src/cmd/internal/obj/data.go
@@ -75,7 +75,11 @@ func (s *LSym) prepwrite(ctxt *Link, off int64, siz int) {
 	if s.Type == SBSS || s.Type == STLSBSS {
 		ctxt.Diag("cannot supply data for BSS var")
 	}
-	s.Grow(off + int64(siz))
+	l := off + int64(siz)
+	s.Grow(l)
+	if l > s.Size {
+		s.Size = l
+	}
 }
 
 // WriteFloat32 writes f into s at offset off.
@@ -150,6 +154,13 @@ func (s *LSym) WriteString(ctxt *Link, off int64, siz int, str string) {
 	copy(s.P[off:off+int64(siz)], str)
 }
 
+// WriteBytes writes a slice of bytes into s at offset off.
+func (s *LSym) WriteBytes(ctxt *Link, off int64, b []byte) int64 {
+	s.prepwrite(ctxt, off, len(b))
+	copy(s.P[off:], b)
+	return off + int64(len(b))
+}
+
 func Addrel(s *LSym) *Reloc {
 	s.R = append(s.R, Reloc{})
 	return &s.R[len(s.R)-1]
diff --git a/src/reflect/type.go b/src/reflect/type.go
index c7ed402be2..3c7affcd7f 100644
--- a/src/reflect/type.go
+++ b/src/reflect/type.go
@@ -299,9 +299,9 @@ type method struct {
 // Using a pointer to this struct reduces the overall size required
 // to describe an unnamed type with no methods.
 type uncommonType struct {
-	pkgPath *string // import path; nil for built-in types like int, string
-	mcount  uint16  // number of methods
-	moff    uint16  // offset from this uncommontype to [mcount]method
+	pkgPath name   // import path; empty for built-in types like int, string
+	mcount  uint16 // number of methods
+	moff    uint16 // offset from this uncommontype to [mcount]method
 }
 
 // ChanDir represents a channel type's direction.
@@ -354,7 +354,7 @@ type imethod struct {
 // interfaceType represents an interface type.
 type interfaceType struct {
 	rtype   `reflect:"interface"`
-	pkgPath *string   // import path
+	pkgPath name      // import path
 	methods []imethod // sorted by hash
 }
 
@@ -396,7 +396,7 @@ type structField struct {
 // structType represents a struct type.
 type structType struct {
 	rtype   `reflect:"struct"`
-	pkgPath *string
+	pkgPath name
 	fields  []structField // sorted by offset
 }
 
@@ -406,7 +406,7 @@ type structType struct {
 //
 //	1<<0 the name is exported
 //	1<<1 tag data follows the name
-//	1<<2 pkgPath *string follow the name and tag
+//	1<<2 pkgPath nameOff follows the name and tag
 //
 // The next two bytes are the data length:
 //
@@ -417,10 +417,9 @@ type structType struct {
 // If tag data follows then bytes 3+l and 3+l+1 are the tag length,
 // with the data following.
 //
-// If the import path follows, then ptrSize bytes at the end of
-// the data form a *string. The pointer is aligned to its width.
-// The import path is only set for concrete methods that are defined
-// in a different package than their type.
+// If the import path follows, then 4 bytes at the end of
+// the data form a nameOff. The import path is only set for concrete
+// methods that are defined in a different package than their type.
 type name struct {
 	bytes *byte
 }
@@ -446,6 +445,9 @@ func (n *name) tagLen() int {
 }
 
 func (n *name) name() (s string) {
+	if n.bytes == nil {
+		return ""
+	}
 	nl := n.nameLen()
 	if nl == 0 {
 		return ""
@@ -468,16 +470,18 @@ func (n *name) tag() (s string) {
 	return s
 }
 
-func (n *name) pkgPath() *string {
-	if *n.data(0)&(1<<2) == 0 {
-		return nil
+func (n *name) pkgPath() string {
+	if n.bytes == nil || *n.data(0)&(1<<2) == 0 {
+		return ""
 	}
 	off := 3 + n.nameLen()
 	if tl := n.tagLen(); tl > 0 {
 		off += 2 + tl
 	}
-	off = int(round(uintptr(off), ptrSize))
-	return *(**string)(unsafe.Pointer(n.data(off)))
+	var nameOff int32
+	copy((*[4]byte)(unsafe.Pointer(&nameOff))[:], (*[4]byte)(unsafe.Pointer(n.data(off)))[:])
+	pkgPathName := name{(*byte)(resolveTypeOff(unsafe.Pointer(n), nameOff))}
+	return pkgPathName.name()
 }
 
 // round n up to a multiple of a.  a must be a power of 2.
@@ -595,10 +599,10 @@ func (t *uncommonType) methods() []method {
 }
 
 func (t *uncommonType) PkgPath() string {
-	if t == nil || t.pkgPath == nil {
+	if t == nil {
 		return ""
 	}
-	return *t.pkgPath
+	return t.pkgPath.name()
 }
 
 // resolveTypeOff resolves an *rtype offset from a base type.
@@ -752,11 +756,10 @@ func (t *rtype) Method(i int) (m Method) {
 	m.Name = p.name.name()
 	fl := flag(Func)
 	if !p.name.isExported() {
-		pkgPath := p.name.pkgPath()
-		if pkgPath == nil {
-			pkgPath = ut.pkgPath
+		m.PkgPath = p.name.pkgPath()
+		if m.PkgPath == "" {
+			m.PkgPath = ut.pkgPath.name()
 		}
-		m.PkgPath = *pkgPath
 		fl |= flagStickyRO
 	}
 	if p.mtyp != 0 {
@@ -1004,11 +1007,10 @@ func (t *interfaceType) Method(i int) (m Method) {
 	p := &t.methods[i]
 	m.Name = p.name.name()
 	if !p.name.isExported() {
-		pkgPath := p.name.pkgPath()
-		if pkgPath == nil {
-			pkgPath = t.pkgPath
+		m.PkgPath = p.name.pkgPath()
+		if m.PkgPath == "" {
+			m.PkgPath = t.pkgPath.name()
 		}
-		m.PkgPath = *pkgPath
 	}
 	m.Type = toType(p.typ)
 	m.Index = i
@@ -1146,9 +1148,9 @@ func (t *structType) Field(i int) (f StructField) {
 		f.Name = t.Name()
 		f.Anonymous = true
 	}
-	if t.pkgPath != nil && !p.name.isExported() {
+	if !p.name.isExported() {
 		// Fields never have an import path in their name.
-		f.PkgPath = *t.pkgPath
+		f.PkgPath = t.pkgPath.name()
 	}
 	if tag := p.name.tag(); tag != "" {
 		f.Tag = StructTag(tag)
@@ -2325,7 +2327,7 @@ func StructOf(fields []StructField) Type {
 			case Interface:
 				ift := (*interfaceType)(unsafe.Pointer(ft))
 				for im, m := range ift.methods {
-					if m.name.pkgPath() != nil {
+					if m.name.pkgPath() != "" {
 						// TODO(sbinet)
 						panic("reflect: embedded interface with unexported method(s) not implemented")
 					}
@@ -2384,7 +2386,7 @@ func StructOf(fields []StructField) Type {
 				ptr := (*ptrType)(unsafe.Pointer(ft))
 				if unt := ptr.uncommon(); unt != nil {
 					for _, m := range unt.methods() {
-						if m.name.pkgPath() != nil {
+						if m.name.pkgPath() != "" {
 							// TODO(sbinet)
 							panic("reflect: embedded interface with unexported method(s) not implemented")
 						}
@@ -2398,7 +2400,7 @@ func StructOf(fields []StructField) Type {
 				}
 				if unt := ptr.elem.uncommon(); unt != nil {
 					for _, m := range unt.methods() {
-						if m.name.pkgPath() != nil {
+						if m.name.pkgPath() != "" {
 							// TODO(sbinet)
 							panic("reflect: embedded interface with unexported method(s) not implemented")
 						}
@@ -2413,7 +2415,7 @@ func StructOf(fields []StructField) Type {
 			default:
 				if unt := ft.uncommon(); unt != nil {
 					for _, m := range unt.methods() {
-						if m.name.pkgPath() != nil {
+						if m.name.pkgPath() != "" {
 							// TODO(sbinet)
 							panic("reflect: embedded interface with unexported method(s) not implemented")
 						}
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index 2410b1954a..adfd660847 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -183,10 +183,11 @@ func dumptype(t *_type) {
 	dumpint(tagType)
 	dumpint(uint64(uintptr(unsafe.Pointer(t))))
 	dumpint(uint64(t.size))
-	if x := t.uncommon(); x == nil || x.pkgpath == nil {
+	if x := t.uncommon(); x == nil || x.pkgpath.name() == "" {
 		dumpstr(t._string)
 	} else {
-		pkgpath := stringStructOf(x.pkgpath)
+		pkgpathstr := x.pkgpath.name()
+		pkgpath := stringStructOf(&pkgpathstr)
 		namestr := t.name()
 		name := stringStructOf(&namestr)
 		dumpint(uint64(uintptr(pkgpath.len) + 1 + uintptr(name.len)))
diff --git a/src/runtime/iface.go b/src/runtime/iface.go
index 700bdc2f48..84f0ee8f0c 100644
--- a/src/runtime/iface.go
+++ b/src/runtime/iface.go
@@ -101,15 +101,15 @@ func additab(m *itab, locked, canfail bool) {
 		iname := i.name.name()
 		itype := i._type
 		ipkg := i.name.pkgPath()
-		if ipkg == nil {
-			ipkg = inter.pkgpath
+		if ipkg == "" {
+			ipkg = inter.pkgpath.name()
 		}
 		for ; j < nt; j++ {
 			t := &xmhdr[j]
 			if typ.typeOff(t.mtyp) == itype && t.name.name() == iname {
 				pkgPath := t.name.pkgPath()
-				if pkgPath == nil {
-					pkgPath = x.pkgpath
+				if pkgPath == "" {
+					pkgPath = x.pkgpath.name()
 				}
 				if t.name.isExported() || pkgPath == ipkg {
 					if m != nil {
diff --git a/src/runtime/type.go b/src/runtime/type.go
index 86131d3ff3..711753bab5 100644
--- a/src/runtime/type.go
+++ b/src/runtime/type.go
@@ -6,10 +6,7 @@
 
 package runtime
 
-import (
-	"runtime/internal/sys"
-	"unsafe"
-)
+import "unsafe"
 
 // tflag is documented in ../reflect/type.go.
 type tflag uint8
@@ -151,6 +148,33 @@ var reflectOffs struct {
 	minv map[unsafe.Pointer]int32
 }
 
+func resolveNameOff(ptrInModule unsafe.Pointer, off nameOff) name {
+	if off == 0 {
+		return name{}
+	}
+	base := uintptr(ptrInModule)
+	var md *moduledata
+	for next := &firstmoduledata; next != nil; next = next.next {
+		if base >= next.types && base < next.etypes {
+			md = next
+			break
+		}
+	}
+	if md == nil {
+		println("runtime: nameOff", hex(off), "base", hex(base), "not in ranges:")
+		for next := &firstmoduledata; next != nil; next = next.next {
+			println("\ttypes", hex(next.types), "etypes", hex(next.etypes))
+		}
+		throw("runtime: name offset base pointer out of range")
+	}
+	res := md.types + uintptr(off)
+	if res > md.etypes {
+		println("runtime: nameOff", hex(off), "out of range", hex(md.types), "-", hex(md.etypes))
+		throw("runtime: name offset out of range")
+	}
+	return name{(*byte)(unsafe.Pointer(res))}
+}
+
 func (t *_type) typeOff(off typeOff) *_type {
 	if off == 0 {
 		return nil
@@ -240,6 +264,7 @@ func (t *functype) dotdotdot() bool {
 	return t.outCount&(1<<15) != 0
 }
 
+type nameOff int32
 type typeOff int32
 type textOff int32
 
@@ -251,7 +276,7 @@ type method struct {
 }
 
 type uncommontype struct {
-	pkgpath *string
+	pkgpath name
 	mcount  uint16 // number of methods
 	moff    uint16 // offset from this uncommontype to [mcount]method
 }
@@ -263,7 +288,7 @@ type imethod struct {
 
 type interfacetype struct {
 	typ     _type
-	pkgpath *string
+	pkgpath name
 	mhdr    []imethod
 }
 
@@ -319,7 +344,7 @@ type structfield struct {
 
 type structtype struct {
 	typ     _type
-	pkgPath *string
+	pkgPath name
 	fields  []structfield
 }
 
@@ -350,6 +375,9 @@ func (n *name) tagLen() int {
 }
 
 func (n *name) name() (s string) {
+	if n.bytes == nil {
+		return ""
+	}
 	nl := n.nameLen()
 	if nl == 0 {
 		return ""
@@ -372,16 +400,18 @@ func (n *name) tag() (s string) {
 	return s
 }
 
-func (n *name) pkgPath() *string {
-	if *n.data(0)&(1<<2) == 0 {
-		return nil
+func (n *name) pkgPath() string {
+	if n.bytes == nil || *n.data(0)&(1<<2) == 0 {
+		return ""
 	}
 	off := 3 + n.nameLen()
 	if tl := n.tagLen(); tl > 0 {
 		off += 2 + tl
 	}
-	off = int(round(uintptr(off), sys.PtrSize))
-	return *(**string)(unsafe.Pointer(n.data(off)))
+	var nameOff nameOff
+	copy((*[4]byte)(unsafe.Pointer(&nameOff))[:], (*[4]byte)(unsafe.Pointer(n.data(off)))[:])
+	pkgPathName := resolveNameOff(unsafe.Pointer(n.bytes), nameOff)
+	return pkgPathName.name()
 }
 
 // typelinksinit scans the types from extra modules and builds the
@@ -466,7 +496,7 @@ func typesEqual(t, v *_type) bool {
 		if ut == nil || uv == nil {
 			return false
 		}
-		if !pkgPathEqual(ut.pkgpath, uv.pkgpath) {
+		if ut.pkgpath.name() != uv.pkgpath.name() {
 			return false
 		}
 	}
@@ -506,7 +536,7 @@ func typesEqual(t, v *_type) bool {
 	case kindInterface:
 		it := (*interfacetype)(unsafe.Pointer(t))
 		iv := (*interfacetype)(unsafe.Pointer(v))
-		if !pkgPathEqual(it.pkgpath, iv.pkgpath) {
+		if it.pkgpath.name() != iv.pkgpath.name() {
 			return false
 		}
 		if len(it.mhdr) != len(iv.mhdr) {
@@ -518,7 +548,7 @@ func typesEqual(t, v *_type) bool {
 			if tm.name.name() != vm.name.name() {
 				return false
 			}
-			if !pkgPathEqual(tm.name.pkgPath(), vm.name.pkgPath()) {
+			if tm.name.pkgPath() != vm.name.pkgPath() {
 				return false
 			}
 			if !typesEqual(tm._type, vm._type) {
@@ -550,7 +580,7 @@ func typesEqual(t, v *_type) bool {
 			if tf.name.name() != vf.name.name() {
 				return false
 			}
-			if !pkgPathEqual(tf.name.pkgPath(), vf.name.pkgPath()) {
+			if tf.name.pkgPath() != vf.name.pkgPath() {
 				return false
 			}
 			if !typesEqual(tf.typ, vf.typ) {
@@ -570,13 +600,3 @@ func typesEqual(t, v *_type) bool {
 		return false
 	}
 }
-
-func pkgPathEqual(p, q *string) bool {
-	if p == q {
-		return true
-	}
-	if p == nil || q == nil {
-		return false
-	}
-	return *p == *q
-}
-- 
cgit v1.3


From 02b8e6978a86c2f4f3a604e8b05014d127f4020a Mon Sep 17 00:00:00 2001
From: Jeremy Jackins <jeremyjackins@gmail.com>
Date: Wed, 13 Apr 2016 18:16:21 +0900
Subject: runtime: find a home for orphaned comments

These comments were left behind after runtime.h was converted
from C to Go. I examined the original code and tried to move these
to the places that made the most sense.

Change-Id: I8769d60234c0113d682f9de3bd8d6c34c450c188
Reviewed-on: https://go-review.googlesource.com/21969
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/lfstack.go    |   3 ++
 src/runtime/lock_futex.go |   6 +--
 src/runtime/mmap.go       |   3 ++
 src/runtime/runtime2.go   | 107 +++++++++++++++-------------------------------
 4 files changed, 44 insertions(+), 75 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/lfstack.go b/src/runtime/lfstack.go
index 1261f54d97..db54ecb05e 100644
--- a/src/runtime/lfstack.go
+++ b/src/runtime/lfstack.go
@@ -3,6 +3,9 @@
 // license that can be found in the LICENSE file.
 
 // Lock-free stack.
+// Initialize head to 0, compare with 0 to test for emptiness.
+// The stack does not keep pointers to nodes,
+// so they can be garbage collected if there are no other pointers to nodes.
 // The following code runs only on g0 stack.
 
 package runtime
diff --git a/src/runtime/lock_futex.go b/src/runtime/lock_futex.go
index d28fd92720..073136abd0 100644
--- a/src/runtime/lock_futex.go
+++ b/src/runtime/lock_futex.go
@@ -13,13 +13,13 @@ import (
 
 // This implementation depends on OS-specific implementations of
 //
-//	runtime·futexsleep(uint32 *addr, uint32 val, int64 ns)
+//	futexsleep(addr *uint32, val uint32, ns int64)
 //		Atomically,
-//			if(*addr == val) sleep
+//			if *addr == val { sleep }
 //		Might be woken up spuriously; that's allowed.
 //		Don't sleep longer than ns; ns < 0 means forever.
 //
-//	runtime·futexwakeup(uint32 *addr, uint32 cnt)
+//	futexwakeup(addr *uint32, cnt uint32)
 //		If any procs are sleeping on addr, wake up at most cnt.
 
 const (
diff --git a/src/runtime/mmap.go b/src/runtime/mmap.go
index 6363a90242..53617e41e4 100644
--- a/src/runtime/mmap.go
+++ b/src/runtime/mmap.go
@@ -13,4 +13,7 @@ package runtime
 import "unsafe"
 
 // mmap calls the mmap system call. It is implemented in assembly.
+// We only pass the lower 32 bits of file offset to the
+// assembly routine; the higher bits (if required), should be provided
+// by the assembly routine as 0.
 func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index e0137f7e97..0fdea400de 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -10,9 +10,7 @@ import (
 	"unsafe"
 )
 
-/*
- * defined constants
- */
+// defined constants
 const (
 	// G status
 	//
@@ -99,6 +97,10 @@ const (
 	_Pdead
 )
 
+// Mutual exclusion locks.  In the uncontended case,
+// as fast as spin locks (just a few user-level instructions),
+// but on the contention path they sleep in the kernel.
+// A zeroed Mutex is unlocked (no need to initialize each lock).
 type mutex struct {
 	// Futex-based impl treats it as uint32 key,
 	// while sema-based impl as M* waitm.
@@ -106,6 +108,26 @@ type mutex struct {
 	key uintptr
 }
 
+// sleep and wakeup on one-time events.
+// before any calls to notesleep or notewakeup,
+// must call noteclear to initialize the Note.
+// then, exactly one thread can call notesleep
+// and exactly one thread can call notewakeup (once).
+// once notewakeup has been called, the notesleep
+// will return.  future notesleep will return immediately.
+// subsequent noteclear must be called only after
+// previous notesleep has returned, e.g. it's disallowed
+// to call noteclear straight after notewakeup.
+//
+// notetsleep is like notesleep but wakes up after
+// a given number of nanoseconds even if the event
+// has not yet happened.  if a goroutine uses notetsleep to
+// wake up early, it must wait to call noteclear until it
+// can be sure that no other goroutine is calling
+// notewakeup.
+//
+// notesleep/notetsleep are generally called on g0,
+// notetsleepg is similar to notetsleep but is called on user g.
 type note struct {
 	// Futex-based impl treats it as uint32 key,
 	// while sema-based impl as M* waitm.
@@ -397,8 +419,8 @@ type m struct {
 	waittraceskip int
 	startingtrace bool
 	syscalltick   uint32
-	//#ifdef GOOS_windows
-	thread uintptr // thread handle
+	thread        uintptr // thread handle
+
 	// these are here because they are too large to be on the stack
 	// of low-level NOSPLIT functions.
 	libcall   libcall
@@ -406,7 +428,7 @@ type m struct {
 	libcallsp uintptr
 	libcallg  guintptr
 	syscall   libcall // stores syscall parameters on windows
-	//#endif
+
 	mOS
 }
 
@@ -530,10 +552,10 @@ type schedt struct {
 	totaltime      int64 // ∫gomaxprocs dt up to procresizetime
 }
 
-// The m->locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread.
+// The m.locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread.
 // The low bit (LockExternal) is a boolean reporting whether any LockOSThread call is active.
 // External locks are not recursive; a second lock is silently ignored.
-// The upper bits of m->locked record the nesting depth of calls to lockOSThread
+// The upper bits of m.locked record the nesting depth of calls to lockOSThread
 // (counting up by LockInternal), popped by unlockOSThread (counting down by LockInternal).
 // Internal locks can be recursive. For instance, a lock for cgo can occur while the main
 // goroutine is holding the lock during the initialization phase.
@@ -603,13 +625,6 @@ type forcegcstate struct {
 	idle uint32
 }
 
-/*
- * known to compiler
- */
-const (
-	_Structrnd = sys.RegSize
-)
-
 // startup_random_data holds random bytes initialized at startup. These come from
 // the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.go or os_linux_386.go).
 var startupRandomData []byte
@@ -635,9 +650,7 @@ func extendRandom(r []byte, n int) {
 	}
 }
 
-/*
- * deferred subroutine calls
- */
+// deferred subroutine calls
 type _defer struct {
 	siz     int32
 	started bool
@@ -648,9 +661,7 @@ type _defer struct {
 	link    *_defer
 }
 
-/*
- * panics
- */
+// panics
 type _panic struct {
 	argp      unsafe.Pointer // pointer to arguments of deferred call run during panic; cannot move - known to liblink
 	arg       interface{}    // argument to panic
@@ -659,10 +670,7 @@ type _panic struct {
 	aborted   bool           // the panic was aborted
 }
 
-/*
- * stack traces
- */
-
+// stack traces
 type stkframe struct {
 	fn       *_func     // function being run
 	pc       uintptr    // program counter within fn
@@ -682,10 +690,8 @@ const (
 	_TraceJumpStack                 // if traceback is on a systemstack, resume trace at g that called into it
 )
 
-const (
-	// The maximum number of frames we print for a traceback
-	_TracebackMaxFrames = 100
-)
+// The maximum number of frames we print for a traceback
+const _TracebackMaxFrames = 100
 
 var (
 	emptystring string
@@ -716,46 +722,3 @@ var (
 	islibrary bool // -buildmode=c-shared
 	isarchive bool // -buildmode=c-archive
 )
-
-/*
- * mutual exclusion locks.  in the uncontended case,
- * as fast as spin locks (just a few user-level instructions),
- * but on the contention path they sleep in the kernel.
- * a zeroed Mutex is unlocked (no need to initialize each lock).
- */
-
-/*
- * sleep and wakeup on one-time events.
- * before any calls to notesleep or notewakeup,
- * must call noteclear to initialize the Note.
- * then, exactly one thread can call notesleep
- * and exactly one thread can call notewakeup (once).
- * once notewakeup has been called, the notesleep
- * will return.  future notesleep will return immediately.
- * subsequent noteclear must be called only after
- * previous notesleep has returned, e.g. it's disallowed
- * to call noteclear straight after notewakeup.
- *
- * notetsleep is like notesleep but wakes up after
- * a given number of nanoseconds even if the event
- * has not yet happened.  if a goroutine uses notetsleep to
- * wake up early, it must wait to call noteclear until it
- * can be sure that no other goroutine is calling
- * notewakeup.
- *
- * notesleep/notetsleep are generally called on g0,
- * notetsleepg is similar to notetsleep but is called on user g.
- */
-// bool	runtime·notetsleep(Note*, int64);  // false - timeout
-// bool	runtime·notetsleepg(Note*, int64);  // false - timeout
-
-/*
- * Lock-free stack.
- * Initialize uint64 head to 0, compare with 0 to test for emptiness.
- * The stack does not keep pointers to nodes,
- * so they can be garbage collected if there are no other pointers to nodes.
- */
-
-// for mmap, we only pass the lower 32 bits of file offset to the
-// assembly routine; the higher bits (if required), should be provided
-// by the assembly routine as 0.
-- 
cgit v1.3


From 98b6febcef8f6d7411a77e9e828df681871a28ad Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Thu, 14 Apr 2016 10:28:35 -0700
Subject: runtime/internal/sys: better fallback algorithms for intrinsics

Use deBruijn sequences to count low-order zeros.
Reorg bswap to not use &^, it takes another instruction on x86.

Change-Id: I4a5ed9fd16ee6a279d88c067e8a2ba11de821156
Reviewed-on: https://go-review.googlesource.com/22084
Reviewed-by: David Chase <drchase@google.com>
---
 src/runtime/internal/sys/intrinsics.go      | 135 +++++++++++++++-------------
 src/runtime/internal/sys/intrinsics_test.go |  54 +++++++++++
 2 files changed, 126 insertions(+), 63 deletions(-)
 create mode 100644 src/runtime/internal/sys/intrinsics_test.go

(limited to 'src/runtime')

diff --git a/src/runtime/internal/sys/intrinsics.go b/src/runtime/internal/sys/intrinsics.go
index 8feb754dbd..1054c6948f 100644
--- a/src/runtime/internal/sys/intrinsics.go
+++ b/src/runtime/internal/sys/intrinsics.go
@@ -4,88 +4,97 @@
 
 package sys
 
+// Using techniques from http://supertech.csail.mit.edu/papers/debruijn.pdf
+
+const deBruijn64 = 0x0218a392cd3d5dbf
+
+var deBruijnIdx64 = [64]byte{
+	0, 1, 2, 7, 3, 13, 8, 19,
+	4, 25, 14, 28, 9, 34, 20, 40,
+	5, 17, 26, 38, 15, 46, 29, 48,
+	10, 31, 35, 54, 21, 50, 41, 57,
+	63, 6, 12, 18, 24, 27, 33, 39,
+	16, 37, 45, 47, 30, 53, 49, 56,
+	62, 11, 23, 32, 36, 44, 52, 55,
+	61, 22, 43, 51, 60, 42, 59, 58,
+}
+
+const deBruijn32 = 0x04653adf
+
+var deBruijnIdx32 = [32]byte{
+	0, 1, 2, 6, 3, 11, 7, 16,
+	4, 14, 12, 21, 8, 23, 17, 26,
+	31, 5, 10, 15, 13, 20, 22, 25,
+	30, 9, 19, 24, 29, 18, 28, 27,
+}
+
+const deBruijn16 = 0x09af
+
+var deBruijnIdx16 = [16]byte{
+	0, 1, 2, 5, 3, 9, 6, 11,
+	15, 4, 8, 10, 14, 7, 13, 12,
+}
+
+const deBruijn8 = 0x17
+
+var deBruijnIdx8 = [8]byte{
+	0, 1, 2, 4, 7, 3, 6, 5,
+}
+
 // Ctz64 counts trailing (low-order) zeroes,
 // and if all are zero, then 64.
 func Ctz64(x uint64) uint64 {
-	if x&0xffffffff == 0 {
-		return 32 + uint64(Ctz32(uint32(x>>32)))
-	}
-	return uint64(Ctz32(uint32(x)))
-
+	x &= -x                      // isolate low-order bit
+	y := x * deBruijn64 >> 58    // extract part of deBruijn sequence
+	y = uint64(deBruijnIdx64[y]) // convert to bit index
+	z := (x - 1) >> 57 & 64      // adjustment if zero
+	return y + z
 }
 
 // Ctz32 counts trailing (low-order) zeroes,
 // and if all are zero, then 32.
 func Ctz32(x uint32) uint32 {
-	if x&0xffff == 0 {
-		return 16 + uint32(Ctz16(uint16(x>>16)))
-	}
-	return uint32(Ctz16(uint16(x)))
+	x &= -x                      // isolate low-order bit
+	y := x * deBruijn32 >> 27    // extract part of deBruijn sequence
+	y = uint32(deBruijnIdx32[y]) // convert to bit index
+	z := (x - 1) >> 26 & 32      // adjustment if zero
+	return y + z
 }
 
 // Ctz16 counts trailing (low-order) zeroes,
 // and if all are zero, then 16.
 func Ctz16(x uint16) uint16 {
-	if x&0xff == 0 {
-		return 8 + uint16(Ctz8(uint8(x>>8)))
-	}
-	return uint16(Ctz8(uint8(x)))
+	x &= -x                      // isolate low-order bit
+	y := x * deBruijn16 >> 12    // extract part of deBruijn sequence
+	y = uint16(deBruijnIdx16[y]) // convert to bit index
+	z := (x - 1) >> 11 & 16      // adjustment if zero
+	return y + z
 }
 
 // Ctz8 counts trailing (low-order) zeroes,
 // and if all are zero, then 8.
 func Ctz8(x uint8) uint8 {
-	return ctzVals[x]
+	x &= -x                    // isolate low-order bit
+	y := x * deBruijn8 >> 5    // extract part of deBruijn sequence
+	y = uint8(deBruijnIdx8[y]) // convert to bit index
+	z := (x - 1) >> 4 & 8      // adjustment if zero
+	return y + z
 }
 
-var ctzVals = [256]uint8{
-	8, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	5, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	6, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	5, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	7, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	5, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	6, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	5, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0,
-	4, 0, 1, 0, 2, 0, 1, 0,
-	3, 0, 1, 0, 2, 0, 1, 0}
-
 // Bswap64 returns its input with byte order reversed
 // 0x0102030405060708 -> 0x0807060504030201
 func Bswap64(x uint64) uint64 {
-	c8 := uint64(0xff00ff00ff00ff00)
-	a := (x & c8) >> 8
-	b := (x &^ c8) << 8
+	c8 := uint64(0x00ff00ff00ff00ff)
+	a := x >> 8 & c8
+	b := (x & c8) << 8
 	x = a | b
-	c16 := uint64(0xffff0000ffff0000)
-	a = (x & c16) >> 16
-	b = (x &^ c16) << 16
+	c16 := uint64(0x0000ffff0000ffff)
+	a = x >> 16 & c16
+	b = (x & c16) << 16
 	x = a | b
-	c32 := uint64(0xffffffff00000000)
-	a = (x & c32) >> 32
-	b = (x &^ c32) << 32
+	c32 := uint64(0x00000000ffffffff)
+	a = x >> 32 & c32
+	b = (x & c32) << 32
 	x = a | b
 	return x
 }
@@ -93,13 +102,13 @@ func Bswap64(x uint64) uint64 {
 // Bswap32 returns its input with byte order reversed
 // 0x01020304 -> 0x04030201
 func Bswap32(x uint32) uint32 {
-	c8 := uint32(0xff00ff00)
-	a := (x & c8) >> 8
-	b := (x &^ c8) << 8
+	c8 := uint32(0x00ff00ff)
+	a := x >> 8 & c8
+	b := (x & c8) << 8
 	x = a | b
-	c16 := uint32(0xffff0000)
-	a = (x & c16) >> 16
-	b = (x &^ c16) << 16
+	c16 := uint32(0x0000ffff)
+	a = x >> 16 & c16
+	b = (x & c16) << 16
 	x = a | b
 	return x
 }
diff --git a/src/runtime/internal/sys/intrinsics_test.go b/src/runtime/internal/sys/intrinsics_test.go
new file mode 100644
index 0000000000..097631bc1e
--- /dev/null
+++ b/src/runtime/internal/sys/intrinsics_test.go
@@ -0,0 +1,54 @@
+package sys_test
+
+import (
+	"runtime/internal/sys"
+	"testing"
+)
+
+func TestCtz64(t *testing.T) {
+	for i := uint(0); i <= 64; i++ {
+		x := uint64(5) << i
+		if got := sys.Ctz64(x); got != uint64(i) {
+			t.Errorf("Ctz64(%d)=%d, want %d", x, got, i)
+		}
+	}
+}
+func TestCtz32(t *testing.T) {
+	for i := uint(0); i <= 32; i++ {
+		x := uint32(5) << i
+		if got := sys.Ctz32(x); got != uint32(i) {
+			t.Errorf("Ctz32(%d)=%d, want %d", x, got, i)
+		}
+	}
+}
+func TestCtz16(t *testing.T) {
+	for i := uint(0); i <= 16; i++ {
+		x := uint16(5) << i
+		if got := sys.Ctz16(x); got != uint16(i) {
+			t.Errorf("Ctz16(%d)=%d, want %d", x, got, i)
+		}
+	}
+}
+func TestCtz8(t *testing.T) {
+	for i := uint(0); i <= 8; i++ {
+		x := uint8(5) << i
+		if got := sys.Ctz8(x); got != uint8(i) {
+			t.Errorf("Ctz8(%d)=%d, want %d", x, got, i)
+		}
+	}
+}
+
+func TestBswap64(t *testing.T) {
+	x := uint64(0x1122334455667788)
+	y := sys.Bswap64(x)
+	if y != 0x8877665544332211 {
+		t.Errorf("Bswap(%x)=%x, want 0x8877665544332211", x, y)
+	}
+}
+func TestBswap32(t *testing.T) {
+	x := uint32(0x11223344)
+	y := sys.Bswap32(x)
+	if y != 0x44332211 {
+		t.Errorf("Bswap(%x)=%x, want 0x44332211", x, y)
+	}
+}
-- 
cgit v1.3


From 8f6c35de2f7c972a4f34efddd21281b7060c4457 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Wed, 13 Apr 2016 15:06:01 -0400
Subject: runtime: make sync_atomic_SwapPointer signature match sync/atomic

SwapPointer is declared as

  func SwapPointer(addr *unsafe.Pointer, new unsafe.Pointer) (old unsafe.Pointer)

in sync/atomic, but defined in the runtime (where it's actually
implemented) as

  func sync_atomic_SwapPointer(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer

Make ptr a *unsafe.Pointer in the runtime definition to match the type
in sync/atomic.

Change-Id: I99bab651b995001bbe54f9e790fdef2417ef0e9e
Reviewed-on: https://go-review.googlesource.com/21998
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Michael Matloob <matloob@golang.org>
---
 src/runtime/atomic_pointer.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/atomic_pointer.go b/src/runtime/atomic_pointer.go
index e9f5d03b2b..ee55de13b9 100644
--- a/src/runtime/atomic_pointer.go
+++ b/src/runtime/atomic_pointer.go
@@ -53,9 +53,9 @@ func sync_atomic_SwapUintptr(ptr *uintptr, new uintptr) uintptr
 
 //go:linkname sync_atomic_SwapPointer sync/atomic.SwapPointer
 //go:nosplit
-func sync_atomic_SwapPointer(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer {
-	old := unsafe.Pointer(sync_atomic_SwapUintptr((*uintptr)(noescape(ptr)), uintptr(new)))
-	writebarrierptr_nostore((*uintptr)(ptr), uintptr(new))
+func sync_atomic_SwapPointer(ptr *unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer {
+	old := unsafe.Pointer(sync_atomic_SwapUintptr((*uintptr)(noescape(unsafe.Pointer(ptr))), uintptr(new)))
+	writebarrierptr_nostore((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
 	return old
 }
 
-- 
cgit v1.3


From 7c7081f514c9ec4820a842f7ef07394df4d41bab Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Wed, 13 Apr 2016 15:12:32 -0400
Subject: sync/atomic: don't atomically write pointers twice

sync/atomic.StorePointer (which is implemented in
runtime/atomic_pointer.go) writes the pointer twice (through two
completely different code paths, no less). Fix it to only write once.

Change-Id: Id3b2aef9aa9081c2cf096833e001b93d3dd1f5da
Reviewed-on: https://go-review.googlesource.com/21999
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Michael Matloob <matloob@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
---
 src/runtime/atomic_pointer.go | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/runtime')

diff --git a/src/runtime/atomic_pointer.go b/src/runtime/atomic_pointer.go
index ee55de13b9..4fe334014d 100644
--- a/src/runtime/atomic_pointer.go
+++ b/src/runtime/atomic_pointer.go
@@ -44,7 +44,6 @@ func sync_atomic_StoreUintptr(ptr *uintptr, new uintptr)
 //go:nosplit
 func sync_atomic_StorePointer(ptr *unsafe.Pointer, new unsafe.Pointer) {
 	sync_atomic_StoreUintptr((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
-	atomic.StorepNoWB(noescape(unsafe.Pointer(ptr)), new)
 	writebarrierptr_nostore((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
 }
 
-- 
cgit v1.3


From 6f59ccb052534f3293e9e0b0e02f35578901ef3c Mon Sep 17 00:00:00 2001
From: Mikio Hara <mikioh.mikioh@gmail.com>
Date: Fri, 15 Jan 2016 14:57:41 +0900
Subject: runtime: don't always unblock all signals on dragonfly, freebsd and
 openbsd

https://golang.org/cl/10173 introduced msigsave, ensureSigM and
_SigUnblock but didn't enable the new signal save/restore mechanism for
SIG{HUP,INT,QUIT,ABRT,TERM} on DragonFly BSD, FreeBSD and OpenBSD.

At present, it looks like they have the implementation. This change
enables the new mechanism on DragonFly BSD, FreeBSD and OpenBSD, the same
as on Darwin and NetBSD.

Change-Id: Ifb4b4743b3b4f50bfcdc7cf1fe1b59c377fa2a41
Reviewed-on: https://go-review.googlesource.com/18657
Run-TryBot: Mikio Hara <mikioh.mikioh@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 misc/cgo/test/cgo_linux_test.go    |  9 ++++-----
 misc/cgo/test/cgo_unix_test.go     |  1 +
 misc/cgo/test/sigprocmask.c        | 38 ++++++++++++++++++++++++++++++++++++
 misc/cgo/test/sigprocmask.go       | 40 ++++++++++++++++++++++++++++++++++++++
 misc/cgo/test/sigprocmask_linux.c  | 36 ----------------------------------
 misc/cgo/test/sigprocmask_linux.go | 38 ------------------------------------
 src/runtime/signal_dragonfly.go    | 14 ++++++-------
 src/runtime/signal_freebsd.go      | 14 ++++++-------
 src/runtime/signal_openbsd.go      | 14 ++++++-------
 9 files changed, 104 insertions(+), 100 deletions(-)
 create mode 100644 misc/cgo/test/sigprocmask.c
 create mode 100644 misc/cgo/test/sigprocmask.go
 delete mode 100644 misc/cgo/test/sigprocmask_linux.c
 delete mode 100644 misc/cgo/test/sigprocmask_linux.go

(limited to 'src/runtime')

diff --git a/misc/cgo/test/cgo_linux_test.go b/misc/cgo/test/cgo_linux_test.go
index 3cc2af5919..6e1d1065f6 100644
--- a/misc/cgo/test/cgo_linux_test.go
+++ b/misc/cgo/test/cgo_linux_test.go
@@ -6,8 +6,7 @@ package cgotest
 
 import "testing"
 
-func TestSetgid(t *testing.T)      { testSetgid(t) }
-func Test6997(t *testing.T)        { test6997(t) }
-func TestBuildID(t *testing.T)     { testBuildID(t) }
-func Test9400(t *testing.T)        { test9400(t) }
-func TestSigProcMask(t *testing.T) { testSigProcMask(t) }
+func TestSetgid(t *testing.T)  { testSetgid(t) }
+func Test6997(t *testing.T)    { test6997(t) }
+func TestBuildID(t *testing.T) { testBuildID(t) }
+func Test9400(t *testing.T)    { test9400(t) }
diff --git a/misc/cgo/test/cgo_unix_test.go b/misc/cgo/test/cgo_unix_test.go
index 5808e6edc8..5fe3251e0b 100644
--- a/misc/cgo/test/cgo_unix_test.go
+++ b/misc/cgo/test/cgo_unix_test.go
@@ -9,3 +9,4 @@ package cgotest
 import "testing"
 
 func TestSigaltstack(t *testing.T) { testSigaltstack(t) }
+func TestSigprocmask(t *testing.T) { testSigprocmask(t) }
diff --git a/misc/cgo/test/sigprocmask.c b/misc/cgo/test/sigprocmask.c
new file mode 100644
index 0000000000..bd99647d2b
--- /dev/null
+++ b/misc/cgo/test/sigprocmask.c
@@ -0,0 +1,38 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !windows
+
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+extern void IntoGoAndBack();
+
+int CheckBlocked() {
+	sigset_t mask;
+	sigprocmask(SIG_BLOCK, NULL, &mask);
+	return sigismember(&mask, SIGIO);
+}
+
+static void* sigthreadfunc(void* unused) {
+	sigset_t mask;
+	sigemptyset(&mask);
+	sigaddset(&mask, SIGIO);
+	sigprocmask(SIG_BLOCK, &mask, NULL);
+	IntoGoAndBack();
+	return NULL;
+}
+
+int RunSigThread() {
+	pthread_t thread;
+	int r;
+
+	r = pthread_create(&thread, NULL, &sigthreadfunc, NULL);
+	if (r != 0)
+		return r;
+	return pthread_join(thread, NULL);
+}
diff --git a/misc/cgo/test/sigprocmask.go b/misc/cgo/test/sigprocmask.go
new file mode 100644
index 0000000000..39b658e96c
--- /dev/null
+++ b/misc/cgo/test/sigprocmask.go
@@ -0,0 +1,40 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !windows
+
+package cgotest
+
+/*
+#cgo CFLAGS: -pthread
+#cgo LDFLAGS: -pthread
+extern int RunSigThread();
+extern int CheckBlocked();
+*/
+import "C"
+import (
+	"os"
+	"os/signal"
+	"syscall"
+	"testing"
+)
+
+var blocked bool
+
+//export IntoGoAndBack
+func IntoGoAndBack() {
+	// Verify that SIGIO stays blocked on the C thread
+	// even when unblocked for signal.Notify().
+	signal.Notify(make(chan os.Signal), syscall.SIGIO)
+	blocked = C.CheckBlocked() != 0
+}
+
+func testSigprocmask(t *testing.T) {
+	if r := C.RunSigThread(); r != 0 {
+		t.Error("pthread_create/pthread_join failed")
+	}
+	if !blocked {
+		t.Error("Go runtime unblocked SIGIO")
+	}
+}
diff --git a/misc/cgo/test/sigprocmask_linux.c b/misc/cgo/test/sigprocmask_linux.c
deleted file mode 100644
index 518c533fa4..0000000000
--- a/misc/cgo/test/sigprocmask_linux.c
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include <signal.h>
-#include <stdlib.h>
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
-
-extern void IntoGoAndBack();
-
-int CheckBlocked() {
-	sigset_t mask;
-	sigprocmask(SIG_BLOCK, NULL, &mask);
-	return sigismember(&mask, SIGIO);
-}
-
-static void* sigthreadfunc(void* unused) {
-	sigset_t mask;
-	sigemptyset(&mask);
-	sigaddset(&mask, SIGIO);
-	sigprocmask(SIG_BLOCK, &mask, NULL);
-	IntoGoAndBack();
-	return NULL;
-}
-
-int RunSigThread() {
-	pthread_t thread;
-	int r;
-
-	r = pthread_create(&thread, NULL, &sigthreadfunc, NULL);
-	if (r != 0)
-		return r;
-	return pthread_join(thread, NULL);
-}
diff --git a/misc/cgo/test/sigprocmask_linux.go b/misc/cgo/test/sigprocmask_linux.go
deleted file mode 100644
index 7d343e92c4..0000000000
--- a/misc/cgo/test/sigprocmask_linux.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cgotest
-
-/*
-#cgo CFLAGS: -pthread
-#cgo LDFLAGS: -pthread
-extern int RunSigThread();
-extern int CheckBlocked();
-*/
-import "C"
-import (
-	"os"
-	"os/signal"
-	"syscall"
-	"testing"
-)
-
-var blocked bool
-
-//export IntoGoAndBack
-func IntoGoAndBack() {
-	// Verify that SIGIO stays blocked on the C thread
-	// even when unblocked for signal.Notify().
-	signal.Notify(make(chan os.Signal), syscall.SIGIO)
-	blocked = C.CheckBlocked() != 0
-}
-
-func testSigProcMask(t *testing.T) {
-	if r := C.RunSigThread(); r != 0 {
-		t.Error("pthread_create/pthread_join failed")
-	}
-	if !blocked {
-		t.Error("Go runtime unblocked SIGIO")
-	}
-}
diff --git a/src/runtime/signal_dragonfly.go b/src/runtime/signal_dragonfly.go
index f507a07233..8e9ce17c86 100644
--- a/src/runtime/signal_dragonfly.go
+++ b/src/runtime/signal_dragonfly.go
@@ -14,14 +14,14 @@ var sigtable = [...]sigTabT{
 	/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
 	/* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"},
 	/* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"},
-	/* 4 */ {_SigThrow, "SIGILL: illegal instruction"},
-	/* 5 */ {_SigThrow, "SIGTRAP: trace trap"},
+	/* 4 */ {_SigThrow + _SigUnblock, "SIGILL: illegal instruction"},
+	/* 5 */ {_SigThrow + _SigUnblock, "SIGTRAP: trace trap"},
 	/* 6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"},
 	/* 7 */ {_SigThrow, "SIGEMT: emulate instruction executed"},
-	/* 8 */ {_SigPanic, "SIGFPE: floating-point exception"},
+	/* 8 */ {_SigPanic + _SigUnblock, "SIGFPE: floating-point exception"},
 	/* 9 */ {0, "SIGKILL: kill"},
-	/* 10 */ {_SigPanic, "SIGBUS: bus error"},
-	/* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
+	/* 10 */ {_SigPanic + _SigUnblock, "SIGBUS: bus error"},
+	/* 11 */ {_SigPanic + _SigUnblock, "SIGSEGV: segmentation violation"},
 	/* 12 */ {_SigThrow, "SIGSYS: bad system call"},
 	/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
 	/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
@@ -30,14 +30,14 @@ var sigtable = [...]sigTabT{
 	/* 17 */ {0, "SIGSTOP: stop"},
 	/* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
 	/* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"},
-	/* 20 */ {_SigNotify, "SIGCHLD: child status has changed"},
+	/* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"},
 	/* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
 	/* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
 	/* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
 	/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
 	/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
 	/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
-	/* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"},
+	/* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"},
 	/* 28 */ {_SigNotify, "SIGWINCH: window size change"},
 	/* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
 	/* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
diff --git a/src/runtime/signal_freebsd.go b/src/runtime/signal_freebsd.go
index cd2068a62c..c8b09e92d9 100644
--- a/src/runtime/signal_freebsd.go
+++ b/src/runtime/signal_freebsd.go
@@ -16,14 +16,14 @@ var sigtable = [...]sigTabT{
 	/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
 	/* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"},
 	/* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"},
-	/* 4 */ {_SigThrow, "SIGILL: illegal instruction"},
-	/* 5 */ {_SigThrow, "SIGTRAP: trace trap"},
+	/* 4 */ {_SigThrow + _SigUnblock, "SIGILL: illegal instruction"},
+	/* 5 */ {_SigThrow + _SigUnblock, "SIGTRAP: trace trap"},
 	/* 6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"},
 	/* 7 */ {_SigThrow, "SIGEMT: emulate instruction executed"},
-	/* 8 */ {_SigPanic, "SIGFPE: floating-point exception"},
+	/* 8 */ {_SigPanic + _SigUnblock, "SIGFPE: floating-point exception"},
 	/* 9 */ {0, "SIGKILL: kill"},
-	/* 10 */ {_SigPanic, "SIGBUS: bus error"},
-	/* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
+	/* 10 */ {_SigPanic + _SigUnblock, "SIGBUS: bus error"},
+	/* 11 */ {_SigPanic + _SigUnblock, "SIGSEGV: segmentation violation"},
 	/* 12 */ {_SigNotify, "SIGSYS: bad system call"},
 	/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
 	/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
@@ -32,14 +32,14 @@ var sigtable = [...]sigTabT{
 	/* 17 */ {0, "SIGSTOP: stop"},
 	/* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
 	/* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"},
-	/* 20 */ {_SigNotify, "SIGCHLD: child status has changed"},
+	/* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"},
 	/* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
 	/* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
 	/* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
 	/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
 	/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
 	/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
-	/* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"},
+	/* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"},
 	/* 28 */ {_SigNotify, "SIGWINCH: window size change"},
 	/* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
 	/* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
diff --git a/src/runtime/signal_openbsd.go b/src/runtime/signal_openbsd.go
index 3c50190da4..9275279860 100644
--- a/src/runtime/signal_openbsd.go
+++ b/src/runtime/signal_openbsd.go
@@ -16,14 +16,14 @@ var sigtable = [...]sigTabT{
 	/*  1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
 	/*  2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"},
 	/*  3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"},
-	/*  4 */ {_SigThrow, "SIGILL: illegal instruction"},
-	/*  5 */ {_SigThrow, "SIGTRAP: trace trap"},
+	/*  4 */ {_SigThrow + _SigUnblock, "SIGILL: illegal instruction"},
+	/*  5 */ {_SigThrow + _SigUnblock, "SIGTRAP: trace trap"},
 	/*  6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"},
 	/*  7 */ {_SigThrow, "SIGEMT: emulate instruction executed"},
-	/*  8 */ {_SigPanic, "SIGFPE: floating-point exception"},
+	/*  8 */ {_SigPanic + _SigUnblock, "SIGFPE: floating-point exception"},
 	/*  9 */ {0, "SIGKILL: kill"},
-	/* 10 */ {_SigPanic, "SIGBUS: bus error"},
-	/* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
+	/* 10 */ {_SigPanic + _SigUnblock, "SIGBUS: bus error"},
+	/* 11 */ {_SigPanic + _SigUnblock, "SIGSEGV: segmentation violation"},
 	/* 12 */ {_SigThrow, "SIGSYS: bad system call"},
 	/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
 	/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
@@ -32,14 +32,14 @@ var sigtable = [...]sigTabT{
 	/* 17 */ {0, "SIGSTOP: stop"},
 	/* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
 	/* 19 */ {_SigNotify + _SigDefault, "SIGCONT: continue after stop"},
-	/* 20 */ {_SigNotify, "SIGCHLD: child status has changed"},
+	/* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"},
 	/* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
 	/* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
 	/* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
 	/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
 	/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
 	/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
-	/* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"},
+	/* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"},
 	/* 28 */ {_SigNotify, "SIGWINCH: window size change"},
 	/* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
 	/* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
-- 
cgit v1.3


From c955bb2040e601c474e547b8badbe44677c9fbdf Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Thu, 14 Apr 2016 12:12:45 -0400
Subject: runtime: common auxv parser

Currently several different Linux architectures have separate copies
of the auxv parser. Bring these all together into a single copy of the
parser that calls out to a per-arch handler for each tag/value pair.
This is in preparation for handling common auxv tags in one place.

For #9993.

Change-Id: Iceebc3afad6b4133b70fca7003561ae370445c10
Reviewed-on: https://go-review.googlesource.com/22061
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Hudson-Doyle <michael.hudson@canonical.com>
---
 src/runtime/os_linux.go         | 23 ++++++++++++++++++
 src/runtime/os_linux_386.go     | 24 ++++---------------
 src/runtime/os_linux_arm.go     | 48 ++++++++++++++-----------------------
 src/runtime/os_linux_arm64.go   | 32 +++++++------------------
 src/runtime/os_linux_noauxv.go  | 10 ++++++++
 src/runtime/vdso_linux_amd64.go | 52 +++++++++++++----------------------------
 src/runtime/vdso_none.go        |  5 +---
 7 files changed, 81 insertions(+), 113 deletions(-)
 create mode 100644 src/runtime/os_linux_noauxv.go

(limited to 'src/runtime')

diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index 7d8cc7e5c4..eeb30c7dd9 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -176,6 +176,29 @@ func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
 var failallocatestack = []byte("runtime: failed to allocate stack for the new OS thread\n")
 var failthreadcreate = []byte("runtime: failed to create new OS thread\n")
 
+const (
+	_AT_NULL = 0 // End of vector
+)
+
+func sysargs(argc int32, argv **byte) {
+	n := argc + 1
+
+	// skip over argv, envp to get to auxv
+	for argv_index(argv, n) != nil {
+		n++
+	}
+
+	// skip NULL separator
+	n++
+
+	// now argv+n is auxv
+	auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
+	for i := 0; auxv[i] != _AT_NULL; i += 2 {
+		tag, val := auxv[i], auxv[i+1]
+		archauxv(tag, val)
+	}
+}
+
 func osinit() {
 	ncpu = getproccount()
 }
diff --git a/src/runtime/os_linux_386.go b/src/runtime/os_linux_386.go
index 0f39cade3b..2383d962b2 100644
--- a/src/runtime/os_linux_386.go
+++ b/src/runtime/os_linux_386.go
@@ -4,30 +4,16 @@
 
 package runtime
 
-import (
-	"runtime/internal/sys"
-	"unsafe"
-)
+import "unsafe"
 
 const (
-	_AT_NULL    = 0
 	_AT_RANDOM  = 25
 	_AT_SYSINFO = 32
 )
 
-func sysargs(argc int32, argv **byte) {
-	// skip over argv, envv to get to auxv
-	n := argc + 1
-	for argv_index(argv, n) != nil {
-		n++
-	}
-	n++
-	auxv := (*[1 << 28]uint32)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
-
-	for i := 0; auxv[i] != _AT_NULL; i += 2 {
-		switch auxv[i] {
-		case _AT_RANDOM:
-			startupRandomData = (*[16]byte)(unsafe.Pointer(uintptr(auxv[i+1])))[:]
-		}
+func archauxv(tag, val uintptr) {
+	switch tag {
+	case _AT_RANDOM:
+		startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
 	}
 }
diff --git a/src/runtime/os_linux_arm.go b/src/runtime/os_linux_arm.go
index 8fdfb585ba..a61be916b6 100644
--- a/src/runtime/os_linux_arm.go
+++ b/src/runtime/os_linux_arm.go
@@ -4,13 +4,9 @@
 
 package runtime
 
-import (
-	"runtime/internal/sys"
-	"unsafe"
-)
+import "unsafe"
 
 const (
-	_AT_NULL     = 0
 	_AT_PLATFORM = 15 //  introduced in at least 2.6.11
 	_AT_HWCAP    = 16 // introduced in at least 2.6.11
 	_AT_RANDOM   = 25 // introduced in 2.6.29
@@ -36,33 +32,23 @@ func checkgoarm() {
 	}
 }
 
-func sysargs(argc int32, argv **byte) {
-	// skip over argv, envv to get to auxv
-	n := argc + 1
-	for argv_index(argv, n) != nil {
-		n++
-	}
-	n++
-	auxv := (*[1 << 28]uint32)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
-
-	for i := 0; auxv[i] != _AT_NULL; i += 2 {
-		switch auxv[i] {
-		case _AT_RANDOM: // kernel provides a pointer to 16-bytes worth of random data
-			startupRandomData = (*[16]byte)(unsafe.Pointer(uintptr(auxv[i+1])))[:]
-			// the pointer provided may not be word aligned, so we must treat it
-			// as a byte array.
-			randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
-				uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
-
-		case _AT_PLATFORM: // v5l, v6l, v7l
-			t := *(*uint8)(unsafe.Pointer(uintptr(auxv[i+1] + 1)))
-			if '5' <= t && t <= '7' {
-				armArch = t - '0'
-			}
-
-		case _AT_HWCAP: // CPU capability bit flags
-			hwcap = auxv[i+1]
+func archauxv(tag, val uintptr) {
+	switch tag {
+	case _AT_RANDOM: // kernel provides a pointer to 16-bytes worth of random data
+		startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
+		// the pointer provided may not be word aligned, so we must treat it
+		// as a byte array.
+		randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
+			uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
+
+	case _AT_PLATFORM: // v5l, v6l, v7l
+		t := *(*uint8)(unsafe.Pointer(val + 1))
+		if '5' <= t && t <= '7' {
+			armArch = t - '0'
 		}
+
+	case _AT_HWCAP: // CPU capability bit flags
+		hwcap = uint32(val)
 	}
 }
 
diff --git a/src/runtime/os_linux_arm64.go b/src/runtime/os_linux_arm64.go
index 57184b0d3a..aa9d4d9885 100644
--- a/src/runtime/os_linux_arm64.go
+++ b/src/runtime/os_linux_arm64.go
@@ -4,36 +4,22 @@
 
 package runtime
 
-import (
-	"runtime/internal/sys"
-	"unsafe"
-)
+import "unsafe"
 
 const (
-	_AT_NULL   = 0
 	_AT_RANDOM = 25 // introduced in 2.6.29
 )
 
 var randomNumber uint32
 
-func sysargs(argc int32, argv **byte) {
-	// skip over argv, envv to get to auxv
-	n := argc + 1
-	for argv_index(argv, n) != nil {
-		n++
-	}
-	n++
-	auxv := (*[1 << 29]uint64)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
-
-	for i := 0; auxv[i] != _AT_NULL; i += 2 {
-		switch auxv[i] {
-		case _AT_RANDOM: // kernel provides a pointer to 16-bytes worth of random data
-			startupRandomData = (*[16]byte)(unsafe.Pointer(uintptr(auxv[i+1])))[:]
-			// the pointer provided may not be word aligned, so we must treat it
-			// as a byte array.
-			randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
-				uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
-		}
+func archauxv(tag, val uintptr) {
+	switch tag {
+	case _AT_RANDOM: // kernel provides a pointer to 16-bytes worth of random data
+		startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
+		// the pointer provided may not be word aligned, so we must treat it
+		// as a byte array.
+		randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
+			uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
 	}
 }
 
diff --git a/src/runtime/os_linux_noauxv.go b/src/runtime/os_linux_noauxv.go
new file mode 100644
index 0000000000..d26c85bbb6
--- /dev/null
+++ b/src/runtime/os_linux_noauxv.go
@@ -0,0 +1,10 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !386,!amd64,!arm,!arm64
+
+package runtime
+
+func archauxv(tag, val uintptr) {
+}
diff --git a/src/runtime/vdso_linux_amd64.go b/src/runtime/vdso_linux_amd64.go
index 42571e063c..1aae9b6570 100644
--- a/src/runtime/vdso_linux_amd64.go
+++ b/src/runtime/vdso_linux_amd64.go
@@ -4,10 +4,7 @@
 
 package runtime
 
-import (
-	"runtime/internal/sys"
-	"unsafe"
-)
+import "unsafe"
 
 // Look up symbols in the Linux vDSO.
 
@@ -23,7 +20,6 @@ import (
 const (
 	_AT_RANDOM       = 25
 	_AT_SYSINFO_EHDR = 33
-	_AT_NULL         = 0 /* End of vector */
 
 	_PT_LOAD    = 1 /* Loadable program segment */
 	_PT_DYNAMIC = 2 /* Dynamic linking information */
@@ -294,37 +290,21 @@ func vdso_parse_symbols(info *vdso_info, version int32) {
 	}
 }
 
-func sysargs(argc int32, argv **byte) {
-	n := argc + 1
-
-	// skip envp to get to ELF auxiliary vector.
-	for argv_index(argv, n) != nil {
-		n++
-	}
-
-	// skip NULL separator
-	n++
-
-	// now argv+n is auxv
-	auxv := (*[1 << 32]elf64Auxv)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
-
-	for i := 0; auxv[i].a_type != _AT_NULL; i++ {
-		av := &auxv[i]
-		switch av.a_type {
-		case _AT_SYSINFO_EHDR:
-			if av.a_val == 0 {
-				// Something went wrong
-				continue
-			}
-			var info vdso_info
-			// TODO(rsc): I don't understand why the compiler thinks info escapes
-			// when passed to the three functions below.
-			info1 := (*vdso_info)(noescape(unsafe.Pointer(&info)))
-			vdso_init_from_sysinfo_ehdr(info1, (*elf64Ehdr)(unsafe.Pointer(uintptr(av.a_val))))
-			vdso_parse_symbols(info1, vdso_find_version(info1, &linux26))
-
-		case _AT_RANDOM:
-			startupRandomData = (*[16]byte)(unsafe.Pointer(uintptr(av.a_val)))[:]
+func archauxv(tag, val uintptr) {
+	switch tag {
+	case _AT_SYSINFO_EHDR:
+		if val == 0 {
+			// Something went wrong
+			return
 		}
+		var info vdso_info
+		// TODO(rsc): I don't understand why the compiler thinks info escapes
+		// when passed to the three functions below.
+		info1 := (*vdso_info)(noescape(unsafe.Pointer(&info)))
+		vdso_init_from_sysinfo_ehdr(info1, (*elf64Ehdr)(unsafe.Pointer(val)))
+		vdso_parse_symbols(info1, vdso_find_version(info1, &linux26))
+
+	case _AT_RANDOM:
+		startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
 	}
 }
diff --git a/src/runtime/vdso_none.go b/src/runtime/vdso_none.go
index e14e1a4707..efae23f6ee 100644
--- a/src/runtime/vdso_none.go
+++ b/src/runtime/vdso_none.go
@@ -2,10 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build !linux !amd64
-// +build !linux !386
-// +build !linux !arm
-// +build !linux !arm64
+// +build !linux
 
 package runtime
 
-- 
cgit v1.3


From 90addd3d41852192ba697d33c9b1660988b82ed7 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Thu, 14 Apr 2016 12:32:28 -0400
Subject: runtime: common handling of _AT_RANDOM auxv

The Linux kernel provides 16 bytes of random data via the auxv vector
at startup. Currently we consume this separately on 386, amd64, arm,
and arm64. Now that we have a common auxv parser, handle _AT_RANDOM in
the common path.

Change-Id: Ib69549a1d37e2d07a351cf0f44007bcd24f0d20d
Reviewed-on: https://go-review.googlesource.com/22062
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
---
 src/runtime/os_linux.go         |  9 ++++++++-
 src/runtime/os_linux_386.go     |  7 -------
 src/runtime/os_linux_arm.go     |  9 ++++-----
 src/runtime/os_linux_arm64.go   | 14 ++++----------
 src/runtime/vdso_linux_amd64.go |  4 ----
 5 files changed, 16 insertions(+), 27 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index eeb30c7dd9..4645f1c33d 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -177,7 +177,8 @@ var failallocatestack = []byte("runtime: failed to allocate stack for the new OS
 var failthreadcreate = []byte("runtime: failed to create new OS thread\n")
 
 const (
-	_AT_NULL = 0 // End of vector
+	_AT_NULL   = 0  // End of vector
+	_AT_RANDOM = 25 // introduced in 2.6.29
 )
 
 func sysargs(argc int32, argv **byte) {
@@ -195,6 +196,12 @@ func sysargs(argc int32, argv **byte) {
 	auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
 	for i := 0; auxv[i] != _AT_NULL; i += 2 {
 		tag, val := auxv[i], auxv[i+1]
+		switch tag {
+		case _AT_RANDOM:
+			// The kernel provides a pointer to 16-bytes
+			// worth of random data.
+			startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
+		}
 		archauxv(tag, val)
 	}
 }
diff --git a/src/runtime/os_linux_386.go b/src/runtime/os_linux_386.go
index 2383d962b2..cf031afe45 100644
--- a/src/runtime/os_linux_386.go
+++ b/src/runtime/os_linux_386.go
@@ -4,16 +4,9 @@
 
 package runtime
 
-import "unsafe"
-
 const (
-	_AT_RANDOM  = 25
 	_AT_SYSINFO = 32
 )
 
 func archauxv(tag, val uintptr) {
-	switch tag {
-	case _AT_RANDOM:
-		startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
-	}
 }
diff --git a/src/runtime/os_linux_arm.go b/src/runtime/os_linux_arm.go
index a61be916b6..8e2765a413 100644
--- a/src/runtime/os_linux_arm.go
+++ b/src/runtime/os_linux_arm.go
@@ -9,7 +9,6 @@ import "unsafe"
 const (
 	_AT_PLATFORM = 15 //  introduced in at least 2.6.11
 	_AT_HWCAP    = 16 // introduced in at least 2.6.11
-	_AT_RANDOM   = 25 // introduced in 2.6.29
 
 	_HWCAP_VFP   = 1 << 6  // introduced in at least 2.6.11
 	_HWCAP_VFPv3 = 1 << 13 // introduced in 2.6.30
@@ -34,10 +33,10 @@ func checkgoarm() {
 
 func archauxv(tag, val uintptr) {
 	switch tag {
-	case _AT_RANDOM: // kernel provides a pointer to 16-bytes worth of random data
-		startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
-		// the pointer provided may not be word aligned, so we must treat it
-		// as a byte array.
+	case _AT_RANDOM:
+		// sysargs filled in startupRandomData, but that
+		// pointer may not be word aligned, so we must treat
+		// it as a byte array.
 		randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
 			uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
 
diff --git a/src/runtime/os_linux_arm64.go b/src/runtime/os_linux_arm64.go
index aa9d4d9885..43262aea14 100644
--- a/src/runtime/os_linux_arm64.go
+++ b/src/runtime/os_linux_arm64.go
@@ -4,20 +4,14 @@
 
 package runtime
 
-import "unsafe"
-
-const (
-	_AT_RANDOM = 25 // introduced in 2.6.29
-)
-
 var randomNumber uint32
 
 func archauxv(tag, val uintptr) {
 	switch tag {
-	case _AT_RANDOM: // kernel provides a pointer to 16-bytes worth of random data
-		startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
-		// the pointer provided may not be word aligned, so we must treat it
-		// as a byte array.
+	case _AT_RANDOM:
+		// sysargs filled in startupRandomData, but that
+		// pointer may not be word aligned, so we must treat
+		// it as a byte array.
 		randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
 			uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
 	}
diff --git a/src/runtime/vdso_linux_amd64.go b/src/runtime/vdso_linux_amd64.go
index 1aae9b6570..8a970dfbe6 100644
--- a/src/runtime/vdso_linux_amd64.go
+++ b/src/runtime/vdso_linux_amd64.go
@@ -18,7 +18,6 @@ import "unsafe"
 // http://refspecs.linuxfoundation.org/LSB_3.2.0/LSB-Core-generic/LSB-Core-generic/symversion.html
 
 const (
-	_AT_RANDOM       = 25
 	_AT_SYSINFO_EHDR = 33
 
 	_PT_LOAD    = 1 /* Loadable program segment */
@@ -303,8 +302,5 @@ func archauxv(tag, val uintptr) {
 		info1 := (*vdso_info)(noescape(unsafe.Pointer(&info)))
 		vdso_init_from_sysinfo_ehdr(info1, (*elf64Ehdr)(unsafe.Pointer(val)))
 		vdso_parse_symbols(info1, vdso_find_version(info1, &linux26))
-
-	case _AT_RANDOM:
-		startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
 	}
 }
-- 
cgit v1.3


From d6b177d1eb7849375e246b97c58406aed7350c86 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Thu, 14 Apr 2016 15:08:24 -0400
Subject: runtime: remove empty 386 archauxv

archauxv no longer does anything on 386, so remove it.

Change-Id: I94545238e40fa6a6832a7c3b40aedfc6c1f6a97b
Reviewed-on: https://go-review.googlesource.com/22063
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
---
 src/runtime/os_linux_386.go    | 12 ------------
 src/runtime/os_linux_noauxv.go |  2 +-
 2 files changed, 1 insertion(+), 13 deletions(-)
 delete mode 100644 src/runtime/os_linux_386.go

(limited to 'src/runtime')

diff --git a/src/runtime/os_linux_386.go b/src/runtime/os_linux_386.go
deleted file mode 100644
index cf031afe45..0000000000
--- a/src/runtime/os_linux_386.go
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
-	_AT_SYSINFO = 32
-)
-
-func archauxv(tag, val uintptr) {
-}
diff --git a/src/runtime/os_linux_noauxv.go b/src/runtime/os_linux_noauxv.go
index d26c85bbb6..0b46f594ce 100644
--- a/src/runtime/os_linux_noauxv.go
+++ b/src/runtime/os_linux_noauxv.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build !386,!amd64,!arm,!arm64
+// +build !amd64,!arm,!arm64
 
 package runtime
 
-- 
cgit v1.3


From 8ce844e88ed8c16bef7febea05b003b50bd0609e Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Thu, 14 Apr 2016 13:27:36 -0400
Subject: runtime: check kernel physical page size during init

The runtime hard-codes an assumed physical page size. If this is
smaller than the kernel's page size or not a multiple of it, sysUnused
may incorrectly release more memory to the system than intended.

Add a runtime startup check that the runtime's assumed physical page
size is compatible with the kernel's physical page size.

For #9993.

Change-Id: Ida9d07f93c00ca9a95dd55fc59bf0d8a607f6728
Reviewed-on: https://go-review.googlesource.com/22064
Reviewed-by: Rick Hudson <rlh@golang.org>
---
 src/runtime/os_linux.go | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'src/runtime')

diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index 4645f1c33d..35b57d8a23 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -178,6 +178,7 @@ var failthreadcreate = []byte("runtime: failed to create new OS thread\n")
 
 const (
 	_AT_NULL   = 0  // End of vector
+	_AT_PAGESZ = 6  // System physical page size
 	_AT_RANDOM = 25 // introduced in 2.6.29
 )
 
@@ -201,7 +202,21 @@ func sysargs(argc int32, argv **byte) {
 			// The kernel provides a pointer to 16-bytes
 			// worth of random data.
 			startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
+
+		case _AT_PAGESZ:
+			// Check that the true physical page size is
+			// compatible with the runtime's assumed
+			// physical page size.
+			if sys.PhysPageSize < val {
+				print("runtime: kernel page size (", val, ") is larger than runtime page size (", sys.PhysPageSize, ")\n")
+				exit(1)
+			}
+			if sys.PhysPageSize%val != 0 {
+				print("runtime: runtime page size (", sys.PhysPageSize, ") is not a multiple of kernel page size (", val, ")\n")
+				exit(1)
+			}
 		}
+
 		archauxv(tag, val)
 	}
 }
-- 
cgit v1.3


From 1151473077fb03df798d4eb57a22fa820f9e41f8 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Thu, 14 Apr 2016 13:40:35 -0400
Subject: runtime: check that sysUnused is always physical-page aligned

If sysUnused is passed an address or length that is not aligned to the
physical page boundary, the kernel will unmap more memory than the
caller wanted. Add a check for this.

For #9993.

Change-Id: I68ff03032e7b65cf0a853fe706ce21dc7f2aaaf8
Reviewed-on: https://go-review.googlesource.com/22065
Reviewed-by: Rick Hudson <rlh@golang.org>
Reviewed-by: Dave Cheney <dave@cheney.net>
Reviewed-by: Michael Hudson-Doyle <michael.hudson@canonical.com>
---
 src/runtime/mem_linux.go | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'src/runtime')

diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go
index 1ee13bd7e6..61fdcee543 100644
--- a/src/runtime/mem_linux.go
+++ b/src/runtime/mem_linux.go
@@ -132,6 +132,13 @@ func sysUnused(v unsafe.Pointer, n uintptr) {
 		}
 	}
 
+	if uintptr(v)&(sys.PhysPageSize-1) != 0 || n&(sys.PhysPageSize-1) != 0 {
+		// madvise will round this to any physical page
+		// *covered* by this range, so an unaligned madvise
+		// will release more memory than intended.
+		throw("unaligned sysUnused")
+	}
+
 	madvise(v, n, _MADV_DONTNEED)
 }
 
-- 
cgit v1.3


From 2cdcb6f8296b6528bb7d256a45e339c4aefb9109 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Thu, 14 Apr 2016 13:41:36 -0400
Subject: runtime: scavenge memory on physical page-aligned boundaries

Currently the scavenger marks memory unused in multiples of the
allocator page size (8K). This is safe as long as the true physical
page size is 4K (or 8K), as it is on many platforms. However, on
ARM64, PPC64x, and MIPS64, the physical page size is larger than 8K,
so if we attempt to mark memory unused, the kernel will round the
boundaries of the region *out* to all pages covered by the requested
region, and we'll release a larger region of memory than intended. As
a result, the scavenger is currently disabled on these platforms.

Fix this by first rounding the region to be marked unused *in* to
multiples of the physical page size, so that when we ask the kernel to
mark it unused, it releases exactly the requested region.

Fixes #9993.

Change-Id: I96d5fdc2f77f9d69abadcea29bcfe55e68288cb1
Reviewed-on: https://go-review.googlesource.com/22066
Reviewed-by: Rick Hudson <rlh@golang.org>
---
 src/runtime/mheap.go | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 895af9f07c..99f7b54fc8 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -824,15 +824,6 @@ func (h *mheap) busyList(npages uintptr) *mSpanList {
 }
 
 func scavengelist(list *mSpanList, now, limit uint64) uintptr {
-	if sys.PhysPageSize > _PageSize {
-		// golang.org/issue/9993
-		// If the physical page size of the machine is larger than
-		// our logical heap page size the kernel may round up the
-		// amount to be freed to its page size and corrupt the heap
-		// pages surrounding the unused block.
-		return 0
-	}
-
 	if list.isEmpty() {
 		return 0
 	}
@@ -840,11 +831,30 @@ func scavengelist(list *mSpanList, now, limit uint64) uintptr {
 	var sumreleased uintptr
 	for s := list.first; s != nil; s = s.next {
 		if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages {
-			released := (s.npages - s.npreleased) << _PageShift
+			start := uintptr(s.start) << _PageShift
+			end := start + s.npages<<_PageShift
+			if sys.PhysPageSize > _PageSize {
+				// We can only release pages in
+				// PhysPageSize blocks, so round start
+				// and end in. (Otherwise, madvise
+				// will round them *out* and release
+				// more memory than we want.)
+				start = (start + sys.PhysPageSize - 1) &^ (sys.PhysPageSize - 1)
+				end &^= sys.PhysPageSize - 1
+				if start == end {
+					continue
+				}
+			}
+			len := end - start
+
+			released := len - (s.npreleased << _PageShift)
+			if sys.PhysPageSize > _PageSize && released == 0 {
+				continue
+			}
 			memstats.heap_released += uint64(released)
 			sumreleased += released
-			s.npreleased = s.npages
-			sysUnused(unsafe.Pointer(s.start<<_PageShift), s.npages<<_PageShift)
+			s.npreleased = len >> _PageShift
+			sysUnused(unsafe.Pointer(start), len)
 		}
 	}
 	return sumreleased
-- 
cgit v1.3


From 95df0c6ab93f6a42bdc9fd45500fd4d56bfc9add Mon Sep 17 00:00:00 2001
From: David Crawshaw <crawshaw@golang.org>
Date: Mon, 28 Mar 2016 21:51:10 -0400
Subject: cmd/compile, etc: use name offset in method tables

Introduce and start using nameOff for two encoded names. This pair
of changes is best done together because the linker's method decoder
expects the method layouts to match.

Precursor to converting all existing name and *string fields to
nameOff.

linux/amd64:
	cmd/go:  -45KB (0.5%)
	jujud:  -389KB (0.6%)

linux/amd64 PIE:
	cmd/go: -170KB (1.4%)
	jujud:  -1.5MB (1.8%)

For #6853.

Change-Id: Ia044423f010fb987ce070b94c46a16fc78666ff6
Reviewed-on: https://go-review.googlesource.com/21396
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/cmd/compile/internal/gc/reflect.go | 14 ++---
 src/cmd/link/internal/ld/decodesym.go  | 18 +++----
 src/cmd/link/internal/ld/symtab.go     |  2 +-
 src/reflect/export_test.go             |  8 +--
 src/reflect/type.go                    | 96 +++++++++++++++++++++-------------
 src/reflect/value.go                   |  8 +--
 src/runtime/iface.go                   | 17 +++---
 src/runtime/runtime1.go                |  6 +++
 src/runtime/type.go                    | 46 ++++++++++------
 9 files changed, 130 insertions(+), 85 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go
index b8b9369f37..f782ce0974 100644
--- a/src/cmd/compile/internal/gc/reflect.go
+++ b/src/cmd/compile/internal/gc/reflect.go
@@ -70,7 +70,7 @@ const (
 )
 
 func structfieldSize() int       { return 3 * Widthptr } // Sizeof(runtime.structfield{})
-func imethodSize() int           { return 2 * Widthptr } // Sizeof(runtime.imethod{})
+func imethodSize() int           { return 4 + 4 }        // Sizeof(runtime.imethod{})
 func uncommonSize(t *Type) int { // Sizeof(runtime.uncommontype{})
 	if t.Sym == nil && len(methods(t)) == 0 {
 		return 0
@@ -647,13 +647,11 @@ func dextratypeData(s *Sym, ot int, t *Type) int {
 			pkg = a.pkg
 		}
 		nsym := dname(a.name, "", pkg, exported)
-		ot = dsymptrLSym(lsym, ot, nsym, 0)
+
+		ot = dsymptrOffLSym(lsym, ot, nsym, 0)
 		ot = dmethodptrOffLSym(lsym, ot, Linksym(dtypesym(a.mtype)))
 		ot = dmethodptrOffLSym(lsym, ot, Linksym(a.isym))
 		ot = dmethodptrOffLSym(lsym, ot, Linksym(a.tsym))
-		if Widthptr == 8 {
-			ot = duintxxLSym(lsym, ot, 0, 4) // pad to reflect.method size
-		}
 	}
 	return ot
 }
@@ -1226,6 +1224,7 @@ ok:
 		dataAdd := imethodSize() * n
 		ot = dextratype(s, ot, t, dataAdd)
 
+		lsym := Linksym(s)
 		for _, a := range m {
 			// ../../../../runtime/type.go:/imethod
 			exported := exportname(a.name)
@@ -1234,8 +1233,9 @@ ok:
 				pkg = a.pkg
 			}
 			nsym := dname(a.name, "", pkg, exported)
-			ot = dsymptrLSym(Linksym(s), ot, nsym, 0)
-			ot = dsymptr(s, ot, dtypesym(a.type_), 0)
+
+			ot = dsymptrOffLSym(lsym, ot, nsym, 0)
+			ot = dsymptrOffLSym(lsym, ot, Linksym(dtypesym(a.type_)), 0)
 		}
 
 	// ../../../../runtime/type.go:/mapType
diff --git a/src/cmd/link/internal/ld/decodesym.go b/src/cmd/link/internal/ld/decodesym.go
index 4725b91d01..5eb20c2fb2 100644
--- a/src/cmd/link/internal/ld/decodesym.go
+++ b/src/cmd/link/internal/ld/decodesym.go
@@ -262,8 +262,9 @@ const (
 )
 
 // decode_methodsig decodes an array of method signature information.
-// Each element of the array is size bytes. The first word is a
-// reflect.name for the name, the second word is a *rtype for the funcType.
+// Each element of the array is size bytes. The first 4 bytes is a
+// nameOff for the method name, and the next 4 bytes is a typeOff for
+// the function type.
 //
 // Conveniently this is the layout of both runtime.method and runtime.imethod.
 func decode_methodsig(s *LSym, off, size, count int) []methodsig {
@@ -271,7 +272,7 @@ func decode_methodsig(s *LSym, off, size, count int) []methodsig {
 	var methods []methodsig
 	for i := 0; i < count; i++ {
 		buf.WriteString(decodetype_name(s, off))
-		mtypSym := decode_reloc_sym(s, int32(off+SysArch.PtrSize))
+		mtypSym := decode_reloc_sym(s, int32(off+4))
 
 		buf.WriteRune('(')
 		inCount := decodetype_funcincount(mtypSym)
@@ -311,7 +312,7 @@ func decodetype_ifacemethods(s *LSym) []methodsig {
 	}
 	off := int(r.Add) // array of reflect.imethod values
 	numMethods := int(decodetype_ifacemethodcount(s))
-	sizeofIMethod := 2 * SysArch.PtrSize
+	sizeofIMethod := 4 + 4
 	return decode_methodsig(s, off, sizeofIMethod, numMethods)
 }
 
@@ -343,12 +344,7 @@ func decodetype_methods(s *LSym) []methodsig {
 
 	mcount := int(decode_inuxi(s.P[off+SysArch.PtrSize:], 2))
 	moff := int(decode_inuxi(s.P[off+SysArch.PtrSize+2:], 2))
-	off += moff          // offset to array of reflect.method values
-	var sizeofMethod int // sizeof reflect.method in program
-	if SysArch.PtrSize == 4 {
-		sizeofMethod = 4 * SysArch.PtrSize
-	} else {
-		sizeofMethod = 3 * SysArch.PtrSize
-	}
+	off += moff                // offset to array of reflect.method values
+	const sizeofMethod = 4 * 4 // sizeof reflect.method in program
 	return decode_methodsig(s, off, sizeofMethod, mcount)
 }
diff --git a/src/cmd/link/internal/ld/symtab.go b/src/cmd/link/internal/ld/symtab.go
index 96e8de5030..1f07a4eb77 100644
--- a/src/cmd/link/internal/ld/symtab.go
+++ b/src/cmd/link/internal/ld/symtab.go
@@ -427,7 +427,7 @@ func symtab() {
 			if !DynlinkingGo() {
 				s.Attr |= AttrHidden
 			}
-			if UseRelro() && len(s.R) > 0 {
+			if UseRelro() {
 				s.Type = obj.STYPERELRO
 				s.Outer = symtyperel
 			} else {
diff --git a/src/reflect/export_test.go b/src/reflect/export_test.go
index 2769e0db40..f527434f0d 100644
--- a/src/reflect/export_test.go
+++ b/src/reflect/export_test.go
@@ -50,7 +50,8 @@ func TypeLinks() []string {
 	for i, offs := range offset {
 		rodata := sections[i]
 		for _, off := range offs {
-			r = append(r, rtypeOff(rodata, off).string)
+			typ := (*rtype)(resolveTypeOff(unsafe.Pointer(rodata), off))
+			r = append(r, typ.string)
 		}
 	}
 	return r
@@ -91,10 +92,11 @@ func FirstMethodNameBytes(t Type) *byte {
 		panic("type has no methods")
 	}
 	m := ut.methods()[0]
-	if *m.name.data(0)&(1<<2) == 0 {
+	mname := t.(*rtype).nameOff(m.name)
+	if *mname.data(0)&(1<<2) == 0 {
 		panic("method name does not have pkgPath *string")
 	}
-	return m.name.bytes
+	return mname.bytes
 }
 
 type OtherPkgFields struct {
diff --git a/src/reflect/type.go b/src/reflect/type.go
index b8c778cc2b..0cae69a79c 100644
--- a/src/reflect/type.go
+++ b/src/reflect/type.go
@@ -288,7 +288,7 @@ type typeAlg struct {
 
 // Method on non-interface type
 type method struct {
-	name name    // name of method
+	name nameOff // name of method
 	mtyp typeOff // method type (without receiver)
 	ifn  textOff // fn used in interface call (one-word receiver)
 	tfn  textOff // fn used for normal method call
@@ -347,8 +347,8 @@ type funcType struct {
 
 // imethod represents a method on an interface type
 type imethod struct {
-	name name   // name of method
-	typ  *rtype // .(*FuncType) underneath
+	name nameOff // name of method
+	typ  typeOff // .(*FuncType) underneath
 }
 
 // interfaceType represents an interface type.
@@ -424,19 +424,19 @@ type name struct {
 	bytes *byte
 }
 
-func (n *name) data(off int) *byte {
+func (n name) data(off int) *byte {
 	return (*byte)(add(unsafe.Pointer(n.bytes), uintptr(off)))
 }
 
-func (n *name) isExported() bool {
+func (n name) isExported() bool {
 	return (*n.bytes)&(1<<0) != 0
 }
 
-func (n *name) nameLen() int {
+func (n name) nameLen() int {
 	return int(uint16(*n.data(1))<<8 | uint16(*n.data(2)))
 }
 
-func (n *name) tagLen() int {
+func (n name) tagLen() int {
 	if *n.data(0)&(1<<1) == 0 {
 		return 0
 	}
@@ -444,7 +444,7 @@ func (n *name) tagLen() int {
 	return int(uint16(*n.data(off))<<8 | uint16(*n.data(off + 1)))
 }
 
-func (n *name) name() (s string) {
+func (n name) name() (s string) {
 	if n.bytes == nil {
 		return ""
 	}
@@ -458,7 +458,7 @@ func (n *name) name() (s string) {
 	return s
 }
 
-func (n *name) tag() (s string) {
+func (n name) tag() (s string) {
 	tl := n.tagLen()
 	if tl == 0 {
 		return ""
@@ -470,7 +470,7 @@ func (n *name) tag() (s string) {
 	return s
 }
 
-func (n *name) pkgPath() string {
+func (n name) pkgPath() string {
 	if n.bytes == nil || *n.data(0)&(1<<2) == 0 {
 		return ""
 	}
@@ -480,7 +480,7 @@ func (n *name) pkgPath() string {
 	}
 	var nameOff int32
 	copy((*[4]byte)(unsafe.Pointer(&nameOff))[:], (*[4]byte)(unsafe.Pointer(n.data(off)))[:])
-	pkgPathName := name{(*byte)(resolveTypeOff(unsafe.Pointer(n), nameOff))}
+	pkgPathName := name{(*byte)(resolveTypeOff(unsafe.Pointer(n.bytes), nameOff))}
 	return pkgPathName.name()
 }
 
@@ -605,6 +605,11 @@ func (t *uncommonType) PkgPath() string {
 	return t.pkgPath.name()
 }
 
+// resolveNameOff resolves a name offset from a base pointer.
+// The (*rtype).nameOff method is a convenience wrapper for this function.
+// Implemented in the runtime package.
+func resolveNameOff(ptrInModule unsafe.Pointer, off int32) unsafe.Pointer
+
 // resolveTypeOff resolves an *rtype offset from a base type.
 // The (*rtype).typeOff method is a convenience wrapper for this function.
 // Implemented in the runtime package.
@@ -620,6 +625,12 @@ func resolveTextOff(rtype unsafe.Pointer, off int32) unsafe.Pointer
 // be resolved correctly. Implemented in the runtime package.
 func addReflectOff(ptr unsafe.Pointer) int32
 
+// resolveReflectName adds a name to the reflection lookup map in the runtime.
+// It returns a new nameOff that can be used to refer to the pointer.
+func resolveReflectName(n name) nameOff {
+	return nameOff(addReflectOff(unsafe.Pointer(n.bytes)))
+}
+
 // resolveReflectType adds a *rtype to the reflection lookup map in the runtime.
 // It returns a new typeOff that can be used to refer to the pointer.
 func resolveReflectType(t *rtype) typeOff {
@@ -633,9 +644,17 @@ func resolveReflectText(ptr unsafe.Pointer) textOff {
 	return textOff(addReflectOff(ptr))
 }
 
+type nameOff int32 // offset to a name
 type typeOff int32 // offset to an *rtype
 type textOff int32 // offset from top of text section
 
+func (t *rtype) nameOff(off nameOff) name {
+	if off == 0 {
+		return name{}
+	}
+	return name{(*byte)(resolveNameOff(unsafe.Pointer(t), int32(off)))}
+}
+
 func (t *rtype) typeOff(off typeOff) *rtype {
 	if off == 0 {
 		return nil
@@ -753,10 +772,11 @@ func (t *rtype) Method(i int) (m Method) {
 		panic("reflect: Method index out of range")
 	}
 	p := ut.methods()[i]
-	m.Name = p.name.name()
+	pname := t.nameOff(p.name)
+	m.Name = pname.name()
 	fl := flag(Func)
-	if !p.name.isExported() {
-		m.PkgPath = p.name.pkgPath()
+	if !pname.isExported() {
+		m.PkgPath = pname.pkgPath()
 		if m.PkgPath == "" {
 			m.PkgPath = ut.pkgPath.name()
 		}
@@ -796,7 +816,8 @@ func (t *rtype) MethodByName(name string) (m Method, ok bool) {
 	utmethods := ut.methods()
 	for i := 0; i < int(ut.mcount); i++ {
 		p := utmethods[i]
-		if p.name.name() == name {
+		pname := t.nameOff(p.name)
+		if pname.name() == name {
 			return t.Method(i), true
 		}
 	}
@@ -1005,14 +1026,15 @@ func (t *interfaceType) Method(i int) (m Method) {
 		return
 	}
 	p := &t.methods[i]
-	m.Name = p.name.name()
-	if !p.name.isExported() {
-		m.PkgPath = p.name.pkgPath()
+	pname := t.nameOff(p.name)
+	m.Name = pname.name()
+	if !pname.isExported() {
+		m.PkgPath = pname.pkgPath()
 		if m.PkgPath == "" {
 			m.PkgPath = t.pkgPath.name()
 		}
 	}
-	m.Type = toType(p.typ)
+	m.Type = toType(t.typeOff(p.typ))
 	m.Index = i
 	return
 }
@@ -1028,7 +1050,7 @@ func (t *interfaceType) MethodByName(name string) (m Method, ok bool) {
 	var p *imethod
 	for i := range t.methods {
 		p = &t.methods[i]
-		if p.name.name() == name {
+		if t.nameOff(p.name).name() == name {
 			return t.Method(i), true
 		}
 	}
@@ -1468,7 +1490,7 @@ func implements(T, V *rtype) bool {
 		for j := 0; j < len(v.methods); j++ {
 			tm := &t.methods[i]
 			vm := &v.methods[j]
-			if vm.name.name() == tm.name.name() && vm.typ == tm.typ {
+			if V.nameOff(vm.name).name() == t.nameOff(tm.name).name() && V.typeOff(vm.typ) == t.typeOff(tm.typ) {
 				if i++; i >= len(t.methods) {
 					return true
 				}
@@ -1486,7 +1508,7 @@ func implements(T, V *rtype) bool {
 	for j := 0; j < int(v.mcount); j++ {
 		tm := &t.methods[i]
 		vm := vmethods[j]
-		if vm.name.name() == tm.name.name() && V.typeOff(vm.mtyp) == tm.typ {
+		if V.nameOff(vm.name).name() == t.nameOff(tm.name).name() && V.typeOff(vm.mtyp) == t.typeOff(tm.typ) {
 			if i++; i >= len(t.methods) {
 				return true
 			}
@@ -2327,12 +2349,13 @@ func StructOf(fields []StructField) Type {
 			case Interface:
 				ift := (*interfaceType)(unsafe.Pointer(ft))
 				for im, m := range ift.methods {
-					if m.name.pkgPath() != "" {
+					if ift.nameOff(m.name).pkgPath() != "" {
 						// TODO(sbinet)
 						panic("reflect: embedded interface with unexported method(s) not implemented")
 					}
 
 					var (
+						mtyp    = ift.typeOff(m.typ)
 						ifield  = i
 						imethod = im
 						ifn     Value
@@ -2340,7 +2363,7 @@ func StructOf(fields []StructField) Type {
 					)
 
 					if ft.kind&kindDirectIface != 0 {
-						tfn = MakeFunc(m.typ, func(in []Value) []Value {
+						tfn = MakeFunc(mtyp, func(in []Value) []Value {
 							var args []Value
 							var recv = in[0]
 							if len(in) > 1 {
@@ -2348,7 +2371,7 @@ func StructOf(fields []StructField) Type {
 							}
 							return recv.Field(ifield).Method(imethod).Call(args)
 						})
-						ifn = MakeFunc(m.typ, func(in []Value) []Value {
+						ifn = MakeFunc(mtyp, func(in []Value) []Value {
 							var args []Value
 							var recv = in[0]
 							if len(in) > 1 {
@@ -2357,7 +2380,7 @@ func StructOf(fields []StructField) Type {
 							return recv.Field(ifield).Method(imethod).Call(args)
 						})
 					} else {
-						tfn = MakeFunc(m.typ, func(in []Value) []Value {
+						tfn = MakeFunc(mtyp, func(in []Value) []Value {
 							var args []Value
 							var recv = in[0]
 							if len(in) > 1 {
@@ -2365,7 +2388,7 @@ func StructOf(fields []StructField) Type {
 							}
 							return recv.Field(ifield).Method(imethod).Call(args)
 						})
-						ifn = MakeFunc(m.typ, func(in []Value) []Value {
+						ifn = MakeFunc(mtyp, func(in []Value) []Value {
 							var args []Value
 							var recv = Indirect(in[0])
 							if len(in) > 1 {
@@ -2376,8 +2399,8 @@ func StructOf(fields []StructField) Type {
 					}
 
 					methods = append(methods, method{
-						name: m.name,
-						mtyp: resolveReflectType(m.typ),
+						name: resolveReflectName(ift.nameOff(m.name)),
+						mtyp: resolveReflectType(mtyp),
 						ifn:  resolveReflectText(unsafe.Pointer(&ifn)),
 						tfn:  resolveReflectText(unsafe.Pointer(&tfn)),
 					})
@@ -2386,12 +2409,13 @@ func StructOf(fields []StructField) Type {
 				ptr := (*ptrType)(unsafe.Pointer(ft))
 				if unt := ptr.uncommon(); unt != nil {
 					for _, m := range unt.methods() {
-						if m.name.pkgPath() != "" {
+						mname := ptr.nameOff(m.name)
+						if mname.pkgPath() != "" {
 							// TODO(sbinet)
 							panic("reflect: embedded interface with unexported method(s) not implemented")
 						}
 						methods = append(methods, method{
-							name: m.name,
+							name: resolveReflectName(mname),
 							mtyp: resolveReflectType(ptr.typeOff(m.mtyp)),
 							ifn:  resolveReflectText(ptr.textOff(m.ifn)),
 							tfn:  resolveReflectText(ptr.textOff(m.tfn)),
@@ -2400,12 +2424,13 @@ func StructOf(fields []StructField) Type {
 				}
 				if unt := ptr.elem.uncommon(); unt != nil {
 					for _, m := range unt.methods() {
-						if m.name.pkgPath() != "" {
+						mname := ptr.nameOff(m.name)
+						if mname.pkgPath() != "" {
 							// TODO(sbinet)
 							panic("reflect: embedded interface with unexported method(s) not implemented")
 						}
 						methods = append(methods, method{
-							name: m.name,
+							name: resolveReflectName(mname),
 							mtyp: resolveReflectType(ptr.elem.typeOff(m.mtyp)),
 							ifn:  resolveReflectText(ptr.elem.textOff(m.ifn)),
 							tfn:  resolveReflectText(ptr.elem.textOff(m.tfn)),
@@ -2415,12 +2440,13 @@ func StructOf(fields []StructField) Type {
 			default:
 				if unt := ft.uncommon(); unt != nil {
 					for _, m := range unt.methods() {
-						if m.name.pkgPath() != "" {
+						mname := ft.nameOff(m.name)
+						if mname.pkgPath() != "" {
 							// TODO(sbinet)
 							panic("reflect: embedded interface with unexported method(s) not implemented")
 						}
 						methods = append(methods, method{
-							name: m.name,
+							name: resolveReflectName(mname),
 							mtyp: resolveReflectType(ft.typeOff(m.mtyp)),
 							ifn:  resolveReflectText(ft.textOff(m.ifn)),
 							tfn:  resolveReflectText(ft.textOff(m.tfn)),
diff --git a/src/reflect/value.go b/src/reflect/value.go
index d4d317436a..e6b846e5d1 100644
--- a/src/reflect/value.go
+++ b/src/reflect/value.go
@@ -553,7 +553,7 @@ func methodReceiver(op string, v Value, methodIndex int) (rcvrtype, t *rtype, fn
 			panic("reflect: internal error: invalid method index")
 		}
 		m := &tt.methods[i]
-		if !m.name.isExported() {
+		if !tt.nameOff(m.name).isExported() {
 			panic("reflect: " + op + " of unexported method")
 		}
 		iface := (*nonEmptyInterface)(v.ptr)
@@ -562,7 +562,7 @@ func methodReceiver(op string, v Value, methodIndex int) (rcvrtype, t *rtype, fn
 		}
 		rcvrtype = iface.itab.typ
 		fn = unsafe.Pointer(&iface.itab.fun[i])
-		t = m.typ
+		t = tt.typeOff(m.typ)
 	} else {
 		rcvrtype = v.typ
 		ut := v.typ.uncommon()
@@ -570,7 +570,7 @@ func methodReceiver(op string, v Value, methodIndex int) (rcvrtype, t *rtype, fn
 			panic("reflect: internal error: invalid method index")
 		}
 		m := ut.methods()[i]
-		if !m.name.isExported() {
+		if !v.typ.nameOff(m.name).isExported() {
 			panic("reflect: " + op + " of unexported method")
 		}
 		ifn := v.typ.textOff(m.ifn)
@@ -1684,7 +1684,7 @@ func (v Value) Type() Type {
 			panic("reflect: internal error: invalid method index")
 		}
 		m := &tt.methods[i]
-		return m.typ
+		return v.typ.typeOff(m.typ)
 	}
 	// Method on concrete type.
 	ut := v.typ.uncommon()
diff --git a/src/runtime/iface.go b/src/runtime/iface.go
index 84f0ee8f0c..8f179bac80 100644
--- a/src/runtime/iface.go
+++ b/src/runtime/iface.go
@@ -37,7 +37,8 @@ func getitab(inter *interfacetype, typ *_type, canfail bool) *itab {
 		if canfail {
 			return nil
 		}
-		panic(&TypeAssertionError{"", typ._string, inter.typ._string, inter.mhdr[0].name.name()})
+		name := inter.typ.nameOff(inter.mhdr[0].name)
+		panic(&TypeAssertionError{"", typ._string, inter.typ._string, name.name()})
 	}
 
 	h := itabhash(inter, typ)
@@ -98,20 +99,22 @@ func additab(m *itab, locked, canfail bool) {
 	j := 0
 	for k := 0; k < ni; k++ {
 		i := &inter.mhdr[k]
-		iname := i.name.name()
-		itype := i._type
-		ipkg := i.name.pkgPath()
+		itype := inter.typ.typeOff(i.ityp)
+		name := inter.typ.nameOff(i.name)
+		iname := name.name()
+		ipkg := name.pkgPath()
 		if ipkg == "" {
 			ipkg = inter.pkgpath.name()
 		}
 		for ; j < nt; j++ {
 			t := &xmhdr[j]
-			if typ.typeOff(t.mtyp) == itype && t.name.name() == iname {
-				pkgPath := t.name.pkgPath()
+			tname := typ.nameOff(t.name)
+			if typ.typeOff(t.mtyp) == itype && tname.name() == iname {
+				pkgPath := tname.pkgPath()
 				if pkgPath == "" {
 					pkgPath = x.pkgpath.name()
 				}
-				if t.name.isExported() || pkgPath == ipkg {
+				if tname.isExported() || pkgPath == ipkg {
 					if m != nil {
 						ifn := typ.textOff(t.ifn)
 						*(*unsafe.Pointer)(add(unsafe.Pointer(&m.fun[0]), uintptr(k)*sys.PtrSize)) = ifn
diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go
index 02aeedaf75..9089383904 100644
--- a/src/runtime/runtime1.go
+++ b/src/runtime/runtime1.go
@@ -487,6 +487,12 @@ func reflect_typelinks() ([]unsafe.Pointer, [][]int32) {
 	return sections, ret
 }
 
+// reflect_resolveNameOff resolves a name offset from a base pointer.
+//go:linkname reflect_resolveNameOff reflect.resolveNameOff
+func reflect_resolveNameOff(ptrInModule unsafe.Pointer, off int32) unsafe.Pointer {
+	return unsafe.Pointer(resolveNameOff(ptrInModule, nameOff(off)).bytes)
+}
+
 // reflect_resolveTypeOff resolves an *rtype offset from a base type.
 //go:linkname reflect_resolveTypeOff reflect.resolveTypeOff
 func reflect_resolveTypeOff(rtype unsafe.Pointer, off int32) unsafe.Pointer {
diff --git a/src/runtime/type.go b/src/runtime/type.go
index 711753bab5..31f7ff81b8 100644
--- a/src/runtime/type.go
+++ b/src/runtime/type.go
@@ -161,11 +161,17 @@ func resolveNameOff(ptrInModule unsafe.Pointer, off nameOff) name {
 		}
 	}
 	if md == nil {
-		println("runtime: nameOff", hex(off), "base", hex(base), "not in ranges:")
-		for next := &firstmoduledata; next != nil; next = next.next {
-			println("\ttypes", hex(next.types), "etypes", hex(next.etypes))
+		lock(&reflectOffs.lock)
+		res, found := reflectOffs.m[int32(off)]
+		unlock(&reflectOffs.lock)
+		if !found {
+			println("runtime: nameOff", hex(off), "base", hex(base), "not in ranges:")
+			for next := &firstmoduledata; next != nil; next = next.next {
+				println("\ttypes", hex(next.types), "etypes", hex(next.etypes))
+			}
+			throw("runtime: name offset base pointer out of range")
 		}
-		throw("runtime: name offset base pointer out of range")
+		return name{(*byte)(res)}
 	}
 	res := md.types + uintptr(off)
 	if res > md.etypes {
@@ -175,6 +181,10 @@ func resolveNameOff(ptrInModule unsafe.Pointer, off nameOff) name {
 	return name{(*byte)(unsafe.Pointer(res))}
 }
 
+func (t *_type) nameOff(off nameOff) name {
+	return resolveNameOff(unsafe.Pointer(t), off)
+}
+
 func (t *_type) typeOff(off typeOff) *_type {
 	if off == 0 {
 		return nil
@@ -269,7 +279,7 @@ type typeOff int32
 type textOff int32
 
 type method struct {
-	name name
+	name nameOff
 	mtyp typeOff
 	ifn  textOff
 	tfn  textOff
@@ -282,8 +292,8 @@ type uncommontype struct {
 }
 
 type imethod struct {
-	name  name
-	_type *_type
+	name nameOff
+	ityp typeOff
 }
 
 type interfacetype struct {
@@ -354,19 +364,19 @@ type name struct {
 	bytes *byte
 }
 
-func (n *name) data(off int) *byte {
+func (n name) data(off int) *byte {
 	return (*byte)(add(unsafe.Pointer(n.bytes), uintptr(off)))
 }
 
-func (n *name) isExported() bool {
+func (n name) isExported() bool {
 	return (*n.bytes)&(1<<0) != 0
 }
 
-func (n *name) nameLen() int {
+func (n name) nameLen() int {
 	return int(uint16(*n.data(1))<<8 | uint16(*n.data(2)))
 }
 
-func (n *name) tagLen() int {
+func (n name) tagLen() int {
 	if *n.data(0)&(1<<1) == 0 {
 		return 0
 	}
@@ -374,7 +384,7 @@ func (n *name) tagLen() int {
 	return int(uint16(*n.data(off))<<8 | uint16(*n.data(off + 1)))
 }
 
-func (n *name) name() (s string) {
+func (n name) name() (s string) {
 	if n.bytes == nil {
 		return ""
 	}
@@ -388,7 +398,7 @@ func (n *name) name() (s string) {
 	return s
 }
 
-func (n *name) tag() (s string) {
+func (n name) tag() (s string) {
 	tl := n.tagLen()
 	if tl == 0 {
 		return ""
@@ -400,7 +410,7 @@ func (n *name) tag() (s string) {
 	return s
 }
 
-func (n *name) pkgPath() string {
+func (n name) pkgPath() string {
 	if n.bytes == nil || *n.data(0)&(1<<2) == 0 {
 		return ""
 	}
@@ -545,13 +555,15 @@ func typesEqual(t, v *_type) bool {
 		for i := range it.mhdr {
 			tm := &it.mhdr[i]
 			vm := &iv.mhdr[i]
-			if tm.name.name() != vm.name.name() {
+			tname := it.typ.nameOff(tm.name)
+			vname := iv.typ.nameOff(vm.name)
+			if tname.name() != vname.name() {
 				return false
 			}
-			if tm.name.pkgPath() != vm.name.pkgPath() {
+			if tname.pkgPath() != vname.pkgPath() {
 				return false
 			}
-			if !typesEqual(tm._type, vm._type) {
+			if !typesEqual(it.typ.typeOff(tm.ityp), iv.typ.typeOff(vm.ityp)) {
 				return false
 			}
 		}
-- 
cgit v1.3


From 411a0adc9bbee3a981af93de5f83b13f26f0413f Mon Sep 17 00:00:00 2001
From: Josh Bleecher Snyder <josharian@gmail.com>
Date: Sun, 17 Apr 2016 19:53:39 -0700
Subject: runtime: add benchmarks for in-place append

Change-Id: I2b43cc976d2efbf8b41170be536fdd10364b65e5
Reviewed-on: https://go-review.googlesource.com/22190
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/runtime/append_test.go | 129 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)

(limited to 'src/runtime')

diff --git a/src/runtime/append_test.go b/src/runtime/append_test.go
index 6d7836a351..cd28e3dca6 100644
--- a/src/runtime/append_test.go
+++ b/src/runtime/append_test.go
@@ -234,3 +234,132 @@ func BenchmarkCopy16String(b *testing.B)   { benchmarkCopyStr(b, 16) }
 func BenchmarkCopy32String(b *testing.B)   { benchmarkCopyStr(b, 32) }
 func BenchmarkCopy128String(b *testing.B)  { benchmarkCopyStr(b, 128) }
 func BenchmarkCopy1024String(b *testing.B) { benchmarkCopyStr(b, 1024) }
+
+var (
+	sByte []byte
+	s1Ptr []uintptr
+	s2Ptr [][2]uintptr
+	s3Ptr [][3]uintptr
+	s4Ptr [][4]uintptr
+)
+
+// BenchmarkAppendInPlace tests the performance of append
+// when the result is being written back to the same slice.
+// In order for the in-place optimization to occur,
+// the slice must be referred to by address;
+// using a global is an easy way to trigger that.
+// We test the "grow" and "no grow" paths separately,
+// but not the "normal" (occasionally grow) path,
+// because it is a blend of the other two.
+// We use small numbers and small sizes in an attempt
+// to avoid benchmarking memory allocation and copying.
+// We use scalars instead of pointers in an attempt
+// to avoid benchmarking the write barriers.
+// We benchmark four common sizes (byte, pointer, string/interface, slice),
+// and one larger size.
+func BenchmarkAppendInPlace(b *testing.B) {
+	b.Run("NoGrow", func(b *testing.B) {
+		const C = 128
+
+		b.Run("Byte", func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				sByte = make([]byte, C)
+				for j := 0; j < C; j++ {
+					sByte = append(sByte, 0x77)
+				}
+			}
+		})
+
+		b.Run("1Ptr", func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				s1Ptr = make([]uintptr, C)
+				for j := 0; j < C; j++ {
+					s1Ptr = append(s1Ptr, 0x77)
+				}
+			}
+		})
+
+		b.Run("2Ptr", func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				s2Ptr = make([][2]uintptr, C)
+				for j := 0; j < C; j++ {
+					s2Ptr = append(s2Ptr, [2]uintptr{0x77, 0x88})
+				}
+			}
+		})
+
+		b.Run("3Ptr", func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				s3Ptr = make([][3]uintptr, C)
+				for j := 0; j < C; j++ {
+					s3Ptr = append(s3Ptr, [3]uintptr{0x77, 0x88, 0x99})
+				}
+			}
+		})
+
+		b.Run("4Ptr", func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				s4Ptr = make([][4]uintptr, C)
+				for j := 0; j < C; j++ {
+					s4Ptr = append(s4Ptr, [4]uintptr{0x77, 0x88, 0x99, 0xAA})
+				}
+			}
+		})
+
+	})
+
+	b.Run("Grow", func(b *testing.B) {
+		const C = 5
+
+		b.Run("Byte", func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				sByte = make([]byte, 0)
+				for j := 0; j < C; j++ {
+					sByte = append(sByte, 0x77)
+					sByte = sByte[:cap(sByte)]
+				}
+			}
+		})
+
+		b.Run("1Ptr", func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				s1Ptr = make([]uintptr, 0)
+				for j := 0; j < C; j++ {
+					s1Ptr = append(s1Ptr, 0x77)
+					s1Ptr = s1Ptr[:cap(s1Ptr)]
+				}
+			}
+		})
+
+		b.Run("2Ptr", func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				s2Ptr = make([][2]uintptr, 0)
+				for j := 0; j < C; j++ {
+					s2Ptr = append(s2Ptr, [2]uintptr{0x77, 0x88})
+					s2Ptr = s2Ptr[:cap(s2Ptr)]
+				}
+			}
+		})
+
+		b.Run("3Ptr", func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				s3Ptr = make([][3]uintptr, 0)
+				for j := 0; j < C; j++ {
+					s3Ptr = append(s3Ptr, [3]uintptr{0x77, 0x88, 0x99})
+					s3Ptr = s3Ptr[:cap(s3Ptr)]
+				}
+			}
+		})
+
+		b.Run("4Ptr", func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				s4Ptr = make([][4]uintptr, 0)
+				for j := 0; j < C; j++ {
+					s4Ptr = append(s4Ptr, [4]uintptr{0x77, 0x88, 0x99, 0xAA})
+					s4Ptr = s4Ptr[:cap(s4Ptr)]
+				}
+			}
+		})
+
+	})
+}
-- 
cgit v1.3


From a4dd6ea1524901fab5deac60772345babd058ae7 Mon Sep 17 00:00:00 2001
From: Josh Bleecher Snyder <josharian@gmail.com>
Date: Tue, 19 Apr 2016 09:18:59 -0700
Subject: runtime: add maxSliceCap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This avoids expensive division calculations
for many common slice element sizes.

name                      old time/op  new time/op  delta
MakeSlice-8               51.9ns ± 3%  35.1ns ± 2%  -32.41%  (p=0.000 n=10+10)
GrowSliceBytes-8          44.1ns ± 2%  44.1ns ± 1%     ~     (p=0.984 n=10+10)
GrowSliceInts-8           60.9ns ± 3%  60.9ns ± 3%     ~     (p=0.698 n=10+10)
GrowSlicePtr-8             131ns ± 1%   120ns ± 2%   -8.41%   (p=0.000 n=8+10)
GrowSliceStruct24Bytes-8   111ns ± 2%   103ns ± 3%   -7.23%    (p=0.000 n=8+8)

Change-Id: I2630eb3d73c814db030cad16e620ea7fecbbd312
Reviewed-on: https://go-review.googlesource.com/22223
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/runtime/malloc.go |  2 +-
 src/runtime/slice.go  | 35 +++++++++++++++++++++++++----------
 2 files changed, 26 insertions(+), 11 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index ee4728c9a5..a3e55ec2fb 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -792,7 +792,7 @@ func newarray(typ *_type, n uintptr) unsafe.Pointer {
 	if typ.kind&kindNoPointers != 0 {
 		flags |= flagNoScan
 	}
-	if int(n) < 0 || (typ.size > 0 && n > _MaxMem/typ.size) {
+	if int(n) < 0 || n > maxSliceCap(typ.size) {
 		panic(plainError("runtime: allocation size out of range"))
 	}
 	return mallocgc(typ.size*n, typ, flags)
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index f36ec0b466..f9414d7658 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -14,6 +14,28 @@ type slice struct {
 	cap   int
 }
 
+// maxElems is a lookup table containing the maximum capacity for a slice.
+// The index is the size of the slice element.
+var maxElems = [...]uintptr{
+	^uintptr(0),
+	_MaxMem / 1, _MaxMem / 2, _MaxMem / 3, _MaxMem / 4,
+	_MaxMem / 5, _MaxMem / 6, _MaxMem / 7, _MaxMem / 8,
+	_MaxMem / 9, _MaxMem / 10, _MaxMem / 11, _MaxMem / 12,
+	_MaxMem / 13, _MaxMem / 14, _MaxMem / 15, _MaxMem / 16,
+	_MaxMem / 17, _MaxMem / 18, _MaxMem / 19, _MaxMem / 20,
+	_MaxMem / 21, _MaxMem / 22, _MaxMem / 23, _MaxMem / 24,
+	_MaxMem / 25, _MaxMem / 26, _MaxMem / 27, _MaxMem / 28,
+	_MaxMem / 29, _MaxMem / 30, _MaxMem / 31, _MaxMem / 32,
+}
+
+// maxSliceCap returns the maximum capacity for a slice.
+func maxSliceCap(elemsize uintptr) uintptr {
+	if elemsize < uintptr(len(maxElems)) {
+		return maxElems[elemsize]
+	}
+	return _MaxMem / elemsize
+}
+
 // TODO: take uintptrs instead of int64s?
 func makeslice(t *slicetype, len64, cap64 int64) slice {
 	// NOTE: The len > maxElements check here is not strictly necessary,
@@ -22,11 +44,7 @@ func makeslice(t *slicetype, len64, cap64 int64) slice {
 	// but since the cap is only being supplied implicitly, saying len is clearer.
 	// See issue 4085.
 
-	maxElements := ^uintptr(0)
-	if t.elem.size > 0 {
-		maxElements = _MaxMem / t.elem.size
-	}
-
+	maxElements := maxSliceCap(t.elem.size)
 	len := int(len64)
 	if len64 < 0 || int64(len) != len64 || uintptr(len) > maxElements {
 		panic(errorString("makeslice: len out of range"))
@@ -84,27 +102,24 @@ func growslice(t *slicetype, old slice, cap int) slice {
 		}
 	}
 
-	var lenmem, capmem, maxcap uintptr
+	var lenmem, capmem uintptr
 	const ptrSize = unsafe.Sizeof((*byte)(nil))
 	switch et.size {
 	case 1:
 		lenmem = uintptr(old.len)
 		capmem = roundupsize(uintptr(newcap))
 		newcap = int(capmem)
-		maxcap = _MaxMem
 	case ptrSize:
 		lenmem = uintptr(old.len) * ptrSize
 		capmem = roundupsize(uintptr(newcap) * ptrSize)
 		newcap = int(capmem / ptrSize)
-		maxcap = _MaxMem / ptrSize
 	default:
 		lenmem = uintptr(old.len) * et.size
 		capmem = roundupsize(uintptr(newcap) * et.size)
 		newcap = int(capmem / et.size)
-		maxcap = _MaxMem / et.size
 	}
 
-	if cap < old.cap || uintptr(newcap) > maxcap {
+	if cap < old.cap || uintptr(newcap) > maxSliceCap(et.size) {
 		panic(errorString("growslice: cap out of range"))
 	}
 
-- 
cgit v1.3


From 998c8e034c98fccb52b0692b97d36a5a6d3bd31a Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Tue, 19 Apr 2016 12:48:09 -0700
Subject: cmd/compile: convT2{I,E} don't handle direct interfaces

We now inline type-to-interface conversions when the type
is pointer-shaped.  No need to keep code to handle that in
convT2{I,E}.

Change-Id: I3a6668259556077cbb2986a9e8fe42a625d506c9
Reviewed-on: https://go-review.googlesource.com/22249
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michel Lespinasse <walken@google.com>
---
 src/runtime/iface.go | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/iface.go b/src/runtime/iface.go
index 8f179bac80..352ff77465 100644
--- a/src/runtime/iface.go
+++ b/src/runtime/iface.go
@@ -160,18 +160,16 @@ func convT2E(t *_type, elem unsafe.Pointer, x unsafe.Pointer) (e eface) {
 		msanread(elem, t.size)
 	}
 	if isDirectIface(t) {
-		e._type = t
-		typedmemmove(t, unsafe.Pointer(&e.data), elem)
-	} else {
-		if x == nil {
-			x = newobject(t)
-		}
+		throw("direct convT2E")
+	}
+	if x == nil {
+		x = newobject(t)
 		// TODO: We allocate a zeroed object only to overwrite it with
 		// actual data. Figure out how to avoid zeroing. Also below in convT2I.
-		typedmemmove(t, x, elem)
-		e._type = t
-		e.data = x
 	}
+	typedmemmove(t, x, elem)
+	e._type = t
+	e.data = x
 	return
 }
 
@@ -184,16 +182,14 @@ func convT2I(tab *itab, elem unsafe.Pointer, x unsafe.Pointer) (i iface) {
 		msanread(elem, t.size)
 	}
 	if isDirectIface(t) {
-		i.tab = tab
-		typedmemmove(t, unsafe.Pointer(&i.data), elem)
-	} else {
-		if x == nil {
-			x = newobject(t)
-		}
-		typedmemmove(t, x, elem)
-		i.tab = tab
-		i.data = x
+		throw("direct convT2I")
+	}
+	if x == nil {
+		x = newobject(t)
 	}
+	typedmemmove(t, x, elem)
+	i.tab = tab
+	i.data = x
 	return
 }
 
-- 
cgit v1.3


From 58012ea785851052a28f20544d7e2da0e0e6eefa Mon Sep 17 00:00:00 2001
From: Julia Hansbrough <flowerhack@google.com>
Date: Mon, 18 Apr 2016 15:53:29 -0700
Subject: runtime: updated SIGSYS to cause a panic + stacktrace

On GNU/Linux, SIGSYS is specified to cause the process to terminate
without a core dump. In https://codereview.appspot.com/3749041 , it
appears that Golang accidentally introduced incorrect behavior for
this signal, which caused Golang processes to keep running after
receiving SIGSYS. This change reverts it to the old/correct behavior.

Updates #15204

Change-Id: I3aa48a9499c1bc36fa5d3f40c088fdd7599e0db5
Reviewed-on: https://go-review.googlesource.com/22202
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/runtime/sigtab_linux_generic.go | 2 +-
 src/runtime/sigtab_linux_mips64x.go | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/sigtab_linux_generic.go b/src/runtime/sigtab_linux_generic.go
index 32c40c4768..e97497f18c 100644
--- a/src/runtime/sigtab_linux_generic.go
+++ b/src/runtime/sigtab_linux_generic.go
@@ -45,7 +45,7 @@ var sigtable = [...]sigTabT{
 	/* 28 */ {_SigNotify, "SIGWINCH: window size change"},
 	/* 29 */ {_SigNotify, "SIGIO: i/o now possible"},
 	/* 30 */ {_SigNotify, "SIGPWR: power failure restart"},
-	/* 31 */ {_SigNotify, "SIGSYS: bad system call"},
+	/* 31 */ {_SigThrow + _SigUnblock, "SIGSYS: bad system call"},
 	/* 32 */ {_SigSetStack + _SigUnblock, "signal 32"}, /* SIGCANCEL; see issue 6997 */
 	/* 33 */ {_SigSetStack + _SigUnblock, "signal 33"}, /* SIGSETXID; see issues 3871, 9400, 12498 */
 	/* 34 */ {_SigNotify, "signal 34"},
diff --git a/src/runtime/sigtab_linux_mips64x.go b/src/runtime/sigtab_linux_mips64x.go
index dbd50f7b1f..f7d81811ba 100644
--- a/src/runtime/sigtab_linux_mips64x.go
+++ b/src/runtime/sigtab_linux_mips64x.go
@@ -25,7 +25,7 @@ var sigtable = [...]sigTabT{
 	/* 9 */ {0, "SIGKILL: kill"},
 	/* 10 */ {_SigPanic + _SigUnblock, "SIGBUS: bus error"},
 	/* 11 */ {_SigPanic + _SigUnblock, "SIGSEGV: segmentation violation"},
-	/* 12 */ {_SigNotify, "SIGSYS: bad system call"},
+	/* 12 */ {_SigThrow + _SigUnblock, "SIGSYS: bad system call"},
 	/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
 	/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
 	/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
-- 
cgit v1.3


From 0150f15a924a7b4ac0c794012f6b12c8aa406b54 Mon Sep 17 00:00:00 2001
From: Josh Bleecher Snyder <josharian@gmail.com>
Date: Tue, 19 Apr 2016 15:14:26 -0700
Subject: runtime: call mallocgc directly from makeslice and growslice
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The extra checks provided by newarray are
redundant in these cases.

This shrinks the call stack expected by the pprof test
by one frame.

name                      old time/op  new time/op  delta
MakeSlice-8               34.3ns ± 2%  30.5ns ± 3%  -11.03%  (p=0.000 n=24+22)
GrowSlicePtr-8             134ns ± 2%   129ns ± 3%   -3.25%  (p=0.000 n=25+24)

Change-Id: Icd828655906b921c732701fd9d61da3fa217b0af
Reviewed-on: https://go-review.googlesource.com/22276
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/runtime/pprof/mprof_test.go | 2 +-
 src/runtime/slice.go            | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/pprof/mprof_test.go b/src/runtime/pprof/mprof_test.go
index d15102c703..0fff9d46d9 100644
--- a/src/runtime/pprof/mprof_test.go
+++ b/src/runtime/pprof/mprof_test.go
@@ -82,7 +82,7 @@ func TestMemoryProfiler(t *testing.T) {
 #	0x[0-9,a-f]+	runtime/pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+	.*/runtime/pprof/mprof_test.go:61
 `, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun),
 
-		fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+		fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
 #	0x[0-9,a-f]+	runtime/pprof_test\.allocateTransient2M\+0x[0-9,a-f]+	.*/runtime/pprof/mprof_test.go:27
 #	0x[0-9,a-f]+	runtime/pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+	.*/runtime/pprof/mprof_test.go:62
 `, memoryProfilerRun, (2<<20)*memoryProfilerRun),
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index f9414d7658..873e97ebff 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -55,7 +55,12 @@ func makeslice(t *slicetype, len64, cap64 int64) slice {
 		panic(errorString("makeslice: cap out of range"))
 	}
 
-	p := newarray(t.elem, uintptr(cap))
+	et := t.elem
+	var flags uint32
+	if et.kind&kindNoPointers != 0 {
+		flags = flagNoScan
+	}
+	p := mallocgc(et.size*uintptr(cap), et, flags)
 	return slice{p, len, cap}
 }
 
@@ -130,7 +135,7 @@ func growslice(t *slicetype, old slice, cap int) slice {
 		memclr(add(p, lenmem), capmem-lenmem)
 	} else {
 		// Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory.
-		p = newarray(et, uintptr(newcap))
+		p = mallocgc(capmem, et, 0)
 		if !writeBarrier.enabled {
 			memmove(p, old.array, lenmem)
 		} else {
-- 
cgit v1.3


From bfe0cbdc50cbc6a632d1e5ebbdcc625d69451935 Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Tue, 19 Apr 2016 15:38:59 -0700
Subject: cmd/compile,runtime: pass elem type to {make,grow}slice

No point in passing the slice type to these functions.
All they need is the element type.  One less indirection,
maybe a few fewer []T type descriptors in the binary.

Change-Id: Ib0b83b5f14ca21d995ecc199ce8ac00c4eb375e6
Reviewed-on: https://go-review.googlesource.com/22275
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
---
 src/cmd/compile/internal/gc/cgen.go |  2 +-
 src/cmd/compile/internal/gc/ssa.go  |  2 +-
 src/cmd/compile/internal/gc/walk.go |  8 ++++----
 src/runtime/slice.go                | 16 +++++++---------
 4 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/gc/cgen.go b/src/cmd/compile/internal/gc/cgen.go
index 658cc8a50e..5c5bedaa31 100644
--- a/src/cmd/compile/internal/gc/cgen.go
+++ b/src/cmd/compile/internal/gc/cgen.go
@@ -2876,7 +2876,7 @@ func cgen_append(n, res *Node) {
 	arg.Addable = true
 	arg.Xoffset = Ctxt.FixedFrameSize()
 	arg.Type = Ptrto(Types[TUINT8])
-	Cgen(typename(res.Type), &arg)
+	Cgen(typename(res.Type.Elem()), &arg)
 	arg.Xoffset += int64(Widthptr)
 
 	arg.Type = Types[Tptr]
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index c4008c9ce1..11e362c116 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -2185,7 +2185,7 @@ func (s *state) append(n *Node, inplace bool) *ssa.Value {
 
 	// Call growslice
 	s.startBlock(grow)
-	taddr := s.newValue1A(ssa.OpAddr, Types[TUINTPTR], &ssa.ExternSymbol{Types[TUINTPTR], typenamesym(n.Type)}, s.sb)
+	taddr := s.newValue1A(ssa.OpAddr, Types[TUINTPTR], &ssa.ExternSymbol{Types[TUINTPTR], typenamesym(n.Type.Elem())}, s.sb)
 
 	r := s.rtcall(growslice, true, []*Type{pt, Types[TINT], Types[TINT]}, taddr, p, l, c, nl)
 
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index e4d93339a9..82ac74ae33 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -1420,11 +1420,11 @@ opswitch:
 			r = walkexpr(r, init)
 			n = r
 		} else {
-			// makeslice(t *Type, nel int64, max int64) (ary []any)
+			// makeslice(et *Type, nel int64, max int64) (ary []any)
 			fn := syslook("makeslice")
 
 			fn = substArgTypes(fn, t.Elem()) // any-1
-			n = mkcall1(fn, n.Type, init, typename(n.Type), conv(l, Types[TINT64]), conv(r, Types[TINT64]))
+			n = mkcall1(fn, n.Type, init, typename(t.Elem()), conv(l, Types[TINT64]), conv(r, Types[TINT64]))
 		}
 
 	case ORUNESTR:
@@ -2799,7 +2799,7 @@ func appendslice(n *Node, init *Nodes) *Node {
 	fn = substArgTypes(fn, s.Type.Elem(), s.Type.Elem())
 
 	// s = growslice(T, s, n)
-	nif.Nbody.Set1(Nod(OAS, s, mkcall1(fn, s.Type, &nif.Ninit, typename(s.Type), s, nn)))
+	nif.Nbody.Set1(Nod(OAS, s, mkcall1(fn, s.Type, &nif.Ninit, typename(s.Type.Elem()), s, nn)))
 	l = append(l, nif)
 
 	// s = s[:n]
@@ -2929,7 +2929,7 @@ func walkappend(n *Node, init *Nodes, dst *Node) *Node {
 	fn = substArgTypes(fn, ns.Type.Elem(), ns.Type.Elem())
 
 	nx.Nbody.Set1(Nod(OAS, ns,
-		mkcall1(fn, ns.Type, &nx.Ninit, typename(ns.Type), ns,
+		mkcall1(fn, ns.Type, &nx.Ninit, typename(ns.Type.Elem()), ns,
 			Nod(OADD, Nod(OLEN, ns, nil), na))))
 
 	l = append(l, nx)
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index 873e97ebff..e86c1ce2c8 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -37,14 +37,14 @@ func maxSliceCap(elemsize uintptr) uintptr {
 }
 
 // TODO: take uintptrs instead of int64s?
-func makeslice(t *slicetype, len64, cap64 int64) slice {
+func makeslice(et *_type, len64, cap64 int64) slice {
 	// NOTE: The len > maxElements check here is not strictly necessary,
 	// but it produces a 'len out of range' error instead of a 'cap out of range' error
 	// when someone does make([]T, bignumber). 'cap out of range' is true too,
 	// but since the cap is only being supplied implicitly, saying len is clearer.
 	// See issue 4085.
 
-	maxElements := maxSliceCap(t.elem.size)
+	maxElements := maxSliceCap(et.size)
 	len := int(len64)
 	if len64 < 0 || int64(len) != len64 || uintptr(len) > maxElements {
 		panic(errorString("makeslice: len out of range"))
@@ -55,7 +55,6 @@ func makeslice(t *slicetype, len64, cap64 int64) slice {
 		panic(errorString("makeslice: cap out of range"))
 	}
 
-	et := t.elem
 	var flags uint32
 	if et.kind&kindNoPointers != 0 {
 		flags = flagNoScan
@@ -65,7 +64,7 @@ func makeslice(t *slicetype, len64, cap64 int64) slice {
 }
 
 // growslice handles slice growth during append.
-// It is passed the slice type, the old slice, and the desired new minimum capacity,
+// It is passed the slice element type, the old slice, and the desired new minimum capacity,
 // and it returns a new slice with at least that capacity, with the old data
 // copied into it.
 // The new slice's length is set to the old slice's length,
@@ -74,16 +73,15 @@ func makeslice(t *slicetype, len64, cap64 int64) slice {
 // to calculate where to write new values during an append.
 // TODO: When the old backend is gone, reconsider this decision.
 // The SSA backend might prefer the new length or to return only ptr/cap and save stack space.
-func growslice(t *slicetype, old slice, cap int) slice {
+func growslice(et *_type, old slice, cap int) slice {
 	if raceenabled {
-		callerpc := getcallerpc(unsafe.Pointer(&t))
-		racereadrangepc(old.array, uintptr(old.len*int(t.elem.size)), callerpc, funcPC(growslice))
+		callerpc := getcallerpc(unsafe.Pointer(&et))
+		racereadrangepc(old.array, uintptr(old.len*int(et.size)), callerpc, funcPC(growslice))
 	}
 	if msanenabled {
-		msanread(old.array, uintptr(old.len*int(t.elem.size)))
+		msanread(old.array, uintptr(old.len*int(et.size)))
 	}
 
-	et := t.elem
 	if et.size == 0 {
 		if cap < old.cap {
 			panic(errorString("growslice: cap out of range"))
-- 
cgit v1.3


From 001e8e8070e8ed3a578dbad93cc3f70257e965bd Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Tue, 19 Apr 2016 19:35:10 -0700
Subject: runtime: simplify mallocgc flag argument

mallocgc can calculate noscan itself.  The only remaining
flag argument is needzero, so we just make that a boolean arg.

Fixes #15379

Change-Id: I839a70790b2a0c9dbcee2600052bfbd6c8148e20
Reviewed-on: https://go-review.googlesource.com/22290
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/chan.go      |  2 +-
 src/runtime/malloc.go    | 48 +++++++++++-------------------------------------
 src/runtime/mfinal.go    |  2 +-
 src/runtime/os1_plan9.go |  4 ++--
 src/runtime/panic.go     |  2 +-
 src/runtime/select.go    |  2 +-
 src/runtime/slice.go     | 11 +++--------
 src/runtime/string.go    |  6 +++---
 8 files changed, 23 insertions(+), 54 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index 8543cb4c9c..3fb0236785 100644
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -74,7 +74,7 @@ func makechan(t *chantype, size int64) *hchan {
 		// buf points into the same allocation, elemtype is persistent.
 		// SudoG's are referenced from their owning thread so they can't be collected.
 		// TODO(dvyukov,rlh): Rethink when collector can move allocated objects.
-		c = (*hchan)(mallocgc(hchanSize+uintptr(size)*elem.size, nil, flagNoScan))
+		c = (*hchan)(mallocgc(hchanSize+uintptr(size)*elem.size, nil, true))
 		if size > 0 && elem.size != 0 {
 			c.buf = add(unsafe.Pointer(c), hchanSize)
 		} else {
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index a3e55ec2fb..95d24a467a 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -87,9 +87,6 @@ import (
 const (
 	debugMalloc = false
 
-	flagNoScan = _FlagNoScan
-	flagNoZero = _FlagNoZero
-
 	maxTinySize   = _TinySize
 	tinySizeClass = _TinySizeClass
 	maxSmallSize  = _MaxSmallSize
@@ -487,16 +484,10 @@ func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
 // base address for all 0-byte allocations
 var zerobase uintptr
 
-const (
-	// flags to malloc
-	_FlagNoScan = 1 << 0 // GC doesn't have to scan object
-	_FlagNoZero = 1 << 1 // don't zero memory
-)
-
 // Allocate an object of size bytes.
 // Small objects are allocated from the per-P cache's free lists.
 // Large objects (> 32 kB) are allocated straight from the heap.
-func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
+func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
 	if gcphase == _GCmarktermination {
 		throw("mallocgc called with gcphase == _GCmarktermination")
 	}
@@ -505,10 +496,6 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
 		return unsafe.Pointer(&zerobase)
 	}
 
-	if flags&flagNoScan == 0 && typ == nil {
-		throw("malloc missing type")
-	}
-
 	if debug.sbrk != 0 {
 		align := uintptr(16)
 		if typ != nil {
@@ -553,14 +540,15 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
 	c := gomcache()
 	var s *mspan
 	var x unsafe.Pointer
+	noscan := typ == nil || typ.kind&kindNoPointers != 0
 	if size <= maxSmallSize {
-		if flags&flagNoScan != 0 && size < maxTinySize {
+		if noscan && size < maxTinySize {
 			// Tiny allocator.
 			//
 			// Tiny allocator combines several tiny allocation requests
 			// into a single memory block. The resulting memory block
 			// is freed when all subobjects are unreachable. The subobjects
-			// must be FlagNoScan (don't have pointers), this ensures that
+			// must be noscan (don't have pointers), this ensures that
 			// the amount of potentially wasted memory is bounded.
 			//
 			// Size of the memory block used for combining (maxTinySize) is tunable.
@@ -650,7 +638,7 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
 			// prefetchnta offers best performance, see change list message.
 			prefetchnta(uintptr(v.ptr().next))
 			x = unsafe.Pointer(v)
-			if flags&flagNoZero == 0 {
+			if needzero {
 				v.ptr().next = 0
 				if size > 2*sys.PtrSize && ((*[2]uintptr)(x))[1] != 0 {
 					memclr(unsafe.Pointer(v), size)
@@ -661,13 +649,13 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
 		var s *mspan
 		shouldhelpgc = true
 		systemstack(func() {
-			s = largeAlloc(size, flags)
+			s = largeAlloc(size, needzero)
 		})
 		x = unsafe.Pointer(uintptr(s.start << pageShift))
 		size = s.elemsize
 	}
 
-	if flags&flagNoScan != 0 {
+	if noscan {
 		// All objects are pre-marked as noscan. Nothing to do.
 	} else {
 		// If allocating a defer+arg block, now that we've picked a malloc size
@@ -747,7 +735,7 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
 	return x
 }
 
-func largeAlloc(size uintptr, flag uint32) *mspan {
+func largeAlloc(size uintptr, needzero bool) *mspan {
 	// print("largeAlloc size=", size, "\n")
 
 	if size+_PageSize < size {
@@ -763,7 +751,7 @@ func largeAlloc(size uintptr, flag uint32) *mspan {
 	// pays the debt down to npage pages.
 	deductSweepCredit(npages*_PageSize, npages)
 
-	s := mheap_.alloc(npages, 0, true, flag&_FlagNoZero == 0)
+	s := mheap_.alloc(npages, 0, true, needzero)
 	if s == nil {
 		throw("out of memory")
 	}
@@ -774,11 +762,7 @@ func largeAlloc(size uintptr, flag uint32) *mspan {
 
 // implementation of new builtin
 func newobject(typ *_type) unsafe.Pointer {
-	flags := uint32(0)
-	if typ.kind&kindNoPointers != 0 {
-		flags |= flagNoScan
-	}
-	return mallocgc(typ.size, typ, flags)
+	return mallocgc(typ.size, typ, true)
 }
 
 //go:linkname reflect_unsafe_New reflect.unsafe_New
@@ -788,14 +772,10 @@ func reflect_unsafe_New(typ *_type) unsafe.Pointer {
 
 // implementation of make builtin for slices
 func newarray(typ *_type, n uintptr) unsafe.Pointer {
-	flags := uint32(0)
-	if typ.kind&kindNoPointers != 0 {
-		flags |= flagNoScan
-	}
 	if int(n) < 0 || n > maxSliceCap(typ.size) {
 		panic(plainError("runtime: allocation size out of range"))
 	}
-	return mallocgc(typ.size*n, typ, flags)
+	return mallocgc(typ.size*n, typ, true)
 }
 
 //go:linkname reflect_unsafe_NewArray reflect.unsafe_NewArray
@@ -803,12 +783,6 @@ func reflect_unsafe_NewArray(typ *_type, n uintptr) unsafe.Pointer {
 	return newarray(typ, n)
 }
 
-// rawmem returns a chunk of pointerless memory. It is
-// not zeroed.
-func rawmem(size uintptr) unsafe.Pointer {
-	return mallocgc(size, nil, flagNoScan|flagNoZero)
-}
-
 func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
 	mp.mcache.next_sample = nextSample()
 	mProf_Malloc(x, size)
diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go
index b862f019b6..f698e72709 100644
--- a/src/runtime/mfinal.go
+++ b/src/runtime/mfinal.go
@@ -172,7 +172,7 @@ func runfinq() {
 					// all not yet finalized objects are stored in finq.
 					// If we do not mark it as FlagNoScan,
 					// the last finalized object is not collected.
-					frame = mallocgc(framesz, nil, flagNoScan)
+					frame = mallocgc(framesz, nil, true)
 					framecap = framesz
 				}
 
diff --git a/src/runtime/os1_plan9.go b/src/runtime/os1_plan9.go
index 2c257442ba..eb7a0c6481 100644
--- a/src/runtime/os1_plan9.go
+++ b/src/runtime/os1_plan9.go
@@ -17,10 +17,10 @@ func mpreinit(mp *m) {
 	// Initialize stack and goroutine for note handling.
 	mp.gsignal = malg(32 * 1024)
 	mp.gsignal.m = mp
-	mp.notesig = (*int8)(mallocgc(_ERRMAX, nil, _FlagNoScan))
+	mp.notesig = (*int8)(mallocgc(_ERRMAX, nil, true))
 	// Initialize stack for handling strings from the
 	// errstr system call, as used in package syscall.
-	mp.errstr = (*byte)(mallocgc(_ERRMAX, nil, _FlagNoScan))
+	mp.errstr = (*byte)(mallocgc(_ERRMAX, nil, true))
 }
 
 func msigsave(mp *m) {
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index 59fbc83369..382a20e4e7 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -205,7 +205,7 @@ func newdefer(siz int32) *_defer {
 	if d == nil {
 		// Allocate new defer+args.
 		total := roundupsize(totaldefersize(uintptr(siz)))
-		d = (*_defer)(mallocgc(total, deferType, 0))
+		d = (*_defer)(mallocgc(total, deferType, true))
 	}
 	d.siz = siz
 	gp := mp.curg
diff --git a/src/runtime/select.go b/src/runtime/select.go
index 9810db5453..433048fb79 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -626,7 +626,7 @@ const (
 func reflect_rselect(cases []runtimeSelect) (chosen int, recvOK bool) {
 	// flagNoScan is safe here, because all objects are also referenced from cases.
 	size := selectsize(uintptr(len(cases)))
-	sel := (*hselect)(mallocgc(size, nil, flagNoScan))
+	sel := (*hselect)(mallocgc(size, nil, true))
 	newselect(sel, int64(size), int32(len(cases)))
 	r := new(bool)
 	for i := range cases {
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index e86c1ce2c8..e15e6c4dc6 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -43,7 +43,6 @@ func makeslice(et *_type, len64, cap64 int64) slice {
 	// when someone does make([]T, bignumber). 'cap out of range' is true too,
 	// but since the cap is only being supplied implicitly, saying len is clearer.
 	// See issue 4085.
-
 	maxElements := maxSliceCap(et.size)
 	len := int(len64)
 	if len64 < 0 || int64(len) != len64 || uintptr(len) > maxElements {
@@ -55,11 +54,7 @@ func makeslice(et *_type, len64, cap64 int64) slice {
 		panic(errorString("makeslice: cap out of range"))
 	}
 
-	var flags uint32
-	if et.kind&kindNoPointers != 0 {
-		flags = flagNoScan
-	}
-	p := mallocgc(et.size*uintptr(cap), et, flags)
+	p := mallocgc(et.size*uintptr(cap), et, true)
 	return slice{p, len, cap}
 }
 
@@ -128,12 +123,12 @@ func growslice(et *_type, old slice, cap int) slice {
 
 	var p unsafe.Pointer
 	if et.kind&kindNoPointers != 0 {
-		p = rawmem(capmem)
+		p = mallocgc(capmem, nil, false)
 		memmove(p, old.array, lenmem)
 		memclr(add(p, lenmem), capmem-lenmem)
 	} else {
 		// Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory.
-		p = mallocgc(capmem, et, 0)
+		p = mallocgc(capmem, et, true)
 		if !writeBarrier.enabled {
 			memmove(p, old.array, lenmem)
 		} else {
diff --git a/src/runtime/string.go b/src/runtime/string.go
index 2d20e0a9c3..112ce5d588 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -284,7 +284,7 @@ func stringiter2(s string, k int) (int, rune) {
 // The storage is not zeroed. Callers should use
 // b to set the string contents and then drop b.
 func rawstring(size int) (s string, b []byte) {
-	p := mallocgc(uintptr(size), nil, flagNoScan|flagNoZero)
+	p := mallocgc(uintptr(size), nil, false)
 
 	stringStructOf(&s).str = p
 	stringStructOf(&s).len = size
@@ -302,7 +302,7 @@ func rawstring(size int) (s string, b []byte) {
 // rawbyteslice allocates a new byte slice. The byte slice is not zeroed.
 func rawbyteslice(size int) (b []byte) {
 	cap := roundupsize(uintptr(size))
-	p := mallocgc(cap, nil, flagNoScan|flagNoZero)
+	p := mallocgc(cap, nil, false)
 	if cap != uintptr(size) {
 		memclr(add(p, uintptr(size)), cap-uintptr(size))
 	}
@@ -317,7 +317,7 @@ func rawruneslice(size int) (b []rune) {
 		throw("out of memory")
 	}
 	mem := roundupsize(uintptr(size) * 4)
-	p := mallocgc(mem, nil, flagNoScan|flagNoZero)
+	p := mallocgc(mem, nil, false)
 	if mem != uintptr(size)*4 {
 		memclr(add(p, uintptr(size)*4), mem-uintptr(size)*4)
 	}
-- 
cgit v1.3


From 60fd32a47fdffb95d3646c9fc75acc9beff67183 Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Tue, 19 Apr 2016 08:31:04 -0700
Subject: cmd/compile: change the way we handle large map values

mapaccess{1,2} returns a pointer to the value.  When the key
is not in the map, it returns a pointer to zeroed memory.
Currently, for large map values we have a complicated scheme which
dynamically allocates zeroed memory for this purpose.  It is ugly
code and requires an atomic.Load in a bunch of places we'd rather
not have it.

Switch to a scheme where callsites of mapaccess{1,2} which expect
large return values pass in a pointer to zeroed memory that
mapaccess can return if the key is not found.  This avoids the
atomic.Load on all map accesses with a few extra instructions only
for the large value accesses, plus a bit of bss space.

There was a time (1.4 & 1.5?) where we did something like this but
all the tricks to make the right size zero value were done by the
linker.  That scheme broke in the presence of dynamic linking.
The scheme in this CL works even with dynamic linking.

Fixes #12337

Change-Id: Ic2d0319944af33bbb59785938d9ab80958d1b4b1
Reviewed-on: https://go-review.googlesource.com/22221
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Hudson-Doyle <michael.hudson@canonical.com>
---
 src/cmd/compile/internal/gc/builtin.go         |  2 +
 src/cmd/compile/internal/gc/builtin/runtime.go |  2 +
 src/cmd/compile/internal/gc/go.go              |  3 ++
 src/cmd/compile/internal/gc/main.go            |  5 ++
 src/cmd/compile/internal/gc/obj.go             |  5 ++
 src/cmd/compile/internal/gc/reflect.go         | 24 ++++++++++
 src/cmd/compile/internal/gc/walk.go            | 18 +++++--
 src/runtime/hashmap.go                         | 65 +++++++++-----------------
 src/runtime/hashmap_fast.go                    | 33 +++++++------
 src/runtime/map_test.go                        | 16 +++++++
 10 files changed, 110 insertions(+), 63 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/gc/builtin.go b/src/cmd/compile/internal/gc/builtin.go
index 411c7b8605..b593d11296 100644
--- a/src/cmd/compile/internal/gc/builtin.go
+++ b/src/cmd/compile/internal/gc/builtin.go
@@ -70,10 +70,12 @@ const runtimeimport = "" +
 	"func @\"\".mapaccess1_fast32 (@\"\".mapType·2 *byte, @\"\".hmap·3 map[any]any, @\"\".key·4 any) (@\"\".val·1 *any)\n" +
 	"func @\"\".mapaccess1_fast64 (@\"\".mapType·2 *byte, @\"\".hmap·3 map[any]any, @\"\".key·4 any) (@\"\".val·1 *any)\n" +
 	"func @\"\".mapaccess1_faststr (@\"\".mapType·2 *byte, @\"\".hmap·3 map[any]any, @\"\".key·4 any) (@\"\".val·1 *any)\n" +
+	"func @\"\".mapaccess1_fat (@\"\".mapType·2 *byte, @\"\".hmap·3 map[any]any, @\"\".key·4 *any, @\"\".zero·5 *byte) (@\"\".val·1 *any)\n" +
 	"func @\"\".mapaccess2 (@\"\".mapType·3 *byte, @\"\".hmap·4 map[any]any, @\"\".key·5 *any) (@\"\".val·1 *any, @\"\".pres·2 bool)\n" +
 	"func @\"\".mapaccess2_fast32 (@\"\".mapType·3 *byte, @\"\".hmap·4 map[any]any, @\"\".key·5 any) (@\"\".val·1 *any, @\"\".pres·2 bool)\n" +
 	"func @\"\".mapaccess2_fast64 (@\"\".mapType·3 *byte, @\"\".hmap·4 map[any]any, @\"\".key·5 any) (@\"\".val·1 *any, @\"\".pres·2 bool)\n" +
 	"func @\"\".mapaccess2_faststr (@\"\".mapType·3 *byte, @\"\".hmap·4 map[any]any, @\"\".key·5 any) (@\"\".val·1 *any, @\"\".pres·2 bool)\n" +
+	"func @\"\".mapaccess2_fat (@\"\".mapType·3 *byte, @\"\".hmap·4 map[any]any, @\"\".key·5 *any, @\"\".zero·6 *byte) (@\"\".val·1 *any, @\"\".pres·2 bool)\n" +
 	"func @\"\".mapassign1 (@\"\".mapType·1 *byte, @\"\".hmap·2 map[any]any, @\"\".key·3 *any, @\"\".val·4 *any)\n" +
 	"func @\"\".mapiterinit (@\"\".mapType·1 *byte, @\"\".hmap·2 map[any]any, @\"\".hiter·3 *any)\n" +
 	"func @\"\".mapdelete (@\"\".mapType·1 *byte, @\"\".hmap·2 map[any]any, @\"\".key·3 *any)\n" +
diff --git a/src/cmd/compile/internal/gc/builtin/runtime.go b/src/cmd/compile/internal/gc/builtin/runtime.go
index 584368a144..e9316cb313 100644
--- a/src/cmd/compile/internal/gc/builtin/runtime.go
+++ b/src/cmd/compile/internal/gc/builtin/runtime.go
@@ -89,10 +89,12 @@ func mapaccess1(mapType *byte, hmap map[any]any, key *any) (val *any)
 func mapaccess1_fast32(mapType *byte, hmap map[any]any, key any) (val *any)
 func mapaccess1_fast64(mapType *byte, hmap map[any]any, key any) (val *any)
 func mapaccess1_faststr(mapType *byte, hmap map[any]any, key any) (val *any)
+func mapaccess1_fat(mapType *byte, hmap map[any]any, key *any, zero *byte) (val *any)
 func mapaccess2(mapType *byte, hmap map[any]any, key *any) (val *any, pres bool)
 func mapaccess2_fast32(mapType *byte, hmap map[any]any, key any) (val *any, pres bool)
 func mapaccess2_fast64(mapType *byte, hmap map[any]any, key any) (val *any, pres bool)
 func mapaccess2_faststr(mapType *byte, hmap map[any]any, key any) (val *any, pres bool)
+func mapaccess2_fat(mapType *byte, hmap map[any]any, key *any, zero *byte) (val *any, pres bool)
 func mapassign1(mapType *byte, hmap map[any]any, key *any, val *any)
 func mapiterinit(mapType *byte, hmap map[any]any, hiter *any)
 func mapdelete(mapType *byte, hmap map[any]any, key *any)
diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go
index af9aaf0dae..87b6121c8e 100644
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -175,6 +175,9 @@ var unsafepkg *Pkg // package unsafe
 
 var trackpkg *Pkg // fake package for field tracking
 
+var mappkg *Pkg // fake package for map zero value
+var zerosize int64
+
 var Tptr EType // either TPTR32 or TPTR64
 
 var myimportpath string
diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go
index 37e8a17886..2afd262fed 100644
--- a/src/cmd/compile/internal/gc/main.go
+++ b/src/cmd/compile/internal/gc/main.go
@@ -137,6 +137,11 @@ func Main() {
 	typepkg = mkpkg("type")
 	typepkg.Name = "type"
 
+	// pseudo-package used for map zero values
+	mappkg = mkpkg("go.map")
+	mappkg.Name = "go.map"
+	mappkg.Prefix = "go.map"
+
 	goroot = obj.Getgoroot()
 	goos = obj.Getgoos()
 
diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go
index b60f78f638..fab611fdb5 100644
--- a/src/cmd/compile/internal/gc/obj.go
+++ b/src/cmd/compile/internal/gc/obj.go
@@ -87,6 +87,11 @@ func dumpobj() {
 	dumpglobls()
 	externdcl = tmp
 
+	if zerosize > 0 {
+		zero := Pkglookup("zero", mappkg)
+		ggloblsym(zero, int32(zerosize), obj.DUPOK|obj.RODATA)
+	}
+
 	dumpdata()
 	obj.Writeobjdirect(Ctxt, bout.Writer)
 
diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go
index 5031045c64..4792f88abe 100644
--- a/src/cmd/compile/internal/gc/reflect.go
+++ b/src/cmd/compile/internal/gc/reflect.go
@@ -1689,3 +1689,27 @@ func (p *GCProg) emit(t *Type, offset int64) {
 		}
 	}
 }
+
+// zeroaddr returns the address of a symbol with at least
+// size bytes of zeros.
+func zeroaddr(size int64) *Node {
+	if size >= 1<<31 {
+		Fatalf("map value too big %d", size)
+	}
+	if zerosize < size {
+		zerosize = size
+	}
+	s := Pkglookup("zero", mappkg)
+	if s.Def == nil {
+		x := newname(s)
+		x.Type = Types[TUINT8]
+		x.Class = PEXTERN
+		x.Typecheck = 1
+		s.Def = x
+	}
+	z := Nod(OADDR, s.Def, nil)
+	z.Type = Ptrto(Types[TUINT8])
+	z.Addable = true
+	z.Typecheck = 1
+	return z
+}
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index 82ac74ae33..8cce85de9a 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -864,8 +864,14 @@ opswitch:
 		//   a = *var
 		a := n.List.First()
 
-		fn := mapfn(p, t)
-		r = mkcall1(fn, fn.Type.Results(), init, typename(t), r.Left, key)
+		if w := t.Val().Width; w <= 1024 { // 1024 must match ../../../../runtime/hashmap.go:maxZero
+			fn := mapfn(p, t)
+			r = mkcall1(fn, fn.Type.Results(), init, typename(t), r.Left, key)
+		} else {
+			fn := mapfn("mapaccess2_fat", t)
+			z := zeroaddr(w)
+			r = mkcall1(fn, fn.Type.Results(), init, typename(t), r.Left, key, z)
+		}
 
 		// mapaccess2* returns a typed bool, but due to spec changes,
 		// the boolean result of i.(T) is now untyped so we make it the
@@ -1222,7 +1228,13 @@ opswitch:
 			p = "mapaccess1"
 		}
 
-		n = mkcall1(mapfn(p, t), Ptrto(t.Val()), init, typename(t), n.Left, key)
+		if w := t.Val().Width; w <= 1024 { // 1024 must match ../../../../runtime/hashmap.go:maxZero
+			n = mkcall1(mapfn(p, t), Ptrto(t.Val()), init, typename(t), n.Left, key)
+		} else {
+			p = "mapaccess1_fat"
+			z := zeroaddr(w)
+			n = mkcall1(mapfn(p, t), Ptrto(t.Val()), init, typename(t), n.Left, key, z)
+		}
 		n = Nod(OIND, n, nil)
 		n.Type = t.Val()
 		n.Typecheck = 1
diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go
index 4f5d03d983..ff59faab5d 100644
--- a/src/runtime/hashmap.go
+++ b/src/runtime/hashmap.go
@@ -236,9 +236,6 @@ func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap {
 		throw("need padding in bucket (value)")
 	}
 
-	// make sure zeroptr is large enough
-	mapzero(t.elem)
-
 	// find size parameter which will hold the requested # of elements
 	B := uint8(0)
 	for ; hint > bucketCnt && float32(hint) > loadFactor*float32(uintptr(1)<<B); B++ {
@@ -283,7 +280,7 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
 		msanread(key, t.key.size)
 	}
 	if h == nil || h.count == 0 {
-		return atomic.Loadp(unsafe.Pointer(&zeroptr))
+		return unsafe.Pointer(&zeroVal[0])
 	}
 	if h.flags&hashWriting != 0 {
 		throw("concurrent map read and map write")
@@ -321,7 +318,7 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
 		}
 		b = b.overflow(t)
 		if b == nil {
-			return atomic.Loadp(unsafe.Pointer(&zeroptr))
+			return unsafe.Pointer(&zeroVal[0])
 		}
 	}
 }
@@ -337,7 +334,7 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool)
 		msanread(key, t.key.size)
 	}
 	if h == nil || h.count == 0 {
-		return atomic.Loadp(unsafe.Pointer(&zeroptr)), false
+		return unsafe.Pointer(&zeroVal[0]), false
 	}
 	if h.flags&hashWriting != 0 {
 		throw("concurrent map read and map write")
@@ -375,7 +372,7 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool)
 		}
 		b = b.overflow(t)
 		if b == nil {
-			return atomic.Loadp(unsafe.Pointer(&zeroptr)), false
+			return unsafe.Pointer(&zeroVal[0]), false
 		}
 	}
 }
@@ -426,6 +423,22 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe
 	}
 }
 
+func mapaccess1_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) unsafe.Pointer {
+	v := mapaccess1(t, h, key)
+	if v == unsafe.Pointer(&zeroVal[0]) {
+		return zero
+	}
+	return v
+}
+
+func mapaccess2_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) (unsafe.Pointer, bool) {
+	v := mapaccess1(t, h, key)
+	if v == unsafe.Pointer(&zeroVal[0]) {
+		return zero, false
+	}
+	return v, true
+}
+
 func mapassign1(t *maptype, h *hmap, key unsafe.Pointer, val unsafe.Pointer) {
 	if h == nil {
 		panic(plainError("assignment to entry in nil map"))
@@ -1044,39 +1057,5 @@ func reflect_ismapkey(t *_type) bool {
 	return ismapkey(t)
 }
 
-var zerolock mutex
-
-const initialZeroSize = 1024
-
-var zeroinitial [initialZeroSize]byte
-
-// All accesses to zeroptr and zerosize must be atomic so that they
-// can be accessed without locks in the common case.
-var zeroptr unsafe.Pointer = unsafe.Pointer(&zeroinitial)
-var zerosize uintptr = initialZeroSize
-
-// mapzero ensures that zeroptr points to a buffer large enough to
-// serve as the zero value for t.
-func mapzero(t *_type) {
-	// Is the type small enough for existing buffer?
-	cursize := uintptr(atomic.Loadp(unsafe.Pointer(&zerosize)))
-	if t.size <= cursize {
-		return
-	}
-
-	// Allocate a new buffer.
-	lock(&zerolock)
-	cursize = uintptr(atomic.Loadp(unsafe.Pointer(&zerosize)))
-	if cursize < t.size {
-		for cursize < t.size {
-			cursize *= 2
-			if cursize == 0 {
-				// need >2GB zero on 32-bit machine
-				throw("map element too large")
-			}
-		}
-		atomic.StorepNoWB(unsafe.Pointer(&zeroptr), persistentalloc(cursize, 64, &memstats.other_sys))
-		atomic.StorepNoWB(unsafe.Pointer(&zerosize), unsafe.Pointer(zerosize))
-	}
-	unlock(&zerolock)
-}
+const maxZero = 1024 // must match value in ../cmd/compile/internal/gc/walk.go
+var zeroVal [maxZero]byte
diff --git a/src/runtime/hashmap_fast.go b/src/runtime/hashmap_fast.go
index 6a5484edee..8f9bb5a6fc 100644
--- a/src/runtime/hashmap_fast.go
+++ b/src/runtime/hashmap_fast.go
@@ -5,7 +5,6 @@
 package runtime
 
 import (
-	"runtime/internal/atomic"
 	"runtime/internal/sys"
 	"unsafe"
 )
@@ -16,7 +15,7 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
 		racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast32))
 	}
 	if h == nil || h.count == 0 {
-		return atomic.Loadp(unsafe.Pointer(&zeroptr))
+		return unsafe.Pointer(&zeroVal[0])
 	}
 	if h.flags&hashWriting != 0 {
 		throw("concurrent map read and map write")
@@ -50,7 +49,7 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
 		}
 		b = b.overflow(t)
 		if b == nil {
-			return atomic.Loadp(unsafe.Pointer(&zeroptr))
+			return unsafe.Pointer(&zeroVal[0])
 		}
 	}
 }
@@ -61,7 +60,7 @@ func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) {
 		racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast32))
 	}
 	if h == nil || h.count == 0 {
-		return atomic.Loadp(unsafe.Pointer(&zeroptr)), false
+		return unsafe.Pointer(&zeroVal[0]), false
 	}
 	if h.flags&hashWriting != 0 {
 		throw("concurrent map read and map write")
@@ -95,7 +94,7 @@ func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) {
 		}
 		b = b.overflow(t)
 		if b == nil {
-			return atomic.Loadp(unsafe.Pointer(&zeroptr)), false
+			return unsafe.Pointer(&zeroVal[0]), false
 		}
 	}
 }
@@ -106,7 +105,7 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
 		racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast64))
 	}
 	if h == nil || h.count == 0 {
-		return atomic.Loadp(unsafe.Pointer(&zeroptr))
+		return unsafe.Pointer(&zeroVal[0])
 	}
 	if h.flags&hashWriting != 0 {
 		throw("concurrent map read and map write")
@@ -140,7 +139,7 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
 		}
 		b = b.overflow(t)
 		if b == nil {
-			return atomic.Loadp(unsafe.Pointer(&zeroptr))
+			return unsafe.Pointer(&zeroVal[0])
 		}
 	}
 }
@@ -151,7 +150,7 @@ func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) {
 		racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast64))
 	}
 	if h == nil || h.count == 0 {
-		return atomic.Loadp(unsafe.Pointer(&zeroptr)), false
+		return unsafe.Pointer(&zeroVal[0]), false
 	}
 	if h.flags&hashWriting != 0 {
 		throw("concurrent map read and map write")
@@ -185,7 +184,7 @@ func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) {
 		}
 		b = b.overflow(t)
 		if b == nil {
-			return atomic.Loadp(unsafe.Pointer(&zeroptr)), false
+			return unsafe.Pointer(&zeroVal[0]), false
 		}
 	}
 }
@@ -196,7 +195,7 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
 		racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr))
 	}
 	if h == nil || h.count == 0 {
-		return atomic.Loadp(unsafe.Pointer(&zeroptr))
+		return unsafe.Pointer(&zeroVal[0])
 	}
 	if h.flags&hashWriting != 0 {
 		throw("concurrent map read and map write")
@@ -220,7 +219,7 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
 					return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize))
 				}
 			}
-			return atomic.Loadp(unsafe.Pointer(&zeroptr))
+			return unsafe.Pointer(&zeroVal[0])
 		}
 		// long key, try not to do more comparisons than necessary
 		keymaybe := uintptr(bucketCnt)
@@ -258,7 +257,7 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
 				return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize))
 			}
 		}
-		return atomic.Loadp(unsafe.Pointer(&zeroptr))
+		return unsafe.Pointer(&zeroVal[0])
 	}
 dohash:
 	hash := t.key.alg.hash(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
@@ -290,7 +289,7 @@ dohash:
 		}
 		b = b.overflow(t)
 		if b == nil {
-			return atomic.Loadp(unsafe.Pointer(&zeroptr))
+			return unsafe.Pointer(&zeroVal[0])
 		}
 	}
 }
@@ -301,7 +300,7 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
 		racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr))
 	}
 	if h == nil || h.count == 0 {
-		return atomic.Loadp(unsafe.Pointer(&zeroptr)), false
+		return unsafe.Pointer(&zeroVal[0]), false
 	}
 	if h.flags&hashWriting != 0 {
 		throw("concurrent map read and map write")
@@ -325,7 +324,7 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
 					return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true
 				}
 			}
-			return atomic.Loadp(unsafe.Pointer(&zeroptr)), false
+			return unsafe.Pointer(&zeroVal[0]), false
 		}
 		// long key, try not to do more comparisons than necessary
 		keymaybe := uintptr(bucketCnt)
@@ -361,7 +360,7 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
 				return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)), true
 			}
 		}
-		return atomic.Loadp(unsafe.Pointer(&zeroptr)), false
+		return unsafe.Pointer(&zeroVal[0]), false
 	}
 dohash:
 	hash := t.key.alg.hash(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0))
@@ -393,7 +392,7 @@ dohash:
 		}
 		b = b.overflow(t)
 		if b == nil {
-			return atomic.Loadp(unsafe.Pointer(&zeroptr)), false
+			return unsafe.Pointer(&zeroVal[0]), false
 		}
 	}
 }
diff --git a/src/runtime/map_test.go b/src/runtime/map_test.go
index 9d2894cb6f..496f8e8868 100644
--- a/src/runtime/map_test.go
+++ b/src/runtime/map_test.go
@@ -317,6 +317,22 @@ func TestBigItems(t *testing.T) {
 	}
 }
 
+func TestMapHugeZero(t *testing.T) {
+	type T [4000]byte
+	m := map[int]T{}
+	x := m[0]
+	if x != (T{}) {
+		t.Errorf("map value not zero")
+	}
+	y, ok := m[0]
+	if ok {
+		t.Errorf("map value should be missing")
+	}
+	if y != (T{}) {
+		t.Errorf("map value not zero")
+	}
+}
+
 type empty struct {
 }
 
-- 
cgit v1.3


From 7e460e70d90295cf08ea627c0a0fff170aba5518 Mon Sep 17 00:00:00 2001
From: Martin Möhrmann <martisch@uos.de>
Date: Wed, 20 Apr 2016 18:00:52 +0200
Subject: runtime: use type int to specify size for newarray
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Consistently use type int for the size argument of
runtime.newarray, runtime.reflect_unsafe_NewArray
and reflect.unsafe_NewArray.

Change-Id: Ic77bf2dde216c92ca8c49462f8eedc0385b6314e
Reviewed-on: https://go-review.googlesource.com/22311
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Martin Möhrmann <martisch@uos.de>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/chan.go    |  2 +-
 src/runtime/hashmap.go |  4 ++--
 src/runtime/malloc.go  | 10 +++++-----
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index 3fb0236785..712ad8cef9 100644
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -84,7 +84,7 @@ func makechan(t *chantype, size int64) *hchan {
 		}
 	} else {
 		c = new(hchan)
-		c.buf = newarray(elem, uintptr(size))
+		c.buf = newarray(elem, int(size))
 	}
 	c.elemsize = uint16(elem.size)
 	c.elemtype = elem
diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go
index ff59faab5d..509cab2f0f 100644
--- a/src/runtime/hashmap.go
+++ b/src/runtime/hashmap.go
@@ -246,7 +246,7 @@ func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap {
 	// If hint is large zeroing this memory could take a while.
 	buckets := bucket
 	if B != 0 {
-		buckets = newarray(t.bucket, uintptr(1)<<B)
+		buckets = newarray(t.bucket, 1<<B)
 	}
 
 	// initialize Hmap
@@ -821,7 +821,7 @@ func hashGrow(t *maptype, h *hmap) {
 		throw("evacuation not done in time")
 	}
 	oldbuckets := h.buckets
-	newbuckets := newarray(t.bucket, uintptr(1)<<(h.B+1))
+	newbuckets := newarray(t.bucket, 1<<(h.B+1))
 	flags := h.flags &^ (iterator | oldIterator)
 	if h.flags&iterator != 0 {
 		flags |= oldIterator
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 95d24a467a..30f2a4fca5 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -770,16 +770,16 @@ func reflect_unsafe_New(typ *_type) unsafe.Pointer {
 	return newobject(typ)
 }
 
-// implementation of make builtin for slices
-func newarray(typ *_type, n uintptr) unsafe.Pointer {
-	if int(n) < 0 || n > maxSliceCap(typ.size) {
+// newarray allocates an array of n elements of type typ.
+func newarray(typ *_type, n int) unsafe.Pointer {
+	if n < 0 || uintptr(n) > maxSliceCap(typ.size) {
 		panic(plainError("runtime: allocation size out of range"))
 	}
-	return mallocgc(typ.size*n, typ, true)
+	return mallocgc(typ.size*uintptr(n), typ, true)
 }
 
 //go:linkname reflect_unsafe_NewArray reflect.unsafe_NewArray
-func reflect_unsafe_NewArray(typ *_type, n uintptr) unsafe.Pointer {
+func reflect_unsafe_NewArray(typ *_type, n int) unsafe.Pointer {
 	return newarray(typ, n)
 }
 
-- 
cgit v1.3


From 479501c14c9d36e27727bc4b4294d57c5ddc29d0 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Sat, 16 Apr 2016 18:27:38 -0400
Subject: runtime: count black allocations toward scan work

Currently we count black allocations toward the scannable heap size,
but not toward the scan work we've done so far. This is clearly
inconsistent (we have, in effect, scanned these allocations and since
they're already black, we're not going to scan them again). Worse, it
means we don't count black allocations toward the scannable heap size
as of the *next* GC because this is based on the amount of scan work
we did in this cycle.

Fix this by counting black allocations as scan work. Currently the GC
spends very little time in allocate-black mode, so this probably
hasn't been a problem, but this will become important when we switch
to always allocating black.

Change-Id: If6ff693b070c385b65b6ecbbbbf76283a0f9d990
Reviewed-on: https://go-review.googlesource.com/22119
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/malloc.go  | 8 +++++---
 src/runtime/mgc.go     | 9 +++++++--
 src/runtime/mgcmark.go | 4 +++-
 3 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 30f2a4fca5..3f437bc02f 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -655,6 +655,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
 		size = s.elemsize
 	}
 
+	var scanSize uintptr
 	if noscan {
 		// All objects are pre-marked as noscan. Nothing to do.
 	} else {
@@ -673,11 +674,12 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
 			// pointers, GC has to scan to the last
 			// element.
 			if typ.ptrdata != 0 {
-				c.local_scan += dataSize - typ.size + typ.ptrdata
+				scanSize = dataSize - typ.size + typ.ptrdata
 			}
 		} else {
-			c.local_scan += typ.ptrdata
+			scanSize = typ.ptrdata
 		}
+		c.local_scan += scanSize
 
 		// Ensure that the stores above that initialize x to
 		// type-safe memory and set the heap bits occur before
@@ -694,7 +696,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
 	// a race marking the bit.
 	if gcphase == _GCmarktermination || gcBlackenPromptly {
 		systemstack(func() {
-			gcmarknewobject_m(uintptr(x), size)
+			gcmarknewobject_m(uintptr(x), size, scanSize)
 		})
 	}
 
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 1c184db10b..d120dae05a 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -304,7 +304,8 @@ type gcControllerState struct {
 	// scanWork is the total scan work performed this cycle. This
 	// is updated atomically during the cycle. Updates occur in
 	// bounded batches, since it is both written and read
-	// throughout the cycle.
+	// throughout the cycle. At the end of the cycle, this is how
+	// much of the retained heap is scannable.
 	//
 	// Currently this is the bytes of heap scanned. For most uses,
 	// this is an opaque unit of work, but for estimation the
@@ -1578,9 +1579,13 @@ func gcMark(start_time int64) {
 	work.markrootDone = true
 
 	for i := 0; i < int(gomaxprocs); i++ {
-		if !allp[i].gcw.empty() {
+		gcw := &allp[i].gcw
+		if !gcw.empty() {
 			throw("P has cached GC work at end of mark termination")
 		}
+		if gcw.scanWork != 0 || gcw.bytesMarked != 0 {
+			throw("P has unflushed stats at end of mark termination")
+		}
 	}
 
 	if trace.enabled {
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 1ab8315a29..0d05838987 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -1134,12 +1134,14 @@ func gcDumpObject(label string, obj, off uintptr) {
 
 // If gcBlackenPromptly is true we are in the second mark phase phase so we allocate black.
 //go:nowritebarrier
-func gcmarknewobject_m(obj, size uintptr) {
+func gcmarknewobject_m(obj, size, scanSize uintptr) {
 	if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
 		throw("gcmarknewobject called while doing checkmark")
 	}
 	heapBitsForAddr(obj).setMarked()
 	atomic.Xadd64(&work.bytesMarked, int64(size))
+	gcw := &getg().m.p.ptr().gcw
+	gcw.scanWork += int64(scanSize)
 }
 
 // Checkmarking
-- 
cgit v1.3


From 64a26b79ac781118d4fa364f884ce8080ba97870 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Sun, 17 Apr 2016 11:42:37 -0400
Subject: runtime: simplify/optimize allocate-black a bit

Currently allocating black switches to the system stack (which is
probably a historical accident) and atomically updates the global
bytes marked stat. Since we're about to depend on this much more,
optimize it a bit by putting it back on the regular stack and updating
the per-P bytes marked stat, which gets lazily folded into the global
bytes marked stat.

Change-Id: Ibbe16e5382d3fd2256e4381f88af342bf7020b04
Reviewed-on: https://go-review.googlesource.com/22170
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/malloc.go  |  4 +---
 src/runtime/mgcmark.go | 11 ++++++++---
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 3f437bc02f..9e1f47e1e6 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -695,9 +695,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
 	// This may be racing with GC so do it atomically if there can be
 	// a race marking the bit.
 	if gcphase == _GCmarktermination || gcBlackenPromptly {
-		systemstack(func() {
-			gcmarknewobject_m(uintptr(x), size, scanSize)
-		})
+		gcmarknewobject(uintptr(x), size, scanSize)
 	}
 
 	if raceenabled {
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 0d05838987..ad64b735a5 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -1132,15 +1132,20 @@ func gcDumpObject(label string, obj, off uintptr) {
 	}
 }
 
-// If gcBlackenPromptly is true we are in the second mark phase phase so we allocate black.
+// gcmarknewobject marks a newly allocated object black. obj must
+// not contain any non-nil pointers.
+//
+// This is nosplit so it can manipulate a gcWork without preemption.
+//
 //go:nowritebarrier
-func gcmarknewobject_m(obj, size, scanSize uintptr) {
+//go:nosplit
+func gcmarknewobject(obj, size, scanSize uintptr) {
 	if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
 		throw("gcmarknewobject called while doing checkmark")
 	}
 	heapBitsForAddr(obj).setMarked()
-	atomic.Xadd64(&work.bytesMarked, int64(size))
 	gcw := &getg().m.p.ptr().gcw
+	gcw.bytesMarked += uint64(size)
 	gcw.scanWork += int64(scanSize)
 }
 
-- 
cgit v1.3


From 6002e01e34f5b847eb4c49ca84e9623d7204f5f2 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Wed, 30 Mar 2016 17:02:23 -0400
Subject: runtime: allocate black during GC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we allocate white for most of concurrent marking. This is
based on the classical argument that it produces less floating
garbage, since allocations during GC may not get linked into the heap
and allocating white lets us reclaim these. However, it's not clear
how often this actually happens, especially since our write barrier
shades any pointer as soon as it's installed in the heap regardless of
the color of the slot.

On the other hand, allocating black has several advantages that seem
to significantly outweigh this downside.

1) It naturally bounds the total scan work to the live heap size at
the start of a GC cycle. Allocating white does not, and thus depends
entirely on assists to prevent the heap from growing faster than it
can be scanned.

2) It reduces the total amount of scan work per GC cycle by the size
of newly allocated objects that are linked into the heap graph, since
objects allocated black never need to be scanned.

3) It reduces total write barrier work since more objects will already
be black when they are linked into the heap graph.

This gives a slight overall improvement in benchmarks.

name              old time/op  new time/op  delta
XBenchGarbage-12  2.24ms ± 0%  2.21ms ± 1%  -1.32%  (p=0.000 n=18+17)

name                      old time/op    new time/op    delta
BinaryTree17-12              2.60s ± 3%     2.53s ± 3%  -2.56%  (p=0.000 n=20+20)
Fannkuch11-12                2.08s ± 1%     2.08s ± 0%    ~     (p=0.452 n=19+19)
FmtFprintfEmpty-12          45.1ns ± 2%    45.3ns ± 2%    ~     (p=0.367 n=19+20)
FmtFprintfString-12          131ns ± 3%     129ns ± 0%  -1.60%  (p=0.000 n=20+16)
FmtFprintfInt-12             122ns ± 0%     121ns ± 2%  -0.86%  (p=0.000 n=16+19)
FmtFprintfIntInt-12          187ns ± 1%     186ns ± 1%    ~     (p=0.514 n=18+19)
FmtFprintfPrefixedInt-12     189ns ± 0%     188ns ± 1%  -0.54%  (p=0.000 n=16+18)
FmtFprintfFloat-12           256ns ± 0%     254ns ± 1%  -0.43%  (p=0.000 n=17+19)
FmtManyArgs-12               769ns ± 0%     763ns ± 0%  -0.72%  (p=0.000 n=18+18)
GobDecode-12                7.08ms ± 2%    7.00ms ± 1%  -1.22%  (p=0.000 n=20+20)
GobEncode-12                5.88ms ± 0%    5.88ms ± 1%    ~     (p=0.406 n=18+18)
Gzip-12                      214ms ± 0%     214ms ± 1%    ~     (p=0.103 n=17+18)
Gunzip-12                   37.6ms ± 0%    37.6ms ± 0%    ~     (p=0.563 n=17+17)
HTTPClientServer-12         77.2µs ± 3%    76.9µs ± 2%    ~     (p=0.606 n=20+20)
JSONEncode-12               15.1ms ± 1%    15.2ms ± 2%    ~     (p=0.138 n=19+19)
JSONDecode-12               53.3ms ± 1%    53.1ms ± 1%  -0.33%  (p=0.000 n=19+18)
Mandelbrot200-12            4.04ms ± 1%    4.04ms ± 1%    ~     (p=0.075 n=19+18)
GoParse-12                  3.30ms ± 1%    3.29ms ± 1%  -0.57%  (p=0.000 n=18+16)
RegexpMatchEasy0_32-12      69.5ns ± 1%    69.9ns ± 3%    ~     (p=0.822 n=18+20)
RegexpMatchEasy0_1K-12       237ns ± 1%     237ns ± 0%    ~     (p=0.398 n=19+18)
RegexpMatchEasy1_32-12      69.8ns ± 2%    69.5ns ± 1%    ~     (p=0.090 n=20+16)
RegexpMatchEasy1_1K-12       371ns ± 1%     372ns ± 1%    ~     (p=0.178 n=19+20)
RegexpMatchMedium_32-12      108ns ± 2%     108ns ± 3%    ~     (p=0.124 n=20+19)
RegexpMatchMedium_1K-12     33.9µs ± 2%    34.2µs ± 4%    ~     (p=0.309 n=20+19)
RegexpMatchHard_32-12       1.75µs ± 2%    1.77µs ± 4%  +1.28%  (p=0.018 n=19+18)
RegexpMatchHard_1K-12       52.7µs ± 1%    53.4µs ± 4%  +1.23%  (p=0.013 n=15+18)
Revcomp-12                   354ms ± 1%     359ms ± 4%  +1.27%  (p=0.043 n=20+20)
Template-12                 63.6ms ± 2%    63.7ms ± 2%    ~     (p=0.654 n=20+18)
TimeParse-12                 313ns ± 1%     316ns ± 2%  +0.80%  (p=0.014 n=17+20)
TimeFormat-12                332ns ± 0%     329ns ± 0%  -0.66%  (p=0.000 n=16+16)
[Geo mean]                  51.7µs         51.6µs       -0.09%

Change-Id: I2214a6a0e4f544699ea166073249a8efdf080dc0
Reviewed-on: https://go-review.googlesource.com/21323
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/malloc.go |  4 ++--
 src/runtime/mgc.go    | 20 ++++++++++++++------
 2 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 9e1f47e1e6..081d1419cb 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -690,11 +690,11 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
 		publicationBarrier()
 	}
 
-	// GCmarkterminate allocates black
+	// Allocate black during GC.
 	// All slots hold nil so no scanning is needed.
 	// This may be racing with GC so do it atomically if there can be
 	// a race marking the bit.
-	if gcphase == _GCmarktermination || gcBlackenPromptly {
+	if gcphase != _GCoff {
 		gcmarknewobject(uintptr(x), size, scanSize)
 	}
 
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index d120dae05a..194439337b 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -24,6 +24,10 @@
 // Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world.
 // Concurrency and Computation: Practice and Experience 15(3-5), 2003.
 //
+// TODO(austin): The rest of this comment is woefully out of date and
+// needs to be rewritten. There is no distinct scan phase any more and
+// we allocate black during GC.
+//
 //  0. Set phase = GCscan from GCoff.
 //  1. Wait for all P's to acknowledge phase change.
 //         At this point all goroutines have passed through a GC safepoint and
@@ -244,7 +248,7 @@ var gcBlackenPromptly bool
 
 const (
 	_GCoff             = iota // GC not running; sweeping in background, write barrier disabled
-	_GCmark                   // GC marking roots and workbufs, write barrier ENABLED
+	_GCmark                   // GC marking roots and workbufs: allocate black, write barrier ENABLED
 	_GCmarktermination        // GC mark termination: allocate black, P's help GC, write barrier ENABLED
 )
 
@@ -467,14 +471,18 @@ func (c *gcControllerState) startCycle() {
 // It should only be called when gcBlackenEnabled != 0 (because this
 // is when assists are enabled and the necessary statistics are
 // available).
+//
+// TODO: Consider removing the periodic controller update altogether.
+// Since we switched to allocating black, in theory we shouldn't have
+// to change the assist ratio. However, this is still a useful hook
+// that we've found many uses for when experimenting.
 func (c *gcControllerState) revise() {
 	// Compute the expected scan work remaining.
 	//
-	// Note that the scannable heap size is likely to increase
-	// during the GC cycle. This is why it's important to revise
-	// the assist ratio throughout the cycle: if the scannable
-	// heap size increases, the assist ratio based on the initial
-	// scannable heap size may target too little scan work.
+	// Note that we currently count allocations during GC as both
+	// scannable heap (heap_scan) and scan work completed
+	// (scanWork), so this difference won't be changed by
+	// allocations during GC.
 	//
 	// This particular estimate is a strict upper bound on the
 	// possible remaining scan work for the current heap.
-- 
cgit v1.3


From c8bd293e56d17c5599ec62aee63fe819366adcab Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Wed, 30 Mar 2016 17:15:15 -0400
Subject: runtime: eliminate floating garbage estimate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently when we compute the trigger for the next GC, we do it based
on an estimate of the reachable heap size at the start of the GC
cycle, which is itself based on an estimate of the floating garbage.
This was introduced by 4655aad to fix a bad feedback loop that allowed
the heap to grow to many times the true reachable size.

However, this estimate gets easily confused by rapidly allocating
applications, and, worse, it's different from the heap size the trigger
controller uses to compute the trigger itself. This results in the
trigger controller often thinking that GC finished before it started.
Since this would be a pretty great outcome from its perspective, it
sets the trigger for the next cycle as close to the next goal as
possible (which is limited to 95% of the goal).

Furthermore, the bad feedback loop this estimate originally fixed
seems not to happen any more, suggesting it was fixed more correctly
by some other change in the meantime. Finally, with the change to
allocate black, it shouldn't even be theoretically possible for this
bad feedback loop to occur.

Hence, eliminate the floating garbage estimate and simply consider the
reachable heap to be the marked heap. This harms overall throughput
slightly for allocation-heavy benchmarks, but significantly improves
mutator availability.

Fixes #12204. This brings the average trigger in this benchmark from
0.95 (the cap) to 0.7 and the active GC utilization from ~90% to ~45%.

Updates #14951. This makes the trigger controller much better behaved,
so it pulls the trigger lower if assists are consuming a lot of CPU
like it's supposed to, increasing mutator availability.

name              old time/op  new time/op  delta
XBenchGarbage-12  2.21ms ± 1%  2.28ms ± 3%  +3.29%  (p=0.000 n=17+17)

Some of this slowdown we paid for in earlier commits. Relative to the
start of the series to switch to allocate-black (the parent of "count
black allocations toward scan work"), the garbage benchmark is 2.62%
slower.

name                      old time/op    new time/op    delta
BinaryTree17-12              2.53s ± 3%     2.53s ± 3%    ~     (p=0.708 n=20+19)
Fannkuch11-12                2.08s ± 0%     2.08s ± 0%  -0.22%  (p=0.002 n=19+18)
FmtFprintfEmpty-12          45.3ns ± 2%    45.2ns ± 3%    ~     (p=0.505 n=20+20)
FmtFprintfString-12          129ns ± 0%     131ns ± 2%  +1.80%  (p=0.000 n=16+19)
FmtFprintfInt-12             121ns ± 2%     121ns ± 2%    ~     (p=0.768 n=19+19)
FmtFprintfIntInt-12          186ns ± 1%     188ns ± 3%  +0.99%  (p=0.000 n=19+19)
FmtFprintfPrefixedInt-12     188ns ± 1%     188ns ± 1%    ~     (p=0.947 n=18+16)
FmtFprintfFloat-12           254ns ± 1%     255ns ± 1%  +0.30%  (p=0.002 n=19+17)
FmtManyArgs-12               763ns ± 0%     770ns ± 0%  +0.92%  (p=0.000 n=18+18)
GobDecode-12                7.00ms ± 1%    7.04ms ± 1%  +0.61%  (p=0.049 n=20+20)
GobEncode-12                5.88ms ± 1%    5.88ms ± 0%    ~     (p=0.641 n=18+19)
Gzip-12                      214ms ± 1%     215ms ± 1%  +0.43%  (p=0.002 n=18+19)
Gunzip-12                   37.6ms ± 0%    37.6ms ± 0%  +0.11%  (p=0.015 n=17+18)
HTTPClientServer-12         76.9µs ± 2%    78.1µs ± 2%  +1.44%  (p=0.000 n=20+18)
JSONEncode-12               15.2ms ± 2%    15.1ms ± 1%    ~     (p=0.271 n=19+18)
JSONDecode-12               53.1ms ± 1%    53.3ms ± 0%  +0.49%  (p=0.000 n=18+19)
Mandelbrot200-12            4.04ms ± 1%    4.03ms ± 0%  -0.33%  (p=0.005 n=18+18)
GoParse-12                  3.29ms ± 1%    3.28ms ± 1%    ~     (p=0.146 n=16+17)
RegexpMatchEasy0_32-12      69.9ns ± 3%    69.5ns ± 1%    ~     (p=0.785 n=20+19)
RegexpMatchEasy0_1K-12       237ns ± 0%     237ns ± 0%    ~     (p=1.000 n=18+18)
RegexpMatchEasy1_32-12      69.5ns ± 1%    69.2ns ± 1%  -0.44%  (p=0.020 n=16+19)
RegexpMatchEasy1_1K-12       372ns ± 1%     371ns ± 2%    ~     (p=0.086 n=20+19)
RegexpMatchMedium_32-12      108ns ± 3%     107ns ± 1%  -1.00%  (p=0.004 n=19+14)
RegexpMatchMedium_1K-12     34.2µs ± 4%    34.0µs ± 2%    ~     (p=0.380 n=19+20)
RegexpMatchHard_32-12       1.77µs ± 4%    1.76µs ± 3%    ~     (p=0.558 n=18+20)
RegexpMatchHard_1K-12       53.4µs ± 4%    52.8µs ± 2%  -1.10%  (p=0.020 n=18+20)
Revcomp-12                   359ms ± 4%     377ms ± 0%  +5.19%  (p=0.000 n=20+18)
Template-12                 63.7ms ± 2%    62.9ms ± 2%  -1.27%  (p=0.005 n=18+20)
TimeParse-12                 316ns ± 2%     313ns ± 1%    ~     (p=0.059 n=20+16)
TimeFormat-12                329ns ± 0%     331ns ± 0%  +0.39%  (p=0.000 n=16+18)
[Geo mean]                  51.6µs         51.7µs       +0.18%

Change-Id: I1dce4640c8205d41717943b021039fffea863c57
Reviewed-on: https://go-review.googlesource.com/21324
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/mgc.go | 23 ++---------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 194439337b..bc157cddbb 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1602,27 +1602,8 @@ func gcMark(start_time int64) {
 
 	cachestats()
 
-	// Compute the reachable heap size at the beginning of the
-	// cycle. This is approximately the marked heap size at the
-	// end (which we know) minus the amount of marked heap that
-	// was allocated after marking began (which we don't know, but
-	// is approximately the amount of heap that was allocated
-	// since marking began).
-	allocatedDuringCycle := memstats.heap_live - work.initialHeapLive
-	if memstats.heap_live < work.initialHeapLive {
-		// This can happen if mCentral_UncacheSpan tightens
-		// the heap_live approximation.
-		allocatedDuringCycle = 0
-	}
-	if work.bytesMarked >= allocatedDuringCycle {
-		memstats.heap_reachable = work.bytesMarked - allocatedDuringCycle
-	} else {
-		// This can happen if most of the allocation during
-		// the cycle never became reachable from the heap.
-		// Just set the reachable heap approximation to 0 and
-		// let the heapminimum kick in below.
-		memstats.heap_reachable = 0
-	}
+	// Update the reachable heap stat.
+	memstats.heap_reachable = work.bytesMarked
 
 	// Trigger the next GC cycle when the allocated heap has grown
 	// by triggerRatio over the reachable heap size. Assume that
-- 
cgit v1.3


From 1492e7db059ea7903110b0725d5ced3134558e73 Mon Sep 17 00:00:00 2001
From: David Crawshaw <crawshaw@golang.org>
Date: Thu, 7 Apr 2016 16:29:16 -0400
Subject: cmd/compile, etc: use nameOff for rtype string

linux/amd64:
	cmd/go:   -8KB (basically nothing)

linux/amd64 PIE:
	cmd/go: -191KB (1.6%)
	jujud:  -1.5MB (1.9%)

Updates #6853
Fixes #15064

Change-Id: I0adbb95685e28be92e8548741df0e11daa0a9b5f
Reviewed-on: https://go-review.googlesource.com/21777
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/cmd/compile/internal/gc/reflect.go |  58 +++++++++------
 src/cmd/link/internal/ld/data.go       |   2 +-
 src/cmd/link/internal/ld/decodesym.go  |  35 +++++----
 src/reflect/all_test.go                |  40 +++++++++-
 src/reflect/export_test.go             |   8 +-
 src/reflect/type.go                    | 129 +++++++++++++++++----------------
 src/runtime/alg.go                     |   8 +-
 src/runtime/error.go                   |   2 +-
 src/runtime/heapdump.go                |   2 +-
 src/runtime/iface.go                   |  24 +++---
 src/runtime/mbitmap.go                 |  10 +--
 src/runtime/mfinal.go                  |  10 +--
 src/runtime/mprof.go                   |   2 +-
 src/runtime/type.go                    |  46 ++++++++----
 14 files changed, 231 insertions(+), 145 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go
index ac36f912b6..1643c2ce4b 100644
--- a/src/cmd/compile/internal/gc/reflect.go
+++ b/src/cmd/compile/internal/gc/reflect.go
@@ -788,14 +788,21 @@ func typeptrdata(t *Type) int64 {
 	}
 }
 
-// tflag is documented in ../../../../reflect/type.go.
-const tflagUncommon = 1
-
-// commonType
-// ../../../../runtime/type.go:/commonType
+// tflag is documented in reflect/type.go.
+//
+// tflag values must be kept in sync with copies in:
+//	cmd/compile/internal/gc/reflect.go
+//	cmd/link/internal/ld/decodesym.go
+//	reflect/type.go
+//	runtime/type.go
+const (
+	tflagUncommon  = 1 << 0
+	tflagExtraStar = 1 << 1
+)
 
 var dcommontype_algarray *Sym
 
+// dcommontype dumps the contents of a reflect.rtype (runtime._type).
 func dcommontype(s *Sym, ot int, t *Type) int {
 	if ot != 0 {
 		Fatalf("dcommontype %d", ot)
@@ -836,7 +843,8 @@ func dcommontype(s *Sym, ot int, t *Type) int {
 	//		kind          uint8
 	//		alg           *typeAlg
 	//		gcdata        *byte
-	//		string        *string
+	//		str           nameOff
+	//		_             int32
 	//	}
 	ot = duintptr(s, ot, uint64(t.Width))
 	ot = duintptr(s, ot, uint64(ptrdata))
@@ -847,6 +855,26 @@ func dcommontype(s *Sym, ot int, t *Type) int {
 	if uncommonSize(t) != 0 {
 		tflag |= tflagUncommon
 	}
+
+	exported := false
+	p := Tconv(t, FmtLeft|FmtUnsigned)
+	// If we're writing out type T,
+	// we are very likely to write out type *T as well.
+	// Use the string "*T"[1:] for "T", so that the two
+	// share storage. This is a cheap way to reduce the
+	// amount of space taken up by reflect strings.
+	if !strings.HasPrefix(p, "*") {
+		p = "*" + p
+		tflag |= tflagExtraStar
+		if t.Sym != nil {
+			exported = exportname(t.Sym.Name)
+		}
+	} else {
+		if t.Elem() != nil && t.Elem().Sym != nil {
+			exported = exportname(t.Elem().Sym.Name)
+		}
+	}
+
 	ot = duint8(s, ot, tflag)
 
 	// runtime (and common sense) expects alignment to be a power of two.
@@ -882,21 +910,9 @@ func dcommontype(s *Sym, ot int, t *Type) int {
 	}
 	ot = dsymptr(s, ot, gcsym, 0) // gcdata
 
-	p := Tconv(t, FmtLeft|FmtUnsigned)
-
-	// If we're writing out type T,
-	// we are very likely to write out type *T as well.
-	// Use the string "*T"[1:] for "T", so that the two
-	// share storage. This is a cheap way to reduce the
-	// amount of space taken up by reflect strings.
-	prefix := 0
-	if !strings.HasPrefix(p, "*") {
-		p = "*" + p
-		prefix = 1
-	}
-	_, symdata := stringsym(p) // string
-	ot = dsymptrLSym(Linksym(s), ot, symdata, prefix)
-	ot = duintxx(s, ot, uint64(len(p)-prefix), Widthint)
+	nsym := dname(p, "", nil, exported)
+	ot = dsymptrOffLSym(Linksym(s), ot, nsym, 0)
+	ot = duint32(s, ot, 0)
 
 	return ot
 }
diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go
index 63caf9cf79..dbd5ad0b75 100644
--- a/src/cmd/link/internal/ld/data.go
+++ b/src/cmd/link/internal/ld/data.go
@@ -1832,7 +1832,7 @@ func dodataSect(symn int, syms []*LSym) (result []*LSym, maxAlign int32) {
 		case obj.STYPELINK:
 			// Sort typelinks by the rtype.string field so the reflect
 			// package can binary search type links.
-			symsSort[i].name = string(decodetype_string(s.R[0].Sym))
+			symsSort[i].name = string(decodetype_str(s.R[0].Sym))
 		}
 	}
 
diff --git a/src/cmd/link/internal/ld/decodesym.go b/src/cmd/link/internal/ld/decodesym.go
index b1c55cf787..330aa6dc13 100644
--- a/src/cmd/link/internal/ld/decodesym.go
+++ b/src/cmd/link/internal/ld/decodesym.go
@@ -16,6 +16,18 @@ import (
 // ../../runtime/type.go, or more specifically, with what
 // ../gc/reflect.c stuffs in these.
 
+// tflag is documented in reflect/type.go.
+//
+// tflag values must be kept in sync with copies in:
+//	cmd/compile/internal/gc/reflect.go
+//	cmd/link/internal/ld/decodesym.go
+//	reflect/type.go
+//	runtime/type.go
+const (
+	tflagUncommon  = 1 << 0
+	tflagExtraStar = 1 << 1
+)
+
 func decode_reloc(s *LSym, off int32) *Reloc {
 	for i := range s.R {
 		if s.R[i].Off == off {
@@ -47,9 +59,9 @@ func decode_inuxi(p []byte, sz int) uint64 {
 	}
 }
 
-func commonsize() int      { return 6*SysArch.PtrSize + 8 } // runtime._type
-func structfieldSize() int { return 3 * SysArch.PtrSize }   // runtime.structfield
-func uncommonSize() int    { return 2 * SysArch.PtrSize }   // runtime.uncommontype
+func commonsize() int      { return 4*SysArch.PtrSize + 8 + 8 } // runtime._type
+func structfieldSize() int { return 3 * SysArch.PtrSize }       // runtime.structfield
+func uncommonSize() int    { return 2 * SysArch.PtrSize }       // runtime.uncommontype
 
 // Type.commonType.kind
 func decodetype_kind(s *LSym) uint8 {
@@ -73,7 +85,6 @@ func decodetype_ptrdata(s *LSym) int64 {
 
 // Type.commonType.tflag
 func decodetype_hasUncommon(s *LSym) bool {
-	const tflagUncommon = 1 // see ../../../../reflect/type.go:/^type.tflag
 	return s.P[2*SysArch.PtrSize+4]&tflagUncommon != 0
 }
 
@@ -211,16 +222,13 @@ func decodetype_structfieldarrayoff(s *LSym, i int) int {
 	return off
 }
 
-// decodetype_string returns the contents of an rtype's string field.
-func decodetype_string(s *LSym) []byte {
-	off := 4*SysArch.PtrSize + 8
-	strlen := int64(decode_inuxi(s.P[off+SysArch.PtrSize:], SysArch.IntSize))
-
-	r := decode_reloc(s, int32(off))
-	if r == nil {
-		return nil
+// decodetype_str returns the contents of an rtype's str field (a nameOff).
+func decodetype_str(s *LSym) string {
+	str := decodetype_name(s, 4*SysArch.PtrSize+8)
+	if s.P[2*SysArch.PtrSize+4]&tflagExtraStar != 0 {
+		return str[1:]
 	}
-	return r.Sym.P[r.Add : r.Add+strlen]
+	return str
 }
 
 // decodetype_name decodes the name from a reflect.name.
@@ -233,7 +241,6 @@ func decodetype_name(s *LSym, off int) string {
 	data := r.Sym.P
 	namelen := int(uint16(data[1]<<8) | uint16(data[2]))
 	return string(data[3 : 3+namelen])
-
 }
 
 func decodetype_structfieldname(s *LSym, i int) string {
diff --git a/src/reflect/all_test.go b/src/reflect/all_test.go
index 4dfae2743d..e88bc880e2 100644
--- a/src/reflect/all_test.go
+++ b/src/reflect/all_test.go
@@ -4175,12 +4175,12 @@ func TestStructOfExportRules(t *testing.T) {
 		},
 		{
 			field:     StructField{Name: "", Type: TypeOf(ΦType{})},
-			mustPanic: true, // TODO(sbinet): creating a struct with UTF-8 fields not supported
+			mustPanic: false,
 			exported:  true,
 		},
 		{
 			field:     StructField{Name: "", Type: TypeOf(φType{})},
-			mustPanic: true, // TODO(sbinet): creating a struct with UTF-8 fields not supported
+			mustPanic: false,
 			exported:  false,
 		},
 		{
@@ -5674,6 +5674,42 @@ func TestNames(t *testing.T) {
 	}
 }
 
+func TestExported(t *testing.T) {
+	type ΦExported struct{}
+	type φUnexported struct{}
+	type BigP *big
+	type P int
+	type p *P
+	type P2 p
+	type p3 p
+
+	type exportTest struct {
+		v    interface{}
+		want bool
+	}
+	exportTests := []exportTest{
+		{D1{}, true},
+		{(*D1)(nil), true},
+		{big{}, false},
+		{(*big)(nil), false},
+		{(BigP)(nil), true},
+		{(*BigP)(nil), true},
+		{ΦExported{}, true},
+		{φUnexported{}, false},
+		{P(0), true},
+		{(p)(nil), false},
+		{(P2)(nil), true},
+		{(p3)(nil), false},
+	}
+
+	for i, test := range exportTests {
+		typ := TypeOf(test.v)
+		if got := IsExported(typ); got != test.want {
+			t.Errorf("%d: %s exported=%v, want %v", i, typ.Name(), got, test.want)
+		}
+	}
+}
+
 type embed struct {
 	EmbedWithUnexpMeth
 }
diff --git a/src/reflect/export_test.go b/src/reflect/export_test.go
index f527434f0d..00189f3353 100644
--- a/src/reflect/export_test.go
+++ b/src/reflect/export_test.go
@@ -51,7 +51,7 @@ func TypeLinks() []string {
 		rodata := sections[i]
 		for _, off := range offs {
 			typ := (*rtype)(resolveTypeOff(unsafe.Pointer(rodata), off))
-			r = append(r, typ.string)
+			r = append(r, typ.String())
 		}
 	}
 	return r
@@ -103,3 +103,9 @@ type OtherPkgFields struct {
 	OtherExported   int
 	otherUnexported int
 }
+
+func IsExported(t Type) bool {
+	typ := t.(*rtype)
+	n := typ.nameOff(typ.str)
+	return n.isExported()
+}
diff --git a/src/reflect/type.go b/src/reflect/type.go
index 0cae69a79c..b1758e6913 100644
--- a/src/reflect/type.go
+++ b/src/reflect/type.go
@@ -242,6 +242,11 @@ const (
 
 // tflag is used by an rtype to signal what extra type information is
 // available in the memory directly following the rtype value.
+//
+// tflag values must be kept in sync with copies in:
+//	cmd/compile/internal/gc/reflect.go
+//	cmd/link/internal/ld/decodesym.go
+//	runtime/type.go
 type tflag uint8
 
 const (
@@ -256,7 +261,13 @@ const (
 	//		u uncommonType
 	//	}
 	//	u := &(*tUncommon)(unsafe.Pointer(t)).u
-	tflagUncommon tflag = 1
+	tflagUncommon tflag = 1 << 0
+
+	// tflagExtraStar means the name in the str field has an
+	// extraneous '*' prefix. This is because for most types T in
+	// a program, the type *T also exists and reusing the str data
+	// saves binary size.
+	tflagExtraStar tflag = 1 << 1
 )
 
 // rtype is the common implementation of most values.
@@ -273,7 +284,8 @@ type rtype struct {
 	kind       uint8    // enumeration for C
 	alg        *typeAlg // algorithm table
 	gcdata     *byte    // garbage collection data
-	string     string   // string form; unnecessary but undeniably useful
+	str        nameOff  // string form
+	_          int32    // unused; keeps rtype always a multiple of ptrSize
 }
 
 // a copy of runtime.typeAlg
@@ -420,6 +432,9 @@ type structType struct {
 // If the import path follows, then 4 bytes at the end of
 // the data form a nameOff. The import path is only set for concrete
 // methods that are defined in a different package than their type.
+//
+// If a name starts with "*", then the exported bit represents
+// whether the pointed to type is exported.
 type name struct {
 	bytes *byte
 }
@@ -724,7 +739,13 @@ func (t *rtype) uncommon() *uncommonType {
 	}
 }
 
-func (t *rtype) String() string { return t.string }
+func (t *rtype) String() string {
+	s := t.nameOff(t.str).name()
+	if t.tflag&tflagExtraStar != 0 {
+		return s[1:]
+	}
+	return s
+}
 
 func (t *rtype) Size() uintptr { return t.size }
 
@@ -833,33 +854,34 @@ func hasPrefix(s, prefix string) bool {
 }
 
 func (t *rtype) Name() string {
-	if hasPrefix(t.string, "map[") {
+	s := t.String()
+	if hasPrefix(s, "map[") {
 		return ""
 	}
-	if hasPrefix(t.string, "struct {") {
+	if hasPrefix(s, "struct {") {
 		return ""
 	}
-	if hasPrefix(t.string, "chan ") {
+	if hasPrefix(s, "chan ") {
 		return ""
 	}
-	if hasPrefix(t.string, "chan<-") {
+	if hasPrefix(s, "chan<-") {
 		return ""
 	}
-	if hasPrefix(t.string, "func(") {
+	if hasPrefix(s, "func(") {
 		return ""
 	}
-	switch t.string[0] {
+	switch s[0] {
 	case '[', '*', '<':
 		return ""
 	}
-	i := len(t.string) - 1
+	i := len(s) - 1
 	for i >= 0 {
-		if t.string[i] == '.' {
+		if s[i] == '.' {
 			break
 		}
 		i--
 	}
-	return t.string[i+1:]
+	return s[i+1:]
 }
 
 func (t *rtype) ChanDir() ChanDir {
@@ -1391,7 +1413,7 @@ func (t *rtype) ptrTo() *rtype {
 	}
 
 	// Look in known types.
-	s := "*" + t.string
+	s := "*" + t.String()
 	for _, tt := range typesByString(s) {
 		p = (*ptrType)(unsafe.Pointer(tt))
 		if p.elem == t {
@@ -1408,7 +1430,7 @@ func (t *rtype) ptrTo() *rtype {
 	prototype := *(**ptrType)(unsafe.Pointer(&iptr))
 	*p = *prototype
 
-	p.string = s
+	p.str = resolveReflectName(newName(s, "", "", false))
 
 	// For the type structures linked into the binary, the
 	// compiler provides a good hash of the string.
@@ -1645,7 +1667,7 @@ func haveIdenticalUnderlyingType(T, V *rtype) bool {
 //
 // and
 //
-//	t1.string < t2.string
+//	t1.String() < t2.String()
 //
 // Note that strings are not unique identifiers for types:
 // there can be more than one with a given string.
@@ -1669,12 +1691,12 @@ func typesByString(s string) []*rtype {
 		section := sections[offsI]
 
 		// We are looking for the first index i where the string becomes >= s.
-		// This is a copy of sort.Search, with f(h) replaced by (*typ[h].string >= s).
+		// This is a copy of sort.Search, with f(h) replaced by (*typ[h].String() >= s).
 		i, j := 0, len(offs)
 		for i < j {
 			h := i + (j-i)/2 // avoid overflow when computing h
 			// i ≤ h < j
-			if !(rtypeOff(section, offs[h]).string >= s) {
+			if !(rtypeOff(section, offs[h]).String() >= s) {
 				i = h + 1 // preserves f(i-1) == false
 			} else {
 				j = h // preserves f(j) == true
@@ -1687,7 +1709,7 @@ func typesByString(s string) []*rtype {
 		// to do a linear scan anyway.
 		for j := i; j < len(offs); j++ {
 			typ := rtypeOff(section, offs[j])
-			if typ.string != s {
+			if typ.String() != s {
 				break
 			}
 			ret = append(ret, typ)
@@ -1783,11 +1805,11 @@ func ChanOf(dir ChanDir, t Type) Type {
 		lookupCache.Unlock()
 		panic("reflect.ChanOf: invalid dir")
 	case SendDir:
-		s = "chan<- " + typ.string
+		s = "chan<- " + typ.String()
 	case RecvDir:
-		s = "<-chan " + typ.string
+		s = "<-chan " + typ.String()
 	case BothDir:
-		s = "chan " + typ.string
+		s = "chan " + typ.String()
 	}
 	for _, tt := range typesByString(s) {
 		ch := (*chanType)(unsafe.Pointer(tt))
@@ -1802,7 +1824,7 @@ func ChanOf(dir ChanDir, t Type) Type {
 	ch := new(chanType)
 	*ch = *prototype
 	ch.dir = uintptr(dir)
-	ch.string = s
+	ch.str = resolveReflectName(newName(s, "", "", false))
 	ch.hash = fnv1(typ.hash, 'c', byte(dir))
 	ch.elem = typ
 
@@ -1832,7 +1854,7 @@ func MapOf(key, elem Type) Type {
 	}
 
 	// Look in known types.
-	s := "map[" + ktyp.string + "]" + etyp.string
+	s := "map[" + ktyp.String() + "]" + etyp.String()
 	for _, tt := range typesByString(s) {
 		mt := (*mapType)(unsafe.Pointer(tt))
 		if mt.key == ktyp && mt.elem == etyp {
@@ -1844,7 +1866,7 @@ func MapOf(key, elem Type) Type {
 	var imap interface{} = (map[unsafe.Pointer]unsafe.Pointer)(nil)
 	mt := new(mapType)
 	*mt = **(**mapType)(unsafe.Pointer(&imap))
-	mt.string = s
+	mt.str = resolveReflectName(newName(s, "", "", false))
 	mt.hash = fnv1(etyp.hash, 'm', byte(ktyp.hash>>24), byte(ktyp.hash>>16), byte(ktyp.hash>>8), byte(ktyp.hash))
 	mt.key = ktyp
 	mt.elem = etyp
@@ -2002,7 +2024,7 @@ func FuncOf(in, out []Type, variadic bool) Type {
 	}
 
 	// Populate the remaining fields of ft and store in cache.
-	ft.string = str
+	ft.str = resolveReflectName(newName(str, "", "", false))
 	funcLookupCache.m[hash] = append(funcLookupCache.m[hash], &ft.rtype)
 
 	return &ft.rtype
@@ -2018,9 +2040,9 @@ func funcStr(ft *funcType) string {
 		}
 		if ft.IsVariadic() && i == int(ft.inCount)-1 {
 			repr = append(repr, "..."...)
-			repr = append(repr, (*sliceType)(unsafe.Pointer(t)).elem.string...)
+			repr = append(repr, (*sliceType)(unsafe.Pointer(t)).elem.String()...)
 		} else {
-			repr = append(repr, t.string...)
+			repr = append(repr, t.String()...)
 		}
 	}
 	repr = append(repr, ')')
@@ -2034,7 +2056,7 @@ func funcStr(ft *funcType) string {
 		if i > 0 {
 			repr = append(repr, ", "...)
 		}
-		repr = append(repr, t.string...)
+		repr = append(repr, t.String()...)
 	}
 	if len(out) > 1 {
 		repr = append(repr, ')')
@@ -2199,8 +2221,8 @@ func bucketOf(ktyp, etyp *rtype) *rtype {
 	b.ptrdata = ptrdata
 	b.kind = kind
 	b.gcdata = gcdata
-	s := "bucket(" + ktyp.string + "," + etyp.string + ")"
-	b.string = s
+	s := "bucket(" + ktyp.String() + "," + etyp.String() + ")"
+	b.str = resolveReflectName(newName(s, "", "", false))
 	return b
 }
 
@@ -2216,7 +2238,7 @@ func SliceOf(t Type) Type {
 	}
 
 	// Look in known types.
-	s := "[]" + typ.string
+	s := "[]" + typ.String()
 	for _, tt := range typesByString(s) {
 		slice := (*sliceType)(unsafe.Pointer(tt))
 		if slice.elem == typ {
@@ -2229,7 +2251,7 @@ func SliceOf(t Type) Type {
 	prototype := *(**sliceType)(unsafe.Pointer(&islice))
 	slice := new(sliceType)
 	*slice = *prototype
-	slice.string = s
+	slice.str = resolveReflectName(newName(s, "", "", false))
 	slice.hash = fnv1(typ.hash, '[')
 	slice.elem = typ
 
@@ -2337,11 +2359,11 @@ func StructOf(fields []StructField) Type {
 				// Embedded ** and *interface{} are illegal
 				elem := ft.Elem()
 				if k := elem.Kind(); k == Ptr || k == Interface {
-					panic("reflect.StructOf: illegal anonymous field type " + ft.string)
+					panic("reflect.StructOf: illegal anonymous field type " + ft.String())
 				}
 				name = elem.String()
 			} else {
-				name = ft.string
+				name = ft.String()
 			}
 			// TODO(sbinet) check for syntactically impossible type names?
 
@@ -2463,7 +2485,7 @@ func StructOf(fields []StructField) Type {
 
 		hash = fnv1(hash, byte(ft.hash>>24), byte(ft.hash>>16), byte(ft.hash>>8), byte(ft.hash))
 
-		repr = append(repr, (" " + ft.string)...)
+		repr = append(repr, (" " + ft.String())...)
 		if f.name.tagLen() > 0 {
 			hash = fnv1(hash, []byte(f.name.tag())...)
 			repr = append(repr, (" " + strconv.Quote(f.name.tag()))...)
@@ -2579,7 +2601,7 @@ func StructOf(fields []StructField) Type {
 		}
 	}
 
-	typ.string = str
+	typ.str = resolveReflectName(newName(str, "", "", false))
 	typ.hash = hash
 	typ.size = size
 	typ.align = typalign
@@ -2691,11 +2713,11 @@ func StructOf(fields []StructField) Type {
 func runtimeStructField(field StructField) structField {
 	exported := field.PkgPath == ""
 	if field.Name == "" {
-		t := field.Type
+		t := field.Type.(*rtype)
 		if t.Kind() == Ptr {
-			t = t.Elem()
+			t = t.Elem().(*rtype)
 		}
-		exported = isExported(t.Name())
+		exported = t.nameOff(t.str).isExported()
 	} else if exported {
 		b0 := field.Name[0]
 		if ('a' <= b0 && b0 <= 'z') || b0 == '_' {
@@ -2711,25 +2733,6 @@ func runtimeStructField(field StructField) structField {
 	}
 }
 
-func isExported(s string) bool {
-	if s == "" {
-		return false
-	}
-	// FIXME(sbinet): handle utf8/runes (see https://golang.org/issue/15064)
-	// TODO: turn rtype.string into a reflect.name type, and put the exported
-	//       bit on there which can be checked here with field.Type.(*rtype).string.isExported()
-	//       When done, remove the documented limitation of StructOf.
-	r := s[0]
-	switch {
-	case 'A' <= r && r <= 'Z':
-		return true
-	case r == '_' || 'a' <= r && r <= 'z':
-		return false
-	default:
-		panic("reflect.StructOf: creating a struct with UTF-8 fields is not supported yet")
-	}
-}
-
 // typeptrdata returns the length in bytes of the prefix of t
 // containing pointer data. Anything after this offset is scalar data.
 // keep in sync with ../cmd/compile/internal/gc/reflect.go
@@ -2779,7 +2782,7 @@ func ArrayOf(count int, elem Type) Type {
 	}
 
 	// Look in known types.
-	s := "[" + strconv.Itoa(count) + "]" + typ.string
+	s := "[" + strconv.Itoa(count) + "]" + typ.String()
 	for _, tt := range typesByString(s) {
 		array := (*arrayType)(unsafe.Pointer(tt))
 		if array.elem == typ {
@@ -2792,7 +2795,7 @@ func ArrayOf(count int, elem Type) Type {
 	prototype := *(**arrayType)(unsafe.Pointer(&iarray))
 	array := new(arrayType)
 	*array = *prototype
-	array.string = s
+	array.str = resolveReflectName(newName(s, "", "", false))
 	array.hash = fnv1(typ.hash, '[')
 	for n := uint32(count); n > 0; n >>= 8 {
 		array.hash = fnv1(array.hash, byte(n))
@@ -3046,11 +3049,11 @@ func funcLayout(t *rtype, rcvr *rtype) (frametype *rtype, argSize, retOffset uin
 
 	var s string
 	if rcvr != nil {
-		s = "methodargs(" + rcvr.string + ")(" + t.string + ")"
+		s = "methodargs(" + rcvr.String() + ")(" + t.String() + ")"
 	} else {
-		s = "funcargs(" + t.string + ")"
+		s = "funcargs(" + t.String() + ")"
 	}
-	x.string = s
+	x.str = resolveReflectName(newName(s, "", "", false))
 
 	// cache result for future callers
 	if layoutCache.m == nil {
diff --git a/src/runtime/alg.go b/src/runtime/alg.go
index 7aacc8cf9b..66943495b5 100644
--- a/src/runtime/alg.go
+++ b/src/runtime/alg.go
@@ -146,7 +146,7 @@ func interhash(p unsafe.Pointer, h uintptr) uintptr {
 	t := tab._type
 	fn := t.alg.hash
 	if fn == nil {
-		panic(errorString("hash of unhashable type " + t._string))
+		panic(errorString("hash of unhashable type " + t.string()))
 	}
 	if isDirectIface(t) {
 		return c1 * fn(unsafe.Pointer(&a.data), h^c0)
@@ -163,7 +163,7 @@ func nilinterhash(p unsafe.Pointer, h uintptr) uintptr {
 	}
 	fn := t.alg.hash
 	if fn == nil {
-		panic(errorString("hash of unhashable type " + t._string))
+		panic(errorString("hash of unhashable type " + t.string()))
 	}
 	if isDirectIface(t) {
 		return c1 * fn(unsafe.Pointer(&a.data), h^c0)
@@ -221,7 +221,7 @@ func efaceeq(x, y eface) bool {
 	}
 	eq := t.alg.equal
 	if eq == nil {
-		panic(errorString("comparing uncomparable type " + t._string))
+		panic(errorString("comparing uncomparable type " + t.string()))
 	}
 	if isDirectIface(t) {
 		return eq(noescape(unsafe.Pointer(&x.data)), noescape(unsafe.Pointer(&y.data)))
@@ -239,7 +239,7 @@ func ifaceeq(x, y iface) bool {
 	t := xtab._type
 	eq := t.alg.equal
 	if eq == nil {
-		panic(errorString("comparing uncomparable type " + t._string))
+		panic(errorString("comparing uncomparable type " + t.string()))
 	}
 	if isDirectIface(t) {
 		return eq(noescape(unsafe.Pointer(&x.data)), noescape(unsafe.Pointer(&y.data)))
diff --git a/src/runtime/error.go b/src/runtime/error.go
index 15f6bdf014..0238c5e592 100644
--- a/src/runtime/error.go
+++ b/src/runtime/error.go
@@ -67,7 +67,7 @@ type stringer interface {
 
 func typestring(x interface{}) string {
 	e := efaceOf(&x)
-	return e._type._string
+	return e._type.string()
 }
 
 // For calling from C.
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index adfd660847..1db29d7cb4 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -184,7 +184,7 @@ func dumptype(t *_type) {
 	dumpint(uint64(uintptr(unsafe.Pointer(t))))
 	dumpint(uint64(t.size))
 	if x := t.uncommon(); x == nil || x.pkgpath.name() == "" {
-		dumpstr(t._string)
+		dumpstr(t.string())
 	} else {
 		pkgpathstr := x.pkgpath.name()
 		pkgpath := stringStructOf(&pkgpathstr)
diff --git a/src/runtime/iface.go b/src/runtime/iface.go
index 352ff77465..007c1ed174 100644
--- a/src/runtime/iface.go
+++ b/src/runtime/iface.go
@@ -38,7 +38,7 @@ func getitab(inter *interfacetype, typ *_type, canfail bool) *itab {
 			return nil
 		}
 		name := inter.typ.nameOff(inter.mhdr[0].name)
-		panic(&TypeAssertionError{"", typ._string, inter.typ._string, name.name()})
+		panic(&TypeAssertionError{"", typ.string(), inter.typ.string(), name.name()})
 	}
 
 	h := itabhash(inter, typ)
@@ -128,7 +128,7 @@ func additab(m *itab, locked, canfail bool) {
 			if locked {
 				unlock(&ifaceLock)
 			}
-			panic(&TypeAssertionError{"", typ._string, inter.typ._string, iname})
+			panic(&TypeAssertionError{"", typ.string(), inter.typ.string(), iname})
 		}
 		m.bad = 1
 		break
@@ -196,18 +196,18 @@ func convT2I(tab *itab, elem unsafe.Pointer, x unsafe.Pointer) (i iface) {
 func panicdottype(have, want, iface *_type) {
 	haveString := ""
 	if have != nil {
-		haveString = have._string
+		haveString = have.string()
 	}
-	panic(&TypeAssertionError{iface._string, haveString, want._string, ""})
+	panic(&TypeAssertionError{iface.string(), haveString, want.string(), ""})
 }
 
 func assertI2T(t *_type, i iface, r unsafe.Pointer) {
 	tab := i.tab
 	if tab == nil {
-		panic(&TypeAssertionError{"", "", t._string, ""})
+		panic(&TypeAssertionError{"", "", t.string(), ""})
 	}
 	if tab._type != t {
-		panic(&TypeAssertionError{tab.inter.typ._string, tab._type._string, t._string, ""})
+		panic(&TypeAssertionError{tab.inter.typ.string(), tab._type.string(), t.string(), ""})
 	}
 	if r != nil {
 		if isDirectIface(t) {
@@ -238,10 +238,10 @@ func assertI2T2(t *_type, i iface, r unsafe.Pointer) bool {
 
 func assertE2T(t *_type, e eface, r unsafe.Pointer) {
 	if e._type == nil {
-		panic(&TypeAssertionError{"", "", t._string, ""})
+		panic(&TypeAssertionError{"", "", t.string(), ""})
 	}
 	if e._type != t {
-		panic(&TypeAssertionError{"", e._type._string, t._string, ""})
+		panic(&TypeAssertionError{"", e._type.string(), t.string(), ""})
 	}
 	if r != nil {
 		if isDirectIface(t) {
@@ -285,7 +285,7 @@ func assertI2E(inter *interfacetype, i iface, r *eface) {
 	tab := i.tab
 	if tab == nil {
 		// explicit conversions require non-nil interface value.
-		panic(&TypeAssertionError{"", "", inter.typ._string, ""})
+		panic(&TypeAssertionError{"", "", inter.typ.string(), ""})
 	}
 	r._type = tab._type
 	r.data = i.data
@@ -322,7 +322,7 @@ func assertI2I(inter *interfacetype, i iface, r *iface) {
 	tab := i.tab
 	if tab == nil {
 		// explicit conversions require non-nil interface value.
-		panic(&TypeAssertionError{"", "", inter.typ._string, ""})
+		panic(&TypeAssertionError{"", "", inter.typ.string(), ""})
 	}
 	if tab.inter == inter {
 		r.tab = tab
@@ -361,7 +361,7 @@ func assertE2I(inter *interfacetype, e eface, r *iface) {
 	t := e._type
 	if t == nil {
 		// explicit conversions require non-nil interface value.
-		panic(&TypeAssertionError{"", "", inter.typ._string, ""})
+		panic(&TypeAssertionError{"", "", inter.typ.string(), ""})
 	}
 	r.tab = getitab(inter, t, false)
 	r.data = e.data
@@ -402,7 +402,7 @@ func reflect_ifaceE2I(inter *interfacetype, e eface, dst *iface) {
 func assertE2E(inter *interfacetype, e eface, r *eface) {
 	if e._type == nil {
 		// explicit conversions require non-nil interface value.
-		panic(&TypeAssertionError{"", "", inter.typ._string, ""})
+		panic(&TypeAssertionError{"", "", inter.typ.string(), ""})
 	}
 	*r = e
 }
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 685c29066b..f025ce1c68 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -461,11 +461,11 @@ func typeBitsBulkBarrier(typ *_type, p, size uintptr) {
 		throw("runtime: typeBitsBulkBarrier without type")
 	}
 	if typ.size != size {
-		println("runtime: typeBitsBulkBarrier with type ", typ._string, " of size ", typ.size, " but memory size", size)
+		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size)
 		throw("runtime: invalid typeBitsBulkBarrier")
 	}
 	if typ.kind&kindGCProg != 0 {
-		println("runtime: typeBitsBulkBarrier with type ", typ._string, " with GC prog")
+		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog")
 		throw("runtime: invalid typeBitsBulkBarrier")
 	}
 	if !writeBarrier.needed {
@@ -916,7 +916,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 	}
 	if nw == 0 {
 		// No pointers! Caller was supposed to check.
-		println("runtime: invalid type ", typ._string)
+		println("runtime: invalid type ", typ.string())
 		throw("heapBitsSetType: called with non-pointer type")
 		return
 	}
@@ -1100,7 +1100,7 @@ Phase4:
 	if doubleCheck {
 		end := heapBitsForAddr(x + size)
 		if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
-			println("ended at wrong bitmap byte for", typ._string, "x", dataSize/typ.size)
+			println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
 			print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
 			print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
 			h0 := heapBitsForAddr(x)
@@ -1136,7 +1136,7 @@ Phase4:
 				}
 			}
 			if have != want {
-				println("mismatch writing bits for", typ._string, "x", dataSize/typ.size)
+				println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
 				print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
 				print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
 				print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go
index f698e72709..e81650d842 100644
--- a/src/runtime/mfinal.go
+++ b/src/runtime/mfinal.go
@@ -274,7 +274,7 @@ func SetFinalizer(obj interface{}, finalizer interface{}) {
 		throw("runtime.SetFinalizer: first argument is nil")
 	}
 	if etyp.kind&kindMask != kindPtr {
-		throw("runtime.SetFinalizer: first argument is " + etyp._string + ", not pointer")
+		throw("runtime.SetFinalizer: first argument is " + etyp.string() + ", not pointer")
 	}
 	ot := (*ptrtype)(unsafe.Pointer(etyp))
 	if ot.elem == nil {
@@ -328,14 +328,14 @@ func SetFinalizer(obj interface{}, finalizer interface{}) {
 	}
 
 	if ftyp.kind&kindMask != kindFunc {
-		throw("runtime.SetFinalizer: second argument is " + ftyp._string + ", not a function")
+		throw("runtime.SetFinalizer: second argument is " + ftyp.string() + ", not a function")
 	}
 	ft := (*functype)(unsafe.Pointer(ftyp))
 	if ft.dotdotdot() {
-		throw("runtime.SetFinalizer: cannot pass " + etyp._string + " to finalizer " + ftyp._string + " because dotdotdot")
+		throw("runtime.SetFinalizer: cannot pass " + etyp.string() + " to finalizer " + ftyp.string() + " because dotdotdot")
 	}
 	if ft.dotdotdot() || ft.inCount != 1 {
-		throw("runtime.SetFinalizer: cannot pass " + etyp._string + " to finalizer " + ftyp._string)
+		throw("runtime.SetFinalizer: cannot pass " + etyp.string() + " to finalizer " + ftyp.string())
 	}
 	fint := ft.in()[0]
 	switch {
@@ -358,7 +358,7 @@ func SetFinalizer(obj interface{}, finalizer interface{}) {
 			goto okarg
 		}
 	}
-	throw("runtime.SetFinalizer: cannot pass " + etyp._string + " to finalizer " + ftyp._string)
+	throw("runtime.SetFinalizer: cannot pass " + etyp.string() + " to finalizer " + ftyp.string())
 okarg:
 	// compute size needed for return parameters
 	nret := uintptr(0)
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index f3b9b4bc78..c3e4e2cb87 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -624,7 +624,7 @@ func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) {
 	if typ == nil {
 		print("tracealloc(", p, ", ", hex(size), ")\n")
 	} else {
-		print("tracealloc(", p, ", ", hex(size), ", ", typ._string, ")\n")
+		print("tracealloc(", p, ", ", hex(size), ", ", typ.string(), ")\n")
 	}
 	if gp.m.curg == nil || gp == gp.m.curg {
 		goroutineheader(gp)
diff --git a/src/runtime/type.go b/src/runtime/type.go
index 31f7ff81b8..0b28fa6d43 100644
--- a/src/runtime/type.go
+++ b/src/runtime/type.go
@@ -8,10 +8,18 @@ package runtime
 
 import "unsafe"
 
-// tflag is documented in ../reflect/type.go.
+// tflag is documented in reflect/type.go.
+//
+// tflag values must be kept in sync with copies in:
+//	cmd/compile/internal/gc/reflect.go
+//	cmd/link/internal/ld/decodesym.go
+//	reflect/type.go
 type tflag uint8
 
-const tflagUncommon tflag = 1
+const (
+	tflagUncommon  tflag = 1 << 0
+	tflagExtraStar tflag = 1 << 1
+)
 
 // Needs to be in sync with ../cmd/compile/internal/ld/decodesym.go:/^func.commonsize,
 // ../cmd/compile/internal/gc/reflect.go:/^func.dcommontype and
@@ -28,8 +36,17 @@ type _type struct {
 	// gcdata stores the GC type data for the garbage collector.
 	// If the KindGCProg bit is set in kind, gcdata is a GC program.
 	// Otherwise it is a ptrmask bitmap. See mbitmap.go for details.
-	gcdata  *byte
-	_string string
+	gcdata *byte
+	str    nameOff
+	_      int32
+}
+
+func (t *_type) string() string {
+	s := t.nameOff(t.str).name()
+	if t.tflag&tflagExtraStar != 0 {
+		return s[1:]
+	}
+	return s
 }
 
 func (t *_type) uncommon() *uncommontype {
@@ -99,33 +116,34 @@ func hasPrefix(s, prefix string) bool {
 }
 
 func (t *_type) name() string {
-	if hasPrefix(t._string, "map[") {
+	s := t.string()
+	if hasPrefix(s, "map[") {
 		return ""
 	}
-	if hasPrefix(t._string, "struct {") {
+	if hasPrefix(s, "struct {") {
 		return ""
 	}
-	if hasPrefix(t._string, "chan ") {
+	if hasPrefix(s, "chan ") {
 		return ""
 	}
-	if hasPrefix(t._string, "chan<-") {
+	if hasPrefix(s, "chan<-") {
 		return ""
 	}
-	if hasPrefix(t._string, "func(") {
+	if hasPrefix(s, "func(") {
 		return ""
 	}
-	switch t._string[0] {
+	switch s[0] {
 	case '[', '*', '<':
 		return ""
 	}
-	i := len(t._string) - 1
+	i := len(s) - 1
 	for i >= 0 {
-		if t._string[i] == '.' {
+		if s[i] == '.' {
 			break
 		}
 		i--
 	}
-	return t._string[i+1:]
+	return s[i+1:]
 }
 
 // reflectOffs holds type offsets defined at run time by the reflect package.
@@ -497,7 +515,7 @@ func typesEqual(t, v *_type) bool {
 	if kind != v.kind&kindMask {
 		return false
 	}
-	if t._string != v._string {
+	if t.string() != v.string() {
 		return false
 	}
 	ut := t.uncommon()
-- 
cgit v1.3


From c165988360457553ccbfa4a09919de3262a4438a Mon Sep 17 00:00:00 2001
From: David Crawshaw <crawshaw@golang.org>
Date: Thu, 7 Apr 2016 21:37:45 -0400
Subject: cmd/compile, etc: use nameOff in uncommonType

linux/amd64 PIE:
	cmd/go:  -62KB (0.5%)
	jujud:  -550KB (0.7%)

For #6853.

Change-Id: Ieb67982abce5832e24b997506f0ae7108f747108
Reviewed-on: https://go-review.googlesource.com/22371
Run-TryBot: David Crawshaw <crawshaw@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/cmd/compile/internal/gc/reflect.go | 15 ++++++---------
 src/cmd/link/internal/ld/decodesym.go  |  6 +++---
 src/cmd/link/internal/ld/symtab.go     |  5 +++++
 src/reflect/type.go                    | 21 +++++++++------------
 src/runtime/heapdump.go                |  4 ++--
 src/runtime/iface.go                   |  2 +-
 src/runtime/type.go                    |  6 ++++--
 7 files changed, 30 insertions(+), 29 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go
index 1643c2ce4b..3cd769fd2d 100644
--- a/src/cmd/compile/internal/gc/reflect.go
+++ b/src/cmd/compile/internal/gc/reflect.go
@@ -75,7 +75,7 @@ func uncommonSize(t *Type) int { // Sizeof(runtime.uncommontype{})
 	if t.Sym == nil && len(methods(t)) == 0 {
 		return 0
 	}
-	return 2 * Widthptr
+	return 4 + 2 + 2
 }
 
 func makefield(name string, t *Type) *Field {
@@ -463,6 +463,9 @@ func dgopkgpathLSym(s *obj.LSym, ot int, pkg *Pkg) int {
 
 // dgopkgpathOffLSym writes an offset relocation in s at offset ot to the pkg path symbol.
 func dgopkgpathOffLSym(s *obj.LSym, ot int, pkg *Pkg) int {
+	if pkg == nil {
+		return duintxxLSym(s, ot, 0, 4)
+	}
 	if pkg == localpkg && myimportpath == "" {
 		// If we don't know the full import path of the package being compiled
 		// (i.e. -p was not passed on the compiler command line), emit a reference to
@@ -597,12 +600,9 @@ func dextratype(s *Sym, ot int, t *Type, dataAdd int) int {
 		dtypesym(a.type_)
 	}
 
-	ot = dgopkgpath(s, ot, typePkg(t))
+	ot = dgopkgpathOffLSym(Linksym(s), ot, typePkg(t))
 
-	dataAdd += Widthptr + 2 + 2
-	if Widthptr == 8 {
-		dataAdd += 4
-	}
+	dataAdd += 4 + 2 + 2
 	mcount := len(m)
 	if mcount != int(uint16(mcount)) {
 		Fatalf("too many methods on %s: %d", t, mcount)
@@ -613,9 +613,6 @@ func dextratype(s *Sym, ot int, t *Type, dataAdd int) int {
 
 	ot = duint16(s, ot, uint16(mcount))
 	ot = duint16(s, ot, uint16(dataAdd))
-	if Widthptr == 8 {
-		ot = duint32(s, ot, 0) // align for following pointers
-	}
 	return ot
 }
 
diff --git a/src/cmd/link/internal/ld/decodesym.go b/src/cmd/link/internal/ld/decodesym.go
index 330aa6dc13..3ec488bbe8 100644
--- a/src/cmd/link/internal/ld/decodesym.go
+++ b/src/cmd/link/internal/ld/decodesym.go
@@ -61,7 +61,7 @@ func decode_inuxi(p []byte, sz int) uint64 {
 
 func commonsize() int      { return 4*SysArch.PtrSize + 8 + 8 } // runtime._type
 func structfieldSize() int { return 3 * SysArch.PtrSize }       // runtime.structfield
-func uncommonSize() int    { return 2 * SysArch.PtrSize }       // runtime.uncommontype
+func uncommonSize() int    { return 4 + 2 + 2 }                 // runtime.uncommontype
 
 // Type.commonType.kind
 func decodetype_kind(s *LSym) uint8 {
@@ -361,8 +361,8 @@ func decodetype_methods(s *LSym) []methodsig {
 		// just Sizeof(rtype)
 	}
 
-	mcount := int(decode_inuxi(s.P[off+SysArch.PtrSize:], 2))
-	moff := int(decode_inuxi(s.P[off+SysArch.PtrSize+2:], 2))
+	mcount := int(decode_inuxi(s.P[off+4:], 2))
+	moff := int(decode_inuxi(s.P[off+4+2:], 2))
 	off += moff                // offset to array of reflect.method values
 	const sizeofMethod = 4 * 4 // sizeof reflect.method in program
 	return decode_methodsig(s, off, sizeofMethod, mcount)
diff --git a/src/cmd/link/internal/ld/symtab.go b/src/cmd/link/internal/ld/symtab.go
index acc238f698..94a6d0ab29 100644
--- a/src/cmd/link/internal/ld/symtab.go
+++ b/src/cmd/link/internal/ld/symtab.go
@@ -435,6 +435,11 @@ func symtab() {
 				s.Outer = symtype
 			}
 
+		case strings.HasPrefix(s.Name, "go.importpath.") && UseRelro():
+			// Keep go.importpath symbols in the same section as types and
+			// names, as they can be referred to by a section offset.
+			s.Type = obj.STYPERELRO
+
 		case strings.HasPrefix(s.Name, "go.typelink."):
 			ntypelinks++
 			s.Type = obj.STYPELINK
diff --git a/src/reflect/type.go b/src/reflect/type.go
index b1758e6913..ff6ff14c83 100644
--- a/src/reflect/type.go
+++ b/src/reflect/type.go
@@ -311,9 +311,9 @@ type method struct {
 // Using a pointer to this struct reduces the overall size required
 // to describe an unnamed type with no methods.
 type uncommonType struct {
-	pkgPath name   // import path; empty for built-in types like int, string
-	mcount  uint16 // number of methods
-	moff    uint16 // offset from this uncommontype to [mcount]method
+	pkgPath nameOff // import path; empty for built-in types like int, string
+	mcount  uint16  // number of methods
+	moff    uint16  // offset from this uncommontype to [mcount]method
 }
 
 // ChanDir represents a channel type's direction.
@@ -613,13 +613,6 @@ func (t *uncommonType) methods() []method {
 	return (*[1 << 16]method)(add(unsafe.Pointer(t), uintptr(t.moff)))[:t.mcount:t.mcount]
 }
 
-func (t *uncommonType) PkgPath() string {
-	if t == nil {
-		return ""
-	}
-	return t.pkgPath.name()
-}
-
 // resolveNameOff resolves a name offset from a base pointer.
 // The (*rtype).nameOff method is a convenience wrapper for this function.
 // Implemented in the runtime package.
@@ -799,7 +792,7 @@ func (t *rtype) Method(i int) (m Method) {
 	if !pname.isExported() {
 		m.PkgPath = pname.pkgPath()
 		if m.PkgPath == "" {
-			m.PkgPath = ut.pkgPath.name()
+			m.PkgPath = t.nameOff(ut.pkgPath).name()
 		}
 		fl |= flagStickyRO
 	}
@@ -846,7 +839,11 @@ func (t *rtype) MethodByName(name string) (m Method, ok bool) {
 }
 
 func (t *rtype) PkgPath() string {
-	return t.uncommon().PkgPath()
+	ut := t.uncommon()
+	if ut == nil {
+		return ""
+	}
+	return t.nameOff(ut.pkgPath).name()
 }
 
 func hasPrefix(s, prefix string) bool {
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index 1db29d7cb4..0afab09095 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -183,10 +183,10 @@ func dumptype(t *_type) {
 	dumpint(tagType)
 	dumpint(uint64(uintptr(unsafe.Pointer(t))))
 	dumpint(uint64(t.size))
-	if x := t.uncommon(); x == nil || x.pkgpath.name() == "" {
+	if x := t.uncommon(); x == nil || t.nameOff(x.pkgpath).name() == "" {
 		dumpstr(t.string())
 	} else {
-		pkgpathstr := x.pkgpath.name()
+		pkgpathstr := t.nameOff(x.pkgpath).name()
 		pkgpath := stringStructOf(&pkgpathstr)
 		namestr := t.name()
 		name := stringStructOf(&namestr)
diff --git a/src/runtime/iface.go b/src/runtime/iface.go
index 007c1ed174..b57d1cc63c 100644
--- a/src/runtime/iface.go
+++ b/src/runtime/iface.go
@@ -112,7 +112,7 @@ func additab(m *itab, locked, canfail bool) {
 			if typ.typeOff(t.mtyp) == itype && tname.name() == iname {
 				pkgPath := tname.pkgPath()
 				if pkgPath == "" {
-					pkgPath = x.pkgpath.name()
+					pkgPath = typ.nameOff(x.pkgpath).name()
 				}
 				if tname.isExported() || pkgPath == ipkg {
 					if m != nil {
diff --git a/src/runtime/type.go b/src/runtime/type.go
index 0b28fa6d43..9e4c40553a 100644
--- a/src/runtime/type.go
+++ b/src/runtime/type.go
@@ -304,7 +304,7 @@ type method struct {
 }
 
 type uncommontype struct {
-	pkgpath name
+	pkgpath nameOff
 	mcount  uint16 // number of methods
 	moff    uint16 // offset from this uncommontype to [mcount]method
 }
@@ -524,7 +524,9 @@ func typesEqual(t, v *_type) bool {
 		if ut == nil || uv == nil {
 			return false
 		}
-		if ut.pkgpath.name() != uv.pkgpath.name() {
+		pkgpatht := t.nameOff(ut.pkgpath).name()
+		pkgpathv := v.nameOff(uv.pkgpath).name()
+		if pkgpatht != pkgpathv {
 			return false
 		}
 	}
-- 
cgit v1.3


From 32302d6289e9721015d5d7ac99bbce30de47746c Mon Sep 17 00:00:00 2001
From: Ian Lance Taylor <iant@golang.org>
Date: Fri, 22 Apr 2016 07:08:13 -0700
Subject: runtime/cgo: use normal libinit on PPC GNU/Linux

The special case was because PPC did not support external linking, but
now it does.

Fixes #10410.

Change-Id: I9b024686e0f03da7a44c1c59b41c529802f16ab0
Reviewed-on: https://go-review.googlesource.com/22372
Run-TryBot: Ian Lance Taylor <iant@golang.org>
Reviewed-by: David Crawshaw <crawshaw@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/cgo/gcc_libinit.c              |  1 -
 src/runtime/cgo/gcc_libinit_linux_ppc64x.c | 26 --------------------------
 2 files changed, 27 deletions(-)
 delete mode 100644 src/runtime/cgo/gcc_libinit_linux_ppc64x.c

(limited to 'src/runtime')

diff --git a/src/runtime/cgo/gcc_libinit.c b/src/runtime/cgo/gcc_libinit.c
index bdbaa2973c..06b9557709 100644
--- a/src/runtime/cgo/gcc_libinit.c
+++ b/src/runtime/cgo/gcc_libinit.c
@@ -4,7 +4,6 @@
 
 // +build cgo
 // +build darwin dragonfly freebsd linux netbsd solaris
-// +build !ppc64,!ppc64le
 
 #include <pthread.h>
 #include <stdio.h>
diff --git a/src/runtime/cgo/gcc_libinit_linux_ppc64x.c b/src/runtime/cgo/gcc_libinit_linux_ppc64x.c
deleted file mode 100644
index c133142f93..0000000000
--- a/src/runtime/cgo/gcc_libinit_linux_ppc64x.c
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// TODO: see issue #10410
-// +build linux
-// +build ppc64 ppc64le
-
-#include <stdio.h>
-#include <stdlib.h>
-
-void
-x_cgo_sys_thread_create(void* (*func)(void*), void* arg) {
-	fprintf(stderr, "x_cgo_sys_thread_create not implemented");
-	abort();
-}
-
-void
-_cgo_wait_runtime_init_done() {
-	// TODO(spetrovic): implement this method.
-}
-
-void
-x_cgo_notify_runtime_init_done(void* dummy) {
-	// TODO(spetrovic): implement this method.
-}
\ No newline at end of file
-- 
cgit v1.3


From 2d342fba78d9cbddb4c8c71bfc0d1044b2e5c58a Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Fri, 22 Apr 2016 22:48:11 +0200
Subject: runtime: fix description of trace events

Change-Id: I037101b1921fe151695d32e9874b50dd64982298
Reviewed-on: https://go-review.googlesource.com/22314
Reviewed-by: Austin Clements <austin@google.com>
---
 src/internal/trace/parser.go | 8 ++++----
 src/runtime/trace.go         | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/runtime')

diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go
index 3099b0ffeb..d279ddeacf 100644
--- a/src/internal/trace/parser.go
+++ b/src/internal/trace/parser.go
@@ -815,7 +815,7 @@ const (
 	EvGCScanDone     = 10 // GC scan done [timestamp]
 	EvGCSweepStart   = 11 // GC sweep start [timestamp, stack id]
 	EvGCSweepDone    = 12 // GC sweep done [timestamp]
-	EvGoCreate       = 13 // goroutine creation [timestamp, new goroutine id, new start id, stack id]
+	EvGoCreate       = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
 	EvGoStart        = 14 // goroutine starts running [timestamp, goroutine id]
 	EvGoEnd          = 15 // goroutine ends [timestamp]
 	EvGoStop         = 16 // goroutine stops (like in select{}) [timestamp, stack]
@@ -833,9 +833,9 @@ const (
 	EvGoSysCall      = 28 // syscall enter [timestamp, stack]
 	EvGoSysExit      = 29 // syscall exit [timestamp, goroutine id, real timestamp]
 	EvGoSysBlock     = 30 // syscall blocks [timestamp]
-	EvGoWaiting      = 31 // denotes that goroutine is blocked when tracing starts [goroutine id]
-	EvGoInSyscall    = 32 // denotes that goroutine is in syscall when tracing starts [goroutine id]
-	EvHeapAlloc      = 33 // memstats.heap_alloc change [timestamp, heap_alloc]
+	EvGoWaiting      = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
+	EvGoInSyscall    = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
+	EvHeapAlloc      = 33 // memstats.heap_live change [timestamp, heap_alloc]
 	EvNextGC         = 34 // memstats.next_gc change [timestamp, next_gc]
 	EvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
 	EvFutileWakeup   = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index dcf534549a..06fbdfac94 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -33,7 +33,7 @@ const (
 	traceEvGCScanDone     = 10 // GC scan done [timestamp]
 	traceEvGCSweepStart   = 11 // GC sweep start [timestamp, stack id]
 	traceEvGCSweepDone    = 12 // GC sweep done [timestamp]
-	traceEvGoCreate       = 13 // goroutine creation [timestamp, new goroutine id, new start id, stack id]
+	traceEvGoCreate       = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
 	traceEvGoStart        = 14 // goroutine starts running [timestamp, goroutine id]
 	traceEvGoEnd          = 15 // goroutine ends [timestamp]
 	traceEvGoStop         = 16 // goroutine stops (like in select{}) [timestamp, stack]
@@ -51,8 +51,8 @@ const (
 	traceEvGoSysCall      = 28 // syscall enter [timestamp, stack]
 	traceEvGoSysExit      = 29 // syscall exit [timestamp, goroutine id, real timestamp]
 	traceEvGoSysBlock     = 30 // syscall blocks [timestamp]
-	traceEvGoWaiting      = 31 // denotes that goroutine is blocked when tracing starts [goroutine id]
-	traceEvGoInSyscall    = 32 // denotes that goroutine is in syscall when tracing starts [goroutine id]
+	traceEvGoWaiting      = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
+	traceEvGoInSyscall    = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
 	traceEvHeapAlloc      = 33 // memstats.heap_live change [timestamp, heap_alloc]
 	traceEvNextGC         = 34 // memstats.next_gc change [timestamp, next_gc]
 	traceEvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
-- 
cgit v1.3


From a3703618eadeb74b60f2cb9a23fabe178d4b141d Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Tue, 5 Apr 2016 15:29:14 +0200
Subject: runtime: use per-goroutine sequence numbers in tracer

Currently the tracer uses a global sequencer, which introduces
a significant slowdown on parallel machines (up to 10x).
Replace the global sequencer with a per-goroutine sequencer.

If we assign per-goroutine sequence numbers to only 3 types
of events (start, unblock and syscall exit), it is enough to
restore a consistent partial ordering of all events. Even these
events don't need sequence numbers all the time (if a goroutine
starts on the same P where it was unblocked, then the start does
not need a sequence number).
The burden of restoring the order is put on the trace parser.
Details of the algorithm are described in the comments.

On http benchmark with GOMAXPROCS=48:
no tracing: 5026 ns/op
tracing: 27803 ns/op (+453%)
with this change: 6369 ns/op (+26%, mostly for traceback)

Also trace size is reduced by ~22%. Average event size before: 4.63
bytes/event, after: 3.62 bytes/event.

Besides running trace tests, I've also tested with manually broken
cputicks (random skew for each event, per-P skew and episodic random skew).
In all cases the broken timestamps were detected and there were no test failures.

Change-Id: I078bde421ccc386a66f6c2051ab207bcd5613efa
Reviewed-on: https://go-review.googlesource.com/21512
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/internal/trace/order.go       | 278 ++++++++++++++++++++++++++++++++++++++
 src/internal/trace/parser.go      | 208 +++++++++++++++-------------
 src/internal/trace/parser_test.go |   8 +-
 src/runtime/proc.go               |  25 +---
 src/runtime/runtime2.go           |  21 +--
 src/runtime/trace.go              | 121 +++++++++--------
 6 files changed, 478 insertions(+), 183 deletions(-)
 create mode 100644 src/internal/trace/order.go

(limited to 'src/runtime')

diff --git a/src/internal/trace/order.go b/src/internal/trace/order.go
new file mode 100644
index 0000000000..f9ec44c745
--- /dev/null
+++ b/src/internal/trace/order.go
@@ -0,0 +1,278 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+	"fmt"
+	"sort"
+)
+
+type eventBatch struct {
+	events   []*Event
+	selected bool
+}
+
+type orderEvent struct {
+	ev    *Event
+	batch int
+	g     uint64
+	init  gState
+	next  gState
+}
+
+type gStatus int
+
+type gState struct {
+	seq    uint64
+	status gStatus
+}
+
+const (
+	gDead gStatus = iota
+	gRunnable
+	gRunning
+	gWaiting
+
+	unordered = ^uint64(0)
+	garbage   = ^uint64(0) - 1
+	noseq     = ^uint64(0)
+	seqinc    = ^uint64(0) - 1
+)
+
+// order1007 merges a set of per-P event batches into a single, consistent stream.
+// The high level idea is as follows. Events within an individual batch are in
+// correct order, because they are emitted by a single P. So we need to produce
+// a correct interleaving of the batches. To do this we take the first unmerged event
+// from each batch (the frontier). Then we choose the subset that is "ready" to be merged,
+// that is, events for which all dependencies are already merged. Then we choose the
+// event with the lowest timestamp from the subset, merge it and repeat.
+// This approach ensures that we form a consistent stream even if timestamps are
+// incorrect (a condition observed on some machines).
+func order1007(m map[int][]*Event) (events []*Event, err error) {
+	pending := 0
+	var batches []*eventBatch
+	for _, v := range m {
+		pending += len(v)
+		batches = append(batches, &eventBatch{v, false})
+	}
+	gs := make(map[uint64]gState)
+	var frontier []orderEvent
+	for ; pending != 0; pending-- {
+		for i, b := range batches {
+			if b.selected || len(b.events) == 0 {
+				continue
+			}
+			ev := b.events[0]
+			g, init, next := stateTransition(ev)
+			if !transitionReady(g, gs[g], init) {
+				continue
+			}
+			frontier = append(frontier, orderEvent{ev, i, g, init, next})
+			b.events = b.events[1:]
+			b.selected = true
+			// Get rid of "Local" events, they are intended merely for ordering.
+			switch ev.Type {
+			case EvGoStartLocal:
+				ev.Type = EvGoStart
+			case EvGoUnblockLocal:
+				ev.Type = EvGoUnblock
+			case EvGoSysExitLocal:
+				ev.Type = EvGoSysExit
+			}
+		}
+		if len(frontier) == 0 {
+			return nil, fmt.Errorf("no consistent ordering of events possible")
+		}
+		sort.Sort(orderEventList(frontier))
+		f := frontier[0]
+		frontier[0] = frontier[len(frontier)-1]
+		frontier = frontier[:len(frontier)-1]
+		events = append(events, f.ev)
+		transition(gs, f.g, f.init, f.next)
+		if !batches[f.batch].selected {
+			panic("frontier batch is not selected")
+		}
+		batches[f.batch].selected = false
+	}
+
+	// At this point we have a consistent stream of events.
+	// Make sure time stamps respect the ordering.
+	// The tests will skip (not fail) the test case if they see this error.
+	if !sort.IsSorted(eventList(events)) {
+		return nil, ErrTimeOrder
+	}
+
+	// The last part is giving correct timestamps to EvGoSysExit events.
+	// The problem with EvGoSysExit is that actual syscall exit timestamp (ev.Args[2])
+	// is potentially acquired long before event emission. So far we've used
+	// timestamp of event emission (ev.Ts).
+	// We could not set ev.Ts = ev.Args[2] earlier, because it would produce
+	// seemingly broken timestamps (misplaced event).
+	// We also can't simply update the timestamp and re-sort events, because
+	// if timestamps are broken we will misplace the event and later report
+	// a logically broken trace (instead of reporting broken timestamps).
+	lastSysBlock := make(map[uint64]int64)
+	for _, ev := range events {
+		switch ev.Type {
+		case EvGoSysBlock, EvGoInSyscall:
+			lastSysBlock[ev.G] = ev.Ts
+		case EvGoSysExit:
+			ts := int64(ev.Args[2])
+			if ts == 0 {
+				continue
+			}
+			block := lastSysBlock[ev.G]
+			if block == 0 {
+				return nil, fmt.Errorf("stray syscall exit")
+			}
+			if ts < block {
+				return nil, ErrTimeOrder
+			}
+			ev.Ts = ts
+		}
+	}
+	sort.Sort(eventList(events))
+
+	return
+}
+
+// stateTransition returns goroutine state (sequence and status) when the event
+// becomes ready for merging (init) and the goroutine state after the event (next).
+func stateTransition(ev *Event) (g uint64, init, next gState) {
+	switch ev.Type {
+	case EvGoCreate:
+		g = ev.Args[0]
+		init = gState{0, gDead}
+		next = gState{1, gRunnable}
+	case EvGoWaiting, EvGoInSyscall:
+		g = ev.G
+		init = gState{1, gRunnable}
+		next = gState{2, gWaiting}
+	case EvGoStart:
+		g = ev.G
+		init = gState{ev.Args[1], gRunnable}
+		next = gState{ev.Args[1] + 1, gRunning}
+	case EvGoStartLocal:
+		// noseq means that this event is ready for merging as soon as
+		// frontier reaches it (EvGoStartLocal is emitted on the same P
+		// as the corresponding EvGoCreate/EvGoUnblock, and thus the latter
+		// is already merged).
+		// seqinc is a stub for cases when event increments g sequence,
+		// but since we don't know current seq we also don't know next seq.
+		g = ev.G
+		init = gState{noseq, gRunnable}
+		next = gState{seqinc, gRunning}
+	case EvGoBlock, EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect,
+		EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoSleep, EvGoSysBlock:
+		g = ev.G
+		init = gState{noseq, gRunning}
+		next = gState{noseq, gWaiting}
+	case EvGoSched, EvGoPreempt:
+		g = ev.G
+		init = gState{noseq, gRunning}
+		next = gState{noseq, gRunnable}
+	case EvGoUnblock, EvGoSysExit:
+		g = ev.Args[0]
+		init = gState{ev.Args[1], gWaiting}
+		next = gState{ev.Args[1] + 1, gRunnable}
+	case EvGoUnblockLocal, EvGoSysExitLocal:
+		g = ev.Args[0]
+		init = gState{noseq, gWaiting}
+		next = gState{seqinc, gRunnable}
+	case EvGCStart:
+		g = garbage
+		init = gState{ev.Args[0], gDead}
+		next = gState{ev.Args[0] + 1, gDead}
+	default:
+		// no ordering requirements
+		g = unordered
+	}
+	return
+}
+
+func transitionReady(g uint64, curr, init gState) bool {
+	return g == unordered || (init.seq == noseq || init.seq == curr.seq) && init.status == curr.status
+}
+
+func transition(gs map[uint64]gState, g uint64, init, next gState) {
+	if g == unordered {
+		return
+	}
+	curr := gs[g]
+	if !transitionReady(g, curr, init) {
+		panic("event sequences are broken")
+	}
+	switch next.seq {
+	case noseq:
+		next.seq = curr.seq
+	case seqinc:
+		next.seq = curr.seq + 1
+	}
+	gs[g] = next
+}
+
+// order1005 merges a set of per-P event batches into a single, consistent stream.
+func order1005(m map[int][]*Event) (events []*Event, err error) {
+	for _, batch := range m {
+		events = append(events, batch...)
+	}
+	for _, ev := range events {
+		if ev.Type == EvGoSysExit {
+			// EvGoSysExit emission is delayed until the thread has a P.
+			// Give it the real sequence number and time stamp.
+			ev.seq = int64(ev.Args[1])
+			if ev.Args[2] != 0 {
+				ev.Ts = int64(ev.Args[2])
+			}
+		}
+	}
+	sort.Sort(eventSeqList(events))
+	if !sort.IsSorted(eventList(events)) {
+		return nil, ErrTimeOrder
+	}
+	return
+}
+
+type orderEventList []orderEvent
+
+func (l orderEventList) Len() int {
+	return len(l)
+}
+
+func (l orderEventList) Less(i, j int) bool {
+	return l[i].ev.Ts < l[j].ev.Ts
+}
+
+func (l orderEventList) Swap(i, j int) {
+	l[i], l[j] = l[j], l[i]
+}
+
+type eventList []*Event
+
+func (l eventList) Len() int {
+	return len(l)
+}
+
+func (l eventList) Less(i, j int) bool {
+	return l[i].Ts < l[j].Ts
+}
+
+func (l eventList) Swap(i, j int) {
+	l[i], l[j] = l[j], l[i]
+}
+
+type eventSeqList []*Event
+
+func (l eventSeqList) Len() int {
+	return len(l)
+}
+
+func (l eventSeqList) Less(i, j int) bool {
+	return l[i].seq < l[j].seq
+}
+
+func (l eventSeqList) Swap(i, j int) {
+	l[i], l[j] = l[j], l[i]
+}
diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go
index d279ddeacf..e6f29445c1 100644
--- a/src/internal/trace/parser.go
+++ b/src/internal/trace/parser.go
@@ -11,7 +11,6 @@ import (
 	"io"
 	"os"
 	"os/exec"
-	"sort"
 	"strconv"
 	"strings"
 )
@@ -135,7 +134,12 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri
 		}
 		off += n
 		typ := buf[0] << 2 >> 2
-		narg := buf[0] >> 6
+		narg := buf[0]>>6 + 1
+		inlineArgs := byte(4)
+		if ver < 1007 {
+			narg++
+			inlineArgs++
+		}
 		if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > ver {
 			err = fmt.Errorf("unknown event type %v at offset 0x%x", typ, off0)
 			return
@@ -180,8 +184,8 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri
 			continue
 		}
 		ev := rawEvent{typ: typ, off: off0}
-		if narg < 3 {
-			for i := 0; i < int(narg)+2; i++ { // sequence number and time stamp are present but not counted in narg
+		if narg < inlineArgs {
+			for i := 0; i < int(narg); i++ {
 				var v uint64
 				v, off, err = readVal(r, off)
 				if err != nil {
@@ -191,7 +195,7 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri
 				ev.args = append(ev.args, v)
 			}
 		} else {
-			// If narg == 3, the first value is length of the event in bytes.
+			// narg >= inlineArgs: the first value is the length of the event in bytes.
 			var v uint64
 			v, off, err = readVal(r, off)
 			if err != nil {
@@ -250,34 +254,30 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
 	var lastP int
 	lastGs := make(map[int]uint64) // last goroutine running on P
 	stacks = make(map[uint64][]*Frame)
+	batches := make(map[int][]*Event) // events by P
 	for _, raw := range rawEvents {
 		desc := EventDescriptions[raw.typ]
 		if desc.Name == "" {
 			err = fmt.Errorf("missing description for event type %v", raw.typ)
 			return
 		}
-		if raw.typ != EvStack {
-			narg := len(desc.Args)
-			if desc.Stack {
-				narg++
-			}
-			if raw.typ != EvBatch && raw.typ != EvFrequency && raw.typ != EvTimerGoroutine {
-				narg++ // sequence number
-				narg++ // timestamp
-			}
-			if len(raw.args) != narg {
-				err = fmt.Errorf("%v has wrong number of arguments at offset 0x%x: want %v, got %v",
-					desc.Name, raw.off, narg, len(raw.args))
-				return
-			}
+		narg := argNum(raw, ver)
+		if len(raw.args) != narg {
+			err = fmt.Errorf("%v has wrong number of arguments at offset 0x%x: want %v, got %v",
+				desc.Name, raw.off, narg, len(raw.args))
+			return
 		}
 		switch raw.typ {
 		case EvBatch:
 			lastGs[lastP] = lastG
 			lastP = int(raw.args[0])
 			lastG = lastGs[lastP]
-			lastSeq = int64(raw.args[1])
-			lastTs = int64(raw.args[2])
+			if ver < 1007 {
+				lastSeq = int64(raw.args[1])
+				lastTs = int64(raw.args[2])
+			} else {
+				lastTs = int64(raw.args[1])
+			}
 		case EvFrequency:
 			ticksPerSec = int64(raw.args[0])
 			if ticksPerSec <= 0 {
@@ -328,18 +328,26 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
 			}
 		default:
 			e := &Event{Off: raw.off, Type: raw.typ, P: lastP, G: lastG}
-			e.seq = lastSeq + int64(raw.args[0])
-			e.Ts = lastTs + int64(raw.args[1])
-			lastSeq = e.seq
-			lastTs = e.Ts
-			for i := range desc.Args {
-				e.Args[i] = raw.args[i+2]
+			var argOffset int
+			if ver < 1007 {
+				e.seq = lastSeq + int64(raw.args[0])
+				e.Ts = lastTs + int64(raw.args[1])
+				lastSeq = e.seq
+				argOffset = 2
+			} else {
+				e.Ts = lastTs + int64(raw.args[0])
+				argOffset = 1
 			}
-			if desc.Stack {
-				e.StkID = raw.args[len(desc.Args)+2]
+			lastTs = e.Ts
+			for i := argOffset; i < narg; i++ {
+				if i == narg-1 && desc.Stack {
+					e.StkID = raw.args[i]
+				} else {
+					e.Args[i-argOffset] = raw.args[i]
+				}
 			}
 			switch raw.typ {
-			case EvGoStart:
+			case EvGoStart, EvGoStartLocal:
 				lastG = e.Args[0]
 				e.G = lastG
 			case EvGCStart, EvGCDone, EvGCScanStart, EvGCScanDone:
@@ -349,28 +357,30 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
 				EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet,
 				EvGoSysBlock:
 				lastG = 0
-			case EvGoSysExit:
-				// EvGoSysExit emission is delayed until the thread has a P.
-				// Give it the real sequence number and time stamp.
-				e.seq = int64(e.Args[1])
-				if e.Args[2] != 0 {
-					e.Ts = int64(e.Args[2])
-				}
+			case EvGoSysExit, EvGoWaiting, EvGoInSyscall:
+				e.G = e.Args[0]
 			}
-			events = append(events, e)
+			batches[lastP] = append(batches[lastP], e)
 		}
 	}
-	if len(events) == 0 {
+	if len(batches) == 0 {
 		err = fmt.Errorf("trace is empty")
 		return
 	}
-
-	// Sort by sequence number and translate cpu ticks to real time.
-	sort.Sort(eventList(events))
 	if ticksPerSec == 0 {
 		err = fmt.Errorf("no EvFrequency event")
 		return
 	}
+	if ver < 1007 {
+		events, err = order1005(batches)
+	} else {
+		events, err = order1007(batches)
+	}
+	if err != nil {
+		return
+	}
+
+	// Translate cpu ticks to real time.
 	minTs := events[0].Ts
 	// Use floating point to avoid integer overflows.
 	freq := 1e9 / float64(ticksPerSec)
@@ -382,7 +392,6 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
 		}
 		if ev.Type == EvGoSysExit {
 			ev.P = SyscallP
-			ev.G = ev.Args[0]
 		}
 	}
 
@@ -543,19 +552,15 @@ func postProcessTrace(ver int, events []*Event) error {
 			p.evSweep.Link = ev
 			p.evSweep = nil
 		case EvGoWaiting:
-			g1 := gs[ev.Args[0]]
-			if g1.state != gRunnable {
-				return fmt.Errorf("g %v is not runnable before EvGoWaiting (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts)
+			if g.state != gRunnable {
+				return fmt.Errorf("g %v is not runnable before EvGoWaiting (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
 			}
-			g1.state = gWaiting
-			gs[ev.Args[0]] = g1
+			g.state = gWaiting
 		case EvGoInSyscall:
-			g1 := gs[ev.Args[0]]
-			if g1.state != gRunnable {
-				return fmt.Errorf("g %v is not runnable before EvGoInSyscall (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts)
+			if g.state != gRunnable {
+				return fmt.Errorf("g %v is not runnable before EvGoInSyscall (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
 			}
-			g1.state = gWaiting
-			gs[ev.Args[0]] = g1
+			g.state = gWaiting
 		case EvGoCreate:
 			if err := checkRunning(p, g, ev, true); err != nil {
 				return err
@@ -666,18 +671,6 @@ func postProcessTrace(ver int, events []*Event) error {
 	// TODO(dvyukov): restore stacks for EvGoStart events.
 	// TODO(dvyukov): test that all EvGoStart events has non-nil Link.
 
-	// Last, after all the other consistency checks,
-	// make sure time stamps respect sequence numbers.
-	// The tests will skip (not fail) the test case if they see this error,
-	// so check everything else that could possibly be wrong first.
-	lastTs := int64(0)
-	for _, ev := range events {
-		if ev.Ts < lastTs {
-			return ErrTimeOrder
-		}
-		lastTs = ev.Ts
-	}
-
 	return nil
 }
 
@@ -773,30 +766,51 @@ func readVal(r io.Reader, off0 int) (v uint64, off int, err error) {
 	return 0, 0, fmt.Errorf("bad value at offset 0x%x", off0)
 }
 
-type eventList []*Event
-
-func (l eventList) Len() int {
-	return len(l)
-}
-
-func (l eventList) Less(i, j int) bool {
-	return l[i].seq < l[j].seq
+// Print dumps events to stdout. For debugging.
+func Print(events []*Event) {
+	for _, ev := range events {
+		PrintEvent(ev)
+	}
 }
 
-func (l eventList) Swap(i, j int) {
-	l[i], l[j] = l[j], l[i]
+// PrintEvent dumps the event to stdout. For debugging.
+func PrintEvent(ev *Event) {
+	desc := EventDescriptions[ev.Type]
+	fmt.Printf("%v %v p=%v g=%v off=%v", ev.Ts, desc.Name, ev.P, ev.G, ev.Off)
+	for i, a := range desc.Args {
+		fmt.Printf(" %v=%v", a, ev.Args[i])
+	}
+	fmt.Printf("\n")
 }
 
-// Print dumps events to stdout. For debugging.
-func Print(events []*Event) {
-	for _, ev := range events {
-		desc := EventDescriptions[ev.Type]
-		fmt.Printf("%v %v p=%v g=%v off=%v", ev.Ts, desc.Name, ev.P, ev.G, ev.Off)
-		for i, a := range desc.Args {
-			fmt.Printf(" %v=%v", a, ev.Args[i])
+// argNum returns total number of args for the event accounting for timestamps,
+// sequence numbers and differences between trace format versions.
+func argNum(raw rawEvent, ver int) int {
+	desc := EventDescriptions[raw.typ]
+	if raw.typ == EvStack {
+		return len(raw.args)
+	}
+	narg := len(desc.Args)
+	if desc.Stack {
+		narg++
+	}
+	switch raw.typ {
+	case EvBatch, EvFrequency, EvTimerGoroutine:
+		if ver < 1007 {
+			narg++ // there was an unused arg before 1.7
+		}
+	case EvGCStart, EvGoStart, EvGoUnblock:
+		if ver < 1007 {
+			narg-- // 1.7 added an additional seq arg
+		}
+		fallthrough
+	default:
+		narg++ // timestamp
+		if ver < 1007 {
+			narg++ // sequence
 		}
-		fmt.Printf("\n")
 	}
+	return narg
 }
 
 // Event types in the trace.
@@ -809,21 +823,21 @@ const (
 	EvGomaxprocs     = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
 	EvProcStart      = 5  // start of P [timestamp, thread id]
 	EvProcStop       = 6  // stop of P [timestamp]
-	EvGCStart        = 7  // GC start [timestamp, stack id]
+	EvGCStart        = 7  // GC start [timestamp, seq, stack id]
 	EvGCDone         = 8  // GC done [timestamp]
 	EvGCScanStart    = 9  // GC scan start [timestamp]
 	EvGCScanDone     = 10 // GC scan done [timestamp]
 	EvGCSweepStart   = 11 // GC sweep start [timestamp, stack id]
 	EvGCSweepDone    = 12 // GC sweep done [timestamp]
 	EvGoCreate       = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
-	EvGoStart        = 14 // goroutine starts running [timestamp, goroutine id]
+	EvGoStart        = 14 // goroutine starts running [timestamp, goroutine id, seq]
 	EvGoEnd          = 15 // goroutine ends [timestamp]
 	EvGoStop         = 16 // goroutine stops (like in select{}) [timestamp, stack]
 	EvGoSched        = 17 // goroutine calls Gosched [timestamp, stack]
 	EvGoPreempt      = 18 // goroutine is preempted [timestamp, stack]
 	EvGoSleep        = 19 // goroutine calls Sleep [timestamp, stack]
 	EvGoBlock        = 20 // goroutine blocks [timestamp, stack]
-	EvGoUnblock      = 21 // goroutine is unblocked [timestamp, goroutine id, stack]
+	EvGoUnblock      = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
 	EvGoBlockSend    = 22 // goroutine blocks on chan send [timestamp, stack]
 	EvGoBlockRecv    = 23 // goroutine blocks on chan recv [timestamp, stack]
 	EvGoBlockSelect  = 24 // goroutine blocks on select [timestamp, stack]
@@ -831,7 +845,7 @@ const (
 	EvGoBlockCond    = 26 // goroutine blocks on Cond [timestamp, stack]
 	EvGoBlockNet     = 27 // goroutine blocks on network [timestamp, stack]
 	EvGoSysCall      = 28 // syscall enter [timestamp, stack]
-	EvGoSysExit      = 29 // syscall exit [timestamp, goroutine id, real timestamp]
+	EvGoSysExit      = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
 	EvGoSysBlock     = 30 // syscall blocks [timestamp]
 	EvGoWaiting      = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
 	EvGoInSyscall    = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
@@ -840,7 +854,10 @@ const (
 	EvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
 	EvFutileWakeup   = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
 	EvString         = 37 // string dictionary entry [ID, length, string]
-	EvCount          = 38
+	EvGoStartLocal   = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
+	EvGoUnblockLocal = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
+	EvGoSysExitLocal = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
+	EvCount          = 41
 )
 
 var EventDescriptions = [EvCount]struct {
@@ -850,27 +867,27 @@ var EventDescriptions = [EvCount]struct {
 	Args       []string
 }{
 	EvNone:           {"None", 1005, false, []string{}},
-	EvBatch:          {"Batch", 1005, false, []string{"p", "seq", "ticks"}},
-	EvFrequency:      {"Frequency", 1005, false, []string{"freq", "unused"}},
+	EvBatch:          {"Batch", 1005, false, []string{"p", "ticks"}}, // in 1.5 format it was {"p", "seq", "ticks"}
+	EvFrequency:      {"Frequency", 1005, false, []string{"freq"}},   // in 1.5 format it was {"freq", "unused"}
 	EvStack:          {"Stack", 1005, false, []string{"id", "siz"}},
 	EvGomaxprocs:     {"Gomaxprocs", 1005, true, []string{"procs"}},
 	EvProcStart:      {"ProcStart", 1005, false, []string{"thread"}},
 	EvProcStop:       {"ProcStop", 1005, false, []string{}},
-	EvGCStart:        {"GCStart", 1005, true, []string{}},
+	EvGCStart:        {"GCStart", 1005, true, []string{"seq"}}, // in 1.5 format it was {}
 	EvGCDone:         {"GCDone", 1005, false, []string{}},
 	EvGCScanStart:    {"GCScanStart", 1005, false, []string{}},
 	EvGCScanDone:     {"GCScanDone", 1005, false, []string{}},
 	EvGCSweepStart:   {"GCSweepStart", 1005, true, []string{}},
 	EvGCSweepDone:    {"GCSweepDone", 1005, false, []string{}},
 	EvGoCreate:       {"GoCreate", 1005, true, []string{"g", "stack"}},
-	EvGoStart:        {"GoStart", 1005, false, []string{"g"}},
+	EvGoStart:        {"GoStart", 1005, false, []string{"g", "seq"}}, // in 1.5 format it was {"g"}
 	EvGoEnd:          {"GoEnd", 1005, false, []string{}},
 	EvGoStop:         {"GoStop", 1005, true, []string{}},
 	EvGoSched:        {"GoSched", 1005, true, []string{}},
 	EvGoPreempt:      {"GoPreempt", 1005, true, []string{}},
 	EvGoSleep:        {"GoSleep", 1005, true, []string{}},
 	EvGoBlock:        {"GoBlock", 1005, true, []string{}},
-	EvGoUnblock:      {"GoUnblock", 1005, true, []string{"g"}},
+	EvGoUnblock:      {"GoUnblock", 1005, true, []string{"g", "seq"}}, // in 1.5 format it was {"g"}
 	EvGoBlockSend:    {"GoBlockSend", 1005, true, []string{}},
 	EvGoBlockRecv:    {"GoBlockRecv", 1005, true, []string{}},
 	EvGoBlockSelect:  {"GoBlockSelect", 1005, true, []string{}},
@@ -884,7 +901,10 @@ var EventDescriptions = [EvCount]struct {
 	EvGoInSyscall:    {"GoInSyscall", 1005, false, []string{"g"}},
 	EvHeapAlloc:      {"HeapAlloc", 1005, false, []string{"mem"}},
 	EvNextGC:         {"NextGC", 1005, false, []string{"mem"}},
-	EvTimerGoroutine: {"TimerGoroutine", 1005, false, []string{"g", "unused"}},
+	EvTimerGoroutine: {"TimerGoroutine", 1005, false, []string{"g"}}, // in 1.5 format it was {"g", "unused"}
 	EvFutileWakeup:   {"FutileWakeup", 1005, false, []string{}},
 	EvString:         {"String", 1007, false, []string{}},
+	EvGoStartLocal:   {"GoStartLocal", 1007, false, []string{"g"}},
+	EvGoUnblockLocal: {"GoUnblockLocal", 1007, true, []string{"g"}},
+	EvGoSysExitLocal: {"GoSysExitLocal", 1007, false, []string{"g", "ts"}},
 }
diff --git a/src/internal/trace/parser_test.go b/src/internal/trace/parser_test.go
index 337d5a85d7..340f106484 100644
--- a/src/internal/trace/parser_test.go
+++ b/src/internal/trace/parser_test.go
@@ -89,10 +89,10 @@ func TestParseVersion(t *testing.T) {
 func TestTimestampOverflow(t *testing.T) {
 	// Test that parser correctly handles large timestamps (long tracing).
 	w := newWriter()
-	w.emit(EvBatch, 0, 0, 0)
-	w.emit(EvFrequency, 1e9, 0)
+	w.emit(EvBatch, 0, 0)
+	w.emit(EvFrequency, 1e9)
 	for ts := uint64(1); ts < 1e16; ts *= 2 {
-		w.emit(EvGoCreate, 1, ts, ts, 1, 0)
+		w.emit(EvGoCreate, ts, ts, 0, 0)
 	}
 	if _, err := Parse(w, ""); err != nil {
 		t.Fatalf("failed to parse: %v", err)
@@ -110,7 +110,7 @@ func newWriter() *writer {
 }
 
 func (w *writer) emit(typ byte, args ...uint64) {
-	nargs := byte(len(args)) - 2
+	nargs := byte(len(args)) - 1
 	if nargs > 3 {
 		nargs = 3
 	}
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 98a986cd63..a847823da4 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -1796,23 +1796,7 @@ func execute(gp *g, inheritTime bool) {
 		// GoSysExit has to happen when we have a P, but before GoStart.
 		// So we emit it here.
 		if gp.syscallsp != 0 && gp.sysblocktraced {
-			// Since gp.sysblocktraced is true, we must emit an event.
-			// There is a race between the code that initializes sysexitseq
-			// and sysexitticks (in exitsyscall, which runs without a P,
-			// and therefore is not stopped with the rest of the world)
-			// and the code that initializes a new trace.
-			// The recorded sysexitseq and sysexitticks must therefore
-			// be treated as "best effort". If they are valid for this trace,
-			// then great, use them for greater accuracy.
-			// But if they're not valid for this trace, assume that the
-			// trace was started after the actual syscall exit (but before
-			// we actually managed to start the goroutine, aka right now),
-			// and assign a fresh time stamp to keep the log consistent.
-			seq, ts := gp.sysexitseq, gp.sysexitticks
-			if seq == 0 || int64(seq)-int64(trace.seqStart) < 0 {
-				seq, ts = tracestamp()
-			}
-			traceGoSysExit(seq, ts)
+			traceGoSysExit(gp.sysexitticks)
 		}
 		traceGoStart()
 	}
@@ -2481,7 +2465,6 @@ func exitsyscall(dummy int32) {
 	}
 
 	_g_.sysexitticks = 0
-	_g_.sysexitseq = 0
 	if trace.enabled {
 		// Wait till traceGoSysBlock event is emitted.
 		// This ensures consistency of the trace (the goroutine is started after it is blocked).
@@ -2492,7 +2475,7 @@ func exitsyscall(dummy int32) {
 		// Tracing code can invoke write barriers that cannot run without a P.
 		// So instead we remember the syscall exit time and emit the event
 		// in execute when we have a P.
-		_g_.sysexitseq, _g_.sysexitticks = tracestamp()
+		_g_.sysexitticks = cputicks()
 	}
 
 	_g_.m.locks--
@@ -2540,7 +2523,7 @@ func exitsyscallfast() bool {
 					// Denote blocking of the new syscall.
 					traceGoSysBlock(_g_.m.p.ptr())
 					// Denote completion of the current syscall.
-					traceGoSysExit(tracestamp())
+					traceGoSysExit(0)
 				})
 			}
 			_g_.m.p.ptr().syscalltick++
@@ -2564,7 +2547,7 @@ func exitsyscallfast() bool {
 						osyield()
 					}
 				}
-				traceGoSysExit(tracestamp())
+				traceGoSysExit(0)
 			}
 		})
 		if ok {
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 0fdea400de..8cfe6b06e6 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -332,16 +332,17 @@ type g struct {
 	waitsince      int64  // approx time when the g become blocked
 	waitreason     string // if status==Gwaiting
 	schedlink      guintptr
-	preempt        bool   // preemption signal, duplicates stackguard0 = stackpreempt
-	paniconfault   bool   // panic (instead of crash) on unexpected fault address
-	preemptscan    bool   // preempted g does scan for gc
-	gcscandone     bool   // g has scanned stack; protected by _Gscan bit in status
-	gcscanvalid    bool   // false at start of gc cycle, true if G has not run since last scan
-	throwsplit     bool   // must not split stack
-	raceignore     int8   // ignore race detection events
-	sysblocktraced bool   // StartTrace has emitted EvGoInSyscall about this goroutine
-	sysexitticks   int64  // cputicks when syscall has returned (for tracing)
-	sysexitseq     uint64 // trace seq when syscall has returned (for tracing)
+	preempt        bool     // preemption signal, duplicates stackguard0 = stackpreempt
+	paniconfault   bool     // panic (instead of crash) on unexpected fault address
+	preemptscan    bool     // preempted g does scan for gc
+	gcscandone     bool     // g has scanned stack; protected by _Gscan bit in status
+	gcscanvalid    bool     // false at start of gc cycle, true if G has not run since last scan
+	throwsplit     bool     // must not split stack
+	raceignore     int8     // ignore race detection events
+	sysblocktraced bool     // StartTrace has emitted EvGoInSyscall about this goroutine
+	sysexitticks   int64    // cputicks when syscall has returned (for tracing)
+	traceseq       uint64   // trace event sequencer
+	tracelastp     puintptr // last P that emitted an event for this goroutine
 	lockedm        *m
 	sig            uint32
 	writebuf       []byte
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 06fbdfac94..092f941f0c 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -13,7 +13,6 @@
 package runtime
 
 import (
-	"runtime/internal/atomic"
 	"runtime/internal/sys"
 	"unsafe"
 )
@@ -27,21 +26,21 @@ const (
 	traceEvGomaxprocs     = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
 	traceEvProcStart      = 5  // start of P [timestamp, thread id]
 	traceEvProcStop       = 6  // stop of P [timestamp]
-	traceEvGCStart        = 7  // GC start [timestamp, stack id]
+	traceEvGCStart        = 7  // GC start [timestamp, seq, stack id]
 	traceEvGCDone         = 8  // GC done [timestamp]
 	traceEvGCScanStart    = 9  // GC scan start [timestamp]
 	traceEvGCScanDone     = 10 // GC scan done [timestamp]
 	traceEvGCSweepStart   = 11 // GC sweep start [timestamp, stack id]
 	traceEvGCSweepDone    = 12 // GC sweep done [timestamp]
 	traceEvGoCreate       = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
-	traceEvGoStart        = 14 // goroutine starts running [timestamp, goroutine id]
+	traceEvGoStart        = 14 // goroutine starts running [timestamp, goroutine id, seq]
 	traceEvGoEnd          = 15 // goroutine ends [timestamp]
 	traceEvGoStop         = 16 // goroutine stops (like in select{}) [timestamp, stack]
 	traceEvGoSched        = 17 // goroutine calls Gosched [timestamp, stack]
 	traceEvGoPreempt      = 18 // goroutine is preempted [timestamp, stack]
 	traceEvGoSleep        = 19 // goroutine calls Sleep [timestamp, stack]
 	traceEvGoBlock        = 20 // goroutine blocks [timestamp, stack]
-	traceEvGoUnblock      = 21 // goroutine is unblocked [timestamp, goroutine id, stack]
+	traceEvGoUnblock      = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
 	traceEvGoBlockSend    = 22 // goroutine blocks on chan send [timestamp, stack]
 	traceEvGoBlockRecv    = 23 // goroutine blocks on chan recv [timestamp, stack]
 	traceEvGoBlockSelect  = 24 // goroutine blocks on select [timestamp, stack]
@@ -49,7 +48,7 @@ const (
 	traceEvGoBlockCond    = 26 // goroutine blocks on Cond [timestamp, stack]
 	traceEvGoBlockNet     = 27 // goroutine blocks on network [timestamp, stack]
 	traceEvGoSysCall      = 28 // syscall enter [timestamp, stack]
-	traceEvGoSysExit      = 29 // syscall exit [timestamp, goroutine id, real timestamp]
+	traceEvGoSysExit      = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
 	traceEvGoSysBlock     = 30 // syscall blocks [timestamp]
 	traceEvGoWaiting      = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
 	traceEvGoInSyscall    = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
@@ -58,7 +57,10 @@ const (
 	traceEvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
 	traceEvFutileWakeup   = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
 	traceEvString         = 37 // string dictionary entry [ID, length, string]
-	traceEvCount          = 38
+	traceEvGoStartLocal   = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
+	traceEvGoUnblockLocal = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
+	traceEvGoSysExitLocal = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
+	traceEvCount          = 41
 )
 
 const (
@@ -105,6 +107,7 @@ var trace struct {
 	ticksEnd      int64       // cputicks when tracing was stopped
 	timeStart     int64       // nanotime when tracing was started
 	timeEnd       int64       // nanotime when tracing was stopped
+	seqGC         uint64      // GC start/done sequencer
 	reading       traceBufPtr // buffer currently handed off to user
 	empty         traceBufPtr // stack of empty buffers
 	fullHead      traceBufPtr // queue of full buffers
@@ -122,31 +125,9 @@ var trace struct {
 	buf     traceBufPtr // global trace buffer, used when running without a p
 }
 
-var traceseq uint64 // global trace sequence number
-
-// tracestamp returns a consistent sequence number, time stamp pair
-// for use in a trace. We need to make sure that time stamp ordering
-// (assuming synchronized CPUs) and sequence ordering match.
-// To do that, we increment traceseq, grab ticks, and increment traceseq again.
-// We treat odd traceseq as a sign that another thread is in the middle
-// of the sequence and spin until it is done.
-// Not splitting stack to avoid preemption, just in case the call sites
-// that used to call xadd64 and cputicks are sensitive to that.
-//go:nosplit
-func tracestamp() (seq uint64, ts int64) {
-	seq = atomic.Load64(&traceseq)
-	for seq&1 != 0 || !atomic.Cas64(&traceseq, seq, seq+1) {
-		seq = atomic.Load64(&traceseq)
-	}
-	ts = cputicks()
-	atomic.Store64(&traceseq, seq+2)
-	return seq >> 1, ts
-}
-
 // traceBufHeader is per-P tracing buffer.
 type traceBufHeader struct {
 	link      traceBufPtr             // in trace.empty/full
-	lastSeq   uint64                  // sequence number of last event
 	lastTicks uint64                  // when we wrote the last event
 	pos       int                     // next write offset in arr
 	stk       [traceStackSize]uintptr // scratch buffer for traceback
@@ -194,13 +175,6 @@ func StartTrace() error {
 		return errorString("tracing is already enabled")
 	}
 
-	trace.seqStart, trace.ticksStart = tracestamp()
-	trace.timeStart = nanotime()
-	trace.headerWritten = false
-	trace.footerWritten = false
-	trace.strings = make(map[string]uint64)
-	trace.stringSeq = 0
-
 	// Can't set trace.enabled yet. While the world is stopped, exitsyscall could
 	// already emit a delayed event (see exitTicks in exitsyscall) if we set trace.enabled here.
 	// That would lead to an inconsistent trace:
@@ -213,12 +187,15 @@ func StartTrace() error {
 	for _, gp := range allgs {
 		status := readgstatus(gp)
 		if status != _Gdead {
-			traceGoCreate(gp, gp.startpc)
+			traceGoCreate(gp, gp.startpc) // also resets gp.traceseq/tracelastp
 		}
 		if status == _Gwaiting {
+			// traceEvGoWaiting is implied to have seq=1.
+			gp.traceseq++
 			traceEvent(traceEvGoWaiting, -1, uint64(gp.goid))
 		}
 		if status == _Gsyscall {
+			gp.traceseq++
 			traceEvent(traceEvGoInSyscall, -1, uint64(gp.goid))
 		} else {
 			gp.sysblocktraced = false
@@ -226,6 +203,17 @@ func StartTrace() error {
 	}
 	traceProcStart()
 	traceGoStart()
+	// Note: ticksStart needs to be set after we emit traceEvGoInSyscall events.
+	// If we do it the other way around, it is possible that exitsyscall will
+	// query sysexitticks after ticksStart but before traceEvGoInSyscall timestamp.
+	// It will lead to a false conclusion that cputicks is broken.
+	trace.ticksStart = cputicks()
+	trace.timeStart = nanotime()
+	trace.headerWritten = false
+	trace.footerWritten = false
+	trace.strings = make(map[string]uint64)
+	trace.stringSeq = 0
+	trace.seqGC = 0
 	_g_.m.startingtrace = false
 	trace.enabled = true
 
@@ -382,11 +370,9 @@ func ReadTrace() []byte {
 		var data []byte
 		data = append(data, traceEvFrequency|0<<traceArgCountShift)
 		data = traceAppend(data, uint64(freq))
-		data = traceAppend(data, 0)
 		if timers.gp != nil {
 			data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift)
 			data = traceAppend(data, uint64(timers.gp.goid))
-			data = traceAppend(data, 0)
 		}
 		// This will emit a bunch of full buffers, we will pick them up
 		// on the next iteration.
@@ -494,19 +480,14 @@ func traceEvent(ev byte, skip int, args ...uint64) {
 		(*bufp).set(buf)
 	}
 
-	seq, ticksraw := tracestamp()
-	seqDiff := seq - buf.lastSeq
-	ticks := uint64(ticksraw) / traceTickDiv
+	ticks := uint64(cputicks()) / traceTickDiv
 	tickDiff := ticks - buf.lastTicks
 	if buf.pos == 0 {
 		buf.byte(traceEvBatch | 1<<traceArgCountShift)
 		buf.varint(uint64(pid))
-		buf.varint(seq)
 		buf.varint(ticks)
-		seqDiff = 0
 		tickDiff = 0
 	}
-	buf.lastSeq = seq
 	buf.lastTicks = ticks
 	narg := byte(len(args))
 	if skip >= 0 {
@@ -525,7 +506,6 @@ func traceEvent(ev byte, skip int, args ...uint64) {
 		buf.varint(0)
 		lenp = &buf.arr[buf.pos-1]
 	}
-	buf.varint(seqDiff)
 	buf.varint(tickDiff)
 	for _, a := range args {
 		buf.varint(a)
@@ -892,7 +872,8 @@ func traceProcStop(pp *p) {
 }
 
 func traceGCStart() {
-	traceEvent(traceEvGCStart, 3)
+	traceEvent(traceEvGCStart, 3, trace.seqGC)
+	trace.seqGC++
 }
 
 func traceGCDone() {
@@ -916,13 +897,23 @@ func traceGCSweepDone() {
 }
 
 func traceGoCreate(newg *g, pc uintptr) {
+	newg.traceseq = 0
+	newg.tracelastp = getg().m.p
 	// +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
 	id := trace.stackTab.put([]uintptr{pc + sys.PCQuantum})
 	traceEvent(traceEvGoCreate, 2, uint64(newg.goid), uint64(id))
 }
 
 func traceGoStart() {
-	traceEvent(traceEvGoStart, -1, uint64(getg().m.curg.goid))
+	_g_ := getg().m.curg
+	_p_ := _g_.m.p
+	_g_.traceseq++
+	if _g_.tracelastp == _p_ {
+		traceEvent(traceEvGoStartLocal, -1, uint64(_g_.goid))
+	} else {
+		_g_.tracelastp = _p_
+		traceEvent(traceEvGoStart, -1, uint64(_g_.goid), _g_.traceseq)
+	}
 }
 
 func traceGoEnd() {
@@ -930,10 +921,14 @@ func traceGoEnd() {
 }
 
 func traceGoSched() {
+	_g_ := getg()
+	_g_.tracelastp = _g_.m.p
 	traceEvent(traceEvGoSched, 1)
 }
 
 func traceGoPreempt() {
+	_g_ := getg()
+	_g_.tracelastp = _g_.m.p
 	traceEvent(traceEvGoPreempt, 1)
 }
 
@@ -945,19 +940,37 @@ func traceGoPark(traceEv byte, skip int, gp *g) {
 }
 
 func traceGoUnpark(gp *g, skip int) {
-	traceEvent(traceEvGoUnblock, skip, uint64(gp.goid))
+	_p_ := getg().m.p
+	gp.traceseq++
+	if gp.tracelastp == _p_ {
+		traceEvent(traceEvGoUnblockLocal, skip, uint64(gp.goid))
+	} else {
+		gp.tracelastp = _p_
+		traceEvent(traceEvGoUnblock, skip, uint64(gp.goid), gp.traceseq)
+	}
 }
 
 func traceGoSysCall() {
 	traceEvent(traceEvGoSysCall, 1)
 }
 
-func traceGoSysExit(seq uint64, ts int64) {
-	if int64(seq)-int64(trace.seqStart) < 0 {
-		// The timestamp was obtained during a previous tracing session, ignore.
-		return
-	}
-	traceEvent(traceEvGoSysExit, -1, uint64(getg().m.curg.goid), seq, uint64(ts)/traceTickDiv)
+func traceGoSysExit(ts int64) {
+	if ts != 0 && ts < trace.ticksStart {
+		// There is a race between the code that initializes sysexitticks
+		// (in exitsyscall, which runs without a P, and therefore is not
+		// stopped with the rest of the world) and the code that initializes
+		// a new trace. The recorded sysexitticks must therefore be treated
+		// as "best effort". If they are valid for this trace, then great,
+		// use them for greater accuracy. But if they're not valid for this
+		// trace, assume that the trace was started after the actual syscall
+		// exit (but before we actually managed to start the goroutine,
+		// aka right now), and assign a fresh time stamp to keep the log consistent.
+		ts = 0
+	}
+	_g_ := getg().m.curg
+	_g_.traceseq++
+	_g_.tracelastp = _g_.m.p
+	traceEvent(traceEvGoSysExit, -1, uint64(_g_.goid), _g_.traceseq, uint64(ts)/traceTickDiv)
 }
 
 func traceGoSysBlock(pp *p) {
-- 
cgit v1.3


From 75b844f0d228bda5dea2aabae096909f81355bac Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Thu, 7 Apr 2016 15:48:15 +0200
Subject: runtime/trace: test detection of broken timestamps

On some processors cputicks (used to generate trace timestamps)
produces non-monotonic timestamps. It is important that the parser
distinguishes logically inconsistent traces (e.g. missing, excessive
or misordered events) from broken timestamps. The former is a bug
in the tracer; the latter is a machine issue.

Test that (1) the parser does not return a logical error in case of
broken timestamps and (2) broken timestamps are eventually detected
and reported.

Change-Id: Ib4b1eb43ce128b268e754400ed8b5e8def04bd78
Reviewed-on: https://go-review.googlesource.com/21608
Reviewed-by: Austin Clements <austin@google.com>
---
 src/internal/trace/parser.go          | 15 +++++++++++
 src/runtime/trace/trace_stack_test.go |  5 +---
 src/runtime/trace/trace_test.go       | 48 +++++++++++++++++++++++++----------
 3 files changed, 50 insertions(+), 18 deletions(-)

(limited to 'src/runtime')

diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go
index e6f29445c1..843d0eaf63 100644
--- a/src/internal/trace/parser.go
+++ b/src/internal/trace/parser.go
@@ -9,10 +9,12 @@ import (
 	"bytes"
 	"fmt"
 	"io"
+	"math/rand"
 	"os"
 	"os/exec"
 	"strconv"
 	"strings"
+	_ "unsafe"
 )
 
 // Event describes one event in the trace.
@@ -371,6 +373,16 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
 		err = fmt.Errorf("no EvFrequency event")
 		return
 	}
+	if BreakTimestampsForTesting {
+		var batchArr [][]*Event
+		for _, batch := range batches {
+			batchArr = append(batchArr, batch)
+		}
+		for i := 0; i < 5; i++ {
+			batch := batchArr[rand.Intn(len(batchArr))]
+			batch[rand.Intn(len(batch))].Ts += int64(rand.Intn(2000) - 1000)
+		}
+	}
 	if ver < 1007 {
 		events, err = order1005(batches)
 	} else {
@@ -813,6 +825,9 @@ func argNum(raw rawEvent, ver int) int {
 	return narg
 }
 
+// BreakTimestampsForTesting causes the parser to randomly alter timestamps (for testing of broken cputicks).
+var BreakTimestampsForTesting bool
+
 // Event types in the trace.
 // Verbatim copy from src/runtime/trace.go.
 const (
diff --git a/src/runtime/trace/trace_stack_test.go b/src/runtime/trace/trace_stack_test.go
index c3fb0f6fee..52a71bfb94 100644
--- a/src/runtime/trace/trace_stack_test.go
+++ b/src/runtime/trace/trace_stack_test.go
@@ -125,10 +125,7 @@ func TestTraceSymbolize(t *testing.T) {
 	<-pipeReadDone
 
 	Stop()
-	events, _, err := parseTrace(t, buf)
-	if err != nil {
-		t.Fatalf("failed to parse trace: %v", err)
-	}
+	events, _ := parseTrace(t, buf)
 
 	// Now check that the stacks are correct.
 	type frame struct {
diff --git a/src/runtime/trace/trace_test.go b/src/runtime/trace/trace_test.go
index d10e928a66..5fad3fb7f0 100644
--- a/src/runtime/trace/trace_test.go
+++ b/src/runtime/trace/trace_test.go
@@ -61,13 +61,13 @@ func TestTrace(t *testing.T) {
 	}
 }
 
-func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc, error) {
+func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc) {
 	events, err := trace.Parse(r, "")
 	if err == trace.ErrTimeOrder {
 		t.Skipf("skipping trace: %v", err)
 	}
 	if err != nil {
-		return nil, nil, err
+		t.Fatalf("failed to parse trace: %v", err)
 	}
 	gs := trace.GoroutineStats(events)
 	for goid := range gs {
@@ -75,7 +75,31 @@ func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GD
 		// But still check that RelatedGoroutines does not crash, hang, etc.
 		_ = trace.RelatedGoroutines(events, goid)
 	}
-	return events, gs, nil
+	return events, gs
+}
+
+func testBrokenTimestamps(t *testing.T, data []byte) {
+	// On some processors cputicks (used to generate trace timestamps)
+	// produce non-monotonic timestamps. It is important that the parser
+	// distinguishes logically inconsistent traces (e.g. missing, excessive
+	// or misordered events) from broken timestamps. The former is a bug
+	// in tracer, the latter is a machine issue.
+	// So now that we have a consistent trace, test that (1) parser does
+	// not return a logical error in case of broken timestamps
+	// and (2) broken timestamps are eventually detected and reported.
+	trace.BreakTimestampsForTesting = true
+	defer func() {
+		trace.BreakTimestampsForTesting = false
+	}()
+	for i := 0; i < 1e4; i++ {
+		_, err := trace.Parse(bytes.NewReader(data), "")
+		if err == trace.ErrTimeOrder {
+			return
+		}
+		if err != nil {
+			t.Fatalf("failed to parse trace: %v", err)
+		}
+	}
 }
 
 func TestTraceStress(t *testing.T) {
@@ -209,10 +233,9 @@ func TestTraceStress(t *testing.T) {
 	runtime.GOMAXPROCS(procs)
 
 	Stop()
-	_, _, err = parseTrace(t, buf)
-	if err != nil {
-		t.Fatalf("failed to parse trace: %v", err)
-	}
+	trace := buf.Bytes()
+	parseTrace(t, buf)
+	testBrokenTimestamps(t, trace)
 }
 
 // Do a bunch of various stuff (timers, GC, network, etc) in a separate goroutine.
@@ -353,9 +376,9 @@ func TestTraceStressStartStop(t *testing.T) {
 		}
 		time.Sleep(time.Millisecond)
 		Stop()
-		if _, _, err := parseTrace(t, buf); err != nil {
-			t.Fatalf("failed to parse trace: %v", err)
-		}
+		trace := buf.Bytes()
+		parseTrace(t, buf)
+		testBrokenTimestamps(t, trace)
 	}
 	<-outerDone
 }
@@ -413,10 +436,7 @@ func TestTraceFutileWakeup(t *testing.T) {
 	done.Wait()
 
 	Stop()
-	events, _, err := parseTrace(t, buf)
-	if err != nil {
-		t.Fatalf("failed to parse trace: %v", err)
-	}
+	events, _ := parseTrace(t, buf)
 	// Check that (1) trace does not contain EvFutileWakeup events and
 	// (2) there are no consecutive EvGoBlock/EvGCStart/EvGoBlock events
 	// (we call runtime.Gosched between all operations, so these would be futile wakeups).
-- 
cgit v1.3


From 6f3f02f80d23d3bbc2857be147341517d1762fbd Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Sun, 24 Apr 2016 17:04:32 -0700
Subject: runtime: zero tmpbuf between len and cap

Zero the entire buffer so we don't need to
lower its capacity upon return.  This lets callers
do some appending without allocation.

Zeroing is cheap, the byte buffer requires only
4 extra instructions.

Fixes #14235

Change-Id: I970d7badcef047dafac75ac17130030181f18fe2
Reviewed-on: https://go-review.googlesource.com/22424
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
---
 src/runtime/string.go      |  6 ++++--
 src/runtime/string_test.go | 26 ++++++++++++++++++++++----
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/string.go b/src/runtime/string.go
index 112ce5d588..ef28ba9828 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -139,7 +139,8 @@ func slicebytetostringtmp(b []byte) string {
 func stringtoslicebyte(buf *tmpBuf, s string) []byte {
 	var b []byte
 	if buf != nil && len(s) <= len(buf) {
-		b = buf[:len(s):len(s)]
+		*buf = tmpBuf{}
+		b = buf[:len(s)]
 	} else {
 		b = rawbyteslice(len(s))
 	}
@@ -171,7 +172,8 @@ func stringtoslicerune(buf *[tmpStringBufSize]rune, s string) []rune {
 	}
 	var a []rune
 	if buf != nil && n <= len(buf) {
-		a = buf[:n:n]
+		*buf = [tmpStringBufSize]rune{}
+		a = buf[:n]
 	} else {
 		a = rawruneslice(n)
 	}
diff --git a/src/runtime/string_test.go b/src/runtime/string_test.go
index ee9709e87d..0f1d82a481 100644
--- a/src/runtime/string_test.go
+++ b/src/runtime/string_test.go
@@ -238,17 +238,35 @@ func TestRangeStringCast(t *testing.T) {
 	}
 }
 
+func isZeroed(b []byte) bool {
+	for _, x := range b {
+		if x != 0 {
+			return false
+		}
+	}
+	return true
+}
+
+func isZeroedR(r []rune) bool {
+	for _, x := range r {
+		if x != 0 {
+			return false
+		}
+	}
+	return true
+}
+
 func TestString2Slice(t *testing.T) {
 	// Make sure we don't return slices that expose
 	// an unzeroed section of stack-allocated temp buf
 	// between len and cap. See issue 14232.
 	s := "foož"
 	b := ([]byte)(s)
-	if cap(b) != 5 {
-		t.Errorf("want cap of 5, got %d", cap(b))
+	if !isZeroed(b[len(b):cap(b)]) {
+		t.Errorf("extra bytes not zeroed")
 	}
 	r := ([]rune)(s)
-	if cap(r) != 4 {
-		t.Errorf("want cap of 4, got %d", cap(r))
+	if !isZeroedR(r[len(r):cap(r)]) {
+		t.Errorf("extra runes not zeroed")
 	}
 }
-- 
cgit v1.3


From 9cb79e9536a2f7977f9139a808f912d216094ecc Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Mon, 25 Apr 2016 14:12:26 -0700
Subject: runtime: arm5, fix large-offset floating-point stores

The code sequence for large-offset floating-point stores
includes adding the base pointer to r11.  Make sure we
can interpret that instruction correctly.

Fixes build.

Fixes #15440

Change-Id: I7fe5a4a57e08682967052bf77c54e0ec47fcb53e
Reviewed-on: https://go-review.googlesource.com/22440
Reviewed-by: Michael Hudson-Doyle <michael.hudson@canonical.com>
---
 src/runtime/softfloat_arm.go | 11 ++++++-----
 src/runtime/vlop_arm_test.go | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 5 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/softfloat_arm.go b/src/runtime/softfloat_arm.go
index b1f1a72925..648b2e1169 100644
--- a/src/runtime/softfloat_arm.go
+++ b/src/runtime/softfloat_arm.go
@@ -168,14 +168,15 @@ execute:
 		}
 		return 1
 	}
-	if i == 0xe08bb00d {
-		// add sp to r11.
-		// might be part of a large stack offset address
+	if i&0xfffffff0 == 0xe08bb000 {
+		r := i & 0xf
+		// add r to r11.
+		// might be part of a large offset address calculation
 		// (or might not, but again no harm done).
-		regs[11] += regs[13]
+		regs[11] += regs[r]
 
 		if fptrace > 0 {
-			print("*** cpu R[11] += R[13] ", hex(regs[11]), "\n")
+			print("*** cpu R[11] += R[", r, "] ", hex(regs[11]), "\n")
 		}
 		return 1
 	}
diff --git a/src/runtime/vlop_arm_test.go b/src/runtime/vlop_arm_test.go
index 1a211196f2..85cea923a9 100644
--- a/src/runtime/vlop_arm_test.go
+++ b/src/runtime/vlop_arm_test.go
@@ -82,3 +82,47 @@ func TestUsplit(t *testing.T) {
 		}
 	}
 }
+
+//go:noinline
+func armFloatWrite(a *[129]float64) {
+	// This used to miscompile on arm5.
+	// The offset is too big to fit in a load.
+	// So the code does:
+	//   ldr     r0, [sp, #8]
+	//   bl      6f690 <_sfloat>
+	//   ldr     fp, [pc, #32]   ; (address of 128.0)
+	//   vldr    d0, [fp]
+	//   ldr     fp, [pc, #28]   ; (1024)
+	//   add     fp, fp, r0
+	//   vstr    d0, [fp]
+	// The software floating-point emulator gives up on the add.
+	// This causes the store to not work.
+	// See issue 15440.
+	a[128] = 128.0
+}
+func TestArmFloatBigOffsetWrite(t *testing.T) {
+	var a [129]float64
+	for i := 0; i < 128; i++ {
+		a[i] = float64(i)
+	}
+	armFloatWrite(&a)
+	for i, x := range a {
+		if x != float64(i) {
+			t.Errorf("bad entry %d:%f\n", i, x)
+		}
+	}
+}
+
+//go:noinline
+func armFloatRead(a *[129]float64) float64 {
+	return a[128]
+}
+func TestArmFloatBigOffsetRead(t *testing.T) {
+	var a [129]float64
+	for i := 0; i < 129; i++ {
+		a[i] = float64(i)
+	}
+	if x := armFloatRead(&a); x != 128.0 {
+		t.Errorf("bad value %f\n", x)
+	}
+}
-- 
cgit v1.3


From 6b02a1924725688b4d264065454ac5287fbed535 Mon Sep 17 00:00:00 2001
From: Ilya Tocar <ilya.tocar@intel.com>
Date: Thu, 21 Apr 2016 18:24:12 +0300
Subject: strings: use SSE4.2 in strings.Index on AMD64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use PCMPESTRI instruction if available.

Index-4              21.1ns ± 0%  21.1ns ± 0%     ~     (all samples are equal)
IndexHard1-4          395µs ± 0%   105µs ± 0%  -73.53%        (p=0.000 n=19+20)
IndexHard2-4          300µs ± 0%   147µs ± 0%  -51.11%        (p=0.000 n=19+20)
IndexHard3-4          665µs ± 0%   665µs ± 0%     ~           (p=0.942 n=16+19)

Change-Id: I4f66794164740a2b939eb1c78934e2390b489064
Reviewed-on: https://go-review.googlesource.com/22337
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
---
 src/cmd/internal/obj/x86/a.out.go  |   2 +
 src/cmd/internal/obj/x86/anames.go |   1 +
 src/cmd/internal/obj/x86/asm6.go   |   1 +
 src/runtime/asm_amd64.s            | 157 +++++++++++++++++++++++--------------
 4 files changed, 103 insertions(+), 58 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/internal/obj/x86/a.out.go b/src/cmd/internal/obj/x86/a.out.go
index 64bd865e42..c41fd953e7 100644
--- a/src/cmd/internal/obj/x86/a.out.go
+++ b/src/cmd/internal/obj/x86/a.out.go
@@ -739,6 +739,8 @@ const (
 	AUNPCKLPS
 	AXORPD
 	AXORPS
+	APCMPESTRI
+
 	ARETFW
 	ARETFL
 	ARETFQ
diff --git a/src/cmd/internal/obj/x86/anames.go b/src/cmd/internal/obj/x86/anames.go
index 3b59e2f36f..e3fef54e71 100644
--- a/src/cmd/internal/obj/x86/anames.go
+++ b/src/cmd/internal/obj/x86/anames.go
@@ -682,6 +682,7 @@ var Anames = []string{
 	"UNPCKLPS",
 	"XORPD",
 	"XORPS",
+	"PCMPESTRI",
 	"RETFW",
 	"RETFL",
 	"RETFQ",
diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go
index 57ef045b98..1c7fcf37be 100644
--- a/src/cmd/internal/obj/x86/asm6.go
+++ b/src/cmd/internal/obj/x86/asm6.go
@@ -1648,6 +1648,7 @@ var optab =
 	{AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}},
 	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
 	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
+	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
 
 	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
 	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index 83db4d3e81..cdda29f347 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -1666,122 +1666,126 @@ big_loop_avx2_exit:
 // TODO: Also use this in bytes.Index
 TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
 	MOVQ s+0(FP), DI
-	MOVQ s_len+8(FP), CX
-	MOVQ c+16(FP), AX
-	MOVQ c_len+24(FP), BX
-	CMPQ BX, CX
+	// We want len in DX and AX, because PCMPESTRI implicitly consumes them
+	MOVQ s_len+8(FP), DX
+	MOVQ c+16(FP), BP
+	MOVQ c_len+24(FP), AX
+	CMPQ AX, DX
 	JA fail
-	CMPQ BX, $2
+	CMPQ DX, $16
+	JAE sse42
+no_sse42:
+	CMPQ AX, $2
 	JA   _3_or_more
-	MOVW (AX), AX
-	LEAQ -1(DI)(CX*1), CX
+	MOVW (BP), BP
+	LEAQ -1(DI)(DX*1), DX
 loop2:
 	MOVW (DI), SI
-	CMPW SI,AX
+	CMPW SI,BP
 	JZ success
 	ADDQ $1,DI
-	CMPQ DI,CX
+	CMPQ DI,DX
 	JB loop2
 	JMP fail
 _3_or_more:
-	CMPQ BX, $3
+	CMPQ AX, $3
 	JA   _4_or_more
-	MOVW 1(AX), DX
-	MOVW (AX), AX
-	LEAQ -2(DI)(CX*1), CX
+	MOVW 1(BP), BX
+	MOVW (BP), BP
+	LEAQ -2(DI)(DX*1), DX
 loop3:
 	MOVW (DI), SI
-	CMPW SI,AX
+	CMPW SI,BP
 	JZ   partial_success3
 	ADDQ $1,DI
-	CMPQ DI,CX
+	CMPQ DI,DX
 	JB loop3
 	JMP fail
 partial_success3:
 	MOVW 1(DI), SI
-	CMPW SI,DX
+	CMPW SI,BX
 	JZ success
 	ADDQ $1,DI
-	CMPQ DI,CX
+	CMPQ DI,DX
 	JB loop3
 	JMP fail
 _4_or_more:
-	CMPQ BX, $4
+	CMPQ AX, $4
 	JA   _5_or_more
-	MOVL (AX), AX
-	LEAQ -3(DI)(CX*1), CX
+	MOVL (BP), BP
+	LEAQ -3(DI)(DX*1), DX
 loop4:
 	MOVL (DI), SI
-	CMPL SI,AX
+	CMPL SI,BP
 	JZ   success
 	ADDQ $1,DI
-	CMPQ DI,CX
+	CMPQ DI,DX
 	JB loop4
 	JMP fail
 _5_or_more:
-	CMPQ BX, $7
+	CMPQ AX, $7
 	JA   _8_or_more
-	LEAQ 1(DI)(CX*1), CX
-	SUBQ BX, CX
-	MOVL -4(AX)(BX*1), DX
-	MOVL (AX), AX
+	LEAQ 1(DI)(DX*1), DX
+	SUBQ AX, DX
+	MOVL -4(BP)(AX*1), BX
+	MOVL (BP), BP
 loop5to7:
 	MOVL (DI), SI
-	CMPL SI,AX
+	CMPL SI,BP
 	JZ   partial_success5to7
 	ADDQ $1,DI
-	CMPQ DI,CX
+	CMPQ DI,DX
 	JB loop5to7
 	JMP fail
 partial_success5to7:
-	MOVL -4(BX)(DI*1), SI
-	CMPL SI,DX
+	MOVL -4(AX)(DI*1), SI
+	CMPL SI,BX
 	JZ success
 	ADDQ $1,DI
-	CMPQ DI,CX
+	CMPQ DI,DX
 	JB loop5to7
 	JMP fail
 _8_or_more:
-	CMPQ BX, $8
+	CMPQ AX, $8
 	JA   _9_or_more
-	MOVQ (AX), AX
-	LEAQ -7(DI)(CX*1), CX
+	MOVQ (BP), BP
+	LEAQ -7(DI)(DX*1), DX
 loop8:
 	MOVQ (DI), SI
-	CMPQ SI,AX
+	CMPQ SI,BP
 	JZ   success
 	ADDQ $1,DI
-	CMPQ DI,CX
+	CMPQ DI,DX
 	JB loop8
 	JMP fail
 _9_or_more:
-	CMPQ BX, $16
+	CMPQ AX, $16
 	JA   _16_or_more
-	LEAQ 1(DI)(CX*1), CX
-	SUBQ BX, CX
-	MOVQ -8(AX)(BX*1), DX
-	MOVQ (AX), AX
+	LEAQ 1(DI)(DX*1), DX
+	SUBQ AX, DX
+	MOVQ -8(BP)(AX*1), BX
+	MOVQ (BP), BP
 loop9to15:
 	MOVQ (DI), SI
-	CMPQ SI,AX
+	CMPQ SI,BP
 	JZ   partial_success9to15
 	ADDQ $1,DI
-	CMPQ DI,CX
+	CMPQ DI,DX
 	JB loop9to15
 	JMP fail
 partial_success9to15:
-	MOVQ -8(BX)(DI*1), SI
-	CMPQ SI,DX
+	MOVQ -8(AX)(DI*1), SI
+	CMPQ SI,BX
 	JZ success
 	ADDQ $1,DI
-	CMPQ DI,CX
+	CMPQ DI,DX
 	JB loop9to15
 	JMP fail
 _16_or_more:
-	CMPQ BX, $16
+	CMPQ AX, $17
 	JA   _17_to_31
-	MOVOU (AX), X1
-	LEAQ -15(DI)(CX*1), CX
+	MOVOU (BP), X1
+	LEAQ -15(DI)(DX*1), DX
 loop16:
 	MOVOU (DI), X2
 	PCMPEQB X1, X2
@@ -1789,14 +1793,14 @@ loop16:
 	CMPQ  SI, $0xffff
 	JE   success
 	ADDQ $1,DI
-	CMPQ DI,CX
+	CMPQ DI,DX
 	JB loop16
 	JMP fail
 _17_to_31:
-	LEAQ 1(DI)(CX*1), CX
-	SUBQ BX, CX
-	MOVOU -16(AX)(BX*1), X0
-	MOVOU (AX), X1
+	LEAQ 1(DI)(DX*1), DX
+	SUBQ AX, DX
+	MOVOU -16(BP)(AX*1), X0
+	MOVOU (BP), X1
 loop17to31:
 	MOVOU (DI), X2
 	PCMPEQB X1,X2
@@ -1804,21 +1808,58 @@ loop17to31:
 	CMPQ  SI, $0xffff
 	JE   partial_success17to31
 	ADDQ $1,DI
-	CMPQ DI,CX
+	CMPQ DI,DX
 	JB loop17to31
 	JMP fail
 partial_success17to31:
-	MOVOU -16(BX)(DI*1), X3
+	MOVOU -16(AX)(DI*1), X3
 	PCMPEQB X0, X3
 	PMOVMSKB X3, SI
 	CMPQ  SI, $0xffff
 	JE success
 	ADDQ $1,DI
-	CMPQ DI,CX
+	CMPQ DI,DX
 	JB loop17to31
 fail:
 	MOVQ $-1, ret+32(FP)
 	RET
+sse42:
+	MOVL runtime·cpuid_ecx(SB), CX
+	ANDL $0x100000, CX
+	JZ no_sse42
+	CMPQ AX, $12
+	// PCMPESTRI is slower than normal compare,
+	// so using it makes sense only if we advance 4+ bytes per compare
+	// This value was determined experimentally and is the ~same
+	// on Nehalem (first with SSE42) and Haswell.
+	JAE _9_or_more
+	LEAQ 16(BP), SI
+	TESTW $0xff0, SI
+	JEQ no_sse42
+	MOVOU (BP), X1
+	LEAQ -15(DI)(DX*1), SI
+	MOVQ $16, R9
+	SUBQ AX, R9 // We advance by 16-len(sep) each iteration, so precalculate it into R9
+loop_sse42:
+	// 0x0c means: unsigned byte compare (bits 0,1 are 00)
+	// for equality (bits 2,3 are 11)
+	// result is not masked or inverted (bits 4,5 are 00)
+	// and corresponds to first matching byte (bit 6 is 0)
+	PCMPESTRI $0x0c, (DI), X1
+	// CX == 16 means no match,
+	// CX > R9 means partial match at the end of the string,
+	// otherwise sep is at offset CX from X1 start
+	CMPQ CX, R9
+	JBE sse42_success
+	ADDQ R9, DI
+	CMPQ DI, SI
+	JB loop_sse42
+	PCMPESTRI $0x0c, -1(SI), X1
+	CMPQ CX, R9
+	JA fail
+	LEAQ -1(SI), DI
+sse42_success:
+	ADDQ CX, DI
 success:
 	SUBQ s+0(FP), DI
 	MOVQ DI, ret+32(FP)
-- 
cgit v1.3


From 55154cf0b27e3c48e7cf7654c890868a95e7eed6 Mon Sep 17 00:00:00 2001
From: Michael Munday <munday@ca.ibm.com>
Date: Wed, 13 Apr 2016 13:34:41 -0400
Subject: cmd/link: fix gdb backtrace on architectures using a link register

Also adds TestGdbBacktrace to the runtime package.

Dwarf modifications written by Bryan Chan (@bryanpkc) who is also
at IBM and covered by the same CLA.

Fixes #14628

Change-Id: I106a1f704c3745a31f29cdadb0032e3905829850
Reviewed-on: https://go-review.googlesource.com/20193
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/cmd/link/internal/ld/dwarf.go |  58 ++++++++++++------
 src/runtime/runtime-gdb_test.go   | 122 +++++++++++++++++++++++++++++++++-----
 2 files changed, 145 insertions(+), 35 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/link/internal/ld/dwarf.go b/src/cmd/link/internal/ld/dwarf.go
index 0202df664a..bf1a7e74c1 100644
--- a/src/cmd/link/internal/ld/dwarf.go
+++ b/src/cmd/link/internal/ld/dwarf.go
@@ -1619,14 +1619,13 @@ func writelines(prev *LSym) *LSym {
  *  Emit .debug_frame
  */
 const (
-	CIERESERVE          = 16
-	DATAALIGNMENTFACTOR = -4
+	dataAlignmentFactor = -4
 )
 
 // appendPCDeltaCFA appends per-PC CFA deltas to b and returns the final slice.
 func appendPCDeltaCFA(b []byte, deltapc, cfa int64) []byte {
 	b = append(b, DW_CFA_def_cfa_offset_sf)
-	b = appendSleb128(b, cfa/DATAALIGNMENTFACTOR)
+	b = appendSleb128(b, cfa/dataAlignmentFactor)
 
 	switch {
 	case deltapc < 0x40:
@@ -1654,36 +1653,42 @@ func writeframes(prev *LSym) *LSym {
 	prev.Next = fs
 
 	// Emit the CIE, Section 6.4.1
-	Adduint32(Ctxt, fs, CIERESERVE)           // initial length, must be multiple of thearch.ptrsize
+	cieReserve := uint32(16)
+	if haslinkregister() {
+		cieReserve = 32
+	}
+	Adduint32(Ctxt, fs, cieReserve)           // initial length, must be multiple of pointer size
 	Adduint32(Ctxt, fs, 0xffffffff)           // cid.
 	Adduint8(Ctxt, fs, 3)                     // dwarf version (appendix F)
 	Adduint8(Ctxt, fs, 0)                     // augmentation ""
 	uleb128put(fs, 1)                         // code_alignment_factor
-	sleb128put(fs, DATAALIGNMENTFACTOR)       // guess
+	sleb128put(fs, dataAlignmentFactor)       // all CFI offset calculations include multiplication with this factor
 	uleb128put(fs, int64(Thearch.Dwarfreglr)) // return_address_register
 
-	Adduint8(Ctxt, fs, DW_CFA_def_cfa)
-
-	uleb128put(fs, int64(Thearch.Dwarfregsp)) // register SP (**ABI-dependent, defined in l.h)
+	Adduint8(Ctxt, fs, DW_CFA_def_cfa)        // Set the current frame address..
+	uleb128put(fs, int64(Thearch.Dwarfregsp)) // ...to use the value in the platform's SP register (defined in l.go)...
 	if haslinkregister() {
-		uleb128put(fs, int64(0)) // offset
-	} else {
-		uleb128put(fs, int64(SysArch.PtrSize)) // offset
-	}
+		uleb128put(fs, int64(0)) // ...plus a 0 offset.
 
-	Adduint8(Ctxt, fs, DW_CFA_offset_extended)
-	uleb128put(fs, int64(Thearch.Dwarfreglr)) // return address
-	if haslinkregister() {
-		uleb128put(fs, int64(0)/DATAALIGNMENTFACTOR) // at cfa - 0
+		Adduint8(Ctxt, fs, DW_CFA_same_value) // The platform's link register is unchanged during the prologue.
+		uleb128put(fs, int64(Thearch.Dwarfreglr))
+
+		Adduint8(Ctxt, fs, DW_CFA_val_offset)     // The previous value...
+		uleb128put(fs, int64(Thearch.Dwarfregsp)) // ...of the platform's SP register...
+		uleb128put(fs, int64(0))                  // ...is CFA+0.
 	} else {
-		uleb128put(fs, int64(-SysArch.PtrSize)/DATAALIGNMENTFACTOR) // at cfa - x*4
+		uleb128put(fs, int64(SysArch.PtrSize)) // ...plus the word size (because the call instruction implicitly adds one word to the frame).
+
+		Adduint8(Ctxt, fs, DW_CFA_offset_extended)                  // The previous value...
+		uleb128put(fs, int64(Thearch.Dwarfreglr))                   // ...of the return address...
+		uleb128put(fs, int64(-SysArch.PtrSize)/dataAlignmentFactor) // ...is saved at [CFA - (PtrSize/4)].
 	}
 
 	// 4 is to exclude the length field.
-	pad := CIERESERVE + 4 - fs.Size
+	pad := int64(cieReserve) + 4 - fs.Size
 
 	if pad < 0 {
-		Exitf("dwarf: CIERESERVE too small by %d bytes.", -pad)
+		Exitf("dwarf: cieReserve too small by %d bytes.", -pad)
 	}
 
 	Addbytes(Ctxt, fs, zeros[:pad])
@@ -1712,6 +1717,21 @@ func writeframes(prev *LSym) *LSym {
 			}
 
 			if haslinkregister() {
+				// TODO(bryanpkc): This is imprecise. In general, the instruction
+				// that stores the return address to the stack frame is not the
+				// same one that allocates the frame.
+				if pcsp.value > 0 {
+					// The return address is preserved at (CFA-frame_size)
+					// after a stack frame has been allocated.
+					deltaBuf = append(deltaBuf, DW_CFA_offset_extended_sf)
+					deltaBuf = appendUleb128(deltaBuf, uint64(Thearch.Dwarfreglr))
+					deltaBuf = appendSleb128(deltaBuf, -int64(pcsp.value)/dataAlignmentFactor)
+				} else {
+					// The return address is restored into the link register
+					// when a stack frame has been de-allocated.
+					deltaBuf = append(deltaBuf, DW_CFA_same_value)
+					deltaBuf = appendUleb128(deltaBuf, uint64(Thearch.Dwarfreglr))
+				}
 				deltaBuf = appendPCDeltaCFA(deltaBuf, int64(nextpc)-int64(pcsp.pc), int64(pcsp.value))
 			} else {
 				deltaBuf = appendPCDeltaCFA(deltaBuf, int64(nextpc)-int64(pcsp.pc), int64(SysArch.PtrSize)+int64(pcsp.value))
diff --git a/src/runtime/runtime-gdb_test.go b/src/runtime/runtime-gdb_test.go
index 7cfcefc2c2..4f82646dbb 100644
--- a/src/runtime/runtime-gdb_test.go
+++ b/src/runtime/runtime-gdb_test.go
@@ -3,6 +3,7 @@ package runtime_test
 import (
 	"bytes"
 	"fmt"
+	"internal/testenv"
 	"io/ioutil"
 	"os"
 	"os/exec"
@@ -13,19 +14,22 @@ import (
 	"testing"
 )
 
-func checkGdbPython(t *testing.T) {
-	cmd := exec.Command("gdb", "-nx", "-q", "--batch", "-iex", "python import sys; print('go gdb python support')")
-	out, err := cmd.CombinedOutput()
-
-	if err != nil {
-		t.Skipf("skipping due to issue running gdb: %v", err)
+func checkGdbEnvironment(t *testing.T) {
+	testenv.MustHaveGoBuild(t)
+	if runtime.GOOS == "darwin" {
+		t.Skip("gdb does not work on darwin")
 	}
-	if string(out) != "go gdb python support\n" {
-		t.Skipf("skipping due to lack of python gdb support: %s", out)
+	if final := os.Getenv("GOROOT_FINAL"); final != "" && runtime.GOROOT() != final {
+		t.Skip("gdb test can fail with GOROOT_FINAL pending")
 	}
+}
 
+func checkGdbVersion(t *testing.T) {
 	// Issue 11214 reports various failures with older versions of gdb.
-	out, err = exec.Command("gdb", "--version").CombinedOutput()
+	out, err := exec.Command("gdb", "--version").CombinedOutput()
+	if err != nil {
+		t.Skipf("skipping: error executing gdb: %v", err)
+	}
 	re := regexp.MustCompile(`([0-9]+)\.([0-9]+)`)
 	matches := re.FindSubmatch(out)
 	if len(matches) < 3 {
@@ -42,6 +46,18 @@ func checkGdbPython(t *testing.T) {
 	t.Logf("gdb version %d.%d", major, minor)
 }
 
+func checkGdbPython(t *testing.T) {
+	cmd := exec.Command("gdb", "-nx", "-q", "--batch", "-iex", "python import sys; print('go gdb python support')")
+	out, err := cmd.CombinedOutput()
+
+	if err != nil {
+		t.Skipf("skipping due to issue running gdb: %v", err)
+	}
+	if string(out) != "go gdb python support\n" {
+		t.Skipf("skipping due to lack of python gdb support: %s", out)
+	}
+}
+
 const helloSource = `
 package main
 import "fmt"
@@ -57,13 +73,8 @@ func main() {
 `
 
 func TestGdbPython(t *testing.T) {
-	if runtime.GOOS == "darwin" {
-		t.Skip("gdb does not work on darwin")
-	}
-	if final := os.Getenv("GOROOT_FINAL"); final != "" && runtime.GOROOT() != final {
-		t.Skip("gdb test can fail with GOROOT_FINAL pending")
-	}
-
+	checkGdbEnvironment(t)
+	checkGdbVersion(t)
 	checkGdbPython(t)
 
 	dir, err := ioutil.TempDir("", "go-build")
@@ -162,3 +173,82 @@ func TestGdbPython(t *testing.T) {
 		t.Logf("gdb cannot backtrace for GOARCH=%s, skipped goroutine backtrace test", runtime.GOARCH)
 	}
 }
+
+const backtraceSource = `
+package main
+
+//go:noinline
+func aaa() bool { return bbb() }
+
+//go:noinline
+func bbb() bool { return ccc() }
+
+//go:noinline
+func ccc() bool { return ddd() }
+
+//go:noinline
+func ddd() bool { return f() }
+
+//go:noinline
+func eee() bool { return true }
+
+var f = eee
+
+func main() {
+	_ = aaa()
+}
+`
+
+// TestGdbBacktrace tests that gdb can unwind the stack correctly
+// using only the DWARF debug info.
+func TestGdbBacktrace(t *testing.T) {
+	checkGdbEnvironment(t)
+	checkGdbVersion(t)
+
+	dir, err := ioutil.TempDir("", "go-build")
+	if err != nil {
+		t.Fatalf("failed to create temp directory: %v", err)
+	}
+	defer os.RemoveAll(dir)
+
+	// Build the source code.
+	src := filepath.Join(dir, "main.go")
+	err = ioutil.WriteFile(src, []byte(backtraceSource), 0644)
+	if err != nil {
+		t.Fatalf("failed to create file: %v", err)
+	}
+	cmd := exec.Command("go", "build", "-o", "a.exe")
+	cmd.Dir = dir
+	out, err := testEnv(cmd).CombinedOutput()
+	if err != nil {
+		t.Fatalf("building source %v\n%s", err, out)
+	}
+
+	// Execute gdb commands.
+	args := []string{"-nx", "-batch",
+		"-ex", "break main.eee",
+		"-ex", "run",
+		"-ex", "backtrace",
+		"-ex", "continue",
+		filepath.Join(dir, "a.exe"),
+	}
+	got, _ := exec.Command("gdb", args...).CombinedOutput()
+
+	// Check that the backtrace matches the source code.
+	bt := []string{
+		"eee",
+		"ddd",
+		"ccc",
+		"bbb",
+		"aaa",
+		"main",
+	}
+	for i, name := range bt {
+		s := fmt.Sprintf("#%v.*main\\.%v", i, name)
+		re := regexp.MustCompile(s)
+		if found := re.Find(got) != nil; !found {
+			t.Errorf("could not find '%v' in backtrace", s)
+			t.Fatalf("gdb output:\n%v", string(got))
+		}
+	}
+}
-- 
cgit v1.3


From 1a2cf91f5e9e3dfb0873e61ed6907cc365857f6c Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Fri, 11 Mar 2016 16:27:51 -0500
Subject: runtime: split gfree list into with-stacks and without-stacks

Currently all free Gs are added to one list. Split this into two
lists: one for free Gs with cached stacks and one for Gs without
cached stacks.

This lets us preferentially allocate Gs that already have a stack, but
more importantly, it sets us up to free cached G stacks concurrently.

Change-Id: Idbe486f708997e1c9d166662995283f02d1eeb3c
Reviewed-on: https://go-review.googlesource.com/20664
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/proc.go     | 34 ++++++++++++++++++++++++++--------
 src/runtime/runtime2.go |  7 ++++---
 2 files changed, 30 insertions(+), 11 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index a847823da4..9c840882b6 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -2798,8 +2798,13 @@ func gfput(_p_ *p, gp *g) {
 			_p_.gfreecnt--
 			gp = _p_.gfree
 			_p_.gfree = gp.schedlink.ptr()
-			gp.schedlink.set(sched.gfree)
-			sched.gfree = gp
+			if gp.stack.lo == 0 {
+				gp.schedlink.set(sched.gfreeNoStack)
+				sched.gfreeNoStack = gp
+			} else {
+				gp.schedlink.set(sched.gfreeStack)
+				sched.gfreeStack = gp
+			}
 			sched.ngfree++
 		}
 		unlock(&sched.gflock)
@@ -2811,12 +2816,20 @@ func gfput(_p_ *p, gp *g) {
 func gfget(_p_ *p) *g {
 retry:
 	gp := _p_.gfree
-	if gp == nil && sched.gfree != nil {
+	if gp == nil && (sched.gfreeStack != nil || sched.gfreeNoStack != nil) {
 		lock(&sched.gflock)
-		for _p_.gfreecnt < 32 && sched.gfree != nil {
+		for _p_.gfreecnt < 32 {
+			if sched.gfreeStack != nil {
+				// Prefer Gs with stacks.
+				gp = sched.gfreeStack
+				sched.gfreeStack = gp.schedlink.ptr()
+			} else if sched.gfreeNoStack != nil {
+				gp = sched.gfreeNoStack
+				sched.gfreeNoStack = gp.schedlink.ptr()
+			} else {
+				break
+			}
 			_p_.gfreecnt++
-			gp = sched.gfree
-			sched.gfree = gp.schedlink.ptr()
 			sched.ngfree--
 			gp.schedlink.set(_p_.gfree)
 			_p_.gfree = gp
@@ -2853,8 +2866,13 @@ func gfpurge(_p_ *p) {
 		_p_.gfreecnt--
 		gp := _p_.gfree
 		_p_.gfree = gp.schedlink.ptr()
-		gp.schedlink.set(sched.gfree)
-		sched.gfree = gp
+		if gp.stack.lo == 0 {
+			gp.schedlink.set(sched.gfreeNoStack)
+			sched.gfreeNoStack = gp
+		} else {
+			gp.schedlink.set(sched.gfreeStack)
+			sched.gfreeStack = gp
+		}
 		sched.ngfree++
 	}
 	unlock(&sched.gflock)
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 8cfe6b06e6..0a988ce469 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -523,9 +523,10 @@ type schedt struct {
 	runqsize int32
 
 	// Global cache of dead G's.
-	gflock mutex
-	gfree  *g
-	ngfree int32
+	gflock       mutex
+	gfreeStack   *g
+	gfreeNoStack *g
+	ngfree       int32
 
 	// Central cache of sudog structs.
 	sudoglock  mutex
-- 
cgit v1.3


From e8337491aa6b6a5f96f31077764352549dc34159 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Fri, 11 Mar 2016 17:00:46 -0500
Subject: runtime: free dead G stacks concurrently

Currently we free cached stacks of dead Gs during STW stack root
marking. We do this during STW because there's no way to take
ownership of a particular dead G, so attempting to free a dead G's
stack during concurrent stack root marking could race with reusing
that G.

However, we can do this concurrently if we take a completely different
approach. One way to prevent reuse of a dead G is to remove it from
the free G list. Hence, this adds a new fixed root marking task that
simply removes all Gs from the list of dead Gs with cached stacks,
frees their stacks, and then adds them to the list of dead Gs without
cached stacks.

This is also a necessary step toward rescanning only dirty stacks,
since it eliminates another task from STW stack marking.

Change-Id: Iefbad03078b284a2e7bf30fba397da4ca87fe095
Reviewed-on: https://go-review.googlesource.com/20665
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/mgcmark.go | 45 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 7 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index ad64b735a5..3c6aec943b 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -15,6 +15,7 @@ import (
 const (
 	fixedRootFinalizers = iota
 	fixedRootFlushCaches
+	fixedRootFreeGStacks
 	fixedRootCount
 
 	// rootBlockBytes is the number of bytes to scan per data or
@@ -126,6 +127,13 @@ func markroot(gcw *gcWork, i uint32) {
 			flushallmcaches()
 		}
 
+	case i == fixedRootFreeGStacks:
+		// Only do this once per GC cycle; preferably
+		// concurrently.
+		if !work.markrootDone {
+			markrootFreeGStacks()
+		}
+
 	case baseSpans <= i && i < baseStacks:
 		// mark MSpan.specials
 		markrootSpans(gcw, int(i-baseSpans))
@@ -144,13 +152,6 @@ func markroot(gcw *gcWork, i uint32) {
 			gp.waitsince = work.tstart
 		}
 
-		if gcphase == _GCmarktermination && status == _Gdead {
-			// Free gp's stack if necessary. Only do this
-			// during mark termination because otherwise
-			// _Gdead may be transient.
-			shrinkstack(gp)
-		}
-
 		if gcphase != _GCmarktermination && gp.startpc == gcBgMarkWorkerPC {
 			// GC background workers may be
 			// non-preemptible, so we may deadlock if we
@@ -215,6 +216,36 @@ func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) {
 	scanblock(b, n, ptrmask, gcw)
 }
 
+// markrootFreeGStacks frees stacks of dead Gs.
+//
+// This does not free stacks of dead Gs cached on Ps, but having a few
+// cached stacks around isn't a problem.
+//
+//TODO go:nowritebarrier
+func markrootFreeGStacks() {
+	// Take list of dead Gs with stacks.
+	lock(&sched.gflock)
+	list := sched.gfreeStack
+	sched.gfreeStack = nil
+	unlock(&sched.gflock)
+	if list == nil {
+		return
+	}
+
+	// Free stacks.
+	tail := list
+	for gp := list; gp != nil; gp = gp.schedlink.ptr() {
+		shrinkstack(gp)
+		tail = gp
+	}
+
+	// Put Gs back on the free list.
+	lock(&sched.gflock)
+	tail.schedlink.set(sched.gfreeNoStack)
+	sched.gfreeNoStack = list
+	unlock(&sched.gflock)
+}
+
 // markrootSpans marks roots for one shard of work.spans.
 //
 //go:nowritebarrier
-- 
cgit v1.3


From efb0c55407cdbef6aa5471f057b8afd1d0303369 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Fri, 11 Mar 2016 13:54:55 -0500
Subject: runtime: avoid span root marking entirely during mark termination

Currently we enqueue span root mark jobs during both concurrent mark
and mark termination, but we make the job a no-op during mark
termination.

This is silly. Instead of queueing them up just to not do them, don't
queue them up in the first place.

Change-Id: Ie1d36de884abfb17dd0db6f0449a2b7c997affab
Reviewed-on: https://go-review.googlesource.com/20666
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/mgcmark.go | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 3c6aec943b..8384190407 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -55,8 +55,19 @@ func gcMarkRootPrepare() {
 		}
 	}
 
-	// Compute number of span roots.
-	work.nSpanRoots = (len(work.spans) + rootBlockSpans - 1) / rootBlockSpans
+	if !work.markrootDone {
+		// On the first markroot, we need to scan span roots.
+		// In concurrent GC, this happens during concurrent
+		// mark and we depend on addfinalizer to ensure the
+		// above invariants for objects that get finalizers
+		// after concurrent mark. In STW GC, this will happen
+		// during mark termination.
+		work.nSpanRoots = (len(work.spans) + rootBlockSpans - 1) / rootBlockSpans
+	} else {
+		// We've already scanned span roots and kept the scan
+		// up-to-date during concurrent mark.
+		work.nSpanRoots = 0
+	}
 
 	// Snapshot of allglen. During concurrent scan, we just need
 	// to be consistent about how many markroot jobs we create and
@@ -263,14 +274,8 @@ func markrootSpans(gcw *gcWork, shard int) {
 	// TODO(austin): There are several ideas for making this more
 	// efficient in issue #11485.
 
-	// We process objects with finalizers only during the first
-	// markroot pass. In concurrent GC, this happens during
-	// concurrent mark and we depend on addfinalizer to ensure the
-	// above invariants for objects that get finalizers after
-	// concurrent mark. In STW GC, this will happen during mark
-	// termination.
 	if work.markrootDone {
-		return
+		throw("markrootSpans during second markroot")
 	}
 
 	sg := mheap_.sweepgen
-- 
cgit v1.3


From 269c969c81774d1579e80a8c35edbd0ebea065a7 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Wed, 2 Mar 2016 14:52:08 -0500
Subject: runtime: remove stack barriers during concurrent mark

Currently we remove stack barriers during STW mark termination, which
has a non-trivial per-goroutine cost and means that we have to touch
even clean stacks during mark termination. However, there's no problem
with leaving them in during the sweep phase. They just have to be out
by the time we install new stack barriers immediately prior to
scanning the stack such as during the mark phase of the next GC cycle
or during mark termination in a STW GC.

Hence, move the gcRemoveStackBarriers from STW mark termination to
just before we install new stack barriers during concurrent mark. This
removes the cost from STW. Furthermore, this combined with concurrent
stack shrinking means that the mark termination scan of a clean stack
is a complete no-op, which will make it possible to skip clean stacks
entirely during mark termination.

This has the downside that anything outside of Go that tries to walk
Go stacks will now be confused by stack barriers all the time instead
of just some of the time. This includes tools like GDB, perf, and
VTune. We'll improve the situation shortly.

Change-Id: Ia40baad8f8c16aeefac05425e00b0cf478137097
Reviewed-on: https://go-review.googlesource.com/20667
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/mgcmark.go | 28 ++++++++++++----------------
 src/runtime/mstkbar.go |  7 +++----
 2 files changed, 15 insertions(+), 20 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 8384190407..bad7c7e92b 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -602,9 +602,6 @@ func gcFlushBgCredit(scanWork int64) {
 //go:nowritebarrier
 func scanstack(gp *g) {
 	if gp.gcscanvalid {
-		if gcphase == _GCmarktermination {
-			gcRemoveStackBarriers(gp)
-		}
 		return
 	}
 
@@ -647,6 +644,7 @@ func scanstack(gp *g) {
 	} else {
 		sp = gp.sched.sp
 	}
+	gcLockStackBarriers(gp) // Not necessary during mark term, but harmless.
 	switch gcphase {
 	case _GCmark:
 		// Install stack barriers during stack scan.
@@ -657,16 +655,18 @@ func scanstack(gp *g) {
 			nextBarrier = ^uintptr(0)
 		}
 
-		if gp.stkbarPos != 0 || len(gp.stkbar) != 0 {
-			// If this happens, it's probably because we
-			// scanned a stack twice in the same phase.
-			print("stkbarPos=", gp.stkbarPos, " len(stkbar)=", len(gp.stkbar), " goid=", gp.goid, " gcphase=", gcphase, "\n")
-			throw("g already has stack barriers")
-		}
-
-		gcLockStackBarriers(gp)
+		// Remove any existing stack barriers before we
+		// install new ones.
+		gcRemoveStackBarriers(gp)
 
 	case _GCmarktermination:
+		if !work.markrootDone {
+			// This is a STW GC. There may be stale stack
+			// barriers from an earlier cycle since we
+			// never passed through mark phase.
+			gcRemoveStackBarriers(gp)
+		}
+
 		if int(gp.stkbarPos) == len(gp.stkbar) {
 			// gp hit all of the stack barriers (or there
 			// were none). Re-scan the whole stack.
@@ -683,8 +683,6 @@ func scanstack(gp *g) {
 			}
 		}
 
-		gcRemoveStackBarriers(gp)
-
 	default:
 		throw("scanstack in wrong phase")
 	}
@@ -722,9 +720,7 @@ func scanstack(gp *g) {
 	if gcphase == _GCmarktermination {
 		gcw.dispose()
 	}
-	if gcphase == _GCmark {
-		gcUnlockStackBarriers(gp)
-	}
+	gcUnlockStackBarriers(gp)
 	gp.gcscanvalid = true
 }
 
diff --git a/src/runtime/mstkbar.go b/src/runtime/mstkbar.go
index 016625ae92..f320c351d0 100644
--- a/src/runtime/mstkbar.go
+++ b/src/runtime/mstkbar.go
@@ -214,14 +214,15 @@ func gcInstallStackBarrier(gp *g, frame *stkframe) bool {
 }
 
 // gcRemoveStackBarriers removes all stack barriers installed in gp's stack.
+//
+// gp's stack barriers must be locked.
+//
 //go:nowritebarrier
 func gcRemoveStackBarriers(gp *g) {
 	if debugStackBarrier && gp.stkbarPos != 0 {
 		print("hit ", gp.stkbarPos, " stack barriers, goid=", gp.goid, "\n")
 	}
 
-	gcLockStackBarriers(gp)
-
 	// Remove stack barriers that we didn't hit.
 	for _, stkbar := range gp.stkbar[gp.stkbarPos:] {
 		gcRemoveStackBarrier(gp, stkbar)
@@ -231,8 +232,6 @@ func gcRemoveStackBarriers(gp *g) {
 	// adjust them.
 	gp.stkbarPos = 0
 	gp.stkbar = gp.stkbar[:0]
-
-	gcUnlockStackBarriers(gp)
 }
 
 // gcRemoveStackBarrier removes a single stack barrier. It is the
-- 
cgit v1.3


From 9f263c14edccb564b675ed6c4f12260f333505d5 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Wed, 2 Mar 2016 17:55:45 -0500
Subject: runtime: remove stack barriers during sweep

This adds a best-effort pass to remove stack barriers immediately
after the end of mark termination. This isn't necessary for the Go
runtime, but should help external tools that perform stack walks but
aren't aware of Go's stack barriers such as GDB, perf, and VTune.
(Though clearly they'll still have trouble unwinding stacks during
mark.)

Change-Id: I66600fae1f03ee36b5459d2b00dcc376269af18e
Reviewed-on: https://go-review.googlesource.com/20668
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/mgc.go     |  7 +++++++
 src/runtime/mstkbar.go | 25 +++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

(limited to 'src/runtime')

diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index bc157cddbb..425ed3a160 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1266,6 +1266,13 @@ func gcMarkTermination() {
 	// Free stack spans. This must be done between GC cycles.
 	systemstack(freeStackSpans)
 
+	// Best-effort remove stack barriers so they don't get in the
+	// way of things like GDB and perf.
+	lock(&allglock)
+	myallgs := allgs
+	unlock(&allglock)
+	gcTryRemoveAllStackBarriers(myallgs)
+
 	// Print gctrace before dropping worldsema. As soon as we drop
 	// worldsema another cycle could start and smash the stats
 	// we're trying to print.
diff --git a/src/runtime/mstkbar.go b/src/runtime/mstkbar.go
index f320c351d0..1bf9d573b7 100644
--- a/src/runtime/mstkbar.go
+++ b/src/runtime/mstkbar.go
@@ -257,6 +257,31 @@ func gcRemoveStackBarrier(gp *g, stkbar stkbar) {
 	*lrPtr = sys.Uintreg(stkbar.savedLRVal)
 }
 
+// gcTryRemoveAllStackBarriers tries to remove stack barriers from all
+// Gs in gps. It is best-effort and efficient. If it can't remove
+// barriers from a G immediately, it will simply skip it.
+func gcTryRemoveAllStackBarriers(gps []*g) {
+	for _, gp := range gps {
+	retry:
+		for {
+			switch s := readgstatus(gp); s {
+			default:
+				break retry
+
+			case _Grunnable, _Gsyscall, _Gwaiting:
+				if !castogscanstatus(gp, s, s|_Gscan) {
+					continue
+				}
+				gcLockStackBarriers(gp)
+				gcRemoveStackBarriers(gp)
+				gcUnlockStackBarriers(gp)
+				restartg(gp)
+				break retry
+			}
+		}
+	}
+}
+
 // gcPrintStkbars prints the stack barriers of gp for debugging. It
 // places a "@@@" marker at gp.stkbarPos. If marker >= 0, it will also
 // place a "==>" marker before the marker'th entry.
-- 
cgit v1.3


From c707d8385639dfda22dc06b112f5f7af78006a1f Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Mon, 18 Apr 2016 18:28:36 -0400
Subject: runtime: fix typos in comment about gcscanvalid

Change-Id: Id4ad7ebf88a21eba2bc5714b96570ed5cfaed757
Reviewed-on: https://go-review.googlesource.com/22210
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/proc.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 9c840882b6..d5acbee0a7 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -643,17 +643,17 @@ func readgstatus(gp *g) uint32 {
 	return atomic.Load(&gp.atomicstatus)
 }
 
-// Ownership of gscanvalid:
+// Ownership of gcscanvalid:
 //
 // If gp is running (meaning status == _Grunning or _Grunning|_Gscan),
-// then gp owns gp.gscanvalid, and other goroutines must not modify it.
+// then gp owns gp.gcscanvalid, and other goroutines must not modify it.
 //
 // Otherwise, a second goroutine can lock the scan state by setting _Gscan
-// in the status bit and then modify gscanvalid, and then unlock the scan state.
+// in the status bit and then modify gcscanvalid, and then unlock the scan state.
 //
 // Note that the first condition implies an exception to the second:
 // if a second goroutine changes gp's status to _Grunning|_Gscan,
-// that second goroutine still does not have the right to modify gscanvalid.
+// that second goroutine still does not have the right to modify gcscanvalid.
 
 // The Gscanstatuses are acting like locks and this releases them.
 // If it proves to be a performance hit we should be able to make these
-- 
cgit v1.3


From 5b765ce310c594276ea919a9cb455cc894fee999 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Fri, 11 Mar 2016 14:08:10 -0500
Subject: runtime: don't clear gcscanvalid in casfrom_Gscanstatus

Currently we clear gcscanvalid in both casgstatus and
casfrom_Gscanstatus if the new status is _Grunning. This is very
important to do in casgstatus. However, this is potentially wrong in
casfrom_Gscanstatus because in this case the caller doesn't own gp and
hence the write is racy. Unlike the other _Gscan statuses, during
_Gscanrunning, the G is still running. This does not indicate that
it's transitioning into a running state. The scan simply hasn't
happened yet, so it's neither valid nor invalid.

Conveniently, this also means clearing gcscanvalid is unnecessary in
this case because the G was already in _Grunning, so we can simply
remove this code. What will happen instead is that the G will be
preempted to scan itself, that scan will set gcscanvalid to true, and
then the G will return to _Grunning via casgstatus, clearing
gcscanvalid.

This fix will become necessary shortly when we start keeping track of
the set of G's with dirty stacks, since it will no longer be
idempotent to simply set gcscanvalid to false.

Change-Id: I688c82e6fbf00d5dbbbff49efa66acb99ee86785
Reviewed-on: https://go-review.googlesource.com/20669
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/proc.go  | 3 ---
 src/runtime/stack.go | 1 +
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index d5acbee0a7..dcdc7bedb8 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -681,9 +681,6 @@ func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
 		dumpgstatus(gp)
 		throw("casfrom_Gscanstatus: gp->status is not in scan state")
 	}
-	if newval == _Grunning {
-		gp.gcscanvalid = false
-	}
 }
 
 // This will return false if the gp is not in the expected status and the cas fails.
diff --git a/src/runtime/stack.go b/src/runtime/stack.go
index dcb1b06dbd..c4b1fb862e 100644
--- a/src/runtime/stack.go
+++ b/src/runtime/stack.go
@@ -1016,6 +1016,7 @@ func newstack() {
 			gp.preemptscan = false
 			gp.preempt = false
 			casfrom_Gscanstatus(gp, _Gscanwaiting, _Gwaiting)
+			// This clears gcscanvalid.
 			casgstatus(gp, _Gwaiting, _Grunning)
 			gp.stackguard0 = gp.stack.lo + _StackGuard
 			gogo(&gp.sched) // never return
-- 
cgit v1.3


From 2a889b9d931e58166350f785b16edc51e28ef19b Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Fri, 4 Mar 2016 11:58:26 -0500
Subject: runtime: make stack re-scan O(# dirty stacks)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the stack re-scan during mark termination is O(# stacks)
because we enqueue a root marking job for every goroutine. It takes
~34ns to process this root marking job for a valid (clean) stack, so
at around 300k goroutines we exceed the 10ms pause goal. A non-trivial
portion of this time is spent simply taking the cache miss to check
the gcscanvalid flag, so simply optimizing the path that handles clean
stacks can only improve this so much.

Fix this by keeping an explicit list of goroutines with dirty stacks
that need to be rescanned. When a goroutine first transitions to
running after a stack scan and marks its stack dirty, it adds itself
to this list. We enqueue root marking jobs only for the goroutines in
this list, so this improves stack re-scanning asymptotically by
completely eliminating time spent on clean goroutines.

This reduces mark termination time for 500k idle goroutines from 15ms
to 238µs. Overall performance effect is negligible.

name \ 95%ile-time/markTerm     old           new         delta
IdleGs/gs:500000/gomaxprocs:12  15000µs ± 0%  238µs ± 5%  -98.41% (p=0.000 n=10+10)

name              old time/op  new time/op  delta
XBenchGarbage-12  2.30ms ± 3%  2.29ms ± 1%  -0.43%  (p=0.049 n=17+18)

name                      old time/op    new time/op    delta
BinaryTree17-12              2.57s ± 3%     2.59s ± 2%    ~     (p=0.141 n=19+20)
Fannkuch11-12                2.09s ± 0%     2.10s ± 1%  +0.53%  (p=0.000 n=19+19)
FmtFprintfEmpty-12          45.3ns ± 3%    45.2ns ± 2%    ~     (p=0.845 n=20+20)
FmtFprintfString-12          129ns ± 0%     127ns ± 0%  -1.55%  (p=0.000 n=16+16)
FmtFprintfInt-12             123ns ± 0%     119ns ± 1%  -3.24%  (p=0.000 n=19+19)
FmtFprintfIntInt-12          195ns ± 1%     189ns ± 1%  -3.11%  (p=0.000 n=17+17)
FmtFprintfPrefixedInt-12     193ns ± 1%     187ns ± 1%  -3.06%  (p=0.000 n=19+19)
FmtFprintfFloat-12           254ns ± 0%     255ns ± 1%  +0.35%  (p=0.001 n=14+17)
FmtManyArgs-12               781ns ± 0%     770ns ± 0%  -1.48%  (p=0.000 n=16+19)
GobDecode-12                7.00ms ± 1%    6.98ms ± 1%    ~     (p=0.563 n=19+19)
GobEncode-12                5.91ms ± 1%    5.92ms ± 0%    ~     (p=0.118 n=19+18)
Gzip-12                      219ms ± 1%     215ms ± 1%  -1.81%  (p=0.000 n=18+18)
Gunzip-12                   37.2ms ± 0%    37.4ms ± 0%  +0.45%  (p=0.000 n=17+19)
HTTPClientServer-12         76.9µs ± 3%    77.5µs ± 2%  +0.81%  (p=0.030 n=20+19)
JSONEncode-12               15.0ms ± 0%    14.8ms ± 1%  -0.88%  (p=0.001 n=15+19)
JSONDecode-12               50.6ms ± 0%    53.2ms ± 2%  +5.07%  (p=0.000 n=17+19)
Mandelbrot200-12            4.05ms ± 0%    4.05ms ± 1%    ~     (p=0.581 n=16+17)
GoParse-12                  3.34ms ± 1%    3.30ms ± 1%  -1.21%  (p=0.000 n=15+20)
RegexpMatchEasy0_32-12      69.6ns ± 1%    69.8ns ± 2%    ~     (p=0.566 n=19+19)
RegexpMatchEasy0_1K-12       238ns ± 1%     236ns ± 0%  -0.91%  (p=0.000 n=17+13)
RegexpMatchEasy1_32-12      69.8ns ± 1%    70.0ns ± 1%  +0.23%  (p=0.026 n=17+16)
RegexpMatchEasy1_1K-12       371ns ± 1%     363ns ± 1%  -2.07%  (p=0.000 n=19+19)
RegexpMatchMedium_32-12      107ns ± 2%     106ns ± 1%  -0.51%  (p=0.031 n=18+20)
RegexpMatchMedium_1K-12     33.0µs ± 0%    32.9µs ± 0%  -0.30%  (p=0.004 n=16+16)
RegexpMatchHard_32-12       1.70µs ± 0%    1.70µs ± 0%  +0.45%  (p=0.000 n=16+17)
RegexpMatchHard_1K-12       51.1µs ± 2%    51.4µs ± 1%  +0.53%  (p=0.000 n=17+19)
Revcomp-12                   378ms ± 1%     385ms ± 1%  +1.92%  (p=0.000 n=19+18)
Template-12                 64.3ms ± 2%    65.0ms ± 2%  +1.09%  (p=0.001 n=19+19)
TimeParse-12                 315ns ± 1%     317ns ± 2%    ~     (p=0.108 n=18+20)
TimeFormat-12                360ns ± 1%     337ns ± 0%  -6.30%  (p=0.000 n=18+13)
[Geo mean]                  51.8µs         51.6µs       -0.48%

Change-Id: Icf8994671476840e3998236e15407a505d4c760c
Reviewed-on: https://go-review.googlesource.com/20700
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/mgc.go      |  18 ++++++-
 src/runtime/mgcmark.go  | 133 +++++++++++++++++++++++++++++++++++++++++-------
 src/runtime/proc.go     |  33 +++++++++++-
 src/runtime/runtime2.go |  11 +++-
 4 files changed, 171 insertions(+), 24 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 425ed3a160..328ff4cd88 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -762,7 +762,7 @@ var work struct {
 	alldone note
 
 	// Number of roots of various root types. Set by gcMarkRootPrepare.
-	nDataRoots, nBSSRoots, nSpanRoots, nStackRoots int
+	nDataRoots, nBSSRoots, nSpanRoots, nStackRoots, nRescanRoots int
 
 	// markrootDone indicates that roots have been marked at least
 	// once during the current GC cycle. This is checked by root
@@ -830,6 +830,14 @@ var work struct {
 		head, tail guintptr
 	}
 
+	// rescan is a list of G's that need to be rescanned during
+	// mark termination. A G adds itself to this list when it
+	// first invalidates its stack scan.
+	rescan struct {
+		lock mutex
+		list []guintptr
+	}
+
 	// Timing/utilization stats for this cycle.
 	stwprocs, maxprocs                 int32
 	tSweepTerm, tMark, tMarkTerm, tEnd int64 // nanotime() of phase start
@@ -1736,14 +1744,22 @@ func gcCopySpans() {
 func gcResetMarkState() {
 	// This may be called during a concurrent phase, so make sure
 	// allgs doesn't change.
+	if !(gcphase == _GCoff || gcphase == _GCmarktermination) {
+		// Accessing gcRescan is unsafe.
+		throw("bad GC phase")
+	}
 	lock(&allglock)
 	for _, gp := range allgs {
 		gp.gcscandone = false  // set to true in gcphasework
 		gp.gcscanvalid = false // stack has not been scanned
+		gp.gcRescan = -1
 		gp.gcAssistBytes = 0
 	}
 	unlock(&allglock)
 
+	// Clear rescan list.
+	work.rescan.list = work.rescan.list[:0]
+
 	work.bytesMarked = 0
 	work.initialHeapLive = memstats.heap_live
 	work.markrootDone = false
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index bad7c7e92b..7f481dee22 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -32,6 +32,8 @@ const (
 //
 // The caller must have call gcCopySpans().
 //
+// The world must be stopped.
+//
 //go:nowritebarrier
 func gcMarkRootPrepare() {
 	// Compute how many data and BSS root blocks there are.
@@ -63,24 +65,31 @@ func gcMarkRootPrepare() {
 		// after concurrent mark. In STW GC, this will happen
 		// during mark termination.
 		work.nSpanRoots = (len(work.spans) + rootBlockSpans - 1) / rootBlockSpans
+
+		// On the first markroot, we need to scan all Gs. Gs
+		// may be created after this point, but it's okay that
+		// we ignore them because they begin life without any
+		// roots, so there's nothing to scan, and any roots
+		// they create during the concurrent phase will be
+		// scanned during mark termination. During mark
+		// termination, allglen isn't changing, so we'll scan
+		// all Gs.
+		work.nStackRoots = int(atomic.Loaduintptr(&allglen))
+		work.nRescanRoots = 0
 	} else {
 		// We've already scanned span roots and kept the scan
 		// up-to-date during concurrent mark.
 		work.nSpanRoots = 0
-	}
 
-	// Snapshot of allglen. During concurrent scan, we just need
-	// to be consistent about how many markroot jobs we create and
-	// how many Gs we check. Gs may be created after this point,
-	// but it's okay that we ignore them because they begin life
-	// without any roots, so there's nothing to scan, and any
-	// roots they create during the concurrent phase will be
-	// scanned during mark termination. During mark termination,
-	// allglen isn't changing, so we'll scan all Gs.
-	work.nStackRoots = int(atomic.Loaduintptr(&allglen))
+		// On the second pass of markroot, we're just scanning
+		// dirty stacks. It's safe to access rescan since the
+		// world is stopped.
+		work.nStackRoots = 0
+		work.nRescanRoots = len(work.rescan.list)
+	}
 
 	work.markrootNext = 0
-	work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots)
+	work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots + work.nRescanRoots)
 }
 
 // gcMarkRootCheck checks that all roots have been scanned. It is
@@ -92,11 +101,24 @@ func gcMarkRootCheck() {
 	}
 
 	lock(&allglock)
-	// Check that gc work is done.
-	for i := 0; i < work.nStackRoots; i++ {
-		gp := allgs[i]
-		if !gp.gcscandone {
-			throw("scan missed a g")
+	// Check that stacks have been scanned.
+	if gcphase == _GCmarktermination {
+		for i := 0; i < len(allgs); i++ {
+			gp := allgs[i]
+			if !(gp.gcscandone && gp.gcscanvalid) && readgstatus(gp) != _Gdead {
+				println("gp", gp, "goid", gp.goid,
+					"status", readgstatus(gp),
+					"gcscandone", gp.gcscandone,
+					"gcscanvalid", gp.gcscanvalid)
+				throw("scan missed a g")
+			}
+		}
+	} else {
+		for i := 0; i < work.nStackRoots; i++ {
+			gp := allgs[i]
+			if !gp.gcscandone {
+				throw("scan missed a g")
+			}
 		}
 	}
 	unlock(&allglock)
@@ -109,12 +131,18 @@ var oneptrmask = [...]uint8{1}
 //
 // Preemption must be disabled (because this uses a gcWork).
 //
+// nowritebarrier is only advisory here.
+//
 //go:nowritebarrier
 func markroot(gcw *gcWork, i uint32) {
+	// TODO(austin): This is a bit ridiculous. Compute and store
+	// the bases in gcMarkRootPrepare instead of the counts.
 	baseData := uint32(fixedRootCount)
 	baseBSS := baseData + uint32(work.nDataRoots)
 	baseSpans := baseBSS + uint32(work.nBSSRoots)
 	baseStacks := baseSpans + uint32(work.nSpanRoots)
+	baseRescan := baseStacks + uint32(work.nStackRoots)
+	end := baseRescan + uint32(work.nRescanRoots)
 
 	// Note: if you add a case here, please also update heapdump.go:dumproots.
 	switch {
@@ -151,10 +179,14 @@ func markroot(gcw *gcWork, i uint32) {
 
 	default:
 		// the rest is scanning goroutine stacks
-		if uintptr(i-baseStacks) >= allglen {
+		var gp *g
+		if baseStacks <= i && i < baseRescan {
+			gp = allgs[i-baseStacks]
+		} else if baseRescan <= i && i < end {
+			gp = work.rescan.list[i-baseRescan].ptr()
+		} else {
 			throw("markroot: bad index")
 		}
-		gp := allgs[i-baseStacks]
 
 		// remember when we've first observed the G blocked
 		// needed only to output in traceback
@@ -163,13 +195,14 @@ func markroot(gcw *gcWork, i uint32) {
 			gp.waitsince = work.tstart
 		}
 
-		if gcphase != _GCmarktermination && gp.startpc == gcBgMarkWorkerPC {
+		if gcphase != _GCmarktermination && gp.startpc == gcBgMarkWorkerPC && readgstatus(gp) != _Gdead {
 			// GC background workers may be
 			// non-preemptible, so we may deadlock if we
 			// try to scan them during a concurrent phase.
 			// They also have tiny stacks, so just ignore
 			// them until mark termination.
 			gp.gcscandone = true
+			queueRescan(gp)
 			break
 		}
 
@@ -721,6 +754,14 @@ func scanstack(gp *g) {
 		gcw.dispose()
 	}
 	gcUnlockStackBarriers(gp)
+	if gcphase == _GCmark {
+		// gp may have added itself to the rescan list between
+		// when GC started and now. It's clean now, so remove
+		// it. This isn't safe during mark termination because
+		// mark termination is consuming this list, but it's
+		// also not necessary.
+		dequeueRescan(gp)
+	}
 	gp.gcscanvalid = true
 }
 
@@ -797,6 +838,60 @@ func scanframeworker(frame *stkframe, cache *pcvalueCache, gcw *gcWork) {
 	}
 }
 
+// queueRescan adds gp to the stack rescan list and clears
+// gp.gcscanvalid. The caller must own gp and ensure that gp isn't
+// already on the rescan list.
+func queueRescan(gp *g) {
+	if gcphase == _GCoff {
+		gp.gcscanvalid = false
+		return
+	}
+	if gp.gcRescan != -1 {
+		throw("g already on rescan list")
+	}
+
+	lock(&work.rescan.lock)
+	gp.gcscanvalid = false
+
+	// Recheck gcphase under the lock in case there was a phase change.
+	if gcphase == _GCoff {
+		unlock(&work.rescan.lock)
+		return
+	}
+	if len(work.rescan.list) == cap(work.rescan.list) {
+		throw("rescan list overflow")
+	}
+	n := len(work.rescan.list)
+	gp.gcRescan = int32(n)
+	work.rescan.list = work.rescan.list[:n+1]
+	work.rescan.list[n].set(gp)
+	unlock(&work.rescan.lock)
+}
+
+// dequeueRescan removes gp from the stack rescan list, if gp is on
+// the rescan list. The caller must own gp.
+func dequeueRescan(gp *g) {
+	if gp.gcRescan == -1 {
+		return
+	}
+	if gcphase == _GCoff {
+		gp.gcRescan = -1
+		return
+	}
+
+	lock(&work.rescan.lock)
+	if work.rescan.list[gp.gcRescan].ptr() != gp {
+		throw("bad dequeueRescan")
+	}
+	// Careful: gp may itself be the last G on the list.
+	last := work.rescan.list[len(work.rescan.list)-1]
+	work.rescan.list[gp.gcRescan] = last
+	last.ptr().gcRescan = gp.gcRescan
+	gp.gcRescan = -1
+	work.rescan.list = work.rescan.list[:len(work.rescan.list)-1]
+	unlock(&work.rescan.lock)
+}
+
 type gcDrainFlags int
 
 const (
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index dcdc7bedb8..ee732e3cf7 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -402,6 +402,16 @@ func allgadd(gp *g) {
 	lock(&allglock)
 	allgs = append(allgs, gp)
 	allglen = uintptr(len(allgs))
+
+	// Grow GC rescan list if necessary.
+	if len(allgs) > cap(work.rescan.list) {
+		lock(&work.rescan.lock)
+		l := work.rescan.list
+		// Let append do the heavy lifting, but keep the
+		// length the same.
+		work.rescan.list = append(l[:cap(l)], 0)[:len(l)]
+		unlock(&work.rescan.lock)
+	}
 	unlock(&allglock)
 }
 
@@ -754,8 +764,9 @@ func casgstatus(gp *g, oldval, newval uint32) {
 			nextYield = nanotime() + yieldDelay/2
 		}
 	}
-	if newval == _Grunning {
-		gp.gcscanvalid = false
+	if newval == _Grunning && gp.gcscanvalid {
+		// Run queueRescan on the system stack so it has more space.
+		systemstack(func() { queueRescan(gp) })
 	}
 }
 
@@ -1405,6 +1416,8 @@ func newextram() {
 	gp.syscallpc = gp.sched.pc
 	gp.syscallsp = gp.sched.sp
 	gp.stktopsp = gp.sched.sp
+	gp.gcscanvalid = true // fresh G, so no dequeueRescan necessary
+	gp.gcRescan = -1
 	// malg returns status as Gidle, change to Gsyscall before adding to allg
 	// where GC will see it.
 	casgstatus(gp, _Gidle, _Gsyscall)
@@ -2210,6 +2223,10 @@ func goexit0(gp *g) {
 	gp.waitreason = ""
 	gp.param = nil
 
+	// Note that gp's stack scan is now "valid" because it has no
+	// stack. We could dequeueRescan, but that takes a lock and
+	// isn't really necessary.
+	gp.gcscanvalid = true
 	dropg()
 
 	if _g_.m.locked&^_LockExternal != 0 {
@@ -2700,6 +2717,7 @@ func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr
 	if newg == nil {
 		newg = malg(_StackMin)
 		casgstatus(newg, _Gidle, _Gdead)
+		newg.gcRescan = -1
 		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
 	}
 	if newg.stack.hi == 0 {
@@ -2733,6 +2751,17 @@ func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr
 	if isSystemGoroutine(newg) {
 		atomic.Xadd(&sched.ngsys, +1)
 	}
+	// The stack is dirty from the argument frame, so queue it for
+	// scanning. Do this before setting it to runnable so we still
+	// own the G. If we're recycling a G, it may already be on the
+	// rescan list.
+	if newg.gcRescan == -1 {
+		queueRescan(newg)
+	} else {
+		// The recycled G is already on the rescan list. Just
+		// mark the stack dirty.
+		newg.gcscanvalid = false
+	}
 	casgstatus(newg, _Gdead, _Grunnable)
 
 	if _p_.goidcache == _p_.goidcacheend {
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 0a988ce469..d35b897c3e 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -336,7 +336,7 @@ type g struct {
 	paniconfault   bool     // panic (instead of crash) on unexpected fault address
 	preemptscan    bool     // preempted g does scan for gc
 	gcscandone     bool     // g has scanned stack; protected by _Gscan bit in status
-	gcscanvalid    bool     // false at start of gc cycle, true if G has not run since last scan
+	gcscanvalid    bool     // false at start of gc cycle, true if G has not run since last scan; transition from true to false by calling queueRescan and false to true by calling dequeueRescan
 	throwsplit     bool     // must not split stack
 	raceignore     int8     // ignore race detection events
 	sysblocktraced bool     // StartTrace has emitted EvGoInSyscall about this goroutine
@@ -354,7 +354,14 @@ type g struct {
 	racectx        uintptr
 	waiting        *sudog // sudog structures this g is waiting on (that have a valid elem ptr); in lock order
 
-	// Per-G gcController state
+	// Per-G GC state
+
+	// gcRescan is this G's index in work.rescan.list. If this is
+	// -1, this G is not on the rescan list.
+	//
+	// If gcphase != _GCoff and this G is visible to the garbage
+	// collector, writes to this are protected by work.rescan.lock.
+	gcRescan int32
 
 	// gcAssistBytes is this G's GC assist credit in terms of
 	// bytes allocated. If this is positive, then the G has credit
-- 
cgit v1.3


From 6dfba5c7ce867583cbcea9da09dceacd2633bacc Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Fri, 26 Feb 2016 13:02:42 +0100
Subject: runtime/race: improve TestNoRaceIOHttp test

TestNoRaceIOHttp does all kinds of bad things:
1. Binds to a fixed port, so concurrent tests fail.
2. Registers HTTP handler multiple times, so repeated tests fail.
3. Relies on sleep to wait for listen.

Fix all of that.

Change-Id: I1210b7797ef5e92465b37dc407246d92a2a24fe8
Reviewed-on: https://go-review.googlesource.com/19953
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
---
 src/runtime/race/testdata/io_test.go | 37 +++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/race/testdata/io_test.go b/src/runtime/race/testdata/io_test.go
index 1b3ee3822b..30a121bee4 100644
--- a/src/runtime/race/testdata/io_test.go
+++ b/src/runtime/race/testdata/io_test.go
@@ -7,9 +7,11 @@ package race_test
 import (
 	"fmt"
 	"io/ioutil"
+	"net"
 	"net/http"
 	"os"
 	"path/filepath"
+	"sync"
 	"testing"
 	"time"
 )
@@ -41,29 +43,34 @@ func TestNoRaceIOFile(t *testing.T) {
 	_ = x
 }
 
+var (
+	regHandler  sync.Once
+	handlerData int
+)
+
 func TestNoRaceIOHttp(t *testing.T) {
-	x := 0
-	go func() {
+	regHandler.Do(func() {
 		http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
-			x = 41
+			handlerData++
 			fmt.Fprintf(w, "test")
-			x = 42
+			handlerData++
 		})
-		err := http.ListenAndServe("127.0.0.1:23651", nil)
-		if err != nil {
-			t.Fatalf("http.ListenAndServe: %v", err)
-		}
-	}()
-	time.Sleep(1e7)
-	x = 1
-	_, err := http.Get("http://127.0.0.1:23651")
+	})
+	ln, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		t.Fatalf("net.Listen: %v", err)
+	}
+	defer ln.Close()
+	go http.Serve(ln, nil)
+	handlerData++
+	_, err = http.Get("http://" + ln.Addr().String())
 	if err != nil {
 		t.Fatalf("http.Get: %v", err)
 	}
-	x = 2
-	_, err = http.Get("http://127.0.0.1:23651")
+	handlerData++
+	_, err = http.Get("http://" + ln.Addr().String())
 	if err != nil {
 		t.Fatalf("http.Get: %v", err)
 	}
-	x = 3
+	handlerData++
 }
-- 
cgit v1.3


From 9629f55fbbfb4052ea24c247cbd5db49ba2f389e Mon Sep 17 00:00:00 2001
From: Cherry Zhang <cherryyz@google.com>
Date: Tue, 26 Apr 2016 15:17:56 -0400
Subject: cmd/link: remove absolute address for c-archive on darwin/arm

Now it is possible to build a c-archive as PIC on darwin/arm (this is
now the default). Then the system linker can link the binary using
the archive as PIE.

Fixes #12896.

Change-Id: Iad84131572422190f5fa036e7d71910dc155f155
Reviewed-on: https://go-review.googlesource.com/22461
Reviewed-by: David Crawshaw <crawshaw@golang.org>
---
 src/cmd/go/build.go                   |  5 +++++
 src/cmd/link/internal/arm/asm.go      | 30 ++++++++++++++++++++++++++++++
 src/cmd/link/internal/ld/data.go      |  3 +++
 src/cmd/link/internal/ld/macho.go     |  7 +++++--
 src/runtime/cgo/signal_darwin_armx.go |  6 +++++-
 5 files changed, 48 insertions(+), 3 deletions(-)

(limited to 'src/runtime')

diff --git a/src/cmd/go/build.go b/src/cmd/go/build.go
index 4aaad04b3a..b8c12db196 100644
--- a/src/cmd/go/build.go
+++ b/src/cmd/go/build.go
@@ -334,6 +334,11 @@ func buildModeInit() {
 			}
 			return p
 		}
+		switch platform {
+		case "darwin/arm":
+			codegenArg = "-shared"
+		default:
+		}
 		exeSuffix = ".a"
 		ldBuildmode = "c-archive"
 	case "c-shared":
diff --git a/src/cmd/link/internal/arm/asm.go b/src/cmd/link/internal/arm/asm.go
index 069812fcef..aafdd9bc3d 100644
--- a/src/cmd/link/internal/arm/asm.go
+++ b/src/cmd/link/internal/arm/asm.go
@@ -330,6 +330,36 @@ func machoreloc1(r *ld.Reloc, sectoff int64) int {
 
 	rs := r.Xsym
 
+	if r.Type == obj.R_PCREL {
+		if rs.Type == obj.SHOSTOBJ {
+			ld.Diag("pc-relative relocation of external symbol is not supported")
+			return -1
+		}
+		if r.Siz != 4 {
+			return -1
+		}
+
+		// Emit a pair of "scattered" relocations that
+		// resolve to the difference of the section addresses of
+		// the symbol and the instruction.
+		// This value is added to the field being relocated.
+		o1 := uint32(sectoff)
+		o1 |= 1 << 31 // scattered bit
+		o1 |= ld.MACHO_ARM_RELOC_SECTDIFF << 24
+		o1 |= 2 << 28 // size = 4
+
+		o2 := uint32(0)
+		o2 |= 1 << 31 // scattered bit
+		o2 |= ld.MACHO_ARM_RELOC_PAIR << 24
+		o2 |= 2 << 28 // size = 4
+
+		ld.Thearch.Lput(o1)
+		ld.Thearch.Lput(uint32(ld.Symaddr(rs)))
+		ld.Thearch.Lput(o2)
+		ld.Thearch.Lput(uint32(ld.Ctxt.Cursym.Value + int64(r.Off)))
+		return 0
+	}
+
 	if rs.Type == obj.SHOSTOBJ || r.Type == obj.R_CALLARM {
 		if rs.Dynid < 0 {
 			ld.Diag("reloc %d to non-macho symbol %s type=%d", r.Type, rs.Name, rs.Type)
diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go
index dbd5ad0b75..8964757846 100644
--- a/src/cmd/link/internal/ld/data.go
+++ b/src/cmd/link/internal/ld/data.go
@@ -560,6 +560,9 @@ func relocsym(s *LSym) {
 							o += int64(uint64(Symaddr(rs)) - rs.Sect.Vaddr)
 						}
 						o -= int64(r.Off) // relative to section offset, not symbol
+					} else if SysArch.Family == sys.ARM {
+						// see ../arm/asm.go:/machoreloc1
+						o += Symaddr(rs) - int64(Ctxt.Cursym.Value) - int64(r.Off)
 					} else {
 						o += int64(r.Siz)
 					}
diff --git a/src/cmd/link/internal/ld/macho.go b/src/cmd/link/internal/ld/macho.go
index 310435e49e..53cc96275d 100644
--- a/src/cmd/link/internal/ld/macho.go
+++ b/src/cmd/link/internal/ld/macho.go
@@ -79,6 +79,8 @@ const (
 	MACHO_X86_64_RELOC_SIGNED_2   = 7
 	MACHO_X86_64_RELOC_SIGNED_4   = 8
 	MACHO_ARM_RELOC_VANILLA       = 0
+	MACHO_ARM_RELOC_PAIR          = 1
+	MACHO_ARM_RELOC_SECTDIFF      = 2
 	MACHO_ARM_RELOC_BR24          = 5
 	MACHO_ARM64_RELOC_UNSIGNED    = 0
 	MACHO_ARM64_RELOC_BRANCH26    = 2
@@ -350,8 +352,9 @@ func machoshbits(mseg *MachoSeg, sect *Section, segname string) {
 
 	var msect *MachoSect
 	if sect.Rwx&1 == 0 && segname != "__DWARF" && (SysArch.Family == sys.ARM64 ||
-		(SysArch.Family == sys.AMD64 && (Buildmode == BuildmodeCShared || Buildmode == BuildmodeCArchive))) {
-		// Darwin external linker on arm64 and on amd64 in c-shared/c-archive buildmode
+		(SysArch.Family == sys.AMD64 && (Buildmode == BuildmodeCShared || Buildmode == BuildmodeCArchive)) ||
+		(SysArch.Family == sys.ARM && (Buildmode == BuildmodeCShared || Buildmode == BuildmodeCArchive))) {
+		// Darwin external linker on arm64 and on amd64 and arm in c-shared/c-archive buildmode
 		// complains about absolute relocs in __TEXT, so if the section is not
 		// executable, put it in __DATA segment.
 		msect = newMachoSect(mseg, buf, "__DATA")
diff --git a/src/runtime/cgo/signal_darwin_armx.go b/src/runtime/cgo/signal_darwin_armx.go
index 9c1ba5dee1..9f6741eb08 100644
--- a/src/runtime/cgo/signal_darwin_armx.go
+++ b/src/runtime/cgo/signal_darwin_armx.go
@@ -13,10 +13,14 @@ import "unsafe"
 //go:linkname x_cgo_panicmem x_cgo_panicmem
 var x_cgo_panicmem uintptr
 
+// Use a pointer to avoid a relocation of an external symbol in __TEXT,
+// which keeps the linker happy.
+var _cgo_panicmem = &x_cgo_panicmem
+
 // TODO(crawshaw): move this into x_cgo_init, it will not run until
 // runtime has finished loading, which may be after its use.
 func init() {
-	x_cgo_panicmem = funcPC(panicmem)
+	*_cgo_panicmem = funcPC(panicmem)
 }
 
 func funcPC(f interface{}) uintptr {
-- 
cgit v1.3


From 74a9bad63899ffb02b747678c2c181ffb13983b9 Mon Sep 17 00:00:00 2001
From: Zhongwei Yao <zhongwei.yao@arm.com>
Date: Mon, 25 Apr 2016 11:08:38 +0800
Subject: cmd/compile: enable const division for arm64

performance:
benchmark                   old ns/op     new ns/op     delta
BenchmarkDivconstI64-8      8.28          2.70          -67.39%
BenchmarkDivconstU64-8      8.28          4.69          -43.36%
BenchmarkDivconstI32-8      8.28          6.39          -22.83%
BenchmarkDivconstU32-8      8.28          4.43          -46.50%
BenchmarkDivconstI16-8      5.17          5.17          +0.00%
BenchmarkDivconstU16-8      5.33          5.34          +0.19%
BenchmarkDivconstI8-8       3.50          3.50          +0.00%
BenchmarkDivconstU8-8       3.51          3.50          -0.28%

Fixes #15382

Change-Id: Ibce7b28f0586d593b33c4d4ecc5d5e7e7c905d13
Reviewed-on: https://go-review.googlesource.com/22292
Reviewed-by: Michael Munday <munday@ca.ibm.com>
Reviewed-by: David Chase <drchase@google.com>
---
 src/cmd/compile/internal/arm64/galign.go |  2 +
 src/cmd/compile/internal/arm64/ggen.go   | 47 ++++++++++++++++++++
 src/cmd/compile/internal/arm64/gsubr.go  | 12 ------
 src/cmd/compile/internal/arm64/peep.go   |  3 ++
 src/cmd/compile/internal/arm64/prog.go   |  3 ++
 src/cmd/compile/internal/gc/cgen.go      | 49 +++++++++++++++++----
 src/cmd/compile/internal/gc/go.go        | 36 ++++++++--------
 src/cmd/compile/internal/gc/walk.go      | 12 +++++-
 src/cmd/internal/obj/arm64/asm7.go       |  4 ++
 src/runtime/vlrt.go                      |  1 -
 test/bench/go1/divconst_test.go          | 73 ++++++++++++++++++++++++++++++++
 11 files changed, 202 insertions(+), 40 deletions(-)
 create mode 100644 test/bench/go1/divconst_test.go

(limited to 'src/runtime')

diff --git a/src/cmd/compile/internal/arm64/galign.go b/src/cmd/compile/internal/arm64/galign.go
index 17c851cb14..7acc4e08eb 100644
--- a/src/cmd/compile/internal/arm64/galign.go
+++ b/src/cmd/compile/internal/arm64/galign.go
@@ -29,6 +29,8 @@ func Main() {
 
 	gc.Thearch.Betypeinit = betypeinit
 	gc.Thearch.Cgen_hmul = cgen_hmul
+	gc.Thearch.AddSetCarry = AddSetCarry
+	gc.Thearch.RightShiftWithCarry = RightShiftWithCarry
 	gc.Thearch.Cgen_shift = cgen_shift
 	gc.Thearch.Clearfat = clearfat
 	gc.Thearch.Defframe = defframe
diff --git a/src/cmd/compile/internal/arm64/ggen.go b/src/cmd/compile/internal/arm64/ggen.go
index 9abd901d7a..bddfed631a 100644
--- a/src/cmd/compile/internal/arm64/ggen.go
+++ b/src/cmd/compile/internal/arm64/ggen.go
@@ -252,6 +252,53 @@ func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
 	}
 }
 
+// RightShiftWithCarry generates a constant unsigned
+// right shift with carry.
+//
+// res = n >> shift // with carry
+func RightShiftWithCarry(n *gc.Node, shift uint, res *gc.Node) {
+	// Extra 1 is for carry bit.
+	maxshift := uint(n.Type.Width*8 + 1)
+	if shift == 0 {
+		gmove(n, res)
+	} else if shift < maxshift {
+		// 1. clear rightmost bit of target
+		var n1 gc.Node
+		gc.Nodconst(&n1, n.Type, 1)
+		gins(optoas(gc.ORSH, n.Type), &n1, n)
+		gins(optoas(gc.OLSH, n.Type), &n1, n)
+		// 2. add carry flag to target
+		var n2 gc.Node
+		gc.Nodconst(&n1, n.Type, 0)
+		gc.Regalloc(&n2, n.Type, nil)
+		gins(optoas(gc.OAS, n.Type), &n1, &n2)
+		gins(arm64.AADC, &n2, n)
+		// 3. right rotate 1 bit
+		gc.Nodconst(&n1, n.Type, 1)
+		gins(arm64.AROR, &n1, n)
+
+		// ARM64 backend doesn't eliminate shifts by 0. It is manually checked here.
+		if shift > 1 {
+			var n3 gc.Node
+			gc.Nodconst(&n3, n.Type, int64(shift-1))
+			cgen_shift(gc.ORSH, true, n, &n3, res)
+		} else {
+			gmove(n, res)
+		}
+		gc.Regfree(&n2)
+	} else {
+		gc.Fatalf("RightShiftWithCarry: shift(%v) is bigger than max size(%v)", shift, maxshift)
+	}
+}
+
+// AddSetCarry generates add and set carry.
+//
+//   res = nl + nr // with carry flag set
+func AddSetCarry(nl *gc.Node, nr *gc.Node, res *gc.Node) {
+	gins(arm64.AADDS, nl, nr)
+	gmove(nr, res)
+}
+
 /*
  * generate high multiply:
  *   res = (nl*nr) >> width
diff --git a/src/cmd/compile/internal/arm64/gsubr.go b/src/cmd/compile/internal/arm64/gsubr.go
index efa66a09d3..f193291d01 100644
--- a/src/cmd/compile/internal/arm64/gsubr.go
+++ b/src/cmd/compile/internal/arm64/gsubr.go
@@ -890,18 +890,6 @@ func optoas(op gc.Op, t *gc.Type) obj.As {
 		ORSH_ | gc.TINT64:
 		a = arm64.AASR
 
-		// TODO(minux): handle rotates
-	//case CASE(ORROTC, TINT8):
-	//case CASE(ORROTC, TUINT8):
-	//case CASE(ORROTC, TINT16):
-	//case CASE(ORROTC, TUINT16):
-	//case CASE(ORROTC, TINT32):
-	//case CASE(ORROTC, TUINT32):
-	//case CASE(ORROTC, TINT64):
-	//case CASE(ORROTC, TUINT64):
-	//	a = 0//??? RLDC??
-	//	break;
-
 	case OHMUL_ | gc.TINT64:
 		a = arm64.ASMULH
 
diff --git a/src/cmd/compile/internal/arm64/peep.go b/src/cmd/compile/internal/arm64/peep.go
index 887353c889..22be1afebc 100644
--- a/src/cmd/compile/internal/arm64/peep.go
+++ b/src/cmd/compile/internal/arm64/peep.go
@@ -534,10 +534,13 @@ func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {
 		return 0
 
 	case arm64.AADD, /* read p->from, read p->reg, write p->to */
+		arm64.AADDS,
 		arm64.ASUB,
+		arm64.AADC,
 		arm64.AAND,
 		arm64.AORR,
 		arm64.AEOR,
+		arm64.AROR,
 		arm64.AMUL,
 		arm64.ASMULL,
 		arm64.AUMULL,
diff --git a/src/cmd/compile/internal/arm64/prog.go b/src/cmd/compile/internal/arm64/prog.go
index 3091c4a840..d504d0f0ee 100644
--- a/src/cmd/compile/internal/arm64/prog.go
+++ b/src/cmd/compile/internal/arm64/prog.go
@@ -59,6 +59,9 @@ var progtable = [arm64.ALAST & obj.AMask]obj.ProgInfo{
 	arm64.ALSR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AASR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ACMP & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead},
+	arm64.AADC & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.UseCarry},
+	arm64.AROR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
+	arm64.AADDS & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.SetCarry},
 
 	// Floating point.
 	arm64.AFADDD & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
diff --git a/src/cmd/compile/internal/gc/cgen.go b/src/cmd/compile/internal/gc/cgen.go
index bb7487c958..8db752ec51 100644
--- a/src/cmd/compile/internal/gc/cgen.go
+++ b/src/cmd/compile/internal/gc/cgen.go
@@ -2642,9 +2642,9 @@ func cgen_ret(n *Node) {
 // signed and unsigned high multiplication (OHMUL).
 func hasHMUL64() bool {
 	switch Ctxt.Arch.Family {
-	case sys.AMD64, sys.S390X:
+	case sys.AMD64, sys.S390X, sys.ARM64:
 		return true
-	case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64:
+	case sys.ARM, sys.I386, sys.MIPS64, sys.PPC64:
 		return false
 	}
 	Fatalf("unknown architecture")
@@ -2664,6 +2664,28 @@ func hasRROTC64() bool {
 	return false
 }
 
+func hasRightShiftWithCarry() bool {
+	switch Ctxt.Arch.Family {
+	case sys.ARM64:
+		return true
+	case sys.AMD64, sys.ARM, sys.I386, sys.MIPS64, sys.PPC64, sys.S390X:
+		return false
+	}
+	Fatalf("unknown architecture")
+	return false
+}
+
+func hasAddSetCarry() bool {
+	switch Ctxt.Arch.Family {
+	case sys.ARM64:
+		return true
+	case sys.AMD64, sys.ARM, sys.I386, sys.MIPS64, sys.PPC64, sys.S390X:
+		return false
+	}
+	Fatalf("unknown architecture")
+	return false
+}
+
 // generate division according to op, one of:
 //	res = nl / nr
 //	res = nl % nr
@@ -2699,8 +2721,9 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
 		// the MSB. For now this needs the RROTC instruction.
 		// TODO(mundaym): Hacker's Delight 2nd ed. chapter 10 proposes
 		// an alternative sequence of instructions for architectures
-		// that do not have a shift right with carry instruction.
-		if m.Ua != 0 && !hasRROTC64() {
+		// (TODO: MIPS64, PPC64, S390X) that do not have a shift
+		// right with carry instruction.
+		if m.Ua != 0 && !hasRROTC64() && !hasRightShiftWithCarry() {
 			goto longdiv
 		}
 		if op == OMOD {
@@ -2717,12 +2740,20 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
 
 		if m.Ua != 0 {
 			// Need to add numerator accounting for overflow.
-			Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
+			if hasAddSetCarry() {
+				Thearch.AddSetCarry(&n1, &n3, &n3)
+			} else {
+				Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
+			}
 
-			Nodconst(&n2, nl.Type, 1)
-			Thearch.Gins(Thearch.Optoas(ORROTC, nl.Type), &n2, &n3)
-			Nodconst(&n2, nl.Type, int64(m.S)-1)
-			Thearch.Gins(Thearch.Optoas(ORSH, nl.Type), &n2, &n3)
+			if !hasRROTC64() {
+				Thearch.RightShiftWithCarry(&n3, uint(m.S), &n3)
+			} else {
+				Nodconst(&n2, nl.Type, 1)
+				Thearch.Gins(Thearch.Optoas(ORROTC, nl.Type), &n2, &n3)
+				Nodconst(&n2, nl.Type, int64(m.S)-1)
+				Thearch.Gins(Thearch.Optoas(ORSH, nl.Type), &n2, &n3)
+			}
 		} else {
 			Nodconst(&n2, nl.Type, int64(m.S))
 			Thearch.Gins(Thearch.Optoas(ORSH, nl.Type), &n2, &n3) // shift dx
diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go
index 87b6121c8e..f9a372dcce 100644
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -378,23 +378,25 @@ type Arch struct {
 	MAXWIDTH     int64
 	ReservedRegs []int
 
-	AddIndex     func(*Node, int64, *Node) bool // optional
-	Betypeinit   func()
-	Bgen_float   func(*Node, bool, int, *obj.Prog) // optional
-	Cgen64       func(*Node, *Node)                // only on 32-bit systems
-	Cgenindex    func(*Node, *Node, bool) *obj.Prog
-	Cgen_bmul    func(Op, *Node, *Node, *Node) bool
-	Cgen_float   func(*Node, *Node) // optional
-	Cgen_hmul    func(*Node, *Node, *Node)
-	Cgen_shift   func(Op, bool, *Node, *Node, *Node)
-	Clearfat     func(*Node)
-	Cmp64        func(*Node, *Node, Op, int, *obj.Prog) // only on 32-bit systems
-	Defframe     func(*obj.Prog)
-	Dodiv        func(Op, *Node, *Node, *Node)
-	Excise       func(*Flow)
-	Expandchecks func(*obj.Prog)
-	Getg         func(*Node)
-	Gins         func(obj.As, *Node, *Node) *obj.Prog
+	AddIndex            func(*Node, int64, *Node) bool // optional
+	Betypeinit          func()
+	Bgen_float          func(*Node, bool, int, *obj.Prog) // optional
+	Cgen64              func(*Node, *Node)                // only on 32-bit systems
+	Cgenindex           func(*Node, *Node, bool) *obj.Prog
+	Cgen_bmul           func(Op, *Node, *Node, *Node) bool
+	Cgen_float          func(*Node, *Node) // optional
+	Cgen_hmul           func(*Node, *Node, *Node)
+	RightShiftWithCarry func(*Node, uint, *Node)  // only on systems without RROTC instruction
+	AddSetCarry         func(*Node, *Node, *Node) // only on systems where ADD does not update the carry flag
+	Cgen_shift          func(Op, bool, *Node, *Node, *Node)
+	Clearfat            func(*Node)
+	Cmp64               func(*Node, *Node, Op, int, *obj.Prog) // only on 32-bit systems
+	Defframe            func(*obj.Prog)
+	Dodiv               func(Op, *Node, *Node, *Node)
+	Excise              func(*Flow)
+	Expandchecks        func(*obj.Prog)
+	Getg                func(*Node)
+	Gins                func(obj.As, *Node, *Node) *obj.Prog
 
 	// Ginscmp generates code comparing n1 to n2 and jumping away if op is satisfied.
 	// The returned prog should be Patch'ed with the jump target.
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index bce34374e8..cc9a50e6a8 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -3424,7 +3424,7 @@ func walkdiv(n *Node, init *Nodes) *Node {
 	// if >= 0, nr is 1<<pow // 1 if nr is negative.
 
 	// TODO(minux)
-	if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM64, sys.PPC64) {
+	if Thearch.LinkArch.InFamily(sys.MIPS64, sys.PPC64) {
 		return n
 	}
 
@@ -3485,6 +3485,16 @@ func walkdiv(n *Node, init *Nodes) *Node {
 			goto ret
 		}
 
+		// TODO(zhongwei) Tests show that the "quick division" method for TUINT8, TINT8, TUINT16 and TINT16
+		// is slower on the current arm64 backend than the hardware div instruction, due to unnecessary
+		// data movement between registers. It could be enabled once the generated code is good enough.
+		if Thearch.LinkArch.Family == sys.ARM64 {
+			switch Simtype[nl.Type.Etype] {
+			case TUINT8, TINT8, TUINT16, TINT16:
+				return n
+			}
+		}
+
 		switch Simtype[nl.Type.Etype] {
 		default:
 			return n
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 55397132e0..28bebaa3f7 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -155,6 +155,7 @@ var optab = []Optab{
 	{AADC, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0},
 	{AADC, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
 	{ANEG, C_REG, C_NONE, C_REG, 25, 4, 0, 0, 0},
+	{ANEG, C_NONE, C_NONE, C_REG, 25, 4, 0, 0, 0},
 	{ANGC, C_REG, C_NONE, C_REG, 17, 4, 0, 0, 0},
 	{ACMP, C_REG, C_REG, C_NONE, 1, 4, 0, 0, 0},
 	{AADD, C_ADDCON, C_RSP, C_RSP, 2, 4, 0, 0, 0},
@@ -2198,6 +2199,9 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
 		o1 = oprrr(ctxt, p.As)
 
 		rf := int(p.From.Reg)
+		if rf == C_NONE {
+			rf = int(p.To.Reg)
+		}
 		rt := int(p.To.Reg)
 		o1 |= (uint32(rf&31) << 16) | (REGZERO & 31 << 5) | uint32(rt&31)
 
diff --git a/src/runtime/vlrt.go b/src/runtime/vlrt.go
index 2419f78ce2..cd37828ae4 100644
--- a/src/runtime/vlrt.go
+++ b/src/runtime/vlrt.go
@@ -195,7 +195,6 @@ func dodiv(n, d uint64) (q, r uint64) {
 	if GOARCH == "arm" {
 		// arm doesn't have a division instruction, so
 		// slowdodiv is the best that we can do.
-		// TODO: revisit for arm64.
 		return slowdodiv(n, d)
 	}
 
diff --git a/test/bench/go1/divconst_test.go b/test/bench/go1/divconst_test.go
new file mode 100644
index 0000000000..3cf6c26094
--- /dev/null
+++ b/test/bench/go1/divconst_test.go
@@ -0,0 +1,73 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package go1
+
+import (
+	"testing"
+)
+
+var i64res int64
+
+func BenchmarkDivconstI64(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		i64res = int64(i) / 7
+	}
+}
+
+var u64res uint64
+
+func BenchmarkDivconstU64(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		u64res = uint64(i) / 7
+	}
+}
+
+var i32res int32
+
+func BenchmarkDivconstI32(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		i32res = int32(i) / 7
+	}
+}
+
+var u32res uint32
+
+func BenchmarkDivconstU32(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		u32res = uint32(i) / 7
+	}
+}
+
+var i16res int16
+
+func BenchmarkDivconstI16(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		i16res = int16(i) / 7
+	}
+}
+
+var u16res uint16
+
+func BenchmarkDivconstU16(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		u16res = uint16(i) / 7
+	}
+}
+
+var i8res int8
+
+func BenchmarkDivconstI8(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		i8res = int8(i) / 7
+	}
+}
+
+var u8res uint8
+
+func BenchmarkDivconstU8(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		u8res = uint8(i) / 7
+	}
+}
-- 
cgit v1.3


From 217be5b35d8fb0f812ca59bf7dec3aa0fb850c46 Mon Sep 17 00:00:00 2001
From: David Crawshaw <crawshaw@golang.org>
Date: Wed, 27 Apr 2016 12:49:27 -0400
Subject: reflect: unnamed interface types have no name

Fixes #15468

Change-Id: I8723171f87774a98d5e80e7832ebb96dd1fbea74
Reviewed-on: https://go-review.googlesource.com/22524
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: David Crawshaw <crawshaw@golang.org>
---
 src/reflect/all_test.go | 25 +++++++++++++++----------
 src/reflect/type.go     |  3 +++
 src/runtime/type.go     |  3 +++
 3 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'src/runtime')

diff --git a/src/reflect/all_test.go b/src/reflect/all_test.go
index aff8ea253b..870ccbf521 100644
--- a/src/reflect/all_test.go
+++ b/src/reflect/all_test.go
@@ -5659,20 +5659,25 @@ type nameTest struct {
 }
 
 var nameTests = []nameTest{
-	{int32(0), "int32"},
-	{D1{}, "D1"},
-	{[]D1{}, ""},
-	{(chan D1)(nil), ""},
-	{(func() D1)(nil), ""},
-	{(<-chan D1)(nil), ""},
-	{(chan<- D1)(nil), ""},
-	{TheNameOfThisTypeIsExactly255BytesLongSoWhenTheCompilerPrependsTheReflectTestPackageNameAndExtraStarTheLinkerRuntimeAndReflectPackagesWillHaveToCorrectlyDecodeTheSecondLengthByte0123456789_0123456789_0123456789_0123456789_0123456789_012345678(0), "TheNameOfThisTypeIsExactly255BytesLongSoWhenTheCompilerPrependsTheReflectTestPackageNameAndExtraStarTheLinkerRuntimeAndReflectPackagesWillHaveToCorrectlyDecodeTheSecondLengthByte0123456789_0123456789_0123456789_0123456789_0123456789_012345678"},
+	{(*int32)(nil), "int32"},
+	{(*D1)(nil), "D1"},
+	{(*[]D1)(nil), ""},
+	{(*chan D1)(nil), ""},
+	{(*func() D1)(nil), ""},
+	{(*<-chan D1)(nil), ""},
+	{(*chan<- D1)(nil), ""},
+	{(*interface{})(nil), ""},
+	{(*interface {
+		F()
+	})(nil), ""},
+	{(*TheNameOfThisTypeIsExactly255BytesLongSoWhenTheCompilerPrependsTheReflectTestPackageNameAndExtraStarTheLinkerRuntimeAndReflectPackagesWillHaveToCorrectlyDecodeTheSecondLengthByte0123456789_0123456789_0123456789_0123456789_0123456789_012345678)(nil), "TheNameOfThisTypeIsExactly255BytesLongSoWhenTheCompilerPrependsTheReflectTestPackageNameAndExtraStarTheLinkerRuntimeAndReflectPackagesWillHaveToCorrectlyDecodeTheSecondLengthByte0123456789_0123456789_0123456789_0123456789_0123456789_012345678"},
 }
 
 func TestNames(t *testing.T) {
 	for _, test := range nameTests {
-		if got := TypeOf(test.v).Name(); got != test.want {
-			t.Errorf("%T Name()=%q, want %q", test.v, got, test.want)
+		typ := TypeOf(test.v).Elem()
+		if got := typ.Name(); got != test.want {
+			t.Errorf("%v Name()=%q, want %q", typ, got, test.want)
 		}
 	}
 }
diff --git a/src/reflect/type.go b/src/reflect/type.go
index ff6ff14c83..0213d56e83 100644
--- a/src/reflect/type.go
+++ b/src/reflect/type.go
@@ -867,6 +867,9 @@ func (t *rtype) Name() string {
 	if hasPrefix(s, "func(") {
 		return ""
 	}
+	if hasPrefix(s, "interface {") {
+		return ""
+	}
 	switch s[0] {
 	case '[', '*', '<':
 		return ""
diff --git a/src/runtime/type.go b/src/runtime/type.go
index 9e4c40553a..608c601abd 100644
--- a/src/runtime/type.go
+++ b/src/runtime/type.go
@@ -132,6 +132,9 @@ func (t *_type) name() string {
 	if hasPrefix(s, "func(") {
 		return ""
 	}
+	if hasPrefix(s, "interface {") {
+		return ""
+	}
 	switch s[0] {
 	case '[', '*', '<':
 		return ""
-- 
cgit v1.3


From 30172f1811a3e08487c6191d1923f8608a338496 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Wed, 27 Apr 2016 14:30:01 -0400
Subject: runtime: make {add,subtract}{b,1} nosplit

These are used at the bottom level of various GC operations that must
not be preempted. To be on the safe side, mark them all nosplit.

Change-Id: I8f7360e79c9852bd044df71413b8581ad764380c
Reviewed-on: https://go-review.googlesource.com/22504
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: Rick Hudson <rlh@golang.org>
---
 src/runtime/mbitmap.go | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/runtime')

diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index f025ce1c68..e8eb6a7e22 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -86,6 +86,7 @@ const (
 
 // addb returns the byte pointer p+n.
 //go:nowritebarrier
+//go:nosplit
 func addb(p *byte, n uintptr) *byte {
 	// Note: wrote out full expression instead of calling add(p, n)
 	// to reduce the number of temporaries generated by the
@@ -95,6 +96,7 @@ func addb(p *byte, n uintptr) *byte {
 
 // subtractb returns the byte pointer p-n.
 //go:nowritebarrier
+//go:nosplit
 func subtractb(p *byte, n uintptr) *byte {
 	// Note: wrote out full expression instead of calling add(p, -n)
 	// to reduce the number of temporaries generated by the
@@ -104,6 +106,7 @@ func subtractb(p *byte, n uintptr) *byte {
 
 // add1 returns the byte pointer p+1.
 //go:nowritebarrier
+//go:nosplit
 func add1(p *byte) *byte {
 	// Note: wrote out full expression instead of calling addb(p, 1)
 	// to reduce the number of temporaries generated by the
-- 
cgit v1.3


From b49b71ae192c72faf699edd321ff0637f90e794c Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Fri, 18 Mar 2016 11:27:59 -0400
Subject: runtime: don't rescan globals
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the runtime rescans globals during mark 2 and mark
termination. This costs as much as 500µs/MB in STW time, which is
enough to surpass the 10ms STW limit with only 20MB of globals.

It's also basically unnecessary. The compiler already generates write
barriers for global -> heap pointer updates and the regular write
barrier doesn't check whether the slot is a global or in the heap.
Some less common write barriers do cause problems.
heapBitsBulkBarrier, which is used by typedmemmove and related
functions, currently depends on having access to the pointer bitmap
and as a result ignores writes to globals. Likewise, the
reflect-related write barriers reflect_typedmemmovepartial and
callwritebarrier ignore non-heap destinations; though it appears they
can never be called with global pointers anyway.

This commit makes heapBitsBulkBarrier issue write barriers for writes
to global pointers using the data and BSS pointer bitmaps, removes the
inheap checks from the reflection write barriers, and eliminates the
rescans during mark 2 and mark termination. It also adds a test that
writes to globals have write barriers.

Programs with large data+BSS segments (with pointers) aren't common,
but for programs that do have large data+BSS segments, this
significantly reduces pause time:

name \ 95%ile-time/markTerm              old         new  delta
LargeBSS/bss:1GB/gomaxprocs:4  148200µs ± 6%  302µs ±52%  -99.80% (p=0.008 n=5+5)

This very slightly improves the go1 benchmarks:

name                      old time/op    new time/op    delta
BinaryTree17-12              2.62s ± 3%     2.62s ± 4%    ~     (p=0.904 n=20+20)
Fannkuch11-12                2.15s ± 1%     2.13s ± 0%  -1.29%  (p=0.000 n=18+20)
FmtFprintfEmpty-12          48.3ns ± 2%    47.6ns ± 1%  -1.52%  (p=0.000 n=20+16)
FmtFprintfString-12          152ns ± 0%     152ns ± 1%    ~     (p=0.725 n=18+18)
FmtFprintfInt-12             150ns ± 1%     149ns ± 1%  -1.14%  (p=0.000 n=19+20)
FmtFprintfIntInt-12          250ns ± 0%     244ns ± 1%  -2.12%  (p=0.000 n=20+18)
FmtFprintfPrefixedInt-12     219ns ± 1%     217ns ± 1%  -1.20%  (p=0.000 n=19+20)
FmtFprintfFloat-12           280ns ± 0%     281ns ± 1%  +0.47%  (p=0.000 n=19+19)
FmtManyArgs-12               928ns ± 0%     923ns ± 1%  -0.53%  (p=0.000 n=19+18)
GobDecode-12                7.21ms ± 1%    7.24ms ± 2%    ~     (p=0.091 n=19+19)
GobEncode-12                6.07ms ± 1%    6.05ms ± 1%  -0.36%  (p=0.002 n=20+17)
Gzip-12                      265ms ± 1%     265ms ± 1%    ~     (p=0.496 n=20+19)
Gunzip-12                   39.6ms ± 1%    39.3ms ± 1%  -0.85%  (p=0.000 n=19+19)
HTTPClientServer-12         74.0µs ± 2%    73.8µs ± 1%    ~     (p=0.569 n=20+19)
JSONEncode-12               15.4ms ± 1%    15.3ms ± 1%  -0.25%  (p=0.049 n=17+17)
JSONDecode-12               53.7ms ± 2%    53.0ms ± 1%  -1.29%  (p=0.000 n=18+17)
Mandelbrot200-12            3.97ms ± 1%    3.97ms ± 0%    ~     (p=0.072 n=17+18)
GoParse-12                  3.35ms ± 2%    3.36ms ± 1%  +0.51%  (p=0.005 n=18+20)
RegexpMatchEasy0_32-12      72.7ns ± 2%    72.2ns ± 1%  -0.70%  (p=0.005 n=19+19)
RegexpMatchEasy0_1K-12       246ns ± 1%     245ns ± 0%  -0.60%  (p=0.000 n=18+16)
RegexpMatchEasy1_32-12      72.8ns ± 1%    72.5ns ± 1%  -0.37%  (p=0.011 n=18+18)
RegexpMatchEasy1_1K-12       380ns ± 1%     385ns ± 1%  +1.34%  (p=0.000 n=20+19)
RegexpMatchMedium_32-12      115ns ± 2%     115ns ± 1%  +0.44%  (p=0.047 n=20+20)
RegexpMatchMedium_1K-12     35.4µs ± 1%    35.5µs ± 1%    ~     (p=0.079 n=18+19)
RegexpMatchHard_32-12       1.83µs ± 0%    1.80µs ± 1%  -1.76%  (p=0.000 n=18+18)
RegexpMatchHard_1K-12       55.1µs ± 0%    54.3µs ± 1%  -1.42%  (p=0.000 n=18+19)
Revcomp-12                   386ms ± 1%     381ms ± 1%  -1.14%  (p=0.000 n=18+18)
Template-12                 61.5ms ± 2%    61.5ms ± 2%    ~     (p=0.647 n=19+20)
TimeParse-12                 338ns ± 0%     336ns ± 1%  -0.72%  (p=0.000 n=14+19)
TimeFormat-12                350ns ± 0%     357ns ± 0%  +2.05%  (p=0.000 n=19+18)
[Geo mean]                  55.3µs         55.0µs       -0.41%

Change-Id: I57e8720385a1b991aeebd111b6874354308e2a6b
Reviewed-on: https://go-review.googlesource.com/20829
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: Rick Hudson <rlh@golang.org>
---
 src/runtime/mbarrier.go | 19 +++++++++++++++----
 src/runtime/mbitmap.go  | 50 +++++++++++++++++++++++++++++++++++++++++++++++--
 src/runtime/mgc.go      |  7 -------
 src/runtime/mgcmark.go  | 24 ++++++++++++++----------
 test/writebarrier.go    | 15 +++++++++++++++
 5 files changed, 92 insertions(+), 23 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/mbarrier.go b/src/runtime/mbarrier.go
index f03bf18ebc..637d9b886a 100644
--- a/src/runtime/mbarrier.go
+++ b/src/runtime/mbarrier.go
@@ -87,6 +87,17 @@ import (
 // frames that have potentially been active since the concurrent scan,
 // so it depends on write barriers to track changes to pointers in
 // stack frames that have not been active.
+//
+//
+// Global writes:
+//
+// The Go garbage collector requires write barriers when heap pointers
+// are stored in globals. Many garbage collectors ignore writes to
+// globals and instead pick up global -> heap pointers during
+// termination. This increases pause time, so we instead rely on write
+// barriers for writes to globals so that we don't have to rescan
+// globals during mark termination.
+//
 //go:nowritebarrierrec
 func gcmarkwb_m(slot *uintptr, ptr uintptr) {
 	if writeBarrier.needed {
@@ -185,7 +196,7 @@ func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size
 	if writeBarrier.cgo {
 		cgoCheckMemmove(typ, dst, src, off, size)
 	}
-	if !writeBarrier.needed || typ.kind&kindNoPointers != 0 || size < sys.PtrSize || !inheap(uintptr(dst)) {
+	if !writeBarrier.needed || typ.kind&kindNoPointers != 0 || size < sys.PtrSize {
 		return
 	}
 
@@ -201,11 +212,11 @@ func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size
 // values have just been copied to frame, starting at retoffset
 // and continuing to framesize. The entire frame (not just the return
 // values) is described by typ. Because the copy has already
-// happened, we call writebarrierptr_nostore, and we must be careful
-// not to be preempted before the write barriers have been run.
+// happened, we call writebarrierptr_nostore, and this is nosplit so
+// the copy and write barrier appear atomic to GC.
 //go:nosplit
 func callwritebarrier(typ *_type, frame unsafe.Pointer, framesize, retoffset uintptr) {
-	if !writeBarrier.needed || typ == nil || typ.kind&kindNoPointers != 0 || framesize-retoffset < sys.PtrSize || !inheap(uintptr(frame)) {
+	if !writeBarrier.needed || typ == nil || typ.kind&kindNoPointers != 0 || framesize-retoffset < sys.PtrSize {
 		return
 	}
 	heapBitsBulkBarrier(uintptr(add(frame, retoffset)), framesize-retoffset)
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index e8eb6a7e22..3df697ee5c 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -384,10 +384,10 @@ func (h heapBits) setCheckmarked(size uintptr) {
 
 // heapBitsBulkBarrier executes writebarrierptr_nostore
 // for every pointer slot in the memory range [p, p+size),
-// using the heap bitmap to locate those pointer slots.
+// using the heap, data, or BSS bitmap to locate those pointer slots.
 // This executes the write barriers necessary after a memmove.
 // Both p and size must be pointer-aligned.
-// The range [p, p+size) must lie within a single allocation.
+// The range [p, p+size) must lie within a single object.
 //
 // Callers should call heapBitsBulkBarrier immediately after
 // calling memmove(p, src, size). This function is marked nosplit
@@ -431,6 +431,22 @@ func heapBitsBulkBarrier(p, size uintptr) {
 			systemstack(func() {
 				gcUnwindBarriers(gp, p)
 			})
+			return
+		}
+
+		// If p is a global, use the data or BSS bitmaps to
+		// execute write barriers.
+		for datap := &firstmoduledata; datap != nil; datap = datap.next {
+			if datap.data <= p && p < datap.edata {
+				bulkBarrierBitmap(p, size, p-datap.data, datap.gcdatamask.bytedata)
+				return
+			}
+		}
+		for datap := &firstmoduledata; datap != nil; datap = datap.next {
+			if datap.bss <= p && p < datap.ebss {
+				bulkBarrierBitmap(p, size, p-datap.bss, datap.gcbssmask.bytedata)
+				return
+			}
 		}
 		return
 	}
@@ -445,6 +461,36 @@ func heapBitsBulkBarrier(p, size uintptr) {
 	}
 }
 
+// bulkBarrierBitmap executes write barriers for [p, p+size) using a
+// 1-bit pointer bitmap. p is assumed to start maskOffset bytes into
+// the data covered by the bitmap in bits.
+//
+// This is used by heapBitsBulkBarrier for writes to data and BSS.
+//
+//go:nosplit
+func bulkBarrierBitmap(p, size, maskOffset uintptr, bits *uint8) {
+	word := maskOffset / sys.PtrSize
+	bits = addb(bits, word/8)
+	mask := uint8(1) << (word % 8)
+
+	for i := uintptr(0); i < size; i += sys.PtrSize {
+		if mask == 0 {
+			bits = addb(bits, 1)
+			if *bits == 0 {
+				// Skip 8 words.
+				i += 7 * sys.PtrSize
+				continue
+			}
+			mask = 1
+		}
+		if *bits&mask != 0 {
+			x := (*uintptr)(unsafe.Pointer(p + i))
+			writebarrierptr_nostore(x, *x)
+		}
+		mask <<= 1
+	}
+}
+
 // typeBitsBulkBarrier executes writebarrierptr_nostore
 // for every pointer slot in the memory range [p, p+size),
 // using the type bitmap to locate those pointer slots.
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 328ff4cd88..ae8338ac10 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1086,13 +1086,6 @@ top:
 		// cached workbufs.
 		atomic.Xadd(&work.nwait, -1)
 
-		// Rescan global data and BSS. There may still work
-		// workers running at this point, so bump "jobs" down
-		// before "next" so they won't try running root jobs
-		// until we set next.
-		atomic.Store(&work.markrootJobs, uint32(fixedRootCount+work.nDataRoots+work.nBSSRoots))
-		atomic.Store(&work.markrootNext, fixedRootCount)
-
 		// GC is set up for mark 2. Let Gs blocked on the
 		// transition lock go while we flush caches.
 		semrelease(&work.markDoneSema)
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 7f481dee22..b5a9ff9b56 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -42,18 +42,22 @@ func gcMarkRootPrepare() {
 	}
 
 	work.nDataRoots = 0
-	for datap := &firstmoduledata; datap != nil; datap = datap.next {
-		nDataRoots := nBlocks(datap.edata - datap.data)
-		if nDataRoots > work.nDataRoots {
-			work.nDataRoots = nDataRoots
+	work.nBSSRoots = 0
+
+	// Only scan globals once per cycle; preferably concurrently.
+	if !work.markrootDone {
+		for datap := &firstmoduledata; datap != nil; datap = datap.next {
+			nDataRoots := nBlocks(datap.edata - datap.data)
+			if nDataRoots > work.nDataRoots {
+				work.nDataRoots = nDataRoots
+			}
 		}
-	}
 
-	work.nBSSRoots = 0
-	for datap := &firstmoduledata; datap != nil; datap = datap.next {
-		nBSSRoots := nBlocks(datap.ebss - datap.bss)
-		if nBSSRoots > work.nBSSRoots {
-			work.nBSSRoots = nBSSRoots
+		for datap := &firstmoduledata; datap != nil; datap = datap.next {
+			nBSSRoots := nBlocks(datap.ebss - datap.bss)
+			if nBSSRoots > work.nBSSRoots {
+				work.nBSSRoots = nBSSRoots
+			}
 		}
 	}
 
diff --git a/test/writebarrier.go b/test/writebarrier.go
index 2ff0ee9584..f2431ed5ca 100644
--- a/test/writebarrier.go
+++ b/test/writebarrier.go
@@ -196,3 +196,18 @@ func f20(x, y *int, i int) []*int {
 	a := []*int{x, y} // ERROR "write barrier"
 	return a
 }
+
+var x21 *int
+var y21 struct {
+	x *int
+}
+var z21 int
+
+func f21(x *int) {
+	// Global -> heap pointer updates must have write barriers.
+	x21 = x                   // ERROR "write barrier"
+	y21.x = x                 // ERROR "write barrier"
+	x21 = &z21                // no barrier
+	y21.x = &z21              // no barrier
+	y21 = struct{ x *int }{x} // ERROR "write barrier"
+}
-- 
cgit v1.3