[dev.garbage] Merge branch 'master' into dev.garbage

Change-Id: I47ac4112befc07d3674d7a88827227199edd93b4
author: Austin Clements <austin@google.com> 2016-04-05 16:37:29 -0400
committer: Austin Clements <austin@google.com> 2016-04-05 16:37:54 -0400
commit: 6d6e16001bb04d4bf60c6d14d1f64684a043ef6c (patch)
tree: 53d7dd0bbe4f919fec4a15bde9811a9608b54627 /src/runtime
parent: 22972d2207424edc481b7c127788f573a726dfe7 (diff)
parent: 5e1b7bdecf7a8b5b5d06633758bf53e475902414 (diff)
download: go-6d6e16001bb04d4bf60c6d14d1f64684a043ef6c.tar.xz
42 files changed, 1231 insertions, 355 deletions
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s
index 2d16f4940a..dec79189bc 100644
--- a/src/runtime/asm_386.s
+++ b/src/runtime/asm_386.s
@@ -54,6 +54,7 @@ bad_proc: // show that the program requires MMX.
 has_cpuid:
 	MOVL	$0, AX
 	CPUID
+	MOVL	AX, SI
 	CMPL	AX, $0
 	JE	nocpuinfo
 
@@ -69,6 +70,7 @@ has_cpuid:
 	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
 notintel:
 
+	// Load EAX=1 cpuid flags
 	MOVL	$1, AX
 	CPUID
 	MOVL	CX, AX // Move to global variable clobbers CX when generating PIC
@@ -79,6 +81,14 @@ notintel:
 	TESTL	$(1<<23), DX	// MMX
 	JZ 	bad_proc
 
+	// Load EAX=7/ECX=0 cpuid flags
+	CMPL	SI, $7
+	JLT	nocpuinfo
+	MOVL	$7, AX
+	MOVL	$0, CX
+	CPUID
+	MOVL	BX, runtime·cpuid_ebx7(SB)
+
 nocpuinfo:	
 
 	// if there is an _cgo_init, call it to let it
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index b4df1d80d7..83db4d3e81 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -28,6 +28,7 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
 	// find out information about the processor we're on
 	MOVQ	$0, AX
 	CPUID
+	MOVQ	AX, SI
 	CMPQ	AX, $0
 	JE	nocpuinfo
 
@@ -42,15 +43,25 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
 	JNE	notintel
 	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
 notintel:
-	// Do nothing.
 
+	// Load EAX=1 cpuid flags
 	MOVQ	$1, AX
 	CPUID
 	MOVL	CX, runtime·cpuid_ecx(SB)
 	MOVL	DX, runtime·cpuid_edx(SB)
+
+	// Load EAX=7/ECX=0 cpuid flags
+	CMPQ	SI, $7
+	JLT	no7
+	MOVL	$7, AX
+	MOVL	$0, CX
+	CPUID
+	MOVL	BX, runtime·cpuid_ebx7(SB)
+no7:
 	// Detect AVX and AVX2 as per 14.7.1  Detection of AVX2 chapter of [1]
 	// [1] 64-ia-32-architectures-software-developer-manual-325462.pdf
 	// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
+	MOVL	runtime·cpuid_ecx(SB), CX
 	ANDL    $0x18000000, CX // check for OSXSAVE and AVX bits
 	CMPL    CX, $0x18000000
 	JNE     noavx
@@ -61,12 +72,8 @@ notintel:
 	CMPL    AX, $6 // Check for OS support of YMM registers
 	JNE     noavx
 	MOVB    $1, runtime·support_avx(SB)
-	MOVL    $7, AX
-	MOVL    $0, CX
-	CPUID
-	ANDL    $0x20, BX // check for AVX2 bit
-	CMPL    BX, $0x20
-	JNE     noavx2
+	TESTL   $(1<<5), runtime·cpuid_ebx7(SB) // check for AVX2 bit
+	JEQ     noavx2
 	MOVB    $1, runtime·support_avx2(SB)
 	JMP     nocpuinfo
 noavx:
diff --git a/src/runtime/cgo.go b/src/runtime/cgo.go
index 8ba31cd481..35d7a07e15 100644
--- a/src/runtime/cgo.go
+++ b/src/runtime/cgo.go
@@ -16,6 +16,7 @@ import "unsafe"
 //go:linkname _cgo_thread_start _cgo_thread_start
 //go:linkname _cgo_sys_thread_create _cgo_sys_thread_create
 //go:linkname _cgo_notify_runtime_init_done _cgo_notify_runtime_init_done
+//go:linkname _cgo_callers _cgo_callers
 
 var (
 	_cgo_init                     unsafe.Pointer
@@ -24,6 +25,7 @@ var (
 	_cgo_thread_start             unsafe.Pointer
 	_cgo_sys_thread_create        unsafe.Pointer
 	_cgo_notify_runtime_init_done unsafe.Pointer
+	_cgo_callers                  unsafe.Pointer
 )
 
 // iscgo is set to true by the runtime/cgo package
diff --git a/src/runtime/cgo/callbacks_traceback.go b/src/runtime/cgo/callbacks_traceback.go
new file mode 100644
index 0000000000..f754846722
--- /dev/null
+++ b/src/runtime/cgo/callbacks_traceback.go
@@ -0,0 +1,17 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux
+
+package cgo
+
+import _ "unsafe" // for go:linkname
+
+// Calls the traceback function passed to SetCgoTraceback.
+
+//go:cgo_import_static x_cgo_callers
+//go:linkname x_cgo_callers x_cgo_callers
+//go:linkname _cgo_callers _cgo_callers
+var x_cgo_callers byte
+var _cgo_callers = &x_cgo_callers
diff --git a/src/runtime/cgo/gcc_libinit_windows.c b/src/runtime/cgo/gcc_libinit_windows.c
index eb798ce5e8..50887b844d 100644
--- a/src/runtime/cgo/gcc_libinit_windows.c
+++ b/src/runtime/cgo/gcc_libinit_windows.c
@@ -2,21 +2,89 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+// +build cgo
+#define WIN64_LEAN_AND_MEAN
+#include <windows.h>
+#include <process.h>
+
 #include <stdio.h>
 #include <stdlib.h>
 
+static volatile long runtime_init_once_gate = 0;
+static volatile long runtime_init_once_done = 0;
+
+static CRITICAL_SECTION runtime_init_cs;
+
+static HANDLE runtime_init_wait;
+static int runtime_init_done;
+
+// Pre-initialize the runtime synchronization objects
+void
+_cgo_preinit_init() {
+	 runtime_init_wait = CreateEvent(NULL, TRUE, FALSE, NULL);
+	 if (runtime_init_wait == NULL) {
+		fprintf(stderr, "runtime: failed to create runtime initialization wait event.\n");
+		abort();
+	 }
+
+	 InitializeCriticalSection(&runtime_init_cs);
+}
+
+// Make sure that the preinit sequence has run.
+void
+_cgo_maybe_run_preinit() {
+	 if (!InterlockedExchangeAdd(&runtime_init_once_done, 0)) {
+			if (InterlockedIncrement(&runtime_init_once_gate) == 1) {
+				 _cgo_preinit_init();
+				 InterlockedIncrement(&runtime_init_once_done);
+			} else {
+				 // Decrement to avoid overflow.
+				 InterlockedDecrement(&runtime_init_once_gate);
+				 while(!InterlockedExchangeAdd(&runtime_init_once_done, 0)) {
+						Sleep(0);
+				 }
+			}
+	 }
+}
+
 void
-x_cgo_sys_thread_create(void* (*func)(void*), void* arg) {
-	fprintf(stderr, "x_cgo_sys_thread_create not implemented");
-	abort();
+x_cgo_sys_thread_create(void (*func)(void*), void* arg) {
+	uintptr_t thandle;
+
+	thandle = _beginthread(func, 0, arg);
+	if(thandle == -1) {
+		fprintf(stderr, "runtime: failed to create new OS thread (%d)\n", errno);
+		abort();
+	}
+}
+
+int
+_cgo_is_runtime_initialized() {
+	 EnterCriticalSection(&runtime_init_cs);
+	 int status = runtime_init_done;
+	 LeaveCriticalSection(&runtime_init_cs);
+	 return status;
 }
 
 void
 _cgo_wait_runtime_init_done() {
-	// TODO(spetrovic): implement this method.
+	 _cgo_maybe_run_preinit();
+	while (!_cgo_is_runtime_initialized()) {
+			WaitForSingleObject(runtime_init_wait, INFINITE);
+	}
 }
 
 void
 x_cgo_notify_runtime_init_done(void* dummy) {
-	// TODO(spetrovic): implement this method.
-}
-\ No newline at end of file
+	 _cgo_maybe_run_preinit();
+
+	 EnterCriticalSection(&runtime_init_cs);
+	runtime_init_done = 1;
+	 LeaveCriticalSection(&runtime_init_cs);
+
+	 if (!SetEvent(runtime_init_wait)) {
+		fprintf(stderr, "runtime: failed to signal runtime initialization complete.\n");
+		abort();
+	}
+}
+
diff --git a/src/runtime/cgo/gcc_traceback.c b/src/runtime/cgo/gcc_traceback.c
new file mode 100644
index 0000000000..4fdfbe4d9f
--- /dev/null
+++ b/src/runtime/cgo/gcc_traceback.c
@@ -0,0 +1,29 @@
+// Copyright 2016 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build cgo
+// +build linux
+
+#include <stdint.h>
+
+struct cgoTracebackArg {
+	uintptr_t  Context;
+	uintptr_t* Buf;
+	uintptr_t  Max;
+};
+
+// Call the user's traceback function and then call sigtramp.
+// The runtime signal handler will jump to this code.
+// We do it this way so that the user's traceback function will be called
+// by a C function with proper unwind info.
+void
+x_cgo_callers(uintptr_t sig, void *info, void *context, void (*cgoTraceback)(struct cgoTracebackArg*), uintptr_t* cgoCallers, void (*sigtramp)(uintptr_t, void*, void*)) {
+	struct cgoTracebackArg arg;
+
+	arg.Context = 0;
+	arg.Buf = cgoCallers;
+	arg.Max = 32; // must match len(runtime.cgoCallers)
+	(*cgoTraceback)(&arg);
+	sigtramp(sig, info, context);
+}
diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go
index 7a683d7524..d5248803a4 100644
--- a/src/runtime/cgocall.go
+++ b/src/runtime/cgocall.go
@@ -84,6 +84,10 @@ import (
 	"unsafe"
 )
 
+// Addresses collected in a cgo backtrace when crashing.
+// Length must match arg.Max in x_cgo_callers in runtime/cgo/gcc_traceback.c.
+type cgoCallers [32]uintptr
+
 // Call from Go to C.
 //go:nosplit
 func cgocall(fn, arg unsafe.Pointer) int32 {
@@ -109,6 +113,9 @@ func cgocall(fn, arg unsafe.Pointer) int32 {
 	mp.ncgo++
 	defer endcgo(mp)
 
+	// Reset traceback.
+	mp.cgoCallers[0] = 0
+
 	/*
 	 * Announce we are entering a system call
 	 * so that the scheduler knows to create another
diff --git a/src/runtime/chan_test.go b/src/runtime/chan_test.go
index 911821bea5..219f2b449b 100644
--- a/src/runtime/chan_test.go
+++ b/src/runtime/chan_test.go
@@ -777,7 +777,7 @@ func BenchmarkChanContended(b *testing.B) {
 	})
 }
 
-func BenchmarkChanSync(b *testing.B) {
+func benchmarkChanSync(b *testing.B, work int) {
 	const CallsPerSched = 1000
 	procs := 2
 	N := int32(b.N / CallsPerSched / procs * procs)
@@ -793,10 +793,14 @@ func BenchmarkChanSync(b *testing.B) {
 				for g := 0; g < CallsPerSched; g++ {
 					if i%2 == 0 {
 						<-myc
+						localWork(work)
 						myc <- 0
+						localWork(work)
 					} else {
 						myc <- 0
+						localWork(work)
 						<-myc
+						localWork(work)
 					}
 				}
 			}
@@ -808,6 +812,14 @@ func BenchmarkChanSync(b *testing.B) {
 	}
 }
 
+func BenchmarkChanSync(b *testing.B) {
+	benchmarkChanSync(b, 0)
+}
+
+func BenchmarkChanSyncWork(b *testing.B) {
+	benchmarkChanSync(b, 1000)
+}
+
 func benchmarkChanProdCons(b *testing.B, chanSize, localWork int) {
 	const CallsPerSched = 1000
 	procs := runtime.GOMAXPROCS(-1)
@@ -981,3 +993,18 @@ func BenchmarkChanPopular(b *testing.B) {
 	}
 	wg.Wait()
 }
+
+var (
+	alwaysFalse = false
+	workSink    = 0
+)
+
+func localWork(w int) {
+	foo := 0
+	for i := 0; i < w; i++ {
+		foo /= (foo + 1)
+	}
+	if alwaysFalse {
+		workSink += foo
+	}
+}
diff --git a/src/runtime/crash_cgo_test.go b/src/runtime/crash_cgo_test.go
index 2f7591a8d3..6547996b43 100644
--- a/src/runtime/crash_cgo_test.go
+++ b/src/runtime/crash_cgo_test.go
@@ -209,3 +209,15 @@ func TestCgoCCodeSIGPROF(t *testing.T) {
 		t.Errorf("expected %q got %v", want, got)
 	}
 }
+
+func TestCgoCrashTraceback(t *testing.T) {
+	if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" {
+		t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH)
+	}
+	got := runTestProg(t, "testprogcgo", "CrashTraceback")
+	for i := 1; i <= 3; i++ {
+		if !strings.Contains(got, fmt.Sprintf("cgo symbolizer:%d", i)) {
+			t.Errorf("missing cgo symbolizer:%d", i)
+		}
+	}
+}
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 3994d5caf8..fd33c9c3c8 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -196,3 +196,19 @@ func SetTracebackEnv(level string) {
 
 var ReadUnaligned32 = readUnaligned32
 var ReadUnaligned64 = readUnaligned64
+
+func CountPagesInUse() (pagesInUse, counted uintptr) {
+	stopTheWorld("CountPagesInUse")
+
+	pagesInUse = uintptr(mheap_.pagesInUse)
+
+	for _, s := range h_allspans {
+		if s.state == mSpanInUse {
+			counted += s.npages
+		}
+	}
+
+	startTheWorld()
+
+	return
+}
diff --git a/src/runtime/export_windows_test.go b/src/runtime/export_windows_test.go
index 9e744fc35f..66c103709c 100644
--- a/src/runtime/export_windows_test.go
+++ b/src/runtime/export_windows_test.go
@@ -9,9 +9,14 @@ package runtime
 import "unsafe"
 
 var TestingWER = &testingWER
+var OsYield = osyield
 
 func NumberOfProcessors() int32 {
 	var info systeminfo
 	stdcall1(_GetSystemInfo, uintptr(unsafe.Pointer(&info)))
 	return int32(info.dwnumberofprocessors)
 }
+
+func LoadLibraryExStatus() (useEx, haveEx, haveFlags bool) {
+	return useLoadLibraryEx, _LoadLibraryExW != nil, _AddDllDirectory != nil
+}
diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go
index c8c96bb4ee..d53d3ee000 100644
--- a/src/runtime/gc_test.go
+++ b/src/runtime/gc_test.go
@@ -473,3 +473,20 @@ func testIfaceEqual(x interface{}) {
 		a = true
 	}
 }
+
+func TestPageAccounting(t *testing.T) {
+	// Grow the heap in small increments. This used to drop the
+	// pages-in-use count below zero because of a rounding
+	// mismatch (golang.org/issue/15022).
+	const blockSize = 64 << 10
+	blocks := make([]*[blockSize]byte, (64<<20)/blockSize)
+	for i := range blocks {
+		blocks[i] = new([blockSize]byte)
+	}
+
+	// Check that the running page count matches reality.
+	pagesInUse, counted := runtime.CountPagesInUse()
+	if pagesInUse != counted {
+		t.Fatalf("mheap_.pagesInUse is %d, but direct count is %d", pagesInUse, counted)
+	}
+}
diff --git a/src/runtime/iface.go b/src/runtime/iface.go
index ced87ea816..3ce1e237d3 100644
--- a/src/runtime/iface.go
+++ b/src/runtime/iface.go
@@ -19,25 +19,28 @@ var (
 	hash      [hashSize]*itab
 )
 
+func itabhash(inter *interfacetype, typ *_type) uint32 {
+	// compiler has provided some good hash codes for us.
+	h := inter.typ.hash
+	h += 17 * typ.hash
+	// TODO(rsc): h += 23 * x.mhash ?
+	return h % hashSize
+}
+
 func getitab(inter *interfacetype, typ *_type, canfail bool) *itab {
 	if len(inter.mhdr) == 0 {
 		throw("internal error - misuse of itab")
 	}
 
 	// easy case
-	x := typ.uncommon()
-	if x == nil {
+	if typ.tflag&tflagUncommon == 0 {
 		if canfail {
 			return nil
 		}
 		panic(&TypeAssertionError{"", typ._string, inter.typ._string, inter.mhdr[0].name.name()})
 	}
 
-	// compiler has provided some good hash codes for us.
-	h := inter.typ.hash
-	h += 17 * typ.hash
-	// TODO(rsc): h += 23 * x.mhash ?
-	h %= hashSize
+	h := itabhash(inter, typ)
 
 	// look twice - once without lock, once with.
 	// common case will be no lock contention.
@@ -56,10 +59,9 @@ func getitab(inter *interfacetype, typ *_type, canfail bool) *itab {
 						// was already done once using the , ok form
 						// and we have a cached negative result.
 						// the cached result doesn't record which
-						// interface function was missing, so jump
-						// down to the interface check, which will
-						// do more work but give a better error.
-						goto search
+						// interface function was missing, so try
+						// adding the itab again, which will throw an error.
+						additab(m, locked != 0, false)
 					}
 				}
 				if locked != 0 {
@@ -73,8 +75,19 @@ func getitab(inter *interfacetype, typ *_type, canfail bool) *itab {
 	m = (*itab)(persistentalloc(unsafe.Sizeof(itab{})+uintptr(len(inter.mhdr)-1)*sys.PtrSize, 0, &memstats.other_sys))
 	m.inter = inter
 	m._type = typ
+	additab(m, true, canfail)
+	unlock(&ifaceLock)
+	if m.bad != 0 {
+		return nil
+	}
+	return m
+}
+
+func additab(m *itab, locked, canfail bool) {
+	inter := m.inter
+	typ := m._type
+	x := typ.uncommon()
 
-search:
 	// both inter and typ have method sorted by name,
 	// and interface names are unique,
 	// so can iterate over both in lock step;
@@ -107,7 +120,7 @@ search:
 		}
 		// didn't find method
 		if !canfail {
-			if locked != 0 {
+			if locked {
 				unlock(&ifaceLock)
 			}
 			panic(&TypeAssertionError{"", typ._string, inter.typ._string, iname})
@@ -116,22 +129,22 @@ search:
 		break
 	nextimethod:
 	}
-	if locked == 0 {
+	if !locked {
 		throw("invalid itab locking")
 	}
+	h := itabhash(inter, typ)
 	m.link = hash[h]
 	atomicstorep(unsafe.Pointer(&hash[h]), unsafe.Pointer(m))
-	unlock(&ifaceLock)
-	if m.bad != 0 {
-		return nil
-	}
-	return m
 }
 
-func typ2Itab(t *_type, inter *interfacetype, cache **itab) *itab {
-	tab := getitab(inter, t, false)
-	atomicstorep(unsafe.Pointer(cache), unsafe.Pointer(tab))
-	return tab
+func itabsinit() {
+	lock(&ifaceLock)
+	for m := &firstmoduledata; m != nil; m = m.next {
+		for _, i := range m.itablinks {
+			additab(i, true, false)
+		}
+	}
+	unlock(&ifaceLock)
 }
 
 func convT2E(t *_type, elem unsafe.Pointer, x unsafe.Pointer) (e eface) {
@@ -157,18 +170,14 @@ func convT2E(t *_type, elem unsafe.Pointer, x unsafe.Pointer) (e eface) {
 	return
 }
 
-func convT2I(t *_type, inter *interfacetype, cache **itab, elem unsafe.Pointer, x unsafe.Pointer) (i iface) {
+func convT2I(tab *itab, elem unsafe.Pointer, x unsafe.Pointer) (i iface) {
+	t := tab._type
 	if raceenabled {
 		raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2I))
 	}
 	if msanenabled {
 		msanread(elem, t.size)
 	}
-	tab := (*itab)(atomic.Loadp(unsafe.Pointer(cache)))
-	if tab == nil {
-		tab = getitab(inter, t, false)
-		atomicstorep(unsafe.Pointer(cache), unsafe.Pointer(tab))
-	}
 	if isDirectIface(t) {
 		i.tab = tab
 		typedmemmove(t, unsafe.Pointer(&i.data), elem)
diff --git a/src/runtime/mbarrier.go b/src/runtime/mbarrier.go
index 523d890a07..f03bf18ebc 100644
--- a/src/runtime/mbarrier.go
+++ b/src/runtime/mbarrier.go
@@ -159,31 +159,6 @@ func writebarrierptr_nostore(dst *uintptr, src uintptr) {
 	writebarrierptr_nostore1(dst, src)
 }
 
-//go:nosplit
-func writebarrierstring(dst *[2]uintptr, src [2]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	dst[1] = src[1]
-}
-
-//go:nosplit
-func writebarrierslice(dst *[3]uintptr, src [3]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	dst[1] = src[1]
-	dst[2] = src[2]
-}
-
-//go:nosplit
-func writebarrieriface(dst *[2]uintptr, src [2]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	writebarrierptr(&dst[1], src[1])
-}
-
-//go:generate go run wbfat_gen.go -- wbfat.go
-//
-// The above line generates multiword write barriers for
-// all the combinations of ptr+scalar up to four words.
-// The implementations are written to wbfat.go.
-
 // typedmemmove copies a value of type t to dst from src.
 //go:nosplit
 func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
diff --git a/src/runtime/memmove_386.s b/src/runtime/memmove_386.s
index d4baf2280a..52b35a6ac7 100644
--- a/src/runtime/memmove_386.s
+++ b/src/runtime/memmove_386.s
@@ -70,24 +70,29 @@ nosse2:
  * forward copy loop
  */
 forward:
+	// If REP MOVSB isn't fast, don't use it
+	TESTL	$(1<<9), runtime·cpuid_ebx7(SB) // erms, aka enhanced REP MOVSB/STOSB
+	JEQ	fwdBy4
+
 	// Check alignment
 	MOVL	SI, AX
 	ORL	DI, AX
 	TESTL	$3, AX
-	JNE	unaligned_fwd
+	JEQ	fwdBy4
+
+	// Do 1 byte at a time
+	MOVL	BX, CX
+	REP;	MOVSB
+	RET
 
+fwdBy4:
+	// Do 4 bytes at a time
 	MOVL	BX, CX
 	SHRL	$2, CX
 	ANDL	$3, BX
-
 	REP;	MOVSL
 	JMP	tail
 
-unaligned_fwd:
-	MOVL	BX, CX
-	REP;	MOVSB
-	RET
-
 /*
  * check overlap
  */
diff --git a/src/runtime/memmove_amd64.s b/src/runtime/memmove_amd64.s
index 514eb169f1..39b4c3a2bb 100644
--- a/src/runtime/memmove_amd64.s
+++ b/src/runtime/memmove_amd64.s
@@ -77,25 +77,29 @@ forward:
 	CMPQ	BX, $2048
 	JLS	move_256through2048
 
+	// If REP MOVSB isn't fast, don't use it
+	TESTL	$(1<<9), runtime·cpuid_ebx7(SB) // erms, aka enhanced REP MOVSB/STOSB
+	JEQ	fwdBy8
+
 	// Check alignment
-	MOVQ	SI, AX
-	ORQ	DI, AX
+	MOVL	SI, AX
+	ORL	DI, AX
 	TESTL	$7, AX
-	JNE	unaligned_fwd
+	JEQ	fwdBy8
+
+	// Do 1 byte at a time
+	MOVQ	BX, CX
+	REP;	MOVSB
+	RET
 
-	// Aligned - do 8 bytes at a time
+fwdBy8:
+	// Do 8 bytes at a time
 	MOVQ	BX, CX
 	SHRQ	$3, CX
 	ANDQ	$7, BX
 	REP;	MOVSQ
 	JMP	tail
 
-unaligned_fwd:
-	// Unaligned - do 1 byte at a time
-	MOVQ	BX, CX
-	REP;	MOVSB
-	RET
-
 back:
 /*
  * check overlap
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index d6d91d2021..31d1a80183 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -96,7 +96,7 @@ func sweepone() uintptr {
 			mheap_.sweepdone = 1
 			_g_.m.locks--
 			if debug.gcpacertrace > 0 && idx == uint32(len(work.spans)) {
-				print("pacer: sweep done at heap size ", memstats.heap_live>>20, "MB; allocated ", mheap_.spanBytesAlloc>>20, "MB of spans; swept ", mheap_.pagesSwept, " pages\n")
+				print("pacer: sweep done at heap size ", memstats.heap_live>>20, "MB; allocated ", mheap_.spanBytesAlloc>>20, "MB of spans; swept ", mheap_.pagesSwept, " pages at ", mheap_.sweepPagesPerByte, " pages/byte\n")
 			}
 			return ^uintptr(0)
 		}
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 0f2f0637d2..895af9f07c 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -671,7 +671,7 @@ func (h *mheap) grow(npage uintptr) bool {
 	}
 	atomic.Store(&s.sweepgen, h.sweepgen)
 	s.state = _MSpanInUse
-	h.pagesInUse += uint64(npage)
+	h.pagesInUse += uint64(s.npages)
 	h.freeSpanLocked(s, false, true, 0)
 	return true
 }
diff --git a/src/runtime/os1_linux.go b/src/runtime/os1_linux.go
index 1c1ead8790..726dd649fe 100644
--- a/src/runtime/os1_linux.go
+++ b/src/runtime/os1_linux.go
@@ -305,6 +305,7 @@ func memlimit() uintptr {
 
 func sigreturn()
 func sigtramp()
+func cgoSigtramp()
 
 //go:nosplit
 //go:nowritebarrierrec
@@ -323,7 +324,11 @@ func setsig(i int32, fn uintptr, restart bool) {
 		sa.sa_restorer = funcPC(sigreturn)
 	}
 	if fn == funcPC(sighandler) {
-		fn = funcPC(sigtramp)
+		if iscgo {
+			fn = funcPC(cgoSigtramp)
+		} else {
+			fn = funcPC(sigtramp)
+		}
 	}
 	sa.sa_handler = fn
 	rt_sigaction(uintptr(i), &sa, nil, unsafe.Sizeof(sa.sa_mask))
@@ -354,7 +359,7 @@ func getsig(i int32) uintptr {
 	if rt_sigaction(uintptr(i), nil, &sa, unsafe.Sizeof(sa.sa_mask)) != 0 {
 		throw("rt_sigaction read failure")
 	}
-	if sa.sa_handler == funcPC(sigtramp) {
+	if sa.sa_handler == funcPC(sigtramp) || sa.sa_handler == funcPC(cgoSigtramp) {
 		return funcPC(sighandler)
 	}
 	return sa.sa_handler
diff --git a/src/runtime/os1_windows.go b/src/runtime/os1_windows.go
index 8d46bca36d..315dd9816a 100644
--- a/src/runtime/os1_windows.go
+++ b/src/runtime/os1_windows.go
@@ -41,6 +41,7 @@ import (
 //go:cgo_import_dynamic runtime._SetUnhandledExceptionFilter SetUnhandledExceptionFilter%1 "kernel32.dll"
 //go:cgo_import_dynamic runtime._SetWaitableTimer SetWaitableTimer%6 "kernel32.dll"
 //go:cgo_import_dynamic runtime._SuspendThread SuspendThread%1 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SwitchToThread SwitchToThread%0 "kernel32.dll"
 //go:cgo_import_dynamic runtime._VirtualAlloc VirtualAlloc%4 "kernel32.dll"
 //go:cgo_import_dynamic runtime._VirtualFree VirtualFree%3 "kernel32.dll"
 //go:cgo_import_dynamic runtime._WSAGetOverlappedResult WSAGetOverlappedResult%5 "ws2_32.dll"
@@ -84,6 +85,7 @@ var (
 	_SetUnhandledExceptionFilter,
 	_SetWaitableTimer,
 	_SuspendThread,
+	_SwitchToThread,
 	_VirtualAlloc,
 	_VirtualFree,
 	_WSAGetOverlappedResult,
@@ -93,8 +95,11 @@ var (
 
 	// Following syscalls are only available on some Windows PCs.
 	// We will load syscalls, if available, before using them.
+	_AddDllDirectory,
 	_AddVectoredContinueHandler,
-	_GetQueuedCompletionStatusEx stdFunction
+	_GetQueuedCompletionStatusEx,
+	_LoadLibraryExW,
+	_ stdFunction
 )
 
 type sigset struct{}
@@ -105,21 +110,28 @@ func asmstdcall(fn unsafe.Pointer)
 
 var asmstdcallAddr unsafe.Pointer
 
+func windowsFindfunc(name []byte, lib uintptr) stdFunction {
+	f := stdcall2(_GetProcAddress, lib, uintptr(unsafe.Pointer(&name[0])))
+	return stdFunction(unsafe.Pointer(f))
+}
+
 func loadOptionalSyscalls() {
-	var buf [50]byte // large enough for longest string
-	strtoptr := func(s string) uintptr {
-		buf[copy(buf[:], s)] = 0 // nil-terminated for OS
-		return uintptr(noescape(unsafe.Pointer(&buf[0])))
-	}
-	l := stdcall1(_LoadLibraryA, strtoptr("kernel32.dll"))
-	findfunc := func(name string) stdFunction {
-		f := stdcall2(_GetProcAddress, l, strtoptr(name))
-		return stdFunction(unsafe.Pointer(f))
-	}
-	if l != 0 {
-		_AddVectoredContinueHandler = findfunc("AddVectoredContinueHandler")
-		_GetQueuedCompletionStatusEx = findfunc("GetQueuedCompletionStatusEx")
+	var (
+		kernel32dll                 = []byte("kernel32.dll\000")
+		addVectoredContinueHandler  = []byte("AddVectoredContinueHandler\000")
+		getQueuedCompletionStatusEx = []byte("GetQueuedCompletionStatusEx\000")
+		addDllDirectory             = []byte("AddDllDirectory\000")
+		loadLibraryExW              = []byte("LoadLibraryExW\000")
+	)
+
+	k32 := stdcall1(_LoadLibraryA, uintptr(unsafe.Pointer(&kernel32dll[0])))
+	if k32 == 0 {
+		throw("kernel32.dll not found")
 	}
+	_AddDllDirectory = windowsFindfunc(addDllDirectory, k32)
+	_AddVectoredContinueHandler = windowsFindfunc(addVectoredContinueHandler, k32)
+	_GetQueuedCompletionStatusEx = windowsFindfunc(getQueuedCompletionStatusEx, k32)
+	_LoadLibraryExW = windowsFindfunc(loadLibraryExW, k32)
 }
 
 //go:nosplit
@@ -128,6 +140,11 @@ func getLoadLibrary() uintptr {
 }
 
 //go:nosplit
+func getLoadLibraryEx() uintptr {
+	return uintptr(unsafe.Pointer(_LoadLibraryExW))
+}
+
+//go:nosplit
 func getGetProcAddress() uintptr {
 	return uintptr(unsafe.Pointer(_GetProcAddress))
 }
@@ -161,13 +178,33 @@ const (
 // in sys_windows_386.s and sys_windows_amd64.s
 func externalthreadhandler()
 
+// When loading DLLs, we prefer to use LoadLibraryEx with
+// LOAD_LIBRARY_SEARCH_* flags, if available. LoadLibraryEx is not
+// available on old Windows, though, and the LOAD_LIBRARY_SEARCH_*
+// flags are not available on some versions of Windows without a
+// security patch.
+//
+// https://msdn.microsoft.com/en-us/library/ms684179(v=vs.85).aspx says:
+// "Windows 7, Windows Server 2008 R2, Windows Vista, and Windows
+// Server 2008: The LOAD_LIBRARY_SEARCH_* flags are available on
+// systems that have KB2533623 installed. To determine whether the
+// flags are available, use GetProcAddress to get the address of the
+// AddDllDirectory, RemoveDllDirectory, or SetDefaultDllDirectories
+// function. If GetProcAddress succeeds, the LOAD_LIBRARY_SEARCH_*
+// flags can be used with LoadLibraryEx."
+var useLoadLibraryEx bool
+
 func osinit() {
 	asmstdcallAddr = unsafe.Pointer(funcPC(asmstdcall))
+	usleep2Addr = unsafe.Pointer(funcPC(usleep2))
+	switchtothreadAddr = unsafe.Pointer(funcPC(switchtothread))
 
 	setBadSignalMsg()
 
 	loadOptionalSyscalls()
 
+	useLoadLibraryEx = (_LoadLibraryExW != nil && _AddDllDirectory != nil)
+
 	disableWER()
 
 	externalthreadhandlerp = funcPC(externalthreadhandler)
@@ -368,8 +405,11 @@ func semacreate(mp *m) {
 	mp.waitsema = stdcall4(_CreateEventA, 0, 0, 0, 0)
 }
 
-// May run with m.p==nil, so write barriers are not allowed.
-//go:nowritebarrier
+// May run with m.p==nil, so write barriers are not allowed. This
+// function is called by newosproc0, so it is also required to
+// operate without stack guards.
+//go:nowritebarrierc
+//go:nosplit
 func newosproc(mp *m, stk unsafe.Pointer) {
 	const _STACK_SIZE_PARAM_IS_A_RESERVATION = 0x00010000
 	thandle := stdcall6(_CreateThread, 0, 0x20000,
@@ -381,6 +421,15 @@ func newosproc(mp *m, stk unsafe.Pointer) {
 	}
 }
 
+// Used by the C library build mode. On Linux this function would allocate a
+// stack, but that's not necessary for Windows. No stack guards are present
+// and the GC has not been initialized, so write barriers will fail.
+//go:nowritebarrierc
+//go:nosplit
+func newosproc0(mp *m, stk unsafe.Pointer) {
+	newosproc(mp, stk)
+}
+
 // Called to initialize a new m (including the bootstrap m).
 // Called on the parent thread (main thread in case of bootstrap), can allocate memory.
 func mpreinit(mp *m) {
@@ -546,17 +595,22 @@ func stdcall7(fn stdFunction, a0, a1, a2, a3, a4, a5, a6 uintptr) uintptr {
 }
 
 // in sys_windows_386.s and sys_windows_amd64.s
-func usleep1(usec uint32)
+func onosstack(fn unsafe.Pointer, arg uint32)
+func usleep2(usec uint32)
+func switchtothread()
+
+var usleep2Addr unsafe.Pointer
+var switchtothreadAddr unsafe.Pointer
 
 //go:nosplit
 func osyield() {
-	usleep1(1)
+	onosstack(switchtothreadAddr, 0)
 }
 
 //go:nosplit
 func usleep(us uint32) {
 	// Have 1us units; want 100ns units.
-	usleep1(10 * us)
+	onosstack(usleep2Addr, 10*us)
 }
 
 func ctrlhandler1(_type uint32) uint32 {
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index c30ce7a5a3..5145c84aea 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -435,6 +435,7 @@ func schedinit() {
 	tracebackinit()
 	moduledataverify()
 	stackinit()
+	itabsinit()
 	mallocinit()
 	mcommoninit(_g_.m)
 
@@ -509,6 +510,11 @@ func mcommoninit(mp *m) {
 	// so we need to publish it safely.
 	atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
 	unlock(&sched.lock)
+
+	// Allocate memory to hold a cgo traceback if the cgo call crashes.
+	if iscgo || GOOS == "solaris" || GOOS == "windows" {
+		mp.cgoCallers = new(cgoCallers)
+	}
 }
 
 // Mark gp ready to run.
@@ -715,9 +721,13 @@ func casgstatus(gp *g, oldval, newval uint32) {
 		throw("casgstatus")
 	}
 
+	// See http://golang.org/cl/21503 for justification of the yield delay.
+	const yieldDelay = 5 * 1000
+	var nextYield int64
+
 	// loop if gp->atomicstatus is in a scan state giving
 	// GC time to finish and change the state to oldval.
-	for !atomic.Cas(&gp.atomicstatus, oldval, newval) {
+	for i := 0; !atomic.Cas(&gp.atomicstatus, oldval, newval); i++ {
 		if oldval == _Gwaiting && gp.atomicstatus == _Grunnable {
 			systemstack(func() {
 				throw("casgstatus: waiting for Gwaiting but is Grunnable")
@@ -730,6 +740,18 @@ func casgstatus(gp *g, oldval, newval uint32) {
 		// 		gcphasework(gp)
 		// 	})
 		// }
+		// But meanwhile just yield.
+		if i == 0 {
+			nextYield = nanotime() + yieldDelay
+		}
+		if nanotime() < nextYield {
+			for x := 0; x < 10 && gp.atomicstatus != oldval; x++ {
+				procyield(1)
+			}
+		} else {
+			osyield()
+			nextYield = nanotime() + yieldDelay/2
+		}
 	}
 	if newval == _Grunning {
 		gp.gcscanvalid = false
@@ -767,12 +789,17 @@ func scang(gp *g) {
 
 	gp.gcscandone = false
 
+	// See http://golang.org/cl/21503 for justification of the yield delay.
+	const yieldDelay = 10 * 1000
+	var nextYield int64
+
 	// Endeavor to get gcscandone set to true,
 	// either by doing the stack scan ourselves or by coercing gp to scan itself.
 	// gp.gcscandone can transition from false to true when we're not looking
 	// (if we asked for preemption), so any time we lock the status using
 	// castogscanstatus we have to double-check that the scan is still not done.
-	for !gp.gcscandone {
+loop:
+	for i := 0; !gp.gcscandone; i++ {
 		switch s := readgstatus(gp); s {
 		default:
 			dumpgstatus(gp)
@@ -781,6 +808,7 @@ func scang(gp *g) {
 		case _Gdead:
 			// No stack.
 			gp.gcscandone = true
+			break loop
 
 		case _Gcopystack:
 		// Stack being switched. Go around again.
@@ -796,6 +824,7 @@ func scang(gp *g) {
 					gp.gcscandone = true
 				}
 				restartg(gp)
+				break loop
 			}
 
 		case _Gscanwaiting:
@@ -821,6 +850,16 @@ func scang(gp *g) {
 				casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
 			}
 		}
+
+		if i == 0 {
+			nextYield = nanotime() + yieldDelay
+		}
+		if nanotime() < nextYield {
+			procyield(10)
+		} else {
+			osyield()
+			nextYield = nanotime() + yieldDelay/2
+		}
 	}
 
 	gp.preemptscan = false // cancel scan request if no longer needed
@@ -3993,7 +4032,16 @@ func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool
 					// Instead of stealing runnext in this window, back off
 					// to give _p_ a chance to schedule runnext. This will avoid
 					// thrashing gs between different Ps.
-					usleep(100)
+					// A sync chan send/recv takes ~50ns as of time of writing,
+					// so 3us gives ~50x overshoot.
+					if GOOS != "windows" {
+						usleep(3)
+					} else {
+						// On windows system timer granularity is 1-15ms,
+						// which is way too much for this optimization.
+						// So just yield.
+						osyield()
+					}
 					if !_p_.runnext.cas(next, 0) {
 						continue
 					}
diff --git a/src/runtime/rt0_windows_386.s b/src/runtime/rt0_windows_386.s
index 0150cc2918..b9407a9879 100644
--- a/src/runtime/rt0_windows_386.s
+++ b/src/runtime/rt0_windows_386.s
@@ -12,5 +12,39 @@ TEXT _rt0_386_windows(SB),NOSPLIT,$12
 	MOVL	$-1, 0(SP) // return PC for main
 	JMP	_main(SB)
 
+// When building with -buildmode=(c-shared or c-archive), this
+// symbol is called. For dynamic libraries it is called when the
+// library is loaded. For static libraries it is called when the
+// final executable starts, during the C runtime initialization
+// phase.
+TEXT _rt0_386_windows_lib(SB),NOSPLIT,$0x1C
+	MOVL	BP, 0x08(SP)
+	MOVL	BX, 0x0C(SP)
+	MOVL	AX, 0x10(SP)
+	MOVL  CX, 0x14(SP)
+	MOVL  DX, 0x18(SP)
+
+	// Create a new thread to do the runtime initialization and return.
+	MOVL	_cgo_sys_thread_create(SB), AX
+	MOVL	$_rt0_386_windows_lib_go(SB), 0x00(SP)
+	MOVL	$0, 0x04(SP)
+
+	 // Top two items on the stack are passed to _cgo_sys_thread_create
+	 // as parameters. This is the calling convention on 32-bit Windows.
+	CALL	AX
+
+	MOVL	0x08(SP), BP
+	MOVL	0x0C(SP), BX
+	MOVL	0x10(SP), AX
+	MOVL	0x14(SP), CX
+	MOVL	0x18(SP), DX
+	RET
+
+TEXT _rt0_386_windows_lib_go(SB),NOSPLIT,$0
+	MOVL  $0, DI
+	MOVL	$0, SI
+	MOVL	$runtime·rt0_go(SB), AX
+	JMP	AX
+
 TEXT _main(SB),NOSPLIT,$0
 	JMP	runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_windows_amd64.s b/src/runtime/rt0_windows_amd64.s
index 95dce06d71..2f73b37f31 100644
--- a/src/runtime/rt0_windows_amd64.s
+++ b/src/runtime/rt0_windows_amd64.s
@@ -12,6 +12,37 @@ TEXT _rt0_amd64_windows(SB),NOSPLIT,$-8
 	MOVQ	$main(SB), AX
 	JMP	AX
 
+// When building with -buildmode=(c-shared or c-archive), this
+// symbol is called. For dynamic libraries it is called when the
+// library is loaded. For static libraries it is called when the
+// final executable starts, during the C runtime initialization
+// phase.
+TEXT _rt0_amd64_windows_lib(SB),NOSPLIT,$0x28
+	MOVQ	BP, 0x00(SP)
+	MOVQ	BX, 0x08(SP)
+	MOVQ	AX, 0x10(SP)
+	MOVQ  CX, 0x18(SP)
+	MOVQ  DX, 0x20(SP)
+
+	// Create a new thread to do the runtime initialization and return.
+	MOVQ	_cgo_sys_thread_create(SB), AX
+	MOVQ	$_rt0_amd64_windows_lib_go(SB), CX
+	MOVQ	$0, DX
+	CALL	AX
+
+	MOVQ	0x00(SP), BP
+	MOVQ	0x08(SP), BX
+	MOVQ	0x10(SP), AX
+	MOVQ	0x18(SP), CX
+	MOVQ	0x20(SP), DX
+	RET
+
+TEXT _rt0_amd64_windows_lib_go(SB),NOSPLIT,$0
+	MOVQ  $0, DI
+	MOVQ	$0, SI
+	MOVQ	$runtime·rt0_go(SB), AX
+	JMP	AX
+
 TEXT main(SB),NOSPLIT,$-8
 	MOVQ	$runtime·rt0_go(SB), AX
 	JMP	AX
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index a54dc552c1..e0137f7e97 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -373,8 +373,10 @@ type m struct {
 	newSigstack   bool // minit on C thread called sigaltstack
 	printlock     int8
 	fastrand      uint32
-	ncgocall      uint64 // number of cgo calls in total
-	ncgo          int32  // number of cgo calls currently in progress
+	ncgocall      uint64      // number of cgo calls in total
+	ncgo          int32       // number of cgo calls currently in progress
+	cgoCallersUse uint32      // if non-zero, cgoCallers in use temporarily
+	cgoCallers    *cgoCallers // cgo traceback if crashing in cgo call
 	park          note
 	alllink       *m // on allm
 	schedlink     muintptr
@@ -577,6 +579,8 @@ type _func struct {
 
 // layout of Itab known to compilers
 // allocated in non-garbage-collected memory
+// Needs to be in sync with
+// ../cmd/compile/internal/gc/reflect.go:/^func.dumptypestructs.
 type itab struct {
 	inter  *interfacetype
 	_type  *_type
@@ -699,6 +703,7 @@ var (
 	// Set on startup in asm_{x86,amd64}.s.
 	cpuid_ecx         uint32
 	cpuid_edx         uint32
+	cpuid_ebx7        uint32
 	lfenceBeforeRdtsc bool
 	support_avx       bool
 	support_avx2      bool
diff --git a/src/runtime/signal_solaris.go b/src/runtime/signal_solaris.go
index 2cab5b8239..a86f7bf6b4 100644
--- a/src/runtime/signal_solaris.go
+++ b/src/runtime/signal_solaris.go
@@ -32,7 +32,7 @@ var sigtable = [...]sigTabT{
 	/* 19 */ {_SigNotify, "SIGPWR: power-fail restart"},
 	/* 20 */ {_SigNotify, "SIGWINCH: window size change"},
 	/* 21 */ {_SigNotify, "SIGURG: urgent socket condition"},
-	/* 22 */ {_SigNotify, "SIGPOLL: pollable event occured"},
+	/* 22 */ {_SigNotify, "SIGPOLL: pollable event occurred"},
 	/* 23 */ {_SigNotify + _SigDefault, "SIGSTOP: stop (cannot be caught or ignored)"},
 	/* 24 */ {_SigNotify + _SigDefault, "SIGTSTP: user stop requested from tty"},
 	/* 25 */ {_SigNotify + _SigDefault, "SIGCONT: stopped process has been continued"},
diff --git a/src/runtime/string.go b/src/runtime/string.go
index 3e49b9431e..2d20e0a9c3 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -236,6 +236,9 @@ func intstring(buf *[4]byte, v int64) string {
 	} else {
 		s, b = rawstring(4)
 	}
+	if int64(rune(v)) != v {
+		v = runeerror
+	}
 	n := runetochar(b, rune(v))
 	return s[:n]
 }
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index cd328eb899..158bdcea0d 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -129,6 +129,7 @@ type moduledata struct {
 	end, gcdata, gcbss    uintptr
 
 	typelinks []*_type
+	itablinks []*itab
 
 	modulename   string
 	modulehashes []modulehash
diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s
index 3bf5eb0df4..4fe07e0837 100644
--- a/src/runtime/sys_linux_386.s
+++ b/src/runtime/sys_linux_386.s
@@ -232,6 +232,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$12
 	CALL	runtime·sigtrampgo(SB)
 	RET
 
+TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
+	JMP	runtime·sigtramp(SB)
+
 TEXT runtime·sigreturn(SB),NOSPLIT,$0
 	MOVL	$173, AX	// rt_sigreturn
 	// Sigreturn expects same SP as signal handler,
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
index 7cab649238..031e412673 100644
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -234,8 +234,65 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$24
 	CALL AX
 	RET
 
+// Used instead of sigtramp in programs that use cgo.
+// Arguments from kernel are in DI, SI, DX.
+TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
+	// If no traceback function, do usual sigtramp.
+	MOVQ	runtime·cgoTraceback(SB), AX
+	TESTQ	AX, AX
+	JZ	sigtramp
+
+	// If no traceback support function, which means that
+	// runtime/cgo was not linked in, do usual sigtramp.
+	MOVQ	_cgo_callers(SB), AX
+	TESTQ	AX, AX
+	JZ	sigtramp
+
+	// Figure out if we are currently in a cgo call.
+	// If not, just do usual sigtramp.
+	get_tls(CX)
+	MOVQ	g(CX),AX
+	TESTQ	AX, AX
+	JZ	sigtramp        // g == nil
+	MOVQ	g_m(AX), AX
+	TESTQ	AX, AX
+	JZ	sigtramp        // g.m == nil
+	MOVL	m_ncgo(AX), CX
+	TESTL	CX, CX
+	JZ	sigtramp        // g.m.ncgo == 0
+	MOVQ	m_curg(AX), CX
+	TESTQ	CX, CX
+	JZ	sigtramp        // g.m.curg == nil
+	MOVQ	g_syscallsp(CX), CX
+	TESTQ	CX, CX
+	JZ	sigtramp        // g.m.curg.syscallsp == 0
+	MOVQ	m_cgoCallers(AX), R8
+	TESTQ	R8, R8
+	JZ	sigtramp        // g.m.cgoCallers == nil
+	MOVL	m_cgoCallersUse(AX), CX
+	TESTL	CX, CX
+	JNZ	sigtramp	// g.m.cgoCallersUse != 0
+
+	// Jump to a function in runtime/cgo.
+	// That function, written in C, will call the user's traceback
+	// function with proper unwind info, and will then call back here.
+	// The first three arguments are already in registers.
+	// Set the last three arguments now.
+	MOVQ	runtime·cgoTraceback(SB), CX
+	MOVQ	$runtime·sigtramp(SB), R9
+	MOVQ	_cgo_callers(SB), AX
+	JMP	AX
+
+sigtramp:
+	JMP	runtime·sigtramp(SB)
+
+// For cgo unwinding to work, this function must look precisely like
+// the one in glibc.  The glibc source code is:
+// https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/x86_64/sigaction.c
+// The code that cares about the precise instructions used is:
+// https://gcc.gnu.org/viewcvs/gcc/trunk/libgcc/config/i386/linux-unwind.h?revision=219188&view=markup
 TEXT runtime·sigreturn(SB),NOSPLIT,$0
-	MOVL	$15, AX	// rt_sigreturn
+	MOVQ	$15, AX	// rt_sigreturn
 	SYSCALL
 	INT $3	// not reached
 
diff --git a/src/runtime/sys_linux_arm.s b/src/runtime/sys_linux_arm.s
index 50a551320a..5e5fcf0e6f 100644
--- a/src/runtime/sys_linux_arm.s
+++ b/src/runtime/sys_linux_arm.s
@@ -361,6 +361,10 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$12
 	BL	(R11)
 	RET
 
+TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
+	MOVW  	$runtime·sigtramp(SB), R11
+	B	(R11)
+
 TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0
 	MOVW	sig+0(FP), R0
 	MOVW	new+4(FP), R1
diff --git a/src/runtime/sys_linux_arm64.s b/src/runtime/sys_linux_arm64.s
index 94c101a3d4..1bee8477ed 100644
--- a/src/runtime/sys_linux_arm64.s
+++ b/src/runtime/sys_linux_arm64.s
@@ -259,6 +259,10 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$24
 	BL	(R0)
 	RET
 
+TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
+	MOVD	$runtime·sigtramp(SB), R3
+	B	(R3)
+
 TEXT runtime·mmap(SB),NOSPLIT,$-8
 	MOVD	addr+0(FP), R0
 	MOVD	n+8(FP), R1
diff --git a/src/runtime/sys_linux_mips64x.s b/src/runtime/sys_linux_mips64x.s
index 26437ddde4..f6877cb32d 100644
--- a/src/runtime/sys_linux_mips64x.s
+++ b/src/runtime/sys_linux_mips64x.s
@@ -249,6 +249,10 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64
 	JAL	(R1)
 	RET
 
+TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
+	MOVV	$runtime·sigtramp(SB), R1
+	JMP	(R1)
+
 TEXT runtime·mmap(SB),NOSPLIT,$-8
 	MOVV	addr+0(FP), R4
 	MOVV	n+8(FP), R5
diff --git a/src/runtime/sys_linux_ppc64x.s b/src/runtime/sys_linux_ppc64x.s
index d063e025a6..56b842ac01 100644
--- a/src/runtime/sys_linux_ppc64x.s
+++ b/src/runtime/sys_linux_ppc64x.s
@@ -243,6 +243,21 @@ TEXT runtime·_sigtramp(SB),NOSPLIT,$64
 	MOVD	24(R1), R2
 	RET
 
+#ifdef GOARCH_ppc64le
+// ppc64le doesn't need function descriptors
+TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0
+#else
+// function descriptor for the real sigtramp
+TEXT runtime·cgoSigtramp(SB),NOSPLIT|NOFRAME,$0
+	DWORD	$runtime·_cgoSigtramp(SB)
+	DWORD	$0
+	DWORD	$0
+TEXT runtime·_cgoSigtramp(SB),NOSPLIT,$0
+#endif
+	MOVD	$runtime·sigtramp(SB), R12
+	MOVD	R12, CTR
+	JMP	(CTR)
+
 TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0
 	MOVD	addr+0(FP), R3
 	MOVD	n+8(FP), R4
diff --git a/src/runtime/sys_windows_386.s b/src/runtime/sys_windows_386.s
index 55cdcf407f..95130b733d 100644
--- a/src/runtime/sys_windows_386.s
+++ b/src/runtime/sys_windows_386.s
@@ -358,10 +358,11 @@ TEXT runtime·setldt(SB),NOSPLIT,$0
 	MOVL	CX, 0x14(FS)
 	RET
 
-// Sleep duration is in 100ns units.
-TEXT runtime·usleep1(SB),NOSPLIT,$0
-	MOVL	usec+0(FP), BX
-	MOVL	$runtime·usleep2(SB), AX // to hide from 8l
+// onosstack calls fn on OS stack.
+// func onosstack(fn unsafe.Pointer, arg uint32)
+TEXT runtime·onosstack(SB),NOSPLIT,$0
+	MOVL	fn+0(FP), AX		// to hide from 8l
+	MOVL	arg+4(FP), BX
 
 	// Execute call on m->g0 stack, in case we are not actually
 	// calling a system call wrapper, like when running under WINE.
@@ -423,6 +424,14 @@ TEXT runtime·usleep2(SB),NOSPLIT,$20
 	MOVL	BP, SP
 	RET
 
+// Runs on OS stack.
+TEXT runtime·switchtothread(SB),NOSPLIT,$0
+	MOVL	SP, BP
+	MOVL	runtime·_SwitchToThread(SB), AX
+	CALL	AX
+	MOVL	BP, SP
+	RET
+
 // func now() (sec int64, nsec int32)
 TEXT time·now(SB),NOSPLIT,$8-12
 	CALL	runtime·unixnano(SB)
diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s
index caa18e68e9..d550a818ce 100644
--- a/src/runtime/sys_windows_amd64.s
+++ b/src/runtime/sys_windows_amd64.s
@@ -381,10 +381,10 @@ TEXT runtime·settls(SB),NOSPLIT,$0
 	MOVQ	DI, 0x28(GS)
 	RET
 
-// Sleep duration is in 100ns units.
-TEXT runtime·usleep1(SB),NOSPLIT,$0
-	MOVL	usec+0(FP), BX
-	MOVQ	$runtime·usleep2(SB), AX // to hide from 6l
+// func onosstack(fn unsafe.Pointer, arg uint32)
+TEXT runtime·onosstack(SB),NOSPLIT,$0
+	MOVQ	fn+0(FP), AX		// to hide from 6l
+	MOVL	arg+8(FP), BX
 
 	// Execute call on m->g0 stack, in case we are not actually
 	// calling a system call wrapper, like when running under WINE.
@@ -445,6 +445,18 @@ TEXT runtime·usleep2(SB),NOSPLIT,$48
 	MOVQ	40(SP), SP
 	RET
 
+// Runs on OS stack.
+TEXT runtime·switchtothread(SB),NOSPLIT,$0
+	MOVQ	SP, AX
+	ANDQ	$~15, SP	// alignment as per Windows requirement
+	SUBQ	$(48), SP	// room for SP and 4 args as per Windows requirement
+				// plus one extra word to keep stack 16 bytes aligned
+	MOVQ	AX, 32(SP)
+	MOVQ	runtime·_SwitchToThread(SB), AX
+	CALL	AX
+	MOVQ	32(SP), SP
+	RET
+
 // func now() (sec int64, nsec int32)
 TEXT time·now(SB),NOSPLIT,$8-12
 	CALL	runtime·unixnano(SB)
diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go
index ebfa32ff8c..cd23b8da6b 100644
--- a/src/runtime/syscall_windows.go
+++ b/src/runtime/syscall_windows.go
@@ -88,6 +88,41 @@ func compileCallback(fn eface, cleanstack bool) (code uintptr) {
 	return callbackasmAddr(n)
 }
 
+const _LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800
+
+//go:linkname syscall_loadsystemlibrary syscall.loadsystemlibrary
+//go:nosplit
+func syscall_loadsystemlibrary(filename *uint16) (handle, err uintptr) {
+	c := &getg().m.syscall
+
+	if useLoadLibraryEx {
+		c.fn = getLoadLibraryEx()
+		c.n = 3
+		args := struct {
+			lpFileName *uint16
+			hFile      uintptr // always 0
+			flags      uint32
+		}{filename, 0, _LOAD_LIBRARY_SEARCH_SYSTEM32}
+		c.args = uintptr(noescape(unsafe.Pointer(&args)))
+	} else {
+		// User is on Windows XP or something ancient.
+		// The caller wanted to only load the filename DLL
+		// from the System32 directory but that facility
+		// doesn't exist, so just load it the normal way. This
+		// is a potential security risk, but so is Windows XP.
+		c.fn = getLoadLibrary()
+		c.n = 1
+		c.args = uintptr(noescape(unsafe.Pointer(&filename)))
+	}
+
+	cgocall(asmstdcallAddr, unsafe.Pointer(c))
+	handle = c.r1
+	if handle == 0 {
+		err = c.err
+	}
+	return
+}
+
 //go:linkname syscall_loadlibrary syscall.loadlibrary
 //go:nosplit
 func syscall_loadlibrary(filename *uint16) (handle, err uintptr) {
diff --git a/src/runtime/syscall_windows_test.go b/src/runtime/syscall_windows_test.go
index f3434d2026..ff045338c1 100644
--- a/src/runtime/syscall_windows_test.go
+++ b/src/runtime/syscall_windows_test.go
@@ -7,6 +7,8 @@ package runtime_test
 import (
 	"bytes"
 	"fmt"
+	"internal/syscall/windows/sysdll"
+	"internal/testenv"
 	"io/ioutil"
 	"os"
 	"os/exec"
@@ -771,3 +773,246 @@ func TestNumCPU(t *testing.T) {
 		t.Fatalf("SetProcessAffinityMask didn't set newmask of 0x%x. Current mask is 0x%x.", newmask, mask)
 	}
 }
+
+// See Issue 14959
+func TestDLLPreloadMitigation(t *testing.T) {
+	if _, err := exec.LookPath("gcc"); err != nil {
+		t.Skip("skipping test: gcc is missing")
+	}
+
+	tmpdir, err := ioutil.TempDir("", "TestDLLPreloadMitigation")
+	if err != nil {
+		t.Fatal("TempDir failed: ", err)
+	}
+	defer func() {
+		err := os.RemoveAll(tmpdir)
+		if err != nil {
+			t.Error(err)
+		}
+	}()
+
+	dir0, err := os.Getwd()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Chdir(dir0)
+
+	const src = `
+#include <stdint.h>
+#include <windows.h>
+
+uintptr_t cfunc() {
+   SetLastError(123);
+}
+`
+	srcname := "nojack.c"
+	err = ioutil.WriteFile(filepath.Join(tmpdir, srcname), []byte(src), 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	name := "nojack.dll"
+	cmd := exec.Command("gcc", "-shared", "-s", "-Werror", "-o", name, srcname)
+	cmd.Dir = tmpdir
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Fatalf("failed to build dll: %v - %v", err, string(out))
+	}
+	dllpath := filepath.Join(tmpdir, name)
+
+	dll := syscall.MustLoadDLL(dllpath)
+	dll.MustFindProc("cfunc")
+	dll.Release()
+
+	// Get into the directory with the DLL we'll load by base name
+	// ("nojack.dll") Think of this as the user double-clicking an
+	// installer from their Downloads directory where a browser
+	// silently downloaded some malicious DLLs.
+	os.Chdir(tmpdir)
+
+	// First before we can load a DLL from the current directory,
+	// loading it only as "nojack.dll", without an absolute path.
+	delete(sysdll.IsSystemDLL, name) // in case test was run repeatedly
+	dll, err = syscall.LoadDLL(name)
+	if err != nil {
+		t.Fatalf("failed to load %s by base name before sysdll registration: %v", name, err)
+	}
+	dll.Release()
+
+	// And now verify that if we register it as a system32-only
+	// DLL, the implicit loading from the current directory no
+	// longer works.
+	sysdll.IsSystemDLL[name] = true
+	dll, err = syscall.LoadDLL(name)
+	if err == nil {
+		dll.Release()
+		if wantLoadLibraryEx() {
+			t.Fatalf("Bad: insecure load of DLL by base name %q before sysdll registration: %v", name, err)
+		}
+		t.Skip("insecure load of DLL, but expected")
+	}
+}
+
+// wantLoadLibraryEx reports whether we expect LoadLibraryEx to work for tests.
+func wantLoadLibraryEx() bool {
+	return testenv.Builder() == "windows-amd64-gce" || testenv.Builder() == "windows-386-gce"
+}
+
+func TestLoadLibraryEx(t *testing.T) {
+	use, have, flags := runtime.LoadLibraryExStatus()
+	if use {
+		return // success.
+	}
+	if wantLoadLibraryEx() {
+		t.Fatalf("Expected LoadLibraryEx+flags to be available. (LoadLibraryEx=%v; flags=%v)",
+			have, flags)
+	}
+	t.Skipf("LoadLibraryEx not usable, but not expected. (LoadLibraryEx=%v; flags=%v)",
+		have, flags)
+}
+
+var (
+	modwinmm    = syscall.NewLazyDLL("winmm.dll")
+	modkernel32 = syscall.NewLazyDLL("kernel32.dll")
+
+	proctimeBeginPeriod = modwinmm.NewProc("timeBeginPeriod")
+	proctimeEndPeriod   = modwinmm.NewProc("timeEndPeriod")
+
+	procCreateEvent = modkernel32.NewProc("CreateEventW")
+	procSetEvent    = modkernel32.NewProc("SetEvent")
+)
+
+func timeBeginPeriod(period uint32) {
+	syscall.Syscall(proctimeBeginPeriod.Addr(), 1, uintptr(period), 0, 0)
+}
+
+func timeEndPeriod(period uint32) {
+	syscall.Syscall(proctimeEndPeriod.Addr(), 1, uintptr(period), 0, 0)
+}
+
+func createEvent() (syscall.Handle, error) {
+	r0, _, e0 := syscall.Syscall6(procCreateEvent.Addr(), 4, 0, 0, 0, 0, 0, 0)
+	if r0 == 0 {
+		return 0, syscall.Errno(e0)
+	}
+	return syscall.Handle(r0), nil
+}
+
+func setEvent(h syscall.Handle) error {
+	r0, _, e0 := syscall.Syscall(procSetEvent.Addr(), 1, uintptr(h), 0, 0)
+	if r0 == 0 {
+		return syscall.Errno(e0)
+	}
+	return nil
+}
+
+func benchChanToSyscallPing(b *testing.B) {
+	n := b.N
+	ch := make(chan int)
+	event, err := createEvent()
+	if err != nil {
+		b.Fatal(err)
+	}
+	go func() {
+		for i := 0; i < n; i++ {
+			syscall.WaitForSingleObject(event, syscall.INFINITE)
+			ch <- 1
+		}
+	}()
+	for i := 0; i < n; i++ {
+		err := setEvent(event)
+		if err != nil {
+			b.Fatal(err)
+		}
+		<-ch
+	}
+}
+
+func BenchmarkChanToSyscallPing1ms(b *testing.B) {
+	timeBeginPeriod(1)
+	benchChanToSyscallPing(b)
+	timeEndPeriod(1)
+}
+
+func BenchmarkChanToSyscallPing15ms(b *testing.B) {
+	benchChanToSyscallPing(b)
+}
+
+func benchSyscallToSyscallPing(b *testing.B) {
+	n := b.N
+	event1, err := createEvent()
+	if err != nil {
+		b.Fatal(err)
+	}
+	event2, err := createEvent()
+	if err != nil {
+		b.Fatal(err)
+	}
+	go func() {
+		for i := 0; i < n; i++ {
+			syscall.WaitForSingleObject(event1, syscall.INFINITE)
+			err := setEvent(event2)
+			if err != nil {
+				b.Fatal(err)
+			}
+		}
+	}()
+	for i := 0; i < n; i++ {
+		err := setEvent(event1)
+		if err != nil {
+			b.Fatal(err)
+		}
+		syscall.WaitForSingleObject(event2, syscall.INFINITE)
+	}
+}
+
+func BenchmarkSyscallToSyscallPing1ms(b *testing.B) {
+	timeBeginPeriod(1)
+	benchSyscallToSyscallPing(b)
+	timeEndPeriod(1)
+}
+
+func BenchmarkSyscallToSyscallPing15ms(b *testing.B) {
+	benchSyscallToSyscallPing(b)
+}
+
+func benchChanToChanPing(b *testing.B) {
+	n := b.N
+	ch1 := make(chan int)
+	ch2 := make(chan int)
+	go func() {
+		for i := 0; i < n; i++ {
+			<-ch1
+			ch2 <- 1
+		}
+	}()
+	for i := 0; i < n; i++ {
+		ch1 <- 1
+		<-ch2
+	}
+}
+
+func BenchmarkChanToChanPing1ms(b *testing.B) {
+	timeBeginPeriod(1)
+	benchChanToChanPing(b)
+	timeEndPeriod(1)
+}
+
+func BenchmarkChanToChanPing15ms(b *testing.B) {
+	benchChanToChanPing(b)
+}
+
+func benchOsYield(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		runtime.OsYield()
+	}
+}
+
+func BenchmarkOsYield1ms(b *testing.B) {
+	timeBeginPeriod(1)
+	benchOsYield(b)
+	timeEndPeriod(1)
+}
+
+func BenchmarkOsYield15ms(b *testing.B) {
+	benchOsYield(b)
+}
diff --git a/src/runtime/testdata/testprogcgo/threadpanic_windows.c b/src/runtime/testdata/testprogcgo/threadpanic_windows.c
index cf960db53a..6f896634a6 100644
--- a/src/runtime/testdata/testprogcgo/threadpanic_windows.c
+++ b/src/runtime/testdata/testprogcgo/threadpanic_windows.c
@@ -2,12 +2,13 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+#include <process.h>
 #include <stdlib.h>
 #include <stdio.h>
 
 void gopanic(void);
 
-static void*
+static unsigned int
 die(void* x)
 {
 	gopanic();
diff --git a/src/runtime/testdata/testprogcgo/traceback.go b/src/runtime/testdata/testprogcgo/traceback.go
new file mode 100644
index 0000000000..bb3e70a44f
--- /dev/null
+++ b/src/runtime/testdata/testprogcgo/traceback.go
@@ -0,0 +1,80 @@
+// Copyright 2016 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+// This program will crash.
+// We want the stack trace to include the C functions.
+// We use a fake traceback, and a symbolizer that dumps a string we recognize.
+
+/*
+#cgo CFLAGS: -g -O0
+
+#include <stdint.h>
+
+char *p;
+
+static int f3() {
+	*p = 0;
+	return 0;
+}
+
+static int f2() {
+	return f3();
+}
+
+static int f1() {
+	return f2();
+}
+
+struct cgoTracebackArg {
+	uintptr_t  context;
+	uintptr_t* buf;
+	uintptr_t  max;
+};
+
+struct cgoSymbolizerArg {
+	uintptr_t   pc;
+	const char* file;
+	uintptr_t   lineno;
+	const char* func;
+	uintptr_t   entry;
+	uintptr_t   more;
+	uintptr_t   data;
+};
+
+void cgoTraceback(void* parg) {
+	struct cgoTracebackArg* arg = (struct cgoTracebackArg*)(parg);
+	arg->buf[0] = 1;
+	arg->buf[1] = 2;
+	arg->buf[2] = 3;
+	arg->buf[3] = 0;
+}
+
+void cgoSymbolizer(void* parg) {
+	struct cgoSymbolizerArg* arg = (struct cgoSymbolizerArg*)(parg);
+	if (arg->pc != arg->data + 1) {
+		arg->file = "unexpected data";
+	} else {
+		arg->file = "cgo symbolizer";
+	}
+	arg->lineno = arg->data + 1;
+	arg->data++;
+}
+*/
+import "C"
+
+import (
+	"runtime"
+	"unsafe"
+)
+
+func init() {
+	register("CrashTraceback", CrashTraceback)
+}
+
+func CrashTraceback() {
+	runtime.SetCgoTraceback(0, unsafe.Pointer(C.cgoTraceback), nil, unsafe.Pointer(C.cgoSymbolizer))
+	C.f1()
+}
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 872b2ef903..16b9278641 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -5,6 +5,7 @@
 package runtime
 
 import (
+	"runtime/internal/atomic"
 	"runtime/internal/sys"
 	"unsafe"
 )
@@ -579,6 +580,22 @@ func tracebacktrap(pc, sp, lr uintptr, gp *g) {
 }
 
 func traceback1(pc, sp, lr uintptr, gp *g, flags uint) {
+	// If the goroutine is in cgo, and we have a cgo traceback, print that.
+	if iscgo && gp.m != nil && gp.m.ncgo > 0 && gp.syscallsp != 0 && gp.m.cgoCallers != nil && gp.m.cgoCallers[0] != 0 {
+		// Lock cgoCallers so that a signal handler won't
+		// change it, copy the array, reset it, unlock it.
+		// We are locked to the thread and are not running
+		// concurrently with a signal handler.
+		// We just have to stop a signal handler from interrupting
+		// in the middle of our copy.
+		atomic.Store(&gp.m.cgoCallersUse, 1)
+		cgoCallers := *gp.m.cgoCallers
+		gp.m.cgoCallers[0] = 0
+		atomic.Store(&gp.m.cgoCallersUse, 0)
+
+		printCgoTraceback(&cgoCallers)
+	}
+
 	var n int
 	if readgstatus(gp)&^_Gscan == _Gsyscall {
 		// Override registers if blocked in system call.
@@ -739,3 +756,233 @@ func isSystemGoroutine(gp *g) bool {
 		pc == timerprocPC ||
 		pc == gcBgMarkWorkerPC
 }
+
+// SetCgoTraceback records three C functions to use to gather
+// traceback information from C code and to convert that traceback
+// information into symbolic information. These are used when printing
+// stack traces for a program that uses cgo.
+//
+// The traceback and context functions may be called from a signal
+// handler, and must therefore use only async-signal safe functions.
+// The symbolizer function may be called while the program is
+// crashing, and so must be cautious about using memory.  None of the
+// functions may call back into Go.
+//
+// The context function will be called with a single argument, a
+// pointer to a struct:
+//
+// struct {
+//	Context uintptr
+// }
+//
+// In C syntax, this struct will be
+//
+// struct {
+//   uintptr_t Context;
+// };
+//
+// If the Context field is 0, the context function is being called to
+// record the current traceback context. It should record whatever
+// information is needed about the current point of execution to later
+// produce a stack trace, probably the stack pointer and PC. In this
+// case the context function will be called from C code.
+//
+// If the Context field is not 0, then it is a value returned by a
+// previous call to the context function. This case is called when the
+// context is no longer needed; that is, when the Go code is returning
+// to its C code caller. This permits permits the context function to
+// release any associated resources.
+//
+// While it would be correct for the context function to record a
+// complete a stack trace whenever it is called, and simply copy that
+// out in the traceback function, in a typical program the context
+// function will be called many times without ever recording a
+// traceback for that context. Recording a complete stack trace in a
+// call to the context function is likely to be inefficient.
+//
+// The traceback function will be called with a single argument, a
+// pointer to a struct:
+//
+// struct {
+//	Context uintptr
+//	Buf     *uintptr
+//	Max     uintptr
+// }
+//
+// In C syntax, this struct will be
+//
+// struct {
+//   uintptr_t  Context;
+//   uintptr_t* Buf;
+//   uintptr_t  Max;
+// };
+//
+// The Context field will be zero to gather a traceback from the
+// current program execution point. In this case, the traceback
+// function will be called from C code.
+//
+// Otherwise Context will be a value previously returned by a call to
+// the context function. The traceback function should gather a stack
+// trace from that saved point in the program execution. The traceback
+// function may be called from an execution thread other than the one
+// that recorded the context, but only when the context is known to be
+// valid and unchanging. The traceback function may also be called
+// deeper in the call stack on the same thread that recorded the
+// context. The traceback function may be called multiple times with
+// the same Context value; it will usually be appropriate to cache the
+// result, if possible, the first time this is called for a specific
+// context value.
+//
+// Buf is where the traceback information should be stored. It should
+// be PC values, such that Buf[0] is the PC of the caller, Buf[1] is
+// the PC of that function's caller, and so on.  Max is the maximum
+// number of entries to store.  The function should store a zero to
+// indicate the top of the stack, or that the caller is on a different
+// stack, presumably a Go stack.
+//
+// Unlike runtime.Callers, the PC values returned should, when passed
+// to the symbolizer function, return the file/line of the call
+// instruction.  No additional subtraction is required or appropriate.
+//
+// The symbolizer function will be called with a single argument, a
+// pointer to a struct:
+//
+// struct {
+//	PC      uintptr // program counter to fetch information for
+//	File    *byte   // file name (NUL terminated)
+//	Lineno  uintptr // line number
+//	Func    *byte   // function name (NUL terminated)
+//	Entry   uintptr // function entry point
+//	More    uintptr // set non-zero if more info for this PC
+//	Data    uintptr // unused by runtime, available for function
+// }
+//
+// In C syntax, this struct will be
+//
+// struct {
+//   uintptr_t PC;
+//   char*     File;
+//   uintptr_t Lineno;
+//   char*     Func;
+//   uintptr_t Entry;
+//   uintptr_t More;
+//   uintptr_t Data;
+// };
+//
+// The PC field will be a value returned by a call to the traceback
+// function.
+//
+// The first time the function is called for a particular traceback,
+// all the fields except PC will be 0. The function should fill in the
+// other fields if possible, setting them to 0/nil if the information
+// is not available. The Data field may be used to store any useful
+// information across calls. The More field should be set to non-zero
+// if there is more information for this PC, zero otherwise. If More
+// is set non-zero, the function will be called again with the same
+// PC, and may return different information (this is intended for use
+// with inlined functions). If More is zero, the function will be
+// called with the next PC value in the traceback. When the traceback
+// is complete, the function will be called once more with PC set to
+// zero; this may be used to free any information. Each call will
+// leave the fields of the struct set to the same values they had upon
+// return, except for the PC field when the More field is zero. The
+// function must not keep a copy of the struct pointer between calls.
+//
+// When calling SetCgoTraceback, the version argument is the version
+// number of the structs that the functions expect to receive.
+// Currently this must be zero.
+//
+// The symbolizer function may be nil, in which case the results of
+// the traceback function will be displayed as numbers. If the
+// traceback function is nil, the symbolizer function will never be
+// called. The context function may be nil, in which case the
+// traceback function will only be called with the context field set
+// to zero.  If the context function is nil, then calls from Go to C
+// to Go will not show a traceback for the C portion of the call stack.
+func SetCgoTraceback(version int, traceback, context, symbolizer unsafe.Pointer) {
+	if version != 0 {
+		panic("unsupported version")
+	}
+	if context != nil {
+		panic("SetCgoTraceback: context function not yet implemented")
+	}
+	cgoTraceback = traceback
+	cgoContext = context
+	cgoSymbolizer = symbolizer
+}
+
+var cgoTraceback unsafe.Pointer
+var cgoContext unsafe.Pointer
+var cgoSymbolizer unsafe.Pointer
+
+// cgoTracebackArg is the type passed to cgoTraceback.
+type cgoTracebackArg struct {
+	context uintptr
+	buf     *uintptr
+	max     uintptr
+}
+
+// cgoContextArg is the type passed to cgoContext.
+type cgoContextArg struct {
+	context uintptr
+}
+
+// cgoSymbolizerArg is the type passed to cgoSymbolizer.
+type cgoSymbolizerArg struct {
+	pc       uintptr
+	file     *byte
+	lineno   uintptr
+	funcName *byte
+	entry    uintptr
+	more     uintptr
+	data     uintptr
+}
+
+// cgoTraceback prints a traceback of callers.
+func printCgoTraceback(callers *cgoCallers) {
+	if cgoSymbolizer == nil {
+		for _, c := range callers {
+			if c == 0 {
+				break
+			}
+			print("non-Go function at pc=", hex(c), "\n")
+		}
+		return
+	}
+
+	call := cgocall
+	if panicking > 0 {
+		// We do not want to call into the scheduler when panicking.
+		call = asmcgocall
+	}
+
+	var arg cgoSymbolizerArg
+	for _, c := range callers {
+		if c == 0 {
+			break
+		}
+		arg.pc = c
+		for {
+			call(cgoSymbolizer, noescape(unsafe.Pointer(&arg)))
+			if arg.funcName != nil {
+				// Note that we don't print any argument
+				// information here, not even parentheses.
+				// The symbolizer must add that if
+				// appropriate.
+				println(gostringnocopy(arg.funcName))
+			} else {
+				println("non-Go function")
+			}
+			print("\t")
+			if arg.file != nil {
+				print(gostringnocopy(arg.file), ":", arg.lineno, " ")
+			}
+			print("pc=", hex(c), "\n")
+			if arg.more == 0 {
+				break
+			}
+		}
+	}
+	arg.pc = 0
+	call(cgoSymbolizer, noescape(unsafe.Pointer(&arg)))
+}
diff --git a/src/runtime/wbfat.go b/src/runtime/wbfat.go
deleted file mode 100644
index 8fe2cefd8d..0000000000
--- a/src/runtime/wbfat.go
+++ /dev/null
@@ -1,190 +0,0 @@
-// generated by wbfat_gen.go; use go generate
-
-package runtime
-
-//go:nosplit
-func writebarrierfat01(dst *[2]uintptr, _ uintptr, src [2]uintptr) {
-	dst[0] = src[0]
-	writebarrierptr(&dst[1], src[1])
-}
-
-//go:nosplit
-func writebarrierfat10(dst *[2]uintptr, _ uintptr, src [2]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	dst[1] = src[1]
-}
-
-//go:nosplit
-func writebarrierfat11(dst *[2]uintptr, _ uintptr, src [2]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	writebarrierptr(&dst[1], src[1])
-}
-
-//go:nosplit
-func writebarrierfat001(dst *[3]uintptr, _ uintptr, src [3]uintptr) {
-	dst[0] = src[0]
-	dst[1] = src[1]
-	writebarrierptr(&dst[2], src[2])
-}
-
-//go:nosplit
-func writebarrierfat010(dst *[3]uintptr, _ uintptr, src [3]uintptr) {
-	dst[0] = src[0]
-	writebarrierptr(&dst[1], src[1])
-	dst[2] = src[2]
-}
-
-//go:nosplit
-func writebarrierfat011(dst *[3]uintptr, _ uintptr, src [3]uintptr) {
-	dst[0] = src[0]
-	writebarrierptr(&dst[1], src[1])
-	writebarrierptr(&dst[2], src[2])
-}
-
-//go:nosplit
-func writebarrierfat100(dst *[3]uintptr, _ uintptr, src [3]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	dst[1] = src[1]
-	dst[2] = src[2]
-}
-
-//go:nosplit
-func writebarrierfat101(dst *[3]uintptr, _ uintptr, src [3]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	dst[1] = src[1]
-	writebarrierptr(&dst[2], src[2])
-}
-
-//go:nosplit
-func writebarrierfat110(dst *[3]uintptr, _ uintptr, src [3]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	writebarrierptr(&dst[1], src[1])
-	dst[2] = src[2]
-}
-
-//go:nosplit
-func writebarrierfat111(dst *[3]uintptr, _ uintptr, src [3]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	writebarrierptr(&dst[1], src[1])
-	writebarrierptr(&dst[2], src[2])
-}
-
-//go:nosplit
-func writebarrierfat0001(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	dst[0] = src[0]
-	dst[1] = src[1]
-	dst[2] = src[2]
-	writebarrierptr(&dst[3], src[3])
-}
-
-//go:nosplit
-func writebarrierfat0010(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	dst[0] = src[0]
-	dst[1] = src[1]
-	writebarrierptr(&dst[2], src[2])
-	dst[3] = src[3]
-}
-
-//go:nosplit
-func writebarrierfat0011(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	dst[0] = src[0]
-	dst[1] = src[1]
-	writebarrierptr(&dst[2], src[2])
-	writebarrierptr(&dst[3], src[3])
-}
-
-//go:nosplit
-func writebarrierfat0100(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	dst[0] = src[0]
-	writebarrierptr(&dst[1], src[1])
-	dst[2] = src[2]
-	dst[3] = src[3]
-}
-
-//go:nosplit
-func writebarrierfat0101(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	dst[0] = src[0]
-	writebarrierptr(&dst[1], src[1])
-	dst[2] = src[2]
-	writebarrierptr(&dst[3], src[3])
-}
-
-//go:nosplit
-func writebarrierfat0110(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	dst[0] = src[0]
-	writebarrierptr(&dst[1], src[1])
-	writebarrierptr(&dst[2], src[2])
-	dst[3] = src[3]
-}
-
-//go:nosplit
-func writebarrierfat0111(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	dst[0] = src[0]
-	writebarrierptr(&dst[1], src[1])
-	writebarrierptr(&dst[2], src[2])
-	writebarrierptr(&dst[3], src[3])
-}
-
-//go:nosplit
-func writebarrierfat1000(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	dst[1] = src[1]
-	dst[2] = src[2]
-	dst[3] = src[3]
-}
-
-//go:nosplit
-func writebarrierfat1001(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	dst[1] = src[1]
-	dst[2] = src[2]
-	writebarrierptr(&dst[3], src[3])
-}
-
-//go:nosplit
-func writebarrierfat1010(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	dst[1] = src[1]
-	writebarrierptr(&dst[2], src[2])
-	dst[3] = src[3]
-}
-
-//go:nosplit
-func writebarrierfat1011(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	dst[1] = src[1]
-	writebarrierptr(&dst[2], src[2])
-	writebarrierptr(&dst[3], src[3])
-}
-
-//go:nosplit
-func writebarrierfat1100(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	writebarrierptr(&dst[1], src[1])
-	dst[2] = src[2]
-	dst[3] = src[3]
-}
-
-//go:nosplit
-func writebarrierfat1101(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	writebarrierptr(&dst[1], src[1])
-	dst[2] = src[2]
-	writebarrierptr(&dst[3], src[3])
-}
-
-//go:nosplit
-func writebarrierfat1110(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	writebarrierptr(&dst[1], src[1])
-	writebarrierptr(&dst[2], src[2])
-	dst[3] = src[3]
-}
-
-//go:nosplit
-func writebarrierfat1111(dst *[4]uintptr, _ uintptr, src [4]uintptr) {
-	writebarrierptr(&dst[0], src[0])
-	writebarrierptr(&dst[1], src[1])
-	writebarrierptr(&dst[2], src[2])
-	writebarrierptr(&dst[3], src[3])
-}
diff --git a/src/runtime/wbfat_gen.go b/src/runtime/wbfat_gen.go
deleted file mode 100644
index cac19f5804..0000000000
--- a/src/runtime/wbfat_gen.go
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-import (
-	"flag"
-	"fmt"
-	"log"
-	"os"
-)
-
-func main() {
-	flag.Parse()
-	if flag.NArg() > 0 {
-		f, err := os.Create(flag.Arg(0))
-		if err != nil {
-			log.Fatal(err)
-		}
-		os.Stdout = f
-	}
-	fmt.Printf("// generated by wbfat_gen.go; use go generate\n\n")
-	fmt.Printf("package runtime\n")
-	for i := uint(2); i <= 4; i++ {
-		for j := 1; j < 1<<i; j++ {
-			fmt.Printf("\n//go:nosplit\n")
-			fmt.Printf("func writebarrierfat%0*b(dst *[%d]uintptr, _ uintptr, src [%d]uintptr) {\n", int(i), j, i, i)
-			for k := uint(0); k < i; k++ {
-				if j&(1<<(i-1-k)) != 0 {
-					fmt.Printf("\twritebarrierptr(&dst[%d], src[%d])\n", k, k)
-				} else {
-					fmt.Printf("\tdst[%d] = src[%d]\n", k, k)
-				}
-			}
-			fmt.Printf("}\n")
-		}
-	}
-}
author	Austin Clements <austin@google.com>	2016-04-05 16:37:29 -0400
committer	Austin Clements <austin@google.com>	2016-04-05 16:37:54 -0400
commit	6d6e16001bb04d4bf60c6d14d1f64684a043ef6c (patch)
tree	53d7dd0bbe4f919fec4a15bde9811a9608b54627 /src/runtime
parent	22972d2207424edc481b7c127788f573a726dfe7 (diff)
parent	5e1b7bdecf7a8b5b5d06633758bf53e475902414 (diff)
download	go-6d6e16001bb04d4bf60c6d14d1f64684a043ef6c.tar.xz