From 31cf1c17792d4da9dae2504c703633a0db8072c7 Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Thu, 7 Apr 2016 11:47:32 +1200 Subject: runtime: clamp OS-reported number of processors to _MaxGomaxprocs So that all Go processes do not die on startup on a system with >256 CPUs. I tested this by hacking osinit to set ncpu to 1000. Updates #15131 Change-Id: I52e061a0de97be41d684dd8b748fa9087d6f1aef Reviewed-on: https://go-review.googlesource.com/21599 Reviewed-by: Brad Fitzpatrick --- src/runtime/proc.go | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/runtime/proc.go') diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 5145c84aea..1f55b0fa21 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -449,6 +449,9 @@ func schedinit() { sched.lastpoll = uint64(nanotime()) procs := int(ncpu) + if procs > _MaxGomaxprocs { + procs = _MaxGomaxprocs + } if n := atoi(gogetenv("GOMAXPROCS")); n > 0 { if n > _MaxGomaxprocs { n = _MaxGomaxprocs -- cgit v1.3 From e4f1d9cf2e948eb0f0bb91d7c253ab61dfff3a59 Mon Sep 17 00:00:00 2001 From: Emmanuel Odeke Date: Sun, 27 Mar 2016 17:29:53 -0700 Subject: runtime: make execution error panic values implement the Error interface Make execution panics implement Error as mandated by https://golang.org/ref/spec#Run_time_panics, instead of panics with strings. Fixes #14965 Change-Id: I7827f898b9b9c08af541db922cc24fa0800ff18a Reviewed-on: https://go-review.googlesource.com/21214 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- src/runtime/chan.go | 10 +++++----- src/runtime/crash_test.go | 46 ++++++++++++++++++++++++++++++++++++++++++++++ src/runtime/error.go | 13 ++++++++++++- src/runtime/hashmap.go | 4 ++-- src/runtime/malloc.go | 2 +- src/runtime/proc.go | 2 +- src/runtime/select.go | 2 +- 7 files changed, 68 insertions(+), 11 deletions(-) (limited to 'src/runtime/proc.go') diff --git a/src/runtime/chan.go b/src/runtime/chan.go index 954b389f47..8543cb4c9c 100644 --- a/src/runtime/chan.go +++ b/src/runtime/chan.go @@ -64,7 +64,7 @@ func makechan(t *chantype, size int64) *hchan { throw("makechan: bad alignment") } if size < 0 || int64(uintptr(size)) != size || (elem.size > 0 && uintptr(size) > (_MaxMem-hchanSize)/elem.size) { - panic("makechan: size out of range") + panic(plainError("makechan: size out of range")) } var c *hchan @@ -171,7 +171,7 @@ func chansend(t *chantype, c *hchan, ep unsafe.Pointer, block bool, callerpc uin if c.closed != 0 { unlock(&c.lock) - panic("send on closed channel") + panic(plainError("send on closed channel")) } if sg := c.recvq.dequeue(); sg != nil { @@ -231,7 +231,7 @@ func chansend(t *chantype, c *hchan, ep unsafe.Pointer, block bool, callerpc uin if c.closed == 0 { throw("chansend: spurious wakeup") } - panic("send on closed channel") + panic(plainError("send on closed channel")) } gp.param = nil if mysg.releasetime > 0 { @@ -302,13 +302,13 @@ func sendDirect(t *_type, sg *sudog, src unsafe.Pointer) { func closechan(c *hchan) { if c == nil { - panic("close of nil channel") + panic(plainError("close of nil channel")) } lock(&c.lock) if c.closed != 0 { unlock(&c.lock) - panic("close of closed channel") + panic(plainError("close of closed channel")) } if raceenabled { diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go index 85fcc69fed..2941b8e8f8 100644 --- a/src/runtime/crash_test.go +++ b/src/runtime/crash_test.go @@ -273,6 +273,52 @@ func TestGoexitInPanic(t *testing.T) { } } +// Issue 14965: Runtime panics should be of type runtime.Error +func 
TestRuntimePanicWithRuntimeError(t *testing.T) { + testCases := [...]func(){ + 0: func() { + var m map[uint64]bool + m[1234] = true + }, + 1: func() { + ch := make(chan struct{}) + close(ch) + close(ch) + }, + 2: func() { + var ch = make(chan struct{}) + close(ch) + ch <- struct{}{} + }, + 3: func() { + var s = make([]int, 2) + _ = s[2] + }, + 4: func() { + n := -1 + _ = make(chan bool, n) + }, + 5: func() { + close((chan bool)(nil)) + }, + } + + for i, fn := range testCases { + got := panicValue(fn) + if _, ok := got.(runtime.Error); !ok { + t.Errorf("test #%d: recovered value %v(type %T) does not implement runtime.Error", i, got, got) + } + } +} + +func panicValue(fn func()) (recovered interface{}) { + defer func() { + recovered = recover() + }() + fn() + return +} + func TestPanicAfterGoexit(t *testing.T) { // an uncaught panic should still work after goexit output := runTestProg(t, "testprog", "PanicAfterGoexit") diff --git a/src/runtime/error.go b/src/runtime/error.go index 3e1ec4bc5a..15f6bdf014 100644 --- a/src/runtime/error.go +++ b/src/runtime/error.go @@ -50,6 +50,17 @@ func (e errorString) Error() string { return "runtime error: " + string(e) } +// plainError represents a runtime error described a string without +// the prefix "runtime error: " after invoking errorString.Error(). +// See Issue #14965. +type plainError string + +func (e plainError) RuntimeError() {} + +func (e plainError) Error() string { + return string(e) +} + type stringer interface { String() string } @@ -82,5 +93,5 @@ func printany(i interface{}) { // called from generated code func panicwrap(pkg, typ, meth string) { - panic("value method " + pkg + "." + typ + "." + meth + " called using nil *" + typ + " pointer") + panic(plainError("value method " + pkg + "." + typ + "." 
+ meth + " called using nil *" + typ + " pointer")) } diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go index 80b2b5338c..9e18192cd8 100644 --- a/src/runtime/hashmap.go +++ b/src/runtime/hashmap.go @@ -194,7 +194,7 @@ func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap { } if hint < 0 || int64(int32(hint)) != hint { - panic("makemap: size out of range") + panic(plainError("makemap: size out of range")) // TODO: make hint an int, then none of this nonsense } @@ -428,7 +428,7 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe func mapassign1(t *maptype, h *hmap, key unsafe.Pointer, val unsafe.Pointer) { if h == nil { - panic("assignment to entry in nil map") + panic(plainError("assignment to entry in nil map")) } if raceenabled { callerpc := getcallerpc(unsafe.Pointer(&t)) diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 5f1e2f64c0..ee4728c9a5 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -793,7 +793,7 @@ func newarray(typ *_type, n uintptr) unsafe.Pointer { flags |= flagNoScan } if int(n) < 0 || (typ.size > 0 && n > _MaxMem/typ.size) { - panic("runtime: allocation size out of range") + panic(plainError("runtime: allocation size out of range")) } return mallocgc(typ.size*n, typ, flags) } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 1f55b0fa21..1a9dbd6c53 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -381,7 +381,7 @@ func badmcall2(fn func(*g)) { } func badreflectcall() { - panic("runtime: arg size to reflect.call more than 1GB") + panic(plainError("arg size to reflect.call more than 1GB")) } func lockedOSThread() bool { diff --git a/src/runtime/select.go b/src/runtime/select.go index c80c833b15..9810db5453 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -594,7 +594,7 @@ retc: sclose: // send on closed channel selunlock(scases, lockorder) - panic("send on closed channel") + panic(plainError("send on closed channel")) } func (c *hchan) sortkey() uintptr { -- cgit v1.3 From 7d469179e6e3dafe16700b7fc1cf8683ad9453fa Mon Sep 17 00:00:00 2001 From: David Crawshaw Date: Mon, 28 Mar 2016 10:32:27 -0400 Subject: cmd/compile, etc: store method tables as offsets This CL introduces the typeOff type and a lookup method of the same name that can turn a typeOff offset into an *rtype. In a typical Go binary (built with buildmode=exe, pie, c-archive, or c-shared), there is one moduledata and all typeOff values are offsets relative to firstmoduledata.types. This makes computing the pointer cheap in typical programs. With buildmode=shared (and one day, buildmode=plugin) there are multiple modules whose relative offset is determined at runtime. We identify a type in the general case by the pair of the original *rtype that references it and its typeOff value. We determine the module from the original pointer, and then use the typeOff from there to compute the final *rtype. To ensure there is only one *rtype representing each type, the runtime initializes a typemap for each module, using any identical type from an earlier module when resolving that offset. This means that types computed from an offset match the type mapped by the pointer dynamic relocations. A series of followup CLs will replace other *rtype values with typeOff (and name/*string with nameOff). For types created at runtime by reflect, type offsets are treated as global IDs and reference into a reflect offset map kept by the runtime. 
darwin/amd64: cmd/go: -57KB (0.6%) jujud: -557KB (0.8%) linux/amd64 PIE: cmd/go: -361KB (3.0%) jujud: -3.5MB (4.2%) For #6853. Change-Id: Icf096fd884a0a0cb9f280f46f7a26c70a9006c96 Reviewed-on: https://go-review.googlesource.com/21285 Reviewed-by: Ian Lance Taylor Run-TryBot: David Crawshaw TryBot-Result: Gobot Gobot --- src/cmd/compile/internal/gc/reflect.go | 75 +++++--- src/cmd/internal/obj/link.go | 15 +- src/cmd/link/internal/ld/deadcode.go | 14 +- src/cmd/link/internal/ld/decodesym.go | 22 +-- src/reflect/export_test.go | 2 +- src/reflect/type.go | 267 +++++++++++++++++++++------- src/reflect/value.go | 15 +- src/runtime/iface.go | 10 +- src/runtime/proc.go | 3 +- src/runtime/runtime1.go | 33 ++++ src/runtime/symtab.go | 2 + src/runtime/type.go | 307 ++++++++++++++++++++++++++++++++- 12 files changed, 637 insertions(+), 128 deletions(-) (limited to 'src/runtime/proc.go') diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go index ea67634260..2bd50b4665 100644 --- a/src/cmd/compile/internal/gc/reflect.go +++ b/src/cmd/compile/internal/gc/reflect.go @@ -75,7 +75,7 @@ func uncommonSize(t *Type) int { // Sizeof(runtime.uncommontype{}) if t.Sym == nil && len(methods(t)) == 0 { return 0 } - return 2*Widthptr + 2*Widthint + return 2 * Widthptr } func makefield(name string, t *Type) *Field { @@ -580,13 +580,23 @@ func dextratype(s *Sym, ot int, t *Type, dataAdd int) int { ot = dgopkgpath(s, ot, typePkg(t)) - // slice header - ot = dsymptr(s, ot, s, ot+Widthptr+2*Widthint+dataAdd) - - n := len(m) - ot = duintxx(s, ot, uint64(n), Widthint) - ot = duintxx(s, ot, uint64(n), Widthint) + dataAdd += Widthptr + 2 + 2 + if Widthptr == 8 { + dataAdd += 4 + } + mcount := len(m) + if mcount != int(uint16(mcount)) { + Fatalf("too many methods on %s: %d", t, mcount) + } + if dataAdd != int(uint16(dataAdd)) { + Fatalf("methods are too far away on %s: %d", t, dataAdd) + } + ot = duint16(s, ot, uint16(mcount)) + ot = duint16(s, ot, uint16(dataAdd)) + if Widthptr == 8 { + ot = duint32(s, ot, 0) // align for following pointers + } return ot } @@ -609,6 +619,7 @@ func typePkg(t *Type) *Pkg { // dextratypeData dumps the backing array for the []method field of // runtime.uncommontype. func dextratypeData(s *Sym, ot int, t *Type) int { + lsym := Linksym(s) for _, a := range methods(t) { // ../../../../runtime/type.go:/method exported := exportname(a.name) @@ -617,21 +628,24 @@ func dextratypeData(s *Sym, ot int, t *Type) int { pkg = a.pkg } ot = dname(s, ot, a.name, "", pkg, exported) - ot = dmethodptr(s, ot, dtypesym(a.mtype)) - ot = dmethodptr(s, ot, a.isym) - ot = dmethodptr(s, ot, a.tsym) + ot = dmethodptrOffLSym(lsym, ot, Linksym(dtypesym(a.mtype))) + ot = dmethodptrOffLSym(lsym, ot, Linksym(a.isym)) + ot = dmethodptrOffLSym(lsym, ot, Linksym(a.tsym)) + if Widthptr == 8 { + ot = duintxxLSym(lsym, ot, 0, 4) // pad to reflect.method size + } } return ot } -func dmethodptr(s *Sym, off int, x *Sym) int { - duintptr(s, off, 0) - r := obj.Addrel(Linksym(s)) - r.Off = int32(off) - r.Siz = uint8(Widthptr) - r.Sym = Linksym(x) - r.Type = obj.R_METHOD - return off + Widthptr +func dmethodptrOffLSym(s *obj.LSym, ot int, x *obj.LSym) int { + duintxxLSym(s, ot, 0, 4) + r := obj.Addrel(s) + r.Off = int32(ot) + r.Siz = 4 + r.Sym = x + r.Type = obj.R_METHODOFF + return ot + 4 } var kinds = []int{ @@ -1286,18 +1300,29 @@ ok: ggloblsym(s, int32(ot), int16(dupok|obj.RODATA)) // generate typelink.foo pointing at s = type.foo. 
+ // // The linker will leave a table of all the typelinks for - // types in the binary, so reflect can find them. - // We only need the link for unnamed composites that - // we want be able to find. - if t.Sym == nil { + // types in the binary, so the runtime can find them. + // + // When buildmode=shared, all types are in typelinks so the + // runtime can deduplicate type pointers. + keep := Ctxt.Flag_dynlink + if !keep && t.Sym == nil { + // For an unnamed type, we only need the link if the type can + // be created at run time by reflect.PtrTo and similar + // functions. If the type exists in the program, those + // functions must return the existing type structure rather + // than creating a new one. switch t.Etype { case TPTR32, TPTR64, TARRAY, TCHAN, TFUNC, TMAP, TSTRUCT: - slink := typelinkLSym(t) - dsymptrOffLSym(slink, 0, Linksym(s), 0) - ggloblLSym(slink, 4, int16(dupok|obj.RODATA)) + keep = true } } + if keep { + slink := typelinkLSym(t) + dsymptrOffLSym(slink, 0, Linksym(s), 0) + ggloblLSym(slink, 4, int16(dupok|obj.RODATA)) + } return s } diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index 42aaa5f4f0..55c9f4f9e2 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -457,8 +457,8 @@ const ( // R_ADDRMIPS (only used on mips64) resolves to a 32-bit external address, // by loading the address into a register with two instructions (lui, ori). R_ADDRMIPS - // R_ADDROFF resolves to an offset from the beginning of the section holding - // the data being relocated to the referenced symbol. + // R_ADDROFF resolves to a 32-bit offset from the beginning of the section + // holding the data being relocated to the referenced symbol. R_ADDROFF R_SIZE R_CALL @@ -492,11 +492,12 @@ const ( // should be linked into the final binary, even if there are no other // direct references. (This is used for types reachable by reflection.) R_USETYPE - // R_METHOD resolves to an *rtype for a method. - // It is used when linking from the uncommonType of another *rtype, and - // may be set to zero by the linker if it determines the method text is - // unreachable by the linked program. - R_METHOD + // R_METHODOFF resolves to a 32-bit offset from the beginning of the section + // holding the data being relocated to the referenced symbol. + // It is a variant of R_ADDROFF used when linking from the uncommonType of a + // *rtype, and may be set to zero by the linker if it determines the method + // text is unreachable by the linked program. + R_METHODOFF R_POWER_TOC R_GOTPCREL // R_JMPMIPS (only used on mips64) resolves to non-PC-relative target address diff --git a/src/cmd/link/internal/ld/deadcode.go b/src/cmd/link/internal/ld/deadcode.go index 51fae02ef0..c83a104a54 100644 --- a/src/cmd/link/internal/ld/deadcode.go +++ b/src/cmd/link/internal/ld/deadcode.go @@ -19,7 +19,7 @@ import ( // // This flood fill is wrapped in logic for pruning unused methods. // All methods are mentioned by relocations on their receiver's *rtype. -// These relocations are specially defined as R_METHOD by the compiler +// These relocations are specially defined as R_METHODOFF by the compiler // so we can detect and manipulated them here. // // There are three ways a method of a reachable type can be invoked: @@ -100,7 +100,7 @@ func deadcode(ctxt *Link) { d.flood() } - // Remove all remaining unreached R_METHOD relocations. + // Remove all remaining unreached R_METHODOFF relocations. 
for _, m := range d.markableMethods { for _, r := range m.r { d.cleanupReloc(r) @@ -167,7 +167,7 @@ var markextra = []string{ type methodref struct { m methodsig src *LSym // receiver type symbol - r [3]*Reloc // R_METHOD relocations to fields of runtime.method + r [3]*Reloc // R_METHODOFF relocations to fields of runtime.method } func (m methodref) ifn() *LSym { return m.r[1].Sym } @@ -190,7 +190,7 @@ type deadcodepass struct { func (d *deadcodepass) cleanupReloc(r *Reloc) { if r.Sym.Attr.Reachable() { - r.Type = obj.R_ADDR + r.Type = obj.R_ADDROFF } else { if Debug['v'] > 1 { fmt.Fprintf(d.ctxt.Bso, "removing method %s\n", r.Sym.Name) @@ -217,7 +217,7 @@ func (d *deadcodepass) mark(s, parent *LSym) { func (d *deadcodepass) markMethod(m methodref) { for _, r := range m.r { d.mark(r.Sym, m.src) - r.Type = obj.R_ADDR + r.Type = obj.R_ADDROFF } } @@ -291,14 +291,14 @@ func (d *deadcodepass) flood() { } } - mpos := 0 // 0-3, the R_METHOD relocs of runtime.uncommontype + mpos := 0 // 0-3, the R_METHODOFF relocs of runtime.uncommontype var methods []methodref for i := 0; i < len(s.R); i++ { r := &s.R[i] if r.Sym == nil { continue } - if r.Type != obj.R_METHOD { + if r.Type != obj.R_METHODOFF { d.mark(r.Sym, s) continue } diff --git a/src/cmd/link/internal/ld/decodesym.go b/src/cmd/link/internal/ld/decodesym.go index 7daa8bc812..5fa8b4c81f 100644 --- a/src/cmd/link/internal/ld/decodesym.go +++ b/src/cmd/link/internal/ld/decodesym.go @@ -47,9 +47,9 @@ func decode_inuxi(p []byte, sz int) uint64 { } } -func commonsize() int { return 6*SysArch.PtrSize + 8 } // runtime._type -func structfieldSize() int { return 3 * SysArch.PtrSize } // runtime.structfield -func uncommonSize() int { return 2*SysArch.PtrSize + 2*SysArch.IntSize } // runtime.uncommontype +func commonsize() int { return 6*SysArch.PtrSize + 8 } // runtime._type +func structfieldSize() int { return 3 * SysArch.PtrSize } // runtime.structfield +func uncommonSize() int { return 2 * SysArch.PtrSize } // runtime.uncommontype // Type.commonType.kind func decodetype_kind(s *LSym) uint8 { @@ -341,12 +341,14 @@ func decodetype_methods(s *LSym) []methodsig { // just Sizeof(rtype) } - numMethods := int(decode_inuxi(s.P[off+2*SysArch.PtrSize:], SysArch.IntSize)) - r := decode_reloc(s, int32(off+SysArch.PtrSize)) - if r.Sym != s { - panic(fmt.Sprintf("method slice pointer in %s leads to a different symbol %s", s, r.Sym)) + mcount := int(decode_inuxi(s.P[off+SysArch.PtrSize:], 2)) + moff := int(decode_inuxi(s.P[off+SysArch.PtrSize+2:], 2)) + off += moff // offset to array of reflect.method values + var sizeofMethod int // sizeof reflect.method in program + if SysArch.PtrSize == 4 { + sizeofMethod = 4 * SysArch.PtrSize + } else { + sizeofMethod = 3 * SysArch.PtrSize } - off = int(r.Add) // array of reflect.method values - sizeofMethod := 4 * SysArch.PtrSize // sizeof reflect.method in program - return decode_methodsig(s, off, sizeofMethod, numMethods) + return decode_methodsig(s, off, sizeofMethod, mcount) } diff --git a/src/reflect/export_test.go b/src/reflect/export_test.go index 037c953718..2769e0db40 100644 --- a/src/reflect/export_test.go +++ b/src/reflect/export_test.go @@ -90,7 +90,7 @@ func FirstMethodNameBytes(t Type) *byte { if ut == nil { panic("type has no methods") } - m := ut.methods[0] + m := ut.methods()[0] if *m.name.data(0)&(1<<2) == 0 { panic("method name does not have pkgPath *string") } diff --git a/src/reflect/type.go b/src/reflect/type.go index 7104fde60a..c7ed402be2 100644 --- a/src/reflect/type.go +++ b/src/reflect/type.go @@ 
-288,10 +288,10 @@ type typeAlg struct { // Method on non-interface type type method struct { - name name // name of method - mtyp *rtype // method type (without receiver) - ifn unsafe.Pointer // fn used in interface call (one-word receiver) - tfn unsafe.Pointer // fn used for normal method call + name name // name of method + mtyp typeOff // method type (without receiver) + ifn textOff // fn used in interface call (one-word receiver) + tfn textOff // fn used for normal method call } // uncommonType is present only for types with names or methods @@ -299,8 +299,9 @@ type method struct { // Using a pointer to this struct reduces the overall size required // to describe an unnamed type with no methods. type uncommonType struct { - pkgPath *string // import path; nil for built-in types like int, string - methods []method // methods associated with type + pkgPath *string // import path; nil for built-in types like int, string + mcount uint16 // number of methods + moff uint16 // offset from this uncommontype to [mcount]method } // ChanDir represents a channel type's direction. @@ -589,6 +590,10 @@ var kindNames = []string{ UnsafePointer: "unsafe.Pointer", } +func (t *uncommonType) methods() []method { + return (*[1 << 16]method)(add(unsafe.Pointer(t), uintptr(t.moff)))[:t.mcount:t.mcount] +} + func (t *uncommonType) PkgPath() string { if t == nil || t.pkgPath == nil { return "" @@ -596,13 +601,55 @@ func (t *uncommonType) PkgPath() string { return *t.pkgPath } +// resolveTypeOff resolves an *rtype offset from a base type. +// The (*rtype).typeOff method is a convenience wrapper for this function. +// Implemented in the runtime package. +func resolveTypeOff(rtype unsafe.Pointer, off int32) unsafe.Pointer + +// resolveTextOff resolves an function pointer offset from a base type. +// The (*rtype).textOff method is a convenience wrapper for this function. +// Implemented in the runtime package. +func resolveTextOff(rtype unsafe.Pointer, off int32) unsafe.Pointer + +// addReflectOff adds a pointer to the reflection lookup map in the runtime. +// It returns a new ID that can be used as a typeOff or textOff, and will +// be resolved correctly. Implemented in the runtime package. +func addReflectOff(ptr unsafe.Pointer) int32 + +// resolveReflectType adds a *rtype to the reflection lookup map in the runtime. +// It returns a new typeOff that can be used to refer to the pointer. +func resolveReflectType(t *rtype) typeOff { + return typeOff(addReflectOff(unsafe.Pointer(t))) +} + +// resolveReflectText adds a function pointer to the reflection lookup map in +// the runtime. It returns a new textOff that can be used to refer to the +// pointer. 
+func resolveReflectText(ptr unsafe.Pointer) textOff { + return textOff(addReflectOff(ptr)) +} + +type typeOff int32 // offset to an *rtype +type textOff int32 // offset from top of text section + +func (t *rtype) typeOff(off typeOff) *rtype { + if off == 0 { + return nil + } + return (*rtype)(resolveTypeOff(unsafe.Pointer(t), int32(off))) +} + +func (t *rtype) textOff(off textOff) unsafe.Pointer { + return resolveTextOff(unsafe.Pointer(t), int32(off)) +} + func (t *rtype) uncommon() *uncommonType { if t.tflag&tflagUncommon == 0 { return nil } switch t.Kind() { case Struct: - return &(*structTypeWithMethods)(unsafe.Pointer(t)).u + return &(*structTypeUncommon)(unsafe.Pointer(t)).u case Ptr: type u struct { ptrType @@ -688,7 +735,7 @@ func (t *rtype) NumMethod() int { if ut == nil { return 0 } - return len(ut.methods) + return int(ut.mcount) } func (t *rtype) Method(i int) (m Method) { @@ -698,10 +745,10 @@ func (t *rtype) Method(i int) (m Method) { } ut := t.uncommon() - if ut == nil || i < 0 || i >= len(ut.methods) { + if ut == nil || i < 0 || i >= int(ut.mcount) { panic("reflect: Method index out of range") } - p := &ut.methods[i] + p := ut.methods()[i] m.Name = p.name.name() fl := flag(Func) if !p.name.isExported() { @@ -712,8 +759,9 @@ func (t *rtype) Method(i int) (m Method) { m.PkgPath = *pkgPath fl |= flagStickyRO } - if p.mtyp != nil { - ft := (*funcType)(unsafe.Pointer(p.mtyp)) + if p.mtyp != 0 { + mtyp := t.typeOff(p.mtyp) + ft := (*funcType)(unsafe.Pointer(mtyp)) in := make([]Type, 0, 1+len(ft.in())) in = append(in, t) for _, arg := range ft.in() { @@ -723,9 +771,10 @@ func (t *rtype) Method(i int) (m Method) { for _, ret := range ft.out() { out = append(out, ret) } - mt := FuncOf(in, out, p.mtyp.IsVariadic()) + mt := FuncOf(in, out, ft.IsVariadic()) m.Type = mt - fn := unsafe.Pointer(&p.tfn) + tfn := t.textOff(p.tfn) + fn := unsafe.Pointer(&tfn) m.Func = Value{mt.(*rtype), fn, fl} } m.Index = i @@ -741,8 +790,9 @@ func (t *rtype) MethodByName(name string) (m Method, ok bool) { if ut == nil { return Method{}, false } - for i := range ut.methods { - p := &ut.methods[i] + utmethods := ut.methods() + for i := 0; i < int(ut.mcount); i++ { + p := utmethods[i] if p.name.name() == name { return t.Method(i), true } @@ -1430,10 +1480,11 @@ func implements(T, V *rtype) bool { return false } i := 0 - for j := 0; j < len(v.methods); j++ { + vmethods := v.methods() + for j := 0; j < int(v.mcount); j++ { tm := &t.methods[i] - vm := &v.methods[j] - if vm.name.name() == tm.name.name() && vm.mtyp == tm.typ { + vm := vmethods[j] + if vm.name.name() == tm.name.name() && V.typeOff(vm.mtyp) == tm.typ { if i++; i >= len(t.methods) { return true } @@ -2161,21 +2212,55 @@ func SliceOf(t Type) Type { return cachePut(ckey, &slice.rtype) } -// structTypeWithMethods is a structType created at runtime with StructOf. -// It is needed to pin the []method slice from its associated uncommonType struct. -// Keep in sync with the memory layout of structType. -type structTypeWithMethods struct { - structType - u uncommonType -} - // The structLookupCache caches StructOf lookups. // StructOf does not share the common lookupCache since we need to pin -// the *structType and its associated *uncommonType (especially the -// []method slice field of that uncommonType.) +// the memory associated with *structTypeFixedN. 
var structLookupCache struct { sync.RWMutex - m map[uint32][]*structTypeWithMethods // keyed by hash calculated in StructOf + m map[uint32][]interface { + common() *rtype + } // keyed by hash calculated in StructOf +} + +type structTypeUncommon struct { + structType + u uncommonType +} + +// A *rtype representing a struct is followed directly in memory by an +// array of method objects representing the methods attached to the +// struct. To get the same layout for a run time generated type, we +// need an array directly following the uncommonType memory. The types +// structTypeFixed4, ...structTypeFixedN are used to do this. +// +// A similar strategy is used for funcTypeFixed4, ...funcTypeFixedN. + +// TODO(crawshaw): as these structTypeFixedN and funcTypeFixedN structs +// have no methods, they could be defined at runtime using the StructOf +// function. + +type structTypeFixed4 struct { + structType + u uncommonType + m [4]method +} + +type structTypeFixed8 struct { + structType + u uncommonType + m [8]method +} + +type structTypeFixed16 struct { + structType + u uncommonType + m [16]method +} + +type structTypeFixed32 struct { + structType + u uncommonType + m [32]method } // StructOf returns the struct type containing fields. @@ -2192,7 +2277,7 @@ func StructOf(fields []StructField) Type { typalign uint8 comparable = true hashable = true - typ = new(structTypeWithMethods) + methods []method fs = make([]structField, len(fields)) repr = make([]byte, 0, 64) @@ -2269,7 +2354,6 @@ func StructOf(fields []StructField) Type { } return recv.Field(ifield).Method(imethod).Call(args) }) - } else { tfn = MakeFunc(m.typ, func(in []Value) []Value { var args []Value @@ -2287,47 +2371,59 @@ func StructOf(fields []StructField) Type { } return recv.Field(ifield).Method(imethod).Call(args) }) - } - typ.u.methods = append( - typ.u.methods, - method{ - name: m.name, - mtyp: m.typ, - ifn: unsafe.Pointer(&ifn), - tfn: unsafe.Pointer(&tfn), - }, - ) + methods = append(methods, method{ + name: m.name, + mtyp: resolveReflectType(m.typ), + ifn: resolveReflectText(unsafe.Pointer(&ifn)), + tfn: resolveReflectText(unsafe.Pointer(&tfn)), + }) } case Ptr: ptr := (*ptrType)(unsafe.Pointer(ft)) if unt := ptr.uncommon(); unt != nil { - for _, m := range unt.methods { + for _, m := range unt.methods() { if m.name.pkgPath() != nil { // TODO(sbinet) panic("reflect: embedded interface with unexported method(s) not implemented") } - typ.u.methods = append(typ.u.methods, m) + methods = append(methods, method{ + name: m.name, + mtyp: resolveReflectType(ptr.typeOff(m.mtyp)), + ifn: resolveReflectText(ptr.textOff(m.ifn)), + tfn: resolveReflectText(ptr.textOff(m.tfn)), + }) } } if unt := ptr.elem.uncommon(); unt != nil { - for _, m := range unt.methods { + for _, m := range unt.methods() { if m.name.pkgPath() != nil { // TODO(sbinet) panic("reflect: embedded interface with unexported method(s) not implemented") } - typ.u.methods = append(typ.u.methods, m) + methods = append(methods, method{ + name: m.name, + mtyp: resolveReflectType(ptr.elem.typeOff(m.mtyp)), + ifn: resolveReflectText(ptr.elem.textOff(m.ifn)), + tfn: resolveReflectText(ptr.elem.textOff(m.tfn)), + }) } } default: if unt := ft.uncommon(); unt != nil { - for _, m := range unt.methods { + for _, m := range unt.methods() { if m.name.pkgPath() != nil { // TODO(sbinet) panic("reflect: embedded interface with unexported method(s) not implemented") } - typ.u.methods = append(typ.u.methods, m) + methods = append(methods, method{ + name: m.name, + mtyp: 
resolveReflectType(ft.typeOff(m.mtyp)), + ifn: resolveReflectText(ft.textOff(m.ifn)), + tfn: resolveReflectText(ft.textOff(m.tfn)), + }) + } } } @@ -2359,6 +2455,49 @@ func StructOf(fields []StructField) Type { fs[i] = f } + + var typ *structType + var ut *uncommonType + var typPin interface { + common() *rtype + } // structTypeFixedN + + switch { + case len(methods) == 0: + t := new(structTypeUncommon) + typ = &t.structType + ut = &t.u + typPin = t + case len(methods) <= 4: + t := new(structTypeFixed4) + typ = &t.structType + ut = &t.u + copy(t.m[:], methods) + typPin = t + case len(methods) <= 8: + t := new(structTypeFixed8) + typ = &t.structType + ut = &t.u + copy(t.m[:], methods) + typPin = t + case len(methods) <= 16: + t := new(structTypeFixed16) + typ = &t.structType + ut = &t.u + copy(t.m[:], methods) + typPin = t + case len(methods) <= 32: + t := new(structTypeFixed32) + typ = &t.structType + ut = &t.u + copy(t.m[:], methods) + typPin = t + default: + panic("reflect.StructOf: too many methods") + } + ut.mcount = uint16(len(methods)) + ut.moff = uint16(unsafe.Sizeof(uncommonType{})) + if len(fs) > 0 { repr = append(repr, ' ') } @@ -2372,15 +2511,16 @@ func StructOf(fields []StructField) Type { // Make the struct type. var istruct interface{} = struct{}{} prototype := *(**structType)(unsafe.Pointer(&istruct)) - typ.structType = *prototype - typ.structType.fields = fs + *typ = *prototype + typ.fields = fs // Look in cache structLookupCache.RLock() - for _, t := range structLookupCache.m[hash] { - if haveIdenticalUnderlyingType(&typ.rtype, &t.rtype) { + for _, st := range structLookupCache.m[hash] { + t := st.common() + if haveIdenticalUnderlyingType(&typ.rtype, t) { structLookupCache.RUnlock() - return &t.rtype + return t } } structLookupCache.RUnlock() @@ -2389,11 +2529,14 @@ func StructOf(fields []StructField) Type { structLookupCache.Lock() defer structLookupCache.Unlock() if structLookupCache.m == nil { - structLookupCache.m = make(map[uint32][]*structTypeWithMethods) + structLookupCache.m = make(map[uint32][]interface { + common() *rtype + }) } - for _, t := range structLookupCache.m[hash] { - if haveIdenticalUnderlyingType(&typ.rtype, &t.rtype) { - return &t.rtype + for _, st := range structLookupCache.m[hash] { + t := st.common() + if haveIdenticalUnderlyingType(&typ.rtype, t) { + return t } } @@ -2403,9 +2546,8 @@ func StructOf(fields []StructField) Type { // even if 't' wasn't a structType with methods, we should be ok // as the 'u uncommonType' field won't be accessed except when // tflag&tflagUncommon is set. 
- tt := (*structTypeWithMethods)(unsafe.Pointer(t)) - structLookupCache.m[hash] = append(structLookupCache.m[hash], tt) - return &tt.rtype + structLookupCache.m[hash] = append(structLookupCache.m[hash], t) + return t } } @@ -2414,7 +2556,7 @@ func StructOf(fields []StructField) Type { typ.size = size typ.align = typalign typ.fieldAlign = typalign - if len(typ.u.methods) > 0 { + if len(methods) > 0 { typ.tflag |= tflagUncommon } if !hasPtr { @@ -2514,7 +2656,7 @@ func StructOf(fields []StructField) Type { typ.kind &^= kindDirectIface } - structLookupCache.m[hash] = append(structLookupCache.m[hash], typ) + structLookupCache.m[hash] = append(structLookupCache.m[hash], typPin) return &typ.rtype } @@ -2533,6 +2675,7 @@ func runtimeStructField(field StructField) structField { } } + _ = resolveReflectType(field.Type.common()) return structField{ name: newName(field.Name, string(field.Tag), field.PkgPath, exported), typ: field.Type.common(), diff --git a/src/reflect/value.go b/src/reflect/value.go index 262545d973..d72c14e9e1 100644 --- a/src/reflect/value.go +++ b/src/reflect/value.go @@ -566,15 +566,16 @@ func methodReceiver(op string, v Value, methodIndex int) (rcvrtype, t *rtype, fn } else { rcvrtype = v.typ ut := v.typ.uncommon() - if ut == nil || uint(i) >= uint(len(ut.methods)) { + if ut == nil || uint(i) >= uint(ut.mcount) { panic("reflect: internal error: invalid method index") } - m := &ut.methods[i] + m := ut.methods()[i] if !m.name.isExported() { panic("reflect: " + op + " of unexported method") } - fn = unsafe.Pointer(&m.ifn) - t = m.mtyp + ifn := v.typ.textOff(m.ifn) + fn = unsafe.Pointer(&ifn) + t = v.typ.typeOff(m.mtyp) } return } @@ -1687,11 +1688,11 @@ func (v Value) Type() Type { } // Method on concrete type. ut := v.typ.uncommon() - if ut == nil || uint(i) >= uint(len(ut.methods)) { + if ut == nil || uint(i) >= uint(ut.mcount) { panic("reflect: internal error: invalid method index") } - m := &ut.methods[i] - return m.mtyp + m := ut.methods()[i] + return v.typ.typeOff(m.mtyp) } // Uint returns v's underlying value, as a uint64. diff --git a/src/runtime/iface.go b/src/runtime/iface.go index a4c962fb7a..700bdc2f48 100644 --- a/src/runtime/iface.go +++ b/src/runtime/iface.go @@ -93,7 +93,8 @@ func additab(m *itab, locked, canfail bool) { // so can iterate over both in lock step; // the loop is O(ni+nt) not O(ni*nt). 
ni := len(inter.mhdr) - nt := len(x.mhdr) + nt := int(x.mcount) + xmhdr := (*[1 << 16]method)(add(unsafe.Pointer(x), uintptr(x.moff)))[:nt:nt] j := 0 for k := 0; k < ni; k++ { i := &inter.mhdr[k] @@ -104,15 +105,16 @@ func additab(m *itab, locked, canfail bool) { ipkg = inter.pkgpath } for ; j < nt; j++ { - t := &x.mhdr[j] - if t.mtyp == itype && t.name.name() == iname { + t := &xmhdr[j] + if typ.typeOff(t.mtyp) == itype && t.name.name() == iname { pkgPath := t.name.pkgPath() if pkgPath == nil { pkgPath = x.pkgpath } if t.name.isExported() || pkgPath == ipkg { if m != nil { - *(*unsafe.Pointer)(add(unsafe.Pointer(&m.fun[0]), uintptr(k)*sys.PtrSize)) = t.ifn + ifn := typ.textOff(t.ifn) + *(*unsafe.Pointer)(add(unsafe.Pointer(&m.fun[0]), uintptr(k)*sys.PtrSize)) = ifn } goto nextimethod } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 1a9dbd6c53..98a986cd63 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -435,9 +435,10 @@ func schedinit() { tracebackinit() moduledataverify() stackinit() - itabsinit() mallocinit() mcommoninit(_g_.m) + typelinksinit() + itabsinit() msigsave(_g_.m) initSigmask = _g_.m.sigmask diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go index e1956569fd..02aeedaf75 100644 --- a/src/runtime/runtime1.go +++ b/src/runtime/runtime1.go @@ -486,3 +486,36 @@ func reflect_typelinks() ([]unsafe.Pointer, [][]int32) { } return sections, ret } + +// reflect_resolveTypeOff resolves an *rtype offset from a base type. +//go:linkname reflect_resolveTypeOff reflect.resolveTypeOff +func reflect_resolveTypeOff(rtype unsafe.Pointer, off int32) unsafe.Pointer { + return unsafe.Pointer((*_type)(rtype).typeOff(typeOff(off))) +} + +// reflect_resolveTextOff resolves an function pointer offset from a base type. +//go:linkname reflect_resolveTextOff reflect.resolveTextOff +func reflect_resolveTextOff(rtype unsafe.Pointer, off int32) unsafe.Pointer { + return (*_type)(rtype).textOff(textOff(off)) + +} + +// reflect_addReflectOff adds a pointer to the reflection offset lookup map. +//go:linkname reflect_addReflectOff reflect.addReflectOff +func reflect_addReflectOff(ptr unsafe.Pointer) int32 { + lock(&reflectOffs.lock) + if reflectOffs.m == nil { + reflectOffs.m = make(map[int32]unsafe.Pointer) + reflectOffs.minv = make(map[unsafe.Pointer]int32) + reflectOffs.next = -1 + } + id, found := reflectOffs.minv[ptr] + if !found { + id = reflectOffs.next + reflectOffs.next-- // use negative offsets as IDs to aid debugging + reflectOffs.m[id] = ptr + reflectOffs.minv[ptr] = id + } + unlock(&reflectOffs.lock) + return id +} diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go index 8c70f22c1f..2df390253a 100644 --- a/src/runtime/symtab.go +++ b/src/runtime/symtab.go @@ -137,6 +137,8 @@ type moduledata struct { gcdatamask, gcbssmask bitvector + typemap map[typeOff]*_type // offset to *_rtype in previous module + next *moduledata } diff --git a/src/runtime/type.go b/src/runtime/type.go index fbf6f9973c..86131d3ff3 100644 --- a/src/runtime/type.go +++ b/src/runtime/type.go @@ -131,6 +131,92 @@ func (t *_type) name() string { return t._string[i+1:] } +// reflectOffs holds type offsets defined at run time by the reflect package. +// +// When a type is defined at run time, its *rtype data lives on the heap. +// There are a wide range of possible addresses the heap may use, that +// may not be representable as a 32-bit offset. Moreover the GC may +// one day start moving heap memory, in which case there is no stable +// offset that can be defined. 
+// +// To provide stable offsets, we add pin *rtype objects in a global map +// and treat the offset as an identifier. We use negative offsets that +// do not overlap with any compile-time module offsets. +// +// Entries are created by reflect.addReflectOff. +var reflectOffs struct { + lock mutex + next int32 + m map[int32]unsafe.Pointer + minv map[unsafe.Pointer]int32 +} + +func (t *_type) typeOff(off typeOff) *_type { + if off == 0 { + return nil + } + base := uintptr(unsafe.Pointer(t)) + var md *moduledata + for next := &firstmoduledata; next != nil; next = next.next { + if base >= next.types && base < next.etypes { + md = next + break + } + } + if md == nil { + lock(&reflectOffs.lock) + res := reflectOffs.m[int32(off)] + unlock(&reflectOffs.lock) + if res == nil { + println("runtime: typeOff", hex(off), "base", hex(base), "not in ranges:") + for next := &firstmoduledata; next != nil; next = next.next { + println("\ttypes", hex(next.types), "etypes", hex(next.etypes)) + } + throw("runtime: type offset base pointer out of range") + } + return (*_type)(res) + } + if t := md.typemap[off]; t != nil { + return t + } + res := md.types + uintptr(off) + if res > md.etypes { + println("runtime: typeOff", hex(off), "out of range", hex(md.types), "-", hex(md.etypes)) + throw("runtime: type offset out of range") + } + return (*_type)(unsafe.Pointer(res)) +} + +func (t *_type) textOff(off textOff) unsafe.Pointer { + base := uintptr(unsafe.Pointer(t)) + var md *moduledata + for next := &firstmoduledata; next != nil; next = next.next { + if base >= next.types && base < next.etypes { + md = next + break + } + } + if md == nil { + lock(&reflectOffs.lock) + res := reflectOffs.m[int32(off)] + unlock(&reflectOffs.lock) + if res == nil { + println("runtime: textOff", hex(off), "base", hex(base), "not in ranges:") + for next := &firstmoduledata; next != nil; next = next.next { + println("\ttypes", hex(next.types), "etypes", hex(next.etypes)) + } + throw("runtime: text offset base pointer out of range") + } + return res + } + res := md.text + uintptr(off) + if res > md.etext { + println("runtime: textOff", hex(off), "out of range", hex(md.text), "-", hex(md.etext)) + throw("runtime: text offset out of range") + } + return unsafe.Pointer(res) +} + func (t *functype) in() []*_type { // See funcType in reflect/type.go for details on data layout. uadd := uintptr(unsafe.Sizeof(functype{})) @@ -154,16 +240,20 @@ func (t *functype) dotdotdot() bool { return t.outCount&(1<<15) != 0 } +type typeOff int32 +type textOff int32 + type method struct { name name - mtyp *_type - ifn unsafe.Pointer - tfn unsafe.Pointer + mtyp typeOff + ifn textOff + tfn textOff } type uncommontype struct { pkgpath *string - mhdr []method + mcount uint16 // number of methods + moff uint16 // offset from this uncommontype to [mcount]method } type imethod struct { @@ -270,6 +360,18 @@ func (n *name) name() (s string) { return s } +func (n *name) tag() (s string) { + tl := n.tagLen() + if tl == 0 { + return "" + } + nl := n.nameLen() + hdr := (*stringStruct)(unsafe.Pointer(&s)) + hdr.str = unsafe.Pointer(n.data(3 + nl + 2)) + hdr.len = tl + return s +} + func (n *name) pkgPath() *string { if *n.data(0)&(1<<2) == 0 { return nil @@ -281,3 +383,200 @@ func (n *name) pkgPath() *string { off = int(round(uintptr(off), sys.PtrSize)) return *(**string)(unsafe.Pointer(n.data(off))) } + +// typelinksinit scans the types from extra modules and builds the +// moduledata typemap used to de-duplicate type pointers. 
+func typelinksinit() { + if firstmoduledata.next == nil { + return + } + typehash := make(map[uint32][]*_type) + + modules := []*moduledata{} + for md := &firstmoduledata; md != nil; md = md.next { + modules = append(modules, md) + } + prev, modules := modules[len(modules)-1], modules[:len(modules)-1] + for len(modules) > 0 { + // Collect types from the previous module into typehash. + collect: + for _, tl := range prev.typelinks { + var t *_type + if prev.typemap == nil { + t = (*_type)(unsafe.Pointer(prev.types + uintptr(tl))) + } else { + t = prev.typemap[typeOff(tl)] + } + // Add to typehash if not seen before. + tlist := typehash[t.hash] + for _, tcur := range tlist { + if tcur == t { + continue collect + } + } + typehash[t.hash] = append(tlist, t) + } + + // If any of this module's typelinks match a type from a + // prior module, prefer that prior type by adding the offset + // to this module's typemap. + md := modules[len(modules)-1] + md.typemap = make(map[typeOff]*_type, len(md.typelinks)) + for _, tl := range md.typelinks { + t := (*_type)(unsafe.Pointer(md.types + uintptr(tl))) + for _, candidate := range typehash[t.hash] { + if typesEqual(t, candidate) { + t = candidate + break + } + } + md.typemap[typeOff(tl)] = t + } + + prev, modules = md, modules[:len(modules)-1] + } +} + +// typesEqual reports whether two types are equal. +// +// Everywhere in the runtime and reflect packages, it is assumed that +// there is exactly one *_type per Go type, so that pointer equality +// can be used to test if types are equal. There is one place that +// breaks this assumption: buildmode=shared. In this case a type can +// appear as two different pieces of memory. This is hidden from the +// runtime and reflect package by the per-module typemap built in +// typelinksinit. It uses typesEqual to map types from later modules +// back into earlier ones. +// +// Only typelinksinit needs this function. 
+func typesEqual(t, v *_type) bool { + if t == v { + return true + } + kind := t.kind & kindMask + if kind != v.kind&kindMask { + return false + } + if t._string != v._string { + return false + } + ut := t.uncommon() + uv := v.uncommon() + if ut != nil || uv != nil { + if ut == nil || uv == nil { + return false + } + if !pkgPathEqual(ut.pkgpath, uv.pkgpath) { + return false + } + } + if kindBool <= kind && kind <= kindComplex128 { + return true + } + switch kind { + case kindString, kindUnsafePointer: + return true + case kindArray: + at := (*arraytype)(unsafe.Pointer(t)) + av := (*arraytype)(unsafe.Pointer(v)) + return typesEqual(at.elem, av.elem) && at.len == av.len + case kindChan: + ct := (*chantype)(unsafe.Pointer(t)) + cv := (*chantype)(unsafe.Pointer(v)) + return ct.dir == cv.dir && typesEqual(ct.elem, cv.elem) + case kindFunc: + ft := (*functype)(unsafe.Pointer(t)) + fv := (*functype)(unsafe.Pointer(v)) + if ft.outCount != fv.outCount || ft.inCount != fv.inCount { + return false + } + tin, vin := ft.in(), fv.in() + for i := 0; i < len(tin); i++ { + if !typesEqual(tin[i], vin[i]) { + return false + } + } + tout, vout := ft.out(), fv.out() + for i := 0; i < len(tout); i++ { + if !typesEqual(tout[i], vout[i]) { + return false + } + } + return true + case kindInterface: + it := (*interfacetype)(unsafe.Pointer(t)) + iv := (*interfacetype)(unsafe.Pointer(v)) + if !pkgPathEqual(it.pkgpath, iv.pkgpath) { + return false + } + if len(it.mhdr) != len(iv.mhdr) { + return false + } + for i := range it.mhdr { + tm := &it.mhdr[i] + vm := &iv.mhdr[i] + if tm.name.name() != vm.name.name() { + return false + } + if !pkgPathEqual(tm.name.pkgPath(), vm.name.pkgPath()) { + return false + } + if !typesEqual(tm._type, vm._type) { + return false + } + } + return true + case kindMap: + mt := (*maptype)(unsafe.Pointer(t)) + mv := (*maptype)(unsafe.Pointer(v)) + return typesEqual(mt.key, mv.key) && typesEqual(mt.elem, mv.elem) + case kindPtr: + pt := (*ptrtype)(unsafe.Pointer(t)) + pv := (*ptrtype)(unsafe.Pointer(v)) + return typesEqual(pt.elem, pv.elem) + case kindSlice: + st := (*slicetype)(unsafe.Pointer(t)) + sv := (*slicetype)(unsafe.Pointer(v)) + return typesEqual(st.elem, sv.elem) + case kindStruct: + st := (*structtype)(unsafe.Pointer(t)) + sv := (*structtype)(unsafe.Pointer(v)) + if len(st.fields) != len(sv.fields) { + return false + } + for i := range st.fields { + tf := &st.fields[i] + vf := &sv.fields[i] + if tf.name.name() != vf.name.name() { + return false + } + if !pkgPathEqual(tf.name.pkgPath(), vf.name.pkgPath()) { + return false + } + if !typesEqual(tf.typ, vf.typ) { + return false + } + if tf.name.tag() != vf.name.tag() { + return false + } + if tf.offset != vf.offset { + return false + } + } + return true + default: + println("runtime: impossible type kind", kind) + throw("runtime: impossible type kind") + return false + } +} + +func pkgPathEqual(p, q *string) bool { + if p == q { + return true + } + if p == nil || q == nil { + return false + } + return *p == *q +} -- cgit v1.3 From a3703618eadeb74b60f2cb9a23fabe178d4b141d Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 5 Apr 2016 15:29:14 +0200 Subject: runtime: use per-goroutine sequence numbers in tracer Currently tracer uses global sequencer and it introduces significant slowdown on parallel machines (up to 10x). Replace the global sequencer with per-goroutine sequencer. 
If we assign per-goroutine sequence numbers to only 3 types of events (start, unblock and syscall exit), it is enough to restore consistent partial ordering of all events. Even these events don't need sequence numbers all the time (if goroutine starts on the same P where it was unblocked, then start does not need sequence number). The burden of restoring the order is put on trace parser. Details of the algorithm are described in the comments. On http benchmark with GOMAXPROCS=48: no tracing: 5026 ns/op tracing: 27803 ns/op (+453%) with this change: 6369 ns/op (+26%, mostly for traceback) Also trace size is reduced by ~22%. Average event size before: 4.63 bytes/event, after: 3.62 bytes/event. Besides running trace tests, I've also tested with manually broken cputicks (random skew for each event, per-P skew and episodic random skew). In all cases broken timestamps were detected and no test failures. Change-Id: I078bde421ccc386a66f6c2051ab207bcd5613efa Reviewed-on: https://go-review.googlesource.com/21512 Run-TryBot: Dmitry Vyukov Reviewed-by: Austin Clements TryBot-Result: Gobot Gobot --- src/internal/trace/order.go | 278 ++++++++++++++++++++++++++++++++++++++ src/internal/trace/parser.go | 208 +++++++++++++++------------- src/internal/trace/parser_test.go | 8 +- src/runtime/proc.go | 25 +--- src/runtime/runtime2.go | 21 +-- src/runtime/trace.go | 121 +++++++++-------- 6 files changed, 478 insertions(+), 183 deletions(-) create mode 100644 src/internal/trace/order.go (limited to 'src/runtime/proc.go') diff --git a/src/internal/trace/order.go b/src/internal/trace/order.go new file mode 100644 index 0000000000..f9ec44c745 --- /dev/null +++ b/src/internal/trace/order.go @@ -0,0 +1,278 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import ( + "fmt" + "sort" +) + +type eventBatch struct { + events []*Event + selected bool +} + +type orderEvent struct { + ev *Event + batch int + g uint64 + init gState + next gState +} + +type gStatus int + +type gState struct { + seq uint64 + status gStatus +} + +const ( + gDead gStatus = iota + gRunnable + gRunning + gWaiting + + unordered = ^uint64(0) + garbage = ^uint64(0) - 1 + noseq = ^uint64(0) + seqinc = ^uint64(0) - 1 +) + +// order1007 merges a set of per-P event batches into a single, consistent stream. +// The high level idea is as follows. Events within an individual batch are in +// correct order, because they are emitted by a single P. So we need to produce +// a correct interleaving of the batches. To do this we take first unmerged event +// from each batch (frontier). Then choose subset that is "ready" to be merged, +// that is, events for which all dependencies are already merged. Then we choose +// event with the lowest timestamp from the subset, merge it and repeat. +// This approach ensures that we form a consistent stream even if timestamps are +// incorrect (condition observed on some machines). 
+func order1007(m map[int][]*Event) (events []*Event, err error) { + pending := 0 + var batches []*eventBatch + for _, v := range m { + pending += len(v) + batches = append(batches, &eventBatch{v, false}) + } + gs := make(map[uint64]gState) + var frontier []orderEvent + for ; pending != 0; pending-- { + for i, b := range batches { + if b.selected || len(b.events) == 0 { + continue + } + ev := b.events[0] + g, init, next := stateTransition(ev) + if !transitionReady(g, gs[g], init) { + continue + } + frontier = append(frontier, orderEvent{ev, i, g, init, next}) + b.events = b.events[1:] + b.selected = true + // Get rid of "Local" events, they are intended merely for ordering. + switch ev.Type { + case EvGoStartLocal: + ev.Type = EvGoStart + case EvGoUnblockLocal: + ev.Type = EvGoUnblock + case EvGoSysExitLocal: + ev.Type = EvGoSysExit + } + } + if len(frontier) == 0 { + return nil, fmt.Errorf("no consistent ordering of events possible") + } + sort.Sort(orderEventList(frontier)) + f := frontier[0] + frontier[0] = frontier[len(frontier)-1] + frontier = frontier[:len(frontier)-1] + events = append(events, f.ev) + transition(gs, f.g, f.init, f.next) + if !batches[f.batch].selected { + panic("frontier batch is not selected") + } + batches[f.batch].selected = false + } + + // At this point we have a consistent stream of events. + // Make sure time stamps respect the ordering. + // The tests will skip (not fail) the test case if they see this error. + if !sort.IsSorted(eventList(events)) { + return nil, ErrTimeOrder + } + + // The last part is giving correct timestamps to EvGoSysExit events. + // The problem with EvGoSysExit is that actual syscall exit timestamp (ev.Args[2]) + // is potentially acquired long before event emission. So far we've used + // timestamp of event emission (ev.Ts). + // We could not set ev.Ts = ev.Args[2] earlier, because it would produce + // seemingly broken timestamps (misplaced event). + // We also can't simply update the timestamp and resort events, because + // if timestamps are broken we will misplace the event and later report + // logically broken trace (instead of reporting broken timestamps). + lastSysBlock := make(map[uint64]int64) + for _, ev := range events { + switch ev.Type { + case EvGoSysBlock, EvGoInSyscall: + lastSysBlock[ev.G] = ev.Ts + case EvGoSysExit: + ts := int64(ev.Args[2]) + if ts == 0 { + continue + } + block := lastSysBlock[ev.G] + if block == 0 { + return nil, fmt.Errorf("stray syscall exit") + } + if ts < block { + return nil, ErrTimeOrder + } + ev.Ts = ts + } + } + sort.Sort(eventList(events)) + + return +} + +// stateTransition returns goroutine state (sequence and status) when the event +// becomes ready for merging (init) and the goroutine state after the event (next). +func stateTransition(ev *Event) (g uint64, init, next gState) { + switch ev.Type { + case EvGoCreate: + g = ev.Args[0] + init = gState{0, gDead} + next = gState{1, gRunnable} + case EvGoWaiting, EvGoInSyscall: + g = ev.G + init = gState{1, gRunnable} + next = gState{2, gWaiting} + case EvGoStart: + g = ev.G + init = gState{ev.Args[1], gRunnable} + next = gState{ev.Args[1] + 1, gRunning} + case EvGoStartLocal: + // noseq means that this event is ready for merging as soon as + // frontier reaches it (EvGoStartLocal is emitted on the same P + // as the corresponding EvGoCreate/EvGoUnblock, and thus the latter + // is already merged). + // seqinc is a stub for cases when event increments g sequence, + // but since we don't know current seq we also don't know next seq. 
+ g = ev.G + init = gState{noseq, gRunnable} + next = gState{seqinc, gRunning} + case EvGoBlock, EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect, + EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoSleep, EvGoSysBlock: + g = ev.G + init = gState{noseq, gRunning} + next = gState{noseq, gWaiting} + case EvGoSched, EvGoPreempt: + g = ev.G + init = gState{noseq, gRunning} + next = gState{noseq, gRunnable} + case EvGoUnblock, EvGoSysExit: + g = ev.Args[0] + init = gState{ev.Args[1], gWaiting} + next = gState{ev.Args[1] + 1, gRunnable} + case EvGoUnblockLocal, EvGoSysExitLocal: + g = ev.Args[0] + init = gState{noseq, gWaiting} + next = gState{seqinc, gRunnable} + case EvGCStart: + g = garbage + init = gState{ev.Args[0], gDead} + next = gState{ev.Args[0] + 1, gDead} + default: + // no ordering requirements + g = unordered + } + return +} + +func transitionReady(g uint64, curr, init gState) bool { + return g == unordered || (init.seq == noseq || init.seq == curr.seq) && init.status == curr.status +} + +func transition(gs map[uint64]gState, g uint64, init, next gState) { + if g == unordered { + return + } + curr := gs[g] + if !transitionReady(g, curr, init) { + panic("event sequences are broken") + } + switch next.seq { + case noseq: + next.seq = curr.seq + case seqinc: + next.seq = curr.seq + 1 + } + gs[g] = next +} + +// order1005 merges a set of per-P event batches into a single, consistent stream. +func order1005(m map[int][]*Event) (events []*Event, err error) { + for _, batch := range m { + events = append(events, batch...) + } + for _, ev := range events { + if ev.Type == EvGoSysExit { + // EvGoSysExit emission is delayed until the thread has a P. + // Give it the real sequence number and time stamp. + ev.seq = int64(ev.Args[1]) + if ev.Args[2] != 0 { + ev.Ts = int64(ev.Args[2]) + } + } + } + sort.Sort(eventSeqList(events)) + if !sort.IsSorted(eventList(events)) { + return nil, ErrTimeOrder + } + return +} + +type orderEventList []orderEvent + +func (l orderEventList) Len() int { + return len(l) +} + +func (l orderEventList) Less(i, j int) bool { + return l[i].ev.Ts < l[j].ev.Ts +} + +func (l orderEventList) Swap(i, j int) { + l[i], l[j] = l[j], l[i] +} + +type eventList []*Event + +func (l eventList) Len() int { + return len(l) +} + +func (l eventList) Less(i, j int) bool { + return l[i].Ts < l[j].Ts +} + +func (l eventList) Swap(i, j int) { + l[i], l[j] = l[j], l[i] +} + +type eventSeqList []*Event + +func (l eventSeqList) Len() int { + return len(l) +} + +func (l eventSeqList) Less(i, j int) bool { + return l[i].seq < l[j].seq +} + +func (l eventSeqList) Swap(i, j int) { + l[i], l[j] = l[j], l[i] +} diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go index d279ddeacf..e6f29445c1 100644 --- a/src/internal/trace/parser.go +++ b/src/internal/trace/parser.go @@ -11,7 +11,6 @@ import ( "io" "os" "os/exec" - "sort" "strconv" "strings" ) @@ -135,7 +134,12 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri } off += n typ := buf[0] << 2 >> 2 - narg := buf[0] >> 6 + narg := buf[0]>>6 + 1 + inlineArgs := byte(4) + if ver < 1007 { + narg++ + inlineArgs++ + } if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > ver { err = fmt.Errorf("unknown event type %v at offset 0x%x", typ, off0) return @@ -180,8 +184,8 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri continue } ev := rawEvent{typ: typ, off: off0} - if narg < 3 { - for i := 0; i < int(narg)+2; i++ { // sequence number and time stamp are 
present but not counted in narg + if narg < inlineArgs { + for i := 0; i < int(narg); i++ { var v uint64 v, off, err = readVal(r, off) if err != nil { @@ -191,7 +195,7 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri ev.args = append(ev.args, v) } } else { - // If narg == 3, the first value is length of the event in bytes. + // More than inlineArgs args, the first value is length of the event in bytes. var v uint64 v, off, err = readVal(r, off) if err != nil { @@ -250,34 +254,30 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even var lastP int lastGs := make(map[int]uint64) // last goroutine running on P stacks = make(map[uint64][]*Frame) + batches := make(map[int][]*Event) // events by P for _, raw := range rawEvents { desc := EventDescriptions[raw.typ] if desc.Name == "" { err = fmt.Errorf("missing description for event type %v", raw.typ) return } - if raw.typ != EvStack { - narg := len(desc.Args) - if desc.Stack { - narg++ - } - if raw.typ != EvBatch && raw.typ != EvFrequency && raw.typ != EvTimerGoroutine { - narg++ // sequence number - narg++ // timestamp - } - if len(raw.args) != narg { - err = fmt.Errorf("%v has wrong number of arguments at offset 0x%x: want %v, got %v", - desc.Name, raw.off, narg, len(raw.args)) - return - } + narg := argNum(raw, ver) + if len(raw.args) != narg { + err = fmt.Errorf("%v has wrong number of arguments at offset 0x%x: want %v, got %v", + desc.Name, raw.off, narg, len(raw.args)) + return } switch raw.typ { case EvBatch: lastGs[lastP] = lastG lastP = int(raw.args[0]) lastG = lastGs[lastP] - lastSeq = int64(raw.args[1]) - lastTs = int64(raw.args[2]) + if ver < 1007 { + lastSeq = int64(raw.args[1]) + lastTs = int64(raw.args[2]) + } else { + lastTs = int64(raw.args[1]) + } case EvFrequency: ticksPerSec = int64(raw.args[0]) if ticksPerSec <= 0 { @@ -328,18 +328,26 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even } default: e := &Event{Off: raw.off, Type: raw.typ, P: lastP, G: lastG} - e.seq = lastSeq + int64(raw.args[0]) - e.Ts = lastTs + int64(raw.args[1]) - lastSeq = e.seq - lastTs = e.Ts - for i := range desc.Args { - e.Args[i] = raw.args[i+2] + var argOffset int + if ver < 1007 { + e.seq = lastSeq + int64(raw.args[0]) + e.Ts = lastTs + int64(raw.args[1]) + lastSeq = e.seq + argOffset = 2 + } else { + e.Ts = lastTs + int64(raw.args[0]) + argOffset = 1 } - if desc.Stack { - e.StkID = raw.args[len(desc.Args)+2] + lastTs = e.Ts + for i := argOffset; i < narg; i++ { + if i == narg-1 && desc.Stack { + e.StkID = raw.args[i] + } else { + e.Args[i-argOffset] = raw.args[i] + } } switch raw.typ { - case EvGoStart: + case EvGoStart, EvGoStartLocal: lastG = e.Args[0] e.G = lastG case EvGCStart, EvGCDone, EvGCScanStart, EvGCScanDone: @@ -349,28 +357,30 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoSysBlock: lastG = 0 - case EvGoSysExit: - // EvGoSysExit emission is delayed until the thread has a P. - // Give it the real sequence number and time stamp. - e.seq = int64(e.Args[1]) - if e.Args[2] != 0 { - e.Ts = int64(e.Args[2]) - } + case EvGoSysExit, EvGoWaiting, EvGoInSyscall: + e.G = e.Args[0] } - events = append(events, e) + batches[lastP] = append(batches[lastP], e) } } - if len(events) == 0 { + if len(batches) == 0 { err = fmt.Errorf("trace is empty") return } - - // Sort by sequence number and translate cpu ticks to real time. 
- sort.Sort(eventList(events)) if ticksPerSec == 0 { err = fmt.Errorf("no EvFrequency event") return } + if ver < 1007 { + events, err = order1005(batches) + } else { + events, err = order1007(batches) + } + if err != nil { + return + } + + // Translate cpu ticks to real time. minTs := events[0].Ts // Use floating point to avoid integer overflows. freq := 1e9 / float64(ticksPerSec) @@ -382,7 +392,6 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even } if ev.Type == EvGoSysExit { ev.P = SyscallP - ev.G = ev.Args[0] } } @@ -543,19 +552,15 @@ func postProcessTrace(ver int, events []*Event) error { p.evSweep.Link = ev p.evSweep = nil case EvGoWaiting: - g1 := gs[ev.Args[0]] - if g1.state != gRunnable { - return fmt.Errorf("g %v is not runnable before EvGoWaiting (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts) + if g.state != gRunnable { + return fmt.Errorf("g %v is not runnable before EvGoWaiting (offset %v, time %v)", ev.G, ev.Off, ev.Ts) } - g1.state = gWaiting - gs[ev.Args[0]] = g1 + g.state = gWaiting case EvGoInSyscall: - g1 := gs[ev.Args[0]] - if g1.state != gRunnable { - return fmt.Errorf("g %v is not runnable before EvGoInSyscall (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts) + if g.state != gRunnable { + return fmt.Errorf("g %v is not runnable before EvGoInSyscall (offset %v, time %v)", ev.G, ev.Off, ev.Ts) } - g1.state = gWaiting - gs[ev.Args[0]] = g1 + g.state = gWaiting case EvGoCreate: if err := checkRunning(p, g, ev, true); err != nil { return err @@ -666,18 +671,6 @@ func postProcessTrace(ver int, events []*Event) error { // TODO(dvyukov): restore stacks for EvGoStart events. // TODO(dvyukov): test that all EvGoStart events has non-nil Link. - // Last, after all the other consistency checks, - // make sure time stamps respect sequence numbers. - // The tests will skip (not fail) the test case if they see this error, - // so check everything else that could possibly be wrong first. - lastTs := int64(0) - for _, ev := range events { - if ev.Ts < lastTs { - return ErrTimeOrder - } - lastTs = ev.Ts - } - return nil } @@ -773,30 +766,51 @@ func readVal(r io.Reader, off0 int) (v uint64, off int, err error) { return 0, 0, fmt.Errorf("bad value at offset 0x%x", off0) } -type eventList []*Event - -func (l eventList) Len() int { - return len(l) -} - -func (l eventList) Less(i, j int) bool { - return l[i].seq < l[j].seq +// Print dumps events to stdout. For debugging. +func Print(events []*Event) { + for _, ev := range events { + PrintEvent(ev) + } } -func (l eventList) Swap(i, j int) { - l[i], l[j] = l[j], l[i] +// PrintEvent dumps the event to stdout. For debugging. +func PrintEvent(ev *Event) { + desc := EventDescriptions[ev.Type] + fmt.Printf("%v %v p=%v g=%v off=%v", ev.Ts, desc.Name, ev.P, ev.G, ev.Off) + for i, a := range desc.Args { + fmt.Printf(" %v=%v", a, ev.Args[i]) + } + fmt.Printf("\n") } -// Print dumps events to stdout. For debugging. -func Print(events []*Event) { - for _, ev := range events { - desc := EventDescriptions[ev.Type] - fmt.Printf("%v %v p=%v g=%v off=%v", ev.Ts, desc.Name, ev.P, ev.G, ev.Off) - for i, a := range desc.Args { - fmt.Printf(" %v=%v", a, ev.Args[i]) +// argNum returns total number of args for the event accounting for timestamps, +// sequence numbers and differences between trace format versions. 
+func argNum(raw rawEvent, ver int) int { + desc := EventDescriptions[raw.typ] + if raw.typ == EvStack { + return len(raw.args) + } + narg := len(desc.Args) + if desc.Stack { + narg++ + } + switch raw.typ { + case EvBatch, EvFrequency, EvTimerGoroutine: + if ver < 1007 { + narg++ // there was an unused arg before 1.7 + } + case EvGCStart, EvGoStart, EvGoUnblock: + if ver < 1007 { + narg-- // 1.7 added an additional seq arg + } + fallthrough + default: + narg++ // timestamp + if ver < 1007 { + narg++ // sequence } - fmt.Printf("\n") } + return narg } // Event types in the trace. @@ -809,21 +823,21 @@ const ( EvGomaxprocs = 4 // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id] EvProcStart = 5 // start of P [timestamp, thread id] EvProcStop = 6 // stop of P [timestamp] - EvGCStart = 7 // GC start [timestamp, stack id] + EvGCStart = 7 // GC start [timestamp, seq, stack id] EvGCDone = 8 // GC done [timestamp] EvGCScanStart = 9 // GC scan start [timestamp] EvGCScanDone = 10 // GC scan done [timestamp] EvGCSweepStart = 11 // GC sweep start [timestamp, stack id] EvGCSweepDone = 12 // GC sweep done [timestamp] EvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id] - EvGoStart = 14 // goroutine starts running [timestamp, goroutine id] + EvGoStart = 14 // goroutine starts running [timestamp, goroutine id, seq] EvGoEnd = 15 // goroutine ends [timestamp] EvGoStop = 16 // goroutine stops (like in select{}) [timestamp, stack] EvGoSched = 17 // goroutine calls Gosched [timestamp, stack] EvGoPreempt = 18 // goroutine is preempted [timestamp, stack] EvGoSleep = 19 // goroutine calls Sleep [timestamp, stack] EvGoBlock = 20 // goroutine blocks [timestamp, stack] - EvGoUnblock = 21 // goroutine is unblocked [timestamp, goroutine id, stack] + EvGoUnblock = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack] EvGoBlockSend = 22 // goroutine blocks on chan send [timestamp, stack] EvGoBlockRecv = 23 // goroutine blocks on chan recv [timestamp, stack] EvGoBlockSelect = 24 // goroutine blocks on select [timestamp, stack] @@ -831,7 +845,7 @@ const ( EvGoBlockCond = 26 // goroutine blocks on Cond [timestamp, stack] EvGoBlockNet = 27 // goroutine blocks on network [timestamp, stack] EvGoSysCall = 28 // syscall enter [timestamp, stack] - EvGoSysExit = 29 // syscall exit [timestamp, goroutine id, real timestamp] + EvGoSysExit = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp] EvGoSysBlock = 30 // syscall blocks [timestamp] EvGoWaiting = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id] EvGoInSyscall = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id] @@ -840,7 +854,10 @@ const ( EvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id] EvFutileWakeup = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp] EvString = 37 // string dictionary entry [ID, length, string] - EvCount = 38 + EvGoStartLocal = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id] + EvGoUnblockLocal = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack] + EvGoSysExitLocal = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp] + EvCount = 41 ) var EventDescriptions = [EvCount]struct { @@ -850,27 +867,27 @@ var EventDescriptions = [EvCount]struct { Args []string }{ EvNone: {"None", 1005, false, []string{}}, - EvBatch: {"Batch", 1005, false, 
[]string{"p", "seq", "ticks"}}, - EvFrequency: {"Frequency", 1005, false, []string{"freq", "unused"}}, + EvBatch: {"Batch", 1005, false, []string{"p", "ticks"}}, // in 1.5 format it was {"p", "seq", "ticks"} + EvFrequency: {"Frequency", 1005, false, []string{"freq"}}, // in 1.5 format it was {"freq", "unused"} EvStack: {"Stack", 1005, false, []string{"id", "siz"}}, EvGomaxprocs: {"Gomaxprocs", 1005, true, []string{"procs"}}, EvProcStart: {"ProcStart", 1005, false, []string{"thread"}}, EvProcStop: {"ProcStop", 1005, false, []string{}}, - EvGCStart: {"GCStart", 1005, true, []string{}}, + EvGCStart: {"GCStart", 1005, true, []string{"seq"}}, // in 1.5 format it was {} EvGCDone: {"GCDone", 1005, false, []string{}}, EvGCScanStart: {"GCScanStart", 1005, false, []string{}}, EvGCScanDone: {"GCScanDone", 1005, false, []string{}}, EvGCSweepStart: {"GCSweepStart", 1005, true, []string{}}, EvGCSweepDone: {"GCSweepDone", 1005, false, []string{}}, EvGoCreate: {"GoCreate", 1005, true, []string{"g", "stack"}}, - EvGoStart: {"GoStart", 1005, false, []string{"g"}}, + EvGoStart: {"GoStart", 1005, false, []string{"g", "seq"}}, // in 1.5 format it was {"g"} EvGoEnd: {"GoEnd", 1005, false, []string{}}, EvGoStop: {"GoStop", 1005, true, []string{}}, EvGoSched: {"GoSched", 1005, true, []string{}}, EvGoPreempt: {"GoPreempt", 1005, true, []string{}}, EvGoSleep: {"GoSleep", 1005, true, []string{}}, EvGoBlock: {"GoBlock", 1005, true, []string{}}, - EvGoUnblock: {"GoUnblock", 1005, true, []string{"g"}}, + EvGoUnblock: {"GoUnblock", 1005, true, []string{"g", "seq"}}, // in 1.5 format it was {"g"} EvGoBlockSend: {"GoBlockSend", 1005, true, []string{}}, EvGoBlockRecv: {"GoBlockRecv", 1005, true, []string{}}, EvGoBlockSelect: {"GoBlockSelect", 1005, true, []string{}}, @@ -884,7 +901,10 @@ var EventDescriptions = [EvCount]struct { EvGoInSyscall: {"GoInSyscall", 1005, false, []string{"g"}}, EvHeapAlloc: {"HeapAlloc", 1005, false, []string{"mem"}}, EvNextGC: {"NextGC", 1005, false, []string{"mem"}}, - EvTimerGoroutine: {"TimerGoroutine", 1005, false, []string{"g", "unused"}}, + EvTimerGoroutine: {"TimerGoroutine", 1005, false, []string{"g"}}, // in 1.5 format it was {"g", "unused"} EvFutileWakeup: {"FutileWakeup", 1005, false, []string{}}, EvString: {"String", 1007, false, []string{}}, + EvGoStartLocal: {"GoStartLocal", 1007, false, []string{"g"}}, + EvGoUnblockLocal: {"GoUnblockLocal", 1007, true, []string{"g"}}, + EvGoSysExitLocal: {"GoSysExitLocal", 1007, false, []string{"g", "ts"}}, } diff --git a/src/internal/trace/parser_test.go b/src/internal/trace/parser_test.go index 337d5a85d7..340f106484 100644 --- a/src/internal/trace/parser_test.go +++ b/src/internal/trace/parser_test.go @@ -89,10 +89,10 @@ func TestParseVersion(t *testing.T) { func TestTimestampOverflow(t *testing.T) { // Test that parser correctly handles large timestamps (long tracing). 
w := newWriter() - w.emit(EvBatch, 0, 0, 0) - w.emit(EvFrequency, 1e9, 0) + w.emit(EvBatch, 0, 0) + w.emit(EvFrequency, 1e9) for ts := uint64(1); ts < 1e16; ts *= 2 { - w.emit(EvGoCreate, 1, ts, ts, 1, 0) + w.emit(EvGoCreate, ts, ts, 0, 0) } if _, err := Parse(w, ""); err != nil { t.Fatalf("failed to parse: %v", err) @@ -110,7 +110,7 @@ func newWriter() *writer { } func (w *writer) emit(typ byte, args ...uint64) { - nargs := byte(len(args)) - 2 + nargs := byte(len(args)) - 1 if nargs > 3 { nargs = 3 } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 98a986cd63..a847823da4 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1796,23 +1796,7 @@ func execute(gp *g, inheritTime bool) { // GoSysExit has to happen when we have a P, but before GoStart. // So we emit it here. if gp.syscallsp != 0 && gp.sysblocktraced { - // Since gp.sysblocktraced is true, we must emit an event. - // There is a race between the code that initializes sysexitseq - // and sysexitticks (in exitsyscall, which runs without a P, - // and therefore is not stopped with the rest of the world) - // and the code that initializes a new trace. - // The recorded sysexitseq and sysexitticks must therefore - // be treated as "best effort". If they are valid for this trace, - // then great, use them for greater accuracy. - // But if they're not valid for this trace, assume that the - // trace was started after the actual syscall exit (but before - // we actually managed to start the goroutine, aka right now), - // and assign a fresh time stamp to keep the log consistent. - seq, ts := gp.sysexitseq, gp.sysexitticks - if seq == 0 || int64(seq)-int64(trace.seqStart) < 0 { - seq, ts = tracestamp() - } - traceGoSysExit(seq, ts) + traceGoSysExit(gp.sysexitticks) } traceGoStart() } @@ -2481,7 +2465,6 @@ func exitsyscall(dummy int32) { } _g_.sysexitticks = 0 - _g_.sysexitseq = 0 if trace.enabled { // Wait till traceGoSysBlock event is emitted. // This ensures consistency of the trace (the goroutine is started after it is blocked). @@ -2492,7 +2475,7 @@ func exitsyscall(dummy int32) { // Tracing code can invoke write barriers that cannot run without a P. // So instead we remember the syscall exit time and emit the event // in execute when we have a P. - _g_.sysexitseq, _g_.sysexitticks = tracestamp() + _g_.sysexitticks = cputicks() } _g_.m.locks-- @@ -2540,7 +2523,7 @@ func exitsyscallfast() bool { // Denote blocking of the new syscall. traceGoSysBlock(_g_.m.p.ptr()) // Denote completion of the current syscall. 
- traceGoSysExit(tracestamp()) + traceGoSysExit(0) }) } _g_.m.p.ptr().syscalltick++ @@ -2564,7 +2547,7 @@ func exitsyscallfast() bool { osyield() } } - traceGoSysExit(tracestamp()) + traceGoSysExit(0) } }) if ok { diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 0fdea400de..8cfe6b06e6 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -332,16 +332,17 @@ type g struct { waitsince int64 // approx time when the g become blocked waitreason string // if status==Gwaiting schedlink guintptr - preempt bool // preemption signal, duplicates stackguard0 = stackpreempt - paniconfault bool // panic (instead of crash) on unexpected fault address - preemptscan bool // preempted g does scan for gc - gcscandone bool // g has scanned stack; protected by _Gscan bit in status - gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan - throwsplit bool // must not split stack - raceignore int8 // ignore race detection events - sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine - sysexitticks int64 // cputicks when syscall has returned (for tracing) - sysexitseq uint64 // trace seq when syscall has returned (for tracing) + preempt bool // preemption signal, duplicates stackguard0 = stackpreempt + paniconfault bool // panic (instead of crash) on unexpected fault address + preemptscan bool // preempted g does scan for gc + gcscandone bool // g has scanned stack; protected by _Gscan bit in status + gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan + throwsplit bool // must not split stack + raceignore int8 // ignore race detection events + sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine + sysexitticks int64 // cputicks when syscall has returned (for tracing) + traceseq uint64 // trace event sequencer + tracelastp puintptr // last P emitted an event for this goroutine lockedm *m sig uint32 writebuf []byte diff --git a/src/runtime/trace.go b/src/runtime/trace.go index 06fbdfac94..092f941f0c 100644 --- a/src/runtime/trace.go +++ b/src/runtime/trace.go @@ -13,7 +13,6 @@ package runtime import ( - "runtime/internal/atomic" "runtime/internal/sys" "unsafe" ) @@ -27,21 +26,21 @@ const ( traceEvGomaxprocs = 4 // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id] traceEvProcStart = 5 // start of P [timestamp, thread id] traceEvProcStop = 6 // stop of P [timestamp] - traceEvGCStart = 7 // GC start [timestamp, stack id] + traceEvGCStart = 7 // GC start [timestamp, seq, stack id] traceEvGCDone = 8 // GC done [timestamp] traceEvGCScanStart = 9 // GC scan start [timestamp] traceEvGCScanDone = 10 // GC scan done [timestamp] traceEvGCSweepStart = 11 // GC sweep start [timestamp, stack id] traceEvGCSweepDone = 12 // GC sweep done [timestamp] traceEvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id] - traceEvGoStart = 14 // goroutine starts running [timestamp, goroutine id] + traceEvGoStart = 14 // goroutine starts running [timestamp, goroutine id, seq] traceEvGoEnd = 15 // goroutine ends [timestamp] traceEvGoStop = 16 // goroutine stops (like in select{}) [timestamp, stack] traceEvGoSched = 17 // goroutine calls Gosched [timestamp, stack] traceEvGoPreempt = 18 // goroutine is preempted [timestamp, stack] traceEvGoSleep = 19 // goroutine calls Sleep [timestamp, stack] traceEvGoBlock = 20 // goroutine blocks [timestamp, stack] - traceEvGoUnblock = 21 // goroutine is unblocked [timestamp, goroutine id, stack] + 
traceEvGoUnblock = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack] traceEvGoBlockSend = 22 // goroutine blocks on chan send [timestamp, stack] traceEvGoBlockRecv = 23 // goroutine blocks on chan recv [timestamp, stack] traceEvGoBlockSelect = 24 // goroutine blocks on select [timestamp, stack] @@ -49,7 +48,7 @@ const ( traceEvGoBlockCond = 26 // goroutine blocks on Cond [timestamp, stack] traceEvGoBlockNet = 27 // goroutine blocks on network [timestamp, stack] traceEvGoSysCall = 28 // syscall enter [timestamp, stack] - traceEvGoSysExit = 29 // syscall exit [timestamp, goroutine id, real timestamp] + traceEvGoSysExit = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp] traceEvGoSysBlock = 30 // syscall blocks [timestamp] traceEvGoWaiting = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id] traceEvGoInSyscall = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id] @@ -58,7 +57,10 @@ const ( traceEvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id] traceEvFutileWakeup = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp] traceEvString = 37 // string dictionary entry [ID, length, string] - traceEvCount = 38 + traceEvGoStartLocal = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id] + traceEvGoUnblockLocal = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack] + traceEvGoSysExitLocal = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp] + traceEvCount = 41 ) const ( @@ -105,6 +107,7 @@ var trace struct { ticksEnd int64 // cputicks when tracing was stopped timeStart int64 // nanotime when tracing was started timeEnd int64 // nanotime when tracing was stopped + seqGC uint64 // GC start/done sequencer reading traceBufPtr // buffer currently handed off to user empty traceBufPtr // stack of empty buffers fullHead traceBufPtr // queue of full buffers @@ -122,31 +125,9 @@ var trace struct { buf traceBufPtr // global trace buffer, used when running without a p } -var traceseq uint64 // global trace sequence number - -// tracestamp returns a consistent sequence number, time stamp pair -// for use in a trace. We need to make sure that time stamp ordering -// (assuming synchronized CPUs) and sequence ordering match. -// To do that, we increment traceseq, grab ticks, and increment traceseq again. -// We treat odd traceseq as a sign that another thread is in the middle -// of the sequence and spin until it is done. -// Not splitting stack to avoid preemption, just in case the call sites -// that used to call xadd64 and cputicks are sensitive to that. -//go:nosplit -func tracestamp() (seq uint64, ts int64) { - seq = atomic.Load64(&traceseq) - for seq&1 != 0 || !atomic.Cas64(&traceseq, seq, seq+1) { - seq = atomic.Load64(&traceseq) - } - ts = cputicks() - atomic.Store64(&traceseq, seq+2) - return seq >> 1, ts -} - // traceBufHeader is per-P tracing buffer. 
type traceBufHeader struct { link traceBufPtr // in trace.empty/full - lastSeq uint64 // sequence number of last event lastTicks uint64 // when we wrote the last event pos int // next write offset in arr stk [traceStackSize]uintptr // scratch buffer for traceback @@ -194,13 +175,6 @@ func StartTrace() error { return errorString("tracing is already enabled") } - trace.seqStart, trace.ticksStart = tracestamp() - trace.timeStart = nanotime() - trace.headerWritten = false - trace.footerWritten = false - trace.strings = make(map[string]uint64) - trace.stringSeq = 0 - // Can't set trace.enabled yet. While the world is stopped, exitsyscall could // already emit a delayed event (see exitTicks in exitsyscall) if we set trace.enabled here. // That would lead to an inconsistent trace: @@ -213,12 +187,15 @@ func StartTrace() error { for _, gp := range allgs { status := readgstatus(gp) if status != _Gdead { - traceGoCreate(gp, gp.startpc) + traceGoCreate(gp, gp.startpc) // also resets gp.traceseq/tracelastp } if status == _Gwaiting { + // traceEvGoWaiting is implied to have seq=1. + gp.traceseq++ traceEvent(traceEvGoWaiting, -1, uint64(gp.goid)) } if status == _Gsyscall { + gp.traceseq++ traceEvent(traceEvGoInSyscall, -1, uint64(gp.goid)) } else { gp.sysblocktraced = false @@ -226,6 +203,17 @@ func StartTrace() error { } traceProcStart() traceGoStart() + // Note: ticksStart needs to be set after we emit traceEvGoInSyscall events. + // If we do it the other way around, it is possible that exitsyscall will + // query sysexitticks after ticksStart but before traceEvGoInSyscall timestamp. + // It will lead to a false conclusion that cputicks is broken. + trace.ticksStart = cputicks() + trace.timeStart = nanotime() + trace.headerWritten = false + trace.footerWritten = false + trace.strings = make(map[string]uint64) + trace.stringSeq = 0 + trace.seqGC = 0 _g_.m.startingtrace = false trace.enabled = true @@ -382,11 +370,9 @@ func ReadTrace() []byte { var data []byte data = append(data, traceEvFrequency|0<= 0 { @@ -525,7 +506,6 @@ func traceEvent(ev byte, skip int, args ...uint64) { buf.varint(0) lenp = &buf.arr[buf.pos-1] } - buf.varint(seqDiff) buf.varint(tickDiff) for _, a := range args { buf.varint(a) @@ -892,7 +872,8 @@ func traceProcStop(pp *p) { } func traceGCStart() { - traceEvent(traceEvGCStart, 3) + traceEvent(traceEvGCStart, 3, trace.seqGC) + trace.seqGC++ } func traceGCDone() { @@ -916,13 +897,23 @@ func traceGCSweepDone() { } func traceGoCreate(newg *g, pc uintptr) { + newg.traceseq = 0 + newg.tracelastp = getg().m.p // +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum. 
id := trace.stackTab.put([]uintptr{pc + sys.PCQuantum}) traceEvent(traceEvGoCreate, 2, uint64(newg.goid), uint64(id)) } func traceGoStart() { - traceEvent(traceEvGoStart, -1, uint64(getg().m.curg.goid)) + _g_ := getg().m.curg + _p_ := _g_.m.p + _g_.traceseq++ + if _g_.tracelastp == _p_ { + traceEvent(traceEvGoStartLocal, -1, uint64(_g_.goid)) + } else { + _g_.tracelastp = _p_ + traceEvent(traceEvGoStart, -1, uint64(_g_.goid), _g_.traceseq) + } } func traceGoEnd() { @@ -930,10 +921,14 @@ func traceGoEnd() { } func traceGoSched() { + _g_ := getg() + _g_.tracelastp = _g_.m.p traceEvent(traceEvGoSched, 1) } func traceGoPreempt() { + _g_ := getg() + _g_.tracelastp = _g_.m.p traceEvent(traceEvGoPreempt, 1) } @@ -945,19 +940,37 @@ func traceGoPark(traceEv byte, skip int, gp *g) { } func traceGoUnpark(gp *g, skip int) { - traceEvent(traceEvGoUnblock, skip, uint64(gp.goid)) + _p_ := getg().m.p + gp.traceseq++ + if gp.tracelastp == _p_ { + traceEvent(traceEvGoUnblockLocal, skip, uint64(gp.goid)) + } else { + gp.tracelastp = _p_ + traceEvent(traceEvGoUnblock, skip, uint64(gp.goid), gp.traceseq) + } } func traceGoSysCall() { traceEvent(traceEvGoSysCall, 1) } -func traceGoSysExit(seq uint64, ts int64) { - if int64(seq)-int64(trace.seqStart) < 0 { - // The timestamp was obtained during a previous tracing session, ignore. - return - } - traceEvent(traceEvGoSysExit, -1, uint64(getg().m.curg.goid), seq, uint64(ts)/traceTickDiv) +func traceGoSysExit(ts int64) { + if ts != 0 && ts < trace.ticksStart { + // There is a race between the code that initializes sysexitticks + // (in exitsyscall, which runs without a P, and therefore is not + // stopped with the rest of the world) and the code that initializes + // a new trace. The recorded sysexitticks must therefore be treated + // as "best effort". If they are valid for this trace, then great, + // use them for greater accuracy. But if they're not valid for this + // trace, assume that the trace was started after the actual syscall + // exit (but before we actually managed to start the goroutine, + // aka right now), and assign a fresh time stamp to keep the log consistent. + ts = 0 + } + _g_ := getg().m.curg + _g_.traceseq++ + _g_.tracelastp = _g_.m.p + traceEvent(traceEvGoSysExit, -1, uint64(_g_.goid), _g_.traceseq, uint64(ts)/traceTickDiv) } func traceGoSysBlock(pp *p) { -- cgit v1.3 From 1a2cf91f5e9e3dfb0873e61ed6907cc365857f6c Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 11 Mar 2016 16:27:51 -0500 Subject: runtime: split gfree list into with-stacks and without-stacks Currently all free Gs are added to one list. Split this into two lists: one for free Gs with cached stacks and one for Gs without cached stacks. This lets us preferentially allocate Gs that already have a stack, but more importantly, it sets us up to free cached G stacks concurrently. 
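
In list terms, the new scheme is small enough to sketch standalone. The
following is an illustrative model only (gg and freelists are hypothetical
stand-ins, not the runtime's g and schedt, and the real gfget additionally
refills a per-P cache under sched.gflock):

    package main

    import "fmt"

    // gg is a simplified stand-in for the runtime's g.
    type gg struct {
        hasStack bool
        next     *gg
    }

    // freelists mirrors the split in schedt: one list for dead Gs that
    // still own a cached stack, one for dead Gs whose stack was freed.
    type freelists struct {
        stack   *gg
        noStack *gg
    }

    // put files gp on the appropriate list, as gfput/gfpurge do.
    func (f *freelists) put(gp *gg) {
        if gp.hasStack {
            gp.next, f.stack = f.stack, gp
        } else {
            gp.next, f.noStack = f.noStack, gp
        }
    }

    // get prefers Gs that already have a stack, as gfget does.
    func (f *freelists) get() *gg {
        if gp := f.stack; gp != nil {
            f.stack = gp.next
            return gp
        }
        if gp := f.noStack; gp != nil {
            f.noStack = gp.next
            return gp
        }
        return nil
    }

    func main() {
        var f freelists
        f.put(&gg{hasStack: false})
        f.put(&gg{hasStack: true})
        fmt.Println(f.get().hasStack) // true: the stacked G is preferred
    }

Keeping two list heads costs one extra pointer in schedt, but it means a
future concurrent stack-freer only has to walk the list that actually owns
stacks.
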
Change-Id: Idbe486f708997e1c9d166662995283f02d1eeb3c Reviewed-on: https://go-review.googlesource.com/20664 Reviewed-by: Rick Hudson Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot --- src/runtime/proc.go | 34 ++++++++++++++++++++++++++-------- src/runtime/runtime2.go | 7 ++++--- 2 files changed, 30 insertions(+), 11 deletions(-) (limited to 'src/runtime/proc.go') diff --git a/src/runtime/proc.go b/src/runtime/proc.go index a847823da4..9c840882b6 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -2798,8 +2798,13 @@ func gfput(_p_ *p, gp *g) { _p_.gfreecnt-- gp = _p_.gfree _p_.gfree = gp.schedlink.ptr() - gp.schedlink.set(sched.gfree) - sched.gfree = gp + if gp.stack.lo == 0 { + gp.schedlink.set(sched.gfreeNoStack) + sched.gfreeNoStack = gp + } else { + gp.schedlink.set(sched.gfreeStack) + sched.gfreeStack = gp + } sched.ngfree++ } unlock(&sched.gflock) @@ -2811,12 +2816,20 @@ func gfput(_p_ *p, gp *g) { func gfget(_p_ *p) *g { retry: gp := _p_.gfree - if gp == nil && sched.gfree != nil { + if gp == nil && (sched.gfreeStack != nil || sched.gfreeNoStack != nil) { lock(&sched.gflock) - for _p_.gfreecnt < 32 && sched.gfree != nil { + for _p_.gfreecnt < 32 { + if sched.gfreeStack != nil { + // Prefer Gs with stacks. + gp = sched.gfreeStack + sched.gfreeStack = gp.schedlink.ptr() + } else if sched.gfreeNoStack != nil { + gp = sched.gfreeNoStack + sched.gfreeNoStack = gp.schedlink.ptr() + } else { + break + } _p_.gfreecnt++ - gp = sched.gfree - sched.gfree = gp.schedlink.ptr() sched.ngfree-- gp.schedlink.set(_p_.gfree) _p_.gfree = gp @@ -2853,8 +2866,13 @@ func gfpurge(_p_ *p) { _p_.gfreecnt-- gp := _p_.gfree _p_.gfree = gp.schedlink.ptr() - gp.schedlink.set(sched.gfree) - sched.gfree = gp + if gp.stack.lo == 0 { + gp.schedlink.set(sched.gfreeNoStack) + sched.gfreeNoStack = gp + } else { + gp.schedlink.set(sched.gfreeStack) + sched.gfreeStack = gp + } sched.ngfree++ } unlock(&sched.gflock) diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 8cfe6b06e6..0a988ce469 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -523,9 +523,10 @@ type schedt struct { runqsize int32 // Global cache of dead G's. - gflock mutex - gfree *g - ngfree int32 + gflock mutex + gfreeStack *g + gfreeNoStack *g + ngfree int32 // Central cache of sudog structs. sudoglock mutex -- cgit v1.3 From c707d8385639dfda22dc06b112f5f7af78006a1f Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 18 Apr 2016 18:28:36 -0400 Subject: runtime: fix typos in comment about gcscanvalid Change-Id: Id4ad7ebf88a21eba2bc5714b96570ed5cfaed757 Reviewed-on: https://go-review.googlesource.com/22210 Reviewed-by: Ian Lance Taylor Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot --- src/runtime/proc.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/runtime/proc.go') diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 9c840882b6..d5acbee0a7 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -643,17 +643,17 @@ func readgstatus(gp *g) uint32 { return atomic.Load(&gp.atomicstatus) } -// Ownership of gscanvalid: +// Ownership of gcscanvalid: // // If gp is running (meaning status == _Grunning or _Grunning|_Gscan), -// then gp owns gp.gscanvalid, and other goroutines must not modify it. +// then gp owns gp.gcscanvalid, and other goroutines must not modify it. // // Otherwise, a second goroutine can lock the scan state by setting _Gscan -// in the status bit and then modify gscanvalid, and then unlock the scan state. 
+// in the status bit and then modify gcscanvalid, and then unlock the scan state. // // Note that the first condition implies an exception to the second: // if a second goroutine changes gp's status to _Grunning|_Gscan, -// that second goroutine still does not have the right to modify gscanvalid. +// that second goroutine still does not have the right to modify gcscanvalid. // The Gscanstatuses are acting like locks and this releases them. // If it proves to be a performance hit we should be able to make these -- cgit v1.3 From 5b765ce310c594276ea919a9cb455cc894fee999 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 11 Mar 2016 14:08:10 -0500 Subject: runtime: don't clear gcscanvalid in casfrom_Gscanstatus Currently we clear gcscanvalid in both casgstatus and casfrom_Gscanstatus if the new status is _Grunning. This is very important to do in casgstatus. However, this is potentially wrong in casfrom_Gscanstatus because in this case the caller doesn't own gp and hence the write is racy. Unlike the other _Gscan statuses, during _Gscanrunning, the G is still running. This does not indicate that it's transitioning into a running state. The scan simply hasn't happened yet, so it's neither valid nor invalid. Conveniently, this also means clearing gcscanvalid is unnecessary in this case because the G was already in _Grunning, so we can simply remove this code. What will happen instead is that the G will be preempted to scan itself, that scan will set gcscanvalid to true, and then the G will return to _Grunning via casgstatus, clearing gcscanvalid. This fix will become necessary shortly when we start keeping track of the set of G's with dirty stacks, since it will no longer be idempotent to simply set gcscanvalid to false. Change-Id: I688c82e6fbf00d5dbbbff49efa66acb99ee86785 Reviewed-on: https://go-review.googlesource.com/20669 Reviewed-by: Rick Hudson Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot --- src/runtime/proc.go | 3 --- src/runtime/stack.go | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'src/runtime/proc.go') diff --git a/src/runtime/proc.go b/src/runtime/proc.go index d5acbee0a7..dcdc7bedb8 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -681,9 +681,6 @@ func casfrom_Gscanstatus(gp *g, oldval, newval uint32) { dumpgstatus(gp) throw("casfrom_Gscanstatus: gp->status is not in scan state") } - if newval == _Grunning { - gp.gcscanvalid = false - } } // This will return false if the gp is not in the expected status and the cas fails. diff --git a/src/runtime/stack.go b/src/runtime/stack.go index dcb1b06dbd..c4b1fb862e 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -1016,6 +1016,7 @@ func newstack() { gp.preemptscan = false gp.preempt = false casfrom_Gscanstatus(gp, _Gscanwaiting, _Gwaiting) + // This clears gcscanvalid. casgstatus(gp, _Gwaiting, _Grunning) gp.stackguard0 = gp.stack.lo + _StackGuard gogo(&gp.sched) // never return -- cgit v1.3 From 2a889b9d931e58166350f785b16edc51e28ef19b Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 4 Mar 2016 11:58:26 -0500 Subject: runtime: make stack re-scan O(# dirty stacks) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the stack re-scan during mark termination is O(# stacks) because we enqueue a root marking job for every goroutine. It takes ~34ns to process this root marking job for a valid (clean) stack, so at around 300k goroutines we exceed the 10ms pause goal. 
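
(For scale: 300,000 goroutines × ~34ns per clean stack ≈ 10.2ms, which
consumes the entire pause budget before any other mark-termination work.)
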
A non-trivial portion of this time is spent simply taking the cache
miss to check the gcscanvalid flag, so simply optimizing the path that
handles clean stacks can only improve this so much.

Fix this by keeping an explicit list of goroutines with dirty stacks
that need to be rescanned. When a goroutine first transitions to
running after a stack scan and marks its stack dirty, it adds itself
to this list. We enqueue root marking jobs only for the goroutines in
this list, so this improves stack re-scanning asymptotically by
completely eliminating time spent on clean goroutines.

This reduces mark termination time for 500k idle goroutines from 15ms
to 238µs. Overall performance effect is negligible.

name \ 95%ile-time/markTerm      old           new         delta
IdleGs/gs:500000/gomaxprocs:12   15000µs ± 0%  238µs ± 5%  -98.41%  (p=0.000 n=10+10)

name              old time/op  new time/op  delta
XBenchGarbage-12  2.30ms ± 3%  2.29ms ± 1%  -0.43%  (p=0.049 n=17+18)

name                      old time/op    new time/op    delta
BinaryTree17-12             2.57s ± 3%     2.59s ± 2%    ~      (p=0.141 n=19+20)
Fannkuch11-12               2.09s ± 0%     2.10s ± 1%  +0.53%   (p=0.000 n=19+19)
FmtFprintfEmpty-12         45.3ns ± 3%    45.2ns ± 2%    ~      (p=0.845 n=20+20)
FmtFprintfString-12         129ns ± 0%     127ns ± 0%  -1.55%   (p=0.000 n=16+16)
FmtFprintfInt-12            123ns ± 0%     119ns ± 1%  -3.24%   (p=0.000 n=19+19)
FmtFprintfIntInt-12         195ns ± 1%     189ns ± 1%  -3.11%   (p=0.000 n=17+17)
FmtFprintfPrefixedInt-12    193ns ± 1%     187ns ± 1%  -3.06%   (p=0.000 n=19+19)
FmtFprintfFloat-12          254ns ± 0%     255ns ± 1%  +0.35%   (p=0.001 n=14+17)
FmtManyArgs-12              781ns ± 0%     770ns ± 0%  -1.48%   (p=0.000 n=16+19)
GobDecode-12               7.00ms ± 1%    6.98ms ± 1%    ~      (p=0.563 n=19+19)
GobEncode-12               5.91ms ± 1%    5.92ms ± 0%    ~      (p=0.118 n=19+18)
Gzip-12                     219ms ± 1%     215ms ± 1%  -1.81%   (p=0.000 n=18+18)
Gunzip-12                  37.2ms ± 0%    37.4ms ± 0%  +0.45%   (p=0.000 n=17+19)
HTTPClientServer-12        76.9µs ± 3%    77.5µs ± 2%  +0.81%   (p=0.030 n=20+19)
JSONEncode-12              15.0ms ± 0%    14.8ms ± 1%  -0.88%   (p=0.001 n=15+19)
JSONDecode-12              50.6ms ± 0%    53.2ms ± 2%  +5.07%   (p=0.000 n=17+19)
Mandelbrot200-12           4.05ms ± 0%    4.05ms ± 1%    ~      (p=0.581 n=16+17)
GoParse-12                 3.34ms ± 1%    3.30ms ± 1%  -1.21%   (p=0.000 n=15+20)
RegexpMatchEasy0_32-12     69.6ns ± 1%    69.8ns ± 2%    ~      (p=0.566 n=19+19)
RegexpMatchEasy0_1K-12      238ns ± 1%     236ns ± 0%  -0.91%   (p=0.000 n=17+13)
RegexpMatchEasy1_32-12     69.8ns ± 1%    70.0ns ± 1%  +0.23%   (p=0.026 n=17+16)
RegexpMatchEasy1_1K-12      371ns ± 1%     363ns ± 1%  -2.07%   (p=0.000 n=19+19)
RegexpMatchMedium_32-12     107ns ± 2%     106ns ± 1%  -0.51%   (p=0.031 n=18+20)
RegexpMatchMedium_1K-12    33.0µs ± 0%    32.9µs ± 0%  -0.30%   (p=0.004 n=16+16)
RegexpMatchHard_32-12      1.70µs ± 0%    1.70µs ± 0%  +0.45%   (p=0.000 n=16+17)
RegexpMatchHard_1K-12      51.1µs ± 2%    51.4µs ± 1%  +0.53%   (p=0.000 n=17+19)
Revcomp-12                  378ms ± 1%     385ms ± 1%  +1.92%   (p=0.000 n=19+18)
Template-12                64.3ms ± 2%    65.0ms ± 2%  +1.09%   (p=0.001 n=19+19)
TimeParse-12                315ns ± 1%     317ns ± 2%    ~      (p=0.108 n=18+20)
TimeFormat-12               360ns ± 1%     337ns ± 0%  -6.30%   (p=0.000 n=18+13)
[Geo mean]                 51.8µs         51.6µs       -0.48%

Change-Id: Icf8994671476840e3998236e15407a505d4c760c
Reviewed-on: https://go-review.googlesource.com/20700
Reviewed-by: Rick Hudson
Run-TryBot: Austin Clements
TryBot-Result: Gobot Gobot
---
 src/runtime/mgc.go      |  18 ++++++-
 src/runtime/mgcmark.go  | 133 +++++++++++++++++++++++++++++++++++++++++-------
 src/runtime/proc.go     |  33 +++++++++++-
 src/runtime/runtime2.go |  11 +++-
 4 files changed, 171 insertions(+), 24 deletions(-)

(limited to 'src/runtime/proc.go')

diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 425ed3a160..328ff4cd88 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -762,7 +762,7 @@ var work struct
{ alldone note // Number of roots of various root types. Set by gcMarkRootPrepare. - nDataRoots, nBSSRoots, nSpanRoots, nStackRoots int + nDataRoots, nBSSRoots, nSpanRoots, nStackRoots, nRescanRoots int // markrootDone indicates that roots have been marked at least // once during the current GC cycle. This is checked by root @@ -830,6 +830,14 @@ var work struct { head, tail guintptr } + // rescan is a list of G's that need to be rescanned during + // mark termination. A G adds itself to this list when it + // first invalidates its stack scan. + rescan struct { + lock mutex + list []guintptr + } + // Timing/utilization stats for this cycle. stwprocs, maxprocs int32 tSweepTerm, tMark, tMarkTerm, tEnd int64 // nanotime() of phase start @@ -1736,14 +1744,22 @@ func gcCopySpans() { func gcResetMarkState() { // This may be called during a concurrent phase, so make sure // allgs doesn't change. + if !(gcphase == _GCoff || gcphase == _GCmarktermination) { + // Accessing gcRescan is unsafe. + throw("bad GC phase") + } lock(&allglock) for _, gp := range allgs { gp.gcscandone = false // set to true in gcphasework gp.gcscanvalid = false // stack has not been scanned + gp.gcRescan = -1 gp.gcAssistBytes = 0 } unlock(&allglock) + // Clear rescan list. + work.rescan.list = work.rescan.list[:0] + work.bytesMarked = 0 work.initialHeapLive = memstats.heap_live work.markrootDone = false diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index bad7c7e92b..7f481dee22 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -32,6 +32,8 @@ const ( // // The caller must have call gcCopySpans(). // +// The world must be stopped. +// //go:nowritebarrier func gcMarkRootPrepare() { // Compute how many data and BSS root blocks there are. @@ -63,24 +65,31 @@ func gcMarkRootPrepare() { // after concurrent mark. In STW GC, this will happen // during mark termination. work.nSpanRoots = (len(work.spans) + rootBlockSpans - 1) / rootBlockSpans + + // On the first markroot, we need to scan all Gs. Gs + // may be created after this point, but it's okay that + // we ignore them because they begin life without any + // roots, so there's nothing to scan, and any roots + // they create during the concurrent phase will be + // scanned during mark termination. During mark + // termination, allglen isn't changing, so we'll scan + // all Gs. + work.nStackRoots = int(atomic.Loaduintptr(&allglen)) + work.nRescanRoots = 0 } else { // We've already scanned span roots and kept the scan // up-to-date during concurrent mark. work.nSpanRoots = 0 - } - // Snapshot of allglen. During concurrent scan, we just need - // to be consistent about how many markroot jobs we create and - // how many Gs we check. Gs may be created after this point, - // but it's okay that we ignore them because they begin life - // without any roots, so there's nothing to scan, and any - // roots they create during the concurrent phase will be - // scanned during mark termination. During mark termination, - // allglen isn't changing, so we'll scan all Gs. - work.nStackRoots = int(atomic.Loaduintptr(&allglen)) + // On the second pass of markroot, we're just scanning + // dirty stacks. It's safe to access rescan since the + // world is stopped. 
+ work.nStackRoots = 0 + work.nRescanRoots = len(work.rescan.list) + } work.markrootNext = 0 - work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots) + work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots + work.nRescanRoots) } // gcMarkRootCheck checks that all roots have been scanned. It is @@ -92,11 +101,24 @@ func gcMarkRootCheck() { } lock(&allglock) - // Check that gc work is done. - for i := 0; i < work.nStackRoots; i++ { - gp := allgs[i] - if !gp.gcscandone { - throw("scan missed a g") + // Check that stacks have been scanned. + if gcphase == _GCmarktermination { + for i := 0; i < len(allgs); i++ { + gp := allgs[i] + if !(gp.gcscandone && gp.gcscanvalid) && readgstatus(gp) != _Gdead { + println("gp", gp, "goid", gp.goid, + "status", readgstatus(gp), + "gcscandone", gp.gcscandone, + "gcscanvalid", gp.gcscanvalid) + throw("scan missed a g") + } + } + } else { + for i := 0; i < work.nStackRoots; i++ { + gp := allgs[i] + if !gp.gcscandone { + throw("scan missed a g") + } } } unlock(&allglock) @@ -109,12 +131,18 @@ var oneptrmask = [...]uint8{1} // // Preemption must be disabled (because this uses a gcWork). // +// nowritebarrier is only advisory here. +// //go:nowritebarrier func markroot(gcw *gcWork, i uint32) { + // TODO(austin): This is a bit ridiculous. Compute and store + // the bases in gcMarkRootPrepare instead of the counts. baseData := uint32(fixedRootCount) baseBSS := baseData + uint32(work.nDataRoots) baseSpans := baseBSS + uint32(work.nBSSRoots) baseStacks := baseSpans + uint32(work.nSpanRoots) + baseRescan := baseStacks + uint32(work.nStackRoots) + end := baseRescan + uint32(work.nRescanRoots) // Note: if you add a case here, please also update heapdump.go:dumproots. switch { @@ -151,10 +179,14 @@ func markroot(gcw *gcWork, i uint32) { default: // the rest is scanning goroutine stacks - if uintptr(i-baseStacks) >= allglen { + var gp *g + if baseStacks <= i && i < baseRescan { + gp = allgs[i-baseStacks] + } else if baseRescan <= i && i < end { + gp = work.rescan.list[i-baseRescan].ptr() + } else { throw("markroot: bad index") } - gp := allgs[i-baseStacks] // remember when we've first observed the G blocked // needed only to output in traceback @@ -163,13 +195,14 @@ func markroot(gcw *gcWork, i uint32) { gp.waitsince = work.tstart } - if gcphase != _GCmarktermination && gp.startpc == gcBgMarkWorkerPC { + if gcphase != _GCmarktermination && gp.startpc == gcBgMarkWorkerPC && readgstatus(gp) != _Gdead { // GC background workers may be // non-preemptible, so we may deadlock if we // try to scan them during a concurrent phase. // They also have tiny stacks, so just ignore // them until mark termination. gp.gcscandone = true + queueRescan(gp) break } @@ -721,6 +754,14 @@ func scanstack(gp *g) { gcw.dispose() } gcUnlockStackBarriers(gp) + if gcphase == _GCmark { + // gp may have added itself to the rescan list between + // when GC started and now. It's clean now, so remove + // it. This isn't safe during mark termination because + // mark termination is consuming this list, but it's + // also not necessary. + dequeueRescan(gp) + } gp.gcscanvalid = true } @@ -797,6 +838,60 @@ func scanframeworker(frame *stkframe, cache *pcvalueCache, gcw *gcWork) { } } +// queueRescan adds gp to the stack rescan list and clears +// gp.gcscanvalid. The caller must own gp and ensure that gp isn't +// already on the rescan list. 
+func queueRescan(gp *g) { + if gcphase == _GCoff { + gp.gcscanvalid = false + return + } + if gp.gcRescan != -1 { + throw("g already on rescan list") + } + + lock(&work.rescan.lock) + gp.gcscanvalid = false + + // Recheck gcphase under the lock in case there was a phase change. + if gcphase == _GCoff { + unlock(&work.rescan.lock) + return + } + if len(work.rescan.list) == cap(work.rescan.list) { + throw("rescan list overflow") + } + n := len(work.rescan.list) + gp.gcRescan = int32(n) + work.rescan.list = work.rescan.list[:n+1] + work.rescan.list[n].set(gp) + unlock(&work.rescan.lock) +} + +// dequeueRescan removes gp from the stack rescan list, if gp is on +// the rescan list. The caller must own gp. +func dequeueRescan(gp *g) { + if gp.gcRescan == -1 { + return + } + if gcphase == _GCoff { + gp.gcRescan = -1 + return + } + + lock(&work.rescan.lock) + if work.rescan.list[gp.gcRescan].ptr() != gp { + throw("bad dequeueRescan") + } + // Careful: gp may itself be the last G on the list. + last := work.rescan.list[len(work.rescan.list)-1] + work.rescan.list[gp.gcRescan] = last + last.ptr().gcRescan = gp.gcRescan + gp.gcRescan = -1 + work.rescan.list = work.rescan.list[:len(work.rescan.list)-1] + unlock(&work.rescan.lock) +} + type gcDrainFlags int const ( diff --git a/src/runtime/proc.go b/src/runtime/proc.go index dcdc7bedb8..ee732e3cf7 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -402,6 +402,16 @@ func allgadd(gp *g) { lock(&allglock) allgs = append(allgs, gp) allglen = uintptr(len(allgs)) + + // Grow GC rescan list if necessary. + if len(allgs) > cap(work.rescan.list) { + lock(&work.rescan.lock) + l := work.rescan.list + // Let append do the heavy lifting, but keep the + // length the same. + work.rescan.list = append(l[:cap(l)], 0)[:len(l)] + unlock(&work.rescan.lock) + } unlock(&allglock) } @@ -754,8 +764,9 @@ func casgstatus(gp *g, oldval, newval uint32) { nextYield = nanotime() + yieldDelay/2 } } - if newval == _Grunning { - gp.gcscanvalid = false + if newval == _Grunning && gp.gcscanvalid { + // Run queueRescan on the system stack so it has more space. + systemstack(func() { queueRescan(gp) }) } } @@ -1405,6 +1416,8 @@ func newextram() { gp.syscallpc = gp.sched.pc gp.syscallsp = gp.sched.sp gp.stktopsp = gp.sched.sp + gp.gcscanvalid = true // fresh G, so no dequeueRescan necessary + gp.gcRescan = -1 // malg returns status as Gidle, change to Gsyscall before adding to allg // where GC will see it. casgstatus(gp, _Gidle, _Gsyscall) @@ -2210,6 +2223,10 @@ func goexit0(gp *g) { gp.waitreason = "" gp.param = nil + // Note that gp's stack scan is now "valid" because it has no + // stack. We could dequeueRescan, but that takes a lock and + // isn't really necessary. + gp.gcscanvalid = true dropg() if _g_.m.locked&^_LockExternal != 0 { @@ -2700,6 +2717,7 @@ func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr if newg == nil { newg = malg(_StackMin) casgstatus(newg, _Gidle, _Gdead) + newg.gcRescan = -1 allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack. } if newg.stack.hi == 0 { @@ -2733,6 +2751,17 @@ func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr if isSystemGoroutine(newg) { atomic.Xadd(&sched.ngsys, +1) } + // The stack is dirty from the argument frame, so queue it for + // scanning. Do this before setting it to runnable so we still + // own the G. If we're recycling a G, it may already be on the + // rescan list. 
+ if newg.gcRescan == -1 { + queueRescan(newg) + } else { + // The recycled G is already on the rescan list. Just + // mark the stack dirty. + newg.gcscanvalid = false + } casgstatus(newg, _Gdead, _Grunnable) if _p_.goidcache == _p_.goidcacheend { diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 0a988ce469..d35b897c3e 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -336,7 +336,7 @@ type g struct { paniconfault bool // panic (instead of crash) on unexpected fault address preemptscan bool // preempted g does scan for gc gcscandone bool // g has scanned stack; protected by _Gscan bit in status - gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan + gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan; transition from true to false by calling queueRescan and false to true by calling dequeueRescan throwsplit bool // must not split stack raceignore int8 // ignore race detection events sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine @@ -354,7 +354,14 @@ type g struct { racectx uintptr waiting *sudog // sudog structures this g is waiting on (that have a valid elem ptr); in lock order - // Per-G gcController state + // Per-G GC state + + // gcRescan is this G's index in work.rescan.list. If this is + // -1, this G is not on the rescan list. + // + // If gcphase != _GCoff and this G is visible to the garbage + // collector, writes to this are protected by work.rescan.lock. + gcRescan int32 // gcAssistBytes is this G's GC assist credit in terms of // bytes allocated. If this is positive, then the G has credit -- cgit v1.3
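
A closing note on the data structure introduced above: queueRescan and
dequeueRescan implement an index-tracked set with swap-with-last removal,
which is what makes both operations O(1). A minimal standalone model of the
same pattern (hypothetical simplified types; the real code additionally
guards the list with work.rescan.lock and consults gcphase):

    package main

    import "fmt"

    // G is a simplified stand-in for the runtime's g; rescan plays the
    // role of gcRescan: the G's index in the list, or -1 if not queued.
    type G struct {
        id     int
        rescan int
    }

    type rescanList struct {
        list []*G
    }

    // queue adds g to the list, recording its slot so removal is O(1).
    func (r *rescanList) queue(g *G) {
        if g.rescan != -1 {
            panic("g already on rescan list")
        }
        g.rescan = len(r.list)
        r.list = append(r.list, g)
    }

    // dequeue removes g by swapping the last element into g's slot.
    func (r *rescanList) dequeue(g *G) {
        if g.rescan == -1 {
            return
        }
        // Careful: g may itself be the last G on the list.
        last := r.list[len(r.list)-1]
        r.list[g.rescan] = last
        last.rescan = g.rescan
        g.rescan = -1
        r.list = r.list[:len(r.list)-1]
    }

    func main() {
        var r rescanList
        gs := []*G{{1, -1}, {2, -1}, {3, -1}}
        for _, g := range gs {
            r.queue(g)
        }
        r.dequeue(gs[0])
        for _, g := range r.list {
            fmt.Printf("g%d at slot %d\n", g.id, g.rescan)
        }
        // Prints:
        // g3 at slot 0
        // g2 at slot 1
    }

The back-pointer (gcRescan in the real g) is what allows removal without a
search, and the swap keeps the list dense so markroot can index it directly.
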