diff options
| author | Keith Randall <khr@golang.org> | 2015-05-28 13:49:20 -0700 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2015-05-28 13:51:18 -0700 |
| commit | 067e8dfd82163ddcbde248dbe5a1187a417e5d36 (patch) | |
| tree | 7bfb46b901d03498c7739c92bec21d81d3a2c485 /src/runtime | |
| parent | 247786c1745abc0c7185f7c15ca256edf68ed6d6 (diff) | |
| parent | ccc037699e2966b7c79ba84c67471cef5e67a3b8 (diff) | |
| download | go-067e8dfd82163ddcbde248dbe5a1187a417e5d36.tar.xz | |
[dev.ssa] Merge remote-tracking branch 'origin/master' into mergebranch
Semi-regular merge of tip to dev.ssa.
Complicated a bit by the move of cmd/internal/* to cmd/compile/internal/*.
Change-Id: I1c66d3c29bb95cce4a53c5a3476373aa5245303d
Diffstat (limited to 'src/runtime')
57 files changed, 2294 insertions, 1425 deletions
diff --git a/src/runtime/arch1_386.go b/src/runtime/arch1_386.go index b024d7a51f..d41696a6d6 100644 --- a/src/runtime/arch1_386.go +++ b/src/runtime/arch1_386.go @@ -5,12 +5,11 @@ package runtime const ( - thechar = '8' - _BigEndian = 0 - _CacheLineSize = 64 - _RuntimeGogoBytes = 64 - _PhysPageSize = goos_nacl*65536 + (1-goos_nacl)*4096 // 4k normally; 64k on NaCl - _PCQuantum = 1 - _Int64Align = 4 - hugePageSize = 1 << 21 + thechar = '8' + _BigEndian = 0 + _CacheLineSize = 64 + _PhysPageSize = goos_nacl*65536 + (1-goos_nacl)*4096 // 4k normally; 64k on NaCl + _PCQuantum = 1 + _Int64Align = 4 + hugePageSize = 1 << 21 ) diff --git a/src/runtime/arch1_amd64.go b/src/runtime/arch1_amd64.go index 932b2b7c55..15f4cc65fe 100644 --- a/src/runtime/arch1_amd64.go +++ b/src/runtime/arch1_amd64.go @@ -5,12 +5,11 @@ package runtime const ( - thechar = '6' - _BigEndian = 0 - _CacheLineSize = 64 - _RuntimeGogoBytes = 80 + (goos_solaris)*16 - _PhysPageSize = 4096 - _PCQuantum = 1 - _Int64Align = 8 - hugePageSize = 1 << 21 + thechar = '6' + _BigEndian = 0 + _CacheLineSize = 64 + _PhysPageSize = 4096 + _PCQuantum = 1 + _Int64Align = 8 + hugePageSize = 1 << 21 ) diff --git a/src/runtime/arch1_amd64p32.go b/src/runtime/arch1_amd64p32.go index 79421e848a..3c5456f933 100644 --- a/src/runtime/arch1_amd64p32.go +++ b/src/runtime/arch1_amd64p32.go @@ -5,12 +5,11 @@ package runtime const ( - thechar = '6' - _BigEndian = 0 - _CacheLineSize = 64 - _RuntimeGogoBytes = 64 - _PhysPageSize = 65536*goos_nacl + 4096*(1-goos_nacl) - _PCQuantum = 1 - _Int64Align = 8 - hugePageSize = 1 << 21 + thechar = '6' + _BigEndian = 0 + _CacheLineSize = 64 + _PhysPageSize = 65536*goos_nacl + 4096*(1-goos_nacl) + _PCQuantum = 1 + _Int64Align = 8 + hugePageSize = 1 << 21 ) diff --git a/src/runtime/arch1_arm.go b/src/runtime/arch1_arm.go index c3fe4f0cb3..0ec2093881 100644 --- a/src/runtime/arch1_arm.go +++ b/src/runtime/arch1_arm.go @@ -5,12 +5,11 @@ package runtime const ( - thechar = '5' - _BigEndian = 0 - _CacheLineSize = 32 - _RuntimeGogoBytes = 60 - _PhysPageSize = 65536*goos_nacl + 4096*(1-goos_nacl) - _PCQuantum = 4 - _Int64Align = 4 - hugePageSize = 0 + thechar = '5' + _BigEndian = 0 + _CacheLineSize = 32 + _PhysPageSize = 65536*goos_nacl + 4096*(1-goos_nacl) + _PCQuantum = 4 + _Int64Align = 4 + hugePageSize = 0 ) diff --git a/src/runtime/arch1_arm64.go b/src/runtime/arch1_arm64.go index 549a635ca4..1a3165c8b7 100644 --- a/src/runtime/arch1_arm64.go +++ b/src/runtime/arch1_arm64.go @@ -5,12 +5,11 @@ package runtime const ( - thechar = '7' - _BigEndian = 0 - _CacheLineSize = 32 - _RuntimeGogoBytes = 64 - _PhysPageSize = 4096*(1-goos_darwin) + 16384*goos_darwin - _PCQuantum = 4 - _Int64Align = 8 - hugePageSize = 0 + thechar = '7' + _BigEndian = 0 + _CacheLineSize = 32 + _PhysPageSize = 4096*(1-goos_darwin) + 16384*goos_darwin + _PCQuantum = 4 + _Int64Align = 8 + hugePageSize = 0 ) diff --git a/src/runtime/arch1_ppc64.go b/src/runtime/arch1_ppc64.go index ee453c09f2..de6dd91401 100644 --- a/src/runtime/arch1_ppc64.go +++ b/src/runtime/arch1_ppc64.go @@ -5,12 +5,11 @@ package runtime const ( - thechar = '9' - _BigEndian = 1 - _CacheLineSize = 64 - _RuntimeGogoBytes = 72 - _PhysPageSize = 65536 - _PCQuantum = 4 - _Int64Align = 8 - hugePageSize = 0 + thechar = '9' + _BigEndian = 1 + _CacheLineSize = 64 + _PhysPageSize = 65536 + _PCQuantum = 4 + _Int64Align = 8 + hugePageSize = 0 ) diff --git a/src/runtime/arch1_ppc64le.go b/src/runtime/arch1_ppc64le.go index aa028a10f3..9a55c71101 100644 --- a/src/runtime/arch1_ppc64le.go +++ b/src/runtime/arch1_ppc64le.go @@ -5,12 +5,11 @@ package runtime const ( - thechar = '9' - _BigEndian = 0 - _CacheLineSize = 64 - _RuntimeGogoBytes = 72 - _PhysPageSize = 65536 - _PCQuantum = 4 - _Int64Align = 8 - hugePageSize = 0 + thechar = '9' + _BigEndian = 0 + _CacheLineSize = 64 + _PhysPageSize = 65536 + _PCQuantum = 4 + _Int64Align = 8 + hugePageSize = 0 ) diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 36353d108f..0f9aeb8f37 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -1693,8 +1693,10 @@ TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8 RET // This is called from .init_array and follows the platform, not Go, ABI. -TEXT runtime·addmoduledata(SB),NOSPLIT,$0-8 +TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0 + PUSHQ R15 // The access to global variables below implicitly uses R15, which is callee-save MOVQ runtime·lastmoduledatap(SB), AX MOVQ DI, moduledata_next(AX) MOVQ DI, runtime·lastmoduledatap(SB) + POPQ R15 RET diff --git a/src/runtime/atomic_pointer.go b/src/runtime/atomic_pointer.go index 50a30242d9..f84afe0362 100644 --- a/src/runtime/atomic_pointer.go +++ b/src/runtime/atomic_pointer.go @@ -20,18 +20,12 @@ import "unsafe" func atomicstorep(ptr unsafe.Pointer, new unsafe.Pointer) { atomicstorep1(noescape(ptr), new) writebarrierptr_nostore((*uintptr)(ptr), uintptr(new)) - if mheap_.shadow_enabled { - writebarrierptr_noshadow((*uintptr)(noescape(ptr))) - } } //go:nosplit func xchgp(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer { old := xchgp1(noescape(ptr), new) writebarrierptr_nostore((*uintptr)(ptr), uintptr(new)) - if mheap_.shadow_enabled { - writebarrierptr_noshadow((*uintptr)(noescape(ptr))) - } return old } @@ -41,9 +35,6 @@ func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool { return false } writebarrierptr_nostore((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) - if mheap_.shadow_enabled { - writebarrierptr_noshadow((*uintptr)(noescape(unsafe.Pointer(ptr)))) - } return true } @@ -60,9 +51,6 @@ func sync_atomic_StorePointer(ptr *unsafe.Pointer, new unsafe.Pointer) { sync_atomic_StoreUintptr((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) atomicstorep1(noescape(unsafe.Pointer(ptr)), new) writebarrierptr_nostore((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) - if mheap_.shadow_enabled { - writebarrierptr_noshadow((*uintptr)(noescape(unsafe.Pointer(ptr)))) - } } //go:linkname sync_atomic_SwapUintptr sync/atomic.SwapUintptr @@ -73,9 +61,6 @@ func sync_atomic_SwapUintptr(ptr *uintptr, new uintptr) uintptr func sync_atomic_SwapPointer(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer { old := unsafe.Pointer(sync_atomic_SwapUintptr((*uintptr)(noescape(ptr)), uintptr(new))) writebarrierptr_nostore((*uintptr)(ptr), uintptr(new)) - if mheap_.shadow_enabled { - writebarrierptr_noshadow((*uintptr)(noescape(ptr))) - } return old } @@ -89,8 +74,5 @@ func sync_atomic_CompareAndSwapPointer(ptr *unsafe.Pointer, old, new unsafe.Poin return false } writebarrierptr_nostore((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) - if mheap_.shadow_enabled { - writebarrierptr_noshadow((*uintptr)(noescape(unsafe.Pointer(ptr)))) - } return true } diff --git a/src/runtime/debug.go b/src/runtime/debug.go index 3ecaac10bc..9aec3b03e0 100644 --- a/src/runtime/debug.go +++ b/src/runtime/debug.go @@ -22,17 +22,12 @@ func GOMAXPROCS(n int) int { return ret } - semacquire(&worldsema, false) - gp := getg() - gp.m.preemptoff = "GOMAXPROCS" - systemstack(stoptheworld) + stopTheWorld("GOMAXPROCS") - // newprocs will be processed by starttheworld + // newprocs will be processed by startTheWorld newprocs = int32(n) - gp.m.preemptoff = "" - semrelease(&worldsema) - systemstack(starttheworld) + startTheWorld() return ret } diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index e0c8b17bd3..3fddcc868f 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -76,24 +76,17 @@ func ParForIters(desc *ParFor, tid uint32) (uint32, uint32) { } func GCMask(x interface{}) (ret []byte) { - e := (*eface)(unsafe.Pointer(&x)) - s := (*slice)(unsafe.Pointer(&ret)) systemstack(func() { - var len uintptr - var a *byte - getgcmask(e.data, e._type, &a, &len) - s.array = unsafe.Pointer(a) - s.len = int(len) - s.cap = s.len + ret = getgcmask(x) }) return } func RunSchedLocalQueueTest() { - systemstack(testSchedLocalQueue) + testSchedLocalQueue() } func RunSchedLocalQueueStealTest() { - systemstack(testSchedLocalQueueSteal) + testSchedLocalQueueSteal() } var StringHash = stringHash @@ -106,11 +99,6 @@ var MemclrBytes = memclrBytes var HashLoad = &hashLoad -// For testing. -func GogoBytes() int32 { - return _RuntimeGogoBytes -} - // entry point for testing func GostringW(w []uint16) (s string) { systemstack(func() { @@ -133,3 +121,34 @@ func Envs() []string { return envs } func SetEnvs(e []string) { envs = e } var BigEndian = _BigEndian + +// For benchmarking. + +func BenchSetType(n int, x interface{}) { + e := *(*eface)(unsafe.Pointer(&x)) + t := e._type + var size uintptr + var p unsafe.Pointer + switch t.kind & kindMask { + case _KindPtr: + t = (*ptrtype)(unsafe.Pointer(t)).elem + size = t.size + p = e.data + case _KindSlice: + slice := *(*struct { + ptr unsafe.Pointer + len, cap uintptr + })(e.data) + t = (*slicetype)(unsafe.Pointer(t)).elem + size = t.size * slice.len + p = slice.ptr + } + allocSize := roundupsize(size) + systemstack(func() { + for i := 0; i < n; i++ { + heapBitsSetType(uintptr(p), allocSize, size, t) + } + }) +} + +const PtrSize = ptrSize diff --git a/src/runtime/extern.go b/src/runtime/extern.go index 540d7b5124..476c3c5ae3 100644 --- a/src/runtime/extern.go +++ b/src/runtime/extern.go @@ -58,18 +58,6 @@ a comma-separated list of name=val pairs. Supported names are: scavenge: scavenge=1 enables debugging mode of heap scavenger. - wbshadow: setting wbshadow=1 enables a shadow copy of the heap - used to detect missing write barriers at the next write to a - given location. If a bug can be detected in this mode it is - typically easy to understand, since the crash says quite - clearly what kind of word has missed a write barrier. - Setting wbshadow=2 checks the shadow copy during garbage - collection as well. Bugs detected at garbage collection can be - difficult to understand, because there is no context for what - the found word means. Typically you have to reproduce the - problem with allocfreetrace=1 in order to understand the type - of the badly updated word. - gccheckmark: setting gccheckmark=1 enables verification of the garbage collector's concurrent mark phase by performing a second mark pass while the world is stopped. If the second diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 6abec4cca7..e3e0c3a583 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -6,6 +6,7 @@ package runtime_test import ( "os" + "reflect" "runtime" "runtime/debug" "testing" @@ -197,45 +198,166 @@ func TestHugeGCInfo(t *testing.T) { } } -func BenchmarkSetTypeNoPtr1(b *testing.B) { - type NoPtr1 struct { - p uintptr - } - var p *NoPtr1 - for i := 0; i < b.N; i++ { - p = &NoPtr1{} - } - _ = p +func BenchmarkSetTypePtr(b *testing.B) { + benchSetType(b, new(*byte)) } -func BenchmarkSetTypeNoPtr2(b *testing.B) { - type NoPtr2 struct { - p, q uintptr - } - var p *NoPtr2 - for i := 0; i < b.N; i++ { - p = &NoPtr2{} - } - _ = p + +func BenchmarkSetTypePtr8(b *testing.B) { + benchSetType(b, new([8]*byte)) } -func BenchmarkSetTypePtr1(b *testing.B) { - type Ptr1 struct { - p *byte - } - var p *Ptr1 - for i := 0; i < b.N; i++ { - p = &Ptr1{} - } - _ = p + +func BenchmarkSetTypePtr16(b *testing.B) { + benchSetType(b, new([16]*byte)) } -func BenchmarkSetTypePtr2(b *testing.B) { - type Ptr2 struct { - p, q *byte - } - var p *Ptr2 - for i := 0; i < b.N; i++ { - p = &Ptr2{} + +func BenchmarkSetTypePtr32(b *testing.B) { + benchSetType(b, new([32]*byte)) +} + +func BenchmarkSetTypePtr64(b *testing.B) { + benchSetType(b, new([64]*byte)) +} + +func BenchmarkSetTypePtr126(b *testing.B) { + benchSetType(b, new([126]*byte)) +} + +func BenchmarkSetTypePtr128(b *testing.B) { + benchSetType(b, new([128]*byte)) +} + +func BenchmarkSetTypePtrSlice(b *testing.B) { + benchSetType(b, make([]*byte, 1<<10)) +} + +type Node1 struct { + Value [1]uintptr + Left, Right *byte +} + +func BenchmarkSetTypeNode1(b *testing.B) { + benchSetType(b, new(Node1)) +} + +func BenchmarkSetTypeNode1Slice(b *testing.B) { + benchSetType(b, make([]Node1, 32)) +} + +type Node8 struct { + Value [8]uintptr + Left, Right *byte +} + +func BenchmarkSetTypeNode8(b *testing.B) { + benchSetType(b, new(Node8)) +} + +func BenchmarkSetTypeNode8Slice(b *testing.B) { + benchSetType(b, make([]Node8, 32)) +} + +type Node64 struct { + Value [64]uintptr + Left, Right *byte +} + +func BenchmarkSetTypeNode64(b *testing.B) { + benchSetType(b, new(Node64)) +} + +func BenchmarkSetTypeNode64Slice(b *testing.B) { + benchSetType(b, make([]Node64, 32)) +} + +type Node64Dead struct { + Left, Right *byte + Value [64]uintptr +} + +func BenchmarkSetTypeNode64Dead(b *testing.B) { + benchSetType(b, new(Node64Dead)) +} + +func BenchmarkSetTypeNode64DeadSlice(b *testing.B) { + benchSetType(b, make([]Node64Dead, 32)) +} + +type Node124 struct { + Value [124]uintptr + Left, Right *byte +} + +func BenchmarkSetTypeNode124(b *testing.B) { + benchSetType(b, new(Node124)) +} + +func BenchmarkSetTypeNode124Slice(b *testing.B) { + benchSetType(b, make([]Node124, 32)) +} + +type Node126 struct { + Value [126]uintptr + Left, Right *byte +} + +func BenchmarkSetTypeNode126(b *testing.B) { + benchSetType(b, new(Node126)) +} + +func BenchmarkSetTypeNode126Slice(b *testing.B) { + benchSetType(b, make([]Node126, 32)) +} + +type Node128 struct { + Value [128]uintptr + Left, Right *byte +} + +func BenchmarkSetTypeNode128(b *testing.B) { + benchSetType(b, new(Node128)) +} + +func BenchmarkSetTypeNode128Slice(b *testing.B) { + benchSetType(b, make([]Node128, 32)) +} + +type Node130 struct { + Value [130]uintptr + Left, Right *byte +} + +func BenchmarkSetTypeNode130(b *testing.B) { + benchSetType(b, new(Node130)) +} + +func BenchmarkSetTypeNode130Slice(b *testing.B) { + benchSetType(b, make([]Node130, 32)) +} + +type Node1024 struct { + Value [1024]uintptr + Left, Right *byte +} + +func BenchmarkSetTypeNode1024(b *testing.B) { + benchSetType(b, new(Node1024)) +} + +func BenchmarkSetTypeNode1024Slice(b *testing.B) { + benchSetType(b, make([]Node1024, 32)) +} + +func benchSetType(b *testing.B, x interface{}) { + v := reflect.ValueOf(x) + t := v.Type() + switch t.Kind() { + case reflect.Ptr: + b.SetBytes(int64(t.Elem().Size())) + case reflect.Slice: + b.SetBytes(int64(t.Elem().Size()) * int64(v.Len())) } - _ = p + b.ResetTimer() + runtime.BenchSetType(b.N, x) } func BenchmarkAllocation(b *testing.B) { diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go index 66b0353f08..f330bf2430 100644 --- a/src/runtime/gcinfo_test.go +++ b/src/runtime/gcinfo_test.go @@ -10,8 +10,14 @@ import ( "testing" ) +const ( + typeScalar = 0 + typePointer = 1 +) + // TestGCInfo tests that various objects in heap, data and bss receive correct GC pointer type info. func TestGCInfo(t *testing.T) { + verifyGCInfo(t, "bss Ptr", &bssPtr, infoPtr) verifyGCInfo(t, "bss ScalarPtr", &bssScalarPtr, infoScalarPtr) verifyGCInfo(t, "bss PtrScalar", &bssPtrScalar, infoPtrScalar) verifyGCInfo(t, "bss BigStruct", &bssBigStruct, infoBigStruct()) @@ -20,6 +26,7 @@ func TestGCInfo(t *testing.T) { verifyGCInfo(t, "bss eface", &bssEface, infoEface) verifyGCInfo(t, "bss iface", &bssIface, infoIface) + verifyGCInfo(t, "data Ptr", &dataPtr, infoPtr) verifyGCInfo(t, "data ScalarPtr", &dataScalarPtr, infoScalarPtr) verifyGCInfo(t, "data PtrScalar", &dataPtrScalar, infoPtrScalar) verifyGCInfo(t, "data BigStruct", &dataBigStruct, infoBigStruct()) @@ -28,6 +35,7 @@ func TestGCInfo(t *testing.T) { verifyGCInfo(t, "data eface", &dataEface, infoEface) verifyGCInfo(t, "data iface", &dataIface, infoIface) + verifyGCInfo(t, "stack Ptr", new(Ptr), infoPtr) verifyGCInfo(t, "stack ScalarPtr", new(ScalarPtr), infoScalarPtr) verifyGCInfo(t, "stack PtrScalar", new(PtrScalar), infoPtrScalar) verifyGCInfo(t, "stack BigStruct", new(BigStruct), infoBigStruct()) @@ -37,38 +45,43 @@ func TestGCInfo(t *testing.T) { verifyGCInfo(t, "stack iface", new(Iface), infoIface) for i := 0; i < 10; i++ { - verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), infoScalarPtr) - verifyGCInfo(t, "heap PtrScalar", escape(new(PtrScalar)), infoPtrScalar) - verifyGCInfo(t, "heap BigStruct", escape(new(BigStruct)), infoBigStruct()) - verifyGCInfo(t, "heap string", escape(new(string)), infoString) - verifyGCInfo(t, "heap eface", escape(new(interface{})), infoEface) - verifyGCInfo(t, "heap iface", escape(new(Iface)), infoIface) + verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(padDead(infoPtr))) + verifyGCInfo(t, "heap PtrSlice", escape(&make([]*byte, 10)[0]), trimDead(infoPtr10)) + verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), trimDead(infoScalarPtr)) + verifyGCInfo(t, "heap ScalarPtrSlice", escape(&make([]ScalarPtr, 4)[0]), trimDead(infoScalarPtr4)) + verifyGCInfo(t, "heap PtrScalar", escape(new(PtrScalar)), trimDead(infoPtrScalar)) + verifyGCInfo(t, "heap BigStruct", escape(new(BigStruct)), trimDead(infoBigStruct())) + verifyGCInfo(t, "heap string", escape(new(string)), trimDead(infoString)) + verifyGCInfo(t, "heap eface", escape(new(interface{})), trimDead(infoEface)) + verifyGCInfo(t, "heap iface", escape(new(Iface)), trimDead(infoIface)) } - } func verifyGCInfo(t *testing.T, name string, p interface{}, mask0 []byte) { mask := runtime.GCMask(p) - if len(mask) > len(mask0) { - mask0 = append(mask0, typeDead) - mask = mask[:len(mask0)] - } if bytes.Compare(mask, mask0) != 0 { t.Errorf("bad GC program for %v:\nwant %+v\ngot %+v", name, mask0, mask) return } } -func nonStackInfo(mask []byte) []byte { - // typeDead is replaced with typeScalar everywhere except stacks. - mask1 := make([]byte, len(mask)) - for i, v := range mask { - if v == typeDead { - v = typeScalar - } - mask1[i] = v +func padDead(mask []byte) []byte { + // Because the dead bit isn't encoded until the third word, + // and because on 32-bit systems a one-word allocation + // uses a two-word block, the pointer info for a one-word + // object needs to be expanded to include an extra scalar + // on 32-bit systems to match the heap bitmap. + if runtime.PtrSize == 4 && len(mask) == 1 { + return []byte{mask[0], 0} + } + return mask +} + +func trimDead(mask []byte) []byte { + for len(mask) > 2 && mask[len(mask)-1] == typeScalar { + mask = mask[:len(mask)-1] } - return mask1 + return mask } var gcinfoSink interface{} @@ -78,18 +91,13 @@ func escape(p interface{}) interface{} { return p } -const ( - typeDead = iota - typeScalar - typePointer -) +var infoPtr = []byte{typePointer} -const ( - BitsString = iota // unused - BitsSlice // unused - BitsIface - BitsEface -) +type Ptr struct { + *byte +} + +var infoPtr10 = []byte{typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer} type ScalarPtr struct { q int @@ -102,6 +110,8 @@ type ScalarPtr struct { var infoScalarPtr = []byte{typeScalar, typePointer, typeScalar, typePointer, typeScalar, typePointer} +var infoScalarPtr4 = append(append(append(append([]byte(nil), infoScalarPtr...), infoScalarPtr...), infoScalarPtr...), infoScalarPtr...) + type PtrScalar struct { q *int w int @@ -166,6 +176,7 @@ func (IfaceImpl) f() { var ( // BSS + bssPtr Ptr bssScalarPtr ScalarPtr bssPtrScalar PtrScalar bssBigStruct BigStruct @@ -175,6 +186,7 @@ var ( bssIface Iface // DATA + dataPtr = Ptr{new(byte)} dataScalarPtr = ScalarPtr{q: 1} dataPtrScalar = PtrScalar{w: 1} dataBigStruct = BigStruct{w: 1} diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go index 9ca33992bb..b199330a1e 100644 --- a/src/runtime/hashmap.go +++ b/src/runtime/hashmap.go @@ -233,6 +233,9 @@ func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap { throw("need padding in bucket (value)") } + // make sure zero of element type is available. + mapzero(t.elem) + // find size parameter which will hold the requested # of elements B := uint8(0) for ; hint > bucketCnt && float32(hint) > loadFactor*float32(uintptr(1)<<B); B++ { @@ -990,3 +993,60 @@ func reflect_maplen(h *hmap) int { func reflect_ismapkey(t *_type) bool { return ismapkey(t) } + +var zerobuf struct { + lock mutex + p *byte + size uintptr +} + +var zerotiny [1024]byte + +// mapzero ensures that t.zero points at a zero value for type t. +// Types known to the compiler are in read-only memory and all point +// to a single zero in the bss of a large enough size. +// Types allocated by package reflect are in writable memory and +// start out with zero set to nil; we initialize those on demand. +func mapzero(t *_type) { + // On ARM, atomicloadp is implemented as xadd(p, 0), + // so we cannot use atomicloadp on read-only memory. + // Check whether the pointer is in the heap; if not, it's not writable + // so the zero value must already be set. + if GOARCH == "arm" && !inheap(uintptr(unsafe.Pointer(t))) { + if t.zero == nil { + print("runtime: map element ", *t._string, " missing zero value\n") + throw("mapzero") + } + return + } + + // Already done? + // Check without lock, so must use atomicload to sync with atomicstore in allocation case below. + if atomicloadp(unsafe.Pointer(&t.zero)) != nil { + return + } + + // Small enough for static buffer? + if t.size <= uintptr(len(zerotiny)) { + atomicstorep(unsafe.Pointer(&t.zero), unsafe.Pointer(&zerotiny[0])) + return + } + + // Use allocated buffer. + lock(&zerobuf.lock) + if zerobuf.size < t.size { + if zerobuf.size == 0 { + zerobuf.size = 4 * 1024 + } + for zerobuf.size < t.size { + zerobuf.size *= 2 + if zerobuf.size == 0 { + // need >2GB zero on 32-bit machine + throw("map element too large") + } + } + zerobuf.p = (*byte)(persistentalloc(zerobuf.size, 64, &memstats.other_sys)) + } + atomicstorep(unsafe.Pointer(&t.zero), unsafe.Pointer(zerobuf.p)) + unlock(&zerobuf.lock) +} diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index e18aa79164..c0fff3f1ce 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -15,20 +15,13 @@ import "unsafe" //go:linkname runtime_debug_WriteHeapDump runtime/debug.WriteHeapDump func runtime_debug_WriteHeapDump(fd uintptr) { - semacquire(&worldsema, false) - gp := getg() - gp.m.preemptoff = "write heap dump" - systemstack(stoptheworld) + stopTheWorld("write heap dump") systemstack(func() { writeheapdump_m(fd) }) - gp.m.preemptoff = "" - gp.m.locks++ - semrelease(&worldsema) - systemstack(starttheworld) - gp.m.locks-- + startTheWorld() } const ( @@ -730,14 +723,13 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector { i := uintptr(0) hbits := heapBitsForAddr(p) for ; i < nptr; i++ { - bits := hbits.typeBits() - if bits == typeDead { + if i >= 2 && !hbits.isMarked() { break // end of object } - hbits = hbits.next() - if bits == typePointer { + if hbits.isPointer() { tmpbuf[i/8] |= 1 << (i % 8) } + hbits = hbits.next() } return bitvector{int32(i), &tmpbuf[0]} } diff --git a/src/runtime/lfstack_test.go b/src/runtime/lfstack_test.go index 68f221d6ef..4da4d88619 100644 --- a/src/runtime/lfstack_test.go +++ b/src/runtime/lfstack_test.go @@ -24,8 +24,12 @@ func toMyNode(node *LFNode) *MyNode { return (*MyNode)(unsafe.Pointer(node)) } +var global interface{} + func TestLFStack(t *testing.T) { stack := new(uint64) + global = stack // force heap allocation + // Need to keep additional referenfces to nodes, the stack is not all that type-safe. var nodes []*MyNode diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 1619ccb9f4..2d7e55643f 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -424,9 +424,6 @@ func mHeap_SysAlloc(h *mheap, n uintptr) unsafe.Pointer { if raceenabled { racemapshadow((unsafe.Pointer)(p), n) } - if mheap_.shadow_enabled { - sysMap(unsafe.Pointer(p+mheap_.shadow_heap), n, h.shadow_reserved, &memstats.other_sys) - } if uintptr(p)&(_PageSize-1) != 0 { throw("misrounded allocation in MHeap_SysAlloc") @@ -512,6 +509,9 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer { if mp.mallocing != 0 { throw("malloc deadlock") } + if mp.gsignal == getg() { + throw("malloc during signal") + } mp.mallocing = 1 shouldhelpgc := false @@ -669,10 +669,6 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer { }) } - if mheap_.shadow_enabled { - clearshadow(uintptr(x), size) - } - if raceenabled { racemalloc(x, size) } diff --git a/src/runtime/mbarrier.go b/src/runtime/mbarrier.go index eb5881707b..53a0a00ae7 100644 --- a/src/runtime/mbarrier.go +++ b/src/runtime/mbarrier.go @@ -10,12 +10,6 @@ // implementation, markwb, and the various wrappers called by the // compiler to implement pointer assignment, slice assignment, // typed memmove, and so on. -// -// To check for missed write barriers, the GODEBUG=wbshadow debugging -// mode allocates a second copy of the heap. Write barrier-based pointer -// updates make changes to both the real heap and the shadow, and both -// the pointer updates and the GC look for inconsistencies between the two, -// indicating pointer writes that bypassed the barrier. package runtime @@ -66,7 +60,7 @@ func gcmarkwb_m(slot *uintptr, ptr uintptr) { default: throw("gcphasework in bad gcphase") - case _GCoff, _GCquiesce, _GCstw, _GCsweep, _GCscan: + case _GCoff, _GCstw, _GCsweep, _GCscan: // ok case _GCmark, _GCmarktermination: @@ -107,43 +101,19 @@ func writebarrierptr_nostore1(dst *uintptr, src uintptr) { // but if we do that, Go inserts a write barrier on *dst = src. //go:nosplit func writebarrierptr(dst *uintptr, src uintptr) { + *dst = src if !writeBarrierEnabled { - *dst = src return } - if src != 0 && (src < _PhysPageSize || src == poisonStack) { - systemstack(func() { throw("bad pointer in write barrier") }) - } - - if mheap_.shadow_enabled { - writebarrierptr_shadow(dst, src) + systemstack(func() { + print("runtime: writebarrierptr *", dst, " = ", hex(src), "\n") + throw("bad pointer in write barrier") + }) } - - *dst = src writebarrierptr_nostore1(dst, src) } -//go:nosplit -func writebarrierptr_shadow(dst *uintptr, src uintptr) { - systemstack(func() { - addr := uintptr(unsafe.Pointer(dst)) - shadow := shadowptr(addr) - if shadow == nil { - return - } - // There is a race here but only if the program is using - // racy writes instead of sync/atomic. In that case we - // don't mind crashing. - if *shadow != *dst && *shadow != noShadow && istrackedptr(*dst) { - mheap_.shadow_enabled = false - print("runtime: write barrier dst=", dst, " old=", hex(*dst), " shadow=", shadow, " old=", hex(*shadow), " new=", hex(src), "\n") - throw("missed write barrier") - } - *shadow = src - }) -} - // Like writebarrierptr, but the store has already been applied. // Do not reapply. //go:nosplit @@ -151,44 +121,12 @@ func writebarrierptr_nostore(dst *uintptr, src uintptr) { if !writeBarrierEnabled { return } - if src != 0 && (src < _PhysPageSize || src == poisonStack) { systemstack(func() { throw("bad pointer in write barrier") }) } - - // Apply changes to shadow. - // Since *dst has been overwritten already, we cannot check - // whether there were any missed updates, but writebarrierptr_nostore - // is only rarely used. - if mheap_.shadow_enabled { - systemstack(func() { - addr := uintptr(unsafe.Pointer(dst)) - shadow := shadowptr(addr) - if shadow == nil { - return - } - *shadow = src - }) - } - writebarrierptr_nostore1(dst, src) } -// writebarrierptr_noshadow records that the value in *dst -// has been written to using an atomic operation and the shadow -// has not been updated. (In general if dst must be manipulated -// atomically we cannot get the right bits for use in the shadow.) -//go:nosplit -func writebarrierptr_noshadow(dst *uintptr) { - addr := uintptr(unsafe.Pointer(dst)) - shadow := shadowptr(addr) - if shadow == nil { - return - } - - *shadow = noShadow -} - //go:nosplit func writebarrierstring(dst *[2]uintptr, src [2]uintptr) { writebarrierptr(&dst[0], src[0]) @@ -217,37 +155,11 @@ func writebarrieriface(dst *[2]uintptr, src [2]uintptr) { // typedmemmove copies a value of type t to dst from src. //go:nosplit func typedmemmove(typ *_type, dst, src unsafe.Pointer) { - if !writeBarrierEnabled || (typ.kind&kindNoPointers) != 0 { - memmove(dst, src, typ.size) + memmove(dst, src, typ.size) + if typ.kind&kindNoPointers != 0 { return } - - systemstack(func() { - mask := typeBitmapInHeapBitmapFormat(typ) - nptr := typ.size / ptrSize - for i := uintptr(0); i < nptr; i += 2 { - bits := mask[i/2] - if (bits>>2)&typeMask == typePointer { - writebarrierptr((*uintptr)(dst), *(*uintptr)(src)) - } else { - *(*uintptr)(dst) = *(*uintptr)(src) - } - // TODO(rsc): The noescape calls should be unnecessary. - dst = add(noescape(dst), ptrSize) - src = add(noescape(src), ptrSize) - if i+1 == nptr { - break - } - bits >>= 4 - if (bits>>2)&typeMask == typePointer { - writebarrierptr((*uintptr)(dst), *(*uintptr)(src)) - } else { - *(*uintptr)(dst) = *(*uintptr)(src) - } - dst = add(noescape(dst), ptrSize) - src = add(noescape(src), ptrSize) - } - }) + heapBitsBulkBarrier(uintptr(dst), typ.size) } //go:linkname reflect_typedmemmove reflect.typedmemmove @@ -259,38 +171,16 @@ func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) { // dst and src point off bytes into the value and only copies size bytes. //go:linkname reflect_typedmemmovepartial reflect.typedmemmovepartial func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size uintptr) { - if !writeBarrierEnabled || (typ.kind&kindNoPointers) != 0 || size < ptrSize { - memmove(dst, src, size) + memmove(dst, src, size) + if !writeBarrierEnabled || typ.kind&kindNoPointers != 0 || size < ptrSize || !inheap(uintptr(dst)) { return } - if off&(ptrSize-1) != 0 { - frag := -off & (ptrSize - 1) - // frag < size, because size >= ptrSize, checked above. - memmove(dst, src, frag) + if frag := -off & (ptrSize - 1); frag != 0 { + dst = add(dst, frag) size -= frag - dst = add(noescape(dst), frag) - src = add(noescape(src), frag) - off += frag - } - - mask := typeBitmapInHeapBitmapFormat(typ) - nptr := (off + size) / ptrSize - for i := uintptr(off / ptrSize); i < nptr; i++ { - bits := mask[i/2] >> ((i & 1) << 2) - if (bits>>2)&typeMask == typePointer { - writebarrierptr((*uintptr)(dst), *(*uintptr)(src)) - } else { - *(*uintptr)(dst) = *(*uintptr)(src) - } - // TODO(rsc): The noescape calls should be unnecessary. - dst = add(noescape(dst), ptrSize) - src = add(noescape(src), ptrSize) - } - size &= ptrSize - 1 - if size > 0 { - memmove(dst, src, size) } + heapBitsBulkBarrier(uintptr(dst), size&^(ptrSize-1)) } // callwritebarrier is invoked at the end of reflectcall, to execute @@ -302,29 +192,16 @@ func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size // not to be preempted before the write barriers have been run. //go:nosplit func callwritebarrier(typ *_type, frame unsafe.Pointer, framesize, retoffset uintptr) { - if !writeBarrierEnabled || typ == nil || (typ.kind&kindNoPointers) != 0 || framesize-retoffset < ptrSize { + if !writeBarrierEnabled || typ == nil || typ.kind&kindNoPointers != 0 || framesize-retoffset < ptrSize || !inheap(uintptr(frame)) { return } - - systemstack(func() { - mask := typeBitmapInHeapBitmapFormat(typ) - // retoffset is known to be pointer-aligned (at least). - // TODO(rsc): The noescape call should be unnecessary. - dst := add(noescape(frame), retoffset) - nptr := framesize / ptrSize - for i := uintptr(retoffset / ptrSize); i < nptr; i++ { - bits := mask[i/2] >> ((i & 1) << 2) - if (bits>>2)&typeMask == typePointer { - writebarrierptr_nostore((*uintptr)(dst), *(*uintptr)(dst)) - } - // TODO(rsc): The noescape call should be unnecessary. - dst = add(noescape(dst), ptrSize) - } - }) + heapBitsBulkBarrier(uintptr(add(frame, retoffset)), framesize-retoffset) } //go:nosplit func typedslicecopy(typ *_type, dst, src slice) int { + // TODO(rsc): If typedslicecopy becomes faster than calling + // typedmemmove repeatedly, consider using during func growslice. n := dst.len if n > src.len { n = src.len @@ -342,6 +219,10 @@ func typedslicecopy(typ *_type, dst, src slice) int { racereadrangepc(srcp, uintptr(n)*typ.size, callerpc, pc) } + // Note: No point in checking typ.kind&kindNoPointers here: + // compiler only emits calls to typedslicecopy for types with pointers, + // and growslice and reflect_typedslicecopy check for pointers + // before calling typedslicecopy. if !writeBarrierEnabled { memmove(dstp, srcp, uintptr(n)*typ.size) return n @@ -382,134 +263,13 @@ func typedslicecopy(typ *_type, dst, src slice) int { //go:linkname reflect_typedslicecopy reflect.typedslicecopy func reflect_typedslicecopy(elemType *_type, dst, src slice) int { - return typedslicecopy(elemType, dst, src) -} - -// Shadow heap for detecting missed write barriers. - -// noShadow is stored in as the shadow pointer to mark that there is no -// shadow word recorded. It matches any actual pointer word. -// noShadow is used when it is impossible to know the right word -// to store in the shadow heap, such as when the real heap word -// is being manipulated atomically. -const noShadow uintptr = 1 - -func wbshadowinit() { - // Initialize write barrier shadow heap if we were asked for it - // and we have enough address space (not on 32-bit). - if debug.wbshadow == 0 { - return - } - if ptrSize != 8 { - print("runtime: GODEBUG=wbshadow=1 disabled on 32-bit system\n") - return - } - - var reserved bool - p1 := sysReserveHigh(mheap_.arena_end-mheap_.arena_start, &reserved) - if p1 == nil { - throw("cannot map shadow heap") - } - mheap_.shadow_heap = uintptr(p1) - mheap_.arena_start - sysMap(p1, mheap_.arena_used-mheap_.arena_start, reserved, &memstats.other_sys) - memmove(p1, unsafe.Pointer(mheap_.arena_start), mheap_.arena_used-mheap_.arena_start) - - mheap_.shadow_reserved = reserved - - for datap := &firstmoduledata; datap != nil; datap = datap.next { - start := ^uintptr(0) - end := uintptr(0) - if start > datap.noptrdata { - start = datap.noptrdata - } - if start > datap.data { - start = datap.data - } - if start > datap.noptrbss { - start = datap.noptrbss - } - if start > datap.bss { - start = datap.bss - } - if end < datap.enoptrdata { - end = datap.enoptrdata - } - if end < datap.edata { - end = datap.edata - } - if end < datap.enoptrbss { - end = datap.enoptrbss - } - if end < datap.ebss { - end = datap.ebss - } - start &^= _PhysPageSize - 1 - end = round(end, _PhysPageSize) - datap.data_start = start - datap.data_end = end - reserved = false - p1 = sysReserveHigh(end-start, &reserved) - if p1 == nil { - throw("cannot map shadow data") + if elemType.kind&kindNoPointers != 0 { + n := dst.len + if n > src.len { + n = src.len } - datap.shadow_data = uintptr(p1) - start - sysMap(p1, end-start, reserved, &memstats.other_sys) - memmove(p1, unsafe.Pointer(start), end-start) - } - - mheap_.shadow_enabled = true - writeBarrierEnabled = true -} - -// shadowptr returns a pointer to the shadow value for addr. -//go:nosplit -func shadowptr(addr uintptr) *uintptr { - for datap := &firstmoduledata; datap != nil; datap = datap.next { - if datap.data_start <= addr && addr < datap.data_end { - return (*uintptr)(unsafe.Pointer(addr + datap.shadow_data)) - } - } - if inheap(addr) { - return (*uintptr)(unsafe.Pointer(addr + mheap_.shadow_heap)) - } - return nil -} - -// istrackedptr reports whether the pointer value p requires a write barrier -// when stored into the heap. -func istrackedptr(p uintptr) bool { - return inheap(p) -} - -// checkwbshadow checks that p matches its shadow word. -// The garbage collector calls checkwbshadow for each pointer during the checkmark phase. -// It is only called when mheap_.shadow_enabled is true. -func checkwbshadow(p *uintptr) { - addr := uintptr(unsafe.Pointer(p)) - shadow := shadowptr(addr) - if shadow == nil { - return - } - // There is no race on the accesses here, because the world is stopped, - // but there may be racy writes that lead to the shadow and the - // heap being inconsistent. If so, we will detect that here as a - // missed write barrier and crash. We don't mind. - // Code should use sync/atomic instead of racy pointer writes. - if *shadow != *p && *shadow != noShadow && istrackedptr(*p) { - mheap_.shadow_enabled = false - print("runtime: checkwritebarrier p=", p, " *p=", hex(*p), " shadow=", shadow, " *shadow=", hex(*shadow), "\n") - throw("missed write barrier") - } -} - -// clearshadow clears the shadow copy associated with the n bytes of memory at addr. -func clearshadow(addr, n uintptr) { - if !mheap_.shadow_enabled { - return - } - p := shadowptr(addr) - if p == nil || n <= ptrSize { - return + memmove(dst.array, src.array, uintptr(n)*elemType.size) + return n } - memclr(unsafe.Pointer(p), n) + return typedslicecopy(elemType, dst, src) } diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index f0c7520e38..b20908fb49 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -6,48 +6,40 @@ // // Stack, data, and bss bitmaps // -// Not handled in this file, but worth mentioning: stack frames and global data -// in the data and bss sections are described by 1-bit bitmaps in which 0 means -// scalar or uninitialized or dead and 1 means pointer to visit during GC. -// -// Comparing this 1-bit form with the 2-bit form described below, 0 represents -// both the 2-bit 00 and 01, while 1 represents the 2-bit 10. -// Therefore conversions between the two (until the 2-bit form is gone) -// can be done by x>>1 for 2-bit to 1-bit and x+1 for 1-bit to 2-bit. -// -// Type bitmaps -// -// Types that aren't too large -// record information about the layout of their memory words using a type bitmap. -// The bitmap holds two bits for each pointer-sized word. The two-bit values are: -// -// 00 - typeDead: not a pointer, and no pointers in the rest of the object -// 01 - typeScalar: not a pointer -// 10 - typePointer: a pointer that GC should trace -// 11 - unused -// -// typeDead only appears in type bitmaps in Go type descriptors -// and in type bitmaps embedded in the heap bitmap (see below). +// Stack frames and global variables in the data and bss sections are described +// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer +// to be visited during GC. The bits in each byte are consumed starting with +// the low bit: 1<<0, 1<<1, and so on. // // Heap bitmap // // The allocated heap comes from a subset of the memory in the range [start, used), // where start == mheap_.arena_start and used == mheap_.arena_used. -// The heap bitmap comprises 4 bits for each pointer-sized word in that range, +// The heap bitmap comprises 2 bits for each pointer-sized word in that range, // stored in bytes indexed backward in memory from start. -// That is, the byte at address start-1 holds the 4-bit entries for the two words -// start, start+ptrSize, the byte at start-2 holds the entries for start+2*ptrSize, -// start+3*ptrSize, and so on. -// In the byte holding the entries for addresses p and p+ptrSize, the low 4 bits -// describe p and the high 4 bits describe p+ptrSize. +// That is, the byte at address start-1 holds the 2-bit entries for the four words +// start through start+3*ptrSize, the byte at start-2 holds the entries for +// start+4*ptrSize through start+7*ptrSize, and so on. // -// The 4 bits for each word are: -// 0001 - not used -// 0010 - bitMarked: this object has been marked by GC -// tt00 - word type bits, as in a type bitmap. +// In each 2-bit entry, the lower bit holds the same information as in the 1-bit +// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. +// The meaning of the high bit depends on the position of the word being described +// in its allocated object. In the first word, the high bit is the GC ``marked'' bit. +// In the second word, the high bit is the GC ``checkmarked'' bit (see below). +// In the third and later words, the high bit indicates that the object is still +// being described. In these words, if a bit pair with a high bit 0 is encountered, +// the low bit can also be assumed to be 0, and the object description is over. +// This 00 is called the ``dead'' encoding: it signals that the rest of the words +// in the object are uninteresting to the garbage collector. // -// The code makes use of the fact that the zero value for a heap bitmap nibble -// has no boundary bit set, no marked bit set, and type bits == typeDead. +// The 2-bit entries are split when written into the byte, so that the top half +// of the byte contains 4 mark bits and the bottom half contains 4 pointer bits. +// This form allows a copy from the 1-bit to the 4-bit form to keep the +// pointer bits contiguous, instead of having to space them out. +// +// The code makes use of the fact that the zero value for a heap bitmap +// has no live pointer bit set and is (depending on position), not marked, +// not checkmarked, and is the dead encoding. // These properties must be preserved when modifying the encoding. // // Checkmarks @@ -57,55 +49,71 @@ // collector implementation. As a sanity check, the GC has a 'checkmark' // mode that retraverses the object graph with the world stopped, to make // sure that everything that should be marked is marked. -// In checkmark mode, in the heap bitmap, the type bits for the first word -// of an object are redefined: -// -// 00 - typeScalarCheckmarked // typeScalar, checkmarked -// 01 - typeScalar // typeScalar, not checkmarked -// 10 - typePointer // typePointer, not checkmarked -// 11 - typePointerCheckmarked // typePointer, checkmarked +// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry +// for the second word of the object holds the checkmark bit. +// When not in checkmark mode, this bit is set to 1. // -// That is, typeDead is redefined to be typeScalar + a checkmark, and the -// previously unused 11 pattern is redefined to be typePointer + a checkmark. -// To prepare for this mode, we must move any typeDead in the first word of -// a multiword object to the second word. +// The smallest possible allocation is 8 bytes. On a 32-bit machine, that +// means every allocated object has two words, so there is room for the +// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is +// just one word, so the second bit pair is not available for encoding the +// checkmark. However, because non-pointer allocations are combined +// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation +// must be a pointer, so the type bit in the first word is not actually needed. +// It is still used in general, except in checkmark the type bit is repurposed +// as the checkmark bit and then reinitialized (to 1) as the type bit when +// finished. package runtime import "unsafe" const ( - typeDead = 0 - typeScalarCheckmarked = 0 - typeScalar = 1 - typePointer = 2 - typePointerCheckmarked = 3 + bitPointer = 1 << 0 + bitMarked = 1 << 4 - typeBitsWidth = 2 // # of type bits per pointer-sized word - typeMask = 1<<typeBitsWidth - 1 + heapBitsShift = 1 // shift offset between successive bitPointer or bitMarked entries + heapBitmapScale = ptrSize * (8 / 2) // number of data bytes described by one heap bitmap byte - heapBitsWidth = 4 - heapBitmapScale = ptrSize * (8 / heapBitsWidth) // number of data bytes per heap bitmap byte - bitMarked = 2 - typeShift = 2 + // all mark/pointer bits in a byte + bitMarkedAll = bitMarked | bitMarked<<heapBitsShift | bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift) + bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift) ) -// Information from the compiler about the layout of stack frames. -type bitvector struct { - n int32 // # of bits - bytedata *uint8 -} - // addb returns the byte pointer p+n. //go:nowritebarrier func addb(p *byte, n uintptr) *byte { - return (*byte)(add(unsafe.Pointer(p), n)) + // Note: wrote out full expression instead of calling add(p, n) + // to reduce the number of temporaries generated by the + // compiler for this trivial expression during inlining. + return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n)) } // subtractb returns the byte pointer p-n. //go:nowritebarrier func subtractb(p *byte, n uintptr) *byte { - return (*byte)(add(unsafe.Pointer(p), -n)) + // Note: wrote out full expression instead of calling add(p, -n) + // to reduce the number of temporaries generated by the + // compiler for this trivial expression during inlining. + return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n)) +} + +// add1 returns the byte pointer p+1. +//go:nowritebarrier +func add1(p *byte) *byte { + // Note: wrote out full expression instead of calling addb(p, 1) + // to reduce the number of temporaries generated by the + // compiler for this trivial expression during inlining. + return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1)) +} + +// subtract1 returns the byte pointer p-1. +//go:nowritebarrier +func subtract1(p *byte) *byte { + // Note: wrote out full expression instead of calling subtractb(p, 1) + // to reduce the number of temporaries generated by the + // compiler for this trivial expression during inlining. + return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1)) } // mHeap_MapBits is called each time arena_used is extended. @@ -140,9 +148,13 @@ type heapBits struct { // heapBitsForAddr returns the heapBits for the address addr. // The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used). +// +// nosplit because it is used during write barriers and must not be preempted. +//go:nosplit func heapBitsForAddr(addr uintptr) heapBits { + // 2 bits per work, 4 pairs per byte, and a mask is hard coded. off := (addr - mheap_.arena_start) / ptrSize - return heapBits{(*uint8)(unsafe.Pointer(mheap_.arena_start - off/2 - 1)), uint32(4 * (off & 1))} + return heapBits{(*uint8)(unsafe.Pointer(mheap_.arena_start - off/4 - 1)), uint32(off & 3)} } // heapBitsForSpan returns the heapBits for the span base address base. @@ -229,20 +241,39 @@ func (h heapBits) prefetch() { // That is, if h describes address p, h.next() describes p+ptrSize. // Note that next does not modify h. The caller must record the result. func (h heapBits) next() heapBits { - if h.shift == 0 { - return heapBits{h.bitp, 4} + if h.shift < 3*heapBitsShift { + return heapBits{h.bitp, h.shift + heapBitsShift} } - return heapBits{subtractb(h.bitp, 1), 0} + return heapBits{subtract1(h.bitp), 0} +} + +// forward returns the heapBits describing n pointer-sized words ahead of h in memory. +// That is, if h describes address p, h.forward(n) describes p+n*ptrSize. +// h.forward(1) is equivalent to h.next(), just slower. +// Note that forward does not modify h. The caller must record the result. +// bits returns the heap bits for the current word. +func (h heapBits) forward(n uintptr) heapBits { + n += uintptr(h.shift) / heapBitsShift + return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift} +} + +// The caller can test isMarked and isPointer by &-ing with bitMarked and bitPointer. +// The result includes in its higher bits the bits for subsequent words +// described by the same bitmap byte. +func (h heapBits) bits() uint32 { + return uint32(*h.bitp) >> h.shift } // isMarked reports whether the heap bits have the marked bit set. +// h must describe the initial word of the object. func (h heapBits) isMarked() bool { return *h.bitp&(bitMarked<<h.shift) != 0 } // setMarked sets the marked bit in the heap bits, atomically. +// h must describe the initial word of the object. func (h heapBits) setMarked() { - // Each byte of GC bitmap holds info for two words. + // Each byte of GC bitmap holds info for four words. // Might be racing with other updates, so use atomic update always. // We used to be clever here and use a non-atomic update in certain // cases, but it's not worth the risk. @@ -250,30 +281,103 @@ func (h heapBits) setMarked() { } // setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically. +// h must describe the initial word of the object. func (h heapBits) setMarkedNonAtomic() { *h.bitp |= bitMarked << h.shift } -// typeBits returns the heap bits' type bits. -func (h heapBits) typeBits() uint8 { - return (*h.bitp >> (h.shift + typeShift)) & typeMask +// isPointer reports whether the heap bits describe a pointer word. +// h must describe the initial word of the object. +func (h heapBits) isPointer() bool { + return (*h.bitp>>h.shift)&bitPointer != 0 +} + +// hasPointers reports whether the given object has any pointers. +// It must be told how large the object at h is, so that it does not read too +// far into the bitmap. +// h must describe the initial word of the object. +func (h heapBits) hasPointers(size uintptr) bool { + if size == ptrSize { // 1-word objects are always pointers + return true + } + // Otherwise, at least a 2-word object, and at least 2-word aligned, + // so h.shift is either 0 or 4, so we know we can get the bits for the + // first two words out of *h.bitp. + // If either of the first two words is a pointer, not pointer free. + b := uint32(*h.bitp >> h.shift) + if b&(bitPointer|bitPointer<<heapBitsShift) != 0 { + return true + } + if size == 2*ptrSize { + return false + } + // At least a 4-word object. Check scan bit (aka marked bit) in third word. + if h.shift == 0 { + return b&(bitMarked<<(2*heapBitsShift)) != 0 + } + return uint32(*subtract1(h.bitp))&bitMarked != 0 } // isCheckmarked reports whether the heap bits have the checkmarked bit set. -func (h heapBits) isCheckmarked() bool { - typ := h.typeBits() - return typ == typeScalarCheckmarked || typ == typePointerCheckmarked +// It must be told how large the object at h is, because the encoding of the +// checkmark bit varies by size. +// h must describe the initial word of the object. +func (h heapBits) isCheckmarked(size uintptr) bool { + if size == ptrSize { + return (*h.bitp>>h.shift)&bitPointer != 0 + } + // All multiword objects are 2-word aligned, + // so we know that the initial word's 2-bit pair + // and the second word's 2-bit pair are in the + // same heap bitmap byte, *h.bitp. + return (*h.bitp>>(heapBitsShift+h.shift))&bitMarked != 0 } // setCheckmarked sets the checkmarked bit. -func (h heapBits) setCheckmarked() { - typ := h.typeBits() - if typ == typeScalar { - // Clear low type bit to turn 01 into 00. - atomicand8(h.bitp, ^((1 << typeShift) << h.shift)) - } else if typ == typePointer { - // Set low type bit to turn 10 into 11. - atomicor8(h.bitp, (1<<typeShift)<<h.shift) +// It must be told how large the object at h is, because the encoding of the +// checkmark bit varies by size. +// h must describe the initial word of the object. +func (h heapBits) setCheckmarked(size uintptr) { + if size == ptrSize { + atomicor8(h.bitp, bitPointer<<h.shift) + return + } + atomicor8(h.bitp, bitMarked<<(heapBitsShift+h.shift)) +} + +// heapBitsBulkBarrier executes writebarrierptr_nostore +// for every pointer slot in the memory range [p, p+size), +// using the heap bitmap to locate those pointer slots. +// This executes the write barriers necessary after a memmove. +// Both p and size must be pointer-aligned. +// The range [p, p+size) must lie within a single allocation. +// +// Callers should call heapBitsBulkBarrier immediately after +// calling memmove(p, src, size). This function is marked nosplit +// to avoid being preempted; the GC must not stop the goroutine +// betwen the memmove and the execution of the barriers. +// +// The heap bitmap is not maintained for allocations containing +// no pointers at all; any caller of heapBitsBulkBarrier must first +// make sure the underlying allocation contains pointers, usually +// by checking typ.kind&kindNoPointers. +// +//go:nosplit +func heapBitsBulkBarrier(p, size uintptr) { + if (p|size)&(ptrSize-1) != 0 { + throw("heapBitsBulkBarrier: unaligned arguments") + } + if !writeBarrierEnabled || !inheap(p) { + return + } + + h := heapBitsForAddr(p) + for i := uintptr(0); i < size; i += ptrSize { + if h.isPointer() { + x := (*uintptr)(unsafe.Pointer(p + i)) + writebarrierptr_nostore(x, *x) + } + h = h.next() } } @@ -291,99 +395,59 @@ func (h heapBits) initSpan(size, n, total uintptr) { throw("initSpan: unaligned length") } nbyte := total / heapBitmapScale + if ptrSize == 8 && size == ptrSize { + end := h.bitp + bitp := subtractb(end, nbyte-1) + for { + *bitp = bitPointerAll + if bitp == end { + break + } + bitp = add1(bitp) + } + return + } memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte) } // initCheckmarkSpan initializes a span for being checkmarked. -// This would be a no-op except that we need to rewrite any -// typeDead bits in the first word of the object into typeScalar -// followed by a typeDead in the second word of the object. +// It clears the checkmark bits, which are set to 1 in normal operation. func (h heapBits) initCheckmarkSpan(size, n, total uintptr) { - if size == ptrSize { + // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely. + if ptrSize == 8 && size == ptrSize { + // Checkmark bit is type bit, bottom bit of every 2-bit entry. // Only possible on 64-bit system, since minimum size is 8. - // Must update both top and bottom nibble of each byte. - // There is no second word in these objects, so all we have - // to do is rewrite typeDead to typeScalar by adding the 1<<typeShift bit. + // Must clear type bit (checkmark bit) of every word. + // The type bit is the lower of every two-bit pair. bitp := h.bitp - for i := uintptr(0); i < n; i += 2 { - x := int(*bitp) - - if (x>>typeShift)&typeMask == typeDead { - x += (typeScalar - typeDead) << typeShift - } - if (x>>(4+typeShift))&typeMask == typeDead { - x += (typeScalar - typeDead) << (4 + typeShift) - } - *bitp = uint8(x) - bitp = subtractb(bitp, 1) + for i := uintptr(0); i < n; i += 4 { + *bitp &^= bitPointerAll + bitp = subtract1(bitp) } return } - - // Update bottom nibble for first word of each object. - // If the bottom nibble says typeDead, change to typeScalar - // and clear top nibble to mark as typeDead. - bitp := h.bitp - step := size / heapBitmapScale for i := uintptr(0); i < n; i++ { - x := *bitp - if (x>>typeShift)&typeMask == typeDead { - x += (typeScalar - typeDead) << typeShift - x &= 0x0f // clear top nibble to typeDead - } - bitp = subtractb(bitp, step) + *h.bitp &^= bitMarked << (heapBitsShift + h.shift) + h = h.forward(size / ptrSize) } } -// clearCheckmarkSpan removes all the checkmarks from a span. -// If it finds a multiword object starting with typeScalar typeDead, -// it rewrites the heap bits to the simpler typeDead typeDead. +// clearCheckmarkSpan undoes all the checkmarking in a span. +// The actual checkmark bits are ignored, so the only work to do +// is to fix the pointer bits. (Pointer bits are ignored by scanobject +// but consulted by typedmemmove.) func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { - if size == ptrSize { + // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely. + if ptrSize == 8 && size == ptrSize { + // Checkmark bit is type bit, bottom bit of every 2-bit entry. // Only possible on 64-bit system, since minimum size is 8. - // Must update both top and bottom nibble of each byte. - // typeScalarCheckmarked can be left as typeDead, - // but we want to change typeScalar back to typeDead. + // Must clear type bit (checkmark bit) of every word. + // The type bit is the lower of every two-bit pair. bitp := h.bitp - for i := uintptr(0); i < n; i += 2 { - x := int(*bitp) - switch typ := (x >> typeShift) & typeMask; typ { - case typeScalar: - x += (typeDead - typeScalar) << typeShift - case typePointerCheckmarked: - x += (typePointer - typePointerCheckmarked) << typeShift - } - - switch typ := (x >> (4 + typeShift)) & typeMask; typ { - case typeScalar: - x += (typeDead - typeScalar) << (4 + typeShift) - case typePointerCheckmarked: - x += (typePointer - typePointerCheckmarked) << (4 + typeShift) - } - - *bitp = uint8(x) - bitp = subtractb(bitp, 1) - } - return - } - - // Update bottom nibble for first word of each object. - // If the bottom nibble says typeScalarCheckmarked and the top is not typeDead, - // change to typeScalar. Otherwise leave, since typeScalarCheckmarked == typeDead. - // If the bottom nibble says typePointerCheckmarked, change to typePointer. - bitp := h.bitp - step := size / heapBitmapScale - for i := uintptr(0); i < n; i++ { - x := int(*bitp) - switch typ := (x >> typeShift) & typeMask; { - case typ == typeScalarCheckmarked && (x>>(4+typeShift))&typeMask != typeDead: - x += (typeScalar - typeScalarCheckmarked) << typeShift - case typ == typePointerCheckmarked: - x += (typePointer - typePointerCheckmarked) << typeShift + for i := uintptr(0); i < n; i += 4 { + *bitp |= bitPointerAll + bitp = subtract1(bitp) } - - *bitp = uint8(x) - bitp = subtractb(bitp, step) } } @@ -393,348 +457,1046 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { // bits for the first two words (or one for single-word objects) to typeDead // and then calls f(p), where p is the object's base address. // f is expected to add the object to a free list. +// For non-free objects, heapBitsSweepSpan turns off the marked bit. func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) { h := heapBitsForSpan(base) - if size == ptrSize { - // Only possible on 64-bit system, since minimum size is 8. - // Must read and update both top and bottom nibble of each byte. + switch { + default: + throw("heapBitsSweepSpan") + case ptrSize == 8 && size == ptrSize: + // Consider mark bits in all four 2-bit entries of each bitmap byte. bitp := h.bitp - for i := uintptr(0); i < n; i += 2 { - x := int(*bitp) + for i := uintptr(0); i < n; i += 4 { + x := uint32(*bitp) + // Note that unlike the other size cases, we leave the pointer bits set here. + // These are initialized during initSpan when the span is created and left + // in place the whole time the span is used for pointer-sized objects. + // That lets heapBitsSetType avoid an atomic update to set the pointer bit + // during allocation. if x&bitMarked != 0 { x &^= bitMarked } else { - x &^= typeMask << typeShift f(base + i*ptrSize) } - if x&(bitMarked<<4) != 0 { - x &^= bitMarked << 4 + if x&(bitMarked<<heapBitsShift) != 0 { + x &^= bitMarked << heapBitsShift } else { - x &^= typeMask << (4 + typeShift) f(base + (i+1)*ptrSize) } + if x&(bitMarked<<(2*heapBitsShift)) != 0 { + x &^= bitMarked << (2 * heapBitsShift) + } else { + f(base + (i+2)*ptrSize) + } + if x&(bitMarked<<(3*heapBitsShift)) != 0 { + x &^= bitMarked << (3 * heapBitsShift) + } else { + f(base + (i+3)*ptrSize) + } *bitp = uint8(x) - bitp = subtractb(bitp, 1) + bitp = subtract1(bitp) } - return - } - bitp := h.bitp - step := size / heapBitmapScale - for i := uintptr(0); i < n; i++ { - x := int(*bitp) - if x&bitMarked != 0 { - x &^= bitMarked - } else { - x = 0 - f(base + i*size) + case size%(4*ptrSize) == 0: + // Mark bit is in first word of each object. + // Each object starts at bit 0 of a heap bitmap byte. + bitp := h.bitp + step := size / heapBitmapScale + for i := uintptr(0); i < n; i++ { + x := uint32(*bitp) + if x&bitMarked != 0 { + x &^= bitMarked + } else { + x = 0 + f(base + i*size) + } + *bitp = uint8(x) + bitp = subtractb(bitp, step) + } + + case size%(4*ptrSize) == 2*ptrSize: + // Mark bit is in first word of each object, + // but every other object starts halfway through a heap bitmap byte. + // Unroll loop 2x to handle alternating shift count and step size. + bitp := h.bitp + step := size / heapBitmapScale + var i uintptr + for i = uintptr(0); i < n; i += 2 { + x := uint32(*bitp) + if x&bitMarked != 0 { + x &^= bitMarked + } else { + x &^= bitMarked | bitPointer | (bitMarked|bitPointer)<<heapBitsShift + f(base + i*size) + if size > 2*ptrSize { + x = 0 + } + } + *bitp = uint8(x) + if i+1 >= n { + break + } + bitp = subtractb(bitp, step) + x = uint32(*bitp) + if x&(bitMarked<<(2*heapBitsShift)) != 0 { + x &^= bitMarked << (2 * heapBitsShift) + } else { + x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift) + f(base + (i+1)*size) + if size > 2*ptrSize { + *subtract1(bitp) = 0 + } + } + *bitp = uint8(x) + bitp = subtractb(bitp, step+1) } - *bitp = uint8(x) - bitp = subtractb(bitp, step) } } -// TODO(rsc): Clean up the next two functions. - // heapBitsSetType records that the new allocation [x, x+size) // holds in [x, x+dataSize) one or more values of type typ. // (The number of values is given by dataSize / typ.size.) // If dataSize < size, the fragment [x+dataSize, x+size) is // recorded as non-pointer data. +// It is known that the type has pointers somewhere; +// malloc does not call heapBitsSetType when there are no pointers, +// because all free objects are marked as noscan during +// heapBitsSweepSpan. +// There can only be one allocation from a given span active at a time, +// so this code is not racing with other instances of itself, +// and we don't allocate from a span until it has been swept, +// so this code is not racing with heapBitsSweepSpan. +// It is, however, racing with the concurrent GC mark phase, +// which can be setting the mark bit in the leading 2-bit entry +// of an allocated block. The block we are modifying is not quite +// allocated yet, so the GC marker is not racing with updates to x's bits, +// but if the start or end of x shares a bitmap byte with an adjacent +// object, the GC marker is racing with updates to those object's mark bits. func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { - // From here till marked label marking the object as allocated - // and storing type info in the GC bitmap. - h := heapBitsForAddr(x) + const doubleCheck = false // slow but helpful; enable to test modifications to this code - var ti, te uintptr - var ptrmask *uint8 - if size == ptrSize { + // dataSize is always size rounded up to the next malloc size class, + // except in the case of allocating a defer block, in which case + // size is sizeof(_defer{}) (at least 6 words) and dataSize may be + // arbitrarily larger. + // + // The checks for size == ptrSize and size == 2*ptrSize can therefore + // assume that dataSize == size without checking it explicitly. + + if ptrSize == 8 && size == ptrSize { // It's one word and it has pointers, it must be a pointer. - // The bitmap byte is shared with the one-word object - // next to it, and concurrent GC might be marking that - // object, so we must use an atomic update. - atomicor8(h.bitp, typePointer<<(typeShift+h.shift)) + // In general we'd need an atomic update here if the + // concurrent GC were marking objects in this span, + // because each bitmap byte describes 3 other objects + // in addition to the one being allocated. + // However, since all allocated one-word objects are pointers + // (non-pointers are aggregated into tinySize allocations), + // initSpan sets the pointer bits for us. Nothing to do here. + if doubleCheck { + h := heapBitsForAddr(x) + if !h.isPointer() { + throw("heapBitsSetType: pointer bit missing") + } + } return } - if typ.kind&kindGCProg != 0 { - nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize - masksize := nptr - if masksize%2 != 0 { - masksize *= 2 // repeated - } - const typeBitsPerByte = 8 / typeBitsWidth - masksize = masksize * typeBitsPerByte / 8 // 4 bits per word - masksize++ // unroll flag in the beginning - if masksize > maxGCMask && typ.gc[1] != 0 { - // write barriers have not been updated to deal with this case yet. - throw("maxGCMask too small for now") - // If the mask is too large, unroll the program directly - // into the GC bitmap. It's 7 times slower than copying - // from the pre-unrolled mask, but saves 1/16 of type size - // memory for the mask. - systemstack(func() { - unrollgcproginplace_m(unsafe.Pointer(x), typ, size, dataSize) - }) + + h := heapBitsForAddr(x) + ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below) + + // Heap bitmap bits for 2-word object are only 4 bits, + // so also shared with objects next to it; use atomic updates. + // This is called out as a special case primarily for 32-bit systems, + // so that on 32-bit systems the code below can assume all objects + // are 4-word aligned (because they're all 16-byte aligned). + if size == 2*ptrSize { + if typ.size == ptrSize { + // We're allocating a block big enough to hold two pointers. + // On 64-bit, that means the actual object must be two pointers, + // or else we'd have used the one-pointer-sized block. + // On 32-bit, however, this is the 8-byte block, the smallest one. + // So it could be that we're allocating one pointer and this was + // just the smallest block available. Distinguish by checking dataSize. + // (In general the number of instances of typ being allocated is + // dataSize/typ.size.) + if ptrSize == 4 && dataSize == ptrSize { + // 1 pointer. + if gcphase == _GCoff { + *h.bitp |= bitPointer << h.shift + } else { + atomicor8(h.bitp, bitPointer<<h.shift) + } + } else { + // 2-element slice of pointer. + if gcphase == _GCoff { + *h.bitp |= (bitPointer | bitPointer<<heapBitsShift) << h.shift + } else { + atomicor8(h.bitp, (bitPointer|bitPointer<<heapBitsShift)<<h.shift) + } + } return } - ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0]))) - // Check whether the program is already unrolled - // by checking if the unroll flag byte is set - maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask))) - if *(*uint8)(unsafe.Pointer(&maskword)) == 0 { - systemstack(func() { - unrollgcprog_m(typ) - }) + // Otherwise typ.size must be 2*ptrSize, and typ.kind&kindGCProg == 0. + if doubleCheck { + if typ.size != 2*ptrSize || typ.kind&kindGCProg != 0 { + print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n") + throw("heapBitsSetType") + } + } + b := uint32(*ptrmask) + hb := b & 3 + if gcphase == _GCoff { + *h.bitp |= uint8(hb << h.shift) + } else { + atomicor8(h.bitp, uint8(hb<<h.shift)) } - ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte + return + } + + // Copy from 1-bit ptrmask into 2-bit bitmap. + // The basic approach is to use a single uintptr as a bit buffer, + // alternating between reloading the buffer and writing bitmap bytes. + // In general, one load can supply two bitmap byte writes. + // This is a lot of lines of code, but it compiles into relatively few + // machine instructions. + + var ( + // Ptrmask input. + p *byte // last ptrmask byte read + b uintptr // ptrmask bits already loaded + nb uintptr // number of bits in b at next read + endp *byte // final ptrmask byte to read (then repeat) + endnb uintptr // number of valid bits in *endp + pbits uintptr // alternate source of bits + + // Heap bitmap output. + w uintptr // words processed + nw uintptr // number of words to process + hbitp *byte // next heap bitmap byte to write + hb uintptr // bits being prepared for *hbitp + ) + + hbitp = h.bitp + + // Handle GC program. Delayed until this part of the code + // so that we can use the same double-checking mechanism + // as the 1-bit case. Nothing above could have encountered + // GC programs: the cases were all too small. + if typ.kind&kindGCProg != 0 { + heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4)) + if doubleCheck { + // Double-check the heap bits written by GC program + // by running the GC program to create a 1-bit pointer mask + // and then jumping to the double-check code below. + // This doesn't catch bugs shared between the 1-bit and 4-bit + // GC program execution, but it does catch mistakes specific + // to just one of those and bugs in heapBitsSetTypeGCProg's + // implementation of arrays. + lock(&debugPtrmask.lock) + if debugPtrmask.data == nil { + debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys)) + } + ptrmask = debugPtrmask.data + runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1) + goto Phase4 + } + return + } + + // Note about sizes: + // + // typ.size is the number of words in the object, + // and typ.ptrdata is the number of words in the prefix + // of the object that contains pointers. That is, the final + // typ.size - typ.ptrdata words contain no pointers. + // This allows optimization of a common pattern where + // an object has a small header followed by a large scalar + // buffer. If we know the pointers are over, we don't have + // to scan the buffer's heap bitmap at all. + // The 1-bit ptrmasks are sized to contain only bits for + // the typ.ptrdata prefix, zero padded out to a full byte + // of bitmap. This code sets nw (below) so that heap bitmap + // bits are only written for the typ.ptrdata prefix; if there is + // more room in the allocated object, the next heap bitmap + // entry is a 00, indicating that there are no more pointers + // to scan. So only the ptrmask for the ptrdata bytes is needed. + // + // Replicated copies are not as nice: if there is an array of + // objects with scalar tails, all but the last tail does have to + // be initialized, because there is no way to say "skip forward". + // However, because of the possibility of a repeated type with + // size not a multiple of 4 pointers (one heap bitmap byte), + // the code already must handle the last ptrmask byte specially + // by treating it as containing only the bits for endnb pointers, + // where endnb <= 4. We represent large scalar tails that must + // be expanded in the replication by setting endnb larger than 4. + // This will have the effect of reading many bits out of b, + // but once the real bits are shifted out, b will supply as many + // zero bits as we try to read, which is exactly what we need. + + p = ptrmask + if typ.size < dataSize { + // Filling in bits for an array of typ. + // Set up for repetition of ptrmask during main loop. + // Note that ptrmask describes only a prefix of + const maxBits = ptrSize*8 - 7 + if typ.ptrdata/ptrSize <= maxBits { + // Entire ptrmask fits in uintptr with room for a byte fragment. + // Load into pbits and never read from ptrmask again. + // This is especially important when the ptrmask has + // fewer than 8 bits in it; otherwise the reload in the middle + // of the Phase 2 loop would itself need to loop to gather + // at least 8 bits. + + // Accumulate ptrmask into b. + // ptrmask is sized to describe only typ.ptrdata, but we record + // it as describing typ.size bytes, since all the high bits are zero. + nb = typ.ptrdata / ptrSize + for i := uintptr(0); i < nb; i += 8 { + b |= uintptr(*p) << i + p = add1(p) + } + nb = typ.size / ptrSize + + // Replicate ptrmask to fill entire pbits uintptr. + // Doubling and truncating is fewer steps than + // iterating by nb each time. (nb could be 1.) + // Since we loaded typ.ptrdata/ptrSize bits + // but are pretending to have typ.size/ptrSize, + // there might be no replication necessary/possible. + pbits = b + endnb = nb + if nb+nb <= maxBits { + for endnb <= ptrSize*8 { + pbits |= pbits << endnb + endnb += endnb + } + // Truncate to a multiple of original ptrmask. + endnb = maxBits / nb * nb + pbits &= 1<<endnb - 1 + b = pbits + nb = endnb + } + + // Clear p and endp as sentinel for using pbits. + // Checked during Phase 2 loop. + p = nil + endp = nil + } else { + // Ptrmask is larger. Read it multiple times. + n := (typ.ptrdata/ptrSize+7)/8 - 1 + endp = addb(ptrmask, n) + endnb = typ.size/ptrSize - n*8 + } + } + if p != nil { + b = uintptr(*p) + p = add1(p) + nb = 8 + } + + if typ.size == dataSize { + // Single entry: can stop once we reach the non-pointer data. + nw = typ.ptrdata / ptrSize } else { - ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask + // Repeated instances of typ in an array. + // Have to process first N-1 entries in full, but can stop + // once we reach the non-pointer data in the final entry. + nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / ptrSize } - if size == 2*ptrSize { - // h.shift is 0 for all sizes > ptrSize. - *h.bitp = *ptrmask + if nw == 0 { + // No pointers! Caller was supposed to check. + println("runtime: invalid type ", *typ._string) + throw("heapBitsSetType: called with non-pointer type") return } - te = uintptr(typ.size) / ptrSize - // If the type occupies odd number of words, its mask is repeated. - if te%2 == 0 { - te /= 2 + if nw < 2 { + // Must write at least 2 words, because the "no scan" + // encoding doesn't take effect until the third word. + nw = 2 } - // Copy pointer bitmask into the bitmap. - // TODO(rlh): add comment addressing the following concerns: - // If size > 2*ptrSize, is x guaranteed to be at least 2*ptrSize-aligned? - // And if type occupies and odd number of words, why are we only going through half - // of ptrmask and why don't we have to shift everything by 4 on odd iterations? - for i := uintptr(0); i < dataSize; i += 2 * ptrSize { - v := *(*uint8)(add(unsafe.Pointer(ptrmask), ti)) - ti++ - if ti == te { - ti = 0 + // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4). + // The leading byte is special because it contains the bits for words 0 and 1, + // which do not have the marked bits set. + // The leading half-byte is special because it's a half a byte and must be + // manipulated atomically. + switch { + default: + throw("heapBitsSetType: unexpected shift") + + case h.shift == 0: + // Ptrmask and heap bitmap are aligned. + // Handle first byte of bitmap specially. + // The first byte we write out contains the first two words of the object. + // In those words, the mark bits are mark and checkmark, respectively, + // and must not be set. In all following words, we want to set the mark bit + // as a signal that the object continues to the next 2-bit entry in the bitmap. + hb = b & bitPointerAll + hb |= bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift) + if w += 4; w >= nw { + goto Phase3 } - if i+ptrSize == dataSize { - v &^= typeMask << (4 + typeShift) + *hbitp = uint8(hb) + hbitp = subtract1(hbitp) + b >>= 4 + nb -= 4 + + case ptrSize == 8 && h.shift == 2: + // Ptrmask and heap bitmap are misaligned. + // The bits for the first two words are in a byte shared with another object + // and must be updated atomically. + // NOTE(rsc): The atomic here may not be necessary. + // We took care of 1-word and 2-word objects above, + // so this is at least a 6-word object, so our start bits + // are shared only with the type bits of another object, + // not with its mark bit. Since there is only one allocation + // from a given span at a time, we should be able to set + // these bits non-atomically. Not worth the risk right now. + hb = (b & 3) << (2 * heapBitsShift) + b >>= 2 + nb -= 2 + // Note: no bitMarker in hb because the first two words don't get markers from us. + if gcphase == _GCoff { + *hbitp |= uint8(hb) + } else { + atomicor8(hbitp, uint8(hb)) + } + hbitp = subtract1(hbitp) + if w += 2; w >= nw { + // We know that there is more data, because we handled 2-word objects above. + // This must be at least a 6-word object. If we're out of pointer words, + // mark no scan in next bitmap byte and finish. + hb = 0 + w += 4 + goto Phase3 + } + } + + // Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap. + // The loop computes the bits for that last write but does not execute the write; + // it leaves the bits in hb for processing by phase 3. + // To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to + // use in the first half of the loop right now, and then we only adjust nb explicitly + // if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop. + nb -= 4 + for { + // Emit bitmap byte. + // b has at least nb+4 bits, with one exception: + // if w+4 >= nw, then b has only nw-w bits, + // but we'll stop at the break and then truncate + // appropriately in Phase 3. + hb = b & bitPointerAll + hb |= bitMarkedAll + if w += 4; w >= nw { + break } + *hbitp = uint8(hb) + hbitp = subtract1(hbitp) + b >>= 4 - *h.bitp = v - h.bitp = subtractb(h.bitp, 1) + // Load more bits. b has nb right now. + if p != endp { + // Fast path: keep reading from ptrmask. + // nb unmodified: we just loaded 8 bits, + // and the next iteration will consume 8 bits, + // leaving us with the same nb the next time we're here. + b |= uintptr(*p) << nb + p = add1(p) + } else if p == nil { + // Almost as fast path: track bit count and refill from pbits. + // For short repetitions. + if nb < 8 { + b |= pbits << nb + nb += endnb + } + nb -= 8 // for next iteration + } else { + // Slow path: reached end of ptrmask. + // Process final partial byte and rewind to start. + b |= uintptr(*p) << nb + nb += endnb + if nb < 8 { + b |= uintptr(*ptrmask) << nb + p = add1(ptrmask) + } else { + nb -= 8 + p = ptrmask + } + } + + // Emit bitmap byte. + hb = b & bitPointerAll + hb |= bitMarkedAll + if w += 4; w >= nw { + break + } + *hbitp = uint8(hb) + hbitp = subtract1(hbitp) + b >>= 4 } - if dataSize%(2*ptrSize) == 0 && dataSize < size { - // Mark the word after last object's word as typeDead. - *h.bitp = 0 + +Phase3: + // Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries. + if w > nw { + // Counting the 4 entries in hb not yet written to memory, + // there are more entries than possible pointer slots. + // Discard the excess entries (can't be more than 3). + mask := uintptr(1)<<(4-(w-nw)) - 1 + hb &= mask | mask<<4 // apply mask to both pointer bits and mark bits + } + + // Change nw from counting possibly-pointer words to total words in allocation. + nw = size / ptrSize + + // Write whole bitmap bytes. + // The first is hb, the rest are zero. + if w <= nw { + *hbitp = uint8(hb) + hbitp = subtract1(hbitp) + hb = 0 // for possible final half-byte below + for w += 4; w <= nw; w += 4 { + *hbitp = 0 + hbitp = subtract1(hbitp) + } + } + + // Write final partial bitmap byte if any. + // We know w > nw, or else we'd still be in the loop above. + // It can be bigger only due to the 4 entries in hb that it counts. + // If w == nw+4 then there's nothing left to do: we wrote all nw entries + // and can discard the 4 sitting in hb. + // But if w == nw+2, we need to write first two in hb. + // The byte is shared with the next object so we may need an atomic. + if w == nw+2 { + if gcphase == _GCoff { + *hbitp = *hbitp&^(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift) | uint8(hb) + } else { + atomicand8(hbitp, ^uint8(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift)) + atomicor8(hbitp, uint8(hb)) + } + } + +Phase4: + // Phase 4: all done, but perhaps double check. + if doubleCheck { + end := heapBitsForAddr(x + size) + if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) { + println("ended at wrong bitmap byte for", *typ._string, "x", dataSize/typ.size) + print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") + print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") + h0 := heapBitsForAddr(x) + print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n") + print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n") + throw("bad heapBitsSetType") + } + + // Double-check that bits to be written were written correctly. + // Does not check that other bits were not written, unfortunately. + h := heapBitsForAddr(x) + nptr := typ.ptrdata / ptrSize + ndata := typ.size / ptrSize + count := dataSize / typ.size + totalptr := ((count-1)*typ.size + typ.ptrdata) / ptrSize + for i := uintptr(0); i < size/ptrSize; i++ { + j := i % ndata + var have, want uint8 + have = (*h.bitp >> h.shift) & (bitPointer | bitMarked) + if i >= totalptr { + want = 0 // deadmarker + if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 { + want = bitMarked + } + } else { + if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 { + want |= bitPointer + } + if i >= 2 { + want |= bitMarked + } else { + have &^= bitMarked + } + } + if have != want { + println("mismatch writing bits for", *typ._string, "x", dataSize/typ.size) + print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") + print("kindGCProg=", typ.kind&kindGCProg != 0, "\n") + print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") + h0 := heapBitsForAddr(x) + print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n") + print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n") + print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n") + println("at word", i, "offset", i*ptrSize, "have", have, "want", want) + if typ.kind&kindGCProg != 0 { + println("GC program:") + dumpGCProg(addb(typ.gcdata, 4)) + } + throw("bad heapBitsSetType") + } + h = h.next() + } + if ptrmask == debugPtrmask.data { + unlock(&debugPtrmask.lock) + } } } -// typeBitmapInHeapBitmapFormat returns a bitmap holding -// the type bits for the type typ, but expanded into heap bitmap format -// to make it easier to copy them into the heap bitmap. -// TODO(rsc): Change clients to use the type bitmap format instead, -// which can be stored more densely (especially if we drop to 1 bit per pointer). +var debugPtrmask struct { + lock mutex + data *byte +} + +// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program. +// progSize is the size of the memory described by the program. +// elemSize is the size of the element that the GC program describes (a prefix of). +// dataSize is the total size of the intended data, a multiple of elemSize. +// allocSize is the total size of the allocated memory. // -// To make it easier to replicate the bits when filling out the heap -// bitmap for an array of typ, if typ holds an odd number of words -// (meaning the heap bitmap would stop halfway through a byte), -// typeBitmapInHeapBitmapFormat returns the bitmap for two instances -// of typ in a row. -// TODO(rsc): Remove doubling. -func typeBitmapInHeapBitmapFormat(typ *_type) []uint8 { - var ptrmask *uint8 - nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize - if typ.kind&kindGCProg != 0 { - masksize := nptr - if masksize%2 != 0 { - masksize *= 2 // repeated +// GC programs are only used for large allocations. +// heapBitsSetType requires that allocSize is a multiple of 4 words, +// so that the relevant bitmap bytes are not shared with surrounding +// objects and need not be accessed with atomic instructions. +func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) { + if ptrSize == 8 && allocSize%(4*ptrSize) != 0 { + // Alignment will be wrong. + throw("heapBitsSetTypeGCProg: small allocation") + } + var totalBits uintptr + if elemSize == dataSize { + totalBits = runGCProg(prog, nil, h.bitp, 2) + if totalBits*ptrSize != progSize { + println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize) + throw("heapBitsSetTypeGCProg: unexpected bit count") + } + } else { + count := dataSize / elemSize + + // Piece together program trailer to run after prog that does: + // literal(0) + // repeat(1, elemSize-progSize-1) // zeros to fill element size + // repeat(elemSize, count-1) // repeat that element for count + // This zero-pads the data remaining in the first element and then + // repeats that first element to fill the array. + var trailer [40]byte // 3 varints (max 10 each) + some bytes + i := 0 + if n := elemSize/ptrSize - progSize/ptrSize; n > 0 { + // literal(0) + trailer[i] = 0x01 + i++ + trailer[i] = 0 + i++ + if n > 1 { + // repeat(1, n-1) + trailer[i] = 0x81 + i++ + n-- + for ; n >= 0x80; n >>= 7 { + trailer[i] = byte(n | 0x80) + i++ + } + trailer[i] = byte(n) + i++ + } } - const typeBitsPerByte = 8 / typeBitsWidth - masksize = masksize * typeBitsPerByte / 8 // 4 bits per word - masksize++ // unroll flag in the beginning - if masksize > maxGCMask && typ.gc[1] != 0 { - // write barriers have not been updated to deal with this case yet. - throw("maxGCMask too small for now") + // repeat(elemSize/ptrSize, count-1) + trailer[i] = 0x80 + i++ + n := elemSize / ptrSize + for ; n >= 0x80; n >>= 7 { + trailer[i] = byte(n | 0x80) + i++ } - ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0]))) - // Check whether the program is already unrolled - // by checking if the unroll flag byte is set - maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask))) - if *(*uint8)(unsafe.Pointer(&maskword)) == 0 { - systemstack(func() { - unrollgcprog_m(typ) - }) + trailer[i] = byte(n) + i++ + n = count + for ; n >= 0x80; n >>= 7 { + trailer[i] = byte(n | 0x80) + i++ } - ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte - } else { - ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask + trailer[i] = byte(n) + i++ + trailer[i] = 0 + i++ + + runGCProg(prog, &trailer[0], h.bitp, 2) + + // Even though we filled in the full array just now, + // record that we only filled in up to the ptrdata of the + // last element. This will cause the code below to + // memclr the dead section of the final array element, + // so that scanobject can stop early in the final element. + totalBits = (elemSize*(count-1) + progSize) / ptrSize + } + endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4)) + endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale)) + memclr(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc)) +} + +// progToPointerMask returns the 1-bit pointer mask output by the GC program prog. +// size the size of the region described by prog, in bytes. +// The resulting bitvector will have no more than size/ptrSize bits. +func progToPointerMask(prog *byte, size uintptr) bitvector { + n := (size/ptrSize + 7) / 8 + x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1] + x[len(x)-1] = 0xa1 // overflow check sentinel + n = runGCProg(prog, nil, &x[0], 1) + if x[len(x)-1] != 0xa1 { + throw("progToPointerMask: overflow") } - return (*[1 << 30]byte)(unsafe.Pointer(ptrmask))[:(nptr+1)/2] + return bitvector{int32(n), &x[0]} } -// GC type info programs +// Packed GC pointer bitmaps, aka GC programs. // -// TODO(rsc): Clean up and enable. +// For large types containing arrays, the type information has a +// natural repetition that can be encoded to save space in the +// binary and in the memory representation of the type information. +// +// The encoding is a simple Lempel-Ziv style bytecode machine +// with the following instructions: +// +// 00000000: stop +// 0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes +// 10000000 n c: repeat the previous n bits c times; n, c are varints +// 1nnnnnnn c: repeat the previous n bits c times; c is a varint -const ( - // GC type info programs. - // The programs allow to store type info required for GC in a compact form. - // Most importantly arrays take O(1) space instead of O(n). - // The program grammar is: - // - // Program = {Block} "insEnd" - // Block = Data | Array - // Data = "insData" DataSize DataBlock - // DataSize = int // size of the DataBlock in bit pairs, 1 byte - // DataBlock = binary // dense GC mask (2 bits per word) of size ]DataSize/4[ bytes - // Array = "insArray" ArrayLen Block "insArrayEnd" - // ArrayLen = int // length of the array, 8 bytes (4 bytes for 32-bit arch) - // - // Each instruction (insData, insArray, etc) is 1 byte. - // For example, for type struct { x []byte; y [20]struct{ z int; w *byte }; } - // the program looks as: - // - // insData 3 (typePointer typeScalar typeScalar) - // insArray 20 insData 2 (typeScalar typePointer) insArrayEnd insEnd - // - // Total size of the program is 17 bytes (13 bytes on 32-bits). - // The corresponding GC mask would take 43 bytes (it would be repeated - // because the type has odd number of words). - insData = 1 + iota - insArray - insArrayEnd - insEnd +// runGCProg executes the GC program prog, and then trailer if non-nil, +// writing to dst with entries of the given size. +// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst. +// If size == 2, dst is the 2-bit heap bitmap, and writes move backward +// starting at dst (because the heap bitmap does). In this case, the caller guarantees +// that only whole bytes in dst need to be written. +// +// runGCProg returns the number of 1- or 2-bit entries written to memory. +func runGCProg(prog, trailer, dst *byte, size int) uintptr { + dstStart := dst - // 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively. - maxGCMask = 65536 // TODO(rsc): change back to 64 -) + // Bits waiting to be written to memory. + var bits uintptr + var nbits uintptr -// Recursively unrolls GC program in prog. -// mask is where to store the result. -// If inplace is true, store the result not in mask but in the heap bitmap for mask. -// ppos is a pointer to position in mask, in bits. -// sparse says to generate 4-bits per word mask for heap (1-bit for data/bss otherwise). -//go:nowritebarrier -func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) *byte { - pos := *ppos - mask := (*[1 << 30]byte)(unsafe.Pointer(maskp)) + p := prog +Run: for { - switch *prog { - default: - throw("unrollgcprog: unknown instruction") + // Flush accumulated full bytes. + // The rest of the loop assumes that nbits <= 7. + for ; nbits >= 8; nbits -= 8 { + if size == 1 { + *dst = uint8(bits) + dst = add1(dst) + bits >>= 8 + } else { + v := bits&bitPointerAll | bitMarkedAll + *dst = uint8(v) + dst = subtract1(dst) + bits >>= 4 + v = bits&bitPointerAll | bitMarkedAll + *dst = uint8(v) + dst = subtract1(dst) + bits >>= 4 + } + } - case insData: - prog = addb(prog, 1) - siz := int(*prog) - prog = addb(prog, 1) - p := (*[1 << 30]byte)(unsafe.Pointer(prog)) - for i := 0; i < siz; i++ { - const typeBitsPerByte = 8 / typeBitsWidth - v := p[i/typeBitsPerByte] - v >>= (uint(i) % typeBitsPerByte) * typeBitsWidth - v &= typeMask - if inplace { - // Store directly into GC bitmap. - h := heapBitsForAddr(uintptr(unsafe.Pointer(&mask[pos]))) - if h.shift == 0 { - *h.bitp = v << typeShift - } else { - *h.bitp |= v << (4 + typeShift) - } - pos += ptrSize - } else if sparse { - // 4-bits per word, type bits in high bits - v <<= (pos % 8) + typeShift - mask[pos/8] |= v - pos += heapBitsWidth + // Process one instruction. + inst := uintptr(*p) + p = add1(p) + n := inst & 0x7F + if inst&0x80 == 0 { + // Literal bits; n == 0 means end of program. + if n == 0 { + // Program is over; continue in trailer if present. + if trailer != nil { + //println("trailer") + p = trailer + trailer = nil + continue + } + //println("done") + break Run + } + //println("lit", n, dst) + nbyte := n / 8 + for i := uintptr(0); i < nbyte; i++ { + bits |= uintptr(*p) << nbits + p = add1(p) + if size == 1 { + *dst = uint8(bits) + dst = add1(dst) + bits >>= 8 } else { - // 1 bit per word, for data/bss bitmap - v >>= 1 // convert typePointer to 1, others to 0 - mask[pos/8] |= v << (pos % 8) - pos++ + v := bits&0xf | bitMarkedAll + *dst = uint8(v) + dst = subtract1(dst) + bits >>= 4 + v = bits&0xf | bitMarkedAll + *dst = uint8(v) + dst = subtract1(dst) + bits >>= 4 + } + } + if n %= 8; n > 0 { + bits |= uintptr(*p) << nbits + p = add1(p) + nbits += n + } + continue Run + } + + // Repeat. If n == 0, it is encoded in a varint in the next bytes. + if n == 0 { + for off := uint(0); ; off += 7 { + x := uintptr(*p) + p = add1(p) + n |= (x & 0x7F) << off + if x&0x80 == 0 { + break + } + } + } + + // Count is encoded in a varint in the next bytes. + c := uintptr(0) + for off := uint(0); ; off += 7 { + x := uintptr(*p) + p = add1(p) + c |= (x & 0x7F) << off + if x&0x80 == 0 { + break + } + } + c *= n // now total number of bits to copy + + // If the number of bits being repeated is small, load them + // into a register and use that register for the entire loop + // instead of repeatedly reading from memory. + // Handling fewer than 8 bits here makes the general loop simpler. + // The cutoff is ptrSize*8 - 7 to guarantee that when we add + // the pattern to a bit buffer holding at most 7 bits (a partial byte) + // it will not overflow. + src := dst + const maxBits = ptrSize*8 - 7 + if n <= maxBits { + // Start with bits in output buffer. + pattern := bits + npattern := nbits + + // If we need more bits, fetch them from memory. + if size == 1 { + src = subtract1(src) + for npattern < n { + pattern <<= 8 + pattern |= uintptr(*src) + src = subtract1(src) + npattern += 8 + } + } else { + src = add1(src) + for npattern < n { + pattern <<= 4 + pattern |= uintptr(*src) & 0xf + src = add1(src) + npattern += 4 } } - prog = addb(prog, round(uintptr(siz)*typeBitsWidth, 8)/8) - case insArray: - prog = (*byte)(add(unsafe.Pointer(prog), 1)) - siz := uintptr(0) - for i := uintptr(0); i < ptrSize; i++ { - siz = (siz << 8) + uintptr(*(*byte)(add(unsafe.Pointer(prog), ptrSize-i-1))) + // We started with the whole bit output buffer, + // and then we loaded bits from whole bytes. + // Either way, we might now have too many instead of too few. + // Discard the extra. + if npattern > n { + pattern >>= npattern - n + npattern = n } - prog = (*byte)(add(unsafe.Pointer(prog), ptrSize)) - var prog1 *byte - for i := uintptr(0); i < siz; i++ { - prog1 = unrollgcprog1(&mask[0], prog, &pos, inplace, sparse) + + // Replicate pattern to at most maxBits. + if npattern == 1 { + // One bit being repeated. + // If the bit is 1, make the pattern all 1s. + // If the bit is 0, the pattern is already all 0s, + // but we can claim that the number of bits + // in the word is equal to the number we need (c), + // because right shift of bits will zero fill. + if pattern == 1 { + pattern = 1<<maxBits - 1 + npattern = maxBits + } else { + npattern = c + } + } else { + b := pattern + nb := npattern + if nb+nb <= maxBits { + // Double pattern until the whole uintptr is filled. + for nb <= ptrSize*8 { + b |= b << nb + nb += nb + } + // Trim away incomplete copy of original pattern in high bits. + // TODO(rsc): Replace with table lookup or loop on systems without divide? + nb = maxBits / npattern * npattern + b &= 1<<nb - 1 + pattern = b + npattern = nb + } } - if *prog1 != insArrayEnd { - throw("unrollgcprog: array does not end with insArrayEnd") + + // Add pattern to bit buffer and flush bit buffer, c/npattern times. + // Since pattern contains >8 bits, there will be full bytes to flush + // on each iteration. + for ; c >= npattern; c -= npattern { + bits |= pattern << nbits + nbits += npattern + if size == 1 { + for nbits >= 8 { + *dst = uint8(bits) + dst = add1(dst) + bits >>= 8 + nbits -= 8 + } + } else { + for nbits >= 4 { + *dst = uint8(bits&0xf | bitMarkedAll) + dst = subtract1(dst) + bits >>= 4 + nbits -= 4 + } + } } - prog = (*byte)(add(unsafe.Pointer(prog1), 1)) - case insArrayEnd, insEnd: - *ppos = pos - return prog + // Add final fragment to bit buffer. + if c > 0 { + pattern &= 1<<c - 1 + bits |= pattern << nbits + nbits += c + } + continue Run } - } -} - -// Unrolls GC program prog for data/bss, returns dense GC mask. -func unrollglobgcprog(prog *byte, size uintptr) bitvector { - masksize := round(round(size, ptrSize)/ptrSize, 8) / 8 - mask := (*[1 << 30]byte)(persistentalloc(masksize+1, 0, &memstats.gc_sys)) - mask[masksize] = 0xa1 - pos := uintptr(0) - prog = unrollgcprog1(&mask[0], prog, &pos, false, false) - if pos != size/ptrSize { - print("unrollglobgcprog: bad program size, got ", pos, ", expect ", size/ptrSize, "\n") - throw("unrollglobgcprog: bad program size") - } - if *prog != insEnd { - throw("unrollglobgcprog: program does not end with insEnd") - } - if mask[masksize] != 0xa1 { - throw("unrollglobgcprog: overflow") - } - return bitvector{int32(masksize * 8), &mask[0]} -} -func unrollgcproginplace_m(v unsafe.Pointer, typ *_type, size, size0 uintptr) { - // TODO(rsc): Explain why these non-atomic updates are okay. - pos := uintptr(0) - prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1]))) - for pos != size0 { - unrollgcprog1((*byte)(v), prog, &pos, true, true) + // Repeat; n too large to fit in a register. + // Since nbits <= 7, we know the first few bytes of repeated data + // are already written to memory. + off := n - nbits // n > nbits because n > maxBits and nbits <= 7 + if size == 1 { + // Leading src fragment. + src = subtractb(src, (off+7)/8) + if frag := off & 7; frag != 0 { + bits |= uintptr(*src) >> (8 - frag) << nbits + src = add1(src) + nbits += frag + c -= frag + } + // Main loop: load one byte, write another. + // The bits are rotating through the bit buffer. + for i := c / 8; i > 0; i-- { + bits |= uintptr(*src) << nbits + src = add1(src) + *dst = uint8(bits) + dst = add1(dst) + bits >>= 8 + } + // Final src fragment. + if c %= 8; c > 0 { + bits |= (uintptr(*src) & (1<<c - 1)) << nbits + nbits += c + } + } else { + // Leading src fragment. + src = addb(src, (off+3)/4) + if frag := off & 3; frag != 0 { + bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits + src = subtract1(src) + nbits += frag + c -= frag + } + // Main loop: load one byte, write another. + // The bits are rotating through the bit buffer. + for i := c / 4; i > 0; i-- { + bits |= (uintptr(*src) & 0xf) << nbits + src = subtract1(src) + *dst = uint8(bits&0xf | bitMarkedAll) + dst = subtract1(dst) + bits >>= 4 + } + // Final src fragment. + if c %= 4; c > 0 { + bits |= (uintptr(*src) & (1<<c - 1)) << nbits + nbits += c + } + } } - // Mark first word as bitAllocated. - // Mark word after last as typeDead. - if size0 < size { - h := heapBitsForAddr(uintptr(v) + size0) - *h.bitp &^= typeMask << typeShift + // Write any final bits out, using full-byte writes, even for the final byte. + var totalBits uintptr + if size == 1 { + totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits + nbits += -nbits & 7 + for ; nbits > 0; nbits -= 8 { + *dst = uint8(bits) + dst = add1(dst) + bits >>= 8 + } + } else { + totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits + nbits += -nbits & 3 + for ; nbits > 0; nbits -= 4 { + v := bits&0xf | bitMarkedAll + *dst = uint8(v) + dst = subtract1(dst) + bits >>= 4 + } + // Clear the mark bits in the first two entries. + // They are the actual mark and checkmark bits, + // not non-dead markers. It simplified the code + // above to set the marker in every bit written and + // then clear these two as a special case at the end. + *dstStart &^= bitMarked | bitMarked<<heapBitsShift } + return totalBits } -var unroll mutex - -// Unrolls GC program in typ.gc[1] into typ.gc[0] -//go:nowritebarrier -func unrollgcprog_m(typ *_type) { - lock(&unroll) - mask := (*byte)(unsafe.Pointer(uintptr(typ.gc[0]))) - if *mask == 0 { - pos := uintptr(8) // skip the unroll flag - prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1]))) - prog = unrollgcprog1(mask, prog, &pos, false, true) - if *prog != insEnd { - throw("unrollgcprog: program does not end with insEnd") +func dumpGCProg(p *byte) { + nptr := 0 + for { + x := *p + p = add1(p) + if x == 0 { + print("\t", nptr, " end\n") + break } - if typ.size/ptrSize%2 != 0 { - // repeat the program - prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1]))) - unrollgcprog1(mask, prog, &pos, false, true) + if x&0x80 == 0 { + print("\t", nptr, " lit ", x, ":") + n := int(x+7) / 8 + for i := 0; i < n; i++ { + print(" ", hex(*p)) + p = add1(p) + } + print("\n") + nptr += int(x) + } else { + nbit := int(x &^ 0x80) + if nbit == 0 { + for nb := uint(0); ; nb += 7 { + x := *p + p = add1(p) + nbit |= int(x&0x7f) << nb + if x&0x80 == 0 { + break + } + } + } + count := 0 + for nb := uint(0); ; nb += 7 { + x := *p + p = add1(p) + count |= int(x&0x7f) << nb + if x&0x80 == 0 { + break + } + } + print("\t", nptr, " repeat ", nbit, " × ", count, "\n") + nptr += nbit * count } - - // atomic way to say mask[0] = 1 - atomicor8(mask, 1) } - unlock(&unroll) } // Testing. @@ -748,36 +1510,46 @@ func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool { return true } -// Returns GC type info for object p for testing. -func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) { - *mask = nil - *len = 0 +// gcbits returns the GC type info for x, for testing. +// The result is the bitmap entries (0 or 1), one entry per byte. +//go:linkname reflect_gcbits reflect.gcbits +func reflect_gcbits(x interface{}) []byte { + ret := getgcmask(x) + typ := (*ptrtype)(unsafe.Pointer((*eface)(unsafe.Pointer(&x))._type)).elem + nptr := typ.ptrdata / ptrSize + for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 { + ret = ret[:len(ret)-1] + } + return ret +} - // data +// Returns GC type info for object p for testing. +func getgcmask(ep interface{}) (mask []byte) { + e := *(*eface)(unsafe.Pointer(&ep)) + p := e.data + t := e._type + // data or bss for datap := &firstmoduledata; datap != nil; datap = datap.next { + // data if datap.data <= uintptr(p) && uintptr(p) < datap.edata { + bitmap := datap.gcdatamask.bytedata n := (*ptrtype)(unsafe.Pointer(t)).elem.size - *len = n / ptrSize - *mask = &make([]byte, *len)[0] + mask = make([]byte, n/ptrSize) for i := uintptr(0); i < n; i += ptrSize { off := (uintptr(p) + i - datap.data) / ptrSize - bits := (*addb(datap.gcdatamask.bytedata, off/8) >> (off % 8)) & 1 - bits += 1 // convert 1-bit to 2-bit - *addb(*mask, i/ptrSize) = bits + mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1 } return } // bss if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss { + bitmap := datap.gcbssmask.bytedata n := (*ptrtype)(unsafe.Pointer(t)).elem.size - *len = n / ptrSize - *mask = &make([]byte, *len)[0] + mask = make([]byte, n/ptrSize) for i := uintptr(0); i < n; i += ptrSize { off := (uintptr(p) + i - datap.bss) / ptrSize - bits := (*addb(datap.gcbssmask.bytedata, off/8) >> (off % 8)) & 1 - bits += 1 // convert 1-bit to 2-bit - *addb(*mask, i/ptrSize) = bits + mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1 } return } @@ -787,47 +1559,58 @@ func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) { var n uintptr var base uintptr if mlookup(uintptr(p), &base, &n, nil) != 0 { - *len = n / ptrSize - *mask = &make([]byte, *len)[0] + mask = make([]byte, n/ptrSize) for i := uintptr(0); i < n; i += ptrSize { - bits := heapBitsForAddr(base + i).typeBits() - *addb(*mask, i/ptrSize) = bits + hbits := heapBitsForAddr(base + i) + if hbits.isPointer() { + mask[i/ptrSize] = 1 + } + if i >= 2*ptrSize && !hbits.isMarked() { + mask = mask[:i/ptrSize] + break + } } return } // stack - var frame stkframe - frame.sp = uintptr(p) - _g_ := getg() - gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0) - if frame.fn != nil { - f := frame.fn - targetpc := frame.continpc - if targetpc == 0 { - return - } - if targetpc != f.entry { - targetpc-- - } - pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc) - if pcdata == -1 { - return - } - stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps)) - if stkmap == nil || stkmap.n <= 0 { - return - } - bv := stackmapdata(stkmap, pcdata) - size := uintptr(bv.n) * ptrSize - n := (*ptrtype)(unsafe.Pointer(t)).elem.size - *len = n / ptrSize - *mask = &make([]byte, *len)[0] - for i := uintptr(0); i < n; i += ptrSize { - off := (uintptr(p) + i - frame.varp + size) / ptrSize - bits := (*addb(bv.bytedata, off/8) >> (off % 8)) & 1 - bits += 1 // convert 1-bit to 2-bit - *addb(*mask, i/ptrSize) = bits + if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi { + var frame stkframe + frame.sp = uintptr(p) + _g_ := getg() + gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0) + if frame.fn != nil { + f := frame.fn + targetpc := frame.continpc + if targetpc == 0 { + return + } + if targetpc != f.entry { + targetpc-- + } + pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc) + if pcdata == -1 { + return + } + stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps)) + if stkmap == nil || stkmap.n <= 0 { + return + } + bv := stackmapdata(stkmap, pcdata) + size := uintptr(bv.n) * ptrSize + n := (*ptrtype)(unsafe.Pointer(t)).elem.size + mask = make([]byte, n/ptrSize) + for i := uintptr(0); i < n; i += ptrSize { + bitmap := bv.bytedata + off := (uintptr(p) + i - frame.varp + size) / ptrSize + mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1 + } } + return } + + // otherwise, not something the GC knows about. + // possibly read-only data, like malloc(0). + // must not have pointers + return } diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 9bd36d1a5e..db5b2dcd36 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -127,13 +127,22 @@ const ( _RootCount = 5 ) -// heapminimum is the minimum number of bytes in the heap. -// This cleans up the corner case of where we have a very small live set but a lot -// of allocations and collecting every GOGC * live set is expensive. -// heapminimum is adjust by multiplying it by GOGC/100. In -// the special case of GOGC==0 this will set heapminimum to 0 resulting -// collecting at every allocation even when the heap size is small. -var heapminimum = uint64(4 << 20) +// heapminimum is the minimum heap size at which to trigger GC. +// For small heaps, this overrides the usual GOGC*live set rule. +// +// When there is a very small live set but a lot of allocation, simply +// collecting when the heap reaches GOGC*live results in many GC +// cycles and high total per-GC overhead. This minimum amortizes this +// per-GC overhead while keeping the heap reasonably small. +// +// During initialization this is set to 4MB*GOGC/100. In the case of +// GOGC==0, this will set heapminimum to 0, resulting in constant +// collection even when the heap size is small, which is useful for +// debugging. +var heapminimum uint64 = defaultHeapMinimum + +// defaultHeapMinimum is the value of heapminimum for GOGC==100. +const defaultHeapMinimum = 4 << 20 // Initialized from $GOGC. GOGC=off means no GC. var gcpercent int32 @@ -146,8 +155,8 @@ func gcinit() { work.markfor = parforalloc(_MaxGcproc) _ = setGCPercent(readgogc()) for datap := &firstmoduledata; datap != nil; datap = datap.next { - datap.gcdatamask = unrollglobgcprog((*byte)(unsafe.Pointer(datap.gcdata)), datap.edata-datap.data) - datap.gcbssmask = unrollglobgcprog((*byte)(unsafe.Pointer(datap.gcbss)), datap.ebss-datap.bss) + datap.gcdatamask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcdata)), datap.edata-datap.data) + datap.gcbssmask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcbss)), datap.ebss-datap.bss) } memstats.next_gc = heapminimum } @@ -180,7 +189,7 @@ func setGCPercent(in int32) (out int32) { in = -1 } gcpercent = in - heapminimum = heapminimum * uint64(gcpercent) / 100 + heapminimum = defaultHeapMinimum * uint64(gcpercent) / 100 unlock(&mheap_.lock) return out } @@ -197,7 +206,6 @@ var gcBlackenEnabled uint32 const ( _GCoff = iota // GC not running, write barrier disabled - _GCquiesce // unused state _GCstw // unused state _GCscan // GC collecting roots into workbufs, write barrier disabled _GCmark // GC marking from workbufs, write barrier ENABLED @@ -208,7 +216,7 @@ const ( //go:nosplit func setGCPhase(x uint32) { atomicstore(&gcphase, x) - writeBarrierEnabled = gcphase == _GCmark || gcphase == _GCmarktermination || mheap_.shadow_enabled + writeBarrierEnabled = gcphase == _GCmark || gcphase == _GCmarktermination } // gcMarkWorkerMode represents the mode that a concurrent mark worker @@ -699,11 +707,11 @@ const ( func startGC(mode int) { // The gc is turned off (via enablegc) until the bootstrap has completed. // Also, malloc gets called in the guts of a number of libraries that might be - // holding locks. To avoid deadlocks during stoptheworld, don't bother + // holding locks. To avoid deadlocks during stop-the-world, don't bother // trying to run gc while holding a lock. The next mallocgc without a lock // will do the gc instead. mp := acquirem() - if gp := getg(); gp == mp.g0 || mp.locks > 1 || !memstats.enablegc || panicking != 0 || gcpercent < 0 { + if gp := getg(); gp == mp.g0 || mp.locks > 1 || mp.preemptoff != "" || !memstats.enablegc || panicking != 0 || gcpercent < 0 { releasem(mp) return } @@ -797,7 +805,7 @@ func gc(mode int) { traceGCStart() } - systemstack(stoptheworld) + systemstack(stopTheWorldWithSema) systemstack(finishsweep_m) // finish sweep before we start concurrent scan. // clearpools before we start the GC. If we wait they memory will not be // reclaimed until the next GC cycle. @@ -814,7 +822,7 @@ func gc(mode int) { setGCPhase(_GCscan) // Concurrent scan. - starttheworld() + startTheWorldWithSema() if debug.gctrace > 0 { tScan = nanotime() } @@ -858,7 +866,7 @@ func gc(mode int) { if debug.gctrace > 0 { tMarkTerm = nanotime() } - systemstack(stoptheworld) + systemstack(stopTheWorldWithSema) // The gcphase is _GCmark, it will transition to _GCmarktermination // below. The important thing is that the wb remains active until // all marking is complete. This includes writes made by the GC. @@ -952,13 +960,12 @@ func gc(mode int) { // all done mp.preemptoff = "" - semrelease(&worldsema) - if gcphase != _GCoff { throw("gc done but gcphase != _GCoff") } - systemstack(starttheworld) + systemstack(startTheWorldWithSema) + semrelease(&worldsema) releasem(mp) mp = nil @@ -1160,6 +1167,18 @@ func gcBgMarkDone() { } } +// gcMarkWorkAvailable determines if mark work is readily available. +// It is used by the scheduler to decide if this p run a mark work. +func gcMarkWorkAvailable(p *p) bool { + if !p.gcw.empty() { + return true + } + if atomicload64(&work.full) != 0 || atomicload64(&work.partial) != 0 { + return true // global work available + } + return false +} + // gcFlushGCWork disposes the gcWork caches of all Ps. The world must // be stopped. //go:nowritebarrier diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 9d78ddecae..62fa33895b 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -261,7 +261,7 @@ func gcphasework(gp *g) { switch gcphase { default: throw("gcphasework in bad gcphase") - case _GCoff, _GCquiesce, _GCstw, _GCsweep: + case _GCoff, _GCstw, _GCsweep: // No work. case _GCscan: // scan the stack, mark the objects, put pointers in work buffers @@ -557,9 +557,6 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { // Same work as in scanobject; see comments there. obj := *(*uintptr)(unsafe.Pointer(b + i)) if obj != 0 && arena_start <= obj && obj < arena_used { - if mheap_.shadow_enabled && debug.wbshadow >= 2 && debug.gccheckmark > 0 && useCheckmark { - checkwbshadow((*uintptr)(unsafe.Pointer(b + i))) - } if obj, hbits, span := heapBitsForObject(obj); obj != 0 { greyobject(obj, b, i, hbits, span, gcw) } @@ -597,32 +594,25 @@ func scanobject(b uintptr, gcw *gcWork) { // Avoid needless hbits.next() on last iteration. hbits = hbits.next() } - bits := uintptr(hbits.typeBits()) - if bits == typeDead { + // During checkmarking, 1-word objects store the checkmark + // in the type bit for the one word. The only one-word objects + // are pointers, or else they'd be merged with other non-pointer + // data into larger allocations. + bits := hbits.bits() + if i >= 2*ptrSize && bits&bitMarked == 0 { break // no more pointers in this object } - - if bits <= typeScalar { // typeScalar, typeDead, typeScalarMarked - continue - } - - if bits&typePointer != typePointer { - print("gc useCheckmark=", useCheckmark, " b=", hex(b), "\n") - throw("unexpected garbage collection bits") + if bits&bitPointer == 0 { + continue // not a pointer } - // Work here is duplicated in scanblock. + // Work here is duplicated in scanblock and above. // If you make changes here, make changes there too. - obj := *(*uintptr)(unsafe.Pointer(b + i)) // At this point we have extracted the next potential pointer. - // Check if it points into heap. - if obj != 0 && arena_start <= obj && obj < arena_used { - if mheap_.shadow_enabled && debug.wbshadow >= 2 && debug.gccheckmark > 0 && useCheckmark { - checkwbshadow((*uintptr)(unsafe.Pointer(b + i))) - } - + // Check if it points into heap and not back at the current object. + if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n { // Mark the object. if obj, hbits, span := heapBitsForObject(obj); obj != 0 { greyobject(obj, b, i, hbits, span, gcw) @@ -673,11 +663,11 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork throw("checkmark found unmarked object") } - if hbits.isCheckmarked() { + if hbits.isCheckmarked(span.elemsize) { return } - hbits.setCheckmarked() - if !hbits.isCheckmarked() { + hbits.setCheckmarked(span.elemsize) + if !hbits.isCheckmarked(span.elemsize) { throw("setCheckmarked and isCheckmarked disagree") } } else { @@ -685,12 +675,11 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork if hbits.isMarked() { return } - hbits.setMarked() // If this is a noscan object, fast-track it to black // instead of greying it. - if hbits.typeBits() == typeDead { + if !hbits.hasPointers(span.elemsize) { gcw.bytesMarked += uint64(span.elemsize) return } diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go index 9c32ae8880..b7feb847b4 100644 --- a/src/runtime/mgcwork.go +++ b/src/runtime/mgcwork.go @@ -7,7 +7,7 @@ package runtime import "unsafe" const ( - _Debugwbufs = true // if true check wbufs consistency + _Debugwbufs = false // if true check wbufs consistency _WorkbufSize = 1 * 256 // in bytes - if small wbufs are passed to GC in a timely fashion. ) @@ -182,6 +182,13 @@ func (w *gcWork) balance() { } } +// empty returns true if w has no mark work available. +//go:nowritebarrier +func (w *gcWork) empty() bool { + wbuf := w.wbuf + return wbuf == 0 || wbuf.ptr().nobj == 0 +} + // Internally, the GC work pool is kept in arrays in work buffers. // The gcWork interface caches a work buffer until full (or empty) to // avoid contending on the global work buffer lists. diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 10878ee5cf..04fa050bc5 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -28,6 +28,15 @@ type mheap struct { spans **mspan spans_mapped uintptr + // Proportional sweep + pagesSwept uint64 // pages swept this cycle; updated atomically + sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without + + // Malloc stats. + largefree uint64 // bytes freed for large objects (>maxsmallsize) + nlargefree uint64 // number of frees for large objects (>maxsmallsize) + nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize) + // range of addresses we might see in the heap bitmap uintptr bitmap_mapped uintptr @@ -36,14 +45,6 @@ type mheap struct { arena_end uintptr arena_reserved bool - // write barrier shadow heap. - // 64-bit systems only, enabled by GODEBUG=wbshadow=1. - // See also shadow_data, data_start, data_end fields on moduledata in - // symtab.go. - shadow_enabled bool // shadow should be updated and checked - shadow_reserved bool // shadow memory is reserved - shadow_heap uintptr // heap-addr + shadow_heap = shadow heap addr - // central free lists for small size classes. // the padding makes sure that the MCentrals are // spaced CacheLineSize bytes apart, so that each MCentral.lock @@ -58,15 +59,6 @@ type mheap struct { specialfinalizeralloc fixalloc // allocator for specialfinalizer* specialprofilealloc fixalloc // allocator for specialprofile* speciallock mutex // lock for sepcial record allocators. - - // Proportional sweep - pagesSwept uint64 // pages swept this cycle; updated atomically - sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without - - // Malloc stats. - largefree uint64 // bytes freed for large objects (>maxsmallsize) - nlargefree uint64 // number of frees for large objects (>maxsmallsize) - nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize) } var mheap_ mheap @@ -176,7 +168,9 @@ func recordspan(vh unsafe.Pointer, p unsafe.Pointer) { // inheap reports whether b is a pointer into a (potentially dead) heap object. // It returns false for pointers into stack spans. +// Non-preemptible because it is used by write barriers. //go:nowritebarrier +//go:nosplit func inheap(b uintptr) bool { if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used { return false diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go index 4544344780..a618bd5e81 100644 --- a/src/runtime/mprof.go +++ b/src/runtime/mprof.go @@ -521,9 +521,7 @@ func GoroutineProfile(p []StackRecord) (n int, ok bool) { n = NumGoroutine() if n <= len(p) { gp := getg() - semacquire(&worldsema, false) - gp.m.preemptoff = "profile" - systemstack(stoptheworld) + stopTheWorld("profile") n = NumGoroutine() if n <= len(p) { @@ -544,9 +542,7 @@ func GoroutineProfile(p []StackRecord) (n int, ok bool) { } } - gp.m.preemptoff = "" - semrelease(&worldsema) - systemstack(starttheworld) + startTheWorld() } return n, ok @@ -565,10 +561,7 @@ func saveg(pc, sp uintptr, gp *g, r *StackRecord) { // into buf after the trace for the current goroutine. func Stack(buf []byte, all bool) int { if all { - semacquire(&worldsema, false) - gp := getg() - gp.m.preemptoff = "stack trace" - systemstack(stoptheworld) + stopTheWorld("stack trace") } n := 0 @@ -590,10 +583,7 @@ func Stack(buf []byte, all bool) int { } if all { - gp := getg() - gp.m.preemptoff = "" - semrelease(&worldsema) - systemstack(starttheworld) + startTheWorld() } return n } diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index c8e5249156..3eff7f6b3e 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -153,24 +153,13 @@ func init() { // ReadMemStats populates m with memory allocator statistics. func ReadMemStats(m *MemStats) { - // Have to acquire worldsema to stop the world, - // because stoptheworld can only be used by - // one goroutine at a time, and there might be - // a pending garbage collection already calling it. - semacquire(&worldsema, false) - gp := getg() - gp.m.preemptoff = "read mem stats" - systemstack(stoptheworld) + stopTheWorld("read mem stats") systemstack(func() { readmemstats_m(m) }) - gp.m.preemptoff = "" - gp.m.locks++ - semrelease(&worldsema) - systemstack(starttheworld) - gp.m.locks-- + startTheWorld() } func readmemstats_m(stats *MemStats) { diff --git a/src/runtime/os1_darwin.go b/src/runtime/os1_darwin.go index 10cf460f7f..1b74e3e653 100644 --- a/src/runtime/os1_darwin.go +++ b/src/runtime/os1_darwin.go @@ -8,7 +8,6 @@ import "unsafe" //extern SigTabTT runtime·sigtab[]; -var sigset_none = uint32(0) var sigset_all = ^uint32(0) func unimplemented(name string) { @@ -126,17 +125,36 @@ func mpreinit(mp *m) { mp.gsignal.m = mp } +func msigsave(mp *m) { + smask := (*uint32)(unsafe.Pointer(&mp.sigmask)) + if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) { + throw("insufficient storage for signal mask") + } + sigprocmask(_SIG_SETMASK, nil, smask) +} + // Called to initialize a new m (including the bootstrap m). // Called on the new thread, can not allocate memory. func minit() { // Initialize signal handling. _g_ := getg() signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024) - sigprocmask(_SIG_SETMASK, &sigset_none, nil) + + // restore signal mask from m.sigmask and unblock essential signals + nmask := *(*uint32)(unsafe.Pointer(&_g_.m.sigmask)) + for i := range sigtable { + if sigtable[i].flags&_SigUnblock != 0 { + nmask &^= 1 << (uint32(i) - 1) + } + } + sigprocmask(_SIG_SETMASK, &nmask, nil) } // Called from dropm to undo the effect of an minit. func unminit() { + _g_ := getg() + smask := (*uint32)(unsafe.Pointer(&_g_.m.sigmask)) + sigprocmask(_SIG_SETMASK, smask, nil) signalstack(nil, 0) } @@ -447,6 +465,6 @@ func signalstack(p *byte, n int32) { sigaltstack(&st, nil) } -func unblocksignals() { - sigprocmask(_SIG_SETMASK, &sigset_none, nil) +func updatesigmask(m sigmask) { + sigprocmask(_SIG_SETMASK, &m[0], nil) } diff --git a/src/runtime/os1_dragonfly.go b/src/runtime/os1_dragonfly.go index a590aea39b..eb42b54e2b 100644 --- a/src/runtime/os1_dragonfly.go +++ b/src/runtime/os1_dragonfly.go @@ -12,7 +12,6 @@ const ( _HW_NCPU = 3 ) -var sigset_none = sigset{} var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}} func getncpu() int32 { @@ -120,6 +119,14 @@ func mpreinit(mp *m) { mp.gsignal.m = mp } +func msigsave(mp *m) { + smask := (*sigset)(unsafe.Pointer(&mp.sigmask)) + if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) { + throw("insufficient storage for signal mask") + } + sigprocmask(nil, smask) +} + // Called to initialize a new m (including the bootstrap m). // Called on the new thread, can not allocate memory. func minit() { @@ -130,11 +137,22 @@ func minit() { // Initialize signal handling signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024) - sigprocmask(&sigset_none, nil) + + // restore signal mask from m.sigmask and unblock essential signals + nmask := *(*sigset)(unsafe.Pointer(&_g_.m.sigmask)) + for i := range sigtable { + if sigtable[i].flags&_SigUnblock != 0 { + nmask.__bits[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31) + } + } + sigprocmask(&nmask, nil) } // Called from dropm to undo the effect of an minit. func unminit() { + _g_ := getg() + smask := (*sigset)(unsafe.Pointer(&_g_.m.sigmask)) + sigprocmask(smask, nil) signalstack(nil, 0) } @@ -215,6 +233,8 @@ func signalstack(p *byte, n int32) { sigaltstack(&st, nil) } -func unblocksignals() { - sigprocmask(&sigset_none, nil) +func updatesigmask(m sigmask) { + var mask sigset + copy(mask.__bits[:], m[:]) + sigprocmask(&mask, nil) } diff --git a/src/runtime/os1_freebsd.go b/src/runtime/os1_freebsd.go index 8719a49286..f7f34bd386 100644 --- a/src/runtime/os1_freebsd.go +++ b/src/runtime/os1_freebsd.go @@ -12,7 +12,6 @@ const ( _HW_NCPU = 3 ) -var sigset_none = sigset{} var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}} func getncpu() int32 { @@ -119,6 +118,14 @@ func mpreinit(mp *m) { mp.gsignal.m = mp } +func msigsave(mp *m) { + smask := (*sigset)(unsafe.Pointer(&mp.sigmask)) + if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) { + throw("insufficient storage for signal mask") + } + sigprocmask(nil, smask) +} + // Called to initialize a new m (including the bootstrap m). // Called on the new thread, can not allocate memory. func minit() { @@ -132,11 +139,22 @@ func minit() { // Initialize signal handling. signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024) - sigprocmask(&sigset_none, nil) + + // restore signal mask from m.sigmask and unblock essential signals + nmask := *(*sigset)(unsafe.Pointer(&_g_.m.sigmask)) + for i := range sigtable { + if sigtable[i].flags&_SigUnblock != 0 { + nmask.__bits[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31) + } + } + sigprocmask(&nmask, nil) } // Called from dropm to undo the effect of an minit. func unminit() { + _g_ := getg() + smask := (*sigset)(unsafe.Pointer(&_g_.m.sigmask)) + sigprocmask(smask, nil) signalstack(nil, 0) } @@ -217,6 +235,8 @@ func signalstack(p *byte, n int32) { sigaltstack(&st, nil) } -func unblocksignals() { - sigprocmask(&sigset_none, nil) +func updatesigmask(m [(_NSIG + 31) / 32]uint32) { + var mask sigset + copy(mask.__bits[:], m[:]) + sigprocmask(&mask, nil) } diff --git a/src/runtime/os1_linux.go b/src/runtime/os1_linux.go index e4b18c79b3..02f98d7c5f 100644 --- a/src/runtime/os1_linux.go +++ b/src/runtime/os1_linux.go @@ -6,7 +6,6 @@ package runtime import "unsafe" -var sigset_none sigset var sigset_all sigset = sigset{^uint32(0), ^uint32(0)} // Linux futex. @@ -190,17 +189,36 @@ func mpreinit(mp *m) { mp.gsignal.m = mp } +func msigsave(mp *m) { + smask := (*sigset)(unsafe.Pointer(&mp.sigmask)) + if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) { + throw("insufficient storage for signal mask") + } + rtsigprocmask(_SIG_SETMASK, nil, smask, int32(unsafe.Sizeof(*smask))) +} + // Called to initialize a new m (including the bootstrap m). // Called on the new thread, can not allocate memory. func minit() { // Initialize signal handling. _g_ := getg() signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024) - rtsigprocmask(_SIG_SETMASK, &sigset_none, nil, int32(unsafe.Sizeof(sigset_none))) + + // restore signal mask from m.sigmask and unblock essential signals + nmask := *(*sigset)(unsafe.Pointer(&_g_.m.sigmask)) + for i := range sigtable { + if sigtable[i].flags&_SigUnblock != 0 { + nmask[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31) + } + } + rtsigprocmask(_SIG_SETMASK, &nmask, nil, int32(unsafe.Sizeof(nmask))) } // Called from dropm to undo the effect of an minit. func unminit() { + _g_ := getg() + smask := (*sigset)(unsafe.Pointer(&_g_.m.sigmask)) + rtsigprocmask(_SIG_SETMASK, smask, nil, int32(unsafe.Sizeof(*smask))) signalstack(nil, 0) } @@ -304,6 +322,8 @@ func signalstack(p *byte, n int32) { sigaltstack(&st, nil) } -func unblocksignals() { - rtsigprocmask(_SIG_SETMASK, &sigset_none, nil, int32(unsafe.Sizeof(sigset_none))) +func updatesigmask(m sigmask) { + var mask sigset + copy(mask[:], m[:]) + rtsigprocmask(_SIG_SETMASK, &mask, nil, int32(unsafe.Sizeof(mask))) } diff --git a/src/runtime/os1_nacl.go b/src/runtime/os1_nacl.go index dbb5dec2fd..66e60f8b12 100644 --- a/src/runtime/os1_nacl.go +++ b/src/runtime/os1_nacl.go @@ -15,6 +15,9 @@ func mpreinit(mp *m) { func sigtramp() +func msigsave(mp *m) { +} + // Called to initialize a new m (including the bootstrap m). // Called on the new thread, can not allocate memory. func minit() { diff --git a/src/runtime/os1_netbsd.go b/src/runtime/os1_netbsd.go index 8df74b5593..3fb05989e7 100644 --- a/src/runtime/os1_netbsd.go +++ b/src/runtime/os1_netbsd.go @@ -17,7 +17,6 @@ const ( _CLOCK_MONOTONIC = 3 ) -var sigset_none = sigset{} var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}} // From NetBSD's <sys/sysctl.h> @@ -139,6 +138,14 @@ func mpreinit(mp *m) { mp.gsignal.m = mp } +func msigsave(mp *m) { + smask := (*sigset)(unsafe.Pointer(&mp.sigmask)) + if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) { + throw("insufficient storage for signal mask") + } + sigprocmask(_SIG_SETMASK, nil, smask) +} + // Called to initialize a new m (including the bootstrap m). // Called on the new thread, can not allocate memory. func minit() { @@ -147,11 +154,23 @@ func minit() { // Initialize signal handling signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024) - sigprocmask(_SIG_SETMASK, &sigset_none, nil) + + // restore signal mask from m.sigmask and unblock essential signals + nmask := *(*sigset)(unsafe.Pointer(&_g_.m.sigmask)) + for i := range sigtable { + if sigtable[i].flags&_SigUnblock != 0 { + nmask.__bits[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31) + } + } + sigprocmask(_SIG_SETMASK, &nmask, nil) } // Called from dropm to undo the effect of an minit. func unminit() { + _g_ := getg() + smask := (*sigset)(unsafe.Pointer(&_g_.m.sigmask)) + sigprocmask(_SIG_SETMASK, smask, nil) + signalstack(nil, 0) } @@ -206,6 +225,8 @@ func signalstack(p *byte, n int32) { sigaltstack(&st, nil) } -func unblocksignals() { - sigprocmask(_SIG_SETMASK, &sigset_none, nil) +func updatesigmask(m sigmask) { + var mask sigset + copy(mask.__bits[:], m[:]) + sigprocmask(_SIG_SETMASK, &mask, nil) } diff --git a/src/runtime/os1_openbsd.go b/src/runtime/os1_openbsd.go index 95729a56df..5ccf642468 100644 --- a/src/runtime/os1_openbsd.go +++ b/src/runtime/os1_openbsd.go @@ -148,6 +148,14 @@ func mpreinit(mp *m) { mp.gsignal.m = mp } +func msigsave(mp *m) { + smask := (*uint32)(unsafe.Pointer(&mp.sigmask)) + if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) { + throw("insufficient storage for signal mask") + } + *smask = sigprocmask(_SIG_BLOCK, 0) +} + // Called to initialize a new m (including the bootstrap m). // Called on the new thread, can not allocate memory. func minit() { @@ -158,11 +166,22 @@ func minit() { // Initialize signal handling signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024) - sigprocmask(_SIG_SETMASK, sigset_none) + + // restore signal mask from m.sigmask and unblock essential signals + nmask := *(*uint32)(unsafe.Pointer(&_g_.m.sigmask)) + for i := range sigtable { + if sigtable[i].flags&_SigUnblock != 0 { + nmask &^= 1 << (uint32(i) - 1) + } + } + sigprocmask(_SIG_SETMASK, nmask) } // Called from dropm to undo the effect of an minit. func unminit() { + _g_ := getg() + smask := *(*uint32)(unsafe.Pointer(&_g_.m.sigmask)) + sigprocmask(_SIG_SETMASK, smask) signalstack(nil, 0) } @@ -217,6 +236,6 @@ func signalstack(p *byte, n int32) { sigaltstack(&st, nil) } -func unblocksignals() { - sigprocmask(_SIG_SETMASK, sigset_none) +func updatesigmask(m sigmask) { + sigprocmask(_SIG_SETMASK, m[0]) } diff --git a/src/runtime/os1_plan9.go b/src/runtime/os1_plan9.go index c026218241..bda7057f44 100644 --- a/src/runtime/os1_plan9.go +++ b/src/runtime/os1_plan9.go @@ -18,6 +18,9 @@ func mpreinit(mp *m) { mp.errstr = (*byte)(mallocgc(_ERRMAX, nil, _FlagNoScan)) } +func msigsave(mp *m) { +} + // Called to initialize a new m (including the bootstrap m). // Called on the new thread, can not allocate memory. func minit() { @@ -177,7 +180,7 @@ func exit(e int) { } else { // build error string var tmp [32]byte - status = []byte(gostringnocopy(&itoa(tmp[:len(tmp)-1], uint64(e))[0])) + status = append(itoa(tmp[:len(tmp)-1], uint64(e)), 0) } goexitsall(&status[0]) exits(&status[0]) diff --git a/src/runtime/os1_windows.go b/src/runtime/os1_windows.go index 5719b320f5..bc472d0de9 100644 --- a/src/runtime/os1_windows.go +++ b/src/runtime/os1_windows.go @@ -292,6 +292,9 @@ func newosproc(mp *m, stk unsafe.Pointer) { func mpreinit(mp *m) { } +func msigsave(mp *m) { +} + // Called to initialize a new m (including the bootstrap m). // Called on the new thread, can not allocate memory. func minit() { diff --git a/src/runtime/os3_solaris.go b/src/runtime/os3_solaris.go index 69ac5b4970..e4fe92de41 100644 --- a/src/runtime/os3_solaris.go +++ b/src/runtime/os3_solaris.go @@ -114,7 +114,6 @@ var ( libc_write libcFunc ) -var sigset_none = sigset{} var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}} func getncpu() int32 { @@ -190,6 +189,14 @@ func mpreinit(mp *m) { func miniterrno() +func msigsave(mp *m) { + smask := (*sigset)(unsafe.Pointer(&mp.sigmask)) + if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) { + throw("insufficient storage for signal mask") + } + sigprocmask(_SIG_SETMASK, nil, smask) +} + // Called to initialize a new m (including the bootstrap m). // Called on the new thread, can not allocate memory. func minit() { @@ -197,11 +204,23 @@ func minit() { asmcgocall(unsafe.Pointer(funcPC(miniterrno)), unsafe.Pointer(&libc____errno)) // Initialize signal handling signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024) - sigprocmask(_SIG_SETMASK, &sigset_none, nil) + + // restore signal mask from m.sigmask and unblock essential signals + nmask := *(*sigset)(unsafe.Pointer(&_g_.m.sigmask)) + for i := range sigtable { + if sigtable[i].flags&_SigUnblock != 0 { + nmask.__sigbits[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31) + } + } + sigprocmask(_SIG_SETMASK, &nmask, nil) } // Called from dropm to undo the effect of an minit. func unminit() { + _g_ := getg() + smask := (*sigset)(unsafe.Pointer(&_g_.m.sigmask)) + sigprocmask(_SIG_SETMASK, smask, nil) + signalstack(nil, 0) } @@ -278,8 +297,10 @@ func signalstack(p *byte, n int32) { sigaltstack(&st, nil) } -func unblocksignals() { - sigprocmask(_SIG_SETMASK, &sigset_none, nil) +func updatesigmask(m sigmask) { + var mask sigset + copy(mask.__sigbits[:], m[:]) + sigprocmask(_SIG_SETMASK, &mask, nil) } //go:nosplit diff --git a/src/runtime/panic.go b/src/runtime/panic.go index 0e4086c7ef..47563f450e 100644 --- a/src/runtime/panic.go +++ b/src/runtime/panic.go @@ -188,16 +188,6 @@ func newdefer(siz int32) *_defer { d = (*_defer)(mallocgc(total, deferType, 0)) } d.siz = siz - if mheap_.shadow_enabled { - // This memory will be written directly, with no write barrier, - // and then scanned like stacks during collection. - // Unlike real stacks, it is from heap spans, so mark the - // shadow as explicitly unusable. - p := deferArgs(d) - for i := uintptr(0); i+ptrSize <= uintptr(siz); i += ptrSize { - writebarrierptr_noshadow((*uintptr)(add(p, i))) - } - } gp := mp.curg d.link = gp._defer gp._defer = d @@ -214,12 +204,6 @@ func freedefer(d *_defer) { if d.fn != nil { freedeferfn() } - if mheap_.shadow_enabled { - // Undo the marking in newdefer. - systemstack(func() { - clearshadow(uintptr(deferArgs(d)), uintptr(d.siz)) - }) - } sc := deferclass(uintptr(d.siz)) if sc < uintptr(len(p{}.deferpool)) { mp := acquirem() diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go index b3d0ae9b64..4290edb7be 100644 --- a/src/runtime/pprof/pprof.go +++ b/src/runtime/pprof/pprof.go @@ -442,35 +442,33 @@ func writeHeap(w io.Writer, debug int) error { // Print memstats information too. // Pprof will ignore, but useful for people - if debug > 0 { - s := new(runtime.MemStats) - runtime.ReadMemStats(s) - fmt.Fprintf(w, "\n# runtime.MemStats\n") - fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc) - fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc) - fmt.Fprintf(w, "# Sys = %d\n", s.Sys) - fmt.Fprintf(w, "# Lookups = %d\n", s.Lookups) - fmt.Fprintf(w, "# Mallocs = %d\n", s.Mallocs) - fmt.Fprintf(w, "# Frees = %d\n", s.Frees) + s := new(runtime.MemStats) + runtime.ReadMemStats(s) + fmt.Fprintf(w, "\n# runtime.MemStats\n") + fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc) + fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc) + fmt.Fprintf(w, "# Sys = %d\n", s.Sys) + fmt.Fprintf(w, "# Lookups = %d\n", s.Lookups) + fmt.Fprintf(w, "# Mallocs = %d\n", s.Mallocs) + fmt.Fprintf(w, "# Frees = %d\n", s.Frees) - fmt.Fprintf(w, "# HeapAlloc = %d\n", s.HeapAlloc) - fmt.Fprintf(w, "# HeapSys = %d\n", s.HeapSys) - fmt.Fprintf(w, "# HeapIdle = %d\n", s.HeapIdle) - fmt.Fprintf(w, "# HeapInuse = %d\n", s.HeapInuse) - fmt.Fprintf(w, "# HeapReleased = %d\n", s.HeapReleased) - fmt.Fprintf(w, "# HeapObjects = %d\n", s.HeapObjects) + fmt.Fprintf(w, "# HeapAlloc = %d\n", s.HeapAlloc) + fmt.Fprintf(w, "# HeapSys = %d\n", s.HeapSys) + fmt.Fprintf(w, "# HeapIdle = %d\n", s.HeapIdle) + fmt.Fprintf(w, "# HeapInuse = %d\n", s.HeapInuse) + fmt.Fprintf(w, "# HeapReleased = %d\n", s.HeapReleased) + fmt.Fprintf(w, "# HeapObjects = %d\n", s.HeapObjects) - fmt.Fprintf(w, "# Stack = %d / %d\n", s.StackInuse, s.StackSys) - fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys) - fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys) - fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys) + fmt.Fprintf(w, "# Stack = %d / %d\n", s.StackInuse, s.StackSys) + fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys) + fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys) + fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys) - fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC) - fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs) - fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC) - fmt.Fprintf(w, "# EnableGC = %v\n", s.EnableGC) - fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC) - } + fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC) + fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs) + fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC) + fmt.Fprintf(w, "# EnableGC = %v\n", s.EnableGC) + fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC) if tw != nil { tw.Flush() diff --git a/src/runtime/proc.go b/src/runtime/proc.go index f725fc890b..805b96e627 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -203,7 +203,7 @@ func acquireSudog() *sudog { // acquireSudog, acquireSudog calls new(sudog), // new calls malloc, malloc can call the garbage collector, // and the garbage collector calls the semaphore implementation - // in stoptheworld. + // in stopTheWorld. // Break the cycle by doing acquirem/releasem around new(sudog). // The acquirem/releasem increments m.locks during new(sudog), // which keeps the garbage collector from being invoked. diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go index 00535da77d..c070f7d773 100644 --- a/src/runtime/proc1.go +++ b/src/runtime/proc1.go @@ -59,7 +59,6 @@ func schedinit() { goargs() goenvs() parsedebugvars() - wbshadowinit() gcinit() sched.lastpoll = uint64(nanotime()) @@ -212,7 +211,7 @@ func helpgc(nproc int32) { // sched.stopwait to in order to request that all Gs permanently stop. const freezeStopWait = 0x7fffffff -// Similar to stoptheworld but best-effort and can be called several times. +// Similar to stopTheWorld but best-effort and can be called several times. // There is no reverse operation, used during crashing. // This function must not lock any mutexes. func freezetheworld() { @@ -466,94 +465,68 @@ func stopscanstart(gp *g) { } } -// Runs on g0 and does the actual work after putting the g back on the run queue. -func mquiesce(gpmaster *g) { - // enqueue the calling goroutine. - restartg(gpmaster) - - activeglen := len(allgs) - for i := 0; i < activeglen; i++ { - gp := allgs[i] - if readgstatus(gp) == _Gdead { - gp.gcworkdone = true // noop scan. - } else { - gp.gcworkdone = false - } - stopscanstart(gp) - } - - // Check that the G's gcwork (such as scanning) has been done. If not do it now. - // You can end up doing work here if the page trap on a Grunning Goroutine has - // not been sprung or in some race situations. For example a runnable goes dead - // and is started up again with a gp->gcworkdone set to false. - for i := 0; i < activeglen; i++ { - gp := allgs[i] - for !gp.gcworkdone { - status := readgstatus(gp) - if status == _Gdead { - //do nothing, scan not needed. - gp.gcworkdone = true // scan is a noop - break - } - if status == _Grunning && gp.stackguard0 == uintptr(stackPreempt) && notetsleep(&sched.stopnote, 100*1000) { // nanosecond arg - noteclear(&sched.stopnote) - } else { - stopscanstart(gp) - } - } - } - - for i := 0; i < activeglen; i++ { - gp := allgs[i] - status := readgstatus(gp) - if isscanstatus(status) { - print("mstopandscang:bottom: post scan bad status gp=", gp, " has status ", hex(status), "\n") - dumpgstatus(gp) - } - if !gp.gcworkdone && status != _Gdead { - print("mstopandscang:bottom: post scan gp=", gp, "->gcworkdone still false\n") - dumpgstatus(gp) - } - } - - schedule() // Never returns. +// stopTheWorld stops all P's from executing goroutines, interrupting +// all goroutines at GC safe points and records reason as the reason +// for the stop. On return, only the current goroutine's P is running. +// stopTheWorld must not be called from a system stack and the caller +// must not hold worldsema. The caller must call startTheWorld when +// other P's should resume execution. +// +// stopTheWorld is safe for multiple goroutines to call at the +// same time. Each will execute its own stop, and the stops will +// be serialized. +// +// This is also used by routines that do stack dumps. If the system is +// in panic or being exited, this may not reliably stop all +// goroutines. +func stopTheWorld(reason string) { + semacquire(&worldsema, false) + getg().m.preemptoff = reason + systemstack(stopTheWorldWithSema) } -// quiesce moves all the goroutines to a GC safepoint which for now is a at preemption point. -// If the global gcphase is GCmark quiesce will ensure that all of the goroutine's stacks -// have been scanned before it returns. -func quiesce(mastergp *g) { - castogscanstatus(mastergp, _Grunning, _Gscanenqueue) - // Now move this to the g0 (aka m) stack. - // g0 will potentially scan this thread and put mastergp on the runqueue - mcall(mquiesce) +// startTheWorld undoes the effects of stopTheWorld. +func startTheWorld() { + systemstack(startTheWorldWithSema) + // worldsema must be held over startTheWorldWithSema to ensure + // gomaxprocs cannot change while worldsema is held. + semrelease(&worldsema) + getg().m.preemptoff = "" } -// Holding worldsema grants an M the right to try to stop the world. -// The procedure is: +// Holding worldsema grants an M the right to try to stop the world +// and prevents gomaxprocs from changing concurrently. +var worldsema uint32 = 1 + +// stopTheWorldWithSema is the core implementation of stopTheWorld. +// The caller is responsible for acquiring worldsema and disabling +// preemption first and then should stopTheWorldWithSema on the system +// stack: // -// semacquire(&worldsema); -// m.preemptoff = "reason"; -// stoptheworld(); +// semacquire(&worldsema, false) +// m.preemptoff = "reason" +// systemstack(stopTheWorldWithSema) // -// ... do stuff ... +// When finished, the caller must either call startTheWorld or undo +// these three operations separately: // -// m.preemptoff = ""; -// semrelease(&worldsema); -// starttheworld(); +// m.preemptoff = "" +// systemstack(startTheWorldWithSema) +// semrelease(&worldsema) // -var worldsema uint32 = 1 - -// This is used by the GC as well as the routines that do stack dumps. In the case -// of GC all the routines can be reliably stopped. This is not always the case -// when the system is in panic or being exited. -func stoptheworld() { +// It is allowed to acquire worldsema once and then execute multiple +// startTheWorldWithSema/stopTheWorldWithSema pairs. +// Other P's are able to execute between successive calls to +// startTheWorldWithSema and stopTheWorldWithSema. +// Holding worldsema causes any other goroutines invoking +// stopTheWorld to block. +func stopTheWorldWithSema() { _g_ := getg() // If we hold a lock, then we won't be able to stop another M // that is blocked trying to acquire the lock. if _g_.m.locks > 0 { - throw("stoptheworld: holding locks") + throw("stopTheWorld: holding locks") } lock(&sched.lock) @@ -600,12 +573,12 @@ func stoptheworld() { } } if sched.stopwait != 0 { - throw("stoptheworld: not stopped") + throw("stopTheWorld: not stopped") } for i := 0; i < int(gomaxprocs); i++ { p := allp[i] if p.status != _Pgcstop { - throw("stoptheworld: not stopped") + throw("stopTheWorld: not stopped") } } } @@ -615,7 +588,7 @@ func mhelpgc() { _g_.m.helpgc = -1 } -func starttheworld() { +func startTheWorldWithSema() { _g_ := getg() _g_.m.locks++ // disable preemption because it can be holding p in a local var @@ -644,7 +617,7 @@ func starttheworld() { mp := p.m.ptr() p.m = 0 if mp.nextp != 0 { - throw("starttheworld: inconsistent mp->nextp") + throw("startTheWorld: inconsistent mp->nextp") } mp.nextp.set(p) notewakeup(&mp.park) @@ -754,10 +727,10 @@ func forEachP(fn func(*p)) { _p_ := getg().m.p.ptr() lock(&sched.lock) - if sched.stopwait != 0 { - throw("forEachP: sched.stopwait != 0") + if sched.safePointWait != 0 { + throw("forEachP: sched.safePointWait != 0") } - sched.stopwait = gomaxprocs - 1 + sched.safePointWait = gomaxprocs - 1 sched.safePointFn = fn // Ask all Ps to run the safe point function. @@ -777,11 +750,11 @@ func forEachP(fn func(*p)) { for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() { if cas(&p.runSafePointFn, 1, 0) { fn(p) - sched.stopwait-- + sched.safePointWait-- } } - wait := sched.stopwait > 0 + wait := sched.safePointWait > 0 unlock(&sched.lock) // Run fn for the current P. @@ -807,15 +780,15 @@ func forEachP(fn func(*p)) { for { // Wait for 100us, then try to re-preempt in // case of any races. - if notetsleep(&sched.stopnote, 100*1000) { - noteclear(&sched.stopnote) + if notetsleep(&sched.safePointNote, 100*1000) { + noteclear(&sched.safePointNote) break } preemptall() } } - if sched.stopwait != 0 { - throw("forEachP: not stopped") + if sched.safePointWait != 0 { + throw("forEachP: not done") } for i := 0; i < int(gomaxprocs); i++ { p := allp[i] @@ -851,9 +824,9 @@ func runSafePointFn() { } sched.safePointFn(p) lock(&sched.lock) - sched.stopwait-- - if sched.stopwait == 0 { - notewakeup(&sched.stopnote) + sched.safePointWait-- + if sched.safePointWait == 0 { + notewakeup(&sched.safePointNote) } unlock(&sched.lock) } @@ -971,6 +944,7 @@ func needm(x byte) { _g_.stack.lo = uintptr(noescape(unsafe.Pointer(&x))) - 32*1024 _g_.stackguard0 = _g_.stack.lo + _StackGuard + msigsave(mp) // Initialize this thread to use the m. asminit() minit() @@ -1098,6 +1072,7 @@ func unlockextra(mp *m) { func newm(fn func(), _p_ *p) { mp := allocm(_p_, fn) mp.nextp.set(_p_) + msigsave(mp) if iscgo { var ts cgothreadstart if _cgo_thread_start == nil { @@ -1226,9 +1201,9 @@ func handoffp(_p_ *p) { } if _p_.runSafePointFn != 0 && cas(&_p_.runSafePointFn, 1, 0) { sched.safePointFn(_p_) - sched.stopwait-- - if sched.stopwait == 0 { - notewakeup(&sched.stopnote) + sched.safePointWait-- + if sched.safePointWait == 0 { + notewakeup(&sched.safePointNote) } } if sched.runqsize != 0 { @@ -1305,7 +1280,7 @@ func startlockedm(gp *g) { stopm() } -// Stops the current m for stoptheworld. +// Stops the current m for stopTheWorld. // Returns when the world is restarted. func gcstopm() { _g_ := getg() @@ -1421,7 +1396,7 @@ top: xadd(&sched.nmspinning, 1) } // random steal from other P's - for i := 0; i < int(2*gomaxprocs); i++ { + for i := 0; i < int(4*gomaxprocs); i++ { if sched.gcwaiting != 0 { goto top } @@ -1430,18 +1405,20 @@ top: if _p_ == _g_.m.p.ptr() { gp, _ = runqget(_p_) } else { - gp = runqsteal(_g_.m.p.ptr(), _p_) + stealRunNextG := i > 2*int(gomaxprocs) // first look for ready queues with more than 1 g + gp = runqsteal(_g_.m.p.ptr(), _p_, stealRunNextG) } if gp != nil { return gp, false } } + stop: - // We have nothing to do. If we're in the GC mark phaseand can + // We have nothing to do. If we're in the GC mark phase and can // safely scan and blacken objects, run idle-time marking // rather than give up the P. - if _p_ := _g_.m.p.ptr(); gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != nil { + if _p_ := _g_.m.p.ptr(); gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != nil && gcMarkWorkAvailable(_p_) { _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode gp := _p_.gcBgMarkWorker casgstatus(gp, _Gwaiting, _Grunnable) @@ -2484,11 +2461,9 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { mp.mallocing++ // Define that a "user g" is a user-created goroutine, and a "system g" - // is one that is m->g0 or m->gsignal. We've only made sure that we - // can unwind user g's, so exclude the system g's. + // is one that is m->g0 or m->gsignal. // - // It is not quite as easy as testing gp == m->curg (the current user g) - // because we might be interrupted for profiling halfway through a + // We might be interrupted for profiling halfway through a // goroutine switch. The switch involves updating three (or four) values: // g, PC, SP, and (on arm) LR. The PC must be the last to be updated, // because once it gets updated the new g is running. @@ -2497,8 +2472,7 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { // so the update only affects g, SP, and PC. Since PC must be last, there // the possible partial transitions in ordinary execution are (1) g alone is updated, // (2) both g and SP are updated, and (3) SP alone is updated. - // If g is updated, we'll see a system g and not look closer. - // If SP alone is updated, we can detect the partial transition by checking + // If SP or g alone is updated, we can detect the partial transition by checking // whether the SP is within g's stack bounds. (We could also require that SP // be changed only after g, but the stack bounds check is needed by other // cases, so there is no need to impose an additional requirement.) @@ -2527,15 +2501,11 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { // disabled, so a profiling signal cannot arrive then anyway. // // Third, the common case: it may be that the switch updates g, SP, and PC - // separately, as in gogo. - // - // Because gogo is the only instance, we check whether the PC lies - // within that function, and if so, not ask for a traceback. This approach - // requires knowing the size of the gogo function, which we - // record in arch_*.h and check in runtime_test.go. + // separately. If the PC is within any of the functions that does this, + // we don't ask for a traceback. C.F. the function setsSP for more about this. // // There is another apparently viable approach, recorded here in case - // the "PC within gogo" check turns out not to be usable. + // the "PC within setsSP function" check turns out not to be usable. // It would be possible to delay the update of either g or SP until immediately // before the PC update instruction. Then, because of the stack bounds check, // the only problematic interrupt point is just before that PC update instruction, @@ -2556,28 +2526,23 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { // transition. We simply require that g and SP match and that the PC is not // in gogo. traceback := true - gogo := funcPC(gogo) - if gp == nil || gp != mp.curg || - sp < gp.stack.lo || gp.stack.hi < sp || - (gogo <= pc && pc < gogo+_RuntimeGogoBytes) { + if gp == nil || sp < gp.stack.lo || gp.stack.hi < sp || setsSP(pc) { traceback = false } - var stk [maxCPUProfStack]uintptr n := 0 - if traceback { - n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap) + if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 { + // Cgo, we can't unwind and symbolize arbitrary C code, + // so instead collect Go stack that leads to the cgo call. + // This is especially important on windows, since all syscalls are cgo calls. + n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0) + } else if traceback { + n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap|_TraceJumpStack) } if !traceback || n <= 0 { // Normal traceback is impossible or has failed. // See if it falls into several common cases. n = 0 - if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 { - // Cgo, we can't unwind and symbolize arbitrary C code, - // so instead collect Go stack that leads to the cgo call. - // This is especially important on windows, since all syscalls are cgo calls. - n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0) - } if GOOS == "windows" && n == 0 && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 { // Libcall, i.e. runtime syscall on windows. // Collect Go stack that leads to the call. @@ -2612,6 +2577,30 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { mp.mallocing-- } +// Reports whether a function will set the SP +// to an absolute value. Important that +// we don't traceback when these are at the bottom +// of the stack since we can't be sure that we will +// find the caller. +// +// If the function is not on the bottom of the stack +// we assume that it will have set it up so that traceback will be consistent, +// either by being a traceback terminating function +// or putting one on the stack at the right offset. +func setsSP(pc uintptr) bool { + f := findfunc(pc) + if f == nil { + // couldn't find the function for this PC, + // so assume the worst and stop traceback + return true + } + switch f.entry { + case gogoPC, systemstackPC, mcallPC, morestackPC: + return true + } + return false +} + // Arrange to call fn with a traceback hz times a second. func setcpuprofilerate_m(hz int32) { // Force sane arguments. @@ -3447,23 +3436,34 @@ func runqget(_p_ *p) (gp *g, inheritTime bool) { } } -// Grabs a batch of goroutines from local runnable queue. -// batch array must be of size len(p->runq)/2. Returns number of grabbed goroutines. +// Grabs a batch of goroutines from _p_'s runnable queue into batch. +// Batch is a ring buffer starting at batchHead. +// Returns number of grabbed goroutines. // Can be executed by any P. -func runqgrab(_p_ *p, batch []*g) uint32 { +func runqgrab(_p_ *p, batch *[256]*g, batchHead uint32, stealRunNextG bool) uint32 { for { h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers t := atomicload(&_p_.runqtail) // load-acquire, synchronize with the producer n := t - h n = n - n/2 if n == 0 { - // Try to steal from _p_.runnext. - if next := _p_.runnext; next != 0 { - if !_p_.runnext.cas(next, 0) { - continue + if stealRunNextG { + // Try to steal from _p_.runnext. + if next := _p_.runnext; next != 0 { + // Sleep to ensure that _p_ isn't about to run the g we + // are about to steal. + // The important use case here is when the g running on _p_ + // ready()s another g and then almost immediately blocks. + // Instead of stealing runnext in this window, back off + // to give _p_ a chance to schedule runnext. This will avoid + // thrashing gs between different Ps. + usleep(100) + if !_p_.runnext.cas(next, 0) { + continue + } + batch[batchHead%uint32(len(batch))] = next.ptr() + return 1 } - batch[0] = next.ptr() - return 1 } return 0 } @@ -3471,7 +3471,8 @@ func runqgrab(_p_ *p, batch []*g) uint32 { continue } for i := uint32(0); i < n; i++ { - batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))] + g := _p_.runq[(h+i)%uint32(len(_p_.runq))] + batch[(batchHead+i)%uint32(len(batch))] = g } if cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume return n @@ -3482,26 +3483,21 @@ func runqgrab(_p_ *p, batch []*g) uint32 { // Steal half of elements from local runnable queue of p2 // and put onto local runnable queue of p. // Returns one of the stolen elements (or nil if failed). -func runqsteal(_p_, p2 *p) *g { - var batch [len(_p_.runq) / 2]*g - - n := runqgrab(p2, batch[:]) +func runqsteal(_p_, p2 *p, stealRunNextG bool) *g { + t := _p_.runqtail + n := runqgrab(p2, &_p_.runq, t, stealRunNextG) if n == 0 { return nil } n-- - gp := batch[n] + gp := _p_.runq[(t+n)%uint32(len(_p_.runq))] if n == 0 { return gp } h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers - t := _p_.runqtail if t-h+n >= uint32(len(_p_.runq)) { throw("runqsteal: runq overflow") } - for i := uint32(0); i < n; i++ { - _p_.runq[(t+i)%uint32(len(_p_.runq))] = batch[i] - } atomicstore(&_p_.runqtail, t+n) // store-release, makes the item available for consumption return gp } @@ -3528,20 +3524,16 @@ func testSchedLocalQueue() { } } -var pSink *p - func testSchedLocalQueueSteal() { p1 := new(p) p2 := new(p) - pSink = p1 // Force to heap, too large to allocate on system stack ("G0 stack") - pSink = p2 // Force to heap, too large to allocate on system stack ("G0 stack") gs := make([]g, len(p1.runq)) for i := 0; i < len(p1.runq); i++ { for j := 0; j < i; j++ { gs[j].sig = 0 runqput(p1, &gs[j], false) } - gp := runqsteal(p2, p1) + gp := runqsteal(p2, p1, true) s := 0 if gp != nil { s++ diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go index 4c5712d32f..4471ee5afb 100644 --- a/src/runtime/proc_test.go +++ b/src/runtime/proc_test.go @@ -7,6 +7,7 @@ package runtime_test import ( "math" "runtime" + "runtime/debug" "sync" "sync/atomic" "syscall" @@ -104,8 +105,8 @@ func TestGoroutineParallelism(t *testing.T) { defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P)) // If runtime triggers a forced GC during this test then it will deadlock, // since the goroutines can't be stopped/preempted. - // So give this test as much time as possible. - runtime.GC() + // Disable GC for this test (see issue #10958). + defer debug.SetGCPercent(debug.SetGCPercent(-1)) for try := 0; try < N; try++ { done := make(chan bool) x := uint32(0) diff --git a/src/runtime/runtime-gdb_test.go b/src/runtime/runtime-gdb_test.go index fe7d38a39c..f4014b2e05 100644 --- a/src/runtime/runtime-gdb_test.go +++ b/src/runtime/runtime-gdb_test.go @@ -59,7 +59,7 @@ func TestGdbPython(t *testing.T) { cmd := exec.Command("go", "build", "-o", "a.exe") cmd.Dir = dir - out, err := cmd.CombinedOutput() + out, err := testEnv(cmd).CombinedOutput() if err != nil { t.Fatalf("building source %v\n%s", err, out) } @@ -85,7 +85,7 @@ func TestGdbPython(t *testing.T) { // stack frames on RISC architectures. canBackTrace := false switch runtime.GOARCH { - case "amd64", "386": + case "amd64", "386", "ppc64", "ppc64le", "arm", "arm64": canBackTrace = true args = append(args, "-ex", "echo BEGIN goroutine 2 bt\n", diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index ac539b9a9d..3ee5d5d29d 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -266,6 +266,7 @@ type m struct { // Fields not known to debuggers. procid uint64 // for debuggers, but offset not hard-coded gsignal *g // signal-handling g + sigmask [4]uintptr // storage for saved signal mask tls [4]uintptr // thread-local storage (for x86 extern register) mstartfn func() curg *g // current running goroutine @@ -441,7 +442,9 @@ type schedt struct { // safepointFn should be called on each P at the next GC // safepoint if p.runSafePointFn is set. - safePointFn func(*p) + safePointFn func(*p) + safePointWait int32 + safePointNote note profilehz int32 // cpu profiling rate @@ -467,15 +470,16 @@ type sigtabtt struct { } const ( - _SigNotify = 1 << 0 // let signal.Notify have signal, even if from kernel - _SigKill = 1 << 1 // if signal.Notify doesn't take it, exit quietly - _SigThrow = 1 << 2 // if signal.Notify doesn't take it, exit loudly - _SigPanic = 1 << 3 // if the signal is from the kernel, panic - _SigDefault = 1 << 4 // if the signal isn't explicitly requested, don't monitor it - _SigHandling = 1 << 5 // our signal handler is registered - _SigIgnored = 1 << 6 // the signal was ignored before we registered for it - _SigGoExit = 1 << 7 // cause all runtime procs to exit (only used on Plan 9). - _SigSetStack = 1 << 8 // add SA_ONSTACK to libc handler + _SigNotify = 1 << iota // let signal.Notify have signal, even if from kernel + _SigKill // if signal.Notify doesn't take it, exit quietly + _SigThrow // if signal.Notify doesn't take it, exit loudly + _SigPanic // if the signal is from the kernel, panic + _SigDefault // if the signal isn't explicitly requested, don't monitor it + _SigHandling // our signal handler is registered + _SigIgnored // the signal was ignored before we registered for it + _SigGoExit // cause all runtime procs to exit (only used on Plan 9). + _SigSetStack // add SA_ONSTACK to libc handler + _SigUnblock // unblocked in minit ) // Layout of in-memory per-function information prepared by linker @@ -594,8 +598,9 @@ type stkframe struct { } const ( - _TraceRuntimeFrames = 1 << 0 // include frames for internal runtime functions. - _TraceTrap = 1 << 1 // the initial PC, SP are from a trap, not a return PC from a call + _TraceRuntimeFrames = 1 << iota // include frames for internal runtime functions. + _TraceTrap // the initial PC, SP are from a trap, not a return PC from a call + _TraceJumpStack // if traceback is on a systemstack, resume trace at g that called into it ) const ( diff --git a/src/runtime/runtime_test.go b/src/runtime/runtime_test.go index d4cccbf084..f65562ab91 100644 --- a/src/runtime/runtime_test.go +++ b/src/runtime/runtime_test.go @@ -6,13 +6,8 @@ package runtime_test import ( "io" - "io/ioutil" - "os" - "os/exec" . "runtime" "runtime/debug" - "strconv" - "strings" "testing" "unsafe" ) @@ -88,53 +83,6 @@ func BenchmarkDeferMany(b *testing.B) { } } -// The profiling signal handler needs to know whether it is executing runtime.gogo. -// The constant RuntimeGogoBytes in arch_*.h gives the size of the function; -// we don't have a way to obtain it from the linker (perhaps someday). -// Test that the constant matches the size determined by 'go tool nm -S'. -// The value reported will include the padding between runtime.gogo and the -// next function in memory. That's fine. -func TestRuntimeGogoBytes(t *testing.T) { - switch GOOS { - case "android", "nacl": - t.Skipf("skipping on %s", GOOS) - case "darwin": - switch GOARCH { - case "arm", "arm64": - t.Skipf("skipping on %s/%s, no fork", GOOS, GOARCH) - } - } - - dir, err := ioutil.TempDir("", "go-build") - if err != nil { - t.Fatalf("failed to create temp directory: %v", err) - } - defer os.RemoveAll(dir) - - out, err := exec.Command("go", "build", "-o", dir+"/hello", "../../test/helloworld.go").CombinedOutput() - if err != nil { - t.Fatalf("building hello world: %v\n%s", err, out) - } - - out, err = exec.Command("go", "tool", "nm", "-size", dir+"/hello").CombinedOutput() - if err != nil { - t.Fatalf("go tool nm: %v\n%s", err, out) - } - - for _, line := range strings.Split(string(out), "\n") { - f := strings.Fields(line) - if len(f) == 4 && f[3] == "runtime.gogo" { - size, _ := strconv.Atoi(f[1]) - if GogoBytes() != int32(size) { - t.Fatalf("RuntimeGogoBytes = %d, should be %d", GogoBytes(), size) - } - return - } - } - - t.Fatalf("go tool nm did not report size for runtime.gogo") -} - // golang.org/issue/7063 func TestStopCPUProfilingWithProfilerOff(t *testing.T) { SetCPUProfileRate(0) diff --git a/src/runtime/signal1_unix.go b/src/runtime/signal1_unix.go index 7577d43a64..d3e9dac097 100644 --- a/src/runtime/signal1_unix.go +++ b/src/runtime/signal1_unix.go @@ -19,6 +19,19 @@ const ( // Signal forwarding is currently available only on Linux. var fwdSig [_NSIG]uintptr +// sigmask represents a general signal mask compatible with the GOOS +// specific sigset types: the signal numbered x is represented by bit x-1 +// to match the representation expected by sigprocmask. +type sigmask [(_NSIG + 31) / 32]uint32 + +// channels for synchronizing signal mask updates with the signal mask +// thread +var ( + disableSigChan chan uint32 + enableSigChan chan uint32 + maskUpdatedChan chan struct{} +) + func initsig() { // _NSIG is the number of signals on this operating system. // sigtable should describe what to do for all the possible signals. @@ -61,12 +74,17 @@ func sigenable(sig uint32) { } t := &sigtable[sig] - if t.flags&_SigNotify != 0 && t.flags&_SigHandling == 0 { - t.flags |= _SigHandling - if getsig(int32(sig)) == _SIG_IGN { - t.flags |= _SigIgnored + if t.flags&_SigNotify != 0 { + ensureSigM() + enableSigChan <- sig + <-maskUpdatedChan + if t.flags&_SigHandling == 0 { + t.flags |= _SigHandling + if getsig(int32(sig)) == _SIG_IGN { + t.flags |= _SigIgnored + } + setsig(int32(sig), funcPC(sighandler), true) } - setsig(int32(sig), funcPC(sighandler), true) } } @@ -76,12 +94,17 @@ func sigdisable(sig uint32) { } t := &sigtable[sig] - if t.flags&_SigNotify != 0 && t.flags&_SigHandling != 0 { - t.flags &^= _SigHandling - if t.flags&_SigIgnored != 0 { - setsig(int32(sig), _SIG_IGN, true) - } else { - setsig(int32(sig), _SIG_DFL, true) + if t.flags&_SigNotify != 0 { + ensureSigM() + disableSigChan <- sig + <-maskUpdatedChan + if t.flags&_SigHandling != 0 { + t.flags &^= _SigHandling + if t.flags&_SigIgnored != 0 { + setsig(int32(sig), _SIG_IGN, true) + } else { + setsig(int32(sig), _SIG_DFL, true) + } } } } @@ -130,7 +153,52 @@ func crash() { } } - unblocksignals() + updatesigmask(sigmask{}) setsig(_SIGABRT, _SIG_DFL, false) raise(_SIGABRT) } + +// createSigM starts one global, sleeping thread to make sure at least one thread +// is available to catch signals enabled for os/signal. +func ensureSigM() { + if maskUpdatedChan != nil { + return + } + maskUpdatedChan = make(chan struct{}) + disableSigChan = make(chan uint32) + enableSigChan = make(chan uint32) + go func() { + // Signal masks are per-thread, so make sure this goroutine stays on one + // thread. + LockOSThread() + defer UnlockOSThread() + // The sigBlocked mask contains the signals not active for os/signal, + // initially all signals except the essential. When signal.Notify()/Stop is called, + // sigenable/sigdisable in turn notify this thread to update its signal + // mask accordingly. + var sigBlocked sigmask + for i := range sigBlocked { + sigBlocked[i] = ^uint32(0) + } + for i := range sigtable { + if sigtable[i].flags&_SigUnblock != 0 { + sigBlocked[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31) + } + } + updatesigmask(sigBlocked) + for { + select { + case sig := <-enableSigChan: + if b := sig - 1; b >= 0 { + sigBlocked[b/32] &^= (1 << (b & 31)) + } + case sig := <-disableSigChan: + if b := sig - 1; b >= 0 { + sigBlocked[b/32] |= (1 << (b & 31)) + } + } + updatesigmask(sigBlocked) + maskUpdatedChan <- struct{}{} + } + }() +} diff --git a/src/runtime/signal_darwin.go b/src/runtime/signal_darwin.go index 32ecce0d7d..6cd18653d5 100644 --- a/src/runtime/signal_darwin.go +++ b/src/runtime/signal_darwin.go @@ -16,14 +16,14 @@ var sigtable = [...]sigTabT{ /* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"}, /* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"}, /* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"}, - /* 4 */ {_SigThrow, "SIGILL: illegal instruction"}, - /* 5 */ {_SigThrow, "SIGTRAP: trace trap"}, + /* 4 */ {_SigThrow + _SigUnblock, "SIGILL: illegal instruction"}, + /* 5 */ {_SigThrow + _SigUnblock, "SIGTRAP: trace trap"}, /* 6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"}, /* 7 */ {_SigThrow, "SIGEMT: emulate instruction executed"}, - /* 8 */ {_SigPanic, "SIGFPE: floating-point exception"}, + /* 8 */ {_SigPanic + _SigUnblock, "SIGFPE: floating-point exception"}, /* 9 */ {0, "SIGKILL: kill"}, - /* 10 */ {_SigPanic, "SIGBUS: bus error"}, - /* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"}, + /* 10 */ {_SigPanic + _SigUnblock, "SIGBUS: bus error"}, + /* 11 */ {_SigPanic + _SigUnblock, "SIGSEGV: segmentation violation"}, /* 12 */ {_SigThrow, "SIGSYS: bad system call"}, /* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"}, /* 14 */ {_SigNotify, "SIGALRM: alarm clock"}, @@ -32,14 +32,14 @@ var sigtable = [...]sigTabT{ /* 17 */ {0, "SIGSTOP: stop"}, /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"}, /* 19 */ {0, "SIGCONT: continue after stop"}, - /* 20 */ {_SigNotify, "SIGCHLD: child status has changed"}, + /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"}, /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"}, /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"}, /* 23 */ {_SigNotify, "SIGIO: i/o now possible"}, /* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"}, /* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"}, /* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"}, - /* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"}, + /* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"}, /* 28 */ {_SigNotify, "SIGWINCH: window size change"}, /* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"}, /* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"}, diff --git a/src/runtime/signal_linux.go b/src/runtime/signal_linux.go index f8250b9fa1..2f25b59663 100644 --- a/src/runtime/signal_linux.go +++ b/src/runtime/signal_linux.go @@ -16,20 +16,20 @@ var sigtable = [...]sigTabT{ /* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"}, /* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"}, /* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"}, - /* 4 */ {_SigThrow, "SIGILL: illegal instruction"}, - /* 5 */ {_SigThrow, "SIGTRAP: trace trap"}, + /* 4 */ {_SigThrow + _SigUnblock, "SIGILL: illegal instruction"}, + /* 5 */ {_SigThrow + _SigUnblock, "SIGTRAP: trace trap"}, /* 6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"}, - /* 7 */ {_SigPanic, "SIGBUS: bus error"}, - /* 8 */ {_SigPanic, "SIGFPE: floating-point exception"}, + /* 7 */ {_SigPanic + _SigUnblock, "SIGBUS: bus error"}, + /* 8 */ {_SigPanic + _SigUnblock, "SIGFPE: floating-point exception"}, /* 9 */ {0, "SIGKILL: kill"}, /* 10 */ {_SigNotify, "SIGUSR1: user-defined signal 1"}, - /* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"}, + /* 11 */ {_SigPanic + _SigUnblock, "SIGSEGV: segmentation violation"}, /* 12 */ {_SigNotify, "SIGUSR2: user-defined signal 2"}, /* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"}, /* 14 */ {_SigNotify, "SIGALRM: alarm clock"}, /* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"}, - /* 16 */ {_SigThrow, "SIGSTKFLT: stack fault"}, - /* 17 */ {_SigNotify, "SIGCHLD: child status has changed"}, + /* 16 */ {_SigThrow + _SigUnblock, "SIGSTKFLT: stack fault"}, + /* 17 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"}, /* 18 */ {0, "SIGCONT: continue"}, /* 19 */ {0, "SIGSTOP: stop, unblockable"}, /* 20 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"}, @@ -39,7 +39,7 @@ var sigtable = [...]sigTabT{ /* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"}, /* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"}, /* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"}, - /* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"}, + /* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"}, /* 28 */ {_SigNotify, "SIGWINCH: window size change"}, /* 29 */ {_SigNotify, "SIGIO: i/o now possible"}, /* 30 */ {_SigNotify, "SIGPWR: power failure restart"}, diff --git a/src/runtime/signal_netbsd.go b/src/runtime/signal_netbsd.go index 78afc59efa..d93a450d98 100644 --- a/src/runtime/signal_netbsd.go +++ b/src/runtime/signal_netbsd.go @@ -14,14 +14,14 @@ var sigtable = [...]sigTabT{ /* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"}, /* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"}, /* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"}, - /* 4 */ {_SigThrow, "SIGILL: illegal instruction"}, - /* 5 */ {_SigThrow, "SIGTRAP: trace trap"}, + /* 4 */ {_SigThrow + _SigUnblock, "SIGILL: illegal instruction"}, + /* 5 */ {_SigThrow + _SigUnblock, "SIGTRAP: trace trap"}, /* 6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"}, /* 7 */ {_SigThrow, "SIGEMT: emulate instruction executed"}, - /* 8 */ {_SigPanic, "SIGFPE: floating-point exception"}, + /* 8 */ {_SigPanic + _SigUnblock, "SIGFPE: floating-point exception"}, /* 9 */ {0, "SIGKILL: kill"}, - /* 10 */ {_SigPanic, "SIGBUS: bus error"}, - /* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"}, + /* 10 */ {_SigPanic + _SigUnblock, "SIGBUS: bus error"}, + /* 11 */ {_SigPanic + _SigUnblock, "SIGSEGV: segmentation violation"}, /* 12 */ {_SigThrow, "SIGSYS: bad system call"}, /* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"}, /* 14 */ {_SigNotify, "SIGALRM: alarm clock"}, @@ -30,14 +30,14 @@ var sigtable = [...]sigTabT{ /* 17 */ {0, "SIGSTOP: stop"}, /* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"}, /* 19 */ {0, "SIGCONT: continue after stop"}, - /* 20 */ {_SigNotify, "SIGCHLD: child status has changed"}, + /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"}, /* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"}, /* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"}, /* 23 */ {_SigNotify, "SIGIO: i/o now possible"}, /* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"}, /* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"}, /* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"}, - /* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"}, + /* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"}, /* 28 */ {_SigNotify, "SIGWINCH: window size change"}, /* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"}, /* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"}, diff --git a/src/runtime/signal_solaris.go b/src/runtime/signal_solaris.go index 2986c5aabc..d8ac676846 100644 --- a/src/runtime/signal_solaris.go +++ b/src/runtime/signal_solaris.go @@ -14,21 +14,21 @@ var sigtable = [...]sigTabT{ /* 1 */ {_SigNotify + _SigKill, "SIGHUP: hangup"}, /* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt (rubout)"}, /* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit (ASCII FS)"}, - /* 4 */ {_SigThrow, "SIGILL: illegal instruction (not reset when caught)"}, - /* 5 */ {_SigThrow, "SIGTRAP: trace trap (not reset when caught)"}, + /* 4 */ {_SigThrow + _SigUnblock, "SIGILL: illegal instruction (not reset when caught)"}, + /* 5 */ {_SigThrow + _SigUnblock, "SIGTRAP: trace trap (not reset when caught)"}, /* 6 */ {_SigNotify + _SigThrow, "SIGABRT: used by abort, replace SIGIOT in the future"}, /* 7 */ {_SigThrow, "SIGEMT: EMT instruction"}, - /* 8 */ {_SigPanic, "SIGFPE: floating point exception"}, + /* 8 */ {_SigPanic + _SigUnblock, "SIGFPE: floating point exception"}, /* 9 */ {0, "SIGKILL: kill (cannot be caught or ignored)"}, - /* 10 */ {_SigPanic, "SIGBUS: bus error"}, - /* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"}, + /* 10 */ {_SigPanic + _SigUnblock, "SIGBUS: bus error"}, + /* 11 */ {_SigPanic + _SigUnblock, "SIGSEGV: segmentation violation"}, /* 12 */ {_SigThrow, "SIGSYS: bad argument to system call"}, /* 13 */ {_SigNotify, "SIGPIPE: write on a pipe with no one to read it"}, /* 14 */ {_SigNotify, "SIGALRM: alarm clock"}, /* 15 */ {_SigNotify + _SigKill, "SIGTERM: software termination signal from kill"}, /* 16 */ {_SigNotify, "SIGUSR1: user defined signal 1"}, /* 17 */ {_SigNotify, "SIGUSR2: user defined signal 2"}, - /* 18 */ {_SigNotify, "SIGCHLD: child status change alias (POSIX)"}, + /* 18 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status change alias (POSIX)"}, /* 19 */ {_SigNotify, "SIGPWR: power-fail restart"}, /* 20 */ {_SigNotify, "SIGWINCH: window size change"}, /* 21 */ {_SigNotify, "SIGURG: urgent socket condition"}, @@ -39,7 +39,7 @@ var sigtable = [...]sigTabT{ /* 26 */ {_SigNotify + _SigDefault, "SIGTTIN: background tty read attempted"}, /* 27 */ {_SigNotify + _SigDefault, "SIGTTOU: background tty write attempted"}, /* 28 */ {_SigNotify, "SIGVTALRM: virtual timer expired"}, - /* 29 */ {_SigNotify, "SIGPROF: profiling timer expired"}, + /* 29 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling timer expired"}, /* 30 */ {_SigNotify, "SIGXCPU: exceeded cpu limit"}, /* 31 */ {_SigNotify, "SIGXFSZ: exceeded file size limit"}, /* 32 */ {_SigNotify, "SIGWAITING: reserved signal no longer used by"}, diff --git a/src/runtime/signal_windows.go b/src/runtime/signal_windows.go index da8a1c5801..b2fce53534 100644 --- a/src/runtime/signal_windows.go +++ b/src/runtime/signal_windows.go @@ -131,7 +131,9 @@ func lastcontinuehandler(info *exceptionrecord, r *context, gp *g) int32 { print("PC=", hex(r.ip()), "\n") if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 { - print("signal arrived during cgo execution\n") + if iscgo { + print("signal arrived during external code execution\n") + } gp = _g_.m.lockedg } print("\n") diff --git a/src/runtime/sigqueue_plan9.go b/src/runtime/sigqueue_plan9.go index 38f0a57b90..f000fabd1a 100644 --- a/src/runtime/sigqueue_plan9.go +++ b/src/runtime/sigqueue_plan9.go @@ -17,21 +17,29 @@ var sig struct { sleeping bool } +type noteData struct { + s [_ERRMAX]byte + n int // n bytes of s are valid +} + type noteQueue struct { lock mutex - data [qsize]*byte + data [qsize]noteData ri int wi int full bool } +// It is not allowed to allocate memory in the signal handler. func (q *noteQueue) push(item *byte) bool { lock(&q.lock) if q.full { unlock(&q.lock) return false } - q.data[q.wi] = item + s := gostringnocopy(item) + copy(q.data[q.wi].s[:], s) + q.data[q.wi].n = len(s) q.wi++ if q.wi == qsize { q.wi = 0 @@ -43,14 +51,15 @@ func (q *noteQueue) push(item *byte) bool { return true } -func (q *noteQueue) pop() *byte { +func (q *noteQueue) pop() string { lock(&q.lock) q.full = false if q.ri == q.wi { unlock(&q.lock) - return nil + return "" } - item := q.data[q.ri] + note := &q.data[q.ri] + item := string(note.s[:note.n]) q.ri++ if q.ri == qsize { q.ri = 0 @@ -86,8 +95,8 @@ func sendNote(s *byte) bool { func signal_recv() string { for { note := sig.q.pop() - if note != nil { - return gostring(note) + if note != "" { + return note } lock(&sig.lock) diff --git a/src/runtime/slice.go b/src/runtime/slice.go index 5ccc6592bf..79b611839d 100644 --- a/src/runtime/slice.go +++ b/src/runtime/slice.go @@ -84,10 +84,13 @@ func growslice(t *slicetype, old slice, n int) slice { memclr(add(p, lenmem), capmem-lenmem) } else { // Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan unitialized memory. - // TODO(rsc): Use memmove when !writeBarrierEnabled. p = newarray(et, uintptr(newcap)) - for i := 0; i < old.len; i++ { - typedmemmove(et, add(p, uintptr(i)*et.size), add(old.array, uintptr(i)*et.size)) + if !writeBarrierEnabled { + memmove(p, old.array, lenmem) + } else { + for i := uintptr(0); i < lenmem; i += et.size { + typedmemmove(et, add(p, i), add(old.array, i)) + } } } diff --git a/src/runtime/stack1.go b/src/runtime/stack1.go index f74694b7e9..27427af955 100644 --- a/src/runtime/stack1.go +++ b/src/runtime/stack1.go @@ -352,6 +352,12 @@ func adjustpointer(adjinfo *adjustinfo, vpp unsafe.Pointer) { } } +// Information from the compiler about the layout of stack frames. +type bitvector struct { + n int32 // # of bits + bytedata *uint8 +} + type gobitvector struct { n uintptr bytedata []uint8 @@ -381,20 +387,20 @@ func adjustpointers(scanp unsafe.Pointer, cbv *bitvector, adjinfo *adjustinfo, f print(" ", add(scanp, i*ptrSize), ":", ptrnames[ptrbit(&bv, i)], ":", hex(*(*uintptr)(add(scanp, i*ptrSize))), " # ", i, " ", bv.bytedata[i/4], "\n") } if ptrbit(&bv, i) == 1 { - p := *(*unsafe.Pointer)(add(scanp, i*ptrSize)) - up := uintptr(p) - if f != nil && 0 < up && up < _PageSize && debug.invalidptr != 0 || up == poisonStack { + pp := (*uintptr)(add(scanp, i*ptrSize)) + p := *pp + if f != nil && 0 < p && p < _PageSize && debug.invalidptr != 0 || p == poisonStack { // Looks like a junk value in a pointer slot. // Live analysis wrong? getg().m.traceback = 2 - print("runtime: bad pointer in frame ", funcname(f), " at ", add(scanp, i*ptrSize), ": ", p, "\n") + print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n") throw("invalid stack pointer") } - if minp <= up && up < maxp { + if minp <= p && p < maxp { if stackDebug >= 3 { print("adjust ptr ", p, " ", funcname(f), "\n") } - *(*unsafe.Pointer)(add(scanp, i*ptrSize)) = unsafe.Pointer(up + delta) + *pp = p + delta } } } diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go index 25f5bf46fb..687f067cb9 100644 --- a/src/runtime/symtab.go +++ b/src/runtime/symtab.go @@ -32,6 +32,8 @@ const ( // moduledata records information about the layout of the executable // image. It is written by the linker. Any changes here must be // matched changes to the code in cmd/internal/ld/symtab.go:symtab. +// moduledata is stored in read-only memory; none of the pointers here +// are visible to the garbage collector. type moduledata struct { pclntable []byte ftab []functab @@ -48,18 +50,24 @@ type moduledata struct { typelinks []*_type - gcdatamask, gcbssmask bitvector + modulename string + modulehashes []modulehash - // write barrier shadow data - // 64-bit systems only, enabled by GODEBUG=wbshadow=1. - // See also the shadow_* fields on mheap in mheap.go. - shadow_data uintptr // data-addr + shadow_data = shadow data addr - data_start uintptr // start of shadowed data addresses - data_end uintptr // end of shadowed data addresses + gcdatamask, gcbssmask bitvector next *moduledata } +// For each shared library a module links against, the linker creates an entry in the +// moduledata.modulehashes slice containing the name of the module, the abi hash seen +// at link time and a pointer to the runtime abi hash. These are checked in +// moduledataverify1 below. +type modulehash struct { + modulename string + linktimehash string + runtimehash *string +} + var firstmoduledata moduledata // linker symbol var lastmoduledatap *moduledata // linker symbol @@ -124,6 +132,13 @@ func moduledataverify1(datap *moduledata) { datap.maxpc != datap.ftab[nftab].entry { throw("minpc or maxpc invalid") } + + for _, modulehash := range datap.modulehashes { + if modulehash.linktimehash != *modulehash.runtimehash { + println("abi mismatch detected between", datap.modulename, "and", modulehash.modulename) + throw("abi mismatch") + } + } } // FuncForPC returns a *Func describing the function that contains the diff --git a/src/runtime/trace.go b/src/runtime/trace.go index 3b7501b9b4..6da7baddc5 100644 --- a/src/runtime/trace.go +++ b/src/runtime/trace.go @@ -132,10 +132,7 @@ type traceBuf struct { func StartTrace() error { // Stop the world, so that we can take a consistent snapshot // of all goroutines at the beginning of the trace. - semacquire(&worldsema, false) - _g_ := getg() - _g_.m.preemptoff = "start tracing" - systemstack(stoptheworld) + stopTheWorld("start tracing") // We are in stop-the-world, but syscalls can finish and write to trace concurrently. // Exitsyscall could check trace.enabled long before and then suddenly wake up @@ -146,9 +143,7 @@ func StartTrace() error { if trace.enabled || trace.shutdown { unlock(&trace.bufLock) - _g_.m.preemptoff = "" - semrelease(&worldsema) - systemstack(starttheworld) + startTheWorld() return errorString("tracing is already enabled") } @@ -175,9 +170,7 @@ func StartTrace() error { unlock(&trace.bufLock) - _g_.m.preemptoff = "" - semrelease(&worldsema) - systemstack(starttheworld) + startTheWorld() return nil } @@ -186,19 +179,14 @@ func StartTrace() error { func StopTrace() { // Stop the world so that we can collect the trace buffers from all p's below, // and also to avoid races with traceEvent. - semacquire(&worldsema, false) - _g_ := getg() - _g_.m.preemptoff = "stop tracing" - systemstack(stoptheworld) + stopTheWorld("stop tracing") // See the comment in StartTrace. lock(&trace.bufLock) if !trace.enabled { unlock(&trace.bufLock) - _g_.m.preemptoff = "" - semrelease(&worldsema) - systemstack(starttheworld) + startTheWorld() return } @@ -236,9 +224,7 @@ func StopTrace() { unlock(&trace.bufLock) - _g_.m.preemptoff = "" - semrelease(&worldsema) - systemstack(starttheworld) + startTheWorld() // The world is started but we've set trace.shutdown, so new tracing can't start. // Wait for the trace reader to flush pending buffers and stop. @@ -428,9 +414,9 @@ func traceEvent(ev byte, skip int, args ...uint64) { // The caller checked that trace.enabled == true, but trace.enabled might have been // turned off between the check and now. Check again. traceLockBuffer did mp.locks++, - // StopTrace does stoptheworld, and stoptheworld waits for mp.locks to go back to zero, + // StopTrace does stopTheWorld, and stopTheWorld waits for mp.locks to go back to zero, // so if we see trace.enabled == true now, we know it's true for the rest of the function. - // Exitsyscall can run even during stoptheworld. The race with StartTrace/StopTrace + // Exitsyscall can run even during stopTheWorld. The race with StartTrace/StopTrace // during tracing in exitsyscall is resolved by locking trace.bufLock in traceLockBuffer. if !trace.enabled { traceReleaseBuffer(pid) @@ -733,7 +719,7 @@ func traceProcStart() { } func traceProcStop(pp *p) { - // Sysmon and stoptheworld can stop Ps blocked in syscalls, + // Sysmon and stopTheWorld can stop Ps blocked in syscalls, // to handle this we temporary employ the P. mp := acquirem() oldp := mp.p @@ -807,7 +793,7 @@ func traceGoSysExit(ts int64) { } func traceGoSysBlock(pp *p) { - // Sysmon and stoptheworld can declare syscalls running on remote Ps as blocked, + // Sysmon and stopTheWorld can declare syscalls running on remote Ps as blocked, // to handle this we temporary employ the P. mp := acquirem() oldp := mp.p diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 9f34e37ea4..5ed601e6f3 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -46,6 +46,9 @@ var ( timerprocPC uintptr gcBgMarkWorkerPC uintptr systemstack_switchPC uintptr + systemstackPC uintptr + + gogoPC uintptr externalthreadhandlerp uintptr // initialized elsewhere ) @@ -69,6 +72,10 @@ func tracebackinit() { timerprocPC = funcPC(timerproc) gcBgMarkWorkerPC = funcPC(gcBgMarkWorker) systemstack_switchPC = funcPC(systemstack_switch) + systemstackPC = funcPC(systemstack) + + // used by sigprof handler + gogoPC = funcPC(gogo) } // Traceback over the deferred function calls. @@ -194,7 +201,14 @@ func gentraceback(pc0, sp0, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max in // Found an actual function. // Derive frame pointer and link register. if frame.fp == 0 { - frame.fp = frame.sp + uintptr(funcspdelta(f, frame.pc)) + // We want to jump over the systemstack switch. If we're running on the + // g0, this systemstack is at the top of the stack. + // if we're not on g0 or there's a no curg, then this is a regular call. + sp := frame.sp + if flags&_TraceJumpStack != 0 && f.entry == systemstackPC && gp == g.m.g0 && gp.m.curg != nil { + sp = gp.m.curg.sched.sp + } + frame.fp = sp + uintptr(funcspdelta(f, frame.pc)) if !usesLR { // On x86, call instruction pushes return PC before entering new function. frame.fp += regSize @@ -455,7 +469,7 @@ func setArgInfo(frame *stkframe, f *_func, needArgMap bool) { throw("reflect mismatch") } bv := (*bitvector)(unsafe.Pointer(fn[1])) - frame.arglen = uintptr(bv.n / 2 * ptrSize) + frame.arglen = uintptr(bv.n * ptrSize) frame.argmap = bv } } @@ -517,9 +531,10 @@ func traceback1(pc, sp, lr uintptr, gp *g, flags uint) { func callers(skip int, pcbuf []uintptr) int { sp := getcallersp(unsafe.Pointer(&skip)) pc := uintptr(getcallerpc(unsafe.Pointer(&skip))) + gp := getg() var n int systemstack(func() { - n = gentraceback(pc, sp, 0, getg(), skip, &pcbuf[0], len(pcbuf), nil, nil, 0) + n = gentraceback(pc, sp, 0, gp, skip, &pcbuf[0], len(pcbuf), nil, nil, 0) }) return n } diff --git a/src/runtime/type.go b/src/runtime/type.go index 48df2a4382..45bdac8b91 100644 --- a/src/runtime/type.go +++ b/src/runtime/type.go @@ -20,17 +20,10 @@ type _type struct { fieldalign uint8 kind uint8 alg *typeAlg - // gc stores type info required for garbage collector. - // If (kind&KindGCProg)==0, then gc[0] points at sparse GC bitmap - // (no indirection), 4 bits per word. - // If (kind&KindGCProg)!=0, then gc[1] points to a compiler-generated - // read-only GC program; and gc[0] points to BSS space for sparse GC bitmap. - // For huge types (>maxGCMask), runtime unrolls the program directly into - // GC bitmap and gc[0] is not used. For moderately-sized types, runtime - // unrolls the program into gc[0] space on first use. The first byte of gc[0] - // (gc[0][0]) contains 'unroll' flag saying whether the program is already - // unrolled into gc[0] or not. - gc [2]uintptr + // gcdata stores the GC type data for the garbage collector. + // If the KindGCProg bit is set in kind, gcdata is a GC program. + // Otherwise it is a ptrmask bitmap. See mbitmap.go for details. + gcdata *byte _string *string x *uncommontype ptrto *_type |
