aboutsummaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime')
-rw-r--r--src/runtime/arch1_386.go15
-rw-r--r--src/runtime/arch1_amd64.go15
-rw-r--r--src/runtime/arch1_amd64p32.go15
-rw-r--r--src/runtime/arch1_arm.go15
-rw-r--r--src/runtime/arch1_arm64.go15
-rw-r--r--src/runtime/arch1_ppc64.go15
-rw-r--r--src/runtime/arch1_ppc64le.go15
-rw-r--r--src/runtime/asm_amd64.s4
-rw-r--r--src/runtime/atomic_pointer.go18
-rw-r--r--src/runtime/debug.go11
-rw-r--r--src/runtime/export_test.go49
-rw-r--r--src/runtime/extern.go12
-rw-r--r--src/runtime/gc_test.go192
-rw-r--r--src/runtime/gcinfo_test.go74
-rw-r--r--src/runtime/hashmap.go60
-rw-r--r--src/runtime/heapdump.go18
-rw-r--r--src/runtime/lfstack_test.go4
-rw-r--r--src/runtime/malloc.go10
-rw-r--r--src/runtime/mbarrier.go298
-rw-r--r--src/runtime/mbitmap.go1733
-rw-r--r--src/runtime/mgc.go59
-rw-r--r--src/runtime/mgcmark.go43
-rw-r--r--src/runtime/mgcwork.go9
-rw-r--r--src/runtime/mheap.go28
-rw-r--r--src/runtime/mprof.go18
-rw-r--r--src/runtime/mstats.go15
-rw-r--r--src/runtime/os1_darwin.go26
-rw-r--r--src/runtime/os1_dragonfly.go28
-rw-r--r--src/runtime/os1_freebsd.go28
-rw-r--r--src/runtime/os1_linux.go28
-rw-r--r--src/runtime/os1_nacl.go3
-rw-r--r--src/runtime/os1_netbsd.go29
-rw-r--r--src/runtime/os1_openbsd.go25
-rw-r--r--src/runtime/os1_plan9.go5
-rw-r--r--src/runtime/os1_windows.go3
-rw-r--r--src/runtime/os3_solaris.go29
-rw-r--r--src/runtime/panic.go16
-rw-r--r--src/runtime/pprof/pprof.go50
-rw-r--r--src/runtime/proc.go2
-rw-r--r--src/runtime/proc1.go294
-rw-r--r--src/runtime/proc_test.go5
-rw-r--r--src/runtime/runtime-gdb_test.go4
-rw-r--r--src/runtime/runtime2.go29
-rw-r--r--src/runtime/runtime_test.go52
-rw-r--r--src/runtime/signal1_unix.go92
-rw-r--r--src/runtime/signal_darwin.go14
-rw-r--r--src/runtime/signal_linux.go16
-rw-r--r--src/runtime/signal_netbsd.go14
-rw-r--r--src/runtime/signal_solaris.go14
-rw-r--r--src/runtime/signal_windows.go4
-rw-r--r--src/runtime/sigqueue_plan9.go23
-rw-r--r--src/runtime/slice.go9
-rw-r--r--src/runtime/stack1.go18
-rw-r--r--src/runtime/symtab.go29
-rw-r--r--src/runtime/trace.go34
-rw-r--r--src/runtime/traceback.go21
-rw-r--r--src/runtime/type.go15
57 files changed, 2294 insertions, 1425 deletions
diff --git a/src/runtime/arch1_386.go b/src/runtime/arch1_386.go
index b024d7a51f..d41696a6d6 100644
--- a/src/runtime/arch1_386.go
+++ b/src/runtime/arch1_386.go
@@ -5,12 +5,11 @@
package runtime
const (
- thechar = '8'
- _BigEndian = 0
- _CacheLineSize = 64
- _RuntimeGogoBytes = 64
- _PhysPageSize = goos_nacl*65536 + (1-goos_nacl)*4096 // 4k normally; 64k on NaCl
- _PCQuantum = 1
- _Int64Align = 4
- hugePageSize = 1 << 21
+ thechar = '8'
+ _BigEndian = 0
+ _CacheLineSize = 64
+ _PhysPageSize = goos_nacl*65536 + (1-goos_nacl)*4096 // 4k normally; 64k on NaCl
+ _PCQuantum = 1
+ _Int64Align = 4
+ hugePageSize = 1 << 21
)
diff --git a/src/runtime/arch1_amd64.go b/src/runtime/arch1_amd64.go
index 932b2b7c55..15f4cc65fe 100644
--- a/src/runtime/arch1_amd64.go
+++ b/src/runtime/arch1_amd64.go
@@ -5,12 +5,11 @@
package runtime
const (
- thechar = '6'
- _BigEndian = 0
- _CacheLineSize = 64
- _RuntimeGogoBytes = 80 + (goos_solaris)*16
- _PhysPageSize = 4096
- _PCQuantum = 1
- _Int64Align = 8
- hugePageSize = 1 << 21
+ thechar = '6'
+ _BigEndian = 0
+ _CacheLineSize = 64
+ _PhysPageSize = 4096
+ _PCQuantum = 1
+ _Int64Align = 8
+ hugePageSize = 1 << 21
)
diff --git a/src/runtime/arch1_amd64p32.go b/src/runtime/arch1_amd64p32.go
index 79421e848a..3c5456f933 100644
--- a/src/runtime/arch1_amd64p32.go
+++ b/src/runtime/arch1_amd64p32.go
@@ -5,12 +5,11 @@
package runtime
const (
- thechar = '6'
- _BigEndian = 0
- _CacheLineSize = 64
- _RuntimeGogoBytes = 64
- _PhysPageSize = 65536*goos_nacl + 4096*(1-goos_nacl)
- _PCQuantum = 1
- _Int64Align = 8
- hugePageSize = 1 << 21
+ thechar = '6'
+ _BigEndian = 0
+ _CacheLineSize = 64
+ _PhysPageSize = 65536*goos_nacl + 4096*(1-goos_nacl)
+ _PCQuantum = 1
+ _Int64Align = 8
+ hugePageSize = 1 << 21
)
diff --git a/src/runtime/arch1_arm.go b/src/runtime/arch1_arm.go
index c3fe4f0cb3..0ec2093881 100644
--- a/src/runtime/arch1_arm.go
+++ b/src/runtime/arch1_arm.go
@@ -5,12 +5,11 @@
package runtime
const (
- thechar = '5'
- _BigEndian = 0
- _CacheLineSize = 32
- _RuntimeGogoBytes = 60
- _PhysPageSize = 65536*goos_nacl + 4096*(1-goos_nacl)
- _PCQuantum = 4
- _Int64Align = 4
- hugePageSize = 0
+ thechar = '5'
+ _BigEndian = 0
+ _CacheLineSize = 32
+ _PhysPageSize = 65536*goos_nacl + 4096*(1-goos_nacl)
+ _PCQuantum = 4
+ _Int64Align = 4
+ hugePageSize = 0
)
diff --git a/src/runtime/arch1_arm64.go b/src/runtime/arch1_arm64.go
index 549a635ca4..1a3165c8b7 100644
--- a/src/runtime/arch1_arm64.go
+++ b/src/runtime/arch1_arm64.go
@@ -5,12 +5,11 @@
package runtime
const (
- thechar = '7'
- _BigEndian = 0
- _CacheLineSize = 32
- _RuntimeGogoBytes = 64
- _PhysPageSize = 4096*(1-goos_darwin) + 16384*goos_darwin
- _PCQuantum = 4
- _Int64Align = 8
- hugePageSize = 0
+ thechar = '7'
+ _BigEndian = 0
+ _CacheLineSize = 32
+ _PhysPageSize = 4096*(1-goos_darwin) + 16384*goos_darwin
+ _PCQuantum = 4
+ _Int64Align = 8
+ hugePageSize = 0
)
diff --git a/src/runtime/arch1_ppc64.go b/src/runtime/arch1_ppc64.go
index ee453c09f2..de6dd91401 100644
--- a/src/runtime/arch1_ppc64.go
+++ b/src/runtime/arch1_ppc64.go
@@ -5,12 +5,11 @@
package runtime
const (
- thechar = '9'
- _BigEndian = 1
- _CacheLineSize = 64
- _RuntimeGogoBytes = 72
- _PhysPageSize = 65536
- _PCQuantum = 4
- _Int64Align = 8
- hugePageSize = 0
+ thechar = '9'
+ _BigEndian = 1
+ _CacheLineSize = 64
+ _PhysPageSize = 65536
+ _PCQuantum = 4
+ _Int64Align = 8
+ hugePageSize = 0
)
diff --git a/src/runtime/arch1_ppc64le.go b/src/runtime/arch1_ppc64le.go
index aa028a10f3..9a55c71101 100644
--- a/src/runtime/arch1_ppc64le.go
+++ b/src/runtime/arch1_ppc64le.go
@@ -5,12 +5,11 @@
package runtime
const (
- thechar = '9'
- _BigEndian = 0
- _CacheLineSize = 64
- _RuntimeGogoBytes = 72
- _PhysPageSize = 65536
- _PCQuantum = 4
- _Int64Align = 8
- hugePageSize = 0
+ thechar = '9'
+ _BigEndian = 0
+ _CacheLineSize = 64
+ _PhysPageSize = 65536
+ _PCQuantum = 4
+ _Int64Align = 8
+ hugePageSize = 0
)
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index 36353d108f..0f9aeb8f37 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -1693,8 +1693,10 @@ TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
RET
// This is called from .init_array and follows the platform, not Go, ABI.
-TEXT runtime·addmoduledata(SB),NOSPLIT,$0-8
+TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
+ PUSHQ R15 // The access to global variables below implicitly uses R15, which is callee-save
MOVQ runtime·lastmoduledatap(SB), AX
MOVQ DI, moduledata_next(AX)
MOVQ DI, runtime·lastmoduledatap(SB)
+ POPQ R15
RET
diff --git a/src/runtime/atomic_pointer.go b/src/runtime/atomic_pointer.go
index 50a30242d9..f84afe0362 100644
--- a/src/runtime/atomic_pointer.go
+++ b/src/runtime/atomic_pointer.go
@@ -20,18 +20,12 @@ import "unsafe"
func atomicstorep(ptr unsafe.Pointer, new unsafe.Pointer) {
atomicstorep1(noescape(ptr), new)
writebarrierptr_nostore((*uintptr)(ptr), uintptr(new))
- if mheap_.shadow_enabled {
- writebarrierptr_noshadow((*uintptr)(noescape(ptr)))
- }
}
//go:nosplit
func xchgp(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer {
old := xchgp1(noescape(ptr), new)
writebarrierptr_nostore((*uintptr)(ptr), uintptr(new))
- if mheap_.shadow_enabled {
- writebarrierptr_noshadow((*uintptr)(noescape(ptr)))
- }
return old
}
@@ -41,9 +35,6 @@ func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool {
return false
}
writebarrierptr_nostore((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
- if mheap_.shadow_enabled {
- writebarrierptr_noshadow((*uintptr)(noescape(unsafe.Pointer(ptr))))
- }
return true
}
@@ -60,9 +51,6 @@ func sync_atomic_StorePointer(ptr *unsafe.Pointer, new unsafe.Pointer) {
sync_atomic_StoreUintptr((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
atomicstorep1(noescape(unsafe.Pointer(ptr)), new)
writebarrierptr_nostore((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
- if mheap_.shadow_enabled {
- writebarrierptr_noshadow((*uintptr)(noescape(unsafe.Pointer(ptr))))
- }
}
//go:linkname sync_atomic_SwapUintptr sync/atomic.SwapUintptr
@@ -73,9 +61,6 @@ func sync_atomic_SwapUintptr(ptr *uintptr, new uintptr) uintptr
func sync_atomic_SwapPointer(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer {
old := unsafe.Pointer(sync_atomic_SwapUintptr((*uintptr)(noescape(ptr)), uintptr(new)))
writebarrierptr_nostore((*uintptr)(ptr), uintptr(new))
- if mheap_.shadow_enabled {
- writebarrierptr_noshadow((*uintptr)(noescape(ptr)))
- }
return old
}
@@ -89,8 +74,5 @@ func sync_atomic_CompareAndSwapPointer(ptr *unsafe.Pointer, old, new unsafe.Poin
return false
}
writebarrierptr_nostore((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
- if mheap_.shadow_enabled {
- writebarrierptr_noshadow((*uintptr)(noescape(unsafe.Pointer(ptr))))
- }
return true
}
diff --git a/src/runtime/debug.go b/src/runtime/debug.go
index 3ecaac10bc..9aec3b03e0 100644
--- a/src/runtime/debug.go
+++ b/src/runtime/debug.go
@@ -22,17 +22,12 @@ func GOMAXPROCS(n int) int {
return ret
}
- semacquire(&worldsema, false)
- gp := getg()
- gp.m.preemptoff = "GOMAXPROCS"
- systemstack(stoptheworld)
+ stopTheWorld("GOMAXPROCS")
- // newprocs will be processed by starttheworld
+ // newprocs will be processed by startTheWorld
newprocs = int32(n)
- gp.m.preemptoff = ""
- semrelease(&worldsema)
- systemstack(starttheworld)
+ startTheWorld()
return ret
}
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index e0c8b17bd3..3fddcc868f 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -76,24 +76,17 @@ func ParForIters(desc *ParFor, tid uint32) (uint32, uint32) {
}
func GCMask(x interface{}) (ret []byte) {
- e := (*eface)(unsafe.Pointer(&x))
- s := (*slice)(unsafe.Pointer(&ret))
systemstack(func() {
- var len uintptr
- var a *byte
- getgcmask(e.data, e._type, &a, &len)
- s.array = unsafe.Pointer(a)
- s.len = int(len)
- s.cap = s.len
+ ret = getgcmask(x)
})
return
}
func RunSchedLocalQueueTest() {
- systemstack(testSchedLocalQueue)
+ testSchedLocalQueue()
}
func RunSchedLocalQueueStealTest() {
- systemstack(testSchedLocalQueueSteal)
+ testSchedLocalQueueSteal()
}
var StringHash = stringHash
@@ -106,11 +99,6 @@ var MemclrBytes = memclrBytes
var HashLoad = &hashLoad
-// For testing.
-func GogoBytes() int32 {
- return _RuntimeGogoBytes
-}
-
// entry point for testing
func GostringW(w []uint16) (s string) {
systemstack(func() {
@@ -133,3 +121,34 @@ func Envs() []string { return envs }
func SetEnvs(e []string) { envs = e }
var BigEndian = _BigEndian
+
+// For benchmarking.
+
+func BenchSetType(n int, x interface{}) {
+ e := *(*eface)(unsafe.Pointer(&x))
+ t := e._type
+ var size uintptr
+ var p unsafe.Pointer
+ switch t.kind & kindMask {
+ case _KindPtr:
+ t = (*ptrtype)(unsafe.Pointer(t)).elem
+ size = t.size
+ p = e.data
+ case _KindSlice:
+ slice := *(*struct {
+ ptr unsafe.Pointer
+ len, cap uintptr
+ })(e.data)
+ t = (*slicetype)(unsafe.Pointer(t)).elem
+ size = t.size * slice.len
+ p = slice.ptr
+ }
+ allocSize := roundupsize(size)
+ systemstack(func() {
+ for i := 0; i < n; i++ {
+ heapBitsSetType(uintptr(p), allocSize, size, t)
+ }
+ })
+}
+
+const PtrSize = ptrSize
diff --git a/src/runtime/extern.go b/src/runtime/extern.go
index 540d7b5124..476c3c5ae3 100644
--- a/src/runtime/extern.go
+++ b/src/runtime/extern.go
@@ -58,18 +58,6 @@ a comma-separated list of name=val pairs. Supported names are:
scavenge: scavenge=1 enables debugging mode of heap scavenger.
- wbshadow: setting wbshadow=1 enables a shadow copy of the heap
- used to detect missing write barriers at the next write to a
- given location. If a bug can be detected in this mode it is
- typically easy to understand, since the crash says quite
- clearly what kind of word has missed a write barrier.
- Setting wbshadow=2 checks the shadow copy during garbage
- collection as well. Bugs detected at garbage collection can be
- difficult to understand, because there is no context for what
- the found word means. Typically you have to reproduce the
- problem with allocfreetrace=1 in order to understand the type
- of the badly updated word.
-
gccheckmark: setting gccheckmark=1 enables verification of the
garbage collector's concurrent mark phase by performing a
second mark pass while the world is stopped. If the second
diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go
index 6abec4cca7..e3e0c3a583 100644
--- a/src/runtime/gc_test.go
+++ b/src/runtime/gc_test.go
@@ -6,6 +6,7 @@ package runtime_test
import (
"os"
+ "reflect"
"runtime"
"runtime/debug"
"testing"
@@ -197,45 +198,166 @@ func TestHugeGCInfo(t *testing.T) {
}
}
-func BenchmarkSetTypeNoPtr1(b *testing.B) {
- type NoPtr1 struct {
- p uintptr
- }
- var p *NoPtr1
- for i := 0; i < b.N; i++ {
- p = &NoPtr1{}
- }
- _ = p
+func BenchmarkSetTypePtr(b *testing.B) {
+ benchSetType(b, new(*byte))
}
-func BenchmarkSetTypeNoPtr2(b *testing.B) {
- type NoPtr2 struct {
- p, q uintptr
- }
- var p *NoPtr2
- for i := 0; i < b.N; i++ {
- p = &NoPtr2{}
- }
- _ = p
+
+func BenchmarkSetTypePtr8(b *testing.B) {
+ benchSetType(b, new([8]*byte))
}
-func BenchmarkSetTypePtr1(b *testing.B) {
- type Ptr1 struct {
- p *byte
- }
- var p *Ptr1
- for i := 0; i < b.N; i++ {
- p = &Ptr1{}
- }
- _ = p
+
+func BenchmarkSetTypePtr16(b *testing.B) {
+ benchSetType(b, new([16]*byte))
}
-func BenchmarkSetTypePtr2(b *testing.B) {
- type Ptr2 struct {
- p, q *byte
- }
- var p *Ptr2
- for i := 0; i < b.N; i++ {
- p = &Ptr2{}
+
+func BenchmarkSetTypePtr32(b *testing.B) {
+ benchSetType(b, new([32]*byte))
+}
+
+func BenchmarkSetTypePtr64(b *testing.B) {
+ benchSetType(b, new([64]*byte))
+}
+
+func BenchmarkSetTypePtr126(b *testing.B) {
+ benchSetType(b, new([126]*byte))
+}
+
+func BenchmarkSetTypePtr128(b *testing.B) {
+ benchSetType(b, new([128]*byte))
+}
+
+func BenchmarkSetTypePtrSlice(b *testing.B) {
+ benchSetType(b, make([]*byte, 1<<10))
+}
+
+type Node1 struct {
+ Value [1]uintptr
+ Left, Right *byte
+}
+
+func BenchmarkSetTypeNode1(b *testing.B) {
+ benchSetType(b, new(Node1))
+}
+
+func BenchmarkSetTypeNode1Slice(b *testing.B) {
+ benchSetType(b, make([]Node1, 32))
+}
+
+type Node8 struct {
+ Value [8]uintptr
+ Left, Right *byte
+}
+
+func BenchmarkSetTypeNode8(b *testing.B) {
+ benchSetType(b, new(Node8))
+}
+
+func BenchmarkSetTypeNode8Slice(b *testing.B) {
+ benchSetType(b, make([]Node8, 32))
+}
+
+type Node64 struct {
+ Value [64]uintptr
+ Left, Right *byte
+}
+
+func BenchmarkSetTypeNode64(b *testing.B) {
+ benchSetType(b, new(Node64))
+}
+
+func BenchmarkSetTypeNode64Slice(b *testing.B) {
+ benchSetType(b, make([]Node64, 32))
+}
+
+type Node64Dead struct {
+ Left, Right *byte
+ Value [64]uintptr
+}
+
+func BenchmarkSetTypeNode64Dead(b *testing.B) {
+ benchSetType(b, new(Node64Dead))
+}
+
+func BenchmarkSetTypeNode64DeadSlice(b *testing.B) {
+ benchSetType(b, make([]Node64Dead, 32))
+}
+
+type Node124 struct {
+ Value [124]uintptr
+ Left, Right *byte
+}
+
+func BenchmarkSetTypeNode124(b *testing.B) {
+ benchSetType(b, new(Node124))
+}
+
+func BenchmarkSetTypeNode124Slice(b *testing.B) {
+ benchSetType(b, make([]Node124, 32))
+}
+
+type Node126 struct {
+ Value [126]uintptr
+ Left, Right *byte
+}
+
+func BenchmarkSetTypeNode126(b *testing.B) {
+ benchSetType(b, new(Node126))
+}
+
+func BenchmarkSetTypeNode126Slice(b *testing.B) {
+ benchSetType(b, make([]Node126, 32))
+}
+
+type Node128 struct {
+ Value [128]uintptr
+ Left, Right *byte
+}
+
+func BenchmarkSetTypeNode128(b *testing.B) {
+ benchSetType(b, new(Node128))
+}
+
+func BenchmarkSetTypeNode128Slice(b *testing.B) {
+ benchSetType(b, make([]Node128, 32))
+}
+
+type Node130 struct {
+ Value [130]uintptr
+ Left, Right *byte
+}
+
+func BenchmarkSetTypeNode130(b *testing.B) {
+ benchSetType(b, new(Node130))
+}
+
+func BenchmarkSetTypeNode130Slice(b *testing.B) {
+ benchSetType(b, make([]Node130, 32))
+}
+
+type Node1024 struct {
+ Value [1024]uintptr
+ Left, Right *byte
+}
+
+func BenchmarkSetTypeNode1024(b *testing.B) {
+ benchSetType(b, new(Node1024))
+}
+
+func BenchmarkSetTypeNode1024Slice(b *testing.B) {
+ benchSetType(b, make([]Node1024, 32))
+}
+
+func benchSetType(b *testing.B, x interface{}) {
+ v := reflect.ValueOf(x)
+ t := v.Type()
+ switch t.Kind() {
+ case reflect.Ptr:
+ b.SetBytes(int64(t.Elem().Size()))
+ case reflect.Slice:
+ b.SetBytes(int64(t.Elem().Size()) * int64(v.Len()))
}
- _ = p
+ b.ResetTimer()
+ runtime.BenchSetType(b.N, x)
}
func BenchmarkAllocation(b *testing.B) {
diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go
index 66b0353f08..f330bf2430 100644
--- a/src/runtime/gcinfo_test.go
+++ b/src/runtime/gcinfo_test.go
@@ -10,8 +10,14 @@ import (
"testing"
)
+const (
+ typeScalar = 0
+ typePointer = 1
+)
+
// TestGCInfo tests that various objects in heap, data and bss receive correct GC pointer type info.
func TestGCInfo(t *testing.T) {
+ verifyGCInfo(t, "bss Ptr", &bssPtr, infoPtr)
verifyGCInfo(t, "bss ScalarPtr", &bssScalarPtr, infoScalarPtr)
verifyGCInfo(t, "bss PtrScalar", &bssPtrScalar, infoPtrScalar)
verifyGCInfo(t, "bss BigStruct", &bssBigStruct, infoBigStruct())
@@ -20,6 +26,7 @@ func TestGCInfo(t *testing.T) {
verifyGCInfo(t, "bss eface", &bssEface, infoEface)
verifyGCInfo(t, "bss iface", &bssIface, infoIface)
+ verifyGCInfo(t, "data Ptr", &dataPtr, infoPtr)
verifyGCInfo(t, "data ScalarPtr", &dataScalarPtr, infoScalarPtr)
verifyGCInfo(t, "data PtrScalar", &dataPtrScalar, infoPtrScalar)
verifyGCInfo(t, "data BigStruct", &dataBigStruct, infoBigStruct())
@@ -28,6 +35,7 @@ func TestGCInfo(t *testing.T) {
verifyGCInfo(t, "data eface", &dataEface, infoEface)
verifyGCInfo(t, "data iface", &dataIface, infoIface)
+ verifyGCInfo(t, "stack Ptr", new(Ptr), infoPtr)
verifyGCInfo(t, "stack ScalarPtr", new(ScalarPtr), infoScalarPtr)
verifyGCInfo(t, "stack PtrScalar", new(PtrScalar), infoPtrScalar)
verifyGCInfo(t, "stack BigStruct", new(BigStruct), infoBigStruct())
@@ -37,38 +45,43 @@ func TestGCInfo(t *testing.T) {
verifyGCInfo(t, "stack iface", new(Iface), infoIface)
for i := 0; i < 10; i++ {
- verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), infoScalarPtr)
- verifyGCInfo(t, "heap PtrScalar", escape(new(PtrScalar)), infoPtrScalar)
- verifyGCInfo(t, "heap BigStruct", escape(new(BigStruct)), infoBigStruct())
- verifyGCInfo(t, "heap string", escape(new(string)), infoString)
- verifyGCInfo(t, "heap eface", escape(new(interface{})), infoEface)
- verifyGCInfo(t, "heap iface", escape(new(Iface)), infoIface)
+ verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(padDead(infoPtr)))
+ verifyGCInfo(t, "heap PtrSlice", escape(&make([]*byte, 10)[0]), trimDead(infoPtr10))
+ verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), trimDead(infoScalarPtr))
+ verifyGCInfo(t, "heap ScalarPtrSlice", escape(&make([]ScalarPtr, 4)[0]), trimDead(infoScalarPtr4))
+ verifyGCInfo(t, "heap PtrScalar", escape(new(PtrScalar)), trimDead(infoPtrScalar))
+ verifyGCInfo(t, "heap BigStruct", escape(new(BigStruct)), trimDead(infoBigStruct()))
+ verifyGCInfo(t, "heap string", escape(new(string)), trimDead(infoString))
+ verifyGCInfo(t, "heap eface", escape(new(interface{})), trimDead(infoEface))
+ verifyGCInfo(t, "heap iface", escape(new(Iface)), trimDead(infoIface))
}
-
}
func verifyGCInfo(t *testing.T, name string, p interface{}, mask0 []byte) {
mask := runtime.GCMask(p)
- if len(mask) > len(mask0) {
- mask0 = append(mask0, typeDead)
- mask = mask[:len(mask0)]
- }
if bytes.Compare(mask, mask0) != 0 {
t.Errorf("bad GC program for %v:\nwant %+v\ngot %+v", name, mask0, mask)
return
}
}
-func nonStackInfo(mask []byte) []byte {
- // typeDead is replaced with typeScalar everywhere except stacks.
- mask1 := make([]byte, len(mask))
- for i, v := range mask {
- if v == typeDead {
- v = typeScalar
- }
- mask1[i] = v
+func padDead(mask []byte) []byte {
+ // Because the dead bit isn't encoded until the third word,
+ // and because on 32-bit systems a one-word allocation
+ // uses a two-word block, the pointer info for a one-word
+ // object needs to be expanded to include an extra scalar
+ // on 32-bit systems to match the heap bitmap.
+ if runtime.PtrSize == 4 && len(mask) == 1 {
+ return []byte{mask[0], 0}
+ }
+ return mask
+}
+
+func trimDead(mask []byte) []byte {
+ for len(mask) > 2 && mask[len(mask)-1] == typeScalar {
+ mask = mask[:len(mask)-1]
}
- return mask1
+ return mask
}
var gcinfoSink interface{}
@@ -78,18 +91,13 @@ func escape(p interface{}) interface{} {
return p
}
-const (
- typeDead = iota
- typeScalar
- typePointer
-)
+var infoPtr = []byte{typePointer}
-const (
- BitsString = iota // unused
- BitsSlice // unused
- BitsIface
- BitsEface
-)
+type Ptr struct {
+ *byte
+}
+
+var infoPtr10 = []byte{typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer}
type ScalarPtr struct {
q int
@@ -102,6 +110,8 @@ type ScalarPtr struct {
var infoScalarPtr = []byte{typeScalar, typePointer, typeScalar, typePointer, typeScalar, typePointer}
+var infoScalarPtr4 = append(append(append(append([]byte(nil), infoScalarPtr...), infoScalarPtr...), infoScalarPtr...), infoScalarPtr...)
+
type PtrScalar struct {
q *int
w int
@@ -166,6 +176,7 @@ func (IfaceImpl) f() {
var (
// BSS
+ bssPtr Ptr
bssScalarPtr ScalarPtr
bssPtrScalar PtrScalar
bssBigStruct BigStruct
@@ -175,6 +186,7 @@ var (
bssIface Iface
// DATA
+ dataPtr = Ptr{new(byte)}
dataScalarPtr = ScalarPtr{q: 1}
dataPtrScalar = PtrScalar{w: 1}
dataBigStruct = BigStruct{w: 1}
diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go
index 9ca33992bb..b199330a1e 100644
--- a/src/runtime/hashmap.go
+++ b/src/runtime/hashmap.go
@@ -233,6 +233,9 @@ func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap {
throw("need padding in bucket (value)")
}
+ // make sure zero of element type is available.
+ mapzero(t.elem)
+
// find size parameter which will hold the requested # of elements
B := uint8(0)
for ; hint > bucketCnt && float32(hint) > loadFactor*float32(uintptr(1)<<B); B++ {
@@ -990,3 +993,60 @@ func reflect_maplen(h *hmap) int {
func reflect_ismapkey(t *_type) bool {
return ismapkey(t)
}
+
+var zerobuf struct {
+ lock mutex
+ p *byte
+ size uintptr
+}
+
+var zerotiny [1024]byte
+
+// mapzero ensures that t.zero points at a zero value for type t.
+// Types known to the compiler are in read-only memory and all point
+// to a single zero in the bss of a large enough size.
+// Types allocated by package reflect are in writable memory and
+// start out with zero set to nil; we initialize those on demand.
+func mapzero(t *_type) {
+ // On ARM, atomicloadp is implemented as xadd(p, 0),
+ // so we cannot use atomicloadp on read-only memory.
+ // Check whether the pointer is in the heap; if not, it's not writable
+ // so the zero value must already be set.
+ if GOARCH == "arm" && !inheap(uintptr(unsafe.Pointer(t))) {
+ if t.zero == nil {
+ print("runtime: map element ", *t._string, " missing zero value\n")
+ throw("mapzero")
+ }
+ return
+ }
+
+ // Already done?
+ // Check without lock, so must use atomicload to sync with atomicstore in allocation case below.
+ if atomicloadp(unsafe.Pointer(&t.zero)) != nil {
+ return
+ }
+
+ // Small enough for static buffer?
+ if t.size <= uintptr(len(zerotiny)) {
+ atomicstorep(unsafe.Pointer(&t.zero), unsafe.Pointer(&zerotiny[0]))
+ return
+ }
+
+ // Use allocated buffer.
+ lock(&zerobuf.lock)
+ if zerobuf.size < t.size {
+ if zerobuf.size == 0 {
+ zerobuf.size = 4 * 1024
+ }
+ for zerobuf.size < t.size {
+ zerobuf.size *= 2
+ if zerobuf.size == 0 {
+ // need >2GB zero on 32-bit machine
+ throw("map element too large")
+ }
+ }
+ zerobuf.p = (*byte)(persistentalloc(zerobuf.size, 64, &memstats.other_sys))
+ }
+ atomicstorep(unsafe.Pointer(&t.zero), unsafe.Pointer(zerobuf.p))
+ unlock(&zerobuf.lock)
+}
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index e18aa79164..c0fff3f1ce 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -15,20 +15,13 @@ import "unsafe"
//go:linkname runtime_debug_WriteHeapDump runtime/debug.WriteHeapDump
func runtime_debug_WriteHeapDump(fd uintptr) {
- semacquire(&worldsema, false)
- gp := getg()
- gp.m.preemptoff = "write heap dump"
- systemstack(stoptheworld)
+ stopTheWorld("write heap dump")
systemstack(func() {
writeheapdump_m(fd)
})
- gp.m.preemptoff = ""
- gp.m.locks++
- semrelease(&worldsema)
- systemstack(starttheworld)
- gp.m.locks--
+ startTheWorld()
}
const (
@@ -730,14 +723,13 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
i := uintptr(0)
hbits := heapBitsForAddr(p)
for ; i < nptr; i++ {
- bits := hbits.typeBits()
- if bits == typeDead {
+ if i >= 2 && !hbits.isMarked() {
break // end of object
}
- hbits = hbits.next()
- if bits == typePointer {
+ if hbits.isPointer() {
tmpbuf[i/8] |= 1 << (i % 8)
}
+ hbits = hbits.next()
}
return bitvector{int32(i), &tmpbuf[0]}
}
diff --git a/src/runtime/lfstack_test.go b/src/runtime/lfstack_test.go
index 68f221d6ef..4da4d88619 100644
--- a/src/runtime/lfstack_test.go
+++ b/src/runtime/lfstack_test.go
@@ -24,8 +24,12 @@ func toMyNode(node *LFNode) *MyNode {
return (*MyNode)(unsafe.Pointer(node))
}
+var global interface{}
+
func TestLFStack(t *testing.T) {
stack := new(uint64)
+ global = stack // force heap allocation
+
// Need to keep additional referenfces to nodes, the stack is not all that type-safe.
var nodes []*MyNode
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 1619ccb9f4..2d7e55643f 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -424,9 +424,6 @@ func mHeap_SysAlloc(h *mheap, n uintptr) unsafe.Pointer {
if raceenabled {
racemapshadow((unsafe.Pointer)(p), n)
}
- if mheap_.shadow_enabled {
- sysMap(unsafe.Pointer(p+mheap_.shadow_heap), n, h.shadow_reserved, &memstats.other_sys)
- }
if uintptr(p)&(_PageSize-1) != 0 {
throw("misrounded allocation in MHeap_SysAlloc")
@@ -512,6 +509,9 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
if mp.mallocing != 0 {
throw("malloc deadlock")
}
+ if mp.gsignal == getg() {
+ throw("malloc during signal")
+ }
mp.mallocing = 1
shouldhelpgc := false
@@ -669,10 +669,6 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
})
}
- if mheap_.shadow_enabled {
- clearshadow(uintptr(x), size)
- }
-
if raceenabled {
racemalloc(x, size)
}
diff --git a/src/runtime/mbarrier.go b/src/runtime/mbarrier.go
index eb5881707b..53a0a00ae7 100644
--- a/src/runtime/mbarrier.go
+++ b/src/runtime/mbarrier.go
@@ -10,12 +10,6 @@
// implementation, markwb, and the various wrappers called by the
// compiler to implement pointer assignment, slice assignment,
// typed memmove, and so on.
-//
-// To check for missed write barriers, the GODEBUG=wbshadow debugging
-// mode allocates a second copy of the heap. Write barrier-based pointer
-// updates make changes to both the real heap and the shadow, and both
-// the pointer updates and the GC look for inconsistencies between the two,
-// indicating pointer writes that bypassed the barrier.
package runtime
@@ -66,7 +60,7 @@ func gcmarkwb_m(slot *uintptr, ptr uintptr) {
default:
throw("gcphasework in bad gcphase")
- case _GCoff, _GCquiesce, _GCstw, _GCsweep, _GCscan:
+ case _GCoff, _GCstw, _GCsweep, _GCscan:
// ok
case _GCmark, _GCmarktermination:
@@ -107,43 +101,19 @@ func writebarrierptr_nostore1(dst *uintptr, src uintptr) {
// but if we do that, Go inserts a write barrier on *dst = src.
//go:nosplit
func writebarrierptr(dst *uintptr, src uintptr) {
+ *dst = src
if !writeBarrierEnabled {
- *dst = src
return
}
-
if src != 0 && (src < _PhysPageSize || src == poisonStack) {
- systemstack(func() { throw("bad pointer in write barrier") })
- }
-
- if mheap_.shadow_enabled {
- writebarrierptr_shadow(dst, src)
+ systemstack(func() {
+ print("runtime: writebarrierptr *", dst, " = ", hex(src), "\n")
+ throw("bad pointer in write barrier")
+ })
}
-
- *dst = src
writebarrierptr_nostore1(dst, src)
}
-//go:nosplit
-func writebarrierptr_shadow(dst *uintptr, src uintptr) {
- systemstack(func() {
- addr := uintptr(unsafe.Pointer(dst))
- shadow := shadowptr(addr)
- if shadow == nil {
- return
- }
- // There is a race here but only if the program is using
- // racy writes instead of sync/atomic. In that case we
- // don't mind crashing.
- if *shadow != *dst && *shadow != noShadow && istrackedptr(*dst) {
- mheap_.shadow_enabled = false
- print("runtime: write barrier dst=", dst, " old=", hex(*dst), " shadow=", shadow, " old=", hex(*shadow), " new=", hex(src), "\n")
- throw("missed write barrier")
- }
- *shadow = src
- })
-}
-
// Like writebarrierptr, but the store has already been applied.
// Do not reapply.
//go:nosplit
@@ -151,44 +121,12 @@ func writebarrierptr_nostore(dst *uintptr, src uintptr) {
if !writeBarrierEnabled {
return
}
-
if src != 0 && (src < _PhysPageSize || src == poisonStack) {
systemstack(func() { throw("bad pointer in write barrier") })
}
-
- // Apply changes to shadow.
- // Since *dst has been overwritten already, we cannot check
- // whether there were any missed updates, but writebarrierptr_nostore
- // is only rarely used.
- if mheap_.shadow_enabled {
- systemstack(func() {
- addr := uintptr(unsafe.Pointer(dst))
- shadow := shadowptr(addr)
- if shadow == nil {
- return
- }
- *shadow = src
- })
- }
-
writebarrierptr_nostore1(dst, src)
}
-// writebarrierptr_noshadow records that the value in *dst
-// has been written to using an atomic operation and the shadow
-// has not been updated. (In general if dst must be manipulated
-// atomically we cannot get the right bits for use in the shadow.)
-//go:nosplit
-func writebarrierptr_noshadow(dst *uintptr) {
- addr := uintptr(unsafe.Pointer(dst))
- shadow := shadowptr(addr)
- if shadow == nil {
- return
- }
-
- *shadow = noShadow
-}
-
//go:nosplit
func writebarrierstring(dst *[2]uintptr, src [2]uintptr) {
writebarrierptr(&dst[0], src[0])
@@ -217,37 +155,11 @@ func writebarrieriface(dst *[2]uintptr, src [2]uintptr) {
// typedmemmove copies a value of type t to dst from src.
//go:nosplit
func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
- if !writeBarrierEnabled || (typ.kind&kindNoPointers) != 0 {
- memmove(dst, src, typ.size)
+ memmove(dst, src, typ.size)
+ if typ.kind&kindNoPointers != 0 {
return
}
-
- systemstack(func() {
- mask := typeBitmapInHeapBitmapFormat(typ)
- nptr := typ.size / ptrSize
- for i := uintptr(0); i < nptr; i += 2 {
- bits := mask[i/2]
- if (bits>>2)&typeMask == typePointer {
- writebarrierptr((*uintptr)(dst), *(*uintptr)(src))
- } else {
- *(*uintptr)(dst) = *(*uintptr)(src)
- }
- // TODO(rsc): The noescape calls should be unnecessary.
- dst = add(noescape(dst), ptrSize)
- src = add(noescape(src), ptrSize)
- if i+1 == nptr {
- break
- }
- bits >>= 4
- if (bits>>2)&typeMask == typePointer {
- writebarrierptr((*uintptr)(dst), *(*uintptr)(src))
- } else {
- *(*uintptr)(dst) = *(*uintptr)(src)
- }
- dst = add(noescape(dst), ptrSize)
- src = add(noescape(src), ptrSize)
- }
- })
+ heapBitsBulkBarrier(uintptr(dst), typ.size)
}
//go:linkname reflect_typedmemmove reflect.typedmemmove
@@ -259,38 +171,16 @@ func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) {
// dst and src point off bytes into the value and only copies size bytes.
//go:linkname reflect_typedmemmovepartial reflect.typedmemmovepartial
func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size uintptr) {
- if !writeBarrierEnabled || (typ.kind&kindNoPointers) != 0 || size < ptrSize {
- memmove(dst, src, size)
+ memmove(dst, src, size)
+ if !writeBarrierEnabled || typ.kind&kindNoPointers != 0 || size < ptrSize || !inheap(uintptr(dst)) {
return
}
- if off&(ptrSize-1) != 0 {
- frag := -off & (ptrSize - 1)
- // frag < size, because size >= ptrSize, checked above.
- memmove(dst, src, frag)
+ if frag := -off & (ptrSize - 1); frag != 0 {
+ dst = add(dst, frag)
size -= frag
- dst = add(noescape(dst), frag)
- src = add(noescape(src), frag)
- off += frag
- }
-
- mask := typeBitmapInHeapBitmapFormat(typ)
- nptr := (off + size) / ptrSize
- for i := uintptr(off / ptrSize); i < nptr; i++ {
- bits := mask[i/2] >> ((i & 1) << 2)
- if (bits>>2)&typeMask == typePointer {
- writebarrierptr((*uintptr)(dst), *(*uintptr)(src))
- } else {
- *(*uintptr)(dst) = *(*uintptr)(src)
- }
- // TODO(rsc): The noescape calls should be unnecessary.
- dst = add(noescape(dst), ptrSize)
- src = add(noescape(src), ptrSize)
- }
- size &= ptrSize - 1
- if size > 0 {
- memmove(dst, src, size)
}
+ heapBitsBulkBarrier(uintptr(dst), size&^(ptrSize-1))
}
// callwritebarrier is invoked at the end of reflectcall, to execute
@@ -302,29 +192,16 @@ func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size
// not to be preempted before the write barriers have been run.
//go:nosplit
func callwritebarrier(typ *_type, frame unsafe.Pointer, framesize, retoffset uintptr) {
- if !writeBarrierEnabled || typ == nil || (typ.kind&kindNoPointers) != 0 || framesize-retoffset < ptrSize {
+ if !writeBarrierEnabled || typ == nil || typ.kind&kindNoPointers != 0 || framesize-retoffset < ptrSize || !inheap(uintptr(frame)) {
return
}
-
- systemstack(func() {
- mask := typeBitmapInHeapBitmapFormat(typ)
- // retoffset is known to be pointer-aligned (at least).
- // TODO(rsc): The noescape call should be unnecessary.
- dst := add(noescape(frame), retoffset)
- nptr := framesize / ptrSize
- for i := uintptr(retoffset / ptrSize); i < nptr; i++ {
- bits := mask[i/2] >> ((i & 1) << 2)
- if (bits>>2)&typeMask == typePointer {
- writebarrierptr_nostore((*uintptr)(dst), *(*uintptr)(dst))
- }
- // TODO(rsc): The noescape call should be unnecessary.
- dst = add(noescape(dst), ptrSize)
- }
- })
+ heapBitsBulkBarrier(uintptr(add(frame, retoffset)), framesize-retoffset)
}
//go:nosplit
func typedslicecopy(typ *_type, dst, src slice) int {
+ // TODO(rsc): If typedslicecopy becomes faster than calling
+ // typedmemmove repeatedly, consider using during func growslice.
n := dst.len
if n > src.len {
n = src.len
@@ -342,6 +219,10 @@ func typedslicecopy(typ *_type, dst, src slice) int {
racereadrangepc(srcp, uintptr(n)*typ.size, callerpc, pc)
}
+ // Note: No point in checking typ.kind&kindNoPointers here:
+ // compiler only emits calls to typedslicecopy for types with pointers,
+ // and growslice and reflect_typedslicecopy check for pointers
+ // before calling typedslicecopy.
if !writeBarrierEnabled {
memmove(dstp, srcp, uintptr(n)*typ.size)
return n
@@ -382,134 +263,13 @@ func typedslicecopy(typ *_type, dst, src slice) int {
//go:linkname reflect_typedslicecopy reflect.typedslicecopy
func reflect_typedslicecopy(elemType *_type, dst, src slice) int {
- return typedslicecopy(elemType, dst, src)
-}
-
-// Shadow heap for detecting missed write barriers.
-
-// noShadow is stored in as the shadow pointer to mark that there is no
-// shadow word recorded. It matches any actual pointer word.
-// noShadow is used when it is impossible to know the right word
-// to store in the shadow heap, such as when the real heap word
-// is being manipulated atomically.
-const noShadow uintptr = 1
-
-func wbshadowinit() {
- // Initialize write barrier shadow heap if we were asked for it
- // and we have enough address space (not on 32-bit).
- if debug.wbshadow == 0 {
- return
- }
- if ptrSize != 8 {
- print("runtime: GODEBUG=wbshadow=1 disabled on 32-bit system\n")
- return
- }
-
- var reserved bool
- p1 := sysReserveHigh(mheap_.arena_end-mheap_.arena_start, &reserved)
- if p1 == nil {
- throw("cannot map shadow heap")
- }
- mheap_.shadow_heap = uintptr(p1) - mheap_.arena_start
- sysMap(p1, mheap_.arena_used-mheap_.arena_start, reserved, &memstats.other_sys)
- memmove(p1, unsafe.Pointer(mheap_.arena_start), mheap_.arena_used-mheap_.arena_start)
-
- mheap_.shadow_reserved = reserved
-
- for datap := &firstmoduledata; datap != nil; datap = datap.next {
- start := ^uintptr(0)
- end := uintptr(0)
- if start > datap.noptrdata {
- start = datap.noptrdata
- }
- if start > datap.data {
- start = datap.data
- }
- if start > datap.noptrbss {
- start = datap.noptrbss
- }
- if start > datap.bss {
- start = datap.bss
- }
- if end < datap.enoptrdata {
- end = datap.enoptrdata
- }
- if end < datap.edata {
- end = datap.edata
- }
- if end < datap.enoptrbss {
- end = datap.enoptrbss
- }
- if end < datap.ebss {
- end = datap.ebss
- }
- start &^= _PhysPageSize - 1
- end = round(end, _PhysPageSize)
- datap.data_start = start
- datap.data_end = end
- reserved = false
- p1 = sysReserveHigh(end-start, &reserved)
- if p1 == nil {
- throw("cannot map shadow data")
+ if elemType.kind&kindNoPointers != 0 {
+ n := dst.len
+ if n > src.len {
+ n = src.len
}
- datap.shadow_data = uintptr(p1) - start
- sysMap(p1, end-start, reserved, &memstats.other_sys)
- memmove(p1, unsafe.Pointer(start), end-start)
- }
-
- mheap_.shadow_enabled = true
- writeBarrierEnabled = true
-}
-
-// shadowptr returns a pointer to the shadow value for addr.
-//go:nosplit
-func shadowptr(addr uintptr) *uintptr {
- for datap := &firstmoduledata; datap != nil; datap = datap.next {
- if datap.data_start <= addr && addr < datap.data_end {
- return (*uintptr)(unsafe.Pointer(addr + datap.shadow_data))
- }
- }
- if inheap(addr) {
- return (*uintptr)(unsafe.Pointer(addr + mheap_.shadow_heap))
- }
- return nil
-}
-
-// istrackedptr reports whether the pointer value p requires a write barrier
-// when stored into the heap.
-func istrackedptr(p uintptr) bool {
- return inheap(p)
-}
-
-// checkwbshadow checks that p matches its shadow word.
-// The garbage collector calls checkwbshadow for each pointer during the checkmark phase.
-// It is only called when mheap_.shadow_enabled is true.
-func checkwbshadow(p *uintptr) {
- addr := uintptr(unsafe.Pointer(p))
- shadow := shadowptr(addr)
- if shadow == nil {
- return
- }
- // There is no race on the accesses here, because the world is stopped,
- // but there may be racy writes that lead to the shadow and the
- // heap being inconsistent. If so, we will detect that here as a
- // missed write barrier and crash. We don't mind.
- // Code should use sync/atomic instead of racy pointer writes.
- if *shadow != *p && *shadow != noShadow && istrackedptr(*p) {
- mheap_.shadow_enabled = false
- print("runtime: checkwritebarrier p=", p, " *p=", hex(*p), " shadow=", shadow, " *shadow=", hex(*shadow), "\n")
- throw("missed write barrier")
- }
-}
-
-// clearshadow clears the shadow copy associated with the n bytes of memory at addr.
-func clearshadow(addr, n uintptr) {
- if !mheap_.shadow_enabled {
- return
- }
- p := shadowptr(addr)
- if p == nil || n <= ptrSize {
- return
+ memmove(dst.array, src.array, uintptr(n)*elemType.size)
+ return n
}
- memclr(unsafe.Pointer(p), n)
+ return typedslicecopy(elemType, dst, src)
}
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index f0c7520e38..b20908fb49 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -6,48 +6,40 @@
//
// Stack, data, and bss bitmaps
//
-// Not handled in this file, but worth mentioning: stack frames and global data
-// in the data and bss sections are described by 1-bit bitmaps in which 0 means
-// scalar or uninitialized or dead and 1 means pointer to visit during GC.
-//
-// Comparing this 1-bit form with the 2-bit form described below, 0 represents
-// both the 2-bit 00 and 01, while 1 represents the 2-bit 10.
-// Therefore conversions between the two (until the 2-bit form is gone)
-// can be done by x>>1 for 2-bit to 1-bit and x+1 for 1-bit to 2-bit.
-//
-// Type bitmaps
-//
-// Types that aren't too large
-// record information about the layout of their memory words using a type bitmap.
-// The bitmap holds two bits for each pointer-sized word. The two-bit values are:
-//
-// 00 - typeDead: not a pointer, and no pointers in the rest of the object
-// 01 - typeScalar: not a pointer
-// 10 - typePointer: a pointer that GC should trace
-// 11 - unused
-//
-// typeDead only appears in type bitmaps in Go type descriptors
-// and in type bitmaps embedded in the heap bitmap (see below).
+// Stack frames and global variables in the data and bss sections are described
+// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer
+// to be visited during GC. The bits in each byte are consumed starting with
+// the low bit: 1<<0, 1<<1, and so on.
//
// Heap bitmap
//
// The allocated heap comes from a subset of the memory in the range [start, used),
// where start == mheap_.arena_start and used == mheap_.arena_used.
-// The heap bitmap comprises 4 bits for each pointer-sized word in that range,
+// The heap bitmap comprises 2 bits for each pointer-sized word in that range,
// stored in bytes indexed backward in memory from start.
-// That is, the byte at address start-1 holds the 4-bit entries for the two words
-// start, start+ptrSize, the byte at start-2 holds the entries for start+2*ptrSize,
-// start+3*ptrSize, and so on.
-// In the byte holding the entries for addresses p and p+ptrSize, the low 4 bits
-// describe p and the high 4 bits describe p+ptrSize.
+// That is, the byte at address start-1 holds the 2-bit entries for the four words
+// start through start+3*ptrSize, the byte at start-2 holds the entries for
+// start+4*ptrSize through start+7*ptrSize, and so on.
//
-// The 4 bits for each word are:
-// 0001 - not used
-// 0010 - bitMarked: this object has been marked by GC
-// tt00 - word type bits, as in a type bitmap.
+// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
+// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
+// The meaning of the high bit depends on the position of the word being described
+// in its allocated object. In the first word, the high bit is the GC ``marked'' bit.
+// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
+// In the third and later words, the high bit indicates that the object is still
+// being described. In these words, if a bit pair with a high bit 0 is encountered,
+// the low bit can also be assumed to be 0, and the object description is over.
+// This 00 is called the ``dead'' encoding: it signals that the rest of the words
+// in the object are uninteresting to the garbage collector.
//
-// The code makes use of the fact that the zero value for a heap bitmap nibble
-// has no boundary bit set, no marked bit set, and type bits == typeDead.
+// The 2-bit entries are split when written into the byte, so that the top half
+// of the byte contains 4 mark bits and the bottom half contains 4 pointer bits.
+// This form allows a copy from the 1-bit to the 4-bit form to keep the
+// pointer bits contiguous, instead of having to space them out.
+//
+// The code makes use of the fact that the zero value for a heap bitmap
+// has no live pointer bit set and is (depending on position), not marked,
+// not checkmarked, and is the dead encoding.
// These properties must be preserved when modifying the encoding.
//
// Checkmarks
@@ -57,55 +49,71 @@
// collector implementation. As a sanity check, the GC has a 'checkmark'
// mode that retraverses the object graph with the world stopped, to make
// sure that everything that should be marked is marked.
-// In checkmark mode, in the heap bitmap, the type bits for the first word
-// of an object are redefined:
-//
-// 00 - typeScalarCheckmarked // typeScalar, checkmarked
-// 01 - typeScalar // typeScalar, not checkmarked
-// 10 - typePointer // typePointer, not checkmarked
-// 11 - typePointerCheckmarked // typePointer, checkmarked
+// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry
+// for the second word of the object holds the checkmark bit.
+// When not in checkmark mode, this bit is set to 1.
//
-// That is, typeDead is redefined to be typeScalar + a checkmark, and the
-// previously unused 11 pattern is redefined to be typePointer + a checkmark.
-// To prepare for this mode, we must move any typeDead in the first word of
-// a multiword object to the second word.
+// The smallest possible allocation is 8 bytes. On a 32-bit machine, that
+// means every allocated object has two words, so there is room for the
+// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is
+// just one word, so the second bit pair is not available for encoding the
+// checkmark. However, because non-pointer allocations are combined
+// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation
+// must be a pointer, so the type bit in the first word is not actually needed.
+// It is still used in general, except in checkmark the type bit is repurposed
+// as the checkmark bit and then reinitialized (to 1) as the type bit when
+// finished.
package runtime
import "unsafe"
const (
- typeDead = 0
- typeScalarCheckmarked = 0
- typeScalar = 1
- typePointer = 2
- typePointerCheckmarked = 3
+ bitPointer = 1 << 0
+ bitMarked = 1 << 4
- typeBitsWidth = 2 // # of type bits per pointer-sized word
- typeMask = 1<<typeBitsWidth - 1
+ heapBitsShift = 1 // shift offset between successive bitPointer or bitMarked entries
+ heapBitmapScale = ptrSize * (8 / 2) // number of data bytes described by one heap bitmap byte
- heapBitsWidth = 4
- heapBitmapScale = ptrSize * (8 / heapBitsWidth) // number of data bytes per heap bitmap byte
- bitMarked = 2
- typeShift = 2
+ // all mark/pointer bits in a byte
+ bitMarkedAll = bitMarked | bitMarked<<heapBitsShift | bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
+ bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
)
-// Information from the compiler about the layout of stack frames.
-type bitvector struct {
- n int32 // # of bits
- bytedata *uint8
-}
-
// addb returns the byte pointer p+n.
//go:nowritebarrier
func addb(p *byte, n uintptr) *byte {
- return (*byte)(add(unsafe.Pointer(p), n))
+ // Note: wrote out full expression instead of calling add(p, n)
+ // to reduce the number of temporaries generated by the
+ // compiler for this trivial expression during inlining.
+ return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}
// subtractb returns the byte pointer p-n.
//go:nowritebarrier
func subtractb(p *byte, n uintptr) *byte {
- return (*byte)(add(unsafe.Pointer(p), -n))
+ // Note: wrote out full expression instead of calling add(p, -n)
+ // to reduce the number of temporaries generated by the
+ // compiler for this trivial expression during inlining.
+ return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
+}
+
+// add1 returns the byte pointer p+1.
+//go:nowritebarrier
+func add1(p *byte) *byte {
+ // Note: wrote out full expression instead of calling addb(p, 1)
+ // to reduce the number of temporaries generated by the
+ // compiler for this trivial expression during inlining.
+ return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
+}
+
+// subtract1 returns the byte pointer p-1.
+//go:nowritebarrier
+func subtract1(p *byte) *byte {
+ // Note: wrote out full expression instead of calling subtractb(p, 1)
+ // to reduce the number of temporaries generated by the
+ // compiler for this trivial expression during inlining.
+ return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}
// mHeap_MapBits is called each time arena_used is extended.
@@ -140,9 +148,13 @@ type heapBits struct {
// heapBitsForAddr returns the heapBits for the address addr.
// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
+//
+// nosplit because it is used during write barriers and must not be preempted.
+//go:nosplit
func heapBitsForAddr(addr uintptr) heapBits {
+	// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
off := (addr - mheap_.arena_start) / ptrSize
- return heapBits{(*uint8)(unsafe.Pointer(mheap_.arena_start - off/2 - 1)), uint32(4 * (off & 1))}
+ return heapBits{(*uint8)(unsafe.Pointer(mheap_.arena_start - off/4 - 1)), uint32(off & 3)}
}
// heapBitsForSpan returns the heapBits for the span base address base.
@@ -229,20 +241,39 @@ func (h heapBits) prefetch() {
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
func (h heapBits) next() heapBits {
- if h.shift == 0 {
- return heapBits{h.bitp, 4}
+ if h.shift < 3*heapBitsShift {
+ return heapBits{h.bitp, h.shift + heapBitsShift}
}
- return heapBits{subtractb(h.bitp, 1), 0}
+ return heapBits{subtract1(h.bitp), 0}
+}
+
+// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
+// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
+// h.forward(1) is equivalent to h.next(), just slower.
+// Note that forward does not modify h. The caller must record the result.
+func (h heapBits) forward(n uintptr) heapBits {
+	n += uintptr(h.shift) / heapBitsShift
+	return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift}
+}
+
+// bits returns the heap bits for the current word.
+// The caller can test isMarked and isPointer by &-ing with bitMarked and bitPointer.
+// The result includes in its higher bits the bits for subsequent words
+// described by the same bitmap byte.
+func (h heapBits) bits() uint32 {
+ return uint32(*h.bitp) >> h.shift
}
// isMarked reports whether the heap bits have the marked bit set.
+// h must describe the initial word of the object.
func (h heapBits) isMarked() bool {
return *h.bitp&(bitMarked<<h.shift) != 0
}
// setMarked sets the marked bit in the heap bits, atomically.
+// h must describe the initial word of the object.
func (h heapBits) setMarked() {
- // Each byte of GC bitmap holds info for two words.
+ // Each byte of GC bitmap holds info for four words.
// Might be racing with other updates, so use atomic update always.
// We used to be clever here and use a non-atomic update in certain
// cases, but it's not worth the risk.
@@ -250,30 +281,103 @@ func (h heapBits) setMarked() {
}
// setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically.
+// h must describe the initial word of the object.
func (h heapBits) setMarkedNonAtomic() {
*h.bitp |= bitMarked << h.shift
}
-// typeBits returns the heap bits' type bits.
-func (h heapBits) typeBits() uint8 {
- return (*h.bitp >> (h.shift + typeShift)) & typeMask
+// isPointer reports whether the heap bits describe a pointer word.
+// h must describe the initial word of the object.
+func (h heapBits) isPointer() bool {
+ return (*h.bitp>>h.shift)&bitPointer != 0
+}
+
+// hasPointers reports whether the given object has any pointers.
+// It must be told how large the object at h is, so that it does not read too
+// far into the bitmap.
+// h must describe the initial word of the object.
+func (h heapBits) hasPointers(size uintptr) bool {
+ if size == ptrSize { // 1-word objects are always pointers
+ return true
+ }
+ // Otherwise, at least a 2-word object, and at least 2-word aligned,
+ // so h.shift is either 0 or 4, so we know we can get the bits for the
+ // first two words out of *h.bitp.
+ // If either of the first two words is a pointer, not pointer free.
+ b := uint32(*h.bitp >> h.shift)
+ if b&(bitPointer|bitPointer<<heapBitsShift) != 0 {
+ return true
+ }
+ if size == 2*ptrSize {
+ return false
+ }
+ // At least a 4-word object. Check scan bit (aka marked bit) in third word.
+ if h.shift == 0 {
+ return b&(bitMarked<<(2*heapBitsShift)) != 0
+ }
+ return uint32(*subtract1(h.bitp))&bitMarked != 0
}
// isCheckmarked reports whether the heap bits have the checkmarked bit set.
-func (h heapBits) isCheckmarked() bool {
- typ := h.typeBits()
- return typ == typeScalarCheckmarked || typ == typePointerCheckmarked
+// It must be told how large the object at h is, because the encoding of the
+// checkmark bit varies by size.
+// h must describe the initial word of the object.
+func (h heapBits) isCheckmarked(size uintptr) bool {
+ if size == ptrSize {
+ return (*h.bitp>>h.shift)&bitPointer != 0
+ }
+ // All multiword objects are 2-word aligned,
+ // so we know that the initial word's 2-bit pair
+ // and the second word's 2-bit pair are in the
+ // same heap bitmap byte, *h.bitp.
+ return (*h.bitp>>(heapBitsShift+h.shift))&bitMarked != 0
}
// setCheckmarked sets the checkmarked bit.
-func (h heapBits) setCheckmarked() {
- typ := h.typeBits()
- if typ == typeScalar {
- // Clear low type bit to turn 01 into 00.
- atomicand8(h.bitp, ^((1 << typeShift) << h.shift))
- } else if typ == typePointer {
- // Set low type bit to turn 10 into 11.
- atomicor8(h.bitp, (1<<typeShift)<<h.shift)
+// It must be told how large the object at h is, because the encoding of the
+// checkmark bit varies by size.
+// h must describe the initial word of the object.
+func (h heapBits) setCheckmarked(size uintptr) {
+ if size == ptrSize {
+ atomicor8(h.bitp, bitPointer<<h.shift)
+ return
+ }
+ atomicor8(h.bitp, bitMarked<<(heapBitsShift+h.shift))
+}
+
+// heapBitsBulkBarrier executes writebarrierptr_nostore
+// for every pointer slot in the memory range [p, p+size),
+// using the heap bitmap to locate those pointer slots.
+// This executes the write barriers necessary after a memmove.
+// Both p and size must be pointer-aligned.
+// The range [p, p+size) must lie within a single allocation.
+//
+// Callers should call heapBitsBulkBarrier immediately after
+// calling memmove(p, src, size). This function is marked nosplit
+// to avoid being preempted; the GC must not stop the goroutine
+// between the memmove and the execution of the barriers.
+//
+// The heap bitmap is not maintained for allocations containing
+// no pointers at all; any caller of heapBitsBulkBarrier must first
+// make sure the underlying allocation contains pointers, usually
+// by checking typ.kind&kindNoPointers.
+//
+//go:nosplit
+func heapBitsBulkBarrier(p, size uintptr) {
+ if (p|size)&(ptrSize-1) != 0 {
+ throw("heapBitsBulkBarrier: unaligned arguments")
+ }
+ if !writeBarrierEnabled || !inheap(p) {
+ return
+ }
+
+ h := heapBitsForAddr(p)
+ for i := uintptr(0); i < size; i += ptrSize {
+ if h.isPointer() {
+ x := (*uintptr)(unsafe.Pointer(p + i))
+ writebarrierptr_nostore(x, *x)
+ }
+ h = h.next()
}
}
@@ -291,99 +395,59 @@ func (h heapBits) initSpan(size, n, total uintptr) {
throw("initSpan: unaligned length")
}
nbyte := total / heapBitmapScale
+ if ptrSize == 8 && size == ptrSize {
+ end := h.bitp
+ bitp := subtractb(end, nbyte-1)
+ for {
+ *bitp = bitPointerAll
+ if bitp == end {
+ break
+ }
+ bitp = add1(bitp)
+ }
+ return
+ }
memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
}
// initCheckmarkSpan initializes a span for being checkmarked.
-// This would be a no-op except that we need to rewrite any
-// typeDead bits in the first word of the object into typeScalar
-// followed by a typeDead in the second word of the object.
+// It clears the checkmark bits, which are set to 1 in normal operation.
func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
- if size == ptrSize {
+ // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely.
+ if ptrSize == 8 && size == ptrSize {
+ // Checkmark bit is type bit, bottom bit of every 2-bit entry.
// Only possible on 64-bit system, since minimum size is 8.
- // Must update both top and bottom nibble of each byte.
- // There is no second word in these objects, so all we have
- // to do is rewrite typeDead to typeScalar by adding the 1<<typeShift bit.
+ // Must clear type bit (checkmark bit) of every word.
+ // The type bit is the lower of every two-bit pair.
bitp := h.bitp
- for i := uintptr(0); i < n; i += 2 {
- x := int(*bitp)
-
- if (x>>typeShift)&typeMask == typeDead {
- x += (typeScalar - typeDead) << typeShift
- }
- if (x>>(4+typeShift))&typeMask == typeDead {
- x += (typeScalar - typeDead) << (4 + typeShift)
- }
- *bitp = uint8(x)
- bitp = subtractb(bitp, 1)
+ for i := uintptr(0); i < n; i += 4 {
+ *bitp &^= bitPointerAll
+ bitp = subtract1(bitp)
}
return
}
-
- // Update bottom nibble for first word of each object.
- // If the bottom nibble says typeDead, change to typeScalar
- // and clear top nibble to mark as typeDead.
- bitp := h.bitp
- step := size / heapBitmapScale
for i := uintptr(0); i < n; i++ {
- x := *bitp
- if (x>>typeShift)&typeMask == typeDead {
- x += (typeScalar - typeDead) << typeShift
- x &= 0x0f // clear top nibble to typeDead
- }
- bitp = subtractb(bitp, step)
+ *h.bitp &^= bitMarked << (heapBitsShift + h.shift)
+ h = h.forward(size / ptrSize)
}
}
-// clearCheckmarkSpan removes all the checkmarks from a span.
-// If it finds a multiword object starting with typeScalar typeDead,
-// it rewrites the heap bits to the simpler typeDead typeDead.
+// clearCheckmarkSpan undoes all the checkmarking in a span.
+// The actual checkmark bits are ignored, so the only work to do
+// is to fix the pointer bits. (Pointer bits are ignored by scanobject
+// but consulted by typedmemmove.)
func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
- if size == ptrSize {
+ // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely.
+ if ptrSize == 8 && size == ptrSize {
+ // Checkmark bit is type bit, bottom bit of every 2-bit entry.
// Only possible on 64-bit system, since minimum size is 8.
- // Must update both top and bottom nibble of each byte.
- // typeScalarCheckmarked can be left as typeDead,
- // but we want to change typeScalar back to typeDead.
+ // Must clear type bit (checkmark bit) of every word.
+ // The type bit is the lower of every two-bit pair.
bitp := h.bitp
- for i := uintptr(0); i < n; i += 2 {
- x := int(*bitp)
- switch typ := (x >> typeShift) & typeMask; typ {
- case typeScalar:
- x += (typeDead - typeScalar) << typeShift
- case typePointerCheckmarked:
- x += (typePointer - typePointerCheckmarked) << typeShift
- }
-
- switch typ := (x >> (4 + typeShift)) & typeMask; typ {
- case typeScalar:
- x += (typeDead - typeScalar) << (4 + typeShift)
- case typePointerCheckmarked:
- x += (typePointer - typePointerCheckmarked) << (4 + typeShift)
- }
-
- *bitp = uint8(x)
- bitp = subtractb(bitp, 1)
- }
- return
- }
-
- // Update bottom nibble for first word of each object.
- // If the bottom nibble says typeScalarCheckmarked and the top is not typeDead,
- // change to typeScalar. Otherwise leave, since typeScalarCheckmarked == typeDead.
- // If the bottom nibble says typePointerCheckmarked, change to typePointer.
- bitp := h.bitp
- step := size / heapBitmapScale
- for i := uintptr(0); i < n; i++ {
- x := int(*bitp)
- switch typ := (x >> typeShift) & typeMask; {
- case typ == typeScalarCheckmarked && (x>>(4+typeShift))&typeMask != typeDead:
- x += (typeScalar - typeScalarCheckmarked) << typeShift
- case typ == typePointerCheckmarked:
- x += (typePointer - typePointerCheckmarked) << typeShift
+ for i := uintptr(0); i < n; i += 4 {
+ *bitp |= bitPointerAll
+ bitp = subtract1(bitp)
}
-
- *bitp = uint8(x)
- bitp = subtractb(bitp, step)
}
}
@@ -393,348 +457,1046 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
// bits for the first two words (or one for single-word objects) to typeDead
// and then calls f(p), where p is the object's base address.
// f is expected to add the object to a free list.
+// For non-free objects, heapBitsSweepSpan turns off the marked bit.
func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
h := heapBitsForSpan(base)
- if size == ptrSize {
- // Only possible on 64-bit system, since minimum size is 8.
- // Must read and update both top and bottom nibble of each byte.
+ switch {
+ default:
+ throw("heapBitsSweepSpan")
+ case ptrSize == 8 && size == ptrSize:
+ // Consider mark bits in all four 2-bit entries of each bitmap byte.
bitp := h.bitp
- for i := uintptr(0); i < n; i += 2 {
- x := int(*bitp)
+ for i := uintptr(0); i < n; i += 4 {
+ x := uint32(*bitp)
+ // Note that unlike the other size cases, we leave the pointer bits set here.
+ // These are initialized during initSpan when the span is created and left
+ // in place the whole time the span is used for pointer-sized objects.
+ // That lets heapBitsSetType avoid an atomic update to set the pointer bit
+ // during allocation.
if x&bitMarked != 0 {
x &^= bitMarked
} else {
- x &^= typeMask << typeShift
f(base + i*ptrSize)
}
- if x&(bitMarked<<4) != 0 {
- x &^= bitMarked << 4
+ if x&(bitMarked<<heapBitsShift) != 0 {
+ x &^= bitMarked << heapBitsShift
} else {
- x &^= typeMask << (4 + typeShift)
f(base + (i+1)*ptrSize)
}
+ if x&(bitMarked<<(2*heapBitsShift)) != 0 {
+ x &^= bitMarked << (2 * heapBitsShift)
+ } else {
+ f(base + (i+2)*ptrSize)
+ }
+ if x&(bitMarked<<(3*heapBitsShift)) != 0 {
+ x &^= bitMarked << (3 * heapBitsShift)
+ } else {
+ f(base + (i+3)*ptrSize)
+ }
*bitp = uint8(x)
- bitp = subtractb(bitp, 1)
+ bitp = subtract1(bitp)
}
- return
- }
- bitp := h.bitp
- step := size / heapBitmapScale
- for i := uintptr(0); i < n; i++ {
- x := int(*bitp)
- if x&bitMarked != 0 {
- x &^= bitMarked
- } else {
- x = 0
- f(base + i*size)
+ case size%(4*ptrSize) == 0:
+ // Mark bit is in first word of each object.
+ // Each object starts at bit 0 of a heap bitmap byte.
+ bitp := h.bitp
+ step := size / heapBitmapScale
+ for i := uintptr(0); i < n; i++ {
+ x := uint32(*bitp)
+ if x&bitMarked != 0 {
+ x &^= bitMarked
+ } else {
+ x = 0
+ f(base + i*size)
+ }
+ *bitp = uint8(x)
+ bitp = subtractb(bitp, step)
+ }
+
+ case size%(4*ptrSize) == 2*ptrSize:
+ // Mark bit is in first word of each object,
+ // but every other object starts halfway through a heap bitmap byte.
+ // Unroll loop 2x to handle alternating shift count and step size.
+ bitp := h.bitp
+ step := size / heapBitmapScale
+ var i uintptr
+ for i = uintptr(0); i < n; i += 2 {
+ x := uint32(*bitp)
+ if x&bitMarked != 0 {
+ x &^= bitMarked
+ } else {
+ x &^= bitMarked | bitPointer | (bitMarked|bitPointer)<<heapBitsShift
+ f(base + i*size)
+ if size > 2*ptrSize {
+ x = 0
+ }
+ }
+ *bitp = uint8(x)
+ if i+1 >= n {
+ break
+ }
+ bitp = subtractb(bitp, step)
+ x = uint32(*bitp)
+ if x&(bitMarked<<(2*heapBitsShift)) != 0 {
+ x &^= bitMarked << (2 * heapBitsShift)
+ } else {
+ x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift)
+ f(base + (i+1)*size)
+ if size > 2*ptrSize {
+ *subtract1(bitp) = 0
+ }
+ }
+ *bitp = uint8(x)
+ bitp = subtractb(bitp, step+1)
}
- *bitp = uint8(x)
- bitp = subtractb(bitp, step)
}
}
-// TODO(rsc): Clean up the next two functions.
-
// heapBitsSetType records that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.size.)
// If dataSize < size, the fragment [x+dataSize, x+size) is
// recorded as non-pointer data.
+// It is known that the type has pointers somewhere;
+// malloc does not call heapBitsSetType when there are no pointers,
+// because all free objects are marked as noscan during
+// heapBitsSweepSpan.
+// There can only be one allocation from a given span active at a time,
+// so this code is not racing with other instances of itself,
+// and we don't allocate from a span until it has been swept,
+// so this code is not racing with heapBitsSweepSpan.
+// It is, however, racing with the concurrent GC mark phase,
+// which can be setting the mark bit in the leading 2-bit entry
+// of an allocated block. The block we are modifying is not quite
+// allocated yet, so the GC marker is not racing with updates to x's bits,
+// but if the start or end of x shares a bitmap byte with an adjacent
+// object, the GC marker is racing with updates to those object's mark bits.
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
- // From here till marked label marking the object as allocated
- // and storing type info in the GC bitmap.
- h := heapBitsForAddr(x)
+ const doubleCheck = false // slow but helpful; enable to test modifications to this code
- var ti, te uintptr
- var ptrmask *uint8
- if size == ptrSize {
+ // dataSize is always size rounded up to the next malloc size class,
+ // except in the case of allocating a defer block, in which case
+ // size is sizeof(_defer{}) (at least 6 words) and dataSize may be
+ // arbitrarily larger.
+ //
+ // The checks for size == ptrSize and size == 2*ptrSize can therefore
+ // assume that dataSize == size without checking it explicitly.
+
+ if ptrSize == 8 && size == ptrSize {
// It's one word and it has pointers, it must be a pointer.
- // The bitmap byte is shared with the one-word object
- // next to it, and concurrent GC might be marking that
- // object, so we must use an atomic update.
- atomicor8(h.bitp, typePointer<<(typeShift+h.shift))
+ // In general we'd need an atomic update here if the
+ // concurrent GC were marking objects in this span,
+ // because each bitmap byte describes 3 other objects
+ // in addition to the one being allocated.
+ // However, since all allocated one-word objects are pointers
+ // (non-pointers are aggregated into tinySize allocations),
+ // initSpan sets the pointer bits for us. Nothing to do here.
+ if doubleCheck {
+ h := heapBitsForAddr(x)
+ if !h.isPointer() {
+ throw("heapBitsSetType: pointer bit missing")
+ }
+ }
return
}
- if typ.kind&kindGCProg != 0 {
- nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
- masksize := nptr
- if masksize%2 != 0 {
- masksize *= 2 // repeated
- }
- const typeBitsPerByte = 8 / typeBitsWidth
- masksize = masksize * typeBitsPerByte / 8 // 4 bits per word
- masksize++ // unroll flag in the beginning
- if masksize > maxGCMask && typ.gc[1] != 0 {
- // write barriers have not been updated to deal with this case yet.
- throw("maxGCMask too small for now")
- // If the mask is too large, unroll the program directly
- // into the GC bitmap. It's 7 times slower than copying
- // from the pre-unrolled mask, but saves 1/16 of type size
- // memory for the mask.
- systemstack(func() {
- unrollgcproginplace_m(unsafe.Pointer(x), typ, size, dataSize)
- })
+
+ h := heapBitsForAddr(x)
+ ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)
+
+ // Heap bitmap bits for 2-word object are only 4 bits,
+ // so also shared with objects next to it; use atomic updates.
+ // This is called out as a special case primarily for 32-bit systems,
+ // so that on 32-bit systems the code below can assume all objects
+ // are 4-word aligned (because they're all 16-byte aligned).
+ if size == 2*ptrSize {
+ if typ.size == ptrSize {
+ // We're allocating a block big enough to hold two pointers.
+ // On 64-bit, that means the actual object must be two pointers,
+ // or else we'd have used the one-pointer-sized block.
+ // On 32-bit, however, this is the 8-byte block, the smallest one.
+ // So it could be that we're allocating one pointer and this was
+ // just the smallest block available. Distinguish by checking dataSize.
+ // (In general the number of instances of typ being allocated is
+ // dataSize/typ.size.)
+ if ptrSize == 4 && dataSize == ptrSize {
+ // 1 pointer.
+ if gcphase == _GCoff {
+ *h.bitp |= bitPointer << h.shift
+ } else {
+ atomicor8(h.bitp, bitPointer<<h.shift)
+ }
+ } else {
+ // 2-element slice of pointer.
+ if gcphase == _GCoff {
+ *h.bitp |= (bitPointer | bitPointer<<heapBitsShift) << h.shift
+ } else {
+ atomicor8(h.bitp, (bitPointer|bitPointer<<heapBitsShift)<<h.shift)
+ }
+ }
return
}
- ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
- // Check whether the program is already unrolled
- // by checking if the unroll flag byte is set
- maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
- if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
- systemstack(func() {
- unrollgcprog_m(typ)
- })
+ // Otherwise typ.size must be 2*ptrSize, and typ.kind&kindGCProg == 0.
+ if doubleCheck {
+ if typ.size != 2*ptrSize || typ.kind&kindGCProg != 0 {
+ print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
+ throw("heapBitsSetType")
+ }
+ }
+ b := uint32(*ptrmask)
+ hb := b & 3
+ if gcphase == _GCoff {
+ *h.bitp |= uint8(hb << h.shift)
+ } else {
+ atomicor8(h.bitp, uint8(hb<<h.shift))
}
- ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
+ return
+ }
+
+ // Copy from 1-bit ptrmask into 2-bit bitmap.
+ // The basic approach is to use a single uintptr as a bit buffer,
+ // alternating between reloading the buffer and writing bitmap bytes.
+ // In general, one load can supply two bitmap byte writes.
+ // This is a lot of lines of code, but it compiles into relatively few
+ // machine instructions.
+
+ var (
+ // Ptrmask input.
+ p *byte // last ptrmask byte read
+ b uintptr // ptrmask bits already loaded
+ nb uintptr // number of bits in b at next read
+ endp *byte // final ptrmask byte to read (then repeat)
+ endnb uintptr // number of valid bits in *endp
+ pbits uintptr // alternate source of bits
+
+ // Heap bitmap output.
+ w uintptr // words processed
+ nw uintptr // number of words to process
+ hbitp *byte // next heap bitmap byte to write
+ hb uintptr // bits being prepared for *hbitp
+ )
+
+ hbitp = h.bitp
+
+ // Handle GC program. Delayed until this part of the code
+ // so that we can use the same double-checking mechanism
+ // as the 1-bit case. Nothing above could have encountered
+ // GC programs: the cases were all too small.
+ if typ.kind&kindGCProg != 0 {
+ heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
+ if doubleCheck {
+ // Double-check the heap bits written by GC program
+ // by running the GC program to create a 1-bit pointer mask
+ // and then jumping to the double-check code below.
+ // This doesn't catch bugs shared between the 1-bit and 4-bit
+ // GC program execution, but it does catch mistakes specific
+ // to just one of those and bugs in heapBitsSetTypeGCProg's
+ // implementation of arrays.
+ lock(&debugPtrmask.lock)
+ if debugPtrmask.data == nil {
+ debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
+ }
+ ptrmask = debugPtrmask.data
+ runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
+ goto Phase4
+ }
+ return
+ }
+
+ // Note about sizes:
+ //
+ // typ.size is the number of words in the object,
+ // and typ.ptrdata is the number of words in the prefix
+ // of the object that contains pointers. That is, the final
+ // typ.size - typ.ptrdata words contain no pointers.
+ // This allows optimization of a common pattern where
+ // an object has a small header followed by a large scalar
+ // buffer. If we know the pointers are over, we don't have
+ // to scan the buffer's heap bitmap at all.
+ // The 1-bit ptrmasks are sized to contain only bits for
+ // the typ.ptrdata prefix, zero padded out to a full byte
+ // of bitmap. This code sets nw (below) so that heap bitmap
+ // bits are only written for the typ.ptrdata prefix; if there is
+ // more room in the allocated object, the next heap bitmap
+ // entry is a 00, indicating that there are no more pointers
+ // to scan. So only the ptrmask for the ptrdata bytes is needed.
+ //
+ // Replicated copies are not as nice: if there is an array of
+ // objects with scalar tails, all but the last tail does have to
+ // be initialized, because there is no way to say "skip forward".
+ // However, because of the possibility of a repeated type with
+ // size not a multiple of 4 pointers (one heap bitmap byte),
+ // the code already must handle the last ptrmask byte specially
+ // by treating it as containing only the bits for endnb pointers,
+ // where endnb <= 4. We represent large scalar tails that must
+ // be expanded in the replication by setting endnb larger than 4.
+ // This will have the effect of reading many bits out of b,
+ // but once the real bits are shifted out, b will supply as many
+ // zero bits as we try to read, which is exactly what we need.
+
+ p = ptrmask
+ if typ.size < dataSize {
+ // Filling in bits for an array of typ.
+ // Set up for repetition of ptrmask during main loop.
+ // Note that ptrmask describes only a prefix of the element
+ // (the typ.ptrdata words); bits past it read as zero (see note above).
+ const maxBits = ptrSize*8 - 7
+ if typ.ptrdata/ptrSize <= maxBits {
+ // Entire ptrmask fits in uintptr with room for a byte fragment.
+ // Load into pbits and never read from ptrmask again.
+ // This is especially important when the ptrmask has
+ // fewer than 8 bits in it; otherwise the reload in the middle
+ // of the Phase 2 loop would itself need to loop to gather
+ // at least 8 bits.
+
+ // Accumulate ptrmask into b.
+ // ptrmask is sized to describe only typ.ptrdata, but we record
+ // it as describing typ.size bytes, since all the high bits are zero.
+ nb = typ.ptrdata / ptrSize
+ for i := uintptr(0); i < nb; i += 8 {
+ b |= uintptr(*p) << i
+ p = add1(p)
+ }
+ nb = typ.size / ptrSize
+
+ // Replicate ptrmask to fill entire pbits uintptr.
+ // Doubling and truncating is fewer steps than
+ // iterating by nb each time. (nb could be 1.)
+ // Since we loaded typ.ptrdata/ptrSize bits
+ // but are pretending to have typ.size/ptrSize,
+ // there might be no replication necessary/possible.
+ pbits = b
+ endnb = nb
+ if nb+nb <= maxBits {
+ for endnb <= ptrSize*8 {
+ pbits |= pbits << endnb
+ endnb += endnb
+ }
+ // Truncate to a multiple of original ptrmask.
+ endnb = maxBits / nb * nb
+ pbits &= 1<<endnb - 1
+ b = pbits
+ nb = endnb
+ }
+
+ // Clear p and endp as sentinel for using pbits.
+ // Checked during Phase 2 loop.
+ p = nil
+ endp = nil
+ } else {
+ // Ptrmask is larger. Read it multiple times.
+ n := (typ.ptrdata/ptrSize+7)/8 - 1
+ endp = addb(ptrmask, n)
+ endnb = typ.size/ptrSize - n*8
+ }
+ }
+ if p != nil {
+ b = uintptr(*p)
+ p = add1(p)
+ nb = 8
+ }
+
+ if typ.size == dataSize {
+ // Single entry: can stop once we reach the non-pointer data.
+ nw = typ.ptrdata / ptrSize
} else {
- ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
+ // Repeated instances of typ in an array.
+ // Have to process first N-1 entries in full, but can stop
+ // once we reach the non-pointer data in the final entry.
+ nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / ptrSize
}
- if size == 2*ptrSize {
- // h.shift is 0 for all sizes > ptrSize.
- *h.bitp = *ptrmask
+ if nw == 0 {
+ // No pointers! Caller was supposed to check.
+ println("runtime: invalid type ", *typ._string)
+ throw("heapBitsSetType: called with non-pointer type")
return
}
- te = uintptr(typ.size) / ptrSize
- // If the type occupies odd number of words, its mask is repeated.
- if te%2 == 0 {
- te /= 2
+ if nw < 2 {
+ // Must write at least 2 words, because the "no scan"
+ // encoding doesn't take effect until the third word.
+ nw = 2
}
- // Copy pointer bitmask into the bitmap.
- // TODO(rlh): add comment addressing the following concerns:
- // If size > 2*ptrSize, is x guaranteed to be at least 2*ptrSize-aligned?
- // And if type occupies and odd number of words, why are we only going through half
- // of ptrmask and why don't we have to shift everything by 4 on odd iterations?
- for i := uintptr(0); i < dataSize; i += 2 * ptrSize {
- v := *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
- ti++
- if ti == te {
- ti = 0
+ // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4).
+ // The leading byte is special because it contains the bits for words 0 and 1,
+ // which do not have the marked bits set.
+ // The leading half-byte is special because it's half a byte and must be
+ // manipulated atomically.
+ switch {
+ default:
+ throw("heapBitsSetType: unexpected shift")
+
+ case h.shift == 0:
+ // Ptrmask and heap bitmap are aligned.
+ // Handle first byte of bitmap specially.
+ // The first byte we write out contains the first two words of the object.
+ // In those words, the mark bits are mark and checkmark, respectively,
+ // and must not be set. In all following words, we want to set the mark bit
+ // as a signal that the object continues to the next 2-bit entry in the bitmap.
+ hb = b & bitPointerAll
+ hb |= bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
+ if w += 4; w >= nw {
+ goto Phase3
}
- if i+ptrSize == dataSize {
- v &^= typeMask << (4 + typeShift)
+ *hbitp = uint8(hb)
+ hbitp = subtract1(hbitp)
+ b >>= 4
+ nb -= 4
+
+ case ptrSize == 8 && h.shift == 2:
+ // Ptrmask and heap bitmap are misaligned.
+ // The bits for the first two words are in a byte shared with another object
+ // and must be updated atomically.
+ // NOTE(rsc): The atomic here may not be necessary.
+ // We took care of 1-word and 2-word objects above,
+ // so this is at least a 6-word object, so our start bits
+ // are shared only with the type bits of another object,
+ // not with its mark bit. Since there is only one allocation
+ // from a given span at a time, we should be able to set
+ // these bits non-atomically. Not worth the risk right now.
+ hb = (b & 3) << (2 * heapBitsShift)
+ b >>= 2
+ nb -= 2
+ // Note: no bitMarker in hb because the first two words don't get markers from us.
+ if gcphase == _GCoff {
+ *hbitp |= uint8(hb)
+ } else {
+ atomicor8(hbitp, uint8(hb))
+ }
+ hbitp = subtract1(hbitp)
+ if w += 2; w >= nw {
+ // We know that there is more data, because we handled 2-word objects above.
+ // This must be at least a 6-word object. If we're out of pointer words,
+ // mark no scan in next bitmap byte and finish.
+ hb = 0
+ w += 4
+ goto Phase3
+ }
+ }
+
+ // Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
+ // The loop computes the bits for that last write but does not execute the write;
+ // it leaves the bits in hb for processing by phase 3.
+ // To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to
+ // use in the first half of the loop right now, and then we only adjust nb explicitly
+ // if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.
+ nb -= 4
+ for {
+ // Emit bitmap byte.
+ // b has at least nb+4 bits, with one exception:
+ // if w+4 >= nw, then b has only nw-w bits,
+ // but we'll stop at the break and then truncate
+ // appropriately in Phase 3.
+ hb = b & bitPointerAll
+ hb |= bitMarkedAll
+ if w += 4; w >= nw {
+ break
}
+ *hbitp = uint8(hb)
+ hbitp = subtract1(hbitp)
+ b >>= 4
- *h.bitp = v
- h.bitp = subtractb(h.bitp, 1)
+ // Load more bits. b has nb right now.
+ if p != endp {
+ // Fast path: keep reading from ptrmask.
+ // nb unmodified: we just loaded 8 bits,
+ // and the next iteration will consume 8 bits,
+ // leaving us with the same nb the next time we're here.
+ b |= uintptr(*p) << nb
+ p = add1(p)
+ } else if p == nil {
+ // Almost as fast path: track bit count and refill from pbits.
+ // For short repetitions.
+ if nb < 8 {
+ b |= pbits << nb
+ nb += endnb
+ }
+ nb -= 8 // for next iteration
+ } else {
+ // Slow path: reached end of ptrmask.
+ // Process final partial byte and rewind to start.
+ b |= uintptr(*p) << nb
+ nb += endnb
+ if nb < 8 {
+ b |= uintptr(*ptrmask) << nb
+ p = add1(ptrmask)
+ } else {
+ nb -= 8
+ p = ptrmask
+ }
+ }
+
+ // Emit bitmap byte.
+ hb = b & bitPointerAll
+ hb |= bitMarkedAll
+ if w += 4; w >= nw {
+ break
+ }
+ *hbitp = uint8(hb)
+ hbitp = subtract1(hbitp)
+ b >>= 4
}
- if dataSize%(2*ptrSize) == 0 && dataSize < size {
- // Mark the word after last object's word as typeDead.
- *h.bitp = 0
+
+Phase3:
+ // Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.
+ if w > nw {
+ // Counting the 4 entries in hb not yet written to memory,
+ // there are more entries than possible pointer slots.
+ // Discard the excess entries (can't be more than 3).
+ mask := uintptr(1)<<(4-(w-nw)) - 1
+ hb &= mask | mask<<4 // apply mask to both pointer bits and mark bits
+ }
+
+ // Change nw from counting possibly-pointer words to total words in allocation.
+ nw = size / ptrSize
+
+ // Write whole bitmap bytes.
+ // The first is hb, the rest are zero.
+ if w <= nw {
+ *hbitp = uint8(hb)
+ hbitp = subtract1(hbitp)
+ hb = 0 // for possible final half-byte below
+ for w += 4; w <= nw; w += 4 {
+ *hbitp = 0
+ hbitp = subtract1(hbitp)
+ }
+ }
+
+ // Write final partial bitmap byte if any.
+ // We know w > nw, or else we'd still be in the loop above.
+ // It can be bigger only due to the 4 entries in hb that it counts.
+ // If w == nw+4 then there's nothing left to do: we wrote all nw entries
+ // and can discard the 4 sitting in hb.
+ // But if w == nw+2, we need to write first two in hb.
+ // The byte is shared with the next object so we may need an atomic.
+ if w == nw+2 {
+ if gcphase == _GCoff {
+ *hbitp = *hbitp&^(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift) | uint8(hb)
+ } else {
+ atomicand8(hbitp, ^uint8(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift))
+ atomicor8(hbitp, uint8(hb))
+ }
+ }
+
+Phase4:
+ // Phase 4: all done, but perhaps double check.
+ if doubleCheck {
+ end := heapBitsForAddr(x + size)
+ if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
+ println("ended at wrong bitmap byte for", *typ._string, "x", dataSize/typ.size)
+ print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
+ print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
+ h0 := heapBitsForAddr(x)
+ print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
+ print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n")
+ throw("bad heapBitsSetType")
+ }
+
+ // Double-check that bits to be written were written correctly.
+ // Does not check that other bits were not written, unfortunately.
+ h := heapBitsForAddr(x)
+ nptr := typ.ptrdata / ptrSize
+ ndata := typ.size / ptrSize
+ count := dataSize / typ.size
+ totalptr := ((count-1)*typ.size + typ.ptrdata) / ptrSize
+ for i := uintptr(0); i < size/ptrSize; i++ {
+ j := i % ndata
+ var have, want uint8
+ have = (*h.bitp >> h.shift) & (bitPointer | bitMarked)
+ if i >= totalptr {
+ want = 0 // deadmarker
+ if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
+ want = bitMarked
+ }
+ } else {
+ if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
+ want |= bitPointer
+ }
+ if i >= 2 {
+ want |= bitMarked
+ } else {
+ have &^= bitMarked
+ }
+ }
+ if have != want {
+ println("mismatch writing bits for", *typ._string, "x", dataSize/typ.size)
+ print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
+ print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
+ print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
+ h0 := heapBitsForAddr(x)
+ print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
+ print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
+ print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
+ println("at word", i, "offset", i*ptrSize, "have", have, "want", want)
+ if typ.kind&kindGCProg != 0 {
+ println("GC program:")
+ dumpGCProg(addb(typ.gcdata, 4))
+ }
+ throw("bad heapBitsSetType")
+ }
+ h = h.next()
+ }
+ if ptrmask == debugPtrmask.data {
+ unlock(&debugPtrmask.lock)
+ }
+ }
}
-// typeBitmapInHeapBitmapFormat returns a bitmap holding
-// the type bits for the type typ, but expanded into heap bitmap format
-// to make it easier to copy them into the heap bitmap.
-// TODO(rsc): Change clients to use the type bitmap format instead,
-// which can be stored more densely (especially if we drop to 1 bit per pointer).
+// debugPtrmask is scratch state for heapBitsSetType's doubleCheck mode:
+// data is a lazily persistentalloc'd (1<<20-byte) buffer that holds a
+// 1-bit pointer mask produced by runGCProg for comparison against the
+// heap bitmap; lock serializes its allocation and use.
+var debugPtrmask struct {
+ lock mutex
+ data *byte
+}
+
+// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
+// progSize is the size of the memory described by the program.
+// elemSize is the size of the element that the GC program describes (a prefix of).
+// dataSize is the total size of the intended data, a multiple of elemSize.
+// allocSize is the total size of the allocated memory.
//
-// To make it easier to replicate the bits when filling out the heap
-// bitmap for an array of typ, if typ holds an odd number of words
-// (meaning the heap bitmap would stop halfway through a byte),
-// typeBitmapInHeapBitmapFormat returns the bitmap for two instances
-// of typ in a row.
-// TODO(rsc): Remove doubling.
-func typeBitmapInHeapBitmapFormat(typ *_type) []uint8 {
- var ptrmask *uint8
- nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
- if typ.kind&kindGCProg != 0 {
- masksize := nptr
- if masksize%2 != 0 {
- masksize *= 2 // repeated
+// GC programs are only used for large allocations.
+// heapBitsSetType requires that allocSize is a multiple of 4 words,
+// so that the relevant bitmap bytes are not shared with surrounding
+// objects and need not be accessed with atomic instructions.
+func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
+ if ptrSize == 8 && allocSize%(4*ptrSize) != 0 {
+ // Alignment will be wrong.
+ throw("heapBitsSetTypeGCProg: small allocation")
+ }
+ var totalBits uintptr
+ if elemSize == dataSize {
+ // Single element: run the program once, directly into the heap bitmap.
+ totalBits = runGCProg(prog, nil, h.bitp, 2)
+ if totalBits*ptrSize != progSize {
+ println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
+ throw("heapBitsSetTypeGCProg: unexpected bit count")
+ }
+ } else {
+ count := dataSize / elemSize
+
+ // Piece together program trailer to run after prog that does:
+ // literal(0)
+ // repeat(1, elemSize-progSize-1) // zeros to fill element size
+ // repeat(elemSize, count-1) // repeat that element for count
+ // This zero-pads the data remaining in the first element and then
+ // repeats that first element to fill the array.
+ var trailer [40]byte // 3 varints (max 10 each) + some bytes
+ i := 0
+ if n := elemSize/ptrSize - progSize/ptrSize; n > 0 {
+ // literal(0)
+ trailer[i] = 0x01
+ i++
+ trailer[i] = 0
+ i++
+ if n > 1 {
+ // repeat(1, n-1)
+ trailer[i] = 0x81
+ i++
+ n--
+ for ; n >= 0x80; n >>= 7 {
+ trailer[i] = byte(n | 0x80)
+ i++
+ }
+ trailer[i] = byte(n)
+ i++
+ }
}
- const typeBitsPerByte = 8 / typeBitsWidth
- masksize = masksize * typeBitsPerByte / 8 // 4 bits per word
- masksize++ // unroll flag in the beginning
- if masksize > maxGCMask && typ.gc[1] != 0 {
- // write barriers have not been updated to deal with this case yet.
- throw("maxGCMask too small for now")
+ // repeat(elemSize/ptrSize, count-1)
+ trailer[i] = 0x80
+ i++
+ n := elemSize / ptrSize
+ for ; n >= 0x80; n >>= 7 {
+ trailer[i] = byte(n | 0x80)
+ i++
}
- ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
- // Check whether the program is already unrolled
- // by checking if the unroll flag byte is set
- maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
- if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
- systemstack(func() {
- unrollgcprog_m(typ)
- })
+ trailer[i] = byte(n)
+ i++
+ n = count
+ for ; n >= 0x80; n >>= 7 {
+ trailer[i] = byte(n | 0x80)
+ i++
}
- ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
- } else {
- ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
+ trailer[i] = byte(n)
+ i++
+ // Terminate the trailer with the stop instruction (0x00).
+ trailer[i] = 0
+ i++
+
+ runGCProg(prog, &trailer[0], h.bitp, 2)
+
+ // Even though we filled in the full array just now,
+ // record that we only filled in up to the ptrdata of the
+ // last element. This will cause the code below to
+ // memclr the dead section of the final array element,
+ // so that scanobject can stop early in the final element.
+ totalBits = (elemSize*(count-1) + progSize) / ptrSize
+ }
+ // Zero the bitmap bytes between the end of the program's output
+ // (endProg) and the end of the allocation (endAlloc), marking the
+ // tail of the allocation as containing no pointers.
+ endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4))
+ endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale))
+ memclr(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc))
+}
+
+// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
+// size is the size of the region described by prog, in bytes.
+// The resulting bitvector will have no more than size/ptrSize bits.
+func progToPointerMask(prog *byte, size uintptr) bitvector {
+ n := (size/ptrSize + 7) / 8
+ x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
+ x[len(x)-1] = 0xa1 // overflow check sentinel
+ n = runGCProg(prog, nil, &x[0], 1)
+ if x[len(x)-1] != 0xa1 {
+ throw("progToPointerMask: overflow")
}
- return (*[1 << 30]byte)(unsafe.Pointer(ptrmask))[:(nptr+1)/2]
+ // n is now the number of 1-bit entries the program actually emitted.
+ return bitvector{int32(n), &x[0]}
}
-// GC type info programs
+// Packed GC pointer bitmaps, aka GC programs.
//
-// TODO(rsc): Clean up and enable.
+// For large types containing arrays, the type information has a
+// natural repetition that can be encoded to save space in the
+// binary and in the memory representation of the type information.
+//
+// The encoding is a simple Lempel-Ziv style bytecode machine
+// with the following instructions:
+//
+// 00000000: stop
+// 0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
+// 10000000 n c: repeat the previous n bits c times; n, c are varints
+// 1nnnnnnn c: repeat the previous n bits c times; c is a varint
-const (
- // GC type info programs.
- // The programs allow to store type info required for GC in a compact form.
- // Most importantly arrays take O(1) space instead of O(n).
- // The program grammar is:
- //
- // Program = {Block} "insEnd"
- // Block = Data | Array
- // Data = "insData" DataSize DataBlock
- // DataSize = int // size of the DataBlock in bit pairs, 1 byte
- // DataBlock = binary // dense GC mask (2 bits per word) of size ]DataSize/4[ bytes
- // Array = "insArray" ArrayLen Block "insArrayEnd"
- // ArrayLen = int // length of the array, 8 bytes (4 bytes for 32-bit arch)
- //
- // Each instruction (insData, insArray, etc) is 1 byte.
- // For example, for type struct { x []byte; y [20]struct{ z int; w *byte }; }
- // the program looks as:
- //
- // insData 3 (typePointer typeScalar typeScalar)
- // insArray 20 insData 2 (typeScalar typePointer) insArrayEnd insEnd
- //
- // Total size of the program is 17 bytes (13 bytes on 32-bits).
- // The corresponding GC mask would take 43 bytes (it would be repeated
- // because the type has odd number of words).
- insData = 1 + iota
- insArray
- insArrayEnd
- insEnd
+// runGCProg executes the GC program prog, and then trailer if non-nil,
+// writing to dst with entries of the given size.
+// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
+// If size == 2, dst is the 2-bit heap bitmap, and writes move backward
+// starting at dst (because the heap bitmap does). In this case, the caller guarantees
+// that only whole bytes in dst need to be written.
+//
+// runGCProg returns the number of 1- or 2-bit entries written to memory.
+func runGCProg(prog, trailer, dst *byte, size int) uintptr {
+ dstStart := dst
- // 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively.
- maxGCMask = 65536 // TODO(rsc): change back to 64
-)
+ // Bits waiting to be written to memory.
+ var bits uintptr
+ var nbits uintptr
-// Recursively unrolls GC program in prog.
-// mask is where to store the result.
-// If inplace is true, store the result not in mask but in the heap bitmap for mask.
-// ppos is a pointer to position in mask, in bits.
-// sparse says to generate 4-bits per word mask for heap (1-bit for data/bss otherwise).
-//go:nowritebarrier
-func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) *byte {
- pos := *ppos
- mask := (*[1 << 30]byte)(unsafe.Pointer(maskp))
+ p := prog
+Run:
for {
- switch *prog {
- default:
- throw("unrollgcprog: unknown instruction")
+ // Flush accumulated full bytes.
+ // The rest of the loop assumes that nbits <= 7.
+ for ; nbits >= 8; nbits -= 8 {
+ if size == 1 {
+ *dst = uint8(bits)
+ dst = add1(dst)
+ bits >>= 8
+ } else {
+ v := bits&bitPointerAll | bitMarkedAll
+ *dst = uint8(v)
+ dst = subtract1(dst)
+ bits >>= 4
+ v = bits&bitPointerAll | bitMarkedAll
+ *dst = uint8(v)
+ dst = subtract1(dst)
+ bits >>= 4
+ }
+ }
- case insData:
- prog = addb(prog, 1)
- siz := int(*prog)
- prog = addb(prog, 1)
- p := (*[1 << 30]byte)(unsafe.Pointer(prog))
- for i := 0; i < siz; i++ {
- const typeBitsPerByte = 8 / typeBitsWidth
- v := p[i/typeBitsPerByte]
- v >>= (uint(i) % typeBitsPerByte) * typeBitsWidth
- v &= typeMask
- if inplace {
- // Store directly into GC bitmap.
- h := heapBitsForAddr(uintptr(unsafe.Pointer(&mask[pos])))
- if h.shift == 0 {
- *h.bitp = v << typeShift
- } else {
- *h.bitp |= v << (4 + typeShift)
- }
- pos += ptrSize
- } else if sparse {
- // 4-bits per word, type bits in high bits
- v <<= (pos % 8) + typeShift
- mask[pos/8] |= v
- pos += heapBitsWidth
+ // Process one instruction.
+ inst := uintptr(*p)
+ p = add1(p)
+ n := inst & 0x7F
+ if inst&0x80 == 0 {
+ // Literal bits; n == 0 means end of program.
+ if n == 0 {
+ // Program is over; continue in trailer if present.
+ if trailer != nil {
+ //println("trailer")
+ p = trailer
+ trailer = nil
+ continue
+ }
+ //println("done")
+ break Run
+ }
+ //println("lit", n, dst)
+ nbyte := n / 8
+ for i := uintptr(0); i < nbyte; i++ {
+ bits |= uintptr(*p) << nbits
+ p = add1(p)
+ if size == 1 {
+ *dst = uint8(bits)
+ dst = add1(dst)
+ bits >>= 8
} else {
- // 1 bit per word, for data/bss bitmap
- v >>= 1 // convert typePointer to 1, others to 0
- mask[pos/8] |= v << (pos % 8)
- pos++
+ v := bits&0xf | bitMarkedAll
+ *dst = uint8(v)
+ dst = subtract1(dst)
+ bits >>= 4
+ v = bits&0xf | bitMarkedAll
+ *dst = uint8(v)
+ dst = subtract1(dst)
+ bits >>= 4
+ }
+ }
+ if n %= 8; n > 0 {
+ bits |= uintptr(*p) << nbits
+ p = add1(p)
+ nbits += n
+ }
+ continue Run
+ }
+
+ // Repeat. If n == 0, it is encoded in a varint in the next bytes.
+ if n == 0 {
+ for off := uint(0); ; off += 7 {
+ x := uintptr(*p)
+ p = add1(p)
+ n |= (x & 0x7F) << off
+ if x&0x80 == 0 {
+ break
+ }
+ }
+ }
+
+ // Count is encoded in a varint in the next bytes.
+ c := uintptr(0)
+ for off := uint(0); ; off += 7 {
+ x := uintptr(*p)
+ p = add1(p)
+ c |= (x & 0x7F) << off
+ if x&0x80 == 0 {
+ break
+ }
+ }
+ c *= n // now total number of bits to copy
+
+ // If the number of bits being repeated is small, load them
+ // into a register and use that register for the entire loop
+ // instead of repeatedly reading from memory.
+ // Handling fewer than 8 bits here makes the general loop simpler.
+ // The cutoff is ptrSize*8 - 7 to guarantee that when we add
+ // the pattern to a bit buffer holding at most 7 bits (a partial byte)
+ // it will not overflow.
+ src := dst
+ const maxBits = ptrSize*8 - 7
+ if n <= maxBits {
+ // Start with bits in output buffer.
+ pattern := bits
+ npattern := nbits
+
+ // If we need more bits, fetch them from memory.
+ if size == 1 {
+ src = subtract1(src)
+ for npattern < n {
+ pattern <<= 8
+ pattern |= uintptr(*src)
+ src = subtract1(src)
+ npattern += 8
+ }
+ } else {
+ src = add1(src)
+ for npattern < n {
+ pattern <<= 4
+ pattern |= uintptr(*src) & 0xf
+ src = add1(src)
+ npattern += 4
}
}
- prog = addb(prog, round(uintptr(siz)*typeBitsWidth, 8)/8)
- case insArray:
- prog = (*byte)(add(unsafe.Pointer(prog), 1))
- siz := uintptr(0)
- for i := uintptr(0); i < ptrSize; i++ {
- siz = (siz << 8) + uintptr(*(*byte)(add(unsafe.Pointer(prog), ptrSize-i-1)))
+ // We started with the whole bit output buffer,
+ // and then we loaded bits from whole bytes.
+ // Either way, we might now have too many instead of too few.
+ // Discard the extra.
+ if npattern > n {
+ pattern >>= npattern - n
+ npattern = n
}
- prog = (*byte)(add(unsafe.Pointer(prog), ptrSize))
- var prog1 *byte
- for i := uintptr(0); i < siz; i++ {
- prog1 = unrollgcprog1(&mask[0], prog, &pos, inplace, sparse)
+
+ // Replicate pattern to at most maxBits.
+ if npattern == 1 {
+ // One bit being repeated.
+ // If the bit is 1, make the pattern all 1s.
+ // If the bit is 0, the pattern is already all 0s,
+ // but we can claim that the number of bits
+ // in the word is equal to the number we need (c),
+ // because right shift of bits will zero fill.
+ if pattern == 1 {
+ pattern = 1<<maxBits - 1
+ npattern = maxBits
+ } else {
+ npattern = c
+ }
+ } else {
+ b := pattern
+ nb := npattern
+ if nb+nb <= maxBits {
+ // Double pattern until the whole uintptr is filled.
+ for nb <= ptrSize*8 {
+ b |= b << nb
+ nb += nb
+ }
+ // Trim away incomplete copy of original pattern in high bits.
+ // TODO(rsc): Replace with table lookup or loop on systems without divide?
+ nb = maxBits / npattern * npattern
+ b &= 1<<nb - 1
+ pattern = b
+ npattern = nb
+ }
}
- if *prog1 != insArrayEnd {
- throw("unrollgcprog: array does not end with insArrayEnd")
+
+ // Add pattern to bit buffer and flush bit buffer, c/npattern times.
+ // Since pattern contains >8 bits, there will be full bytes to flush
+ // on each iteration.
+ for ; c >= npattern; c -= npattern {
+ bits |= pattern << nbits
+ nbits += npattern
+ if size == 1 {
+ for nbits >= 8 {
+ *dst = uint8(bits)
+ dst = add1(dst)
+ bits >>= 8
+ nbits -= 8
+ }
+ } else {
+ for nbits >= 4 {
+ *dst = uint8(bits&0xf | bitMarkedAll)
+ dst = subtract1(dst)
+ bits >>= 4
+ nbits -= 4
+ }
+ }
}
- prog = (*byte)(add(unsafe.Pointer(prog1), 1))
- case insArrayEnd, insEnd:
- *ppos = pos
- return prog
+ // Add final fragment to bit buffer.
+ if c > 0 {
+ pattern &= 1<<c - 1
+ bits |= pattern << nbits
+ nbits += c
+ }
+ continue Run
}
- }
-}
-
-// Unrolls GC program prog for data/bss, returns dense GC mask.
-func unrollglobgcprog(prog *byte, size uintptr) bitvector {
- masksize := round(round(size, ptrSize)/ptrSize, 8) / 8
- mask := (*[1 << 30]byte)(persistentalloc(masksize+1, 0, &memstats.gc_sys))
- mask[masksize] = 0xa1
- pos := uintptr(0)
- prog = unrollgcprog1(&mask[0], prog, &pos, false, false)
- if pos != size/ptrSize {
- print("unrollglobgcprog: bad program size, got ", pos, ", expect ", size/ptrSize, "\n")
- throw("unrollglobgcprog: bad program size")
- }
- if *prog != insEnd {
- throw("unrollglobgcprog: program does not end with insEnd")
- }
- if mask[masksize] != 0xa1 {
- throw("unrollglobgcprog: overflow")
- }
- return bitvector{int32(masksize * 8), &mask[0]}
-}
-func unrollgcproginplace_m(v unsafe.Pointer, typ *_type, size, size0 uintptr) {
- // TODO(rsc): Explain why these non-atomic updates are okay.
- pos := uintptr(0)
- prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
- for pos != size0 {
- unrollgcprog1((*byte)(v), prog, &pos, true, true)
+ // Repeat; n too large to fit in a register.
+ // Since nbits <= 7, we know the first few bytes of repeated data
+ // are already written to memory.
+ off := n - nbits // n > nbits because n > maxBits and nbits <= 7
+ if size == 1 {
+ // Leading src fragment.
+ src = subtractb(src, (off+7)/8)
+ if frag := off & 7; frag != 0 {
+ bits |= uintptr(*src) >> (8 - frag) << nbits
+ src = add1(src)
+ nbits += frag
+ c -= frag
+ }
+ // Main loop: load one byte, write another.
+ // The bits are rotating through the bit buffer.
+ for i := c / 8; i > 0; i-- {
+ bits |= uintptr(*src) << nbits
+ src = add1(src)
+ *dst = uint8(bits)
+ dst = add1(dst)
+ bits >>= 8
+ }
+ // Final src fragment.
+ if c %= 8; c > 0 {
+ bits |= (uintptr(*src) & (1<<c - 1)) << nbits
+ nbits += c
+ }
+ } else {
+ // Leading src fragment.
+ src = addb(src, (off+3)/4)
+ if frag := off & 3; frag != 0 {
+ bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
+ src = subtract1(src)
+ nbits += frag
+ c -= frag
+ }
+ // Main loop: load one byte, write another.
+ // The bits are rotating through the bit buffer.
+ for i := c / 4; i > 0; i-- {
+ bits |= (uintptr(*src) & 0xf) << nbits
+ src = subtract1(src)
+ *dst = uint8(bits&0xf | bitMarkedAll)
+ dst = subtract1(dst)
+ bits >>= 4
+ }
+ // Final src fragment.
+ if c %= 4; c > 0 {
+ bits |= (uintptr(*src) & (1<<c - 1)) << nbits
+ nbits += c
+ }
+ }
}
- // Mark first word as bitAllocated.
- // Mark word after last as typeDead.
- if size0 < size {
- h := heapBitsForAddr(uintptr(v) + size0)
- *h.bitp &^= typeMask << typeShift
+ // Write any final bits out, using full-byte writes, even for the final byte.
+ var totalBits uintptr
+ if size == 1 {
+ totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
+ nbits += -nbits & 7
+ for ; nbits > 0; nbits -= 8 {
+ *dst = uint8(bits)
+ dst = add1(dst)
+ bits >>= 8
+ }
+ } else {
+ totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits
+ nbits += -nbits & 3
+ for ; nbits > 0; nbits -= 4 {
+ v := bits&0xf | bitMarkedAll
+ *dst = uint8(v)
+ dst = subtract1(dst)
+ bits >>= 4
+ }
+ // Clear the mark bits in the first two entries.
+ // They are the actual mark and checkmark bits,
+ // not non-dead markers. It simplified the code
+ // above to set the marker in every bit written and
+ // then clear these two as a special case at the end.
+ *dstStart &^= bitMarked | bitMarked<<heapBitsShift
}
+ return totalBits
}
-var unroll mutex
-
-// Unrolls GC program in typ.gc[1] into typ.gc[0]
-//go:nowritebarrier
-func unrollgcprog_m(typ *_type) {
- lock(&unroll)
- mask := (*byte)(unsafe.Pointer(uintptr(typ.gc[0])))
- if *mask == 0 {
- pos := uintptr(8) // skip the unroll flag
- prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
- prog = unrollgcprog1(mask, prog, &pos, false, true)
- if *prog != insEnd {
- throw("unrollgcprog: program does not end with insEnd")
+func dumpGCProg(p *byte) {
+ nptr := 0
+ for {
+ x := *p
+ p = add1(p)
+ if x == 0 {
+ print("\t", nptr, " end\n")
+ break
}
- if typ.size/ptrSize%2 != 0 {
- // repeat the program
- prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
- unrollgcprog1(mask, prog, &pos, false, true)
+ if x&0x80 == 0 {
+ print("\t", nptr, " lit ", x, ":")
+ n := int(x+7) / 8
+ for i := 0; i < n; i++ {
+ print(" ", hex(*p))
+ p = add1(p)
+ }
+ print("\n")
+ nptr += int(x)
+ } else {
+ nbit := int(x &^ 0x80)
+ if nbit == 0 {
+ for nb := uint(0); ; nb += 7 {
+ x := *p
+ p = add1(p)
+ nbit |= int(x&0x7f) << nb
+ if x&0x80 == 0 {
+ break
+ }
+ }
+ }
+ count := 0
+ for nb := uint(0); ; nb += 7 {
+ x := *p
+ p = add1(p)
+ count |= int(x&0x7f) << nb
+ if x&0x80 == 0 {
+ break
+ }
+ }
+ print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
+ nptr += nbit * count
}
-
- // atomic way to say mask[0] = 1
- atomicor8(mask, 1)
}
- unlock(&unroll)
}
// Testing.
@@ -748,36 +1510,46 @@ func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
return true
}
-// Returns GC type info for object p for testing.
-func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) {
- *mask = nil
- *len = 0
+// gcbits returns the GC type info for x, for testing.
+// The result is the bitmap entries (0 or 1), one entry per byte.
+//go:linkname reflect_gcbits reflect.gcbits
+func reflect_gcbits(x interface{}) []byte {
+ ret := getgcmask(x)
+ typ := (*ptrtype)(unsafe.Pointer((*eface)(unsafe.Pointer(&x))._type)).elem
+ nptr := typ.ptrdata / ptrSize
+ for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
+ ret = ret[:len(ret)-1]
+ }
+ return ret
+}
- // data
+// Returns GC type info for object p for testing.
+func getgcmask(ep interface{}) (mask []byte) {
+ e := *(*eface)(unsafe.Pointer(&ep))
+ p := e.data
+ t := e._type
+ // data or bss
for datap := &firstmoduledata; datap != nil; datap = datap.next {
+ // data
if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
+ bitmap := datap.gcdatamask.bytedata
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
- *len = n / ptrSize
- *mask = &make([]byte, *len)[0]
+ mask = make([]byte, n/ptrSize)
for i := uintptr(0); i < n; i += ptrSize {
off := (uintptr(p) + i - datap.data) / ptrSize
- bits := (*addb(datap.gcdatamask.bytedata, off/8) >> (off % 8)) & 1
- bits += 1 // convert 1-bit to 2-bit
- *addb(*mask, i/ptrSize) = bits
+ mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
}
return
}
// bss
if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
+ bitmap := datap.gcbssmask.bytedata
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
- *len = n / ptrSize
- *mask = &make([]byte, *len)[0]
+ mask = make([]byte, n/ptrSize)
for i := uintptr(0); i < n; i += ptrSize {
off := (uintptr(p) + i - datap.bss) / ptrSize
- bits := (*addb(datap.gcbssmask.bytedata, off/8) >> (off % 8)) & 1
- bits += 1 // convert 1-bit to 2-bit
- *addb(*mask, i/ptrSize) = bits
+ mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
}
return
}
@@ -787,47 +1559,58 @@ func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) {
var n uintptr
var base uintptr
if mlookup(uintptr(p), &base, &n, nil) != 0 {
- *len = n / ptrSize
- *mask = &make([]byte, *len)[0]
+ mask = make([]byte, n/ptrSize)
for i := uintptr(0); i < n; i += ptrSize {
- bits := heapBitsForAddr(base + i).typeBits()
- *addb(*mask, i/ptrSize) = bits
+ hbits := heapBitsForAddr(base + i)
+ if hbits.isPointer() {
+ mask[i/ptrSize] = 1
+ }
+ if i >= 2*ptrSize && !hbits.isMarked() {
+ mask = mask[:i/ptrSize]
+ break
+ }
}
return
}
// stack
- var frame stkframe
- frame.sp = uintptr(p)
- _g_ := getg()
- gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
- if frame.fn != nil {
- f := frame.fn
- targetpc := frame.continpc
- if targetpc == 0 {
- return
- }
- if targetpc != f.entry {
- targetpc--
- }
- pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
- if pcdata == -1 {
- return
- }
- stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
- if stkmap == nil || stkmap.n <= 0 {
- return
- }
- bv := stackmapdata(stkmap, pcdata)
- size := uintptr(bv.n) * ptrSize
- n := (*ptrtype)(unsafe.Pointer(t)).elem.size
- *len = n / ptrSize
- *mask = &make([]byte, *len)[0]
- for i := uintptr(0); i < n; i += ptrSize {
- off := (uintptr(p) + i - frame.varp + size) / ptrSize
- bits := (*addb(bv.bytedata, off/8) >> (off % 8)) & 1
- bits += 1 // convert 1-bit to 2-bit
- *addb(*mask, i/ptrSize) = bits
+ if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi {
+ var frame stkframe
+ frame.sp = uintptr(p)
+ _g_ := getg()
+ gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
+ if frame.fn != nil {
+ f := frame.fn
+ targetpc := frame.continpc
+ if targetpc == 0 {
+ return
+ }
+ if targetpc != f.entry {
+ targetpc--
+ }
+ pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
+ if pcdata == -1 {
+ return
+ }
+ stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
+ if stkmap == nil || stkmap.n <= 0 {
+ return
+ }
+ bv := stackmapdata(stkmap, pcdata)
+ size := uintptr(bv.n) * ptrSize
+ n := (*ptrtype)(unsafe.Pointer(t)).elem.size
+ mask = make([]byte, n/ptrSize)
+ for i := uintptr(0); i < n; i += ptrSize {
+ bitmap := bv.bytedata
+ off := (uintptr(p) + i - frame.varp + size) / ptrSize
+ mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
+ }
}
+ return
}
+
+ // otherwise, not something the GC knows about.
+ // possibly read-only data, like malloc(0).
+ // must not have pointers
+ return
}
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 9bd36d1a5e..db5b2dcd36 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -127,13 +127,22 @@ const (
_RootCount = 5
)
-// heapminimum is the minimum number of bytes in the heap.
-// This cleans up the corner case of where we have a very small live set but a lot
-// of allocations and collecting every GOGC * live set is expensive.
-// heapminimum is adjust by multiplying it by GOGC/100. In
-// the special case of GOGC==0 this will set heapminimum to 0 resulting
-// collecting at every allocation even when the heap size is small.
-var heapminimum = uint64(4 << 20)
+// heapminimum is the minimum heap size at which to trigger GC.
+// For small heaps, this overrides the usual GOGC*live set rule.
+//
+// When there is a very small live set but a lot of allocation, simply
+// collecting when the heap reaches GOGC*live results in many GC
+// cycles and high total per-GC overhead. This minimum amortizes this
+// per-GC overhead while keeping the heap reasonably small.
+//
+// During initialization this is set to 4MB*GOGC/100. In the case of
+// GOGC==0, this will set heapminimum to 0, resulting in constant
+// collection even when the heap size is small, which is useful for
+// debugging.
+var heapminimum uint64 = defaultHeapMinimum
+
+// defaultHeapMinimum is the value of heapminimum for GOGC==100.
+const defaultHeapMinimum = 4 << 20
// Initialized from $GOGC. GOGC=off means no GC.
var gcpercent int32
@@ -146,8 +155,8 @@ func gcinit() {
work.markfor = parforalloc(_MaxGcproc)
_ = setGCPercent(readgogc())
for datap := &firstmoduledata; datap != nil; datap = datap.next {
- datap.gcdatamask = unrollglobgcprog((*byte)(unsafe.Pointer(datap.gcdata)), datap.edata-datap.data)
- datap.gcbssmask = unrollglobgcprog((*byte)(unsafe.Pointer(datap.gcbss)), datap.ebss-datap.bss)
+ datap.gcdatamask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcdata)), datap.edata-datap.data)
+ datap.gcbssmask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcbss)), datap.ebss-datap.bss)
}
memstats.next_gc = heapminimum
}
@@ -180,7 +189,7 @@ func setGCPercent(in int32) (out int32) {
in = -1
}
gcpercent = in
- heapminimum = heapminimum * uint64(gcpercent) / 100
+ heapminimum = defaultHeapMinimum * uint64(gcpercent) / 100
unlock(&mheap_.lock)
return out
}
@@ -197,7 +206,6 @@ var gcBlackenEnabled uint32
const (
_GCoff = iota // GC not running, write barrier disabled
- _GCquiesce // unused state
_GCstw // unused state
_GCscan // GC collecting roots into workbufs, write barrier disabled
_GCmark // GC marking from workbufs, write barrier ENABLED
@@ -208,7 +216,7 @@ const (
//go:nosplit
func setGCPhase(x uint32) {
atomicstore(&gcphase, x)
- writeBarrierEnabled = gcphase == _GCmark || gcphase == _GCmarktermination || mheap_.shadow_enabled
+ writeBarrierEnabled = gcphase == _GCmark || gcphase == _GCmarktermination
}
// gcMarkWorkerMode represents the mode that a concurrent mark worker
@@ -699,11 +707,11 @@ const (
func startGC(mode int) {
// The gc is turned off (via enablegc) until the bootstrap has completed.
// Also, malloc gets called in the guts of a number of libraries that might be
- // holding locks. To avoid deadlocks during stoptheworld, don't bother
+ // holding locks. To avoid deadlocks during stop-the-world, don't bother
// trying to run gc while holding a lock. The next mallocgc without a lock
// will do the gc instead.
mp := acquirem()
- if gp := getg(); gp == mp.g0 || mp.locks > 1 || !memstats.enablegc || panicking != 0 || gcpercent < 0 {
+ if gp := getg(); gp == mp.g0 || mp.locks > 1 || mp.preemptoff != "" || !memstats.enablegc || panicking != 0 || gcpercent < 0 {
releasem(mp)
return
}
@@ -797,7 +805,7 @@ func gc(mode int) {
traceGCStart()
}
- systemstack(stoptheworld)
+ systemstack(stopTheWorldWithSema)
systemstack(finishsweep_m) // finish sweep before we start concurrent scan.
// clearpools before we start the GC. If we wait they memory will not be
// reclaimed until the next GC cycle.
@@ -814,7 +822,7 @@ func gc(mode int) {
setGCPhase(_GCscan)
// Concurrent scan.
- starttheworld()
+ startTheWorldWithSema()
if debug.gctrace > 0 {
tScan = nanotime()
}
@@ -858,7 +866,7 @@ func gc(mode int) {
if debug.gctrace > 0 {
tMarkTerm = nanotime()
}
- systemstack(stoptheworld)
+ systemstack(stopTheWorldWithSema)
// The gcphase is _GCmark, it will transition to _GCmarktermination
// below. The important thing is that the wb remains active until
// all marking is complete. This includes writes made by the GC.
@@ -952,13 +960,12 @@ func gc(mode int) {
// all done
mp.preemptoff = ""
- semrelease(&worldsema)
-
if gcphase != _GCoff {
throw("gc done but gcphase != _GCoff")
}
- systemstack(starttheworld)
+ systemstack(startTheWorldWithSema)
+ semrelease(&worldsema)
releasem(mp)
mp = nil
@@ -1160,6 +1167,18 @@ func gcBgMarkDone() {
}
}
+// gcMarkWorkAvailable determines if mark work is readily available.
+// It is used by the scheduler to decide if this p run a mark work.
+func gcMarkWorkAvailable(p *p) bool {
+ if !p.gcw.empty() {
+ return true
+ }
+ if atomicload64(&work.full) != 0 || atomicload64(&work.partial) != 0 {
+ return true // global work available
+ }
+ return false
+}
+
// gcFlushGCWork disposes the gcWork caches of all Ps. The world must
// be stopped.
//go:nowritebarrier
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 9d78ddecae..62fa33895b 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -261,7 +261,7 @@ func gcphasework(gp *g) {
switch gcphase {
default:
throw("gcphasework in bad gcphase")
- case _GCoff, _GCquiesce, _GCstw, _GCsweep:
+ case _GCoff, _GCstw, _GCsweep:
// No work.
case _GCscan:
// scan the stack, mark the objects, put pointers in work buffers
@@ -557,9 +557,6 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
// Same work as in scanobject; see comments there.
obj := *(*uintptr)(unsafe.Pointer(b + i))
if obj != 0 && arena_start <= obj && obj < arena_used {
- if mheap_.shadow_enabled && debug.wbshadow >= 2 && debug.gccheckmark > 0 && useCheckmark {
- checkwbshadow((*uintptr)(unsafe.Pointer(b + i)))
- }
if obj, hbits, span := heapBitsForObject(obj); obj != 0 {
greyobject(obj, b, i, hbits, span, gcw)
}
@@ -597,32 +594,25 @@ func scanobject(b uintptr, gcw *gcWork) {
// Avoid needless hbits.next() on last iteration.
hbits = hbits.next()
}
- bits := uintptr(hbits.typeBits())
- if bits == typeDead {
+ // During checkmarking, 1-word objects store the checkmark
+ // in the type bit for the one word. The only one-word objects
+ // are pointers, or else they'd be merged with other non-pointer
+ // data into larger allocations.
+ bits := hbits.bits()
+ if i >= 2*ptrSize && bits&bitMarked == 0 {
break // no more pointers in this object
}
-
- if bits <= typeScalar { // typeScalar, typeDead, typeScalarMarked
- continue
- }
-
- if bits&typePointer != typePointer {
- print("gc useCheckmark=", useCheckmark, " b=", hex(b), "\n")
- throw("unexpected garbage collection bits")
+ if bits&bitPointer == 0 {
+ continue // not a pointer
}
- // Work here is duplicated in scanblock.
+ // Work here is duplicated in scanblock and above.
// If you make changes here, make changes there too.
-
obj := *(*uintptr)(unsafe.Pointer(b + i))
// At this point we have extracted the next potential pointer.
- // Check if it points into heap.
- if obj != 0 && arena_start <= obj && obj < arena_used {
- if mheap_.shadow_enabled && debug.wbshadow >= 2 && debug.gccheckmark > 0 && useCheckmark {
- checkwbshadow((*uintptr)(unsafe.Pointer(b + i)))
- }
-
+ // Check if it points into heap and not back at the current object.
+ if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n {
// Mark the object.
if obj, hbits, span := heapBitsForObject(obj); obj != 0 {
greyobject(obj, b, i, hbits, span, gcw)
@@ -673,11 +663,11 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
throw("checkmark found unmarked object")
}
- if hbits.isCheckmarked() {
+ if hbits.isCheckmarked(span.elemsize) {
return
}
- hbits.setCheckmarked()
- if !hbits.isCheckmarked() {
+ hbits.setCheckmarked(span.elemsize)
+ if !hbits.isCheckmarked(span.elemsize) {
throw("setCheckmarked and isCheckmarked disagree")
}
} else {
@@ -685,12 +675,11 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
if hbits.isMarked() {
return
}
-
hbits.setMarked()
// If this is a noscan object, fast-track it to black
// instead of greying it.
- if hbits.typeBits() == typeDead {
+ if !hbits.hasPointers(span.elemsize) {
gcw.bytesMarked += uint64(span.elemsize)
return
}
diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go
index 9c32ae8880..b7feb847b4 100644
--- a/src/runtime/mgcwork.go
+++ b/src/runtime/mgcwork.go
@@ -7,7 +7,7 @@ package runtime
import "unsafe"
const (
- _Debugwbufs = true // if true check wbufs consistency
+ _Debugwbufs = false // if true check wbufs consistency
_WorkbufSize = 1 * 256 // in bytes - if small wbufs are passed to GC in a timely fashion.
)
@@ -182,6 +182,13 @@ func (w *gcWork) balance() {
}
}
+// empty returns true if w has no mark work available.
+//go:nowritebarrier
+func (w *gcWork) empty() bool {
+ wbuf := w.wbuf
+ return wbuf == 0 || wbuf.ptr().nobj == 0
+}
+
// Internally, the GC work pool is kept in arrays in work buffers.
// The gcWork interface caches a work buffer until full (or empty) to
// avoid contending on the global work buffer lists.
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 10878ee5cf..04fa050bc5 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -28,6 +28,15 @@ type mheap struct {
spans **mspan
spans_mapped uintptr
+ // Proportional sweep
+ pagesSwept uint64 // pages swept this cycle; updated atomically
+ sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without
+
+ // Malloc stats.
+ largefree uint64 // bytes freed for large objects (>maxsmallsize)
+ nlargefree uint64 // number of frees for large objects (>maxsmallsize)
+ nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
+
// range of addresses we might see in the heap
bitmap uintptr
bitmap_mapped uintptr
@@ -36,14 +45,6 @@ type mheap struct {
arena_end uintptr
arena_reserved bool
- // write barrier shadow heap.
- // 64-bit systems only, enabled by GODEBUG=wbshadow=1.
- // See also shadow_data, data_start, data_end fields on moduledata in
- // symtab.go.
- shadow_enabled bool // shadow should be updated and checked
- shadow_reserved bool // shadow memory is reserved
- shadow_heap uintptr // heap-addr + shadow_heap = shadow heap addr
-
// central free lists for small size classes.
// the padding makes sure that the MCentrals are
// spaced CacheLineSize bytes apart, so that each MCentral.lock
@@ -58,15 +59,6 @@ type mheap struct {
specialfinalizeralloc fixalloc // allocator for specialfinalizer*
specialprofilealloc fixalloc // allocator for specialprofile*
speciallock mutex // lock for sepcial record allocators.
-
- // Proportional sweep
- pagesSwept uint64 // pages swept this cycle; updated atomically
- sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without
-
- // Malloc stats.
- largefree uint64 // bytes freed for large objects (>maxsmallsize)
- nlargefree uint64 // number of frees for large objects (>maxsmallsize)
- nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
}
var mheap_ mheap
@@ -176,7 +168,9 @@ func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
// inheap reports whether b is a pointer into a (potentially dead) heap object.
// It returns false for pointers into stack spans.
+// Non-preemptible because it is used by write barriers.
//go:nowritebarrier
+//go:nosplit
func inheap(b uintptr) bool {
if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used {
return false
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index 4544344780..a618bd5e81 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -521,9 +521,7 @@ func GoroutineProfile(p []StackRecord) (n int, ok bool) {
n = NumGoroutine()
if n <= len(p) {
gp := getg()
- semacquire(&worldsema, false)
- gp.m.preemptoff = "profile"
- systemstack(stoptheworld)
+ stopTheWorld("profile")
n = NumGoroutine()
if n <= len(p) {
@@ -544,9 +542,7 @@ func GoroutineProfile(p []StackRecord) (n int, ok bool) {
}
}
- gp.m.preemptoff = ""
- semrelease(&worldsema)
- systemstack(starttheworld)
+ startTheWorld()
}
return n, ok
@@ -565,10 +561,7 @@ func saveg(pc, sp uintptr, gp *g, r *StackRecord) {
// into buf after the trace for the current goroutine.
func Stack(buf []byte, all bool) int {
if all {
- semacquire(&worldsema, false)
- gp := getg()
- gp.m.preemptoff = "stack trace"
- systemstack(stoptheworld)
+ stopTheWorld("stack trace")
}
n := 0
@@ -590,10 +583,7 @@ func Stack(buf []byte, all bool) int {
}
if all {
- gp := getg()
- gp.m.preemptoff = ""
- semrelease(&worldsema)
- systemstack(starttheworld)
+ startTheWorld()
}
return n
}
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go
index c8e5249156..3eff7f6b3e 100644
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -153,24 +153,13 @@ func init() {
// ReadMemStats populates m with memory allocator statistics.
func ReadMemStats(m *MemStats) {
- // Have to acquire worldsema to stop the world,
- // because stoptheworld can only be used by
- // one goroutine at a time, and there might be
- // a pending garbage collection already calling it.
- semacquire(&worldsema, false)
- gp := getg()
- gp.m.preemptoff = "read mem stats"
- systemstack(stoptheworld)
+ stopTheWorld("read mem stats")
systemstack(func() {
readmemstats_m(m)
})
- gp.m.preemptoff = ""
- gp.m.locks++
- semrelease(&worldsema)
- systemstack(starttheworld)
- gp.m.locks--
+ startTheWorld()
}
func readmemstats_m(stats *MemStats) {
diff --git a/src/runtime/os1_darwin.go b/src/runtime/os1_darwin.go
index 10cf460f7f..1b74e3e653 100644
--- a/src/runtime/os1_darwin.go
+++ b/src/runtime/os1_darwin.go
@@ -8,7 +8,6 @@ import "unsafe"
//extern SigTabTT runtime·sigtab[];
-var sigset_none = uint32(0)
var sigset_all = ^uint32(0)
func unimplemented(name string) {
@@ -126,17 +125,36 @@ func mpreinit(mp *m) {
mp.gsignal.m = mp
}
+func msigsave(mp *m) {
+ smask := (*uint32)(unsafe.Pointer(&mp.sigmask))
+ if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) {
+ throw("insufficient storage for signal mask")
+ }
+ sigprocmask(_SIG_SETMASK, nil, smask)
+}
+
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, can not allocate memory.
func minit() {
// Initialize signal handling.
_g_ := getg()
signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024)
- sigprocmask(_SIG_SETMASK, &sigset_none, nil)
+
+ // restore signal mask from m.sigmask and unblock essential signals
+ nmask := *(*uint32)(unsafe.Pointer(&_g_.m.sigmask))
+ for i := range sigtable {
+ if sigtable[i].flags&_SigUnblock != 0 {
+ nmask &^= 1 << (uint32(i) - 1)
+ }
+ }
+ sigprocmask(_SIG_SETMASK, &nmask, nil)
}
// Called from dropm to undo the effect of an minit.
func unminit() {
+ _g_ := getg()
+ smask := (*uint32)(unsafe.Pointer(&_g_.m.sigmask))
+ sigprocmask(_SIG_SETMASK, smask, nil)
signalstack(nil, 0)
}
@@ -447,6 +465,6 @@ func signalstack(p *byte, n int32) {
sigaltstack(&st, nil)
}
-func unblocksignals() {
- sigprocmask(_SIG_SETMASK, &sigset_none, nil)
+func updatesigmask(m sigmask) {
+ sigprocmask(_SIG_SETMASK, &m[0], nil)
}
diff --git a/src/runtime/os1_dragonfly.go b/src/runtime/os1_dragonfly.go
index a590aea39b..eb42b54e2b 100644
--- a/src/runtime/os1_dragonfly.go
+++ b/src/runtime/os1_dragonfly.go
@@ -12,7 +12,6 @@ const (
_HW_NCPU = 3
)
-var sigset_none = sigset{}
var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}}
func getncpu() int32 {
@@ -120,6 +119,14 @@ func mpreinit(mp *m) {
mp.gsignal.m = mp
}
+func msigsave(mp *m) {
+ smask := (*sigset)(unsafe.Pointer(&mp.sigmask))
+ if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) {
+ throw("insufficient storage for signal mask")
+ }
+ sigprocmask(nil, smask)
+}
+
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, can not allocate memory.
func minit() {
@@ -130,11 +137,22 @@ func minit() {
// Initialize signal handling
signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024)
- sigprocmask(&sigset_none, nil)
+
+ // restore signal mask from m.sigmask and unblock essential signals
+ nmask := *(*sigset)(unsafe.Pointer(&_g_.m.sigmask))
+ for i := range sigtable {
+ if sigtable[i].flags&_SigUnblock != 0 {
+ nmask.__bits[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31)
+ }
+ }
+ sigprocmask(&nmask, nil)
}
// Called from dropm to undo the effect of an minit.
func unminit() {
+ _g_ := getg()
+ smask := (*sigset)(unsafe.Pointer(&_g_.m.sigmask))
+ sigprocmask(smask, nil)
signalstack(nil, 0)
}
@@ -215,6 +233,8 @@ func signalstack(p *byte, n int32) {
sigaltstack(&st, nil)
}
-func unblocksignals() {
- sigprocmask(&sigset_none, nil)
+func updatesigmask(m sigmask) {
+ var mask sigset
+ copy(mask.__bits[:], m[:])
+ sigprocmask(&mask, nil)
}
diff --git a/src/runtime/os1_freebsd.go b/src/runtime/os1_freebsd.go
index 8719a49286..f7f34bd386 100644
--- a/src/runtime/os1_freebsd.go
+++ b/src/runtime/os1_freebsd.go
@@ -12,7 +12,6 @@ const (
_HW_NCPU = 3
)
-var sigset_none = sigset{}
var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}}
func getncpu() int32 {
@@ -119,6 +118,14 @@ func mpreinit(mp *m) {
mp.gsignal.m = mp
}
+func msigsave(mp *m) {
+ smask := (*sigset)(unsafe.Pointer(&mp.sigmask))
+ if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) {
+ throw("insufficient storage for signal mask")
+ }
+ sigprocmask(nil, smask)
+}
+
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, can not allocate memory.
func minit() {
@@ -132,11 +139,22 @@ func minit() {
// Initialize signal handling.
signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024)
- sigprocmask(&sigset_none, nil)
+
+ // restore signal mask from m.sigmask and unblock essential signals
+ nmask := *(*sigset)(unsafe.Pointer(&_g_.m.sigmask))
+ for i := range sigtable {
+ if sigtable[i].flags&_SigUnblock != 0 {
+ nmask.__bits[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31)
+ }
+ }
+ sigprocmask(&nmask, nil)
}
// Called from dropm to undo the effect of an minit.
func unminit() {
+ _g_ := getg()
+ smask := (*sigset)(unsafe.Pointer(&_g_.m.sigmask))
+ sigprocmask(smask, nil)
signalstack(nil, 0)
}
@@ -217,6 +235,8 @@ func signalstack(p *byte, n int32) {
sigaltstack(&st, nil)
}
-func unblocksignals() {
- sigprocmask(&sigset_none, nil)
+func updatesigmask(m [(_NSIG + 31) / 32]uint32) {
+ var mask sigset
+ copy(mask.__bits[:], m[:])
+ sigprocmask(&mask, nil)
}
diff --git a/src/runtime/os1_linux.go b/src/runtime/os1_linux.go
index e4b18c79b3..02f98d7c5f 100644
--- a/src/runtime/os1_linux.go
+++ b/src/runtime/os1_linux.go
@@ -6,7 +6,6 @@ package runtime
import "unsafe"
-var sigset_none sigset
var sigset_all sigset = sigset{^uint32(0), ^uint32(0)}
// Linux futex.
@@ -190,17 +189,36 @@ func mpreinit(mp *m) {
mp.gsignal.m = mp
}
+func msigsave(mp *m) {
+ smask := (*sigset)(unsafe.Pointer(&mp.sigmask))
+ if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) {
+ throw("insufficient storage for signal mask")
+ }
+ rtsigprocmask(_SIG_SETMASK, nil, smask, int32(unsafe.Sizeof(*smask)))
+}
+
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, can not allocate memory.
func minit() {
// Initialize signal handling.
_g_ := getg()
signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024)
- rtsigprocmask(_SIG_SETMASK, &sigset_none, nil, int32(unsafe.Sizeof(sigset_none)))
+
+ // restore signal mask from m.sigmask and unblock essential signals
+ nmask := *(*sigset)(unsafe.Pointer(&_g_.m.sigmask))
+ for i := range sigtable {
+ if sigtable[i].flags&_SigUnblock != 0 {
+ nmask[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31)
+ }
+ }
+ rtsigprocmask(_SIG_SETMASK, &nmask, nil, int32(unsafe.Sizeof(nmask)))
}
// Called from dropm to undo the effect of an minit.
func unminit() {
+ _g_ := getg()
+ smask := (*sigset)(unsafe.Pointer(&_g_.m.sigmask))
+ rtsigprocmask(_SIG_SETMASK, smask, nil, int32(unsafe.Sizeof(*smask)))
signalstack(nil, 0)
}
@@ -304,6 +322,8 @@ func signalstack(p *byte, n int32) {
sigaltstack(&st, nil)
}
-func unblocksignals() {
- rtsigprocmask(_SIG_SETMASK, &sigset_none, nil, int32(unsafe.Sizeof(sigset_none)))
+func updatesigmask(m sigmask) {
+ var mask sigset
+ copy(mask[:], m[:])
+ rtsigprocmask(_SIG_SETMASK, &mask, nil, int32(unsafe.Sizeof(mask)))
}
diff --git a/src/runtime/os1_nacl.go b/src/runtime/os1_nacl.go
index dbb5dec2fd..66e60f8b12 100644
--- a/src/runtime/os1_nacl.go
+++ b/src/runtime/os1_nacl.go
@@ -15,6 +15,9 @@ func mpreinit(mp *m) {
func sigtramp()
+func msigsave(mp *m) {
+}
+
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, can not allocate memory.
func minit() {
diff --git a/src/runtime/os1_netbsd.go b/src/runtime/os1_netbsd.go
index 8df74b5593..3fb05989e7 100644
--- a/src/runtime/os1_netbsd.go
+++ b/src/runtime/os1_netbsd.go
@@ -17,7 +17,6 @@ const (
_CLOCK_MONOTONIC = 3
)
-var sigset_none = sigset{}
var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}}
// From NetBSD's <sys/sysctl.h>
@@ -139,6 +138,14 @@ func mpreinit(mp *m) {
mp.gsignal.m = mp
}
+func msigsave(mp *m) {
+ smask := (*sigset)(unsafe.Pointer(&mp.sigmask))
+ if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) {
+ throw("insufficient storage for signal mask")
+ }
+ sigprocmask(_SIG_SETMASK, nil, smask)
+}
+
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, can not allocate memory.
func minit() {
@@ -147,11 +154,23 @@ func minit() {
// Initialize signal handling
signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024)
- sigprocmask(_SIG_SETMASK, &sigset_none, nil)
+
+ // restore signal mask from m.sigmask and unblock essential signals
+ nmask := *(*sigset)(unsafe.Pointer(&_g_.m.sigmask))
+ for i := range sigtable {
+ if sigtable[i].flags&_SigUnblock != 0 {
+ nmask.__bits[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31)
+ }
+ }
+ sigprocmask(_SIG_SETMASK, &nmask, nil)
}
// Called from dropm to undo the effect of an minit.
func unminit() {
+ _g_ := getg()
+ smask := (*sigset)(unsafe.Pointer(&_g_.m.sigmask))
+ sigprocmask(_SIG_SETMASK, smask, nil)
+
signalstack(nil, 0)
}
@@ -206,6 +225,8 @@ func signalstack(p *byte, n int32) {
sigaltstack(&st, nil)
}
-func unblocksignals() {
- sigprocmask(_SIG_SETMASK, &sigset_none, nil)
+func updatesigmask(m sigmask) {
+ var mask sigset
+ copy(mask.__bits[:], m[:])
+ sigprocmask(_SIG_SETMASK, &mask, nil)
}
diff --git a/src/runtime/os1_openbsd.go b/src/runtime/os1_openbsd.go
index 95729a56df..5ccf642468 100644
--- a/src/runtime/os1_openbsd.go
+++ b/src/runtime/os1_openbsd.go
@@ -148,6 +148,14 @@ func mpreinit(mp *m) {
mp.gsignal.m = mp
}
+func msigsave(mp *m) {
+ smask := (*uint32)(unsafe.Pointer(&mp.sigmask))
+ if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) {
+ throw("insufficient storage for signal mask")
+ }
+ *smask = sigprocmask(_SIG_BLOCK, 0)
+}
+
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, can not allocate memory.
func minit() {
@@ -158,11 +166,22 @@ func minit() {
// Initialize signal handling
signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024)
- sigprocmask(_SIG_SETMASK, sigset_none)
+
+ // restore signal mask from m.sigmask and unblock essential signals
+ nmask := *(*uint32)(unsafe.Pointer(&_g_.m.sigmask))
+ for i := range sigtable {
+ if sigtable[i].flags&_SigUnblock != 0 {
+ nmask &^= 1 << (uint32(i) - 1)
+ }
+ }
+ sigprocmask(_SIG_SETMASK, nmask)
}
// Called from dropm to undo the effect of an minit.
func unminit() {
+ _g_ := getg()
+ smask := *(*uint32)(unsafe.Pointer(&_g_.m.sigmask))
+ sigprocmask(_SIG_SETMASK, smask)
signalstack(nil, 0)
}
@@ -217,6 +236,6 @@ func signalstack(p *byte, n int32) {
sigaltstack(&st, nil)
}
-func unblocksignals() {
- sigprocmask(_SIG_SETMASK, sigset_none)
+func updatesigmask(m sigmask) {
+ sigprocmask(_SIG_SETMASK, m[0])
}
diff --git a/src/runtime/os1_plan9.go b/src/runtime/os1_plan9.go
index c026218241..bda7057f44 100644
--- a/src/runtime/os1_plan9.go
+++ b/src/runtime/os1_plan9.go
@@ -18,6 +18,9 @@ func mpreinit(mp *m) {
mp.errstr = (*byte)(mallocgc(_ERRMAX, nil, _FlagNoScan))
}
+func msigsave(mp *m) {
+}
+
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, can not allocate memory.
func minit() {
@@ -177,7 +180,7 @@ func exit(e int) {
} else {
// build error string
var tmp [32]byte
- status = []byte(gostringnocopy(&itoa(tmp[:len(tmp)-1], uint64(e))[0]))
+ status = append(itoa(tmp[:len(tmp)-1], uint64(e)), 0)
}
goexitsall(&status[0])
exits(&status[0])
diff --git a/src/runtime/os1_windows.go b/src/runtime/os1_windows.go
index 5719b320f5..bc472d0de9 100644
--- a/src/runtime/os1_windows.go
+++ b/src/runtime/os1_windows.go
@@ -292,6 +292,9 @@ func newosproc(mp *m, stk unsafe.Pointer) {
func mpreinit(mp *m) {
}
+func msigsave(mp *m) {
+}
+
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, can not allocate memory.
func minit() {
diff --git a/src/runtime/os3_solaris.go b/src/runtime/os3_solaris.go
index 69ac5b4970..e4fe92de41 100644
--- a/src/runtime/os3_solaris.go
+++ b/src/runtime/os3_solaris.go
@@ -114,7 +114,6 @@ var (
libc_write libcFunc
)
-var sigset_none = sigset{}
var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}}
func getncpu() int32 {
@@ -190,6 +189,14 @@ func mpreinit(mp *m) {
func miniterrno()
+func msigsave(mp *m) {
+ smask := (*sigset)(unsafe.Pointer(&mp.sigmask))
+ if unsafe.Sizeof(*smask) > unsafe.Sizeof(mp.sigmask) {
+ throw("insufficient storage for signal mask")
+ }
+ sigprocmask(_SIG_SETMASK, nil, smask)
+}
+
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, can not allocate memory.
func minit() {
@@ -197,11 +204,23 @@ func minit() {
asmcgocall(unsafe.Pointer(funcPC(miniterrno)), unsafe.Pointer(&libc____errno))
// Initialize signal handling
signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024)
- sigprocmask(_SIG_SETMASK, &sigset_none, nil)
+
+ // restore signal mask from m.sigmask and unblock essential signals
+ nmask := *(*sigset)(unsafe.Pointer(&_g_.m.sigmask))
+ for i := range sigtable {
+ if sigtable[i].flags&_SigUnblock != 0 {
+ nmask.__sigbits[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31)
+ }
+ }
+ sigprocmask(_SIG_SETMASK, &nmask, nil)
}
// Called from dropm to undo the effect of an minit.
func unminit() {
+ _g_ := getg()
+ smask := (*sigset)(unsafe.Pointer(&_g_.m.sigmask))
+ sigprocmask(_SIG_SETMASK, smask, nil)
+
signalstack(nil, 0)
}
@@ -278,8 +297,10 @@ func signalstack(p *byte, n int32) {
sigaltstack(&st, nil)
}
-func unblocksignals() {
- sigprocmask(_SIG_SETMASK, &sigset_none, nil)
+func updatesigmask(m sigmask) {
+ var mask sigset
+ copy(mask.__sigbits[:], m[:])
+ sigprocmask(_SIG_SETMASK, &mask, nil)
}
//go:nosplit
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index 0e4086c7ef..47563f450e 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -188,16 +188,6 @@ func newdefer(siz int32) *_defer {
d = (*_defer)(mallocgc(total, deferType, 0))
}
d.siz = siz
- if mheap_.shadow_enabled {
- // This memory will be written directly, with no write barrier,
- // and then scanned like stacks during collection.
- // Unlike real stacks, it is from heap spans, so mark the
- // shadow as explicitly unusable.
- p := deferArgs(d)
- for i := uintptr(0); i+ptrSize <= uintptr(siz); i += ptrSize {
- writebarrierptr_noshadow((*uintptr)(add(p, i)))
- }
- }
gp := mp.curg
d.link = gp._defer
gp._defer = d
@@ -214,12 +204,6 @@ func freedefer(d *_defer) {
if d.fn != nil {
freedeferfn()
}
- if mheap_.shadow_enabled {
- // Undo the marking in newdefer.
- systemstack(func() {
- clearshadow(uintptr(deferArgs(d)), uintptr(d.siz))
- })
- }
sc := deferclass(uintptr(d.siz))
if sc < uintptr(len(p{}.deferpool)) {
mp := acquirem()
diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go
index b3d0ae9b64..4290edb7be 100644
--- a/src/runtime/pprof/pprof.go
+++ b/src/runtime/pprof/pprof.go
@@ -442,35 +442,33 @@ func writeHeap(w io.Writer, debug int) error {
// Print memstats information too.
// Pprof will ignore, but useful for people
- if debug > 0 {
- s := new(runtime.MemStats)
- runtime.ReadMemStats(s)
- fmt.Fprintf(w, "\n# runtime.MemStats\n")
- fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc)
- fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc)
- fmt.Fprintf(w, "# Sys = %d\n", s.Sys)
- fmt.Fprintf(w, "# Lookups = %d\n", s.Lookups)
- fmt.Fprintf(w, "# Mallocs = %d\n", s.Mallocs)
- fmt.Fprintf(w, "# Frees = %d\n", s.Frees)
+ s := new(runtime.MemStats)
+ runtime.ReadMemStats(s)
+ fmt.Fprintf(w, "\n# runtime.MemStats\n")
+ fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc)
+ fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc)
+ fmt.Fprintf(w, "# Sys = %d\n", s.Sys)
+ fmt.Fprintf(w, "# Lookups = %d\n", s.Lookups)
+ fmt.Fprintf(w, "# Mallocs = %d\n", s.Mallocs)
+ fmt.Fprintf(w, "# Frees = %d\n", s.Frees)
- fmt.Fprintf(w, "# HeapAlloc = %d\n", s.HeapAlloc)
- fmt.Fprintf(w, "# HeapSys = %d\n", s.HeapSys)
- fmt.Fprintf(w, "# HeapIdle = %d\n", s.HeapIdle)
- fmt.Fprintf(w, "# HeapInuse = %d\n", s.HeapInuse)
- fmt.Fprintf(w, "# HeapReleased = %d\n", s.HeapReleased)
- fmt.Fprintf(w, "# HeapObjects = %d\n", s.HeapObjects)
+ fmt.Fprintf(w, "# HeapAlloc = %d\n", s.HeapAlloc)
+ fmt.Fprintf(w, "# HeapSys = %d\n", s.HeapSys)
+ fmt.Fprintf(w, "# HeapIdle = %d\n", s.HeapIdle)
+ fmt.Fprintf(w, "# HeapInuse = %d\n", s.HeapInuse)
+ fmt.Fprintf(w, "# HeapReleased = %d\n", s.HeapReleased)
+ fmt.Fprintf(w, "# HeapObjects = %d\n", s.HeapObjects)
- fmt.Fprintf(w, "# Stack = %d / %d\n", s.StackInuse, s.StackSys)
- fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys)
- fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys)
- fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys)
+ fmt.Fprintf(w, "# Stack = %d / %d\n", s.StackInuse, s.StackSys)
+ fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys)
+ fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys)
+ fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys)
- fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC)
- fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs)
- fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC)
- fmt.Fprintf(w, "# EnableGC = %v\n", s.EnableGC)
- fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC)
- }
+ fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC)
+ fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs)
+ fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC)
+ fmt.Fprintf(w, "# EnableGC = %v\n", s.EnableGC)
+ fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC)
if tw != nil {
tw.Flush()
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index f725fc890b..805b96e627 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -203,7 +203,7 @@ func acquireSudog() *sudog {
// acquireSudog, acquireSudog calls new(sudog),
// new calls malloc, malloc can call the garbage collector,
// and the garbage collector calls the semaphore implementation
- // in stoptheworld.
+ // in stopTheWorld.
// Break the cycle by doing acquirem/releasem around new(sudog).
// The acquirem/releasem increments m.locks during new(sudog),
// which keeps the garbage collector from being invoked.
diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go
index 00535da77d..c070f7d773 100644
--- a/src/runtime/proc1.go
+++ b/src/runtime/proc1.go
@@ -59,7 +59,6 @@ func schedinit() {
goargs()
goenvs()
parsedebugvars()
- wbshadowinit()
gcinit()
sched.lastpoll = uint64(nanotime())
@@ -212,7 +211,7 @@ func helpgc(nproc int32) {
// sched.stopwait to in order to request that all Gs permanently stop.
const freezeStopWait = 0x7fffffff
-// Similar to stoptheworld but best-effort and can be called several times.
+// Similar to stopTheWorld but best-effort and can be called several times.
// There is no reverse operation, used during crashing.
// This function must not lock any mutexes.
func freezetheworld() {
@@ -466,94 +465,68 @@ func stopscanstart(gp *g) {
}
}
-// Runs on g0 and does the actual work after putting the g back on the run queue.
-func mquiesce(gpmaster *g) {
- // enqueue the calling goroutine.
- restartg(gpmaster)
-
- activeglen := len(allgs)
- for i := 0; i < activeglen; i++ {
- gp := allgs[i]
- if readgstatus(gp) == _Gdead {
- gp.gcworkdone = true // noop scan.
- } else {
- gp.gcworkdone = false
- }
- stopscanstart(gp)
- }
-
- // Check that the G's gcwork (such as scanning) has been done. If not do it now.
- // You can end up doing work here if the page trap on a Grunning Goroutine has
- // not been sprung or in some race situations. For example a runnable goes dead
- // and is started up again with a gp->gcworkdone set to false.
- for i := 0; i < activeglen; i++ {
- gp := allgs[i]
- for !gp.gcworkdone {
- status := readgstatus(gp)
- if status == _Gdead {
- //do nothing, scan not needed.
- gp.gcworkdone = true // scan is a noop
- break
- }
- if status == _Grunning && gp.stackguard0 == uintptr(stackPreempt) && notetsleep(&sched.stopnote, 100*1000) { // nanosecond arg
- noteclear(&sched.stopnote)
- } else {
- stopscanstart(gp)
- }
- }
- }
-
- for i := 0; i < activeglen; i++ {
- gp := allgs[i]
- status := readgstatus(gp)
- if isscanstatus(status) {
- print("mstopandscang:bottom: post scan bad status gp=", gp, " has status ", hex(status), "\n")
- dumpgstatus(gp)
- }
- if !gp.gcworkdone && status != _Gdead {
- print("mstopandscang:bottom: post scan gp=", gp, "->gcworkdone still false\n")
- dumpgstatus(gp)
- }
- }
-
- schedule() // Never returns.
+// stopTheWorld stops all P's from executing goroutines, interrupting
+// all goroutines at GC safe points and records reason as the reason
+// for the stop. On return, only the current goroutine's P is running.
+// stopTheWorld must not be called from a system stack and the caller
+// must not hold worldsema. The caller must call startTheWorld when
+// other P's should resume execution.
+//
+// stopTheWorld is safe for multiple goroutines to call at the
+// same time. Each will execute its own stop, and the stops will
+// be serialized.
+//
+// This is also used by routines that do stack dumps. If the system is
+// in panic or being exited, this may not reliably stop all
+// goroutines.
+func stopTheWorld(reason string) {
+ semacquire(&worldsema, false)
+ getg().m.preemptoff = reason
+ systemstack(stopTheWorldWithSema)
}
-// quiesce moves all the goroutines to a GC safepoint which for now is a at preemption point.
-// If the global gcphase is GCmark quiesce will ensure that all of the goroutine's stacks
-// have been scanned before it returns.
-func quiesce(mastergp *g) {
- castogscanstatus(mastergp, _Grunning, _Gscanenqueue)
- // Now move this to the g0 (aka m) stack.
- // g0 will potentially scan this thread and put mastergp on the runqueue
- mcall(mquiesce)
+// startTheWorld undoes the effects of stopTheWorld.
+func startTheWorld() {
+ systemstack(startTheWorldWithSema)
+ // worldsema must be held over startTheWorldWithSema to ensure
+ // gomaxprocs cannot change while worldsema is held.
+ semrelease(&worldsema)
+ getg().m.preemptoff = ""
}
-// Holding worldsema grants an M the right to try to stop the world.
-// The procedure is:
+// Holding worldsema grants an M the right to try to stop the world
+// and prevents gomaxprocs from changing concurrently.
+var worldsema uint32 = 1
+
+// stopTheWorldWithSema is the core implementation of stopTheWorld.
+// The caller is responsible for acquiring worldsema and disabling
+// preemption first and then should call stopTheWorldWithSema on the system
+// stack:
//
-// semacquire(&worldsema);
-// m.preemptoff = "reason";
-// stoptheworld();
+// semacquire(&worldsema, false)
+// m.preemptoff = "reason"
+// systemstack(stopTheWorldWithSema)
//
-// ... do stuff ...
+// When finished, the caller must either call startTheWorld or undo
+// these three operations separately:
//
-// m.preemptoff = "";
-// semrelease(&worldsema);
-// starttheworld();
+// m.preemptoff = ""
+// systemstack(startTheWorldWithSema)
+// semrelease(&worldsema)
//
-var worldsema uint32 = 1
-
-// This is used by the GC as well as the routines that do stack dumps. In the case
-// of GC all the routines can be reliably stopped. This is not always the case
-// when the system is in panic or being exited.
-func stoptheworld() {
+// It is allowed to acquire worldsema once and then execute multiple
+// startTheWorldWithSema/stopTheWorldWithSema pairs.
+// Other P's are able to execute between successive calls to
+// startTheWorldWithSema and stopTheWorldWithSema.
+// Holding worldsema causes any other goroutines invoking
+// stopTheWorld to block.
+func stopTheWorldWithSema() {
_g_ := getg()
// If we hold a lock, then we won't be able to stop another M
// that is blocked trying to acquire the lock.
if _g_.m.locks > 0 {
- throw("stoptheworld: holding locks")
+ throw("stopTheWorld: holding locks")
}
lock(&sched.lock)
@@ -600,12 +573,12 @@ func stoptheworld() {
}
}
if sched.stopwait != 0 {
- throw("stoptheworld: not stopped")
+ throw("stopTheWorld: not stopped")
}
for i := 0; i < int(gomaxprocs); i++ {
p := allp[i]
if p.status != _Pgcstop {
- throw("stoptheworld: not stopped")
+ throw("stopTheWorld: not stopped")
}
}
}
@@ -615,7 +588,7 @@ func mhelpgc() {
_g_.m.helpgc = -1
}
-func starttheworld() {
+func startTheWorldWithSema() {
_g_ := getg()
_g_.m.locks++ // disable preemption because it can be holding p in a local var
@@ -644,7 +617,7 @@ func starttheworld() {
mp := p.m.ptr()
p.m = 0
if mp.nextp != 0 {
- throw("starttheworld: inconsistent mp->nextp")
+ throw("startTheWorld: inconsistent mp->nextp")
}
mp.nextp.set(p)
notewakeup(&mp.park)
@@ -754,10 +727,10 @@ func forEachP(fn func(*p)) {
_p_ := getg().m.p.ptr()
lock(&sched.lock)
- if sched.stopwait != 0 {
- throw("forEachP: sched.stopwait != 0")
+ if sched.safePointWait != 0 {
+ throw("forEachP: sched.safePointWait != 0")
}
- sched.stopwait = gomaxprocs - 1
+ sched.safePointWait = gomaxprocs - 1
sched.safePointFn = fn
// Ask all Ps to run the safe point function.
@@ -777,11 +750,11 @@ func forEachP(fn func(*p)) {
for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() {
if cas(&p.runSafePointFn, 1, 0) {
fn(p)
- sched.stopwait--
+ sched.safePointWait--
}
}
- wait := sched.stopwait > 0
+ wait := sched.safePointWait > 0
unlock(&sched.lock)
// Run fn for the current P.
@@ -807,15 +780,15 @@ func forEachP(fn func(*p)) {
for {
// Wait for 100us, then try to re-preempt in
// case of any races.
- if notetsleep(&sched.stopnote, 100*1000) {
- noteclear(&sched.stopnote)
+ if notetsleep(&sched.safePointNote, 100*1000) {
+ noteclear(&sched.safePointNote)
break
}
preemptall()
}
}
- if sched.stopwait != 0 {
- throw("forEachP: not stopped")
+ if sched.safePointWait != 0 {
+ throw("forEachP: not done")
}
for i := 0; i < int(gomaxprocs); i++ {
p := allp[i]
@@ -851,9 +824,9 @@ func runSafePointFn() {
}
sched.safePointFn(p)
lock(&sched.lock)
- sched.stopwait--
- if sched.stopwait == 0 {
- notewakeup(&sched.stopnote)
+ sched.safePointWait--
+ if sched.safePointWait == 0 {
+ notewakeup(&sched.safePointNote)
}
unlock(&sched.lock)
}
@@ -971,6 +944,7 @@ func needm(x byte) {
_g_.stack.lo = uintptr(noescape(unsafe.Pointer(&x))) - 32*1024
_g_.stackguard0 = _g_.stack.lo + _StackGuard
+ msigsave(mp)
// Initialize this thread to use the m.
asminit()
minit()
@@ -1098,6 +1072,7 @@ func unlockextra(mp *m) {
func newm(fn func(), _p_ *p) {
mp := allocm(_p_, fn)
mp.nextp.set(_p_)
+ msigsave(mp)
if iscgo {
var ts cgothreadstart
if _cgo_thread_start == nil {
@@ -1226,9 +1201,9 @@ func handoffp(_p_ *p) {
}
if _p_.runSafePointFn != 0 && cas(&_p_.runSafePointFn, 1, 0) {
sched.safePointFn(_p_)
- sched.stopwait--
- if sched.stopwait == 0 {
- notewakeup(&sched.stopnote)
+ sched.safePointWait--
+ if sched.safePointWait == 0 {
+ notewakeup(&sched.safePointNote)
}
}
if sched.runqsize != 0 {
@@ -1305,7 +1280,7 @@ func startlockedm(gp *g) {
stopm()
}
-// Stops the current m for stoptheworld.
+// Stops the current m for stopTheWorld.
// Returns when the world is restarted.
func gcstopm() {
_g_ := getg()
@@ -1421,7 +1396,7 @@ top:
xadd(&sched.nmspinning, 1)
}
// random steal from other P's
- for i := 0; i < int(2*gomaxprocs); i++ {
+ for i := 0; i < int(4*gomaxprocs); i++ {
if sched.gcwaiting != 0 {
goto top
}
@@ -1430,18 +1405,20 @@ top:
if _p_ == _g_.m.p.ptr() {
gp, _ = runqget(_p_)
} else {
- gp = runqsteal(_g_.m.p.ptr(), _p_)
+ stealRunNextG := i > 2*int(gomaxprocs) // first look for ready queues with more than 1 g
+ gp = runqsteal(_g_.m.p.ptr(), _p_, stealRunNextG)
}
if gp != nil {
return gp, false
}
}
+
stop:
- // We have nothing to do. If we're in the GC mark phaseand can
+ // We have nothing to do. If we're in the GC mark phase and can
// safely scan and blacken objects, run idle-time marking
// rather than give up the P.
- if _p_ := _g_.m.p.ptr(); gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != nil {
+ if _p_ := _g_.m.p.ptr(); gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != nil && gcMarkWorkAvailable(_p_) {
_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
gp := _p_.gcBgMarkWorker
casgstatus(gp, _Gwaiting, _Grunnable)
@@ -2484,11 +2461,9 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
mp.mallocing++
// Define that a "user g" is a user-created goroutine, and a "system g"
- // is one that is m->g0 or m->gsignal. We've only made sure that we
- // can unwind user g's, so exclude the system g's.
+ // is one that is m->g0 or m->gsignal.
//
- // It is not quite as easy as testing gp == m->curg (the current user g)
- // because we might be interrupted for profiling halfway through a
+ // We might be interrupted for profiling halfway through a
// goroutine switch. The switch involves updating three (or four) values:
// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
// because once it gets updated the new g is running.
@@ -2497,8 +2472,7 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
// so the update only affects g, SP, and PC. Since PC must be last, there
// the possible partial transitions in ordinary execution are (1) g alone is updated,
// (2) both g and SP are updated, and (3) SP alone is updated.
- // If g is updated, we'll see a system g and not look closer.
- // If SP alone is updated, we can detect the partial transition by checking
+ // If SP or g alone is updated, we can detect the partial transition by checking
// whether the SP is within g's stack bounds. (We could also require that SP
// be changed only after g, but the stack bounds check is needed by other
// cases, so there is no need to impose an additional requirement.)
@@ -2527,15 +2501,11 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
// disabled, so a profiling signal cannot arrive then anyway.
//
// Third, the common case: it may be that the switch updates g, SP, and PC
- // separately, as in gogo.
- //
- // Because gogo is the only instance, we check whether the PC lies
- // within that function, and if so, not ask for a traceback. This approach
- // requires knowing the size of the gogo function, which we
- // record in arch_*.h and check in runtime_test.go.
+ // separately. If the PC is within any of the functions that do this,
+ // we don't ask for a traceback. See the function setsSP for more about this.
//
// There is another apparently viable approach, recorded here in case
- // the "PC within gogo" check turns out not to be usable.
+ // the "PC within setsSP function" check turns out not to be usable.
// It would be possible to delay the update of either g or SP until immediately
// before the PC update instruction. Then, because of the stack bounds check,
// the only problematic interrupt point is just before that PC update instruction,
@@ -2556,28 +2526,23 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
// transition. We simply require that g and SP match and that the PC is not
// in gogo.
traceback := true
- gogo := funcPC(gogo)
- if gp == nil || gp != mp.curg ||
- sp < gp.stack.lo || gp.stack.hi < sp ||
- (gogo <= pc && pc < gogo+_RuntimeGogoBytes) {
+ if gp == nil || sp < gp.stack.lo || gp.stack.hi < sp || setsSP(pc) {
traceback = false
}
-
var stk [maxCPUProfStack]uintptr
n := 0
- if traceback {
- n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap)
+ if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 {
+ // Cgo, we can't unwind and symbolize arbitrary C code,
+ // so instead collect Go stack that leads to the cgo call.
+ // This is especially important on windows, since all syscalls are cgo calls.
+ n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0)
+ } else if traceback {
+ n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap|_TraceJumpStack)
}
if !traceback || n <= 0 {
// Normal traceback is impossible or has failed.
// See if it falls into several common cases.
n = 0
- if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 {
- // Cgo, we can't unwind and symbolize arbitrary C code,
- // so instead collect Go stack that leads to the cgo call.
- // This is especially important on windows, since all syscalls are cgo calls.
- n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0)
- }
if GOOS == "windows" && n == 0 && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 {
// Libcall, i.e. runtime syscall on windows.
// Collect Go stack that leads to the call.
@@ -2612,6 +2577,30 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
mp.mallocing--
}
+// Reports whether a function will set the SP
+// to an absolute value. Important that
+// we don't traceback when these are at the bottom
+// of the stack since we can't be sure that we will
+// find the caller.
+//
+// If the function is not on the bottom of the stack
+// we assume that it will have set it up so that traceback will be consistent,
+// either by being a traceback terminating function
+// or putting one on the stack at the right offset.
+func setsSP(pc uintptr) bool {
+ f := findfunc(pc)
+ if f == nil {
+ // couldn't find the function for this PC,
+ // so assume the worst and stop traceback
+ return true
+ }
+ switch f.entry {
+ case gogoPC, systemstackPC, mcallPC, morestackPC:
+ return true
+ }
+ return false
+}
+
// Arrange to call fn with a traceback hz times a second.
func setcpuprofilerate_m(hz int32) {
// Force sane arguments.
@@ -3447,23 +3436,34 @@ func runqget(_p_ *p) (gp *g, inheritTime bool) {
}
}
-// Grabs a batch of goroutines from local runnable queue.
-// batch array must be of size len(p->runq)/2. Returns number of grabbed goroutines.
+// Grabs a batch of goroutines from _p_'s runnable queue into batch.
+// Batch is a ring buffer starting at batchHead.
+// Returns number of grabbed goroutines.
// Can be executed by any P.
-func runqgrab(_p_ *p, batch []*g) uint32 {
+func runqgrab(_p_ *p, batch *[256]*g, batchHead uint32, stealRunNextG bool) uint32 {
for {
h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers
t := atomicload(&_p_.runqtail) // load-acquire, synchronize with the producer
n := t - h
n = n - n/2
if n == 0 {
- // Try to steal from _p_.runnext.
- if next := _p_.runnext; next != 0 {
- if !_p_.runnext.cas(next, 0) {
- continue
+ if stealRunNextG {
+ // Try to steal from _p_.runnext.
+ if next := _p_.runnext; next != 0 {
+ // Sleep to ensure that _p_ isn't about to run the g we
+ // are about to steal.
+ // The important use case here is when the g running on _p_
+ // ready()s another g and then almost immediately blocks.
+ // Instead of stealing runnext in this window, back off
+ // to give _p_ a chance to schedule runnext. This will avoid
+ // thrashing gs between different Ps.
+ usleep(100)
+ if !_p_.runnext.cas(next, 0) {
+ continue
+ }
+ batch[batchHead%uint32(len(batch))] = next.ptr()
+ return 1
}
- batch[0] = next.ptr()
- return 1
}
return 0
}
@@ -3471,7 +3471,8 @@ func runqgrab(_p_ *p, batch []*g) uint32 {
continue
}
for i := uint32(0); i < n; i++ {
- batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))]
+ g := _p_.runq[(h+i)%uint32(len(_p_.runq))]
+ batch[(batchHead+i)%uint32(len(batch))] = g
}
if cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
return n
@@ -3482,26 +3483,21 @@ func runqgrab(_p_ *p, batch []*g) uint32 {
// Steal half of elements from local runnable queue of p2
// and put onto local runnable queue of p.
// Returns one of the stolen elements (or nil if failed).
-func runqsteal(_p_, p2 *p) *g {
- var batch [len(_p_.runq) / 2]*g
-
- n := runqgrab(p2, batch[:])
+func runqsteal(_p_, p2 *p, stealRunNextG bool) *g {
+ t := _p_.runqtail
+ n := runqgrab(p2, &_p_.runq, t, stealRunNextG)
if n == 0 {
return nil
}
n--
- gp := batch[n]
+ gp := _p_.runq[(t+n)%uint32(len(_p_.runq))]
if n == 0 {
return gp
}
h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers
- t := _p_.runqtail
if t-h+n >= uint32(len(_p_.runq)) {
throw("runqsteal: runq overflow")
}
- for i := uint32(0); i < n; i++ {
- _p_.runq[(t+i)%uint32(len(_p_.runq))] = batch[i]
- }
atomicstore(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
return gp
}
@@ -3528,20 +3524,16 @@ func testSchedLocalQueue() {
}
}
-var pSink *p
-
func testSchedLocalQueueSteal() {
p1 := new(p)
p2 := new(p)
- pSink = p1 // Force to heap, too large to allocate on system stack ("G0 stack")
- pSink = p2 // Force to heap, too large to allocate on system stack ("G0 stack")
gs := make([]g, len(p1.runq))
for i := 0; i < len(p1.runq); i++ {
for j := 0; j < i; j++ {
gs[j].sig = 0
runqput(p1, &gs[j], false)
}
- gp := runqsteal(p2, p1)
+ gp := runqsteal(p2, p1, true)
s := 0
if gp != nil {
s++
diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go
index 4c5712d32f..4471ee5afb 100644
--- a/src/runtime/proc_test.go
+++ b/src/runtime/proc_test.go
@@ -7,6 +7,7 @@ package runtime_test
import (
"math"
"runtime"
+ "runtime/debug"
"sync"
"sync/atomic"
"syscall"
@@ -104,8 +105,8 @@ func TestGoroutineParallelism(t *testing.T) {
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P))
// If runtime triggers a forced GC during this test then it will deadlock,
// since the goroutines can't be stopped/preempted.
- // So give this test as much time as possible.
- runtime.GC()
+ // Disable GC for this test (see issue #10958).
+ defer debug.SetGCPercent(debug.SetGCPercent(-1))
for try := 0; try < N; try++ {
done := make(chan bool)
x := uint32(0)
diff --git a/src/runtime/runtime-gdb_test.go b/src/runtime/runtime-gdb_test.go
index fe7d38a39c..f4014b2e05 100644
--- a/src/runtime/runtime-gdb_test.go
+++ b/src/runtime/runtime-gdb_test.go
@@ -59,7 +59,7 @@ func TestGdbPython(t *testing.T) {
cmd := exec.Command("go", "build", "-o", "a.exe")
cmd.Dir = dir
- out, err := cmd.CombinedOutput()
+ out, err := testEnv(cmd).CombinedOutput()
if err != nil {
t.Fatalf("building source %v\n%s", err, out)
}
@@ -85,7 +85,7 @@ func TestGdbPython(t *testing.T) {
// stack frames on RISC architectures.
canBackTrace := false
switch runtime.GOARCH {
- case "amd64", "386":
+ case "amd64", "386", "ppc64", "ppc64le", "arm", "arm64":
canBackTrace = true
args = append(args,
"-ex", "echo BEGIN goroutine 2 bt\n",
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index ac539b9a9d..3ee5d5d29d 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -266,6 +266,7 @@ type m struct {
// Fields not known to debuggers.
procid uint64 // for debuggers, but offset not hard-coded
gsignal *g // signal-handling g
+ sigmask [4]uintptr // storage for saved signal mask
tls [4]uintptr // thread-local storage (for x86 extern register)
mstartfn func()
curg *g // current running goroutine
@@ -441,7 +442,9 @@ type schedt struct {
// safepointFn should be called on each P at the next GC
// safepoint if p.runSafePointFn is set.
- safePointFn func(*p)
+ safePointFn func(*p)
+ safePointWait int32
+ safePointNote note
profilehz int32 // cpu profiling rate
@@ -467,15 +470,16 @@ type sigtabtt struct {
}
const (
- _SigNotify = 1 << 0 // let signal.Notify have signal, even if from kernel
- _SigKill = 1 << 1 // if signal.Notify doesn't take it, exit quietly
- _SigThrow = 1 << 2 // if signal.Notify doesn't take it, exit loudly
- _SigPanic = 1 << 3 // if the signal is from the kernel, panic
- _SigDefault = 1 << 4 // if the signal isn't explicitly requested, don't monitor it
- _SigHandling = 1 << 5 // our signal handler is registered
- _SigIgnored = 1 << 6 // the signal was ignored before we registered for it
- _SigGoExit = 1 << 7 // cause all runtime procs to exit (only used on Plan 9).
- _SigSetStack = 1 << 8 // add SA_ONSTACK to libc handler
+ _SigNotify = 1 << iota // let signal.Notify have signal, even if from kernel
+ _SigKill // if signal.Notify doesn't take it, exit quietly
+ _SigThrow // if signal.Notify doesn't take it, exit loudly
+ _SigPanic // if the signal is from the kernel, panic
+ _SigDefault // if the signal isn't explicitly requested, don't monitor it
+ _SigHandling // our signal handler is registered
+ _SigIgnored // the signal was ignored before we registered for it
+ _SigGoExit // cause all runtime procs to exit (only used on Plan 9).
+ _SigSetStack // add SA_ONSTACK to libc handler
+ _SigUnblock // unblocked in minit
)
// Layout of in-memory per-function information prepared by linker
@@ -594,8 +598,9 @@ type stkframe struct {
}
const (
- _TraceRuntimeFrames = 1 << 0 // include frames for internal runtime functions.
- _TraceTrap = 1 << 1 // the initial PC, SP are from a trap, not a return PC from a call
+ _TraceRuntimeFrames = 1 << iota // include frames for internal runtime functions.
+ _TraceTrap // the initial PC, SP are from a trap, not a return PC from a call
+ _TraceJumpStack // if traceback is on a systemstack, resume trace at g that called into it
)
const (
diff --git a/src/runtime/runtime_test.go b/src/runtime/runtime_test.go
index d4cccbf084..f65562ab91 100644
--- a/src/runtime/runtime_test.go
+++ b/src/runtime/runtime_test.go
@@ -6,13 +6,8 @@ package runtime_test
import (
"io"
- "io/ioutil"
- "os"
- "os/exec"
. "runtime"
"runtime/debug"
- "strconv"
- "strings"
"testing"
"unsafe"
)
@@ -88,53 +83,6 @@ func BenchmarkDeferMany(b *testing.B) {
}
}
-// The profiling signal handler needs to know whether it is executing runtime.gogo.
-// The constant RuntimeGogoBytes in arch_*.h gives the size of the function;
-// we don't have a way to obtain it from the linker (perhaps someday).
-// Test that the constant matches the size determined by 'go tool nm -S'.
-// The value reported will include the padding between runtime.gogo and the
-// next function in memory. That's fine.
-func TestRuntimeGogoBytes(t *testing.T) {
- switch GOOS {
- case "android", "nacl":
- t.Skipf("skipping on %s", GOOS)
- case "darwin":
- switch GOARCH {
- case "arm", "arm64":
- t.Skipf("skipping on %s/%s, no fork", GOOS, GOARCH)
- }
- }
-
- dir, err := ioutil.TempDir("", "go-build")
- if err != nil {
- t.Fatalf("failed to create temp directory: %v", err)
- }
- defer os.RemoveAll(dir)
-
- out, err := exec.Command("go", "build", "-o", dir+"/hello", "../../test/helloworld.go").CombinedOutput()
- if err != nil {
- t.Fatalf("building hello world: %v\n%s", err, out)
- }
-
- out, err = exec.Command("go", "tool", "nm", "-size", dir+"/hello").CombinedOutput()
- if err != nil {
- t.Fatalf("go tool nm: %v\n%s", err, out)
- }
-
- for _, line := range strings.Split(string(out), "\n") {
- f := strings.Fields(line)
- if len(f) == 4 && f[3] == "runtime.gogo" {
- size, _ := strconv.Atoi(f[1])
- if GogoBytes() != int32(size) {
- t.Fatalf("RuntimeGogoBytes = %d, should be %d", GogoBytes(), size)
- }
- return
- }
- }
-
- t.Fatalf("go tool nm did not report size for runtime.gogo")
-}
-
// golang.org/issue/7063
func TestStopCPUProfilingWithProfilerOff(t *testing.T) {
SetCPUProfileRate(0)
diff --git a/src/runtime/signal1_unix.go b/src/runtime/signal1_unix.go
index 7577d43a64..d3e9dac097 100644
--- a/src/runtime/signal1_unix.go
+++ b/src/runtime/signal1_unix.go
@@ -19,6 +19,19 @@ const (
// Signal forwarding is currently available only on Linux.
var fwdSig [_NSIG]uintptr
+// sigmask represents a general signal mask compatible with the GOOS
+// specific sigset types: the signal numbered x is represented by bit x-1
+// to match the representation expected by sigprocmask.
+type sigmask [(_NSIG + 31) / 32]uint32
+
+// channels for synchronizing signal mask updates with the signal mask
+// thread
+var (
+ disableSigChan chan uint32
+ enableSigChan chan uint32
+ maskUpdatedChan chan struct{}
+)
+
func initsig() {
// _NSIG is the number of signals on this operating system.
// sigtable should describe what to do for all the possible signals.
@@ -61,12 +74,17 @@ func sigenable(sig uint32) {
}
t := &sigtable[sig]
- if t.flags&_SigNotify != 0 && t.flags&_SigHandling == 0 {
- t.flags |= _SigHandling
- if getsig(int32(sig)) == _SIG_IGN {
- t.flags |= _SigIgnored
+ if t.flags&_SigNotify != 0 {
+ ensureSigM()
+ enableSigChan <- sig
+ <-maskUpdatedChan
+ if t.flags&_SigHandling == 0 {
+ t.flags |= _SigHandling
+ if getsig(int32(sig)) == _SIG_IGN {
+ t.flags |= _SigIgnored
+ }
+ setsig(int32(sig), funcPC(sighandler), true)
}
- setsig(int32(sig), funcPC(sighandler), true)
}
}
@@ -76,12 +94,17 @@ func sigdisable(sig uint32) {
}
t := &sigtable[sig]
- if t.flags&_SigNotify != 0 && t.flags&_SigHandling != 0 {
- t.flags &^= _SigHandling
- if t.flags&_SigIgnored != 0 {
- setsig(int32(sig), _SIG_IGN, true)
- } else {
- setsig(int32(sig), _SIG_DFL, true)
+ if t.flags&_SigNotify != 0 {
+ ensureSigM()
+ disableSigChan <- sig
+ <-maskUpdatedChan
+ if t.flags&_SigHandling != 0 {
+ t.flags &^= _SigHandling
+ if t.flags&_SigIgnored != 0 {
+ setsig(int32(sig), _SIG_IGN, true)
+ } else {
+ setsig(int32(sig), _SIG_DFL, true)
+ }
}
}
}
@@ -130,7 +153,52 @@ func crash() {
}
}
- unblocksignals()
+ updatesigmask(sigmask{})
setsig(_SIGABRT, _SIG_DFL, false)
raise(_SIGABRT)
}
+
+// ensureSigM starts one global, sleeping thread to make sure at least one thread
+// is available to catch signals enabled for os/signal.
+func ensureSigM() {
+ if maskUpdatedChan != nil {
+ return
+ }
+ maskUpdatedChan = make(chan struct{})
+ disableSigChan = make(chan uint32)
+ enableSigChan = make(chan uint32)
+ go func() {
+ // Signal masks are per-thread, so make sure this goroutine stays on one
+ // thread.
+ LockOSThread()
+ defer UnlockOSThread()
+ // The sigBlocked mask contains the signals not active for os/signal,
+ // initially all signals except the essential. When signal.Notify()/Stop is called,
+ // sigenable/sigdisable in turn notify this thread to update its signal
+ // mask accordingly.
+ var sigBlocked sigmask
+ for i := range sigBlocked {
+ sigBlocked[i] = ^uint32(0)
+ }
+ for i := range sigtable {
+ if sigtable[i].flags&_SigUnblock != 0 {
+ sigBlocked[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31)
+ }
+ }
+ updatesigmask(sigBlocked)
+ for {
+ select {
+ case sig := <-enableSigChan:
+ if b := sig - 1; b >= 0 {
+ sigBlocked[b/32] &^= (1 << (b & 31))
+ }
+ case sig := <-disableSigChan:
+ if b := sig - 1; b >= 0 {
+ sigBlocked[b/32] |= (1 << (b & 31))
+ }
+ }
+ updatesigmask(sigBlocked)
+ maskUpdatedChan <- struct{}{}
+ }
+ }()
+}
diff --git a/src/runtime/signal_darwin.go b/src/runtime/signal_darwin.go
index 32ecce0d7d..6cd18653d5 100644
--- a/src/runtime/signal_darwin.go
+++ b/src/runtime/signal_darwin.go
@@ -16,14 +16,14 @@ var sigtable = [...]sigTabT{
/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
/* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"},
/* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"},
- /* 4 */ {_SigThrow, "SIGILL: illegal instruction"},
- /* 5 */ {_SigThrow, "SIGTRAP: trace trap"},
+ /* 4 */ {_SigThrow + _SigUnblock, "SIGILL: illegal instruction"},
+ /* 5 */ {_SigThrow + _SigUnblock, "SIGTRAP: trace trap"},
/* 6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"},
/* 7 */ {_SigThrow, "SIGEMT: emulate instruction executed"},
- /* 8 */ {_SigPanic, "SIGFPE: floating-point exception"},
+ /* 8 */ {_SigPanic + _SigUnblock, "SIGFPE: floating-point exception"},
/* 9 */ {0, "SIGKILL: kill"},
- /* 10 */ {_SigPanic, "SIGBUS: bus error"},
- /* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
+ /* 10 */ {_SigPanic + _SigUnblock, "SIGBUS: bus error"},
+ /* 11 */ {_SigPanic + _SigUnblock, "SIGSEGV: segmentation violation"},
/* 12 */ {_SigThrow, "SIGSYS: bad system call"},
/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
@@ -32,14 +32,14 @@ var sigtable = [...]sigTabT{
/* 17 */ {0, "SIGSTOP: stop"},
/* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
/* 19 */ {0, "SIGCONT: continue after stop"},
- /* 20 */ {_SigNotify, "SIGCHLD: child status has changed"},
+ /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"},
/* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
/* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
/* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
- /* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"},
+ /* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"},
/* 28 */ {_SigNotify, "SIGWINCH: window size change"},
/* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
/* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
diff --git a/src/runtime/signal_linux.go b/src/runtime/signal_linux.go
index f8250b9fa1..2f25b59663 100644
--- a/src/runtime/signal_linux.go
+++ b/src/runtime/signal_linux.go
@@ -16,20 +16,20 @@ var sigtable = [...]sigTabT{
/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
/* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"},
/* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"},
- /* 4 */ {_SigThrow, "SIGILL: illegal instruction"},
- /* 5 */ {_SigThrow, "SIGTRAP: trace trap"},
+ /* 4 */ {_SigThrow + _SigUnblock, "SIGILL: illegal instruction"},
+ /* 5 */ {_SigThrow + _SigUnblock, "SIGTRAP: trace trap"},
/* 6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"},
- /* 7 */ {_SigPanic, "SIGBUS: bus error"},
- /* 8 */ {_SigPanic, "SIGFPE: floating-point exception"},
+ /* 7 */ {_SigPanic + _SigUnblock, "SIGBUS: bus error"},
+ /* 8 */ {_SigPanic + _SigUnblock, "SIGFPE: floating-point exception"},
/* 9 */ {0, "SIGKILL: kill"},
/* 10 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
- /* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
+ /* 11 */ {_SigPanic + _SigUnblock, "SIGSEGV: segmentation violation"},
/* 12 */ {_SigNotify, "SIGUSR2: user-defined signal 2"},
/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
- /* 16 */ {_SigThrow, "SIGSTKFLT: stack fault"},
- /* 17 */ {_SigNotify, "SIGCHLD: child status has changed"},
+ /* 16 */ {_SigThrow + _SigUnblock, "SIGSTKFLT: stack fault"},
+ /* 17 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"},
/* 18 */ {0, "SIGCONT: continue"},
/* 19 */ {0, "SIGSTOP: stop, unblockable"},
/* 20 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
@@ -39,7 +39,7 @@ var sigtable = [...]sigTabT{
/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
- /* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"},
+ /* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"},
/* 28 */ {_SigNotify, "SIGWINCH: window size change"},
/* 29 */ {_SigNotify, "SIGIO: i/o now possible"},
/* 30 */ {_SigNotify, "SIGPWR: power failure restart"},
diff --git a/src/runtime/signal_netbsd.go b/src/runtime/signal_netbsd.go
index 78afc59efa..d93a450d98 100644
--- a/src/runtime/signal_netbsd.go
+++ b/src/runtime/signal_netbsd.go
@@ -14,14 +14,14 @@ var sigtable = [...]sigTabT{
/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
/* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"},
/* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"},
- /* 4 */ {_SigThrow, "SIGILL: illegal instruction"},
- /* 5 */ {_SigThrow, "SIGTRAP: trace trap"},
+ /* 4 */ {_SigThrow + _SigUnblock, "SIGILL: illegal instruction"},
+ /* 5 */ {_SigThrow + _SigUnblock, "SIGTRAP: trace trap"},
/* 6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"},
/* 7 */ {_SigThrow, "SIGEMT: emulate instruction executed"},
- /* 8 */ {_SigPanic, "SIGFPE: floating-point exception"},
+ /* 8 */ {_SigPanic + _SigUnblock, "SIGFPE: floating-point exception"},
/* 9 */ {0, "SIGKILL: kill"},
- /* 10 */ {_SigPanic, "SIGBUS: bus error"},
- /* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
+ /* 10 */ {_SigPanic + _SigUnblock, "SIGBUS: bus error"},
+ /* 11 */ {_SigPanic + _SigUnblock, "SIGSEGV: segmentation violation"},
/* 12 */ {_SigThrow, "SIGSYS: bad system call"},
/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
@@ -30,14 +30,14 @@ var sigtable = [...]sigTabT{
/* 17 */ {0, "SIGSTOP: stop"},
/* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
/* 19 */ {0, "SIGCONT: continue after stop"},
- /* 20 */ {_SigNotify, "SIGCHLD: child status has changed"},
+ /* 20 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status has changed"},
/* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
/* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
/* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
- /* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"},
+ /* 27 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling alarm clock"},
/* 28 */ {_SigNotify, "SIGWINCH: window size change"},
/* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
/* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
diff --git a/src/runtime/signal_solaris.go b/src/runtime/signal_solaris.go
index 2986c5aabc..d8ac676846 100644
--- a/src/runtime/signal_solaris.go
+++ b/src/runtime/signal_solaris.go
@@ -14,21 +14,21 @@ var sigtable = [...]sigTabT{
/* 1 */ {_SigNotify + _SigKill, "SIGHUP: hangup"},
/* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt (rubout)"},
/* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit (ASCII FS)"},
- /* 4 */ {_SigThrow, "SIGILL: illegal instruction (not reset when caught)"},
- /* 5 */ {_SigThrow, "SIGTRAP: trace trap (not reset when caught)"},
+ /* 4 */ {_SigThrow + _SigUnblock, "SIGILL: illegal instruction (not reset when caught)"},
+ /* 5 */ {_SigThrow + _SigUnblock, "SIGTRAP: trace trap (not reset when caught)"},
/* 6 */ {_SigNotify + _SigThrow, "SIGABRT: used by abort, replace SIGIOT in the future"},
/* 7 */ {_SigThrow, "SIGEMT: EMT instruction"},
- /* 8 */ {_SigPanic, "SIGFPE: floating point exception"},
+ /* 8 */ {_SigPanic + _SigUnblock, "SIGFPE: floating point exception"},
/* 9 */ {0, "SIGKILL: kill (cannot be caught or ignored)"},
- /* 10 */ {_SigPanic, "SIGBUS: bus error"},
- /* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
+ /* 10 */ {_SigPanic + _SigUnblock, "SIGBUS: bus error"},
+ /* 11 */ {_SigPanic + _SigUnblock, "SIGSEGV: segmentation violation"},
/* 12 */ {_SigThrow, "SIGSYS: bad argument to system call"},
/* 13 */ {_SigNotify, "SIGPIPE: write on a pipe with no one to read it"},
/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
/* 15 */ {_SigNotify + _SigKill, "SIGTERM: software termination signal from kill"},
/* 16 */ {_SigNotify, "SIGUSR1: user defined signal 1"},
/* 17 */ {_SigNotify, "SIGUSR2: user defined signal 2"},
- /* 18 */ {_SigNotify, "SIGCHLD: child status change alias (POSIX)"},
+ /* 18 */ {_SigNotify + _SigUnblock, "SIGCHLD: child status change alias (POSIX)"},
/* 19 */ {_SigNotify, "SIGPWR: power-fail restart"},
/* 20 */ {_SigNotify, "SIGWINCH: window size change"},
/* 21 */ {_SigNotify, "SIGURG: urgent socket condition"},
@@ -39,7 +39,7 @@ var sigtable = [...]sigTabT{
/* 26 */ {_SigNotify + _SigDefault, "SIGTTIN: background tty read attempted"},
/* 27 */ {_SigNotify + _SigDefault, "SIGTTOU: background tty write attempted"},
/* 28 */ {_SigNotify, "SIGVTALRM: virtual timer expired"},
- /* 29 */ {_SigNotify, "SIGPROF: profiling timer expired"},
+ /* 29 */ {_SigNotify + _SigUnblock, "SIGPROF: profiling timer expired"},
/* 30 */ {_SigNotify, "SIGXCPU: exceeded cpu limit"},
/* 31 */ {_SigNotify, "SIGXFSZ: exceeded file size limit"},
/* 32 */ {_SigNotify, "SIGWAITING: reserved signal no longer used by"},
diff --git a/src/runtime/signal_windows.go b/src/runtime/signal_windows.go
index da8a1c5801..b2fce53534 100644
--- a/src/runtime/signal_windows.go
+++ b/src/runtime/signal_windows.go
@@ -131,7 +131,9 @@ func lastcontinuehandler(info *exceptionrecord, r *context, gp *g) int32 {
print("PC=", hex(r.ip()), "\n")
if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
- print("signal arrived during cgo execution\n")
+ if iscgo {
+ print("signal arrived during external code execution\n")
+ }
gp = _g_.m.lockedg
}
print("\n")
diff --git a/src/runtime/sigqueue_plan9.go b/src/runtime/sigqueue_plan9.go
index 38f0a57b90..f000fabd1a 100644
--- a/src/runtime/sigqueue_plan9.go
+++ b/src/runtime/sigqueue_plan9.go
@@ -17,21 +17,29 @@ var sig struct {
sleeping bool
}
+type noteData struct {
+ s [_ERRMAX]byte
+ n int // n bytes of s are valid
+}
+
type noteQueue struct {
lock mutex
- data [qsize]*byte
+ data [qsize]noteData
ri int
wi int
full bool
}
+// It is not allowed to allocate memory in the signal handler.
func (q *noteQueue) push(item *byte) bool {
lock(&q.lock)
if q.full {
unlock(&q.lock)
return false
}
- q.data[q.wi] = item
+ s := gostringnocopy(item)
+ copy(q.data[q.wi].s[:], s)
+ q.data[q.wi].n = len(s)
q.wi++
if q.wi == qsize {
q.wi = 0
@@ -43,14 +51,15 @@ func (q *noteQueue) push(item *byte) bool {
return true
}
-func (q *noteQueue) pop() *byte {
+func (q *noteQueue) pop() string {
lock(&q.lock)
q.full = false
if q.ri == q.wi {
unlock(&q.lock)
- return nil
+ return ""
}
- item := q.data[q.ri]
+ note := &q.data[q.ri]
+ item := string(note.s[:note.n])
q.ri++
if q.ri == qsize {
q.ri = 0
@@ -86,8 +95,8 @@ func sendNote(s *byte) bool {
func signal_recv() string {
for {
note := sig.q.pop()
- if note != nil {
- return gostring(note)
+ if note != "" {
+ return note
}
lock(&sig.lock)
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index 5ccc6592bf..79b611839d 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -84,10 +84,13 @@ func growslice(t *slicetype, old slice, n int) slice {
memclr(add(p, lenmem), capmem-lenmem)
} else {
// Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan unitialized memory.
- // TODO(rsc): Use memmove when !writeBarrierEnabled.
p = newarray(et, uintptr(newcap))
- for i := 0; i < old.len; i++ {
- typedmemmove(et, add(p, uintptr(i)*et.size), add(old.array, uintptr(i)*et.size))
+ if !writeBarrierEnabled {
+ memmove(p, old.array, lenmem)
+ } else {
+ for i := uintptr(0); i < lenmem; i += et.size {
+ typedmemmove(et, add(p, i), add(old.array, i))
+ }
}
}
diff --git a/src/runtime/stack1.go b/src/runtime/stack1.go
index f74694b7e9..27427af955 100644
--- a/src/runtime/stack1.go
+++ b/src/runtime/stack1.go
@@ -352,6 +352,12 @@ func adjustpointer(adjinfo *adjustinfo, vpp unsafe.Pointer) {
}
}
+// Information from the compiler about the layout of stack frames.
+type bitvector struct {
+ n int32 // # of bits
+ bytedata *uint8
+}
+
type gobitvector struct {
n uintptr
bytedata []uint8
@@ -381,20 +387,20 @@ func adjustpointers(scanp unsafe.Pointer, cbv *bitvector, adjinfo *adjustinfo, f
print(" ", add(scanp, i*ptrSize), ":", ptrnames[ptrbit(&bv, i)], ":", hex(*(*uintptr)(add(scanp, i*ptrSize))), " # ", i, " ", bv.bytedata[i/4], "\n")
}
if ptrbit(&bv, i) == 1 {
- p := *(*unsafe.Pointer)(add(scanp, i*ptrSize))
- up := uintptr(p)
- if f != nil && 0 < up && up < _PageSize && debug.invalidptr != 0 || up == poisonStack {
+ pp := (*uintptr)(add(scanp, i*ptrSize))
+ p := *pp
+ if f != nil && 0 < p && p < _PageSize && debug.invalidptr != 0 || p == poisonStack {
// Looks like a junk value in a pointer slot.
// Live analysis wrong?
getg().m.traceback = 2
- print("runtime: bad pointer in frame ", funcname(f), " at ", add(scanp, i*ptrSize), ": ", p, "\n")
+ print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n")
throw("invalid stack pointer")
}
- if minp <= up && up < maxp {
+ if minp <= p && p < maxp {
if stackDebug >= 3 {
print("adjust ptr ", p, " ", funcname(f), "\n")
}
- *(*unsafe.Pointer)(add(scanp, i*ptrSize)) = unsafe.Pointer(up + delta)
+ *pp = p + delta
}
}
}
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index 25f5bf46fb..687f067cb9 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -32,6 +32,8 @@ const (
// moduledata records information about the layout of the executable
// image. It is written by the linker. Any changes here must be
// matched changes to the code in cmd/internal/ld/symtab.go:symtab.
+// moduledata is stored in read-only memory; none of the pointers here
+// are visible to the garbage collector.
type moduledata struct {
pclntable []byte
ftab []functab
@@ -48,18 +50,24 @@ type moduledata struct {
typelinks []*_type
- gcdatamask, gcbssmask bitvector
+ modulename string
+ modulehashes []modulehash
- // write barrier shadow data
- // 64-bit systems only, enabled by GODEBUG=wbshadow=1.
- // See also the shadow_* fields on mheap in mheap.go.
- shadow_data uintptr // data-addr + shadow_data = shadow data addr
- data_start uintptr // start of shadowed data addresses
- data_end uintptr // end of shadowed data addresses
+ gcdatamask, gcbssmask bitvector
next *moduledata
}
+// For each shared library a module links against, the linker creates an entry in the
+// moduledata.modulehashes slice containing the name of the module, the abi hash seen
+// at link time and a pointer to the runtime abi hash. These are checked in
+// moduledataverify1 below.
+type modulehash struct {
+ modulename string
+ linktimehash string
+ runtimehash *string
+}
+
var firstmoduledata moduledata // linker symbol
var lastmoduledatap *moduledata // linker symbol
@@ -124,6 +132,13 @@ func moduledataverify1(datap *moduledata) {
datap.maxpc != datap.ftab[nftab].entry {
throw("minpc or maxpc invalid")
}
+
+ for _, modulehash := range datap.modulehashes {
+ if modulehash.linktimehash != *modulehash.runtimehash {
+ println("abi mismatch detected between", datap.modulename, "and", modulehash.modulename)
+ throw("abi mismatch")
+ }
+ }
}
// FuncForPC returns a *Func describing the function that contains the
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 3b7501b9b4..6da7baddc5 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -132,10 +132,7 @@ type traceBuf struct {
func StartTrace() error {
// Stop the world, so that we can take a consistent snapshot
// of all goroutines at the beginning of the trace.
- semacquire(&worldsema, false)
- _g_ := getg()
- _g_.m.preemptoff = "start tracing"
- systemstack(stoptheworld)
+ stopTheWorld("start tracing")
// We are in stop-the-world, but syscalls can finish and write to trace concurrently.
// Exitsyscall could check trace.enabled long before and then suddenly wake up
@@ -146,9 +143,7 @@ func StartTrace() error {
if trace.enabled || trace.shutdown {
unlock(&trace.bufLock)
- _g_.m.preemptoff = ""
- semrelease(&worldsema)
- systemstack(starttheworld)
+ startTheWorld()
return errorString("tracing is already enabled")
}
@@ -175,9 +170,7 @@ func StartTrace() error {
unlock(&trace.bufLock)
- _g_.m.preemptoff = ""
- semrelease(&worldsema)
- systemstack(starttheworld)
+ startTheWorld()
return nil
}
@@ -186,19 +179,14 @@ func StartTrace() error {
func StopTrace() {
// Stop the world so that we can collect the trace buffers from all p's below,
// and also to avoid races with traceEvent.
- semacquire(&worldsema, false)
- _g_ := getg()
- _g_.m.preemptoff = "stop tracing"
- systemstack(stoptheworld)
+ stopTheWorld("stop tracing")
// See the comment in StartTrace.
lock(&trace.bufLock)
if !trace.enabled {
unlock(&trace.bufLock)
- _g_.m.preemptoff = ""
- semrelease(&worldsema)
- systemstack(starttheworld)
+ startTheWorld()
return
}
@@ -236,9 +224,7 @@ func StopTrace() {
unlock(&trace.bufLock)
- _g_.m.preemptoff = ""
- semrelease(&worldsema)
- systemstack(starttheworld)
+ startTheWorld()
// The world is started but we've set trace.shutdown, so new tracing can't start.
// Wait for the trace reader to flush pending buffers and stop.
@@ -428,9 +414,9 @@ func traceEvent(ev byte, skip int, args ...uint64) {
// The caller checked that trace.enabled == true, but trace.enabled might have been
// turned off between the check and now. Check again. traceLockBuffer did mp.locks++,
- // StopTrace does stoptheworld, and stoptheworld waits for mp.locks to go back to zero,
+ // StopTrace does stopTheWorld, and stopTheWorld waits for mp.locks to go back to zero,
// so if we see trace.enabled == true now, we know it's true for the rest of the function.
- // Exitsyscall can run even during stoptheworld. The race with StartTrace/StopTrace
+ // Exitsyscall can run even during stopTheWorld. The race with StartTrace/StopTrace
// during tracing in exitsyscall is resolved by locking trace.bufLock in traceLockBuffer.
if !trace.enabled {
traceReleaseBuffer(pid)
@@ -733,7 +719,7 @@ func traceProcStart() {
}
func traceProcStop(pp *p) {
- // Sysmon and stoptheworld can stop Ps blocked in syscalls,
+ // Sysmon and stopTheWorld can stop Ps blocked in syscalls,
// to handle this we temporary employ the P.
mp := acquirem()
oldp := mp.p
@@ -807,7 +793,7 @@ func traceGoSysExit(ts int64) {
}
func traceGoSysBlock(pp *p) {
- // Sysmon and stoptheworld can declare syscalls running on remote Ps as blocked,
+ // Sysmon and stopTheWorld can declare syscalls running on remote Ps as blocked,
// to handle this we temporary employ the P.
mp := acquirem()
oldp := mp.p
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 9f34e37ea4..5ed601e6f3 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -46,6 +46,9 @@ var (
timerprocPC uintptr
gcBgMarkWorkerPC uintptr
systemstack_switchPC uintptr
+ systemstackPC uintptr
+
+ gogoPC uintptr
externalthreadhandlerp uintptr // initialized elsewhere
)
@@ -69,6 +72,10 @@ func tracebackinit() {
timerprocPC = funcPC(timerproc)
gcBgMarkWorkerPC = funcPC(gcBgMarkWorker)
systemstack_switchPC = funcPC(systemstack_switch)
+ systemstackPC = funcPC(systemstack)
+
+ // used by sigprof handler
+ gogoPC = funcPC(gogo)
}
// Traceback over the deferred function calls.
@@ -194,7 +201,14 @@ func gentraceback(pc0, sp0, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max in
// Found an actual function.
// Derive frame pointer and link register.
if frame.fp == 0 {
- frame.fp = frame.sp + uintptr(funcspdelta(f, frame.pc))
+ // We want to jump over the systemstack switch. If we're running on the
+ // g0, this systemstack is at the top of the stack.
+ // If we're not on g0 or there's no curg, then this is a regular call.
+ sp := frame.sp
+ if flags&_TraceJumpStack != 0 && f.entry == systemstackPC && gp == g.m.g0 && gp.m.curg != nil {
+ sp = gp.m.curg.sched.sp
+ }
+ frame.fp = sp + uintptr(funcspdelta(f, frame.pc))
if !usesLR {
// On x86, call instruction pushes return PC before entering new function.
frame.fp += regSize
@@ -455,7 +469,7 @@ func setArgInfo(frame *stkframe, f *_func, needArgMap bool) {
throw("reflect mismatch")
}
bv := (*bitvector)(unsafe.Pointer(fn[1]))
- frame.arglen = uintptr(bv.n / 2 * ptrSize)
+ frame.arglen = uintptr(bv.n * ptrSize)
frame.argmap = bv
}
}
@@ -517,9 +531,10 @@ func traceback1(pc, sp, lr uintptr, gp *g, flags uint) {
func callers(skip int, pcbuf []uintptr) int {
sp := getcallersp(unsafe.Pointer(&skip))
pc := uintptr(getcallerpc(unsafe.Pointer(&skip)))
+ gp := getg()
var n int
systemstack(func() {
- n = gentraceback(pc, sp, 0, getg(), skip, &pcbuf[0], len(pcbuf), nil, nil, 0)
+ n = gentraceback(pc, sp, 0, gp, skip, &pcbuf[0], len(pcbuf), nil, nil, 0)
})
return n
}
diff --git a/src/runtime/type.go b/src/runtime/type.go
index 48df2a4382..45bdac8b91 100644
--- a/src/runtime/type.go
+++ b/src/runtime/type.go
@@ -20,17 +20,10 @@ type _type struct {
fieldalign uint8
kind uint8
alg *typeAlg
- // gc stores type info required for garbage collector.
- // If (kind&KindGCProg)==0, then gc[0] points at sparse GC bitmap
- // (no indirection), 4 bits per word.
- // If (kind&KindGCProg)!=0, then gc[1] points to a compiler-generated
- // read-only GC program; and gc[0] points to BSS space for sparse GC bitmap.
- // For huge types (>maxGCMask), runtime unrolls the program directly into
- // GC bitmap and gc[0] is not used. For moderately-sized types, runtime
- // unrolls the program into gc[0] space on first use. The first byte of gc[0]
- // (gc[0][0]) contains 'unroll' flag saying whether the program is already
- // unrolled into gc[0] or not.
- gc [2]uintptr
+ // gcdata stores the GC type data for the garbage collector.
+ // If the KindGCProg bit is set in kind, gcdata is a GC program.
+ // Otherwise it is a ptrmask bitmap. See mbitmap.go for details.
+ gcdata *byte
_string *string
x *uncommontype
ptrto *_type