aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/malloc.go
diff options
context:
space:
mode:
authorAustin Clements <austin@google.com>2017-12-19 22:05:23 -0800
committerAustin Clements <austin@google.com>2018-02-15 21:12:23 +0000
commit2b415549b813ba36caafa34fc34d72e47ee8335c (patch)
treea168125e11269c5c3f74c95b6012c13fbe2f98ae /src/runtime/malloc.go
parent45ffeab549fa4b03b231a0872025364e13c7f7f0 (diff)
downloadgo-2b415549b813ba36caafa34fc34d72e47ee8335c.tar.xz
runtime: use sparse mappings for the heap
This replaces the contiguous heap arena mapping with a potentially sparse mapping that can support heap mappings anywhere in the address space. This has several advantages over the current approach: * There is no longer any limit on the size of the Go heap. (Currently it's limited to 512GB.) Hence, this fixes #10460. * It eliminates many failures modes of heap initialization and growing. In particular it eliminates any possibility of panicking with an address space conflict. This can happen for many reasons and even causes a low but steady rate of TSAN test failures because of conflicts with the TSAN runtime. See #16936 and #11993. * It eliminates the notion of "non-reserved" heap, which was added because creating huge address space reservations (particularly on 64-bit) led to huge process VSIZE. This was at best confusing and at worst conflicted badly with ulimit -v. However, the non-reserved heap logic is complicated, can race with other mappings in non-pure Go binaries (e.g., #18976), and requires that the entire heap be either reserved or non-reserved. We currently maintain the latter property, but it's quite difficult to convince yourself of that, and hence difficult to keep correct. This logic is still present, but will be removed in the next CL. * It fixes problems on 32-bit where skipping over parts of the address space leads to mapping huge (and never-to-be-used) metadata structures. See #19831. This also completely rewrites and significantly simplifies mheap.sysAlloc, which has been a source of many bugs. E.g., #21044, #20259, #18651, and #13143 (and maybe #23222). This change also makes it possible to allocate individual objects larger than 512GB. As a result, a few tests that expected huge allocations to fail needed to be changed to make even larger allocations. However, at the moment attempting to allocate a humongous object may cause the program to freeze for several minutes on Linux as we fall back to probing every page with addrspace_free. That logic (and this failure mode) will be removed in the next CL. Fixes #10460. Fixes #22204 (since it rewrites the code involved). This slightly slows down compilebench and the x/benchmarks garbage benchmark. name old time/op new time/op delta Template 184ms ± 1% 185ms ± 1% ~ (p=0.065 n=10+9) Unicode 86.9ms ± 3% 86.3ms ± 1% ~ (p=0.631 n=10+10) GoTypes 599ms ± 0% 602ms ± 0% +0.56% (p=0.000 n=10+9) Compiler 2.87s ± 1% 2.89s ± 1% +0.51% (p=0.002 n=9+10) SSA 7.29s ± 1% 7.25s ± 1% ~ (p=0.182 n=10+9) Flate 118ms ± 2% 118ms ± 1% ~ (p=0.113 n=9+9) GoParser 147ms ± 1% 148ms ± 1% +1.07% (p=0.003 n=9+10) Reflect 401ms ± 1% 404ms ± 1% +0.71% (p=0.003 n=10+9) Tar 175ms ± 1% 175ms ± 1% ~ (p=0.604 n=9+10) XML 209ms ± 1% 210ms ± 1% ~ (p=0.052 n=10+10) (https://perf.golang.org/search?q=upload:20171231.4) name old time/op new time/op delta Garbage/benchmem-MB=64-12 2.23ms ± 1% 2.25ms ± 1% +0.84% (p=0.000 n=19+19) (https://perf.golang.org/search?q=upload:20171231.3) Relative to the start of the sparse heap changes (starting at and including "runtime: fix various contiguous bitmap assumptions"), overall slowdown is roughly 1% on GC-intensive benchmarks: name old time/op new time/op delta Template 183ms ± 1% 185ms ± 1% +1.32% (p=0.000 n=9+9) Unicode 84.9ms ± 2% 86.3ms ± 1% +1.65% (p=0.000 n=9+10) GoTypes 595ms ± 1% 602ms ± 0% +1.19% (p=0.000 n=9+9) Compiler 2.86s ± 0% 2.89s ± 1% +0.91% (p=0.000 n=9+10) SSA 7.19s ± 0% 7.25s ± 1% +0.75% (p=0.000 n=8+9) Flate 117ms ± 1% 118ms ± 1% +1.10% (p=0.000 n=10+9) GoParser 146ms ± 2% 148ms ± 1% +1.48% (p=0.002 n=10+10) Reflect 398ms ± 1% 404ms ± 1% +1.51% (p=0.000 n=10+9) Tar 173ms ± 1% 175ms ± 1% +1.17% (p=0.000 n=10+10) XML 208ms ± 1% 210ms ± 1% +0.62% (p=0.011 n=10+10) [Geo mean] 369ms 373ms +1.17% (https://perf.golang.org/search?q=upload:20180101.2) name old time/op new time/op delta Garbage/benchmem-MB=64-12 2.22ms ± 1% 2.25ms ± 1% +1.51% (p=0.000 n=20+19) (https://perf.golang.org/search?q=upload:20180101.3) Change-Id: I5daf4cfec24b252e5a57001f0a6c03f22479d0f0 Reviewed-on: https://go-review.googlesource.com/85887 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Rick Hudson <rlh@golang.org>
Diffstat (limited to 'src/runtime/malloc.go')
-rw-r--r--src/runtime/malloc.go507
1 files changed, 281 insertions, 226 deletions
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index a95a7fffde..02c0be6690 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -78,9 +78,32 @@
//
// 3. We don't zero pages that never get reused.
+// Virtual memory layout
+//
+// The heap consists of a set of arenas, which are 64MB on 64-bit and
+// 4MB on 32-bit (heapArenaBytes). Each arena's start address is also
+// aligned to the arena size.
+//
+// Each arena has an associated heapArena object that stores the
+// metadata for that arena: the heap bitmap for all words in the arena
+// and the span map for all pages in the arena. heapArena objects are
+// themselves allocated off-heap.
+//
+// Since arenas are aligned, the address space can be viewed as a
+// series of arena frames. The arena index (mheap_.arenas) maps from
+// arena frame number to *heapArena, or nil for parts of the address
+// space not backed by the Go heap. Since arenas are large, the arena
+// index is just a single-level mapping.
+//
+// The arena index covers the entire possible address space, allowing
+// the Go heap to use any part of the address space. The allocator
+// attempts to keep arenas contiguous so that large spans (and hence
+// large objects) can cross arenas.
+
package runtime
import (
+ "runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
@@ -113,9 +136,8 @@ const (
_TinySize = 16
_TinySizeClass = int8(2)
- _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc
- _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
- _HeapAllocChunk = 1 << 20 // Chunk size for heap growth
+ _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc
+ _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
// Per-P, per order stack segment cache size.
_StackCacheSize = 32 * 1024
@@ -134,26 +156,6 @@ const (
// plan9 | 4KB | 3
_NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9
- // Number of bits in page to span calculations (4k pages).
- // On Windows 64-bit we limit the arena to 32GB or 35 bits.
- // Windows counts memory used by page table into committed memory
- // of the process, so we can't reserve too much memory.
- // See https://golang.org/issue/5402 and https://golang.org/issue/5236.
- // On other 64-bit platforms, we limit the arena to 512GB, or 39 bits.
- // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
- // The only exception is mips32 which only has access to low 2GB of virtual memory.
- // On Darwin/arm64, we cannot reserve more than ~5GB of virtual memory,
- // but as most devices have less than 4GB of physical memory anyway, we
- // try to be conservative here, and only ask for a 2GB heap.
- _MHeapMap_TotalBits = (_64bit*sys.GoosWindows)*35 + (_64bit*(1-sys.GoosWindows)*(1-sys.GoosDarwin*sys.GoarchArm64))*39 + sys.GoosDarwin*sys.GoarchArm64*31 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle))
- _MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift
-
- // _MaxMem is the maximum heap arena size minus 1.
- //
- // On 32-bit, this is also the maximum heap pointer value,
- // since the arena starts at address 0.
- _MaxMem = 1<<_MHeapMap_TotalBits - 1
-
// memLimitBits is the maximum number of bits in a heap address.
//
// On 64-bit platforms, we limit this to 48 bits because that
@@ -174,14 +176,14 @@ const (
memLimitBits = _64bit*48 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle))
// memLimit is one past the highest possible heap pointer value.
+ //
+ // This is also the maximum heap pointer value.
memLimit = 1 << memLimitBits
+ _MaxMem = memLimit - 1
// heapArenaBytes is the size of a heap arena. The heap
// consists of mappings of size heapArenaBytes, aligned to
// heapArenaBytes. The initial heap mapping is one arena.
- //
- // TODO: Right now only the bitmap is divided into separate
- // arenas, but shortly all of the heap will be.
heapArenaBytes = (64<<20)*_64bit + (4<<20)*(1-_64bit)
// heapArenaBitmapBytes is the size of each heap arena's bitmap.
@@ -281,43 +283,53 @@ func mallocinit() {
throw("bad system page size")
}
- // The auxiliary regions start at p and are laid out in the
- // following order: spans, bitmap, arena.
- var p, pSize uintptr
- var reserved bool
+ // Map the arena index. Most of this will never be written to,
+ // so we don't account it.
+ var untracked uint64
+ mheap_.arenas = (*[memLimit / heapArenaBytes]*heapArena)(persistentalloc(unsafe.Sizeof(*mheap_.arenas), sys.PtrSize, &untracked))
+ if mheap_.arenas == nil {
+ throw("failed to allocate arena index")
+ }
- // Set up the allocation arena, a contiguous area of memory where
- // allocated data will be found.
+ // Initialize the heap.
+ mheap_.init()
+ _g_ := getg()
+ _g_.m.mcache = allocmcache()
+
+ // Create initial arena growth hints.
if sys.PtrSize == 8 {
- // On a 64-bit machine, allocate from a single contiguous reservation.
- // 512 GB (MaxMem) should be big enough for now.
+ // On a 64-bit machine, we pick the following hints
+ // because:
+ //
+ // 1. Starting from the middle of the address space
+ // makes it easier to grow out a contiguous range
+ // without running in to some other mapping.
+ //
+ // 2. This makes Go heap addresses more easily
+ // recognizable when debugging.
//
- // The code will work with the reservation at any address, but ask
- // SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
- // Allocating a 512 GB region takes away 39 bits, and the amd64
- // doesn't let us choose the top 17 bits, so that leaves the 9 bits
- // in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
- // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
- // In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
+ // 3. Stack scanning in gccgo is still conservative,
+ // so it's important that addresses be distinguishable
+ // from other data.
+ //
+ // Starting at 0x00c0 means that the valid memory addresses
+ // will begin 0x00c0, 0x00c1, ...
+ // In little-endian, that's c0 00, c1 00, ... None of those are valid
// UTF-8 sequences, and they are otherwise as far away from
// ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
// addresses. An earlier attempt to use 0x11f8 caused out of memory errors
// on OS X during thread allocations. 0x00c0 causes conflicts with
// AddressSanitizer which reserves all memory up to 0x0100.
- // These choices are both for debuggability and to reduce the
- // odds of a conservative garbage collector (as is still used in gccgo)
+ // These choices reduce the odds of a conservative garbage collector
// not collecting memory because some non-pointer block of memory
// had a bit pattern that matched a memory address.
//
- // If this fails we fall back to the 32 bit memory mechanism
- //
// However, on arm64, we ignore all this advice above and slam the
// allocation at 0x40 << 32 because when using 4k pages with 3-level
// translation buffers, the user address space is limited to 39 bits
// On darwin/arm64, the address space is even smaller.
- arenaSize := round(_MaxMem, _PageSize)
- pSize = arenaSize + _PageSize
- for i := 0; i <= 0x7f; i++ {
+ for i := 0x7f; i >= 0; i-- {
+ var p uintptr
switch {
case GOARCH == "arm64" && GOOS == "darwin":
p = uintptr(i)<<40 | uintptrMask&(0x0013<<28)
@@ -326,225 +338,240 @@ func mallocinit() {
default:
p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
}
- p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
- if p != 0 {
- break
- }
+ hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
+ hint.addr = p
+ hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
}
- }
+ } else {
+ // On a 32-bit machine, we're much more concerned
+ // about keeping the usable heap contiguous.
+ // Hence:
+ //
+ // 1. We reserve space for all heapArenas up front so
+ // they don't get interleaved with the heap. They're
+ // ~258MB, so this isn't too bad. (We could reserve a
+ // smaller amount of space up front if this is a
+ // problem.)
+ //
+ // 2. We hint the heap to start right above the end of
+ // the binary so we have the best chance of keeping it
+ // contiguous.
+ //
+ // 3. We try to stake out a reasonably large initial
+ // heap reservation.
- if p == 0 {
- // On a 32-bit machine, we can't typically get away
- // with a giant virtual address space reservation.
- // Instead we map the memory information bitmap
- // immediately after the data segment, large enough
- // to handle the entire 4GB address space (256 MB),
- // along with a reservation for an initial arena.
- // When that gets used up, we'll start asking the kernel
- // for any memory anywhere.
+ const arenaMetaSize = unsafe.Sizeof(heapArena{}) * uintptr(len(*mheap_.arenas))
+ var reserved bool
+ meta := uintptr(sysReserve(nil, arenaMetaSize, &reserved))
+ if meta != 0 {
+ mheap_.heapArenaAlloc.init(meta, arenaMetaSize)
+ }
// We want to start the arena low, but if we're linked
// against C code, it's possible global constructors
// have called malloc and adjusted the process' brk.
// Query the brk so we can avoid trying to map the
- // arena over it (which will cause the kernel to put
- // the arena somewhere else, likely at a high
+ // region over it (which will cause the kernel to put
+ // the region somewhere else, likely at a high
// address).
procBrk := sbrk0()
- // If we fail to allocate, try again with a smaller arena.
- // This is necessary on Android L where we share a process
- // with ART, which reserves virtual memory aggressively.
- // In the worst case, fall back to a 0-sized initial arena,
- // in the hope that subsequent reservations will succeed.
+ // If we ask for the end of the data segment but the
+ // operating system requires a little more space
+ // before we can start allocating, it will give out a
+ // slightly higher pointer. Except QEMU, which is
+ // buggy, as usual: it won't adjust the pointer
+ // upward. So adjust it upward a little bit ourselves:
+ // 1/4 MB to get away from the running binary image.
+ p := firstmoduledata.end
+ if p < procBrk {
+ p = procBrk
+ }
+ if mheap_.heapArenaAlloc.next <= p && p < mheap_.heapArenaAlloc.end {
+ p = mheap_.heapArenaAlloc.end
+ }
+ p = round(p+(256<<10), heapArenaBytes)
+ // Because we're worried about fragmentation on
+ // 32-bit, we try to make a large initial reservation.
arenaSizes := []uintptr{
512 << 20,
256 << 20,
128 << 20,
- 0,
}
-
for _, arenaSize := range arenaSizes {
- // SysReserve treats the address we ask for, end, as a hint,
- // not as an absolute requirement. If we ask for the end
- // of the data segment but the operating system requires
- // a little more space before we can start allocating, it will
- // give out a slightly higher pointer. Except QEMU, which
- // is buggy, as usual: it won't adjust the pointer upward.
- // So adjust it upward a little bit ourselves: 1/4 MB to get
- // away from the running binary image and then round up
- // to a MB boundary.
- p = round(firstmoduledata.end+(1<<18), 1<<20)
- pSize = arenaSize + _PageSize
- if p <= procBrk && procBrk < p+pSize {
- // Move the start above the brk,
- // leaving some room for future brk
- // expansion.
- p = round(procBrk+(1<<20), 1<<20)
- }
- p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
- if p != 0 {
+ a, size := sysReserveAligned(unsafe.Pointer(p), arenaSize, heapArenaBytes, &reserved)
+ if a != nil {
+ mheap_.arena.init(uintptr(a), size)
+ p = uintptr(a) + size // For hint below
break
}
}
- if p == 0 {
- throw("runtime: cannot reserve arena virtual address space")
- }
- }
-
- // PageSize can be larger than OS definition of page size,
- // so SysReserve can give us a PageSize-unaligned pointer.
- // To overcome this we ask for PageSize more and round up the pointer.
- p1 := round(p, _PageSize)
- pSize -= p1 - p
-
- if sys.PtrSize == 4 {
- // Set arena_start such that we can accept memory
- // reservations located anywhere in the 4GB virtual space.
- mheap_.arena_start = 0
- } else {
- mheap_.arena_start = p1
+ hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
+ hint.addr = p
+ hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
}
- mheap_.arena_end = p + pSize
- mheap_.arena_used = p1
- mheap_.arena_alloc = p1
- mheap_.arena_reserved = reserved
-
- if mheap_.arena_start&(_PageSize-1) != 0 {
- println("bad pagesize", hex(p), hex(p1), hex(_PageSize), "start", hex(mheap_.arena_start))
- throw("misrounded allocation in mallocinit")
- }
-
- // Map the arena index. Most of this will never be touched.
- var untracked uint64
- mheap_.arenas = (*[memLimit / heapArenaBytes]*heapArena)(persistentalloc(unsafe.Sizeof(*mheap_.arenas), sys.PtrSize, &untracked))
- if mheap_.arenas == nil {
- throw("failed to allocate arena index")
- }
-
- // Initialize the rest of the allocator.
- mheap_.init()
- _g_ := getg()
- _g_.m.mcache = allocmcache()
}
-// sysAlloc allocates the next n bytes from the heap arena. The
-// returned pointer is always _PageSize aligned and between
-// h.arena_start and h.arena_end. sysAlloc returns nil on failure.
+// sysAlloc allocates heap arena space for at least n bytes. The
+// returned pointer is always heapArenaBytes-aligned and backed by
+// h.arenas metadata. The returned size is always a multiple of
+// heapArenaBytes. sysAlloc returns nil on failure.
// There is no corresponding free function.
-func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
- // strandLimit is the maximum number of bytes to strand from
- // the current arena block. If we would need to strand more
- // than this, we fall back to sysAlloc'ing just enough for
- // this allocation.
- const strandLimit = 16 << 20
+//
+// h must be locked.
+func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
+ n = round(n, heapArenaBytes)
- if n > h.arena_end-h.arena_alloc {
- // If we haven't grown the arena to _MaxMem yet, try
- // to reserve some more address space.
- p_size := round(n+_PageSize, 256<<20)
- new_end := h.arena_end + p_size // Careful: can overflow
- if h.arena_end <= new_end && new_end-h.arena_start-1 <= _MaxMem {
- // TODO: It would be bad if part of the arena
- // is reserved and part is not.
- var reserved bool
- p := uintptr(sysReserve(unsafe.Pointer(h.arena_end), p_size, &reserved))
- if p == 0 {
- // TODO: Try smaller reservation
- // growths in case we're in a crowded
- // 32-bit address space.
- goto reservationFailed
- }
- // p can be just about anywhere in the address
- // space, including before arena_end.
- if p == h.arena_end {
- // The new block is contiguous with
- // the current block. Extend the
- // current arena block.
- h.arena_end = new_end
- h.arena_reserved = reserved
- } else if h.arena_start <= p && p+p_size-h.arena_start-1 <= _MaxMem && h.arena_end-h.arena_alloc < strandLimit {
- // We were able to reserve more memory
- // within the arena space, but it's
- // not contiguous with our previous
- // reservation. It could be before or
- // after our current arena_used.
- //
- // Keep everything page-aligned.
- // Our pages are bigger than hardware pages.
- h.arena_end = p + p_size
- p = round(p, _PageSize)
- h.arena_alloc = p
- h.arena_reserved = reserved
- } else {
- // We got a mapping, but either
- //
- // 1) It's not in the arena, so we
- // can't use it. (This should never
- // happen on 32-bit.)
- //
- // 2) We would need to discard too
- // much of our current arena block to
- // use it.
- //
- // We haven't added this allocation to
- // the stats, so subtract it from a
- // fake stat (but avoid underflow).
- //
- // We'll fall back to a small sysAlloc.
- stat := uint64(p_size)
- sysFree(unsafe.Pointer(p), p_size, &stat)
+ // First, try the arena pre-reservation.
+ v = h.arena.alloc(n, heapArenaBytes, &memstats.heap_sys)
+ if v != nil {
+ size = n
+ goto mapped
+ }
+
+ // Try to grow the heap at a hint address.
+ for h.arenaHints != nil {
+ hint := h.arenaHints
+ p := hint.addr
+ if hint.down {
+ p -= n
+ }
+ if p+n < p || p+n >= memLimit-1 {
+ // We can't use this, so don't ask.
+ v = nil
+ } else {
+ v = sysReserve(unsafe.Pointer(p), n, &h.arena_reserved)
+ }
+ if p == uintptr(v) {
+ // Success. Update the hint.
+ if !hint.down {
+ p += n
}
+ hint.addr = p
+ size = n
+ break
+ }
+ // Failed. Discard this hint and try the next.
+ //
+ // TODO: This would be cleaner if sysReserve could be
+ // told to only return the requested address. In
+ // particular, this is already how Windows behaves, so
+ // it would simply things there.
+ if v != nil {
+ sysFree(v, n, nil)
}
+ h.arenaHints = hint.next
+ h.arenaHintAlloc.free(unsafe.Pointer(hint))
}
- if n <= h.arena_end-h.arena_alloc {
- // Keep taking from our reservation.
- p := h.arena_alloc
- sysMap(unsafe.Pointer(p), n, h.arena_reserved, &memstats.heap_sys)
- h.arena_alloc += n
- if h.arena_alloc > h.arena_used {
- h.setArenaUsed(h.arena_alloc, true)
+ if size == 0 {
+ // All of the hints failed, so we'll take any
+ // (sufficiently aligned) address the kernel will give
+ // us.
+ v, size = sysReserveAligned(nil, n, heapArenaBytes, &h.arena_reserved)
+ if v == nil {
+ return nil, 0
}
- if p&(_PageSize-1) != 0 {
- throw("misrounded allocation in MHeap_SysAlloc")
- }
- return unsafe.Pointer(p)
+ // Create new hints for extending this region.
+ hint := (*arenaHint)(h.arenaHintAlloc.alloc())
+ hint.addr, hint.down = uintptr(v), true
+ hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
+ hint = (*arenaHint)(h.arenaHintAlloc.alloc())
+ hint.addr = uintptr(v) + size
+ hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
}
-reservationFailed:
- // If using 64-bit, our reservation is all we have.
- if sys.PtrSize != 4 {
- return nil
+ if v := uintptr(v); v+size < v || v+size >= memLimit-1 {
+ // This should be impossible on most architectures,
+ // but it would be really confusing to debug.
+ print("runtime: memory allocated by OS [", hex(v), ", ", hex(v+size), ") exceeds address space limit (", hex(int64(memLimit)), ")\n")
+ throw("memory reservation exceeds address space limit")
}
- // On 32-bit, once the reservation is gone we can
- // try to get memory at a location chosen by the OS.
- p_size := round(n, _PageSize) + _PageSize
- p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
- if p == 0 {
- return nil
+ if uintptr(v)&(heapArenaBytes-1) != 0 {
+ throw("misrounded allocation in sysAlloc")
}
- if p < h.arena_start || p+p_size-h.arena_start > _MaxMem {
- // This shouldn't be possible because _MaxMem is the
- // whole address space on 32-bit.
- top := uint64(h.arena_start) + _MaxMem
- print("runtime: memory allocated by OS (", hex(p), ") not in usable range [", hex(h.arena_start), ",", hex(top), ")\n")
- sysFree(unsafe.Pointer(p), p_size, &memstats.heap_sys)
- return nil
+ // Back the reservation.
+ sysMap(v, size, h.arena_reserved, &memstats.heap_sys)
+
+mapped:
+ // Create arena metadata.
+ for ri := uintptr(v) / heapArenaBytes; ri < (uintptr(v)+size)/heapArenaBytes; ri++ {
+ if h.arenas[ri] != nil {
+ throw("arena already initialized")
+ }
+ var r *heapArena
+ r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys))
+ if r == nil {
+ r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys))
+ if r == nil {
+ throw("out of memory allocating heap arena metadata")
+ }
+ }
+
+ // Store atomically just in case an object from the
+ // new heap arena becomes visible before the heap lock
+ // is released (which shouldn't happen, but there's
+ // little downside to this).
+ atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri]), unsafe.Pointer(r))
}
- p += -p & (_PageSize - 1)
- if p+n > h.arena_used {
- h.setArenaUsed(p+n, true)
+ // Tell the race detector about the new heap memory.
+ if raceenabled {
+ racemapshadow(v, size)
}
- if p&(_PageSize-1) != 0 {
- throw("misrounded allocation in MHeap_SysAlloc")
+ return
+}
+
+// sysReserveAligned is like sysReserve, but the returned pointer is
+// aligned to align bytes. It may reserve either n or n+align bytes,
+// so it returns the size that was reserved.
+func sysReserveAligned(v unsafe.Pointer, size, align uintptr, reserved *bool) (unsafe.Pointer, uintptr) {
+ // Since the alignment is rather large in uses of this
+ // function, we're not likely to get it by chance, so we ask
+ // for a larger region and remove the parts we don't need.
+ retries := 0
+retry:
+ p := uintptr(sysReserve(v, size+align, reserved))
+ switch {
+ case p == 0:
+ return nil, 0
+ case p&(align-1) == 0:
+ // We got lucky and got an aligned region, so we can
+ // use the whole thing.
+ return unsafe.Pointer(p), size + align
+ case GOOS == "windows":
+ // On Windows we can't release pieces of a
+ // reservation, so we release the whole thing and
+ // re-reserve the aligned sub-region. This may race,
+ // so we may have to try again.
+ sysFree(unsafe.Pointer(p), size+align, nil)
+ p = round(p, align)
+ p2 := sysReserve(unsafe.Pointer(p), size, reserved)
+ if p != uintptr(p2) {
+ // Must have raced. Try again.
+ sysFree(p2, size, nil)
+ if retries++; retries == 100 {
+ throw("failed to allocate aligned heap memory; too many retries")
+ }
+ goto retry
+ }
+ // Success.
+ return p2, size
+ default:
+ // Trim off the unaligned parts.
+ pAligned := round(p, align)
+ sysFree(unsafe.Pointer(p), pAligned-p, nil)
+ end := pAligned + size
+ endLen := (p + size + align) - end
+ if endLen > 0 {
+ sysFree(unsafe.Pointer(end), endLen, nil)
+ }
+ return unsafe.Pointer(pAligned), size
}
- return unsafe.Pointer(p)
}
// base address for all 0-byte allocations
@@ -1046,6 +1073,34 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap {
return p
}
+// linearAlloc is a simple linear allocator that pre-reserves a region
+// of memory and then maps that region as needed. The caller is
+// responsible for locking.
+type linearAlloc struct {
+ next uintptr // next free byte
+ mapped uintptr // one byte past end of mapped space
+ end uintptr // end of reserved space
+}
+
+func (l *linearAlloc) init(base, size uintptr) {
+ l.next, l.mapped = base, base
+ l.end = base + size
+}
+
+func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
+ p := round(l.next, align)
+ if p+size > l.end {
+ return nil
+ }
+ l.next = p + size
+ if pEnd := round(l.next-1, physPageSize); pEnd > l.mapped {
+ // We need to map more of the reserved space.
+ sysMap(unsafe.Pointer(l.mapped), pEnd-l.mapped, true, sysStat)
+ l.mapped = pEnd
+ }
+ return unsafe.Pointer(p)
+}
+
// notInHeap is off-heap memory allocated by a lower-level allocator
// like sysAlloc or persistentAlloc.
//