diff options
Diffstat (limited to 'src/internal/runtime')
| -rw-r--r-- | src/internal/runtime/gc/mksizeclasses.go | 357 | ||||
| -rw-r--r-- | src/internal/runtime/gc/sizeclasses.go | 99 |
2 files changed, 456 insertions, 0 deletions
diff --git a/src/internal/runtime/gc/mksizeclasses.go b/src/internal/runtime/gc/mksizeclasses.go new file mode 100644 index 0000000000..ea48cda469 --- /dev/null +++ b/src/internal/runtime/gc/mksizeclasses.go @@ -0,0 +1,357 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore + +// Generate tables for small malloc size classes. +// +// See malloc.go for overview. +// +// The size classes are chosen so that rounding an allocation +// request up to the next size class wastes at most 12.5% (1.125x). +// +// Each size class has its own page count that gets allocated +// and chopped up when new objects of the size class are needed. +// That page count is chosen so that chopping up the run of +// pages into objects of the given size wastes at most 12.5% (1.125x) +// of the memory. It is not necessary that the cutoff here be +// the same as above. +// +// The two sources of waste multiply, so the worst possible case +// for the above constraints would be that allocations of some +// size might have a 26.6% (1.266x) overhead. +// In practice, only one of the wastes comes into play for a +// given size (sizes < 512 waste mainly on the round-up, +// sizes > 512 waste mainly on the page chopping). +// For really small sizes, alignment constraints force the +// overhead higher. + +package main + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "io" + "log" + "math" + "math/bits" + "os" +) + +// Generate msize.go + +var stdout = flag.Bool("stdout", false, "write to stdout instead of sizeclasses.go") + +func main() { + flag.Parse() + + var b bytes.Buffer + fmt.Fprintln(&b, "// Code generated by mksizeclasses.go; DO NOT EDIT.") + fmt.Fprintln(&b, "//go:generate go run mksizeclasses.go") + fmt.Fprintln(&b) + fmt.Fprintln(&b, "package runtime") + classes := makeClasses() + + printComment(&b, classes) + + printClasses(&b, classes) + + out, err := format.Source(b.Bytes()) + if err != nil { + log.Fatal(err) + } + if *stdout { + _, err = os.Stdout.Write(out) + } else { + err = os.WriteFile("sizeclasses.go", out, 0666) + } + if err != nil { + log.Fatal(err) + } +} + +const ( + // Constants that we use and will transfer to the runtime. + minHeapAlign = 8 + maxSmallSize = 32 << 10 + smallSizeDiv = 8 + smallSizeMax = 1024 + largeSizeDiv = 128 + pageShift = 13 + + // Derived constants. + pageSize = 1 << pageShift +) + +type class struct { + size int // max size + npages int // number of pages +} + +func powerOfTwo(x int) bool { + return x != 0 && x&(x-1) == 0 +} + +func makeClasses() []class { + var classes []class + + classes = append(classes, class{}) // class #0 is a dummy entry + + align := minHeapAlign + for size := align; size <= maxSmallSize; size += align { + if powerOfTwo(size) { // bump alignment once in a while + if size >= 2048 { + align = 256 + } else if size >= 128 { + align = size / 8 + } else if size >= 32 { + align = 16 // heap bitmaps assume 16 byte alignment for allocations >= 32 bytes. + } + } + if !powerOfTwo(align) { + panic("incorrect alignment") + } + + // Make the allocnpages big enough that + // the leftover is less than 1/8 of the total, + // so wasted space is at most 12.5%. + allocsize := pageSize + for allocsize%size > allocsize/8 { + allocsize += pageSize + } + npages := allocsize / pageSize + + // If the previous sizeclass chose the same + // allocation size and fit the same number of + // objects into the page, we might as well + // use just this size instead of having two + // different sizes. + if len(classes) > 1 && npages == classes[len(classes)-1].npages && allocsize/size == allocsize/classes[len(classes)-1].size { + classes[len(classes)-1].size = size + continue + } + classes = append(classes, class{size: size, npages: npages}) + } + + // Increase object sizes if we can fit the same number of larger objects + // into the same number of pages. For example, we choose size 8448 above + // with 6 objects in 7 pages. But we can well use object size 9472, + // which is also 6 objects in 7 pages but +1024 bytes (+12.12%). + // We need to preserve at least largeSizeDiv alignment otherwise + // sizeToClass won't work. + for i := range classes { + if i == 0 { + continue + } + c := &classes[i] + psize := c.npages * pageSize + new_size := (psize / (psize / c.size)) &^ (largeSizeDiv - 1) + if new_size > c.size { + c.size = new_size + } + } + + if len(classes) != 68 { + panic("number of size classes has changed") + } + + for i := range classes { + computeDivMagic(&classes[i]) + } + + return classes +} + +// computeDivMagic checks that the division required to compute object +// index from span offset can be computed using 32-bit multiplication. +// n / c.size is implemented as (n * (^uint32(0)/uint32(c.size) + 1)) >> 32 +// for all 0 <= n <= c.npages * pageSize +func computeDivMagic(c *class) { + // divisor + d := c.size + if d == 0 { + return + } + + // maximum input value for which the formula needs to work. + max := c.npages * pageSize + + // As reported in [1], if n and d are unsigned N-bit integers, we + // can compute n / d as ⌊n * c / 2^F⌋, where c is ⌈2^F / d⌉ and F is + // computed with: + // + // Algorithm 2: Algorithm to select the number of fractional bits + // and the scaled approximate reciprocal in the case of unsigned + // integers. + // + // if d is a power of two then + // Let F ← log₂(d) and c = 1. + // else + // Let F ← N + L where L is the smallest integer + // such that d ≤ (2^(N+L) mod d) + 2^L. + // end if + // + // [1] "Faster Remainder by Direct Computation: Applications to + // Compilers and Software Libraries" Daniel Lemire, Owen Kaser, + // Nathan Kurz arXiv:1902.01961 + // + // To minimize the risk of introducing errors, we implement the + // algorithm exactly as stated, rather than trying to adapt it to + // fit typical Go idioms. + N := bits.Len(uint(max)) + var F int + if powerOfTwo(d) { + F = int(math.Log2(float64(d))) + if d != 1<<F { + panic("imprecise log2") + } + } else { + for L := 0; ; L++ { + if d <= ((1<<(N+L))%d)+(1<<L) { + F = N + L + break + } + } + } + + // Also, noted in the paper, F is the smallest number of fractional + // bits required. We use 32 bits, because it works for all size + // classes and is fast on all CPU architectures that we support. + if F > 32 { + fmt.Printf("d=%d max=%d N=%d F=%d\n", c.size, max, N, F) + panic("size class requires more than 32 bits of precision") + } + + // Brute force double-check with the exact computation that will be + // done by the runtime. + m := ^uint32(0)/uint32(c.size) + 1 + for n := 0; n <= max; n++ { + if uint32((uint64(n)*uint64(m))>>32) != uint32(n/c.size) { + fmt.Printf("d=%d max=%d m=%d n=%d\n", d, max, m, n) + panic("bad 32-bit multiply magic") + } + } +} + +func printComment(w io.Writer, classes []class) { + fmt.Fprintf(w, "// %-5s %-9s %-10s %-7s %-10s %-9s %-9s\n", "class", "bytes/obj", "bytes/span", "objects", "tail waste", "max waste", "min align") + prevSize := 0 + var minAligns [pageShift + 1]int + for i, c := range classes { + if i == 0 { + continue + } + spanSize := c.npages * pageSize + objects := spanSize / c.size + tailWaste := spanSize - c.size*(spanSize/c.size) + maxWaste := float64((c.size-prevSize-1)*objects+tailWaste) / float64(spanSize) + alignBits := bits.TrailingZeros(uint(c.size)) + if alignBits > pageShift { + // object alignment is capped at page alignment + alignBits = pageShift + } + for i := range minAligns { + if i > alignBits { + minAligns[i] = 0 + } else if minAligns[i] == 0 { + minAligns[i] = c.size + } + } + prevSize = c.size + fmt.Fprintf(w, "// %5d %9d %10d %7d %10d %8.2f%% %9d\n", i, c.size, spanSize, objects, tailWaste, 100*maxWaste, 1<<alignBits) + } + fmt.Fprintf(w, "\n") + + fmt.Fprintf(w, "// %-9s %-4s %-12s\n", "alignment", "bits", "min obj size") + for bits, size := range minAligns { + if size == 0 { + break + } + if bits+1 < len(minAligns) && size == minAligns[bits+1] { + continue + } + fmt.Fprintf(w, "// %9d %4d %12d\n", 1<<bits, bits, size) + } + fmt.Fprintf(w, "\n") +} + +func maxObjsPerSpan(classes []class) int { + most := 0 + for _, c := range classes[1:] { + n := c.npages * pageSize / c.size + most = max(most, n) + } + return most +} + +func printClasses(w io.Writer, classes []class) { + fmt.Fprintln(w, "const (") + fmt.Fprintf(w, "MinHeapAlign = %d\n", minHeapAlign) + fmt.Fprintf(w, "MaxSmallSize = %d\n", maxSmallSize) + fmt.Fprintf(w, "SmallSizeDiv = %d\n", smallSizeDiv) + fmt.Fprintf(w, "SmallSizeMax = %d\n", smallSizeMax) + fmt.Fprintf(w, "LargeSizeDiv = %d\n", largeSizeDiv) + fmt.Fprintf(w, "NumSizeClasses = %d\n", len(classes)) + fmt.Fprintf(w, "PageShift = %d\n", pageShift) + fmt.Fprintf(w, "MaxObjsPerSpan = %d\n", maxObjsPerSpan(classes)) + fmt.Fprintln(w, ")") + + fmt.Fprint(w, "var SizeClassToSize = [NumSizeClasses]uint16 {") + for _, c := range classes { + fmt.Fprintf(w, "%d,", c.size) + } + fmt.Fprintln(w, "}") + + fmt.Fprint(w, "var SizeClassToNPages = [NumSizeClasses]uint8 {") + for _, c := range classes { + fmt.Fprintf(w, "%d,", c.npages) + } + fmt.Fprintln(w, "}") + + fmt.Fprint(w, "var SizeClassToDivMagic = [NumSizeClasses]uint32 {") + for _, c := range classes { + if c.size == 0 { + fmt.Fprintf(w, "0,") + continue + } + fmt.Fprintf(w, "^uint32(0)/%d+1,", c.size) + } + fmt.Fprintln(w, "}") + + // map from size to size class, for small sizes. + sc := make([]int, smallSizeMax/smallSizeDiv+1) + for i := range sc { + size := i * smallSizeDiv + for j, c := range classes { + if c.size >= size { + sc[i] = j + break + } + } + } + fmt.Fprint(w, "var SizeToSizeClass8 = [SmallSizeMax/SmallSizeDiv+1]uint8 {") + for _, v := range sc { + fmt.Fprintf(w, "%d,", v) + } + fmt.Fprintln(w, "}") + + // map from size to size class, for large sizes. + sc = make([]int, (maxSmallSize-smallSizeMax)/largeSizeDiv+1) + for i := range sc { + size := smallSizeMax + i*largeSizeDiv + for j, c := range classes { + if c.size >= size { + sc[i] = j + break + } + } + } + fmt.Fprint(w, "var SizeToSizeClass128 = [(MaxSmallSize-SmallSizeMax)/LargeSizeDiv+1]uint8 {") + for _, v := range sc { + fmt.Fprintf(w, "%d,", v) + } + fmt.Fprintln(w, "}") +} diff --git a/src/internal/runtime/gc/sizeclasses.go b/src/internal/runtime/gc/sizeclasses.go new file mode 100644 index 0000000000..d2cca1cef1 --- /dev/null +++ b/src/internal/runtime/gc/sizeclasses.go @@ -0,0 +1,99 @@ +// Code generated by mksizeclasses.go; DO NOT EDIT. +//go:generate go run mksizeclasses.go + +package gc + +// class bytes/obj bytes/span objects tail waste max waste min align +// 1 8 8192 1024 0 87.50% 8 +// 2 16 8192 512 0 43.75% 16 +// 3 24 8192 341 8 29.24% 8 +// 4 32 8192 256 0 21.88% 32 +// 5 48 8192 170 32 31.52% 16 +// 6 64 8192 128 0 23.44% 64 +// 7 80 8192 102 32 19.07% 16 +// 8 96 8192 85 32 15.95% 32 +// 9 112 8192 73 16 13.56% 16 +// 10 128 8192 64 0 11.72% 128 +// 11 144 8192 56 128 11.82% 16 +// 12 160 8192 51 32 9.73% 32 +// 13 176 8192 46 96 9.59% 16 +// 14 192 8192 42 128 9.25% 64 +// 15 208 8192 39 80 8.12% 16 +// 16 224 8192 36 128 8.15% 32 +// 17 240 8192 34 32 6.62% 16 +// 18 256 8192 32 0 5.86% 256 +// 19 288 8192 28 128 12.16% 32 +// 20 320 8192 25 192 11.80% 64 +// 21 352 8192 23 96 9.88% 32 +// 22 384 8192 21 128 9.51% 128 +// 23 416 8192 19 288 10.71% 32 +// 24 448 8192 18 128 8.37% 64 +// 25 480 8192 17 32 6.82% 32 +// 26 512 8192 16 0 6.05% 512 +// 27 576 8192 14 128 12.33% 64 +// 28 640 8192 12 512 15.48% 128 +// 29 704 8192 11 448 13.93% 64 +// 30 768 8192 10 512 13.94% 256 +// 31 896 8192 9 128 15.52% 128 +// 32 1024 8192 8 0 12.40% 1024 +// 33 1152 8192 7 128 12.41% 128 +// 34 1280 8192 6 512 15.55% 256 +// 35 1408 16384 11 896 14.00% 128 +// 36 1536 8192 5 512 14.00% 512 +// 37 1792 16384 9 256 15.57% 256 +// 38 2048 8192 4 0 12.45% 2048 +// 39 2304 16384 7 256 12.46% 256 +// 40 2688 8192 3 128 15.59% 128 +// 41 3072 24576 8 0 12.47% 1024 +// 42 3200 16384 5 384 6.22% 128 +// 43 3456 24576 7 384 8.83% 128 +// 44 4096 8192 2 0 15.60% 4096 +// 45 4864 24576 5 256 16.65% 256 +// 46 5376 16384 3 256 10.92% 256 +// 47 6144 24576 4 0 12.48% 2048 +// 48 6528 32768 5 128 6.23% 128 +// 49 6784 40960 6 256 4.36% 128 +// 50 6912 49152 7 768 3.37% 256 +// 51 8192 8192 1 0 15.61% 8192 +// 52 9472 57344 6 512 14.28% 256 +// 53 9728 49152 5 512 3.64% 512 +// 54 10240 40960 4 0 4.99% 2048 +// 55 10880 32768 3 128 6.24% 128 +// 56 12288 24576 2 0 11.45% 4096 +// 57 13568 40960 3 256 9.99% 256 +// 58 14336 57344 4 0 5.35% 2048 +// 59 16384 16384 1 0 12.49% 8192 +// 60 18432 73728 4 0 11.11% 2048 +// 61 19072 57344 3 128 3.57% 128 +// 62 20480 40960 2 0 6.87% 4096 +// 63 21760 65536 3 256 6.25% 256 +// 64 24576 24576 1 0 11.45% 8192 +// 65 27264 81920 3 128 10.00% 128 +// 66 28672 57344 2 0 4.91% 4096 +// 67 32768 32768 1 0 12.50% 8192 + +// alignment bits min obj size +// 8 3 8 +// 16 4 32 +// 32 5 256 +// 64 6 512 +// 128 7 768 +// 4096 12 28672 +// 8192 13 32768 + +const ( + MinHeapAlign = 8 + MaxSmallSize = 32768 + SmallSizeDiv = 8 + SmallSizeMax = 1024 + LargeSizeDiv = 128 + NumSizeClasses = 68 + PageShift = 13 + MaxObjsPerSpan = 1024 +) + +var SizeClassToSize = [NumSizeClasses]uint16{0, 8, 16, 24, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 288, 320, 352, 384, 416, 448, 480, 512, 576, 640, 704, 768, 896, 1024, 1152, 1280, 1408, 1536, 1792, 2048, 2304, 2688, 3072, 3200, 3456, 4096, 4864, 5376, 6144, 6528, 6784, 6912, 8192, 9472, 9728, 10240, 10880, 12288, 13568, 14336, 16384, 18432, 19072, 20480, 21760, 24576, 27264, 28672, 32768} +var SizeClassToNPages = [NumSizeClasses]uint8{0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 3, 2, 3, 1, 3, 2, 3, 4, 5, 6, 1, 7, 6, 5, 4, 3, 5, 7, 2, 9, 7, 5, 8, 3, 10, 7, 4} +var SizeClassToDivMagic = [NumSizeClasses]uint32{0, ^uint32(0)/8 + 1, ^uint32(0)/16 + 1, ^uint32(0)/24 + 1, ^uint32(0)/32 + 1, ^uint32(0)/48 + 1, ^uint32(0)/64 + 1, ^uint32(0)/80 + 1, ^uint32(0)/96 + 1, ^uint32(0)/112 + 1, ^uint32(0)/128 + 1, ^uint32(0)/144 + 1, ^uint32(0)/160 + 1, ^uint32(0)/176 + 1, ^uint32(0)/192 + 1, ^uint32(0)/208 + 1, ^uint32(0)/224 + 1, ^uint32(0)/240 + 1, ^uint32(0)/256 + 1, ^uint32(0)/288 + 1, ^uint32(0)/320 + 1, ^uint32(0)/352 + 1, ^uint32(0)/384 + 1, ^uint32(0)/416 + 1, ^uint32(0)/448 + 1, ^uint32(0)/480 + 1, ^uint32(0)/512 + 1, ^uint32(0)/576 + 1, ^uint32(0)/640 + 1, ^uint32(0)/704 + 1, ^uint32(0)/768 + 1, ^uint32(0)/896 + 1, ^uint32(0)/1024 + 1, ^uint32(0)/1152 + 1, ^uint32(0)/1280 + 1, ^uint32(0)/1408 + 1, ^uint32(0)/1536 + 1, ^uint32(0)/1792 + 1, ^uint32(0)/2048 + 1, ^uint32(0)/2304 + 1, ^uint32(0)/2688 + 1, ^uint32(0)/3072 + 1, ^uint32(0)/3200 + 1, ^uint32(0)/3456 + 1, ^uint32(0)/4096 + 1, ^uint32(0)/4864 + 1, ^uint32(0)/5376 + 1, ^uint32(0)/6144 + 1, ^uint32(0)/6528 + 1, ^uint32(0)/6784 + 1, ^uint32(0)/6912 + 1, ^uint32(0)/8192 + 1, ^uint32(0)/9472 + 1, ^uint32(0)/9728 + 1, ^uint32(0)/10240 + 1, ^uint32(0)/10880 + 1, ^uint32(0)/12288 + 1, ^uint32(0)/13568 + 1, ^uint32(0)/14336 + 1, ^uint32(0)/16384 + 1, ^uint32(0)/18432 + 1, ^uint32(0)/19072 + 1, ^uint32(0)/20480 + 1, ^uint32(0)/21760 + 1, ^uint32(0)/24576 + 1, ^uint32(0)/27264 + 1, ^uint32(0)/28672 + 1, ^uint32(0)/32768 + 1} +var SizeToSizeClass8 = [SmallSizeMax/SmallSizeDiv + 1]uint8{0, 1, 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32} +var SizeToSizeClass128 = [(MaxSmallSize-SmallSizeMax)/LargeSizeDiv + 1]uint8{32, 33, 34, 35, 36, 37, 37, 38, 38, 39, 39, 40, 40, 40, 41, 41, 41, 42, 43, 43, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 48, 48, 48, 49, 49, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, 55, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 58, 58, 58, 58, 58, 58, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67} |
