diff options
| author | Cherry Mui <cherryyz@google.com> | 2025-09-25 13:33:58 -0400 |
|---|---|---|
| committer | Cherry Mui <cherryyz@google.com> | 2025-09-25 13:33:59 -0400 |
| commit | a693ae1e9aebac896f6634583dbdd1cd319f3983 (patch) | |
| tree | 44ef04e84afe5ef8652222c5500ab6c779d09650 /src/runtime | |
| parent | 5a78e1a4a1c79185e86b5c18efffba2a9b9d3739 (diff) | |
| parent | d70ad4e740e24b4b76961c4b56d698fa23668aa2 (diff) | |
| download | go-a693ae1e9aebac896f6634583dbdd1cd319f3983.tar.xz | |
[dev.simd] all: merge master (d70ad4e) into dev.simd
Conflicts:
- src/cmd/compile/internal/types2/stdlib_test.go
- src/go/types/stdlib_test.go
Merge List:
+ 2025-09-25 d70ad4e740 sync/atomic: correct Uintptr.Or return doc
+ 2025-09-25 d7abfe4f0d runtime: acquire/release C TSAN lock when calling cgo symbolizer/tracebacker
+ 2025-09-25 393d91aea0 cmd/fix: remove all functionality
+ 2025-09-25 6dceff8bad cmd/link: handle -w flag in external linking mode
+ 2025-09-25 76d088eb74 cmd/internal/obj/riscv: remove ACFLWSP/ACFSWSP and ACFLW/ACFSW
+ 2025-09-25 5225e9dc49 doc/next: document new image/jpeg DCT in release notes
+ 2025-09-25 81a83bba21 cmd: update x/tools@4df13e3
+ 2025-09-25 6b32c613ca go/types: make typeset return an iterator
+ 2025-09-25 fbba930271 image/jpeg: replace fdct.go and idct.go with new implementation in dct.go
+ 2025-09-25 92e093467f image/jpeg: correct and test reference slowFDCT and slowIDCT
+ 2025-09-25 27c7bbc51c image/jpeg: prepare for new FDCT/IDCT implementations
+ 2025-09-24 f15cd63ec4 cmd/compile: don't rely on loop info when there are irreducible loops
+ 2025-09-24 371c1d2fcb cmd/internal/obj/riscv: add support for vector unit-stride fault-only-first load instructions
+ 2025-09-23 411c250d64 runtime: add specialized malloc functions for sizes up to 512 bytes
+ 2025-09-23 d7a38adf4c runtime: eliminate global span queue [green tea]
+ 2025-09-23 7bc1935db5 cmd/compile/internal: support new(expr)
+ 2025-09-23 eb78f13c9f doc/go_spec.html: document new(expr)
+ 2025-09-23 74cc463f9e go/token: add TestRemovedFileFileReturnsNil test
+ 2025-09-23 902dc27ae9 go/token: clear cache after grabbing the mutex in RemoveFile
+ 2025-09-23 a13d085a5b cmd/cgo: don't hardcode section name in TestNumberOfExportedFunctions
+ 2025-09-23 61bf26a9ee cmd/link: fix Macho-O X86_64_RELOC_SUBTRACTOR in internal linking
+ 2025-09-23 4b787c8c2b reflect: remove stale comment in unpackEface
+ 2025-09-23 3df27cd21a cmd/compile: fix typo in comment
+ 2025-09-23 684e8d3363 reflect: allocate memory in TypeAssert[I] only when the assertion succeeds
+ 2025-09-23 a5866ebe40 cmd/compile: prevent shapifying of pointer shape type
+ 2025-09-23 a27261c42f go/types,types2: allow new(expr)
+ 2025-09-23 e93f439ac4 runtime/cgo: retry when CreateThread fails with ERROR_ACCESS_DENIED
+ 2025-09-23 69e74b0aac runtime: deduplicate pMask resize code
+ 2025-09-23 fde10c4ce7 runtime: split gcMarkWorkAvailable into two separate conditions
+ 2025-09-23 5d040df092 runtime: use scan kernels in scanSpan [green tea]
+ 2025-09-23 7e0251bf58 runtime: don't report non-blocked goroutines as "(durable)" in stacks
+ 2025-09-23 22ac328856 cmd/link: make -w behavior consistent on Windows
Change-Id: Id76b5a30a3b6f6669437f97e3320c9bca65a1e96
Diffstat (limited to 'src/runtime')
34 files changed, 11838 insertions, 706 deletions
diff --git a/src/runtime/_mkmalloc/constants.go b/src/runtime/_mkmalloc/constants.go new file mode 100644 index 0000000000..ad20c7b52b --- /dev/null +++ b/src/runtime/_mkmalloc/constants.go @@ -0,0 +1,29 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +const ( + // Constants that we use and will transfer to the runtime. + minHeapAlign = 8 + maxSmallSize = 32 << 10 + smallSizeDiv = 8 + smallSizeMax = 1024 + largeSizeDiv = 128 + pageShift = 13 + tinySize = 16 + + // Derived constants. + pageSize = 1 << pageShift +) + +const ( + maxPtrSize = max(4, 8) + maxPtrBits = 8 * maxPtrSize + + // Maximum size smallScanNoHeader would be called for, which is the + // maximum value gc.MinSizeForMallocHeader can have on any platform. + // gc.MinSizeForMallocHeader is defined as goarch.PtrSize * goarch.PtrBits. + smallScanNoHeaderMax = maxPtrSize * maxPtrBits +) diff --git a/src/runtime/_mkmalloc/go.mod b/src/runtime/_mkmalloc/go.mod new file mode 100644 index 0000000000..623c341769 --- /dev/null +++ b/src/runtime/_mkmalloc/go.mod @@ -0,0 +1,5 @@ +module runtime/_mkmalloc + +go 1.24 + +require golang.org/x/tools v0.33.0 diff --git a/src/runtime/_mkmalloc/go.sum b/src/runtime/_mkmalloc/go.sum new file mode 100644 index 0000000000..bead5223ca --- /dev/null +++ b/src/runtime/_mkmalloc/go.sum @@ -0,0 +1,2 @@ +golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= +golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= diff --git a/src/runtime/_mkmalloc/mkmalloc.go b/src/runtime/_mkmalloc/mkmalloc.go new file mode 100644 index 0000000000..986b0aa9f8 --- /dev/null +++ b/src/runtime/_mkmalloc/mkmalloc.go @@ -0,0 +1,605 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package main + +import ( + "bytes" + "flag" + "fmt" + "go/ast" + "go/format" + "go/parser" + "go/token" + "log" + "os" + "strings" + + "golang.org/x/tools/go/ast/astutil" + + internalastutil "runtime/_mkmalloc/astutil" +) + +var stdout = flag.Bool("stdout", false, "write sizeclasses source to stdout instead of sizeclasses.go") + +func makeSizeToSizeClass(classes []class) []uint8 { + sc := uint8(0) + ret := make([]uint8, smallScanNoHeaderMax+1) + for i := range ret { + if i > classes[sc].size { + sc++ + } + ret[i] = sc + } + return ret +} + +func main() { + log.SetFlags(0) + log.SetPrefix("mkmalloc: ") + + classes := makeClasses() + sizeToSizeClass := makeSizeToSizeClass(classes) + + if *stdout { + if _, err := os.Stdout.Write(mustFormat(generateSizeClasses(classes))); err != nil { + log.Fatal(err) + } + return + } + + sizeclasesesfile := "../../internal/runtime/gc/sizeclasses.go" + if err := os.WriteFile(sizeclasesesfile, mustFormat(generateSizeClasses(classes)), 0666); err != nil { + log.Fatal(err) + } + + outfile := "../malloc_generated.go" + if err := os.WriteFile(outfile, mustFormat(inline(specializedMallocConfig(classes, sizeToSizeClass))), 0666); err != nil { + log.Fatal(err) + } + + tablefile := "../malloc_tables_generated.go" + if err := os.WriteFile(tablefile, mustFormat(generateTable(sizeToSizeClass)), 0666); err != nil { + log.Fatal(err) + } +} + +// withLineNumbers returns b with line numbers added to help debugging. +func withLineNumbers(b []byte) []byte { + var buf bytes.Buffer + i := 1 + for line := range bytes.Lines(b) { + fmt.Fprintf(&buf, "%d: %s", i, line) + i++ + } + return buf.Bytes() +} + +// mustFormat formats the input source, or exits if there's an error. +func mustFormat(b []byte) []byte { + formatted, err := format.Source(b) + if err != nil { + log.Fatalf("error formatting source: %v\nsource:\n%s\n", err, withLineNumbers(b)) + } + return formatted +} + +// generatorConfig is the configuration for the generator. 
It uses the given file to find +// its templates, and generates each of the functions specified by specs. +type generatorConfig struct { + file string + specs []spec +} + +// spec is the specification for a function for the inliner to produce. The function gets +// the given name, and is produced by starting with the function with the name given by +// templateFunc and applying each of the ops. +type spec struct { + name string + templateFunc string + ops []op +} + +// replacementKind specifies the operation to ben done by a op. +type replacementKind int + +const ( + inlineFunc = replacementKind(iota) + subBasicLit +) + +// op is a single inlining operation for the inliner. Any calls to the function +// from are replaced with the inlined body of to. For non-functions, uses of from are +// replaced with the basic literal expression given by to. +type op struct { + kind replacementKind + from string + to string +} + +func smallScanNoHeaderSCFuncName(sc, scMax uint8) string { + if sc == 0 || sc > scMax { + return "mallocPanic" + } + return fmt.Sprintf("mallocgcSmallScanNoHeaderSC%d", sc) +} + +func tinyFuncName(size uintptr) string { + if size == 0 || size > smallScanNoHeaderMax { + return "mallocPanic" + } + return fmt.Sprintf("mallocTiny%d", size) +} + +func smallNoScanSCFuncName(sc, scMax uint8) string { + if sc < 2 || sc > scMax { + return "mallocPanic" + } + return fmt.Sprintf("mallocgcSmallNoScanSC%d", sc) +} + +// specializedMallocConfig produces an inlining config to stamp out the definitions of the size-specialized +// malloc functions to be written by mkmalloc. +func specializedMallocConfig(classes []class, sizeToSizeClass []uint8) generatorConfig { + config := generatorConfig{file: "../malloc_stubs.go"} + + // Only generate specialized functions for sizes that don't have + // a header on 64-bit platforms. 
(They may have a header on 32-bit, but + // we will fall back to the non-specialized versions in that case) + scMax := sizeToSizeClass[smallScanNoHeaderMax] + + str := fmt.Sprint + + // allocations with pointer bits + { + const noscan = 0 + for sc := uint8(0); sc <= scMax; sc++ { + if sc == 0 { + continue + } + name := smallScanNoHeaderSCFuncName(sc, scMax) + elemsize := classes[sc].size + config.specs = append(config.specs, spec{ + templateFunc: "mallocStub", + name: name, + ops: []op{ + {inlineFunc, "inlinedMalloc", "smallScanNoHeaderStub"}, + {inlineFunc, "heapSetTypeNoHeaderStub", "heapSetTypeNoHeaderStub"}, + {inlineFunc, "nextFreeFastStub", "nextFreeFastStub"}, + {inlineFunc, "writeHeapBitsSmallStub", "writeHeapBitsSmallStub"}, + {subBasicLit, "elemsize_", str(elemsize)}, + {subBasicLit, "sizeclass_", str(sc)}, + {subBasicLit, "noscanint_", str(noscan)}, + }, + }) + } + } + + // allocations without pointer bits + { + const noscan = 1 + + // tiny + tinySizeClass := sizeToSizeClass[tinySize] + for s := range uintptr(16) { + if s == 0 { + continue + } + name := tinyFuncName(s) + elemsize := classes[tinySizeClass].size + config.specs = append(config.specs, spec{ + templateFunc: "mallocStub", + name: name, + ops: []op{ + {inlineFunc, "inlinedMalloc", "tinyStub"}, + {inlineFunc, "nextFreeFastTiny", "nextFreeFastTiny"}, + {subBasicLit, "elemsize_", str(elemsize)}, + {subBasicLit, "sizeclass_", str(tinySizeClass)}, + {subBasicLit, "size_", str(s)}, + {subBasicLit, "noscanint_", str(noscan)}, + }, + }) + } + + // non-tiny + for sc := uint8(tinySizeClass); sc <= scMax; sc++ { + name := smallNoScanSCFuncName(sc, scMax) + elemsize := classes[sc].size + config.specs = append(config.specs, spec{ + templateFunc: "mallocStub", + name: name, + ops: []op{ + {inlineFunc, "inlinedMalloc", "smallNoScanStub"}, + {inlineFunc, "nextFreeFastStub", "nextFreeFastStub"}, + {subBasicLit, "elemsize_", str(elemsize)}, + {subBasicLit, "sizeclass_", str(sc)}, + {subBasicLit, "noscanint_", 
str(noscan)}, + }, + }) + } + } + + return config +} + +// inline applies the inlining operations given by the config. +func inline(config generatorConfig) []byte { + var out bytes.Buffer + + // Read the template file in. + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, config.file, nil, 0) + if err != nil { + log.Fatalf("parsing %s: %v", config.file, err) + } + + // Collect the function and import declarations. The function + // declarations in the template file provide both the templates + // that will be stamped out, and the functions that will be inlined + // into them. The imports from the template file will be copied + // straight to the output. + funcDecls := map[string]*ast.FuncDecl{} + importDecls := []*ast.GenDecl{} + for _, decl := range f.Decls { + switch decl := decl.(type) { + case *ast.FuncDecl: + funcDecls[decl.Name.Name] = decl + case *ast.GenDecl: + if decl.Tok.String() == "import" { + importDecls = append(importDecls, decl) + continue + } + } + } + + // Write out the package and import declarations. + out.WriteString("// Code generated by mkmalloc.go; DO NOT EDIT.\n\n") + out.WriteString("package " + f.Name.Name + "\n\n") + for _, importDecl := range importDecls { + out.Write(mustFormatNode(fset, importDecl)) + out.WriteString("\n\n") + } + + // Produce each of the inlined functions specified by specs. + for _, spec := range config.specs { + // Start with a renamed copy of the template function. 
+ containingFuncCopy := internalastutil.CloneNode(funcDecls[spec.templateFunc]) + if containingFuncCopy == nil { + log.Fatal("did not find", spec.templateFunc) + } + containingFuncCopy.Name.Name = spec.name + + // Apply each of the ops given by the specs + stamped := ast.Node(containingFuncCopy) + for _, repl := range spec.ops { + if toDecl, ok := funcDecls[repl.to]; ok { + stamped = inlineFunction(stamped, repl.from, toDecl) + } else { + stamped = substituteWithBasicLit(stamped, repl.from, repl.to) + } + } + + out.Write(mustFormatNode(fset, stamped)) + out.WriteString("\n\n") + } + + return out.Bytes() +} + +// substituteWithBasicLit recursively renames identifiers in the provided AST +// according to 'from' and 'to'. +func substituteWithBasicLit(node ast.Node, from, to string) ast.Node { + // The op is a substitution of an identifier with an basic literal. + toExpr, err := parser.ParseExpr(to) + if err != nil { + log.Fatalf("parsing expr %q: %v", to, err) + } + if _, ok := toExpr.(*ast.BasicLit); !ok { + log.Fatalf("op 'to' expr %q is not a basic literal", to) + } + return astutil.Apply(node, func(cursor *astutil.Cursor) bool { + if isIdentWithName(cursor.Node(), from) { + cursor.Replace(toExpr) + } + return true + }, nil) +} + +// inlineFunction recursively replaces calls to the function 'from' with the body of the function +// 'toDecl'. All calls to 'from' must appear in assignment statements. +// The replacement is very simple: it doesn't substitute the arguments for the parameters, so the +// arguments to the function call must be the same identifier as the parameters to the function +// declared by 'toDecl'. If there are any calls to from where that's not the case there will be a fatal error. 
+func inlineFunction(node ast.Node, from string, toDecl *ast.FuncDecl) ast.Node { + return astutil.Apply(node, func(cursor *astutil.Cursor) bool { + switch node := cursor.Node().(type) { + case *ast.AssignStmt: + // TODO(matloob) CHECK function args have same name + // as parameters (or parameter is "_"). + if len(node.Rhs) == 1 && isCallTo(node.Rhs[0], from) { + args := node.Rhs[0].(*ast.CallExpr).Args + if !argsMatchParameters(args, toDecl.Type.Params) { + log.Fatalf("applying op: arguments to %v don't match parameter names of %v: %v", from, toDecl.Name, debugPrint(args...)) + } + replaceAssignment(cursor, node, toDecl) + } + return false + case *ast.CallExpr: + // double check that all calls to from appear within an assignment + if isCallTo(node, from) { + if _, ok := cursor.Parent().(*ast.AssignStmt); !ok { + log.Fatalf("applying op: all calls to function %q being replaced must appear in an assignment statement, appears in %T", from, cursor.Parent()) + } + } + } + return true + }, nil) +} + +// argsMatchParameters reports whether the arguments given by args are all identifiers +// whose names are the same as the corresponding parameters in params. +func argsMatchParameters(args []ast.Expr, params *ast.FieldList) bool { + var paramIdents []*ast.Ident + for _, f := range params.List { + paramIdents = append(paramIdents, f.Names...) + } + + if len(args) != len(paramIdents) { + return false + } + + for i := range args { + if !isIdentWithName(args[i], paramIdents[i].Name) { + return false + } + } + + return true +} + +// isIdentWithName reports whether the expression is an identifier with the given name. +func isIdentWithName(expr ast.Node, name string) bool { + ident, ok := expr.(*ast.Ident) + if !ok { + return false + } + return ident.Name == name +} + +// isCallTo reports whether the expression is a call expression to the function with the given name. 
+func isCallTo(expr ast.Expr, name string) bool { + callexpr, ok := expr.(*ast.CallExpr) + if !ok { + return false + } + return isIdentWithName(callexpr.Fun, name) +} + +// replaceAssignment replaces an assignment statement where the right hand side is a function call +// whose arguments have the same names as the parameters to funcdecl with the body of funcdecl. +// It sets the left hand side of the assignment to the return values of the function. +func replaceAssignment(cursor *astutil.Cursor, assign *ast.AssignStmt, funcdecl *ast.FuncDecl) { + if !hasTerminatingReturn(funcdecl.Body) { + log.Fatal("function being inlined must have a return at the end") + } + + body := internalastutil.CloneNode(funcdecl.Body) + if hasTerminatingAndNonterminatingReturn(funcdecl.Body) { + // The function has multiple return points. Add the code that we'd continue with in the caller + // after each of the return points. The calling function must have a terminating return + // so we don't continue execution in the replaced function after we finish executing the + // continue block that we add. + body = addContinues(cursor, assign, body, everythingFollowingInParent(cursor)).(*ast.BlockStmt) + } + + if len(body.List) < 1 { + log.Fatal("replacing with empty bodied function") + } + + // The op happens in two steps: first we insert the body of the function being inlined (except for + // the final return) before the assignment, and then we change the assignment statement to replace the function call + // with the expressions being returned. + + // Determine the expressions being returned. + beforeReturn, ret := body.List[:len(body.List)-1], body.List[len(body.List)-1] + returnStmt, ok := ret.(*ast.ReturnStmt) + if !ok { + log.Fatal("last stmt in function we're replacing with should be a return") + } + results := returnStmt.Results + + // Insert the body up to the final return. + for _, stmt := range beforeReturn { + cursor.InsertBefore(stmt) + } + + // Rewrite the assignment statement. 
+ replaceWithAssignment(cursor, assign.Lhs, results, assign.Tok) +} + +// hasTerminatingReturn reparts whether the block ends in a return statement. +func hasTerminatingReturn(block *ast.BlockStmt) bool { + _, ok := block.List[len(block.List)-1].(*ast.ReturnStmt) + return ok +} + +// hasTerminatingAndNonterminatingReturn reports whether the block ends in a return +// statement, and also has a return elsewhere in it. +func hasTerminatingAndNonterminatingReturn(block *ast.BlockStmt) bool { + if !hasTerminatingReturn(block) { + return false + } + var ret bool + for i := range block.List[:len(block.List)-1] { + ast.Inspect(block.List[i], func(node ast.Node) bool { + _, ok := node.(*ast.ReturnStmt) + if ok { + ret = true + return false + } + return true + }) + } + return ret +} + +// everythingFollowingInParent returns a block with everything in the parent block node of the cursor after +// the cursor itself. The cursor must point to an element in a block node's list. +func everythingFollowingInParent(cursor *astutil.Cursor) *ast.BlockStmt { + parent := cursor.Parent() + block, ok := parent.(*ast.BlockStmt) + if !ok { + log.Fatal("internal error: in everythingFollowingInParent, cursor doesn't point to element in block list") + } + + blockcopy := internalastutil.CloneNode(block) // get a clean copy + blockcopy.List = blockcopy.List[cursor.Index()+1:] // and remove everything before and including stmt + + if _, ok := blockcopy.List[len(blockcopy.List)-1].(*ast.ReturnStmt); !ok { + log.Printf("%s", mustFormatNode(token.NewFileSet(), blockcopy)) + log.Fatal("internal error: parent doesn't end in a return") + } + return blockcopy +} + +// in the case that there's a return in the body being inlined (toBlock), addContinues +// replaces those returns that are not at the end of the function with the code in the +// caller after the function call that execution would continue with after the return. +// The block being added must end in a return. 
+func addContinues(cursor *astutil.Cursor, assignNode *ast.AssignStmt, toBlock *ast.BlockStmt, continueBlock *ast.BlockStmt) ast.Node { + if !hasTerminatingReturn(continueBlock) { + log.Fatal("the block being continued to in addContinues must end in a return") + } + applyFunc := func(cursor *astutil.Cursor) bool { + ret, ok := cursor.Node().(*ast.ReturnStmt) + if !ok { + return true + } + + if cursor.Parent() == toBlock && cursor.Index() == len(toBlock.List)-1 { + return false + } + + // This is the opposite of replacing a function call with the body. First + // we replace the return statement with the assignment from the caller, and + // then add the code we continue with. + replaceWithAssignment(cursor, assignNode.Lhs, ret.Results, assignNode.Tok) + cursor.InsertAfter(internalastutil.CloneNode(continueBlock)) + + return false + } + return astutil.Apply(toBlock, applyFunc, nil) +} + +// debugPrint prints out the expressions given by nodes for debugging. +func debugPrint(nodes ...ast.Expr) string { + var b strings.Builder + for i, node := range nodes { + b.Write(mustFormatNode(token.NewFileSet(), node)) + if i != len(nodes)-1 { + b.WriteString(", ") + } + } + return b.String() +} + +// mustFormatNode produces the formatted Go code for the given node. +func mustFormatNode(fset *token.FileSet, node any) []byte { + var buf bytes.Buffer + format.Node(&buf, fset, node) + return buf.Bytes() +} + +// mustMatchExprs makes sure that the expression lists have the same length, +// and returns the lists of the expressions on the lhs and rhs where the +// identifiers are not the same. These are used to produce assignment statements +// where the expressions on the right are assigned to the identifiers on the left. 
+func mustMatchExprs(lhs []ast.Expr, rhs []ast.Expr) ([]ast.Expr, []ast.Expr) { + if len(lhs) != len(rhs) { + log.Fatal("exprs don't match", debugPrint(lhs...), debugPrint(rhs...)) + } + + var newLhs, newRhs []ast.Expr + for i := range lhs { + lhsIdent, ok1 := lhs[i].(*ast.Ident) + rhsIdent, ok2 := rhs[i].(*ast.Ident) + if ok1 && ok2 && lhsIdent.Name == rhsIdent.Name { + continue + } + newLhs = append(newLhs, lhs[i]) + newRhs = append(newRhs, rhs[i]) + } + + return newLhs, newRhs +} + +// replaceWithAssignment replaces the node pointed to by the cursor with an assignment of the +// left hand side to the righthand side, removing any redundant assignments of a variable to itself, +// and replacing an assignment to a single basic literal with a constant declaration. +func replaceWithAssignment(cursor *astutil.Cursor, lhs, rhs []ast.Expr, tok token.Token) { + newLhs, newRhs := mustMatchExprs(lhs, rhs) + if len(newLhs) == 0 { + cursor.Delete() + return + } + if len(newRhs) == 1 { + if lit, ok := newRhs[0].(*ast.BasicLit); ok { + constDecl := &ast.DeclStmt{ + Decl: &ast.GenDecl{ + Tok: token.CONST, + Specs: []ast.Spec{ + &ast.ValueSpec{ + Names: []*ast.Ident{newLhs[0].(*ast.Ident)}, + Values: []ast.Expr{lit}, + }, + }, + }, + } + cursor.Replace(constDecl) + return + } + } + newAssignment := &ast.AssignStmt{ + Lhs: newLhs, + Rhs: newRhs, + Tok: tok, + } + cursor.Replace(newAssignment) +} + +// generateTable generates the file with the jump tables for the specialized malloc functions. +func generateTable(sizeToSizeClass []uint8) []byte { + scMax := sizeToSizeClass[smallScanNoHeaderMax] + + var b bytes.Buffer + fmt.Fprintln(&b, `// Code generated by mkmalloc.go; DO NOT EDIT. 
+//go:build !plan9 + +package runtime + +import "unsafe" + +var mallocScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{`) + + for i := range uintptr(smallScanNoHeaderMax + 1) { + fmt.Fprintf(&b, "%s,\n", smallScanNoHeaderSCFuncName(sizeToSizeClass[i], scMax)) + } + + fmt.Fprintln(&b, ` +} + +var mallocNoScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{`) + for i := range uintptr(smallScanNoHeaderMax + 1) { + if i < 16 { + fmt.Fprintf(&b, "%s,\n", tinyFuncName(i)) + } else { + fmt.Fprintf(&b, "%s,\n", smallNoScanSCFuncName(sizeToSizeClass[i], scMax)) + } + } + + fmt.Fprintln(&b, ` +}`) + + return b.Bytes() +} diff --git a/src/runtime/_mkmalloc/mkmalloc_test.go b/src/runtime/_mkmalloc/mkmalloc_test.go new file mode 100644 index 0000000000..bd15c3226a --- /dev/null +++ b/src/runtime/_mkmalloc/mkmalloc_test.go @@ -0,0 +1,36 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package main + +import ( + "bytes" + "os" + "testing" +) + +func TestNoChange(t *testing.T) { + classes := makeClasses() + sizeToSizeClass := makeSizeToSizeClass(classes) + + outfile := "../malloc_generated.go" + want, err := os.ReadFile(outfile) + if err != nil { + t.Fatal(err) + } + got := mustFormat(inline(specializedMallocConfig(classes, sizeToSizeClass))) + if !bytes.Equal(want, got) { + t.Fatalf("want:\n%s\ngot:\n%s\n", withLineNumbers(want), withLineNumbers(got)) + } + + tablefile := "../malloc_tables_generated.go" + wanttable, err := os.ReadFile(tablefile) + if err != nil { + t.Fatal(err) + } + gotTable := mustFormat(generateTable(sizeToSizeClass)) + if !bytes.Equal(wanttable, gotTable) { + t.Fatalf("want:\n%s\ngot:\n%s\n", withLineNumbers(wanttable), withLineNumbers(gotTable)) + } +} diff --git a/src/runtime/_mkmalloc/mksizeclasses.go b/src/runtime/_mkmalloc/mksizeclasses.go index a8d2d2db1e..2c39617c6b 100644 --- a/src/runtime/_mkmalloc/mksizeclasses.go +++ b/src/runtime/_mkmalloc/mksizeclasses.go @@ -31,19 +31,14 @@ import ( "bytes" "flag" "fmt" - "go/format" "io" - "log" "math" "math/bits" - "os" ) // Generate internal/runtime/gc/msize.go -var stdout = flag.Bool("stdout", false, "write to stdout instead of sizeclasses.go") - -func main() { +func generateSizeClasses(classes []class) []byte { flag.Parse() var b bytes.Buffer @@ -51,39 +46,14 @@ func main() { fmt.Fprintln(&b, "//go:generate go -C ../../../runtime/_mkmalloc run mksizeclasses.go") fmt.Fprintln(&b) fmt.Fprintln(&b, "package gc") - classes := makeClasses() printComment(&b, classes) printClasses(&b, classes) - out, err := format.Source(b.Bytes()) - if err != nil { - log.Fatal(err) - } - if *stdout { - _, err = os.Stdout.Write(out) - } else { - err = os.WriteFile("../../internal/runtime/gc/sizeclasses.go", out, 0666) - } - if err != nil { - log.Fatal(err) - } + return b.Bytes() } -const ( - // Constants that we use and will transfer to the runtime. 
- minHeapAlign = 8 - maxSmallSize = 32 << 10 - smallSizeDiv = 8 - smallSizeMax = 1024 - largeSizeDiv = 128 - pageShift = 13 - - // Derived constants. - pageSize = 1 << pageShift -) - type class struct { size int // max size npages int // number of pages @@ -294,6 +264,15 @@ func maxNPages(classes []class) int { } func printClasses(w io.Writer, classes []class) { + sizeToSizeClass := func(size int) int { + for j, c := range classes { + if c.size >= size { + return j + } + } + panic("unreachable") + } + fmt.Fprintln(w, "const (") fmt.Fprintf(w, "MinHeapAlign = %d\n", minHeapAlign) fmt.Fprintf(w, "MaxSmallSize = %d\n", maxSmallSize) @@ -304,6 +283,8 @@ func printClasses(w io.Writer, classes []class) { fmt.Fprintf(w, "PageShift = %d\n", pageShift) fmt.Fprintf(w, "MaxObjsPerSpan = %d\n", maxObjsPerSpan(classes)) fmt.Fprintf(w, "MaxSizeClassNPages = %d\n", maxNPages(classes)) + fmt.Fprintf(w, "TinySize = %d\n", tinySize) + fmt.Fprintf(w, "TinySizeClass = %d\n", sizeToSizeClass(tinySize)) fmt.Fprintln(w, ")") fmt.Fprint(w, "var SizeClassToSize = [NumSizeClasses]uint16 {") @@ -332,12 +313,7 @@ func printClasses(w io.Writer, classes []class) { sc := make([]int, smallSizeMax/smallSizeDiv+1) for i := range sc { size := i * smallSizeDiv - for j, c := range classes { - if c.size >= size { - sc[i] = j - break - } - } + sc[i] = sizeToSizeClass(size) } fmt.Fprint(w, "var SizeToSizeClass8 = [SmallSizeMax/SmallSizeDiv+1]uint8 {") for _, v := range sc { @@ -349,12 +325,7 @@ func printClasses(w io.Writer, classes []class) { sc = make([]int, (maxSmallSize-smallSizeMax)/largeSizeDiv+1) for i := range sc { size := smallSizeMax + i*largeSizeDiv - for j, c := range classes { - if c.size >= size { - sc[i] = j - break - } - } + sc[i] = sizeToSizeClass(size) } fmt.Fprint(w, "var SizeToSizeClass128 = [(MaxSmallSize-SmallSizeMax)/LargeSizeDiv+1]uint8 {") for _, v := range sc { diff --git a/src/runtime/cgo.go b/src/runtime/cgo.go index eca905bad9..60f2403d73 100644 --- a/src/runtime/cgo.go +++ 
b/src/runtime/cgo.go @@ -15,7 +15,9 @@ import "unsafe" //go:linkname _cgo_sys_thread_create _cgo_sys_thread_create //go:linkname _cgo_notify_runtime_init_done _cgo_notify_runtime_init_done //go:linkname _cgo_callers _cgo_callers -//go:linkname _cgo_set_context_function _cgo_set_context_function +//go:linkname _cgo_set_traceback_functions _cgo_set_traceback_functions +//go:linkname _cgo_call_traceback_function _cgo_call_traceback_function +//go:linkname _cgo_call_symbolizer_function _cgo_call_symbolizer_function //go:linkname _cgo_yield _cgo_yield //go:linkname _cgo_pthread_key_created _cgo_pthread_key_created //go:linkname _cgo_bindm _cgo_bindm @@ -27,7 +29,9 @@ var ( _cgo_sys_thread_create unsafe.Pointer _cgo_notify_runtime_init_done unsafe.Pointer _cgo_callers unsafe.Pointer - _cgo_set_context_function unsafe.Pointer + _cgo_set_traceback_functions unsafe.Pointer + _cgo_call_traceback_function unsafe.Pointer + _cgo_call_symbolizer_function unsafe.Pointer _cgo_yield unsafe.Pointer _cgo_pthread_key_created unsafe.Pointer _cgo_bindm unsafe.Pointer diff --git a/src/runtime/cgo/callbacks.go b/src/runtime/cgo/callbacks.go index 3c246a88b6..986f61914f 100644 --- a/src/runtime/cgo/callbacks.go +++ b/src/runtime/cgo/callbacks.go @@ -121,13 +121,30 @@ var _cgo_bindm = &x_cgo_bindm var x_cgo_notify_runtime_init_done byte var _cgo_notify_runtime_init_done = &x_cgo_notify_runtime_init_done -// Sets the traceback context function. See runtime.SetCgoTraceback. +// Sets the traceback, context, and symbolizer functions. See +// runtime.SetCgoTraceback. 
-//go:cgo_import_static x_cgo_set_context_function -//go:linkname x_cgo_set_context_function x_cgo_set_context_function -//go:linkname _cgo_set_context_function _cgo_set_context_function -var x_cgo_set_context_function byte -var _cgo_set_context_function = &x_cgo_set_context_function +//go:cgo_import_static x_cgo_set_traceback_functions +//go:linkname x_cgo_set_traceback_functions x_cgo_set_traceback_functions +//go:linkname _cgo_set_traceback_functions _cgo_set_traceback_functions +var x_cgo_set_traceback_functions byte +var _cgo_set_traceback_functions = &x_cgo_set_traceback_functions + +// Call the traceback function registered with x_cgo_set_traceback_functions. + +//go:cgo_import_static x_cgo_call_traceback_function +//go:linkname x_cgo_call_traceback_function x_cgo_call_traceback_function +//go:linkname _cgo_call_traceback_function _cgo_call_traceback_function +var x_cgo_call_traceback_function byte +var _cgo_call_traceback_function = &x_cgo_call_traceback_function + +// Call the symbolizer function registered with x_cgo_set_symbolizer_functions. + +//go:cgo_import_static x_cgo_call_symbolizer_function +//go:linkname x_cgo_call_symbolizer_function x_cgo_call_symbolizer_function +//go:linkname _cgo_call_symbolizer_function _cgo_call_symbolizer_function +var x_cgo_call_symbolizer_function byte +var _cgo_call_symbolizer_function = &x_cgo_call_symbolizer_function // Calls a libc function to execute background work injected via libc // interceptors, such as processing pending signals under the thread diff --git a/src/runtime/cgo/gcc_context.c b/src/runtime/cgo/gcc_context.c index ad58692821..b647c99a98 100644 --- a/src/runtime/cgo/gcc_context.c +++ b/src/runtime/cgo/gcc_context.c @@ -8,11 +8,11 @@ // Releases the cgo traceback context. 
void _cgo_release_context(uintptr_t ctxt) { - void (*pfn)(struct context_arg*); + void (*pfn)(struct cgoContextArg*); pfn = _cgo_get_context_function(); if (ctxt != 0 && pfn != nil) { - struct context_arg arg; + struct cgoContextArg arg; arg.Context = ctxt; (*pfn)(&arg); diff --git a/src/runtime/cgo/gcc_libinit.c b/src/runtime/cgo/gcc_libinit.c index e9b0a3f769..05998fadf8 100644 --- a/src/runtime/cgo/gcc_libinit.c +++ b/src/runtime/cgo/gcc_libinit.c @@ -32,8 +32,14 @@ static void pthread_key_destructor(void* g); uintptr_t x_cgo_pthread_key_created; void (*x_crosscall2_ptr)(void (*fn)(void *), void *, int, size_t); +// The traceback function, used when tracing C calls. +static void (*cgo_traceback_function)(struct cgoTracebackArg*); + // The context function, used when tracing back C calls into Go. -static void (*cgo_context_function)(struct context_arg*); +static void (*cgo_context_function)(struct cgoContextArg*); + +// The symbolizer function, used when symbolizing C frames. +static void (*cgo_symbolizer_function)(struct cgoSymbolizerArg*); void x_cgo_sys_thread_create(void* (*func)(void*), void* arg) { @@ -52,7 +58,7 @@ x_cgo_sys_thread_create(void* (*func)(void*), void* arg) { uintptr_t _cgo_wait_runtime_init_done(void) { - void (*pfn)(struct context_arg*); + void (*pfn)(struct cgoContextArg*); int done; pfn = __atomic_load_n(&cgo_context_function, __ATOMIC_CONSUME); @@ -70,7 +76,6 @@ _cgo_wait_runtime_init_done(void) { x_cgo_pthread_key_created = 1; } - // TODO(iant): For the case of a new C thread calling into Go, such // as when using -buildmode=c-archive, we know that Go runtime // initialization is complete but we do not know that all Go init @@ -87,7 +92,7 @@ _cgo_wait_runtime_init_done(void) { } if (pfn != nil) { - struct context_arg arg; + struct cgoContextArg arg; arg.Context = 0; (*pfn)(&arg); @@ -138,17 +143,71 @@ x_cgo_notify_runtime_init_done(void* dummy __attribute__ ((unused))) { pthread_mutex_unlock(&runtime_init_mu); } -// Sets the context 
function to call to record the traceback context -// when calling a Go function from C code. Called from runtime.SetCgoTraceback. -void x_cgo_set_context_function(void (*context)(struct context_arg*)) { - __atomic_store_n(&cgo_context_function, context, __ATOMIC_RELEASE); +// Sets the traceback, context, and symbolizer functions. Called from +// runtime.SetCgoTraceback. +void x_cgo_set_traceback_functions(struct cgoSetTracebackFunctionsArg* arg) { + __atomic_store_n(&cgo_traceback_function, arg->Traceback, __ATOMIC_RELEASE); + __atomic_store_n(&cgo_context_function, arg->Context, __ATOMIC_RELEASE); + __atomic_store_n(&cgo_symbolizer_function, arg->Symbolizer, __ATOMIC_RELEASE); +} + +// Gets the traceback function to call to trace C calls. +void (*(_cgo_get_traceback_function(void)))(struct cgoTracebackArg*) { + return __atomic_load_n(&cgo_traceback_function, __ATOMIC_CONSUME); } -// Gets the context function. -void (*(_cgo_get_context_function(void)))(struct context_arg*) { +// Call the traceback function registered with x_cgo_set_traceback_functions. +// +// The traceback function is an arbitrary user C function which may be built +// with TSAN, and thus must be wrapped with TSAN acquire/release calls. For +// normal cgo calls, cmd/cgo automatically inserts TSAN acquire/release calls. +// Since the traceback, context, and symbolizer functions are registered at +// startup and called via the runtime, they do not get automatic TSAN +// acquire/release calls. +// +// The only purpose of this wrapper is to perform TSAN acquire/release. +// Alternatively, if the runtime arranged to safely call TSAN acquire/release, +// it could perform the call directly. 
+void x_cgo_call_traceback_function(struct cgoTracebackArg* arg) { + void (*pfn)(struct cgoTracebackArg*); + + pfn = _cgo_get_traceback_function(); + if (pfn == nil) { + return; + } + + _cgo_tsan_acquire(); + (*pfn)(arg); + _cgo_tsan_release(); +} + +// Gets the context function to call to record the traceback context +// when calling a Go function from C code. +void (*(_cgo_get_context_function(void)))(struct cgoContextArg*) { return __atomic_load_n(&cgo_context_function, __ATOMIC_CONSUME); } +// Gets the symbolizer function to call to symbolize C frames. +void (*(_cgo_get_symbolizer_function(void)))(struct cgoSymbolizerArg*) { + return __atomic_load_n(&cgo_symbolizer_function, __ATOMIC_CONSUME); +} + +// Call the symbolizer function registered with x_cgo_set_traceback_functions. +// +// See comment on x_cgo_call_traceback_function. +void x_cgo_call_symbolizer_function(struct cgoSymbolizerArg* arg) { + void (*pfn)(struct cgoSymbolizerArg*); + + pfn = _cgo_get_symbolizer_function(); + if (pfn == nil) { + return; + } + + _cgo_tsan_acquire(); + (*pfn)(arg); + _cgo_tsan_release(); +} + // _cgo_try_pthread_create retries pthread_create if it fails with // EAGAIN. int diff --git a/src/runtime/cgo/gcc_libinit_windows.c b/src/runtime/cgo/gcc_libinit_windows.c index 83fc874348..926f916843 100644 --- a/src/runtime/cgo/gcc_libinit_windows.c +++ b/src/runtime/cgo/gcc_libinit_windows.c @@ -32,6 +32,7 @@ static CRITICAL_SECTION runtime_init_cs; static HANDLE runtime_init_wait; static int runtime_init_done; +// No pthreads on Windows, these are always zero. 
uintptr_t x_cgo_pthread_key_created; void (*x_crosscall2_ptr)(void (*fn)(void *), void *, int, size_t); @@ -81,7 +82,7 @@ _cgo_is_runtime_initialized() { uintptr_t _cgo_wait_runtime_init_done(void) { - void (*pfn)(struct context_arg*); + void (*pfn)(struct cgoContextArg*); _cgo_maybe_run_preinit(); while (!_cgo_is_runtime_initialized()) { @@ -89,7 +90,7 @@ _cgo_wait_runtime_init_done(void) { } pfn = _cgo_get_context_function(); if (pfn != nil) { - struct context_arg arg; + struct cgoContextArg arg; arg.Context = 0; (*pfn)(&arg); @@ -118,20 +119,54 @@ x_cgo_notify_runtime_init_done(void* dummy) { } } +// The traceback function, used when tracing C calls. +static void (*cgo_traceback_function)(struct cgoTracebackArg*); + // The context function, used when tracing back C calls into Go. -static void (*cgo_context_function)(struct context_arg*); +static void (*cgo_context_function)(struct cgoContextArg*); + +// The symbolizer function, used when symbolizing C frames. +static void (*cgo_symbolizer_function)(struct cgoSymbolizerArg*); + +// Sets the traceback, context, and symbolizer functions. Called from +// runtime.SetCgoTraceback. +void x_cgo_set_traceback_functions(struct cgoSetTracebackFunctionsArg* arg) { + EnterCriticalSection(&runtime_init_cs); + cgo_traceback_function = arg->Traceback; + cgo_context_function = arg->Context; + cgo_symbolizer_function = arg->Symbolizer; + LeaveCriticalSection(&runtime_init_cs); +} + +// Gets the traceback function to call to trace C calls. +void (*(_cgo_get_traceback_function(void)))(struct cgoTracebackArg*) { + void (*ret)(struct cgoTracebackArg*); -// Sets the context function to call to record the traceback context -// when calling a Go function from C code. Called from runtime.SetCgoTraceback. 
-void x_cgo_set_context_function(void (*context)(struct context_arg*)) { EnterCriticalSection(&runtime_init_cs); - cgo_context_function = context; + ret = cgo_traceback_function; LeaveCriticalSection(&runtime_init_cs); + return ret; +} + +// Call the traceback function registered with x_cgo_set_traceback_functions. +// +// On other platforms, this coordinates with C/C++ TSAN. On Windows, there is +// no C/C++ TSAN. +void x_cgo_call_traceback_function(struct cgoTracebackArg* arg) { + void (*pfn)(struct cgoTracebackArg*); + + pfn = _cgo_get_traceback_function(); + if (pfn == nil) { + return; + } + + (*pfn)(arg); } -// Gets the context function. -void (*(_cgo_get_context_function(void)))(struct context_arg*) { - void (*ret)(struct context_arg*); +// Gets the context function to call to record the traceback context +// when calling a Go function from C code. +void (*(_cgo_get_context_function(void)))(struct cgoContextArg*) { + void (*ret)(struct cgoContextArg*); EnterCriticalSection(&runtime_init_cs); ret = cgo_context_function; @@ -139,13 +174,38 @@ void (*(_cgo_get_context_function(void)))(struct context_arg*) { return ret; } +// Gets the symbolizer function to call to symbolize C frames. +void (*(_cgo_get_symbolizer_function(void)))(struct cgoSymbolizerArg*) { + void (*ret)(struct cgoSymbolizerArg*); + + EnterCriticalSection(&runtime_init_cs); + ret = cgo_symbolizer_function; + LeaveCriticalSection(&runtime_init_cs); + return ret; +} + +// Call the symbolizer function registered with x_cgo_set_symbolizer_functions. +// +// On other platforms, this coordinates with C/C++ TSAN. On Windows, there is +// no C/C++ TSAN. 
+void x_cgo_call_symbolizer_function(struct cgoSymbolizerArg* arg) { + void (*pfn)(struct cgoSymbolizerArg*); + + pfn = _cgo_get_symbolizer_function(); + if (pfn == nil) { + return; + } + + (*pfn)(arg); +} + void _cgo_beginthread(unsigned long (__stdcall *func)(void*), void* arg) { int tries; HANDLE thandle; for (tries = 0; tries < 20; tries++) { thandle = CreateThread(NULL, 0, func, arg, 0, NULL); - if (thandle == 0 && GetLastError() == ERROR_NOT_ENOUGH_MEMORY) { + if (thandle == 0 && GetLastError() == ERROR_ACCESS_DENIED) { // "Insufficient resources", try again in a bit. // // Note that the first Sleep(0) is a yield. diff --git a/src/runtime/cgo/libcgo.h b/src/runtime/cgo/libcgo.h index 26da68fadb..aa0b57d6d7 100644 --- a/src/runtime/cgo/libcgo.h +++ b/src/runtime/cgo/libcgo.h @@ -89,15 +89,7 @@ void darwin_arm_init_thread_exception_port(void); void darwin_arm_init_mach_exception_handler(void); /* - * The cgo context function. See runtime.SetCgoTraceback. - */ -struct context_arg { - uintptr_t Context; -}; -extern void (*(_cgo_get_context_function(void)))(struct context_arg*); - -/* - * The argument for the cgo traceback callback. See runtime.SetCgoTraceback. + * The cgo traceback callback. See runtime.SetCgoTraceback. */ struct cgoTracebackArg { uintptr_t Context; @@ -105,6 +97,38 @@ struct cgoTracebackArg { uintptr_t* Buf; uintptr_t Max; }; +extern void (*(_cgo_get_traceback_function(void)))(struct cgoTracebackArg*); + +/* + * The cgo context callback. See runtime.SetCgoTraceback. + */ +struct cgoContextArg { + uintptr_t Context; +}; +extern void (*(_cgo_get_context_function(void)))(struct cgoContextArg*); + +/* + * The argument for the cgo symbolizer callback. See runtime.SetCgoTraceback. 
+ */ +struct cgoSymbolizerArg { + uintptr_t PC; + const char* File; + uintptr_t Lineno; + const char* Func; + uintptr_t Entry; + uintptr_t More; + uintptr_t Data; +}; +extern void (*(_cgo_get_symbolizer_function(void)))(struct cgoSymbolizerArg*); + +/* + * The argument for x_cgo_set_traceback_functions. See runtime.SetCgoTraceback. + */ +struct cgoSetTracebackFunctionsArg { + void (*Traceback)(struct cgoTracebackArg*); + void (*Context)(struct cgoContextArg*); + void (*Symbolizer)(struct cgoSymbolizerArg*); +}; /* * TSAN support. This is only useful when building with @@ -121,11 +145,21 @@ struct cgoTracebackArg { #ifdef CGO_TSAN +// _cgo_tsan_acquire tells C/C++ TSAN that we are acquiring a dummy lock. We +// call this when calling from Go to C. This is necessary because TSAN cannot +// see the synchronization in Go. Note that C/C++ code built with TSAN is not +// the same as the Go race detector. +// +// cmd/cgo generates calls to _cgo_tsan_acquire and _cgo_tsan_release. For +// other cgo calls, manual calls are required. +// // These must match the definitions in yesTsanProlog in cmd/cgo/out.go. // In general we should call _cgo_tsan_acquire when we enter C code, // and call _cgo_tsan_release when we return to Go code. +// // This is only necessary when calling code that might be instrumented // by TSAN, which mostly means system library calls that TSAN intercepts. +// // See the comment in cmd/cgo/out.go for more details. 
long long _cgo_sync __attribute__ ((common)); diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index a7b51dd6c7..da3b620d49 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -1289,30 +1289,6 @@ func MSpanCountAlloc(ms *MSpan, bits []byte) int { return result } -type MSpanQueue mSpanQueue - -func (q *MSpanQueue) Size() int { - return (*mSpanQueue)(q).n -} - -func (q *MSpanQueue) Push(s *MSpan) { - (*mSpanQueue)(q).push((*mspan)(s)) -} - -func (q *MSpanQueue) Pop() *MSpan { - s := (*mSpanQueue)(q).pop() - return (*MSpan)(s) -} - -func (q *MSpanQueue) TakeAll(p *MSpanQueue) { - (*mSpanQueue)(q).takeAll((*mSpanQueue)(p)) -} - -func (q *MSpanQueue) PopN(n int) MSpanQueue { - p := (*mSpanQueue)(q).popN(n) - return (MSpanQueue)(p) -} - const ( TimeHistSubBucketBits = timeHistSubBucketBits TimeHistNumSubBuckets = timeHistNumSubBuckets diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 0a1e01cbcf..be4d3451f3 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -881,199 +881,6 @@ func TestWeakToStrongMarkTermination(t *testing.T) { } } -func TestMSpanQueue(t *testing.T) { - expectSize := func(t *testing.T, q *runtime.MSpanQueue, want int) { - t.Helper() - if got := q.Size(); got != want { - t.Errorf("expected size %d, got %d", want, got) - } - } - expectMSpan := func(t *testing.T, got, want *runtime.MSpan, op string) { - t.Helper() - if got != want { - t.Errorf("expected mspan %p from %s, got %p", want, op, got) - } - } - makeSpans := func(t *testing.T, n int) ([]*runtime.MSpan, func()) { - t.Helper() - spans := make([]*runtime.MSpan, 0, n) - for range cap(spans) { - spans = append(spans, runtime.AllocMSpan()) - } - return spans, func() { - for i, s := range spans { - runtime.FreeMSpan(s) - spans[i] = nil - } - } - } - t.Run("Empty", func(t *testing.T) { - var q runtime.MSpanQueue - expectSize(t, &q, 0) - expectMSpan(t, q.Pop(), nil, "pop") - }) - t.Run("PushPop", func(t *testing.T) { - s := 
runtime.AllocMSpan() - defer runtime.FreeMSpan(s) - - var q runtime.MSpanQueue - q.Push(s) - expectSize(t, &q, 1) - expectMSpan(t, q.Pop(), s, "pop") - expectMSpan(t, q.Pop(), nil, "pop") - }) - t.Run("PushPopPushPop", func(t *testing.T) { - s0 := runtime.AllocMSpan() - defer runtime.FreeMSpan(s0) - s1 := runtime.AllocMSpan() - defer runtime.FreeMSpan(s1) - - var q runtime.MSpanQueue - - // Push and pop s0. - q.Push(s0) - expectSize(t, &q, 1) - expectMSpan(t, q.Pop(), s0, "pop") - expectMSpan(t, q.Pop(), nil, "pop") - - // Push and pop s1. - q.Push(s1) - expectSize(t, &q, 1) - expectMSpan(t, q.Pop(), s1, "pop") - expectMSpan(t, q.Pop(), nil, "pop") - }) - t.Run("PushPushPopPop", func(t *testing.T) { - s0 := runtime.AllocMSpan() - defer runtime.FreeMSpan(s0) - s1 := runtime.AllocMSpan() - defer runtime.FreeMSpan(s1) - - var q runtime.MSpanQueue - q.Push(s0) - expectSize(t, &q, 1) - q.Push(s1) - expectSize(t, &q, 2) - expectMSpan(t, q.Pop(), s0, "pop") - expectMSpan(t, q.Pop(), s1, "pop") - expectMSpan(t, q.Pop(), nil, "pop") - }) - t.Run("EmptyTakeAll", func(t *testing.T) { - var q runtime.MSpanQueue - var p runtime.MSpanQueue - expectSize(t, &p, 0) - expectSize(t, &q, 0) - p.TakeAll(&q) - expectSize(t, &p, 0) - expectSize(t, &q, 0) - expectMSpan(t, q.Pop(), nil, "pop") - expectMSpan(t, p.Pop(), nil, "pop") - }) - t.Run("Push4TakeAll", func(t *testing.T) { - spans, free := makeSpans(t, 4) - defer free() - - var q runtime.MSpanQueue - for i, s := range spans { - expectSize(t, &q, i) - q.Push(s) - expectSize(t, &q, i+1) - } - - var p runtime.MSpanQueue - p.TakeAll(&q) - expectSize(t, &p, 4) - for i := range p.Size() { - expectMSpan(t, p.Pop(), spans[i], "pop") - } - expectSize(t, &p, 0) - expectMSpan(t, q.Pop(), nil, "pop") - expectMSpan(t, p.Pop(), nil, "pop") - }) - t.Run("Push4Pop3", func(t *testing.T) { - spans, free := makeSpans(t, 4) - defer free() - - var q runtime.MSpanQueue - for i, s := range spans { - expectSize(t, &q, i) - q.Push(s) - expectSize(t, &q, 
i+1) - } - p := q.PopN(3) - expectSize(t, &p, 3) - expectSize(t, &q, 1) - for i := range p.Size() { - expectMSpan(t, p.Pop(), spans[i], "pop") - } - expectMSpan(t, q.Pop(), spans[len(spans)-1], "pop") - expectSize(t, &p, 0) - expectSize(t, &q, 0) - expectMSpan(t, q.Pop(), nil, "pop") - expectMSpan(t, p.Pop(), nil, "pop") - }) - t.Run("Push4Pop0", func(t *testing.T) { - spans, free := makeSpans(t, 4) - defer free() - - var q runtime.MSpanQueue - for i, s := range spans { - expectSize(t, &q, i) - q.Push(s) - expectSize(t, &q, i+1) - } - p := q.PopN(0) - expectSize(t, &p, 0) - expectSize(t, &q, 4) - for i := range q.Size() { - expectMSpan(t, q.Pop(), spans[i], "pop") - } - expectSize(t, &p, 0) - expectSize(t, &q, 0) - expectMSpan(t, q.Pop(), nil, "pop") - expectMSpan(t, p.Pop(), nil, "pop") - }) - t.Run("Push4Pop4", func(t *testing.T) { - spans, free := makeSpans(t, 4) - defer free() - - var q runtime.MSpanQueue - for i, s := range spans { - expectSize(t, &q, i) - q.Push(s) - expectSize(t, &q, i+1) - } - p := q.PopN(4) - expectSize(t, &p, 4) - expectSize(t, &q, 0) - for i := range p.Size() { - expectMSpan(t, p.Pop(), spans[i], "pop") - } - expectSize(t, &p, 0) - expectMSpan(t, q.Pop(), nil, "pop") - expectMSpan(t, p.Pop(), nil, "pop") - }) - t.Run("Push4Pop5", func(t *testing.T) { - spans, free := makeSpans(t, 4) - defer free() - - var q runtime.MSpanQueue - for i, s := range spans { - expectSize(t, &q, i) - q.Push(s) - expectSize(t, &q, i+1) - } - p := q.PopN(5) - expectSize(t, &p, 4) - expectSize(t, &q, 0) - for i := range p.Size() { - expectMSpan(t, p.Pop(), spans[i], "pop") - } - expectSize(t, &p, 0) - expectMSpan(t, q.Pop(), nil, "pop") - expectMSpan(t, p.Pop(), nil, "pop") - }) -} - func TestDetectFinalizerAndCleanupLeaks(t *testing.T) { got := runTestProg(t, "testprog", "DetectFinalizerAndCleanupLeaks", "GODEBUG=checkfinalizers=1") sp := strings.SplitN(got, "detected possible issues with cleanups and/or finalizers", 2) diff --git a/src/runtime/malloc.go 
b/src/runtime/malloc.go index 5b5a633d9a..db91e89359 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -127,8 +127,8 @@ const ( _64bit = 1 << (^uintptr(0) >> 63) / 2 // Tiny allocator parameters, see "Tiny allocator" comment in malloc.go. - _TinySize = 16 - _TinySizeClass = int8(2) + _TinySize = gc.TinySize + _TinySizeClass = int8(gc.TinySizeClass) _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc @@ -1080,6 +1080,12 @@ func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, checkGCTrigger // at scale. const doubleCheckMalloc = false +// sizeSpecializedMallocEnabled is the set of conditions where we enable the size-specialized +// mallocgc implementation: the experiment must be enabled, and none of the sanitizers should +// be enabled. The tables used to select the size-specialized malloc function do not compile +// properly on plan9, so size-specialized malloc is also disabled on plan9. +const sizeSpecializedMallocEnabled = goexperiment.SizeSpecializedMalloc && GOOS != "plan9" && !asanenabled && !raceenabled && !msanenabled && !valgrindenabled + // Allocate an object of size bytes. // Small objects are allocated from the per-P cache's free lists. // Large objects (> 32 kB) are allocated straight from the heap. @@ -1110,6 +1116,17 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { return unsafe.Pointer(&zerobase) } + if sizeSpecializedMallocEnabled && heapBitsInSpan(size) { + if typ == nil || !typ.Pointers() { + return mallocNoScanTable[size](size, typ, needzero) + } else { + if !needzero { + throw("objects with pointers must be zeroed") + } + return mallocScanTable[size](size, typ, needzero) + } + } + // It's possible for any malloc to trigger sweeping, which may in // turn queue finalizers. Record this dynamic lock edge. // N.B. Compiled away if lockrank experiment is not enabled. @@ -1138,25 +1155,41 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { // Actually do the allocation. 
var x unsafe.Pointer var elemsize uintptr - if size <= maxSmallSize-gc.MallocHeaderSize { - if typ == nil || !typ.Pointers() { - if size < maxTinySize { - x, elemsize = mallocgcTiny(size, typ) - } else { + if sizeSpecializedMallocEnabled { + // we know that heapBitsInSpan is true. + if size <= maxSmallSize-gc.MallocHeaderSize { + if typ == nil || !typ.Pointers() { x, elemsize = mallocgcSmallNoscan(size, typ, needzero) - } - } else { - if !needzero { - throw("objects with pointers must be zeroed") - } - if heapBitsInSpan(size) { - x, elemsize = mallocgcSmallScanNoHeader(size, typ) } else { + if !needzero { + throw("objects with pointers must be zeroed") + } x, elemsize = mallocgcSmallScanHeader(size, typ) } + } else { + x, elemsize = mallocgcLarge(size, typ, needzero) } } else { - x, elemsize = mallocgcLarge(size, typ, needzero) + if size <= maxSmallSize-gc.MallocHeaderSize { + if typ == nil || !typ.Pointers() { + if size < maxTinySize { + x, elemsize = mallocgcTiny(size, typ) + } else { + x, elemsize = mallocgcSmallNoscan(size, typ, needzero) + } + } else { + if !needzero { + throw("objects with pointers must be zeroed") + } + if heapBitsInSpan(size) { + x, elemsize = mallocgcSmallScanNoHeader(size, typ) + } else { + x, elemsize = mallocgcSmallScanHeader(size, typ) + } + } + } else { + x, elemsize = mallocgcLarge(size, typ, needzero) + } } // Notify sanitizers, if enabled. diff --git a/src/runtime/malloc_generated.go b/src/runtime/malloc_generated.go new file mode 100644 index 0000000000..600048c675 --- /dev/null +++ b/src/runtime/malloc_generated.go @@ -0,0 +1,8468 @@ +// Code generated by mkmalloc.go; DO NOT EDIT. 
+ +package runtime + +import ( + "internal/goarch" + "internal/runtime/sys" + "unsafe" +) + +func mallocgcSmallScanNoHeaderSC1(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 1 + + const elemsize = 8 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 8 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(8)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 8 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is 
not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC2(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } 
+ } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 2 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(16)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 16 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / 
ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC3(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 3 + + const elemsize = 24 + + mp := acquirem() + if doubleCheckMalloc { + 
doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 24 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(24)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 24 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, 
(i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC4(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 4 + + const elemsize = 32 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := 
sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 32 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(32)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 32 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, 
ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC5(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 5 + + const elemsize = 48 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = 
gclinkptr(uintptr(result)* + 48 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(48)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 48 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + 
if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC6(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 6 + + const elemsize = 64 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 64 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } 
else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(64)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 64 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger 
{ + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC7(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 7 + + const elemsize = 80 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 80 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(80)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 80 + + scanSize := typ.PtrBytes + src := src0 + if 
typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} 
+ +func mallocgcSmallScanNoHeaderSC8(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 8 + + const elemsize = 96 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 96 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(96)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 96 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + 
src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC9(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := 
preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 9 + + const elemsize = 112 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 112 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(112)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 112 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / 
goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC10(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 10 + + const elemsize = 128 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + 
checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 128 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(128)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 128 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 
= (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC11(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 11 + + const elemsize = 144 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems 
{ + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 144 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(144)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 144 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + 
writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC12(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 12 + + const elemsize = 160 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 160 + + span.base()) + } + } + } + v := nextFreeFastResult 
+ if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(160)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 160 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + 
span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC13(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 13 + + const elemsize = 176 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 176 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && 
(!heapBitsInSpan(dataSize) || !heapBitsInSpan(176)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 176 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + 
} + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC14(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 14 + + const elemsize = 192 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 192 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(192)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 192 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) 
- 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC15(size uintptr, typ *_type, needzero 
bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 15 + + const elemsize = 208 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 208 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(208)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 208 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + 
} + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC16(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 
0 { + deductAssistCredit(size) + } + + const sizeclass = 16 + + const elemsize = 224 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 224 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(224)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 224 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || 
(!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC17(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 17 + + const elemsize = 240 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + 
span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 240 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(240)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 240 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } 
else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC18(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 18 + + const elemsize = 256 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache 
>>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 256 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(256)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 256 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + 
doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC19(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 19 + + const elemsize = 288 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 288 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 
0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(288)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 288 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || 
MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC20(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 20 + + const elemsize = 320 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 320 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(320)) { + throw("tried to write heap bits, but no heap 
bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 320 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + 
assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC21(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 21 + + const elemsize = 352 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 352 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(352)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 352 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + 
throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + c.scanAlloc += scanSize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallScanNoHeaderSC22(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + 
throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 22 + + const elemsize = 384 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 384 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(384)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 384 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := 
// mallocgcSmallScanNoHeaderSC23 is a specialized allocation fast path for
// span size class 23 (elemsize 416): small objects that contain pointers and
// whose GC pointer bitmap is stored in the span itself (no malloc header).
// It mirrors the generic mallocgc small-scan path with the size-class
// constants baked in.
//
// NOTE(review): this function appears machine-generated (one clone per size
// class); changes belong in the generator, not in individual clones.
func mallocgcSmallScanNoHeaderSC23(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	lockRankMayQueueFinalizer()

	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	// Charge the allocation against this goroutine's GC assist credit
	// while the GC is active.
	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	const sizeclass = 23

	const elemsize = 416

	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckSmallScanNoHeader(size, typ, mp)
	}
	mp.mallocing = 1

	checkGCTrigger := false
	c := getMCache(mp)
	const spc = spanClass(sizeclass<<1) | spanClass(0)
	span := c.alloc[spc]

	// Inlined nextFreeFast: try to take the next free slot straight from
	// the span's cached 64-bit allocation bitmap.
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache)
		result := span.freeindex + uint16(theBit)
		if result < span.nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != span.nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*
					416 +
					span.base())
			}
		}
	}
	v := nextFreeFastResult
	if v == 0 {
		// Slow path: refill the alloc cache / fetch a fresh span.
		v, span, checkGCTrigger = c.nextFree(spc)
	}
	x := unsafe.Pointer(v)
	if span.needzero != 0 {
		memclrNoHeapPointers(x, elemsize)
	}
	if goarch.PtrSize == 8 && sizeclass == 1 {
		// sizeclass is the constant 23, so this branch is statically dead;
		// the generator keeps it so every clone shares one template.
		c.scanAlloc += 8
	} else {
		// Inlined writeHeapBitsSmall: build the object's pointer bitmap
		// from the type's GC mask and store it into the span's heap bits.
		dataSize := size
		x := uintptr(x)

		if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(416)) {
			throw("tried to write heap bits, but no heap bits in span")
		}

		src0 := readUintptr(getGCMask(typ))

		const elemsize = 416

		scanSize := typ.PtrBytes
		src := src0
		if typ.Size_ == goarch.PtrSize {
			// Pointer-sized elements: every word is a pointer.
			src = (1 << (dataSize / goarch.PtrSize)) - 1
		} else {
			if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 {
				throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_")
			}
			// Replicate the element mask across the whole allocation.
			for i := typ.Size_; i < dataSize; i += typ.Size_ {
				src |= src0 << (i / goarch.PtrSize)
				scanSize += typ.Size_
			}
		}

		dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize)
		dst := unsafe.Pointer(dstBase)
		o := (x - span.base()) / goarch.PtrSize
		i := o / ptrBits
		j := o % ptrBits
		const bits uintptr = elemsize / goarch.PtrSize

		const bitsIsPowerOfTwo = bits&(bits-1) == 0
		if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) {
			// The bitmap straddles two words; split the write.
			bits0 := ptrBits - j
			bits1 := bits - bits0
			dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize))
			dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize))
			*dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j)
			*dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0)
		} else {
			// Single-word update.
			dst := (*uintptr)(add(dst, i*goarch.PtrSize))
			*dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j)
		}

		const doubleCheck = false
		if doubleCheck {
			writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ)
		}
		if doubleCheckHeapSetType {
			doubleCheckHeapType(x, dataSize, typ, nil, span)
		}
		c.scanAlloc += scanSize
	}

	// Make the initialized object contents visible before the pointer is
	// published to the caller.
	publicationBarrier()

	if writeBarrier.enabled {
		gcmarknewobject(span, uintptr(x))
	} else {
		span.freeIndexForScan = span.freeindex
	}

	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}
	// Account for internal fragmentation (elemsize - size) in assist debt.
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}
// mallocgcSmallScanNoHeaderSC24 is a specialized allocation fast path for
// span size class 24 (elemsize 448): small objects that contain pointers and
// whose GC pointer bitmap is stored in the span itself (no malloc header).
//
// NOTE(review): machine-generated clone; see the SC23 variant for structure.
func mallocgcSmallScanNoHeaderSC24(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	lockRankMayQueueFinalizer()

	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	// Charge assist credit while the GC is active.
	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	const sizeclass = 24

	const elemsize = 448

	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckSmallScanNoHeader(size, typ, mp)
	}
	mp.mallocing = 1

	checkGCTrigger := false
	c := getMCache(mp)
	const spc = spanClass(sizeclass<<1) | spanClass(0)
	span := c.alloc[spc]

	// Inlined nextFreeFast: take the next free slot from the cached
	// allocation bitmap when possible.
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache)
		result := span.freeindex + uint16(theBit)
		if result < span.nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != span.nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*
					448 +
					span.base())
			}
		}
	}
	v := nextFreeFastResult
	if v == 0 {
		// Slow path: refill the alloc cache / fetch a fresh span.
		v, span, checkGCTrigger = c.nextFree(spc)
	}
	x := unsafe.Pointer(v)
	if span.needzero != 0 {
		memclrNoHeapPointers(x, elemsize)
	}
	if goarch.PtrSize == 8 && sizeclass == 1 {
		// Statically dead here (sizeclass == 24); generator template artifact.
		c.scanAlloc += 8
	} else {
		// Inlined writeHeapBitsSmall: build and store the pointer bitmap.
		dataSize := size
		x := uintptr(x)

		if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(448)) {
			throw("tried to write heap bits, but no heap bits in span")
		}

		src0 := readUintptr(getGCMask(typ))

		const elemsize = 448

		scanSize := typ.PtrBytes
		src := src0
		if typ.Size_ == goarch.PtrSize {
			src = (1 << (dataSize / goarch.PtrSize)) - 1
		} else {
			if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 {
				throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_")
			}
			// Replicate the element mask across the whole allocation.
			for i := typ.Size_; i < dataSize; i += typ.Size_ {
				src |= src0 << (i / goarch.PtrSize)
				scanSize += typ.Size_
			}
		}

		dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize)
		dst := unsafe.Pointer(dstBase)
		o := (x - span.base()) / goarch.PtrSize
		i := o / ptrBits
		j := o % ptrBits
		const bits uintptr = elemsize / goarch.PtrSize

		const bitsIsPowerOfTwo = bits&(bits-1) == 0
		if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) {
			// Bitmap straddles two words; split the write.
			bits0 := ptrBits - j
			bits1 := bits - bits0
			dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize))
			dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize))
			*dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j)
			*dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0)
		} else {
			// Single-word update.
			dst := (*uintptr)(add(dst, i*goarch.PtrSize))
			*dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j)
		}

		const doubleCheck = false
		if doubleCheck {
			writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ)
		}
		if doubleCheckHeapSetType {
			doubleCheckHeapType(x, dataSize, typ, nil, span)
		}
		c.scanAlloc += scanSize
	}

	// Publish initialized contents before returning the pointer.
	publicationBarrier()

	if writeBarrier.enabled {
		gcmarknewobject(span, uintptr(x))
	} else {
		span.freeIndexForScan = span.freeindex
	}

	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}
	// Account for internal fragmentation in assist debt.
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}
// mallocgcSmallScanNoHeaderSC25 is a specialized allocation fast path for
// span size class 25 (elemsize 480): small objects that contain pointers and
// whose GC pointer bitmap is stored in the span itself (no malloc header).
//
// NOTE(review): machine-generated clone; see the SC23 variant for structure.
func mallocgcSmallScanNoHeaderSC25(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	lockRankMayQueueFinalizer()

	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	// Charge assist credit while the GC is active.
	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	const sizeclass = 25

	const elemsize = 480

	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckSmallScanNoHeader(size, typ, mp)
	}
	mp.mallocing = 1

	checkGCTrigger := false
	c := getMCache(mp)
	const spc = spanClass(sizeclass<<1) | spanClass(0)
	span := c.alloc[spc]

	// Inlined nextFreeFast: take the next free slot from the cached
	// allocation bitmap when possible.
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache)
		result := span.freeindex + uint16(theBit)
		if result < span.nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != span.nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*
					480 +
					span.base())
			}
		}
	}
	v := nextFreeFastResult
	if v == 0 {
		// Slow path: refill the alloc cache / fetch a fresh span.
		v, span, checkGCTrigger = c.nextFree(spc)
	}
	x := unsafe.Pointer(v)
	if span.needzero != 0 {
		memclrNoHeapPointers(x, elemsize)
	}
	if goarch.PtrSize == 8 && sizeclass == 1 {
		// Statically dead here (sizeclass == 25); generator template artifact.
		c.scanAlloc += 8
	} else {
		// Inlined writeHeapBitsSmall: build and store the pointer bitmap.
		dataSize := size
		x := uintptr(x)

		if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(480)) {
			throw("tried to write heap bits, but no heap bits in span")
		}

		src0 := readUintptr(getGCMask(typ))

		const elemsize = 480

		scanSize := typ.PtrBytes
		src := src0
		if typ.Size_ == goarch.PtrSize {
			src = (1 << (dataSize / goarch.PtrSize)) - 1
		} else {
			if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 {
				throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_")
			}
			// Replicate the element mask across the whole allocation.
			for i := typ.Size_; i < dataSize; i += typ.Size_ {
				src |= src0 << (i / goarch.PtrSize)
				scanSize += typ.Size_
			}
		}

		dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize)
		dst := unsafe.Pointer(dstBase)
		o := (x - span.base()) / goarch.PtrSize
		i := o / ptrBits
		j := o % ptrBits
		const bits uintptr = elemsize / goarch.PtrSize

		const bitsIsPowerOfTwo = bits&(bits-1) == 0
		if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) {
			// Bitmap straddles two words; split the write.
			bits0 := ptrBits - j
			bits1 := bits - bits0
			dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize))
			dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize))
			*dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j)
			*dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0)
		} else {
			// Single-word update.
			dst := (*uintptr)(add(dst, i*goarch.PtrSize))
			*dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j)
		}

		const doubleCheck = false
		if doubleCheck {
			writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ)
		}
		if doubleCheckHeapSetType {
			doubleCheckHeapType(x, dataSize, typ, nil, span)
		}
		c.scanAlloc += scanSize
	}

	// Publish initialized contents before returning the pointer.
	publicationBarrier()

	if writeBarrier.enabled {
		gcmarknewobject(span, uintptr(x))
	} else {
		span.freeIndexForScan = span.freeindex
	}

	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}
	// Account for internal fragmentation in assist debt.
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}
// mallocgcSmallScanNoHeaderSC26 is a specialized allocation fast path for
// span size class 26 (elemsize 512): small objects that contain pointers and
// whose GC pointer bitmap is stored in the span itself (no malloc header).
//
// NOTE(review): machine-generated clone; see the SC23 variant for structure.
func mallocgcSmallScanNoHeaderSC26(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	lockRankMayQueueFinalizer()

	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	// Charge assist credit while the GC is active.
	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	const sizeclass = 26

	const elemsize = 512

	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckSmallScanNoHeader(size, typ, mp)
	}
	mp.mallocing = 1

	checkGCTrigger := false
	c := getMCache(mp)
	const spc = spanClass(sizeclass<<1) | spanClass(0)
	span := c.alloc[spc]

	// Inlined nextFreeFast: take the next free slot from the cached
	// allocation bitmap when possible.
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache)
		result := span.freeindex + uint16(theBit)
		if result < span.nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != span.nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*
					512 +
					span.base())
			}
		}
	}
	v := nextFreeFastResult
	if v == 0 {
		// Slow path: refill the alloc cache / fetch a fresh span.
		v, span, checkGCTrigger = c.nextFree(spc)
	}
	x := unsafe.Pointer(v)
	if span.needzero != 0 {
		memclrNoHeapPointers(x, elemsize)
	}
	if goarch.PtrSize == 8 && sizeclass == 1 {
		// Statically dead here (sizeclass == 26); generator template artifact.
		c.scanAlloc += 8
	} else {
		// Inlined writeHeapBitsSmall: build and store the pointer bitmap.
		dataSize := size
		x := uintptr(x)

		if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(512)) {
			throw("tried to write heap bits, but no heap bits in span")
		}

		src0 := readUintptr(getGCMask(typ))

		const elemsize = 512

		scanSize := typ.PtrBytes
		src := src0
		if typ.Size_ == goarch.PtrSize {
			src = (1 << (dataSize / goarch.PtrSize)) - 1
		} else {
			if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 {
				throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_")
			}
			// Replicate the element mask across the whole allocation.
			for i := typ.Size_; i < dataSize; i += typ.Size_ {
				src |= src0 << (i / goarch.PtrSize)
				scanSize += typ.Size_
			}
		}

		dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize)
		dst := unsafe.Pointer(dstBase)
		o := (x - span.base()) / goarch.PtrSize
		i := o / ptrBits
		j := o % ptrBits
		const bits uintptr = elemsize / goarch.PtrSize

		const bitsIsPowerOfTwo = bits&(bits-1) == 0
		if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) {
			// Bitmap straddles two words; split the write.
			bits0 := ptrBits - j
			bits1 := bits - bits0
			dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize))
			dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize))
			*dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j)
			*dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0)
		} else {
			// Single-word update.
			dst := (*uintptr)(add(dst, i*goarch.PtrSize))
			*dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j)
		}

		const doubleCheck = false
		if doubleCheck {
			writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ)
		}
		if doubleCheckHeapSetType {
			doubleCheckHeapType(x, dataSize, typ, nil, span)
		}
		c.scanAlloc += scanSize
	}

	// Publish initialized contents before returning the pointer.
	publicationBarrier()

	if writeBarrier.enabled {
		gcmarknewobject(span, uintptr(x))
	} else {
		span.freeIndexForScan = span.freeindex
	}

	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}
	// Account for internal fragmentation in assist debt.
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}
// mallocTiny1 allocates a pointer-free object of constant size 1 via the
// tiny allocator: allocations are packed into the mcache's current 16-byte
// tiny block, and a fresh 16-byte element is taken from the tiny span class
// when the current block cannot fit the request.
//
// NOTE(review): machine-generated clone (one per constant size 1..maxTinySize).
func mallocTiny1(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	lockRankMayQueueFinalizer()

	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	const constsize = 1

	const elemsize = 16

	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckTiny(constsize, typ, mp)
	}
	mp.mallocing = 1

	c := getMCache(mp)
	off := c.tinyoffset
	// Align the offset inside the tiny block to the natural alignment of
	// the requested size (constant-folded for constsize == 1).
	if constsize&7 == 0 {
		off = alignUp(off, 8)
	} else if goarch.PtrSize == 4 && constsize == 12 {
		off = alignUp(off, 8)
	} else if constsize&3 == 0 {
		off = alignUp(off, 4)
	} else if constsize&1 == 0 {
		off = alignUp(off, 2)
	}
	if off+constsize <= maxTinySize && c.tiny != 0 {
		// Fast path: the object fits in the current tiny block.
		x := unsafe.Pointer(c.tiny + off)
		c.tinyoffset = off + constsize
		c.tinyAllocs++
		mp.mallocing = 0
		releasem(mp)
		// Shadowed elemsize = 0: a tiny-block hit consumes no new heap
		// bytes, which also makes the assist branch below statically dead.
		const elemsize = 0
		{
			if gcBlackenEnabled != 0 && elemsize != 0 {
				if assistG := getg().m.curg; assistG != nil {
					assistG.gcAssistBytes -= int64(elemsize - size)
				}
			}

			if debug.malloc {
				postMallocgcDebug(x, elemsize, typ)
			}
			return x
		}
	}

	// Slow path: grab a fresh 16-byte element from the tiny span class.
	checkGCTrigger := false
	span := c.alloc[tinySpanClass]

	const nbytes = 8192
	const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) /
		16,
	)
	// Inlined nextFreeFast against the constant element count.
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache)
		result := span.freeindex + uint16(theBit)
		if result < nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*
					16 +
					span.base())
			}
		}
	}
	v := nextFreeFastResult
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(tinySpanClass)
	}
	x := unsafe.Pointer(v)
	// Zero the whole 16-byte element with two word stores.
	(*[2]uint64)(x)[0] = 0
	(*[2]uint64)(x)[1] = 0

	// Adopt the new block as the tiny block if it has more room left than
	// the old one (never under the race detector).
	if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) {
		c.tiny = uintptr(x)
		c.tinyoffset = constsize
	}

	// Publish zeroed contents before returning the pointer.
	publicationBarrier()

	if writeBarrier.enabled {
		gcmarknewobject(span, uintptr(x))
	} else {
		span.freeIndexForScan = span.freeindex
	}

	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	if raceenabled {
		// Race mode: hand out the tail of the element so distinct tiny
		// allocations get distinct addresses — presumably for the race
		// detector's benefit; confirm against mallocgc proper.
		x = add(x, elemsize-constsize)
	}
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}
// mallocTiny2 allocates a pointer-free object of constant size 2 via the
// tiny allocator. Machine-generated clone; see mallocTiny1 for structure.
func mallocTiny2(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	lockRankMayQueueFinalizer()

	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	const constsize = 2

	const elemsize = 16

	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckTiny(constsize, typ, mp)
	}
	mp.mallocing = 1

	c := getMCache(mp)
	off := c.tinyoffset
	// Align inside the tiny block (constant-folded for constsize == 2).
	if constsize&7 == 0 {
		off = alignUp(off, 8)
	} else if goarch.PtrSize == 4 && constsize == 12 {
		off = alignUp(off, 8)
	} else if constsize&3 == 0 {
		off = alignUp(off, 4)
	} else if constsize&1 == 0 {
		off = alignUp(off, 2)
	}
	if off+constsize <= maxTinySize && c.tiny != 0 {
		// Fast path: fits in the current tiny block.
		x := unsafe.Pointer(c.tiny + off)
		c.tinyoffset = off + constsize
		c.tinyAllocs++
		mp.mallocing = 0
		releasem(mp)
		// Shadowed elemsize = 0: no new heap bytes on a tiny-block hit.
		const elemsize = 0
		{
			if gcBlackenEnabled != 0 && elemsize != 0 {
				if assistG := getg().m.curg; assistG != nil {
					assistG.gcAssistBytes -= int64(elemsize - size)
				}
			}

			if debug.malloc {
				postMallocgcDebug(x, elemsize, typ)
			}
			return x
		}
	}

	// Slow path: take a fresh 16-byte element from the tiny span class.
	checkGCTrigger := false
	span := c.alloc[tinySpanClass]

	const nbytes = 8192
	const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) /
		16,
	)
	// Inlined nextFreeFast against the constant element count.
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache)
		result := span.freeindex + uint16(theBit)
		if result < nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*
					16 +
					span.base())
			}
		}
	}
	v := nextFreeFastResult
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(tinySpanClass)
	}
	x := unsafe.Pointer(v)
	// Zero the element with two word stores.
	(*[2]uint64)(x)[0] = 0
	(*[2]uint64)(x)[1] = 0

	// Adopt the new block as the tiny block if it leaves more room.
	if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) {
		c.tiny = uintptr(x)
		c.tinyoffset = constsize
	}

	// Publish zeroed contents before returning the pointer.
	publicationBarrier()

	if writeBarrier.enabled {
		gcmarknewobject(span, uintptr(x))
	} else {
		span.freeIndexForScan = span.freeindex
	}

	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	if raceenabled {
		// Race mode: distinct addresses for distinct tiny allocations.
		x = add(x, elemsize-constsize)
	}
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}
// mallocTiny3 allocates a pointer-free object of constant size 3 via the
// tiny allocator. Machine-generated clone; see mallocTiny1 for structure.
func mallocTiny3(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	lockRankMayQueueFinalizer()

	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	const constsize = 3

	const elemsize = 16

	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckTiny(constsize, typ, mp)
	}
	mp.mallocing = 1

	c := getMCache(mp)
	off := c.tinyoffset
	// Align inside the tiny block (constant-folded for constsize == 3).
	if constsize&7 == 0 {
		off = alignUp(off, 8)
	} else if goarch.PtrSize == 4 && constsize == 12 {
		off = alignUp(off, 8)
	} else if constsize&3 == 0 {
		off = alignUp(off, 4)
	} else if constsize&1 == 0 {
		off = alignUp(off, 2)
	}
	if off+constsize <= maxTinySize && c.tiny != 0 {
		// Fast path: fits in the current tiny block.
		x := unsafe.Pointer(c.tiny + off)
		c.tinyoffset = off + constsize
		c.tinyAllocs++
		mp.mallocing = 0
		releasem(mp)
		// Shadowed elemsize = 0: no new heap bytes on a tiny-block hit.
		const elemsize = 0
		{
			if gcBlackenEnabled != 0 && elemsize != 0 {
				if assistG := getg().m.curg; assistG != nil {
					assistG.gcAssistBytes -= int64(elemsize - size)
				}
			}

			if debug.malloc {
				postMallocgcDebug(x, elemsize, typ)
			}
			return x
		}
	}

	// Slow path: take a fresh 16-byte element from the tiny span class.
	checkGCTrigger := false
	span := c.alloc[tinySpanClass]

	const nbytes = 8192
	const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) /
		16,
	)
	// Inlined nextFreeFast against the constant element count.
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache)
		result := span.freeindex + uint16(theBit)
		if result < nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*
					16 +
					span.base())
			}
		}
	}
	v := nextFreeFastResult
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(tinySpanClass)
	}
	x := unsafe.Pointer(v)
	// Zero the element with two word stores.
	(*[2]uint64)(x)[0] = 0
	(*[2]uint64)(x)[1] = 0

	// Adopt the new block as the tiny block if it leaves more room.
	if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) {
		c.tiny = uintptr(x)
		c.tinyoffset = constsize
	}

	// Publish zeroed contents before returning the pointer.
	publicationBarrier()

	if writeBarrier.enabled {
		gcmarknewobject(span, uintptr(x))
	} else {
		span.freeIndexForScan = span.freeindex
	}

	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	if raceenabled {
		// Race mode: distinct addresses for distinct tiny allocations.
		x = add(x, elemsize-constsize)
	}
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}
// mallocTiny4 allocates a pointer-free object of constant size 4 via the
// tiny allocator. Machine-generated clone; see mallocTiny1 for structure.
func mallocTiny4(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	lockRankMayQueueFinalizer()

	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	const constsize = 4

	const elemsize = 16

	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckTiny(constsize, typ, mp)
	}
	mp.mallocing = 1

	c := getMCache(mp)
	off := c.tinyoffset
	// Align inside the tiny block (constant-folded for constsize == 4).
	if constsize&7 == 0 {
		off = alignUp(off, 8)
	} else if goarch.PtrSize == 4 && constsize == 12 {
		off = alignUp(off, 8)
	} else if constsize&3 == 0 {
		off = alignUp(off, 4)
	} else if constsize&1 == 0 {
		off = alignUp(off, 2)
	}
	if off+constsize <= maxTinySize && c.tiny != 0 {
		// Fast path: fits in the current tiny block.
		x := unsafe.Pointer(c.tiny + off)
		c.tinyoffset = off + constsize
		c.tinyAllocs++
		mp.mallocing = 0
		releasem(mp)
		// Shadowed elemsize = 0: no new heap bytes on a tiny-block hit.
		const elemsize = 0
		{
			if gcBlackenEnabled != 0 && elemsize != 0 {
				if assistG := getg().m.curg; assistG != nil {
					assistG.gcAssistBytes -= int64(elemsize - size)
				}
			}

			if debug.malloc {
				postMallocgcDebug(x, elemsize, typ)
			}
			return x
		}
	}

	// Slow path: take a fresh 16-byte element from the tiny span class.
	checkGCTrigger := false
	span := c.alloc[tinySpanClass]

	const nbytes = 8192
	const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) /
		16,
	)
	// Inlined nextFreeFast against the constant element count.
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache)
		result := span.freeindex + uint16(theBit)
		if result < nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*
					16 +
					span.base())
			}
		}
	}
	v := nextFreeFastResult
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(tinySpanClass)
	}
	x := unsafe.Pointer(v)
	// Zero the element with two word stores.
	(*[2]uint64)(x)[0] = 0
	(*[2]uint64)(x)[1] = 0

	// Adopt the new block as the tiny block if it leaves more room.
	if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) {
		c.tiny = uintptr(x)
		c.tinyoffset = constsize
	}

	// Publish zeroed contents before returning the pointer.
	publicationBarrier()

	if writeBarrier.enabled {
		gcmarknewobject(span, uintptr(x))
	} else {
		span.freeIndexForScan = span.freeindex
	}

	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	if raceenabled {
		// Race mode: distinct addresses for distinct tiny allocations.
		x = add(x, elemsize-constsize)
	}
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}
// mallocTiny5 allocates a pointer-free object of constant size 5 via the
// tiny allocator. Machine-generated clone; see mallocTiny1 for structure.
func mallocTiny5(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	lockRankMayQueueFinalizer()

	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	const constsize = 5

	const elemsize = 16

	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckTiny(constsize, typ, mp)
	}
	mp.mallocing = 1

	c := getMCache(mp)
	off := c.tinyoffset
	// Align inside the tiny block (constant-folded for constsize == 5).
	if constsize&7 == 0 {
		off = alignUp(off, 8)
	} else if goarch.PtrSize == 4 && constsize == 12 {
		off = alignUp(off, 8)
	} else if constsize&3 == 0 {
		off = alignUp(off, 4)
	} else if constsize&1 == 0 {
		off = alignUp(off, 2)
	}
	if off+constsize <= maxTinySize && c.tiny != 0 {
		// Fast path: fits in the current tiny block.
		x := unsafe.Pointer(c.tiny + off)
		c.tinyoffset = off + constsize
		c.tinyAllocs++
		mp.mallocing = 0
		releasem(mp)
		// Shadowed elemsize = 0: no new heap bytes on a tiny-block hit.
		const elemsize = 0
		{
			if gcBlackenEnabled != 0 && elemsize != 0 {
				if assistG := getg().m.curg; assistG != nil {
					assistG.gcAssistBytes -= int64(elemsize - size)
				}
			}

			if debug.malloc {
				postMallocgcDebug(x, elemsize, typ)
			}
			return x
		}
	}

	// Slow path: take a fresh 16-byte element from the tiny span class.
	checkGCTrigger := false
	span := c.alloc[tinySpanClass]

	const nbytes = 8192
	const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) /
		16,
	)
	// Inlined nextFreeFast against the constant element count.
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache)
		result := span.freeindex + uint16(theBit)
		if result < nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*
					16 +
					span.base())
			}
		}
	}
	v := nextFreeFastResult
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(tinySpanClass)
	}
	x := unsafe.Pointer(v)
	// Zero the element with two word stores.
	(*[2]uint64)(x)[0] = 0
	(*[2]uint64)(x)[1] = 0

	// Adopt the new block as the tiny block if it leaves more room.
	if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) {
		c.tiny = uintptr(x)
		c.tinyoffset = constsize
	}

	// Publish zeroed contents before returning the pointer.
	publicationBarrier()

	if writeBarrier.enabled {
		gcmarknewobject(span, uintptr(x))
	} else {
		span.freeIndexForScan = span.freeindex
	}

	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	if raceenabled {
		// Race mode: distinct addresses for distinct tiny allocations.
		x = add(x, elemsize-constsize)
	}
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}
// mallocTiny6 allocates a pointer-free object of constant size 6 via the
// tiny allocator. Machine-generated clone; see mallocTiny1 for structure.
func mallocTiny6(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	lockRankMayQueueFinalizer()

	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	const constsize = 6

	const elemsize = 16

	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckTiny(constsize, typ, mp)
	}
	mp.mallocing = 1

	c := getMCache(mp)
	off := c.tinyoffset
	// Align inside the tiny block (constant-folded for constsize == 6).
	if constsize&7 == 0 {
		off = alignUp(off, 8)
	} else if goarch.PtrSize == 4 && constsize == 12 {
		off = alignUp(off, 8)
	} else if constsize&3 == 0 {
		off = alignUp(off, 4)
	} else if constsize&1 == 0 {
		off = alignUp(off, 2)
	}
	if off+constsize <= maxTinySize && c.tiny != 0 {
		// Fast path: fits in the current tiny block.
		x := unsafe.Pointer(c.tiny + off)
		c.tinyoffset = off + constsize
		c.tinyAllocs++
		mp.mallocing = 0
		releasem(mp)
		// Shadowed elemsize = 0: no new heap bytes on a tiny-block hit.
		const elemsize = 0
		{
			if gcBlackenEnabled != 0 && elemsize != 0 {
				if assistG := getg().m.curg; assistG != nil {
					assistG.gcAssistBytes -= int64(elemsize - size)
				}
			}

			if debug.malloc {
				postMallocgcDebug(x, elemsize, typ)
			}
			return x
		}
	}

	// Slow path: take a fresh 16-byte element from the tiny span class.
	checkGCTrigger := false
	span := c.alloc[tinySpanClass]

	const nbytes = 8192
	const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) /
		16,
	)
	// Inlined nextFreeFast against the constant element count.
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache)
		result := span.freeindex + uint16(theBit)
		if result < nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*
					16 +
					span.base())
			}
		}
	}
	v := nextFreeFastResult
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(tinySpanClass)
	}
	x := unsafe.Pointer(v)
	// Zero the element with two word stores.
	(*[2]uint64)(x)[0] = 0
	(*[2]uint64)(x)[1] = 0

	// Adopt the new block as the tiny block if it leaves more room.
	if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) {
		c.tiny = uintptr(x)
		c.tinyoffset = constsize
	}

	// Publish zeroed contents before returning the pointer.
	publicationBarrier()

	if writeBarrier.enabled {
		gcmarknewobject(span, uintptr(x))
	} else {
		span.freeIndexForScan = span.freeindex
	}

	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	if raceenabled {
		// Race mode: distinct addresses for distinct tiny allocations.
		x = add(x, elemsize-constsize)
	}
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}
// mallocTiny7 allocates a pointer-free object of constant size 7 via the
// tiny allocator. Machine-generated clone; see mallocTiny1 for structure.
func mallocTiny7(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	lockRankMayQueueFinalizer()

	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	const constsize = 7

	const elemsize = 16

	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckTiny(constsize, typ, mp)
	}
	mp.mallocing = 1

	c := getMCache(mp)
	off := c.tinyoffset
	// Align inside the tiny block (constant-folded for constsize == 7).
	if constsize&7 == 0 {
		off = alignUp(off, 8)
	} else if goarch.PtrSize == 4 && constsize == 12 {
		off = alignUp(off, 8)
	} else if constsize&3 == 0 {
		off = alignUp(off, 4)
	} else if constsize&1 == 0 {
		off = alignUp(off, 2)
	}
	if off+constsize <= maxTinySize && c.tiny != 0 {
		// Fast path: fits in the current tiny block.
		x := unsafe.Pointer(c.tiny + off)
		c.tinyoffset = off + constsize
		c.tinyAllocs++
		mp.mallocing = 0
		releasem(mp)
		// Shadowed elemsize = 0: no new heap bytes on a tiny-block hit.
		const elemsize = 0
		{
			if gcBlackenEnabled != 0 && elemsize != 0 {
				if assistG := getg().m.curg; assistG != nil {
					assistG.gcAssistBytes -= int64(elemsize - size)
				}
			}

			if debug.malloc {
				postMallocgcDebug(x, elemsize, typ)
			}
			return x
		}
	}

	// Slow path: take a fresh 16-byte element from the tiny span class.
	checkGCTrigger := false
	span := c.alloc[tinySpanClass]

	const nbytes = 8192
	const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) /
		16,
	)
	// Inlined nextFreeFast against the constant element count.
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache)
		result := span.freeindex + uint16(theBit)
		if result < nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*
					16 +
					span.base())
			}
		}
	}
	v := nextFreeFastResult
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(tinySpanClass)
	}
	x := unsafe.Pointer(v)
	// Zero the element with two word stores.
	(*[2]uint64)(x)[0] = 0
	(*[2]uint64)(x)[1] = 0

	// Adopt the new block as the tiny block if it leaves more room.
	if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) {
		c.tiny = uintptr(x)
		c.tinyoffset = constsize
	}

	// Publish zeroed contents before returning the pointer.
	publicationBarrier()

	if writeBarrier.enabled {
		gcmarknewobject(span, uintptr(x))
	} else {
		span.freeIndexForScan = span.freeindex
	}

	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	if raceenabled {
		// Race mode: distinct addresses for distinct tiny allocations.
		x = add(x, elemsize-constsize)
	}
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}
+ profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny8(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 8 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := 
sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny9(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 9 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else 
if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if 
gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny10(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 10 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + 
span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny11(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 11 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x 
:= unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny12(size 
uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 12 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + 
(*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny13(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 13 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + 
assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny14(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if 
x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 14 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + 
span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny15(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 15 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - 
unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC2(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 2 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, 
mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC3(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 3 + + const elemsize = 24 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := 
false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 24 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC4(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 4 + + const elemsize = 32 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = 
spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 32 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC5(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 5 + + const elemsize = 48 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := 
c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 48 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC6(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 6 + + const elemsize = 64 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult 
gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 64 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC7(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 7 + + const elemsize = 80 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit 
:= sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 80 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC8(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 8 + + const elemsize = 96 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result 
:= span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 96 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC9(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 9 + + const elemsize = 112 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result 
< span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 112 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC10(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 10 + + const elemsize = 128 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if 
!(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 128 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC11(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 11 + + const elemsize = 144 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) 
{ + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 144 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC12(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 12 + + const elemsize = 160 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + 
span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 160 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC13(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 13 + + const elemsize = 176 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ 
+ nextFreeFastResult = gclinkptr(uintptr(result)* + 176 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC14(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 14 + + const elemsize = 192 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = 
gclinkptr(uintptr(result)* + 192 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC15(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 15 + + const elemsize = 208 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 208 + + 
span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC16(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 16 + + const elemsize = 224 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 224 + + span.base()) + } + } + } + v := 
nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC17(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 17 + + const elemsize = 240 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 240 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, 
checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC18(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 18 + + const elemsize = 256 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 256 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := 
unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC19(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 19 + + const elemsize = 288 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 288 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && 
span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC20(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 20 + + const elemsize = 320 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 320 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + 
memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC21(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 21 + + const elemsize = 352 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 352 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + 
publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC22(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 22 + + const elemsize = 384 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 384 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if 
writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC23(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 23 + + const elemsize = 416 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 416 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + 
gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC24(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 24 + + const elemsize = 448 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 448 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { 
+ + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC25(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 25 + + const elemsize = 480 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 480 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + 
} + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC26(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 26 + + const elemsize = 512 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 512 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if 
c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} diff --git a/src/runtime/malloc_stubs.go b/src/runtime/malloc_stubs.go new file mode 100644 index 0000000000..7fd1444189 --- /dev/null +++ b/src/runtime/malloc_stubs.go @@ -0,0 +1,586 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains stub functions that are not meant to be called directly, +// but that will be assembled together using the inlining logic in runtime/_mkmalloc +// to produce a full mallocgc function that's specialized for a span class +// or specific size in the case of the tiny allocator. +// +// To assemble a mallocgc function, the mallocStub function is cloned, and the call to +// inlinedMalloc is replaced with the inlined body of smallScanNoHeaderStub, +// smallNoScanStub or tinyStub, depending on the parameters being specialized. +// +// The size_ (for the tiny case) and elemsize_, sizeclass_, and noscanint_ (for all three cases) +// identifiers are replaced with the value of the parameter in the specialized case. +// The nextFreeFastStub, nextFreeFastTiny, heapSetTypeNoHeaderStub, and writeHeapBitsSmallStub +// functions are also inlined by _mkmalloc. + +package runtime + +import ( + "internal/goarch" + "internal/runtime/sys" + "unsafe" +) + +// These identifiers will all be replaced by the inliner. So their values don't +// really matter: they just need to be set so that the stub functions, which +// will never be used on their own, can compile. 
elemsize_ can't be set to +// zero because we divide by it in nextFreeFastTiny, and the compiler would +// complain about a division by zero. Its replaced value will always be greater +// than zero. +const elemsize_ = 8 +const sizeclass_ = 0 +const noscanint_ = 0 +const size_ = 0 + +func malloc0(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + // Short-circuit zero-sized allocation requests. + return unsafe.Pointer(&zerobase) +} + +func mallocPanic(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + panic("not defined for sizeclass") +} + +func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + // It's possible for any malloc to trigger sweeping, which may in + // turn queue finalizers. Record this dynamic lock edge. + // N.B. Compiled away if lockrank experiment is not enabled. + lockRankMayQueueFinalizer() + + // Pre-malloc debug hooks. + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + // Assist the GC if needed. + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + // Actually do the allocation. + x, elemsize := inlinedMalloc(size, typ, needzero) + + // Adjust our GC assist debt to account for internal fragmentation. + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + // Post-malloc debug hooks. + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +// inlinedMalloc will never be called. It is defined just so that the compiler can compile +// the mallocStub function, which will also never be called, but instead used as a template +// to generate a size-specialized malloc function. 
The call to inlinedMalloc in mallocStub +// will be replaced with the inlined body of smallScanNoHeaderStub, smallNoScanStub, or tinyStub +// when generating the size-specialized malloc function. See the comment at the top of this +// file for more information. +func inlinedMalloc(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) { + return unsafe.Pointer(uintptr(0)), 0 +} + +func doubleCheckSmallScanNoHeader(size uintptr, typ *_type, mp *m) { + if mp.mallocing != 0 { + throw("malloc deadlock") + } + if mp.gsignal == getg() { + throw("malloc during signal") + } + if typ == nil || !typ.Pointers() { + throw("noscan allocated in scan-only path") + } + if !heapBitsInSpan(size) { + throw("heap bits in not in span for non-header-only path") + } +} + +func smallScanNoHeaderStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) { + const sizeclass = sizeclass_ + const elemsize = elemsize_ + + // Set mp.mallocing to keep from being preempted by GC. + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(noscanint_) + span := c.alloc[spc] + v := nextFreeFastStub(span) + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + // initHeapBits already set the pointer bits for the 8-byte sizeclass + // on 64-bit platforms. + c.scanAlloc += 8 + } else { + dataSize := size // make the inliner happy + x := uintptr(x) + scanSize := heapSetTypeNoHeaderStub(x, dataSize, typ, span) + c.scanAlloc += scanSize + } + + // Ensure that the stores above that initialize x to + // type-safe memory and set the heap bits occur before + // the caller can make x observable to the garbage + // collector. 
Otherwise, on weakly ordered machines, + // the garbage collector could follow a pointer to x, + // but see uninitialized memory or stale heap bits. + publicationBarrier() + + if writeBarrier.enabled { + // Allocate black during GC. + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. + gcmarknewobject(span, uintptr(x)) + } else { + // Track the last free index before the mark phase. This field + // is only used by the garbage collector. During the mark phase + // this is used by the conservative scanner to filter out objects + // that are both free and recently-allocated. It's safe to do that + // because we allocate-black if the GC is enabled. The conservative + // scanner produces pointers out of thin air, so without additional + // synchronization it might otherwise observe a partially-initialized + // object, which could crash the program. + span.freeIndexForScan = span.freeindex + } + + // Note cache c only valid while m acquired; see #47302 + // + // N.B. Use the full size because that matches how the GC + // will update the mem profile on the "free" side. + // + // TODO(mknyszek): We should really count the header as part + // of gc_sys or something. The code below just pretends it is + // internal fragmentation and matches the GC's accounting by + // using the whole allocation slot. 
+ c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + return x, elemsize +} + +func doubleCheckSmallNoScan(typ *_type, mp *m) { + if mp.mallocing != 0 { + throw("malloc deadlock") + } + if mp.gsignal == getg() { + throw("malloc during signal") + } + if typ != nil && typ.Pointers() { + throw("expected noscan type for noscan alloc") + } +} + +func smallNoScanStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) { + // TODO(matloob): Add functionality to mkmalloc to allow us to inline a non-constant + // sizeclass_ and elemsize_ value (instead just set to the expressions to look up the size class + // and elemsize. We'd also need to teach mkmalloc that values that are touched by these (specifically + // spc below) should turn into vars. This would allow us to generate mallocgcSmallNoScan itself, + // so that its code could not diverge from the generated functions. + const sizeclass = sizeclass_ + const elemsize = elemsize_ + + // Set mp.mallocing to keep from being preempted by GC. + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(noscanint_) + span := c.alloc[spc] + v := nextFreeFastStub(span) + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + // Ensure that the stores above that initialize x to + // type-safe memory and set the heap bits occur before + // the caller can make x observable to the garbage + // collector. Otherwise, on weakly ordered machines, + // the garbage collector could follow a pointer to x, + // but see uninitialized memory or stale heap bits. 
+ publicationBarrier() + + if writeBarrier.enabled { + // Allocate black during GC. + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. + gcmarknewobject(span, uintptr(x)) + } else { + // Track the last free index before the mark phase. This field + // is only used by the garbage collector. During the mark phase + // this is used by the conservative scanner to filter out objects + // that are both free and recently-allocated. It's safe to do that + // because we allocate-black if the GC is enabled. The conservative + // scanner produces pointers out of thin air, so without additional + // synchronization it might otherwise observe a partially-initialized + // object, which could crash the program. + span.freeIndexForScan = span.freeindex + } + + // Note cache c only valid while m acquired; see #47302 + // + // N.B. Use the full size because that matches how the GC + // will update the mem profile on the "free" side. + // + // TODO(mknyszek): We should really count the header as part + // of gc_sys or something. The code below just pretends it is + // internal fragmentation and matches the GC's accounting by + // using the whole allocation slot. 
+ c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + return x, elemsize +} + +func doubleCheckTiny(size uintptr, typ *_type, mp *m) { + if mp.mallocing != 0 { + throw("malloc deadlock") + } + if mp.gsignal == getg() { + throw("malloc during signal") + } + if typ != nil && typ.Pointers() { + throw("expected noscan for tiny alloc") + } +} + +func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) { + const constsize = size_ + const elemsize = elemsize_ + + // Set mp.mallocing to keep from being preempted by GC. + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + // Tiny allocator. + // + // Tiny allocator combines several tiny allocation requests + // into a single memory block. The resulting memory block + // is freed when all subobjects are unreachable. The subobjects + // must be noscan (don't have pointers), this ensures that + // the amount of potentially wasted memory is bounded. + // + // Size of the memory block used for combining (maxTinySize) is tunable. + // Current setting is 16 bytes, which relates to 2x worst case memory + // wastage (when all but one subobjects are unreachable). + // 8 bytes would result in no wastage at all, but provides less + // opportunities for combining. + // 32 bytes provides more opportunities for combining, + // but can lead to 4x worst case wastage. + // The best case winning is 8x regardless of block size. + // + // Objects obtained from tiny allocator must not be freed explicitly. + // So when an object will be freed explicitly, we ensure that + // its size >= maxTinySize. 
+ // + // SetFinalizer has a special case for objects potentially coming + // from tiny allocator, it such case it allows to set finalizers + // for an inner byte of a memory block. + // + // The main targets of tiny allocator are small strings and + // standalone escaping variables. On a json benchmark + // the allocator reduces number of allocations by ~12% and + // reduces heap size by ~20%. + c := getMCache(mp) + off := c.tinyoffset + // Align tiny pointer for required (conservative) alignment. + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + // Conservatively align 12-byte objects to 8 bytes on 32-bit + // systems so that objects whose first field is a 64-bit + // value is aligned to 8 bytes and does not cause a fault on + // atomic access. See issue 37262. + // TODO(mknyszek): Remove this workaround if/when issue 36606 + // is resolved. + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + // The object fits into existing tiny block. + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + return x, 0 + } + // Allocate a new maxTinySize block. + checkGCTrigger := false + span := c.alloc[tinySpanClass] + v := nextFreeFastTiny(span) + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 // Always zero + (*[2]uint64)(x)[1] = 0 + // See if we need to replace the existing tiny block with the new one + // based on amount of remaining free space. + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + // Note: disabled when race detector is on, see comment near end of this function. 
+ c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + // Ensure that the stores above that initialize x to + // type-safe memory and set the heap bits occur before + // the caller can make x observable to the garbage + // collector. Otherwise, on weakly ordered machines, + // the garbage collector could follow a pointer to x, + // but see uninitialized memory or stale heap bits. + publicationBarrier() + + if writeBarrier.enabled { + // Allocate black during GC. + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. + gcmarknewobject(span, uintptr(x)) + } else { + // Track the last free index before the mark phase. This field + // is only used by the garbage collector. During the mark phase + // this is used by the conservative scanner to filter out objects + // that are both free and recently-allocated. It's safe to do that + // because we allocate-black if the GC is enabled. The conservative + // scanner produces pointers out of thin air, so without additional + // synchronization it might otherwise observe a partially-initialized + // object, which could crash the program. + span.freeIndexForScan = span.freeindex + } + + // Note cache c only valid while m acquired; see #47302 + // + // N.B. Use the full size because that matches how the GC + // will update the mem profile on the "free" side. + // + // TODO(mknyszek): We should really count the header as part + // of gc_sys or something. The code below just pretends it is + // internal fragmentation and matches the GC's accounting by + // using the whole allocation slot. 
+ c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + // Pad tinysize allocations so they are aligned with the end + // of the tinyalloc region. This ensures that any arithmetic + // that goes off the top end of the object will be detectable + // by checkptr (issue 38872). + // Note that we disable tinyalloc when raceenabled for this to work. + // TODO: This padding is only performed when the race detector + // is enabled. It would be nice to enable it if any package + // was compiled with checkptr, but there's no easy way to + // detect that (especially at compile time). + // TODO: enable this padding for all allocations, not just + // tinyalloc ones. It's tricky because of pointer maps. + // Maybe just all noscan objects? + x = add(x, elemsize-constsize) + } + return x, elemsize +} + +// TODO(matloob): Should we let the go compiler inline this instead of using mkmalloc? +// We won't be able to use elemsize_ but that's probably ok. +func nextFreeFastTiny(span *mspan) gclinkptr { + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / elemsize_) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache? 
+ result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)*elemsize_ + span.base()) + } + } + } + return nextFreeFastResult +} + +func nextFreeFastStub(span *mspan) gclinkptr { + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache? + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)*elemsize_ + span.base()) + } + } + } + return nextFreeFastResult +} + +func heapSetTypeNoHeaderStub(x, dataSize uintptr, typ *_type, span *mspan) uintptr { + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(elemsize_)) { + throw("tried to write heap bits, but no heap bits in span") + } + scanSize := writeHeapBitsSmallStub(span, x, dataSize, typ) + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + return scanSize +} + +// writeHeapBitsSmallStub writes the heap bits for small objects whose ptr/scalar data is +// stored as a bitmap at the end of the span. +// +// Assumes dataSize is <= ptrBits*goarch.PtrSize. x must be a pointer into the span. +// heapBitsInSpan(dataSize) must be true. dataSize must be >= typ.Size_. +// +//go:nosplit +func writeHeapBitsSmallStub(span *mspan, x, dataSize uintptr, typ *_type) uintptr { + // The objects here are always really small, so a single load is sufficient. + src0 := readUintptr(getGCMask(typ)) + + const elemsize = elemsize_ + + // Create repetitions of the bitmap if we have a small slice backing store. 
+ scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + // N.B. We rely on dataSize being an exact multiple of the type size. + // The alternative is to be defensive and mask out src to the length + // of dataSize. The purpose is to save on one additional masking operation. + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + // Since we're never writing more than one uintptr's worth of bits, we're either going + // to do one or two writes. + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + // In the if statement below, we have to do two uintptr writes if the bits + // we need to write straddle across two different memory locations. But if + // the number of bits we're writing divides evenly into the number of bits + // in the uintptr we're writing, this can never happen. Since bitsIsPowerOfTwo + // is a compile-time constant in the generated code, in the case where the size is + // a power of two less than or equal to ptrBits, the compiler can remove the + // 'two writes' branch of the if statement and always do only one write without + // the check. + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + // Two writes. + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + // One write. 
+ dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) // We're taking the min so this compiles on 32 bit platforms. But if bits > ptrbits we always take the other branch + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + return scanSize +} + +func writeHeapBitsDoubleCheck(span *mspan, x, dataSize, src, src0, i, j, bits uintptr, typ *_type) { + srcRead := span.heapBitsSmallForAddr(x) + if srcRead != src { + print("runtime: x=", hex(x), " i=", i, " j=", j, " bits=", bits, "\n") + print("runtime: dataSize=", dataSize, " typ.Size_=", typ.Size_, " typ.PtrBytes=", typ.PtrBytes, "\n") + print("runtime: src0=", hex(src0), " src=", hex(src), " srcRead=", hex(srcRead), "\n") + throw("bad pointer bits written for small object") + } +} diff --git a/src/runtime/malloc_tables_generated.go b/src/runtime/malloc_tables_generated.go new file mode 100644 index 0000000000..36650881fe --- /dev/null +++ b/src/runtime/malloc_tables_generated.go @@ -0,0 +1,1038 @@ +// Code generated by mkmalloc.go; DO NOT EDIT. 
+//go:build !plan9 + +package runtime + +import "unsafe" + +var mallocScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{ + mallocPanic, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + 
mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + 
mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + 
mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + 
mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + 
mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + 
mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + 
mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + 
mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, +} + +var mallocNoScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{ + mallocPanic, + mallocTiny1, + mallocTiny2, + mallocTiny3, + mallocTiny4, + mallocTiny5, + mallocTiny6, + mallocTiny7, + mallocTiny8, + mallocTiny9, + mallocTiny10, + mallocTiny11, + mallocTiny12, + mallocTiny13, + mallocTiny14, + mallocTiny15, + mallocgcSmallNoScanSC2, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + 
mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + 
mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + 
mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + 
mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + 
mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + 
mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + 
mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, +} diff --git a/src/runtime/malloc_tables_plan9.go b/src/runtime/malloc_tables_plan9.go new file mode 100644 index 0000000000..4d2740bbb2 --- /dev/null +++ b/src/runtime/malloc_tables_plan9.go @@ -0,0 +1,14 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build plan9 + +package runtime + +import "unsafe" + +var ( + mallocScanTable []func(size uintptr, typ *_type, needzero bool) unsafe.Pointer + mallocNoScanTable []func(size uintptr, typ *_type, needzero bool) unsafe.Pointer +) diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go index 6cd525d5e9..bf58947bbc 100644 --- a/src/runtime/malloc_test.go +++ b/src/runtime/malloc_test.go @@ -452,3 +452,13 @@ func BenchmarkGoroutineIdle(b *testing.B) { close(quit) time.Sleep(10 * time.Millisecond) } + +func TestMkmalloc(t *testing.T) { + testenv.MustHaveGoRun(t) + testenv.MustHaveExternalNetwork(t) // To download the golang.org/x/tools dependency. 
+ output, err := exec.Command("go", "-C", "_mkmalloc", "test").CombinedOutput() + t.Logf("test output:\n%s", output) + if err != nil { + t.Errorf("_mkmalloc tests failed: %v", err) + } +} diff --git a/src/runtime/mcheckmark.go b/src/runtime/mcheckmark.go index 318f40f2eb..083220f449 100644 --- a/src/runtime/mcheckmark.go +++ b/src/runtime/mcheckmark.go @@ -68,7 +68,7 @@ func startCheckmarks() { // endCheckmarks ends the checkmarks phase. func endCheckmarks() { - if gcMarkWorkAvailable(nil) { + if !gcIsMarkDone() { throw("GC work not flushed") } useCheckmark = false diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 26cec37f74..68cbfda500 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -326,7 +326,7 @@ type workType struct { full lfstack // lock-free list of full blocks workbuf _ cpu.CacheLinePad // prevents false-sharing between full and empty empty lfstack // lock-free list of empty blocks workbuf - _ cpu.CacheLinePad // prevents false-sharing between empty and nproc/nwait + _ cpu.CacheLinePad // prevents false-sharing between empty and wbufSpans wbufSpans struct { lock mutex @@ -337,12 +337,24 @@ type workType struct { // one of the workbuf lists. busy mSpanList } - _ cpu.CacheLinePad // prevents false-sharing between wbufSpans and spanq + _ cpu.CacheLinePad // prevents false-sharing between wbufSpans and spanWorkMask - // Global queue of spans to scan. + // spanqMask is a bitmap indicating which Ps have local work worth stealing. + // Set or cleared by the owning P, cleared by stealing Ps. + // + // spanqMask is like a proxy for a global queue. An important invariant is that + // forced flushing like gcw.dispose must set this bit on any P that has local + // span work. + spanqMask pMask + _ cpu.CacheLinePad // prevents false-sharing between spanqMask and everything else + + // List of all spanSPMCs. // // Only used if goexperiment.GreenTeaGC. 
- spanq spanQueue + spanSPMCs struct { + lock mutex // no lock rank because it's a leaf lock (see mklockrank.go). + all *spanSPMC + } // Restore 64-bit alignment on 32-bit. // _ uint32 @@ -711,8 +723,9 @@ func gcStart(trigger gcTrigger) { traceRelease(trace) } - // Check that all Ps have finished deferred mcache flushes. + // Check and setup per-P state. for _, p := range allp { + // Check that all Ps have finished deferred mcache flushes. if fg := p.mcache.flushGen.Load(); fg != mheap_.sweepgen { println("runtime: p", p.id, "flushGen", fg, "!= sweepgen", mheap_.sweepgen) throw("p mcache not flushed") @@ -869,10 +882,11 @@ var gcDebugMarkDone struct { // all local work to the global queues where it can be discovered by // other workers. // +// All goroutines performing GC work must call gcBeginWork to signal +// that they're executing GC work. They must call gcEndWork when done. // This should be called when all local mark work has been drained and -// there are no remaining workers. Specifically, when -// -// work.nwait == work.nproc && !gcMarkWorkAvailable(p) +// there are no remaining workers. Specifically, when gcEndWork returns +// true. // // The calling context must be preemptible. // @@ -896,7 +910,7 @@ top: // empty before performing the ragged barrier. Otherwise, // there could be global work that a P could take after the P // has passed the ragged barrier. - if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) { + if !(gcphase == _GCmark && gcIsMarkDone()) { semrelease(&work.markDoneSema) return } @@ -922,6 +936,7 @@ top: // TODO(austin): Break up these workbufs to // better distribute work. pp.gcw.dispose() + // Collect the flushedWork flag. 
if pp.gcw.flushedWork { atomic.Xadd(&gcMarkDoneFlushed, 1) @@ -1514,11 +1529,7 @@ func gcBgMarkWorker(ready chan struct{}) { trackLimiterEvent = pp.limiterEvent.start(limiterEventIdleMarkWork, startTime) } - decnwait := atomic.Xadd(&work.nwait, -1) - if decnwait == work.nproc { - println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc) - throw("work.nwait was > work.nproc") - } + gcBeginWork() systemstack(func() { // Mark our goroutine preemptible so its stack can be scanned or observed @@ -1570,15 +1581,6 @@ func gcBgMarkWorker(ready chan struct{}) { atomic.Xaddint64(&pp.gcFractionalMarkTime, duration) } - // Was this the last worker and did we run out - // of work? - incnwait := atomic.Xadd(&work.nwait, +1) - if incnwait > work.nproc { - println("runtime: p.gcMarkWorkerMode=", pp.gcMarkWorkerMode, - "work.nwait=", incnwait, "work.nproc=", work.nproc) - throw("work.nwait > work.nproc") - } - // We'll releasem after this point and thus this P may run // something else. We must clear the worker mode to avoid // attributing the mode to a different (non-worker) G in @@ -1587,7 +1589,7 @@ func gcBgMarkWorker(ready chan struct{}) { // If this worker reached a background mark completion // point, signal the main GC goroutine. - if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { + if gcEndWork() { // We don't need the P-local buffers here, allow // preemption because we may schedule like a regular // goroutine in gcMarkDone (block on locks, etc). @@ -1599,20 +1601,40 @@ func gcBgMarkWorker(ready chan struct{}) { } } -// gcMarkWorkAvailable reports whether executing a mark worker -// on p is potentially useful. p may be nil, in which case it only -// checks the global sources of work. -func gcMarkWorkAvailable(p *p) bool { +// gcShouldScheduleWorker reports whether executing a mark worker +// on p is potentially useful. p may be nil. 
+func gcShouldScheduleWorker(p *p) bool { if p != nil && !p.gcw.empty() { return true } - if !work.full.empty() || !work.spanq.empty() { - return true // global work available + return gcMarkWorkAvailable() +} + +// gcIsMarkDone reports whether the mark phase is (probably) done. +func gcIsMarkDone() bool { + return work.nwait == work.nproc && !gcMarkWorkAvailable() +} + +// gcBeginWork signals to the garbage collector that a new worker is +// about to process GC work. +func gcBeginWork() { + decnwait := atomic.Xadd(&work.nwait, -1) + if decnwait == work.nproc { + println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc) + throw("work.nwait was > work.nproc") } - if work.markrootNext < work.markrootJobs { - return true // root scan work available +} + +// gcEndWork signals to the garbage collector that a new worker has just finished +// its work. It reports whether it was the last worker and there's no more work +// to do. If it returns true, the caller must call gcMarkDone. +func gcEndWork() (last bool) { + incnwait := atomic.Xadd(&work.nwait, +1) + if incnwait > work.nproc { + println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc) + throw("work.nwait > work.nproc") } - return false + return incnwait == work.nproc && !gcMarkWorkAvailable() } // gcMark runs the mark (or, for concurrent GC, mark termination) @@ -1625,8 +1647,8 @@ func gcMark(startTime int64) { work.tstart = startTime // Check that there's no marking work remaining. 
- if work.full != 0 || work.markrootNext < work.markrootJobs || !work.spanq.empty() { - print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, " spanq.n=", work.spanq.size(), "\n") + if work.full != 0 || work.markrootNext < work.markrootJobs { + print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n") panic("non-empty mark queue after concurrent mark") } @@ -1742,10 +1764,12 @@ func gcSweep(mode gcMode) bool { // Sweep all spans eagerly. for sweepone() != ^uintptr(0) { } - // Free workbufs eagerly. + // Free workbufs and span rings eagerly. prepareFreeWorkbufs() for freeSomeWbufs(false) { } + for freeSomeSpanSPMCs(false) { + } // All "free" events for this mark/sweep cycle have // now happened, so we can make this profile cycle // available immediately. diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 8b306045c5..f85ebda260 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -666,6 +666,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) { gp.gcAssistBytes = 0 return } + // Track time spent in this assist. Since we're on the // system stack, this is non-preemptible, so we can // just measure start and end time. @@ -675,11 +676,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) { startTime := nanotime() trackLimiterEvent := gp.m.p.ptr().limiterEvent.start(limiterEventMarkAssist, startTime) - decnwait := atomic.Xadd(&work.nwait, -1) - if decnwait == work.nproc { - println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc) - throw("nwait > work.nprocs") - } + gcBeginWork() // gcDrainN requires the caller to be preemptible. 
casGToWaitingForSuspendG(gp, _Grunning, waitReasonGCAssistMarking) @@ -702,14 +699,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) { // If this is the last worker and we ran out of work, // signal a completion point. - incnwait := atomic.Xadd(&work.nwait, +1) - if incnwait > work.nproc { - println("runtime: work.nwait=", incnwait, - "work.nproc=", work.nproc) - throw("work.nwait > work.nproc") - } - - if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { + if gcEndWork() { // This has reached a background completion point. Set // gp.param to a non-nil value to indicate this. It // doesn't matter what we set it to (it just has to be @@ -1242,14 +1232,18 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { var b uintptr var s objptr if b = gcw.tryGetObjFast(); b == 0 { - if s = gcw.tryGetSpan(false); s == 0 { + if s = gcw.tryGetSpanFast(); s == 0 { if b = gcw.tryGetObj(); b == 0 { - // Flush the write barrier - // buffer; this may create - // more work. - wbBufFlush() - if b = gcw.tryGetObj(); b == 0 { - s = gcw.tryGetSpan(true) + if s = gcw.tryGetSpan(); s == 0 { + // Flush the write barrier + // buffer; this may create + // more work. + wbBufFlush() + if b = gcw.tryGetObj(); b == 0 { + if s = gcw.tryGetSpan(); s == 0 { + s = gcw.tryStealSpan() + } + } } } } @@ -1338,22 +1332,26 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 { var b uintptr var s objptr if b = gcw.tryGetObjFast(); b == 0 { - if s = gcw.tryGetSpan(false); s == 0 { + if s = gcw.tryGetSpanFast(); s == 0 { if b = gcw.tryGetObj(); b == 0 { - // Flush the write barrier - // buffer; this may create - // more work. - wbBufFlush() - if b = gcw.tryGetObj(); b == 0 { - // Try to do a root job. - if work.markrootNext < work.markrootJobs { - job := atomic.Xadd(&work.markrootNext, +1) - 1 - if job < work.markrootJobs { - workFlushed += markroot(gcw, job, false) - continue + if s = gcw.tryGetSpan(); s == 0 { + // Flush the write barrier + // buffer; this may create + // more work. 
+ wbBufFlush() + if b = gcw.tryGetObj(); b == 0 { + if s = gcw.tryGetSpan(); s == 0 { + // Try to do a root job. + if work.markrootNext < work.markrootJobs { + job := atomic.Xadd(&work.markrootNext, +1) - 1 + if job < work.markrootJobs { + workFlushed += markroot(gcw, job, false) + continue + } + } + s = gcw.tryStealSpan() } } - s = gcw.tryGetSpan(true) } } } diff --git a/src/runtime/mgcmark_greenteagc.go b/src/runtime/mgcmark_greenteagc.go index 845857a817..53fcd3d966 100644 --- a/src/runtime/mgcmark_greenteagc.go +++ b/src/runtime/mgcmark_greenteagc.go @@ -37,10 +37,10 @@ package runtime import ( - "internal/cpu" "internal/goarch" "internal/runtime/atomic" "internal/runtime/gc" + "internal/runtime/gc/scan" "internal/runtime/sys" "unsafe" ) @@ -259,7 +259,7 @@ func gcUsesSpanInlineMarkBits(size uintptr) bool { return heapBitsInSpan(size) && size >= 16 } -// tryQueueOnSpan tries to queue p on the span it points to, if it +// tryDeferToSpanScan tries to queue p on the span it points to, if it // points to a small object span (gcUsesSpanQueue size). func tryDeferToSpanScan(p uintptr, gcw *gcWork) bool { if useCheckmark { @@ -299,6 +299,12 @@ func tryDeferToSpanScan(p uintptr, gcw *gcWork) bool { if q.tryAcquire() { if gcw.spanq.put(makeObjPtr(base, objIndex)) { if gcphase == _GCmark { + // This is intentionally racy; the bit set here might get + // stomped on by a stealing P. See the comment in tryStealSpan + // for an explanation as to why this is OK. + if !work.spanqMask.read(uint32(gcw.id)) { + work.spanqMask.set(gcw.id) + } gcw.mayNeedWorker = true } gcw.flushedWork = true @@ -307,260 +313,487 @@ func tryDeferToSpanScan(p uintptr, gcw *gcWork) bool { return true } +// tryGetSpanFast attempts to get an entire span to scan. +func (w *gcWork) tryGetSpanFast() objptr { + return w.spanq.tryGetFast() +} + // tryGetSpan attempts to get an entire span to scan. 
-func (w *gcWork) tryGetSpan(slow bool) objptr { - if s := w.spanq.get(); s != 0 { +func (w *gcWork) tryGetSpan() objptr { + if s := w.spanq.tryGetFast(); s != 0 { return s } - - if slow { - // Check the global span queue. - if s := work.spanq.get(w); s != 0 { - return s - } - - // Attempt to steal spans to scan from other Ps. - return spanQueueSteal(w) + // "Steal" from ourselves. + if s := w.spanq.steal(&w.spanq); s != 0 { + return s + } + // We failed to get any local work, so we're fresh out. + // Nobody else is going to add work for us. Clear our bit. + if work.spanqMask.read(uint32(w.id)) { + work.spanqMask.clear(w.id) } return 0 } -// spanQueue is a concurrent safe queue of mspans. Each mspan is represented -// as an objptr whose spanBase is the base address of the span. +// spanQueue is a P-local stealable span queue. type spanQueue struct { - avail atomic.Bool // optimization to check emptiness w/o the lock - _ cpu.CacheLinePad // prevents false-sharing between lock and avail - lock mutex - q mSpanQueue -} + // head, tail, and ring represent a local non-thread-safe ring buffer. + head, tail uint32 + ring [256]objptr -func (q *spanQueue) empty() bool { - return !q.avail.Load() -} + // putsSinceDrain counts the number of put calls since the last drain. + putsSinceDrain int -func (q *spanQueue) size() int { - return q.q.n -} + // chain contains state visible to other Ps. + // + // In particular, that means a linked chain of single-producer multi-consumer + // ring buffers where the single producer is this P only. + // + // This linked chain structure is based off the sync.Pool dequeue. + chain struct { + // head is the spanSPMC to put to. This is only accessed + // by the producer, so doesn't need to be synchronized. + head *spanSPMC -// putBatch adds a whole batch of spans to the queue. 
-func (q *spanQueue) putBatch(batch []objptr) { - var list mSpanQueue - for _, p := range batch { - s := spanOfUnchecked(p.spanBase()) - s.scanIdx = p.objIndex() - list.push(s) + // tail is the spanSPMC to steal from. This is accessed + // by consumers, so reads and writes must be atomic. + tail atomic.UnsafePointer // *spanSPMC } +} - lock(&q.lock) - if q.q.n == 0 { - q.avail.Store(true) +// putFast tries to put s onto the queue, but may fail if it's full. +func (q *spanQueue) putFast(s objptr) (ok bool) { + if q.tail-q.head == uint32(len(q.ring)) { + return false } - q.q.takeAll(&list) - unlock(&q.lock) + q.ring[q.tail%uint32(len(q.ring))] = s + q.tail++ + return true } -// get tries to take a span off the queue. +// put puts s onto the queue. // -// Returns a non-zero objptr on success. Also, moves additional -// spans to gcw's local span queue. -func (q *spanQueue) get(gcw *gcWork) objptr { - if q.empty() { - return 0 - } - lock(&q.lock) - if q.q.n == 0 { - unlock(&q.lock) - return 0 - } - n := q.q.n/int(gomaxprocs) + 1 - if n > q.q.n { - n = q.q.n - } - if max := len(gcw.spanq.ring) / 2; n > max { - n = max +// Returns whether the caller should spin up a new worker. +func (q *spanQueue) put(s objptr) bool { + // The constants below define the period of and volume of + // spans we spill to the spmc chain when the local queue is + // not full. + // + // spillPeriod must be > spillMax, otherwise that sets the + // effective maximum size of our local span queue. Even if + // we have a span ring of size N, but we flush K spans every + // K puts, then K becomes our effective maximum length. When + // spillPeriod > spillMax, then we're always spilling spans + // at a slower rate than we're accumulating them. + const ( + // spillPeriod defines how often to check if we should + // spill some spans, counted in the number of calls to put. + spillPeriod = 64 + + // spillMax defines, at most, how many spans to drain with + // each spill. 
+ spillMax = 16 + ) + + if q.putFast(s) { + // Occasionally try to spill some work to generate parallelism. + q.putsSinceDrain++ + if q.putsSinceDrain >= spillPeriod { + // Reset even if we don't drain, so we don't check every time. + q.putsSinceDrain = 0 + + // Try to drain some spans. Don't bother if there's very + // few of them or there's already spans in the spmc chain. + n := min((q.tail-q.head)/2, spillMax) + if n > 4 && q.chainEmpty() { + q.drain(n) + return true + } + } + return false } - newQ := q.q.popN(n) - if q.q.n == 0 { - q.avail.Store(false) + + // We're out of space. Drain out our local spans. + q.drain(uint32(len(q.ring)) / 2) + if !q.putFast(s) { + throw("failed putFast after drain") } - unlock(&q.lock) + return true +} - s := newQ.pop() - for newQ.n > 0 { - s := newQ.pop() - gcw.spanq.put(makeObjPtr(s.base(), s.scanIdx)) +// flush publishes all spans in the local queue to the spmc chain. +func (q *spanQueue) flush() { + n := q.tail - q.head + if n == 0 { + return } - return makeObjPtr(s.base(), s.scanIdx) + q.drain(n) } -// localSpanQueue is a P-local ring buffer of objptrs that represent spans. -// Accessed without a lock. +// empty returns true if there's no more work on the queue. // -// Multi-consumer, single-producer. The only producer is the P that owns this -// queue, but any other P may consume from it. -// -// This is based on the scheduler runqueues. If making changes there, consider -// also making them here. -type localSpanQueue struct { - head atomic.Uint32 - tail atomic.Uint32 - ring [256]objptr +// Not thread-safe. Must only be called by the owner of q. +func (q *spanQueue) empty() bool { + // Check the local queue for work. + if q.tail-q.head > 0 { + return false + } + return q.chainEmpty() } -// put adds s to the queue. Returns true if put flushed to the global queue -// because it was full. 
-func (q *localSpanQueue) put(s objptr) (flushed bool) { - for { - h := q.head.Load() // synchronize with consumers - t := q.tail.Load() - if t-h < uint32(len(q.ring)) { - q.ring[t%uint32(len(q.ring))] = s - q.tail.Store(t + 1) // Makes the item avail for consumption. +// chainEmpty returns true if the spmc chain is empty. +// +// Thread-safe. +func (q *spanQueue) chainEmpty() bool { + // Check the rest of the rings for work. + r := (*spanSPMC)(q.chain.tail.Load()) + for r != nil { + if !r.empty() { return false } - if q.putSlow(s, h, t) { - return true - } - // The queue is not full, now the put above must succeed. + r = (*spanSPMC)(r.prev.Load()) } + return true } -// putSlow is a helper for put to move spans to the global queue. -// Returns true on success, false on failure (nothing moved). -func (q *localSpanQueue) putSlow(s objptr, h, t uint32) bool { - var batch [len(q.ring)/2 + 1]objptr +// drain publishes n spans from the local queue to the spmc chain. +func (q *spanQueue) drain(n uint32) { + q.putsSinceDrain = 0 - // First, grab a batch from local queue. - n := t - h - n = n / 2 - if n != uint32(len(q.ring)/2) { - throw("localSpanQueue.putSlow: queue is not full") + if q.chain.head == nil { + // N.B. We target 1024, but this may be bigger if the physical + // page size is bigger, or if we can fit more uintptrs into a + // physical page. See newSpanSPMC docs. + r := newSpanSPMC(1024) + q.chain.head = r + q.chain.tail.StoreNoWB(unsafe.Pointer(r)) } - for i := uint32(0); i < n; i++ { - batch[i] = q.ring[(h+i)%uint32(len(q.ring))] + + // Try to drain some of the queue to the head spmc. + if q.tryDrain(q.chain.head, n) { + return } - if !q.head.CompareAndSwap(h, h+n) { // Commits consume. - return false + // No space. Create a bigger spmc and add it to the chain. + + // Double the size of the next one, up to a maximum. + // + // We double each time so we can avoid taking this slow path + // in the future, which involves a global lock. 
Ideally we want + // to hit a steady-state where the deepest any queue goes during + // a mark phase can fit in the ring. + // + // However, we still set a maximum on this. We set the maximum + // to something large to amortize the cost of lock acquisition, but + // still at a reasonable size for big heaps and/or a lot of Ps (which + // tend to be correlated). + // + // It's not too bad to burn relatively large-but-fixed amounts of per-P + // memory if we need to deal with really, really deep queues, since the + // constants of proportionality are small. Simultaneously, we want to + // avoid a situation where a single worker ends up queuing O(heap) + // work and then forever retains a queue of that size. + const maxCap = 1 << 20 / goarch.PtrSize + newCap := q.chain.head.cap * 2 + if newCap > maxCap { + newCap = maxCap } - batch[n] = s + newHead := newSpanSPMC(newCap) + if !q.tryDrain(newHead, n) { + throw("failed to put span on newly-allocated spanSPMC") + } + q.chain.head.prev.StoreNoWB(unsafe.Pointer(newHead)) + q.chain.head = newHead +} - work.spanq.putBatch(batch[:]) - return true +// tryDrain attempts to drain n spans from q's local queue to the chain. +// +// Returns whether it succeeded. +func (q *spanQueue) tryDrain(r *spanSPMC, n uint32) bool { + if q.head+n > q.tail { + throw("attempt to drain too many elements") + } + h := r.head.Load() // synchronize with consumers + t := r.tail.Load() + rn := t - h + if rn+n <= r.cap { + for i := uint32(0); i < n; i++ { + *r.slot(t + i) = q.ring[(q.head+i)%uint32(len(q.ring))] + } + r.tail.Store(t + n) // Makes the items avail for consumption. + q.head += n + return true + } + return false +} + +// tryGetFast attempts to get a span from the local queue, but may fail if it's empty, +// returning false. +func (q *spanQueue) tryGetFast() objptr { + if q.tail-q.head == 0 { + return 0 + } + s := q.ring[q.head%uint32(len(q.ring))] + q.head++ + return s } -// get attempts to take a span off the queue. 
Might fail if the -// queue is empty. May be called by multiple threads, but callers -// are better off using stealFrom to amortize the cost of stealing. -// This method is intended for use by the owner of this queue. -func (q *localSpanQueue) get() objptr { +// steal takes some spans from the ring chain of another span queue. +// +// q == q2 is OK. +func (q *spanQueue) steal(q2 *spanQueue) objptr { + r := (*spanSPMC)(q2.chain.tail.Load()) + if r == nil { + return 0 + } for { - h := q.head.Load() - t := q.tail.Load() - if t == h { + // It's important that we load the next pointer + // *before* popping the tail. In general, r may be + // transiently empty, but if next is non-nil before + // the pop and the pop fails, then r is permanently + // empty, which is the only condition under which it's + // safe to drop r from the chain. + r2 := (*spanSPMC)(r.prev.Load()) + + // Try to refill from one of the rings + if s := q.refill(r); s != 0 { + return s + } + + if r2 == nil { + // This is the only ring. It's empty right + // now, but could be pushed to in the future. return 0 } - s := q.ring[h%uint32(len(q.ring))] - if q.head.CompareAndSwap(h, h+1) { - return s + + // The tail of the chain has been drained, so move on + // to the next ring. Try to drop it from the chain + // so the next consumer doesn't have to look at the empty + // ring again. + if q2.chain.tail.CompareAndSwapNoWB(unsafe.Pointer(r), unsafe.Pointer(r2)) { + r.dead.Store(true) } - } -} -func (q *localSpanQueue) empty() bool { - h := q.head.Load() - t := q.tail.Load() - return t == h + r = r2 + } } -// stealFrom takes spans from q2 and puts them into q1. One span is removed -// from the stolen spans and returned on success. Failure to steal returns a -// zero objptr. -func (q1 *localSpanQueue) stealFrom(q2 *localSpanQueue) objptr { - writeHead := q1.tail.Load() +// refill takes some spans from r and puts them into q's local queue. +// +// One span is removed from the stolen spans and returned on success. 
+// Failure to steal returns a zero objptr. +// +// steal is thread-safe with respect to r. +func (q *spanQueue) refill(r *spanSPMC) objptr { + if q.tail-q.head != 0 { + throw("steal with local work available") + } + // Steal some spans. var n uint32 for { - h := q2.head.Load() // load-acquire, synchronize with other consumers - t := q2.tail.Load() // load-acquire, synchronize with the producer + h := r.head.Load() // load-acquire, synchronize with other consumers + t := r.tail.Load() // load-acquire, synchronize with the producer n = t - h n = n - n/2 if n == 0 { return 0 } - if n > uint32(len(q2.ring)/2) { // read inconsistent h and t + if n > r.cap { // read inconsistent h and t continue } + n = min(n, uint32(len(q.ring)/2)) for i := uint32(0); i < n; i++ { - c := q2.ring[(h+i)%uint32(len(q2.ring))] - q1.ring[(writeHead+i)%uint32(len(q1.ring))] = c + q.ring[i] = *r.slot(h + i) } - if q2.head.CompareAndSwap(h, h+n) { + if r.head.CompareAndSwap(h, h+n) { break } } - n-- - c := q1.ring[(writeHead+n)%uint32(len(q1.ring))] - if n == 0 { - return c + + // Update local queue head and tail to reflect new buffered values. + q.head = 0 + q.tail = n + + // Pop off the head of the queue and return it. + return q.tryGetFast() +} + +// spanSPMC is a ring buffer of objptrs that represent spans. +// Accessed without a lock. +// +// Single-producer, multi-consumer. The only producer is the P that owns this +// queue, but any other P may consume from it. +// +// ## Invariants for memory management +// +// 1. All spanSPMCs are allocated from mheap_.spanSPMCAlloc. +// 2. All allocated spanSPMCs must be on the work.spanSPMCs list. +// 3. spanSPMCs may only be allocated if gcphase != _GCoff. +// 4. spanSPMCs may only be deallocated if gcphase == _GCoff. +// +// Invariants (3) and (4) ensure that we do not need to concern ourselves with +// tricky reuse issues that stem from not knowing when a thread is truly done +// with a spanSPMC. 
For example, two threads could load the same spanSPMC from +// the tail of the chain. One thread is then paused while the other steals the +// last few elements off of it. It's not safe to free at that point since the +// other thread will still inspect that spanSPMC, and we have no way of knowing +// without more complex and/or heavyweight synchronization. +// +// Instead, we rely on the global synchronization inherent to GC phases, and +// the fact that spanSPMCs are only ever used during the mark phase, to ensure +// memory safety. This means we temporarily waste some memory, but it's only +// until the end of the mark phase. +type spanSPMC struct { + _ sys.NotInHeap + + // allnext is the link to the next spanSPMC on the work.spanSPMCs list. + // This is used to find and free dead spanSPMCs. Protected by + // work.spanSPMCs.lock. + allnext *spanSPMC + + // dead indicates whether the spanSPMC is no longer in use. + // Protected by the CAS to the prev field of the spanSPMC pointing + // to this spanSPMC. That is, whoever wins that CAS takes ownership + // of marking this spanSPMC as dead. See spanQueue.steal for details. + dead atomic.Bool + + // prev is the next link up a spanQueue's SPMC chain, from tail to head, + // hence the name "prev." Set by a spanQueue's producer, cleared by a + // CAS in spanQueue.steal. + prev atomic.UnsafePointer // *spanSPMC + + // head, tail, cap, and ring together represent a fixed-size SPMC lock-free + // ring buffer of size cap. The ring buffer contains objptr values. + head atomic.Uint32 + tail atomic.Uint32 + cap uint32 // cap(ring)) + ring *objptr +} + +// newSpanSPMC allocates and initializes a new spmc with the provided capacity. +// +// newSpanSPMC may override the capacity with a larger one if the provided one would +// waste memory. 
+func newSpanSPMC(cap uint32) *spanSPMC { + lock(&work.spanSPMCs.lock) + r := (*spanSPMC)(mheap_.spanSPMCAlloc.alloc()) + r.allnext = work.spanSPMCs.all + work.spanSPMCs.all = r + unlock(&work.spanSPMCs.lock) + + // If cap < the capacity of a single physical page, round up. + pageCap := uint32(physPageSize / goarch.PtrSize) // capacity of a single page + if cap < pageCap { + cap = pageCap } - h := q1.head.Load() - if writeHead-h+n >= uint32(len(q1.ring)) { - throw("localSpanQueue.stealFrom: queue overflow") + if cap&(cap-1) != 0 { + throw("spmc capacity must be a power of 2") } - q1.tail.Store(writeHead + n) - return c + + r.cap = cap + ring := sysAlloc(uintptr(cap)*unsafe.Sizeof(objptr(0)), &memstats.gcMiscSys, "GC span queue") + atomic.StorepNoWB(unsafe.Pointer(&r.ring), ring) + return r } -// drain moves all spans in the queue to the global queue. +// empty returns true if the spmc is empty. // -// Returns true if anything was moved. -func (q *localSpanQueue) drain() bool { - var batch [len(q.ring)]objptr +// empty is thread-safe. +func (r *spanSPMC) empty() bool { + h := r.head.Load() + t := r.tail.Load() + return t == h +} - var n uint32 - for { - var h uint32 - for { - h = q.head.Load() - t := q.tail.Load() - n = t - h - if n == 0 { - return false - } - if n <= uint32(len(q.ring)) { - break - } - // Read inconsistent h and t. - } - for i := uint32(0); i < n; i++ { - batch[i] = q.ring[(h+i)%uint32(len(q.ring))] - } - if q.head.CompareAndSwap(h, h+n) { // Commits consume. +// deinit frees any resources the spanSPMC is holding onto and zeroes it. +func (r *spanSPMC) deinit() { + sysFree(unsafe.Pointer(r.ring), uintptr(r.cap)*unsafe.Sizeof(objptr(0)), &memstats.gcMiscSys) + r.ring = nil + r.dead.Store(false) + r.prev.StoreNoWB(nil) + r.head.Store(0) + r.tail.Store(0) + r.cap = 0 +} + +// slot returns a pointer to slot i%r.cap. 
+func (r *spanSPMC) slot(i uint32) *objptr { + idx := uintptr(i & (r.cap - 1)) + return (*objptr)(unsafe.Add(unsafe.Pointer(r.ring), idx*unsafe.Sizeof(objptr(0)))) +} + +// freeSomeSpanSPMCs frees some spanSPMCs back to the OS and returns +// true if it should be called again to free more. +func freeSomeSpanSPMCs(preemptible bool) bool { + // TODO(mknyszek): This is arbitrary, but some kind of limit is necessary + // to help bound delays to cooperatively preempt ourselves. + const batchSize = 64 + + // According to the SPMC memory management invariants, we can only free + // spanSPMCs outside of the mark phase. We ensure we do this in two ways. + // + // 1. We take the work.spanSPMCs lock, which we need anyway. This ensures + // that we are non-preemptible. If this path becomes lock-free, we will + // need to become non-preemptible in some other way. + // 2. Once we are non-preemptible, we check the gcphase, and back out if + // it's not safe. + // + // This way, we ensure that we don't start freeing if we're in the wrong + // phase, and the phase can't change on us while we're freeing. + lock(&work.spanSPMCs.lock) + if gcphase != _GCoff || work.spanSPMCs.all == nil { + unlock(&work.spanSPMCs.lock) + return false + } + rp := &work.spanSPMCs.all + gp := getg() + more := true + for i := 0; i < batchSize && !(preemptible && gp.preempt); i++ { + r := *rp + if r == nil { + more = false break } + if r.dead.Load() { + // It's dead. Deinitialize and free it. + *rp = r.allnext + r.deinit() + mheap_.spanSPMCAlloc.free(unsafe.Pointer(r)) + } else { + // Still alive, likely in some P's chain. + // Skip it. + rp = &r.allnext + } } - if !q.empty() { - throw("drained local span queue, but not empty") - } - - work.spanq.putBatch(batch[:n]) - return true + unlock(&work.spanSPMCs.lock) + return more } -// spanQueueSteal attempts to steal a span from another P's local queue. +// tryStealSpan attempts to steal a span from another P's local queue. 
// // Returns a non-zero objptr on success. -func spanQueueSteal(gcw *gcWork) objptr { +func (w *gcWork) tryStealSpan() objptr { pp := getg().m.p.ptr() for enum := stealOrder.start(cheaprand()); !enum.done(); enum.next() { + if !work.spanqMask.read(enum.position()) { + continue + } p2 := allp[enum.position()] if pp == p2 { continue } - if s := gcw.spanq.stealFrom(&p2.gcw.spanq); s != 0 { + if s := w.spanq.steal(&p2.gcw.spanq); s != 0 { return s } + // N.B. This is intentionally racy. We may stomp on a mask set by + // a P that just put a bunch of work into its local queue. + // + // This is OK because the ragged barrier in gcMarkDone will set + // the bit on each P if there's local work we missed. This race + // should generally be rare, since the window between noticing + // an empty local queue and this bit being set is quite small. + work.spanqMask.clear(int32(enum.position())) } return 0 } @@ -608,8 +841,7 @@ func scanSpan(p objptr, gcw *gcWork) { atomic.Or8(bytep, mask) gcw.bytesMarked += uint64(elemsize) if debug.gctrace > 1 { - gcw.stats[spanclass.sizeclass()].spansSparseScanned++ - gcw.stats[spanclass.sizeclass()].spanObjsSparseScanned++ + gcw.stats[spanclass.sizeclass()].sparseObjsScanned++ } b := spanBase + uintptr(objIndex)*elemsize scanObjectSmall(spanBase, b, elemsize, gcw) @@ -631,11 +863,47 @@ func scanSpan(p objptr, gcw *gcWork) { return } gcw.bytesMarked += uint64(objsMarked) * uint64(elemsize) + + // Check if we have enough density to make a dartboard scan + // worthwhile. If not, just do what scanobject does, but + // localized to the span, using the dartboard. + if !scan.HasFastScanSpanPacked() || objsMarked < int(nelems/8) { + if debug.gctrace > 1 { + gcw.stats[spanclass.sizeclass()].spansSparseScanned++ + gcw.stats[spanclass.sizeclass()].spanObjsSparseScanned += uint64(objsMarked) + } + scanObjectsSmall(spanBase, elemsize, nelems, gcw, &toScan) + return + } + + // Scan the span. + // + // N.B. Use gcw.ptrBuf as the output buffer. 
This is a bit different + // from scanObjectsSmall, which puts addresses to dereference. ScanSpanPacked + // on the other hand, fills gcw.ptrBuf with already dereferenced pointers. + nptrs := scan.ScanSpanPacked( + unsafe.Pointer(spanBase), + &gcw.ptrBuf[0], + &toScan, + uintptr(spanclass.sizeclass()), + spanPtrMaskUnsafe(spanBase), + ) + gcw.heapScanWork += int64(objsMarked) * int64(elemsize) + if debug.gctrace > 1 { + // Write down some statistics. gcw.stats[spanclass.sizeclass()].spansDenseScanned++ gcw.stats[spanclass.sizeclass()].spanObjsDenseScanned += uint64(objsMarked) } - scanObjectsSmall(spanBase, elemsize, nelems, gcw, &toScan) + + // Process all the pointers we just got. + for _, p := range gcw.ptrBuf[:nptrs] { + if !tryDeferToSpanScan(p, gcw) { + if obj, span, objIndex := findObject(p, 0, 0); obj != 0 { + greyobject(obj, 0, 0, span, gcw, objIndex) + } + } + } } // spanSetScans sets any unset mark bits that have their mark bits set in the inline mark bits. @@ -798,12 +1066,27 @@ func heapBitsSmallForAddrInline(spanBase, addr, elemsize uintptr) uintptr { return read } +// spanPtrMaskUnsafe returns the pointer mask for a span with inline mark bits. +// +// The caller must ensure spanBase is the base of a span that: +// - 1 page in size, +// - Uses inline mark bits, +// - Contains pointers. +func spanPtrMaskUnsafe(spanBase uintptr) *gc.PtrMask { + base := spanBase + gc.PageSize - unsafe.Sizeof(gc.PtrMask{}) - unsafe.Sizeof(spanInlineMarkBits{}) + return (*gc.PtrMask)(unsafe.Pointer(base)) +} + type sizeClassScanStats struct { - spansDenseScanned uint64 - spanObjsDenseScanned uint64 - spansSparseScanned uint64 - spanObjsSparseScanned uint64 - sparseObjsScanned uint64 + spansDenseScanned uint64 // Spans scanned with ScanSpanPacked. + spanObjsDenseScanned uint64 // Objects scanned with ScanSpanPacked. + spansSparseScanned uint64 // Spans scanned with scanObjectsSmall. + spanObjsSparseScanned uint64 // Objects scanned with scanObjectsSmall. 
+ sparseObjsScanned uint64 // Objects scanned with scanobject or scanObjectSmall. + // Note: sparseObjsScanned is sufficient for both cases because + // a particular size class either uses scanobject or scanObjectSmall, + // not both. In the latter case, we also know that there was one + // object scanned per span, so no need for a span counter. } func dumpScanStats() { @@ -852,6 +1135,23 @@ func (w *gcWork) flushScanStats(dst *[gc.NumSizeClasses]sizeClassScanStats) { clear(w.stats[:]) } +// gcMarkWorkAvailable reports whether there's any non-local work available to do. +// +// This is a heavyweight check and must only be used for correctness, not +// as a hint. +func gcMarkWorkAvailable() bool { + if !work.full.empty() { + return true // global work available + } + if work.markrootNext < work.markrootJobs { + return true // root scan work available + } + if work.spanqMask.any() { + return true // stealable local work available + } + return false +} + // scanObject scans the object starting at b, adding pointers to gcw. // b must point to the beginning of a heap object or an oblet. // scanObject consults the GC bitmap for the pointer mask and the diff --git a/src/runtime/mgcmark_nogreenteagc.go b/src/runtime/mgcmark_nogreenteagc.go index 6375773123..e450503291 100644 --- a/src/runtime/mgcmark_nogreenteagc.go +++ b/src/runtime/mgcmark_nogreenteagc.go @@ -54,31 +54,34 @@ func (q *spanInlineMarkBits) tryAcquire() bool { } type spanQueue struct { - _ uint32 // To match alignment padding requirements for atomically-accessed variables in workType. 
} -func (q *spanQueue) empty() bool { - return true +func (q *spanQueue) flush() { } -func (q *spanQueue) size() int { - return 0 +func (q *spanQueue) empty() bool { + return true } -type localSpanQueue struct { +type spanSPMC struct { + _ sys.NotInHeap } -func (q *localSpanQueue) drain() bool { +func freeSomeSpanSPMCs(preemptible bool) bool { return false } -func (q *localSpanQueue) empty() bool { - return true +type objptr uintptr + +func (w *gcWork) tryGetSpanFast() objptr { + return 0 } -type objptr uintptr +func (w *gcWork) tryGetSpan() objptr { + return 0 +} -func (w *gcWork) tryGetSpan(steal bool) objptr { +func (w *gcWork) tryStealSpan() objptr { return 0 } @@ -116,6 +119,17 @@ func (w *gcWork) flushScanStats(dst *[gc.NumSizeClasses]sizeClassScanStats) { clear(w.stats[:]) } +// gcMarkWorkAvailable reports whether there's any non-local work available to do. +func gcMarkWorkAvailable() bool { + if !work.full.empty() { + return true // global work available + } + if work.markrootNext < work.markrootJobs { + return true // root scan work available + } + return false +} + // scanObject scans the object starting at b, adding pointers to gcw. // b must point to the beginning of a heap object or an oblet. // scanObject consults the GC bitmap for the pointer mask and the diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go index 044792d6bd..17e2f405e4 100644 --- a/src/runtime/mgcpacer.go +++ b/src/runtime/mgcpacer.go @@ -714,7 +714,7 @@ func (c *gcControllerState) enlistWorker() { // (the scheduler will already prefer to spin up a new // dedicated worker over an idle one). if sched.npidle.Load() != 0 && sched.nmspinning.Load() == 0 { - wakep() + wakep() // Likely to consume our worker request. return } } @@ -767,8 +767,8 @@ func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) { gcCPULimiter.update(now) } - if !gcMarkWorkAvailable(pp) { - // No work to be done right now. 
This can happen at + if !gcShouldScheduleWorker(pp) { + // No good reason to schedule a worker. This can happen at // the end of the mark phase when there are still // assists tapering off. Don't bother running a worker // now because it'll just return immediately. diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index b72cc461ba..364cdb58cc 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -307,6 +307,10 @@ func bgsweep(c chan int) { // N.B. freeSomeWbufs is already batched internally. goschedIfBusy() } + for freeSomeSpanSPMCs(true) { + // N.B. freeSomeSpanSPMCs is already batched internally. + goschedIfBusy() + } lock(&sweep.lock) if !isSweepDone() { // This can happen if a GC runs between diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go index ee7eec9ef7..48ac94eecd 100644 --- a/src/runtime/mgcwork.go +++ b/src/runtime/mgcwork.go @@ -55,9 +55,10 @@ func init() { // | Priority | Work queue | Restrictions | Function | // |----------------------------------------------------------| // | 1 | Workbufs | P-local | tryGetObjFast | -// | 2 | Span queue | P-local | tryGetSpan(false) | [greenteagc] +// | 2 | Span queue | P-local | tryGetSpanFast | [greenteagc] // | 3 | Workbufs | None | tryGetObj | -// | 4 | Span queue | None | tryGetSpan(true) | [greenteagc] +// | 4 | Span queue | None | tryGetSpan | [greenteagc] +// | 5 | Span queue | None | tryStealSpan | [greenteagc] // +----------------------------------------------------------+ // // The rationale behind this ordering comes from two insights: @@ -80,6 +81,8 @@ func init() { // gcWork may locally hold GC work buffers. This can be done by // disabling preemption (systemstack or acquirem). type gcWork struct { + id int32 // same ID as the parent P + // wbuf1 and wbuf2 are the primary and secondary work buffers. // // This can be thought of as a stack of both work buffers' @@ -103,7 +106,7 @@ type gcWork struct { // spanq is a queue of spans to process. 
// // Only used if goexperiment.GreenTeaGC. - spanq localSpanQueue + spanq spanQueue // ptrBuf is a temporary buffer used by span scanning. ptrBuf *[pageSize / goarch.PtrSize]uintptr @@ -318,7 +321,18 @@ func (w *gcWork) dispose() { } w.wbuf2 = nil } - if w.spanq.drain() { + if !w.spanq.empty() { + w.spanq.flush() // Flush any local work. + + // There's globally-visible work now, so make everyone aware of it. + // + // Note that we need to make everyone aware even if flush didn't + // flush any local work. The global work was always visible, but + // the bitmap bit may have been unset. + // + // See the comment in tryStealSpan, which explains how it relies + // on this behavior. + work.spanqMask.set(w.id) w.flushedWork = true } if w.bytesMarked != 0 { diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 1776206573..049b7798a8 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -213,13 +213,14 @@ type mheap struct { pad [(cpu.CacheLinePadSize - unsafe.Sizeof(mcentral{})%cpu.CacheLinePadSize) % cpu.CacheLinePadSize]byte } - spanalloc fixalloc // allocator for span* - cachealloc fixalloc // allocator for mcache* - specialfinalizeralloc fixalloc // allocator for specialfinalizer* - specialCleanupAlloc fixalloc // allocator for specialCleanup* - specialCheckFinalizerAlloc fixalloc // allocator for specialCheckFinalizer* - specialTinyBlockAlloc fixalloc // allocator for specialTinyBlock* - specialprofilealloc fixalloc // allocator for specialprofile* + spanalloc fixalloc // allocator for span + spanSPMCAlloc fixalloc // allocator for spanSPMC, protected by work.spanSPMCs.lock + cachealloc fixalloc // allocator for mcache + specialfinalizeralloc fixalloc // allocator for specialfinalizer + specialCleanupAlloc fixalloc // allocator for specialCleanup + specialCheckFinalizerAlloc fixalloc // allocator for specialCheckFinalizer + specialTinyBlockAlloc fixalloc // allocator for specialTinyBlock + specialprofilealloc fixalloc // allocator for 
specialprofile specialReachableAlloc fixalloc // allocator for specialReachable specialPinCounterAlloc fixalloc // allocator for specialPinCounter specialWeakHandleAlloc fixalloc // allocator for specialWeakHandle @@ -793,6 +794,7 @@ func (h *mheap) init() { lockInit(&h.speciallock, lockRankMheapSpecial) h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys) + h.spanSPMCAlloc.init(unsafe.Sizeof(spanSPMC{}), nil, nil, &memstats.gcMiscSys) h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys) h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys) h.specialCleanupAlloc.init(unsafe.Sizeof(specialCleanup{}), nil, nil, &memstats.other_sys) @@ -1937,86 +1939,6 @@ func (list *mSpanList) takeAll(other *mSpanList) { other.first, other.last = nil, nil } -// mSpanQueue is like an mSpanList but is FIFO instead of LIFO and may -// be allocated on the stack. (mSpanList can be visible from the mspan -// itself, so it is marked as not-in-heap). -type mSpanQueue struct { - head, tail *mspan - n int -} - -// push adds s to the end of the queue. -func (q *mSpanQueue) push(s *mspan) { - if s.next != nil { - throw("span already on list") - } - if q.tail == nil { - q.tail, q.head = s, s - } else { - q.tail.next = s - q.tail = s - } - q.n++ -} - -// pop removes a span from the head of the queue, if any. -func (q *mSpanQueue) pop() *mspan { - if q.head == nil { - return nil - } - s := q.head - q.head = s.next - s.next = nil - if q.head == nil { - q.tail = nil - } - q.n-- - return s -} - -// takeAll removes all the spans from q2 and adds them to the end of q1, in order. -func (q1 *mSpanQueue) takeAll(q2 *mSpanQueue) { - if q2.head == nil { - return - } - if q1.head == nil { - *q1 = *q2 - } else { - q1.tail.next = q2.head - q1.tail = q2.tail - q1.n += q2.n - } - q2.tail = nil - q2.head = nil - q2.n = 0 -} - -// popN removes n spans from the head of the queue and returns them as a new queue. 
-func (q *mSpanQueue) popN(n int) mSpanQueue { - var newQ mSpanQueue - if n <= 0 { - return newQ - } - if n >= q.n { - newQ = *q - q.tail = nil - q.head = nil - q.n = 0 - return newQ - } - s := q.head - for range n - 1 { - s = s.next - } - q.n -= n - newQ.head = q.head - newQ.tail = s - newQ.n = n - q.head = s.next - s.next = nil - return newQ -} - const ( // _KindSpecialTinyBlock indicates that a given allocation is a tiny block. // Ordered before KindSpecialFinalizer and KindSpecialCleanup so that it diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 05c60c4ba3..887063638b 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -3126,7 +3126,7 @@ func handoffp(pp *p) { return } // if it has GC work, start it straight away - if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) { + if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) { startm(pp, false, false) return } @@ -3507,7 +3507,7 @@ top: // // If we're in the GC mark phase, can safely scan and blacken objects, // and have work to do, run idle-time marking rather than give up the P. - if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) && gcController.addIdleMarkWorker() { + if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) && gcController.addIdleMarkWorker() { node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) if node != nil { pp.gcMarkWorkerMode = gcMarkWorkerIdleMode @@ -3914,7 +3914,7 @@ func checkIdleGCNoP() (*p, *g) { if atomic.Load(&gcBlackenEnabled) == 0 || !gcController.needIdleMarkWorker() { return nil, nil } - if !gcMarkWorkAvailable(nil) { + if !gcShouldScheduleWorker(nil) { return nil, nil } @@ -5736,6 +5736,7 @@ func setcpuprofilerate(hz int32) { // previously destroyed p, and transitions it to status _Pgcstop. 
func (pp *p) init(id int32) { pp.id = id + pp.gcw.id = id pp.status = _Pgcstop pp.sudogcache = pp.sudogbuf[:0] pp.deferpool = pp.deferpoolbuf[:0] @@ -5874,8 +5875,6 @@ func procresize(nprocs int32) *p { } sched.procresizetime = now - maskWords := (nprocs + 31) / 32 - // Grow allp if necessary. if nprocs > int32(len(allp)) { // Synchronize with retake, which could be running @@ -5891,19 +5890,9 @@ func procresize(nprocs int32) *p { allp = nallp } - if maskWords <= int32(cap(idlepMask)) { - idlepMask = idlepMask[:maskWords] - timerpMask = timerpMask[:maskWords] - } else { - nidlepMask := make([]uint32, maskWords) - // No need to copy beyond len, old Ps are irrelevant. - copy(nidlepMask, idlepMask) - idlepMask = nidlepMask - - ntimerpMask := make([]uint32, maskWords) - copy(ntimerpMask, timerpMask) - timerpMask = ntimerpMask - } + idlepMask = idlepMask.resize(nprocs) + timerpMask = timerpMask.resize(nprocs) + work.spanqMask = work.spanqMask.resize(nprocs) unlock(&allpLock) } @@ -5966,8 +5955,9 @@ func procresize(nprocs int32) *p { if int32(len(allp)) != nprocs { lock(&allpLock) allp = allp[:nprocs] - idlepMask = idlepMask[:maskWords] - timerpMask = timerpMask[:maskWords] + idlepMask = idlepMask.resize(nprocs) + timerpMask = timerpMask.resize(nprocs) + work.spanqMask = work.spanqMask.resize(nprocs) unlock(&allpLock) } @@ -6906,6 +6896,32 @@ func (p pMask) clear(id int32) { atomic.And(&p[word], ^mask) } +// any returns true if any bit in p is set. +func (p pMask) any() bool { + for i := range p { + if atomic.Load(&p[i]) != 0 { + return true + } + } + return false +} + +// resize resizes the pMask and returns a new one. +// +// The result may alias p, so callers are encouraged to +// discard p. Not safe for concurrent use. +func (p pMask) resize(nprocs int32) pMask { + maskWords := (nprocs + 31) / 32 + + if maskWords <= int32(cap(p)) { + return p[:maskWords] + } + newMask := make([]uint32, maskWords) + // No need to copy beyond len, old Ps are irrelevant. 
+ copy(newMask, p) + return newMask +} + // pidleput puts p on the _Pidle list. now must be a relatively recent call // to nanotime or zero. Returns now or the current time if now was zero. // diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go index 56f2a00d76..62ad8d1361 100644 --- a/src/runtime/symtab.go +++ b/src/runtime/symtab.go @@ -108,7 +108,7 @@ func (ci *Frames) Next() (frame Frame, more bool) { } funcInfo := findfunc(pc) if !funcInfo.valid() { - if cgoSymbolizer != nil { + if cgoSymbolizerAvailable() { // Pre-expand cgo frames. We could do this // incrementally, too, but there's no way to // avoid allocation in this case anyway. @@ -295,6 +295,8 @@ func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr { // expandCgoFrames expands frame information for pc, known to be // a non-Go function, using the cgoSymbolizer hook. expandCgoFrames // returns nil if pc could not be expanded. +// +// Preconditions: cgoSymbolizerAvailable returns true. func expandCgoFrames(pc uintptr) []Frame { arg := cgoSymbolizerArg{pc: pc} callCgoSymbolizer(&arg) diff --git a/src/runtime/testdata/testprog/setcgotraceback.go b/src/runtime/testdata/testprog/setcgotraceback.go new file mode 100644 index 0000000000..de005027ec --- /dev/null +++ b/src/runtime/testdata/testprog/setcgotraceback.go @@ -0,0 +1,45 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "internal/abi" + "runtime" + "unsafe" +) + +func init() { + register("SetCgoTracebackNoCgo", SetCgoTracebackNoCgo) +} + +func cgoTraceback() { + panic("unexpectedly reached cgo traceback function") +} + +func cgoContext() { + panic("unexpectedly reached cgo context function") +} + +func cgoSymbolizer() { + panic("unexpectedly reached cgo symbolizer function") +} + +// SetCgoTraceback is a no-op in non-cgo binaries. 
+func SetCgoTracebackNoCgo() { + traceback := unsafe.Pointer(abi.FuncPCABIInternal(cgoTraceback)) + context := unsafe.Pointer(abi.FuncPCABIInternal(cgoContext)) + symbolizer := unsafe.Pointer(abi.FuncPCABIInternal(cgoSymbolizer)) + runtime.SetCgoTraceback(0, traceback, context, symbolizer) + + // In a cgo binary, runtime.(*Frames).Next calls the cgo symbolizer for + // any non-Go frames. Pass in a bogus frame to verify that Next does + // not attempt to call the cgo symbolizer, which would crash in a + // non-cgo binary like this one. + frames := runtime.CallersFrames([]uintptr{0x12345678}) + frames.Next() + + fmt.Println("OK") +} diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 00c0f08e55..949d48c79a 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -591,7 +591,7 @@ func (u *unwinder) symPC() uintptr { // If the current frame is not a cgo frame or if there's no registered cgo // unwinder, it returns 0. func (u *unwinder) cgoCallers(pcBuf []uintptr) int { - if cgoTraceback == nil || u.frame.fn.funcID != abi.FuncID_cgocallback || u.cgoCtxt < 0 { + if !cgoTracebackAvailable() || u.frame.fn.funcID != abi.FuncID_cgocallback || u.cgoCtxt < 0 { // We don't have a cgo unwinder (typical case), or we do but we're not // in a cgo frame or we're out of cgo context. 
return 0 @@ -1014,7 +1014,7 @@ func traceback2(u *unwinder, showRuntime bool, skip, max int) (n, lastN int) { anySymbolized := false stop := false for _, pc := range cgoBuf[:cgoN] { - if cgoSymbolizer == nil { + if !cgoSymbolizerAvailable() { if pr, stop := commitFrame(); stop { break } else if pr { @@ -1249,6 +1249,7 @@ func goroutineheader(gp *g) { print(" (scan)") } if bubble := gp.bubble; bubble != nil && + gpstatus == _Gwaiting && gp.waitreason.isIdleInSynctest() && !stringslite.HasSuffix(status, "(durable)") { // If this isn't a status where the name includes a (durable) @@ -1572,10 +1573,18 @@ func SetCgoTraceback(version int, traceback, context, symbolizer unsafe.Pointer) cgoContext = context cgoSymbolizer = symbolizer - // The context function is called when a C function calls a Go - // function. As such it is only called by C code in runtime/cgo. - if _cgo_set_context_function != nil { - cgocall(_cgo_set_context_function, context) + if _cgo_set_traceback_functions != nil { + type cgoSetTracebackFunctionsArg struct { + traceback unsafe.Pointer + context unsafe.Pointer + symbolizer unsafe.Pointer + } + arg := cgoSetTracebackFunctionsArg{ + traceback: traceback, + context: context, + symbolizer: symbolizer, + } + cgocall(_cgo_set_traceback_functions, noescape(unsafe.Pointer(&arg))) } } @@ -1583,6 +1592,18 @@ var cgoTraceback unsafe.Pointer var cgoContext unsafe.Pointer var cgoSymbolizer unsafe.Pointer +func cgoTracebackAvailable() bool { + // - The traceback function must be registered via SetCgoTraceback. + // - This must be a cgo binary (providing _cgo_call_traceback_function). + return cgoTraceback != nil && _cgo_call_traceback_function != nil +} + +func cgoSymbolizerAvailable() bool { + // - The symbolizer function must be registered via SetCgoTraceback. + // - This must be a cgo binary (providing _cgo_call_symbolizer_function). 
+ return cgoSymbolizer != nil && _cgo_call_symbolizer_function != nil +} + // cgoTracebackArg is the type passed to cgoTraceback. type cgoTracebackArg struct { context uintptr @@ -1609,7 +1630,7 @@ type cgoSymbolizerArg struct { // printCgoTraceback prints a traceback of callers. func printCgoTraceback(callers *cgoCallers) { - if cgoSymbolizer == nil { + if !cgoSymbolizerAvailable() { for _, c := range callers { if c == 0 { break @@ -1634,6 +1655,8 @@ func printCgoTraceback(callers *cgoCallers) { // printOneCgoTraceback prints the traceback of a single cgo caller. // This can print more than one line because of inlining. // It returns the "stop" result of commitFrame. +// +// Preconditions: cgoSymbolizerAvailable returns true. func printOneCgoTraceback(pc uintptr, commitFrame func() (pr, stop bool), arg *cgoSymbolizerArg) bool { arg.pc = pc for { @@ -1664,6 +1687,8 @@ func printOneCgoTraceback(pc uintptr, commitFrame func() (pr, stop bool), arg *c } // callCgoSymbolizer calls the cgoSymbolizer function. +// +// Preconditions: cgoSymbolizerAvailable returns true. func callCgoSymbolizer(arg *cgoSymbolizerArg) { call := cgocall if panicking.Load() > 0 || getg().m.curg != getg() { @@ -1677,14 +1702,13 @@ func callCgoSymbolizer(arg *cgoSymbolizerArg) { if asanenabled { asanwrite(unsafe.Pointer(arg), unsafe.Sizeof(cgoSymbolizerArg{})) } - call(cgoSymbolizer, noescape(unsafe.Pointer(arg))) + call(_cgo_call_symbolizer_function, noescape(unsafe.Pointer(arg))) } // cgoContextPCs gets the PC values from a cgo traceback. +// +// Preconditions: cgoTracebackAvailable returns true. 
func cgoContextPCs(ctxt uintptr, buf []uintptr) { - if cgoTraceback == nil { - return - } call := cgocall if panicking.Load() > 0 || getg().m.curg != getg() { // We do not want to call into the scheduler when panicking @@ -1702,5 +1726,5 @@ func cgoContextPCs(ctxt uintptr, buf []uintptr) { if asanenabled { asanwrite(unsafe.Pointer(&arg), unsafe.Sizeof(arg)) } - call(cgoTraceback, noescape(unsafe.Pointer(&arg))) + call(_cgo_call_traceback_function, noescape(unsafe.Pointer(&arg))) } diff --git a/src/runtime/traceback_test.go b/src/runtime/traceback_test.go index 8cbccac673..1dac91311c 100644 --- a/src/runtime/traceback_test.go +++ b/src/runtime/traceback_test.go @@ -8,6 +8,9 @@ import ( "bytes" "fmt" "internal/abi" + "internal/asan" + "internal/msan" + "internal/race" "internal/testenv" "regexp" "runtime" @@ -867,3 +870,15 @@ func TestTracebackGeneric(t *testing.T) { } } } + +func TestSetCgoTracebackNoCgo(t *testing.T) { + if asan.Enabled || msan.Enabled || race.Enabled { + t.Skip("skipped test: sanitizer builds use cgo") + } + + output := runTestProg(t, "testprog", "SetCgoTracebackNoCgo") + want := "OK\n" + if output != want { + t.Fatalf("want %s, got %s\n", want, output) + } +} |
