From 7a372affd9eb4af18c3e2a0ce4d61f23ca1305d5 Mon Sep 17 00:00:00 2001 From: Mark Freeman Date: Wed, 13 Aug 2025 14:55:50 -0400 Subject: go/types, types2: rename definedType to declaredType and clarify docs declaredType seems a better name for this function because it is reached when processing a (Named or Alias) type declaration. In both cases, the fromRHS field (rather than the underlying field) will be set. Change-Id: Ibb1cc338e3b0632dc63f9cf2fd9d64cef6f1aaa5 Reviewed-on: https://go-review.googlesource.com/c/go/+/695955 Auto-Submit: Mark Freeman LUCI-TryBot-Result: Go LUCI Reviewed-by: Robert Griesemer --- src/cmd/compile/internal/types2/decl.go | 4 ++-- src/cmd/compile/internal/types2/typexpr.go | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/types2/decl.go b/src/cmd/compile/internal/types2/decl.go index 91d2492a53..8f196ece61 100644 --- a/src/cmd/compile/internal/types2/decl.go +++ b/src/cmd/compile/internal/types2/decl.go @@ -532,7 +532,7 @@ func (check *Checker) typeDecl(obj *TypeName, tdecl *syntax.TypeDecl, def *TypeN check.collectTypeParams(&alias.tparams, tdecl.TParamList) } - rhs = check.definedType(tdecl.Type, obj) + rhs = check.declaredType(tdecl.Type, obj) assert(rhs != nil) alias.fromRHS = rhs @@ -576,7 +576,7 @@ func (check *Checker) typeDecl(obj *TypeName, tdecl *syntax.TypeDecl, def *TypeN check.collectTypeParams(&named.tparams, tdecl.TParamList) } - rhs = check.definedType(tdecl.Type, obj) + rhs = check.declaredType(tdecl.Type, obj) assert(rhs != nil) named.fromRHS = rhs diff --git a/src/cmd/compile/internal/types2/typexpr.go b/src/cmd/compile/internal/types2/typexpr.go index 8601ce6277..303f782ac4 100644 --- a/src/cmd/compile/internal/types2/typexpr.go +++ b/src/cmd/compile/internal/types2/typexpr.go @@ -16,7 +16,7 @@ import ( // ident type-checks identifier e and initializes x with the value or type of e. // If an error occurred, x.mode is set to invalid. -// For the meaning of def, see Checker.definedType, below. +// For the meaning of def, see Checker.declaredType, below. // If wantType is set, the identifier e is expected to denote a type. func (check *Checker) ident(x *operand, e *syntax.Name, def *TypeName, wantType bool) { x.mode = invalid @@ -149,14 +149,14 @@ func (check *Checker) ident(x *operand, e *syntax.Name, def *TypeName, wantType // typ type-checks the type expression e and returns its type, or Typ[Invalid]. // The type must not be an (uninstantiated) generic type. func (check *Checker) typ(e syntax.Expr) Type { - return check.definedType(e, nil) + return check.declaredType(e, nil) } // varType type-checks the type expression e and returns its type, or Typ[Invalid]. // The type must not be an (uninstantiated) generic type and it must not be a // constraint interface. func (check *Checker) varType(e syntax.Expr) Type { - typ := check.definedType(e, nil) + typ := check.declaredType(e, nil) check.validVarType(e, typ) return typ } @@ -187,11 +187,11 @@ func (check *Checker) validVarType(e syntax.Expr, typ Type) { }).describef(e, "check var type %s", typ) } -// definedType is like typ but also accepts a type name def. -// If def != nil, e is the type specification for the type named def, declared -// in a type declaration, and def.typ.underlying will be set to the type of e -// before any components of e are type-checked. -func (check *Checker) definedType(e syntax.Expr, def *TypeName) Type { +// declaredType is like typ but also accepts a type name def. +// If def != nil, e is the type specification for the [Alias] or [Named] type +// named def, and def.typ.fromRHS will be set to the [Type] of e immediately +// after its creation. +func (check *Checker) declaredType(e syntax.Expr, def *TypeName) Type { typ := check.typInternal(e, def) assert(isTyped(typ)) if isGeneric(typ) { @@ -230,7 +230,7 @@ func goTypeName(typ Type) string { } // typInternal drives type checking of types. -// Must only be called by definedType or genericType. +// Must only be called by declaredType or genericType. func (check *Checker) typInternal(e0 syntax.Expr, def *TypeName) (T Type) { if check.conf.Trace { check.trace(e0.Pos(), "-- type %s", e0) @@ -296,7 +296,7 @@ func (check *Checker) typInternal(e0 syntax.Expr, def *TypeName) (T Type) { case *syntax.ParenExpr: // Generic types must be instantiated before they can be used in any form. // Consequently, generic types cannot be parenthesized. - return check.definedType(e.X, def) + return check.declaredType(e.X, def) case *syntax.ArrayType: typ := new(Array) -- cgit v1.3-5-g9baa From ddd8558e61c80f51a2c65565f8354d12d3c6a418 Mon Sep 17 00:00:00 2001 From: Mark Freeman Date: Tue, 28 Oct 2025 17:54:52 -0400 Subject: go/types, types2: swap object.color for Checker.objPathIdx The type checker assigns types to objects. An object can be in 1 of 3 states, which are mapped to colors. This is stored as a field on the object. - white : type not known, awaiting processing - grey : type pending, in processing - black : type known, done processing With the addition of Checker.objPathIdx, which maps objects to a path index, presence in the map could signal grey coloring. White and black coloring can be signaled by presence of object.typ (a simple nil check). This change removes the object.color field and its associated methods, replacing it with an equivalent use of Checker.objPathIdx. Checker.objPathIdx is integrated into the existing push and pop methods, while slightly simplifying their signatures. Note that the concept of object coloring remains the same - we are merely changing and simplifying the mechanism which represents the colors. Change-Id: I91fb5e9a59dcb34c08ffc5b4ebc3f20a400094b6 Reviewed-on: https://go-review.googlesource.com/c/go/+/715840 Reviewed-by: Robert Griesemer LUCI-TryBot-Result: Go LUCI Auto-Submit: Mark Freeman --- src/cmd/compile/internal/types2/check.go | 31 +++--- src/cmd/compile/internal/types2/cycles.go | 1 - src/cmd/compile/internal/types2/decl.go | 147 +++++++++---------------- src/cmd/compile/internal/types2/object.go | 54 ++------- src/cmd/compile/internal/types2/scope.go | 2 - src/cmd/compile/internal/types2/sizeof_test.go | 16 +-- src/cmd/compile/internal/types2/universe.go | 8 +- src/go/types/check.go | 31 +++--- src/go/types/cycles.go | 1 - src/go/types/decl.go | 147 +++++++++---------------- src/go/types/object.go | 54 ++------- src/go/types/scope.go | 2 - src/go/types/sizeof_test.go | 16 +-- src/go/types/universe.go | 8 +- 14 files changed, 170 insertions(+), 348 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/types2/check.go b/src/cmd/compile/internal/types2/check.go index 25cda4f73d..42218b4caf 100644 --- a/src/cmd/compile/internal/types2/check.go +++ b/src/cmd/compile/internal/types2/check.go @@ -171,12 +171,13 @@ type Checker struct { usedPkgNames map[*PkgName]bool // set of used package names mono monoGraph // graph for detecting non-monomorphizable instantiation loops - firstErr error // first error encountered - methods map[*TypeName][]*Func // maps package scope type names to associated non-blank (non-interface) methods - untyped map[syntax.Expr]exprInfo // map of expressions without final type - delayed []action // stack of delayed action segments; segments are processed in FIFO order - objPath []Object // path of object dependencies during type inference (for cycle reporting) - cleaners []cleaner // list of types that may need a final cleanup at the end of type-checking + firstErr error // first error encountered + methods map[*TypeName][]*Func // maps package scope type names to associated non-blank (non-interface) methods + untyped map[syntax.Expr]exprInfo // map of expressions without final type + delayed []action // stack of delayed action segments; segments are processed in FIFO order + objPath []Object // path of object dependencies during type-checking (for cycle reporting) + objPathIdx map[Object]int // map of object to object path index during type-checking (for cycle reporting) + cleaners []cleaner // list of types that may need a final cleanup at the end of type-checking // environment within which the current object is type-checked (valid only // for the duration of type-checking a specific object) @@ -248,19 +249,22 @@ func (check *Checker) later(f func()) *action { return &check.delayed[i] } -// push pushes obj onto the object path and returns its index in the path. -func (check *Checker) push(obj Object) int { +// push pushes obj onto the object path and records its index in the path index map. +func (check *Checker) push(obj Object) { + if check.objPathIdx == nil { + check.objPathIdx = make(map[Object]int) + } + check.objPathIdx[obj] = len(check.objPath) check.objPath = append(check.objPath, obj) - return len(check.objPath) - 1 } -// pop pops and returns the topmost object from the object path. -func (check *Checker) pop() Object { +// pop pops an object from the object path and removes it from the path index map. +func (check *Checker) pop() { i := len(check.objPath) - 1 obj := check.objPath[i] - check.objPath[i] = nil + check.objPath[i] = nil // help the garbage collector check.objPath = check.objPath[:i] - return obj + delete(check.objPathIdx, obj) } type cleaner interface { @@ -319,6 +323,7 @@ func (check *Checker) initFiles(files []*syntax.File) { check.untyped = nil check.delayed = nil check.objPath = nil + check.objPathIdx = nil check.cleaners = nil // We must initialize usedVars and usedPkgNames both here and in NewChecker, diff --git a/src/cmd/compile/internal/types2/cycles.go b/src/cmd/compile/internal/types2/cycles.go index fa739a2b84..b916219c97 100644 --- a/src/cmd/compile/internal/types2/cycles.go +++ b/src/cmd/compile/internal/types2/cycles.go @@ -54,7 +54,6 @@ func (check *Checker) directCycle(tname *TypeName, pathIdx map[*TypeName]int) { // tname is marked grey - we have a cycle on the path beginning at start. // Mark tname as invalid. tname.setType(Typ[Invalid]) - tname.setColor(black) // collect type names on cycle var cycle []Object diff --git a/src/cmd/compile/internal/types2/decl.go b/src/cmd/compile/internal/types2/decl.go index 8f196ece61..2df34f3b94 100644 --- a/src/cmd/compile/internal/types2/decl.go +++ b/src/cmd/compile/internal/types2/decl.go @@ -62,114 +62,77 @@ func (check *Checker) objDecl(obj Object, def *TypeName) { if check.indent == 0 { fmt.Println() // empty line between top-level objects for readability } - check.trace(obj.Pos(), "-- checking %s (%s, objPath = %s)", obj, obj.color(), pathString(check.objPath)) + check.trace(obj.Pos(), "-- checking %s (objPath = %s)", obj, pathString(check.objPath)) check.indent++ defer func() { check.indent-- - check.trace(obj.Pos(), "=> %s (%s)", obj, obj.color()) + check.trace(obj.Pos(), "=> %s", obj) }() } - // Checking the declaration of obj means inferring its type - // (and possibly its value, for constants). - // An object's type (and thus the object) may be in one of - // three states which are expressed by colors: + // Checking the declaration of an object means determining its type + // (and also its value for constants). An object (and thus its type) + // may be in 1 of 3 states: // - // - an object whose type is not yet known is painted white (initial color) - // - an object whose type is in the process of being inferred is painted grey - // - an object whose type is fully inferred is painted black + // - not in Checker.objPathIdx and type == nil : type is not yet known (white) + // - in Checker.objPathIdx : type is pending (grey) + // - not in Checker.objPathIdx and type != nil : type is known (black) // - // During type inference, an object's color changes from white to grey - // to black (pre-declared objects are painted black from the start). - // A black object (i.e., its type) can only depend on (refer to) other black - // ones. White and grey objects may depend on white and black objects. - // A dependency on a grey object indicates a cycle which may or may not be - // valid. + // During type-checking, an object changes from white to grey to black. + // Predeclared objects start as black (their type is known without checking). // - // When objects turn grey, they are pushed on the object path (a stack); - // they are popped again when they turn black. Thus, if a grey object (a - // cycle) is encountered, it is on the object path, and all the objects - // it depends on are the remaining objects on that path. Color encoding - // is such that the color value of a grey object indicates the index of - // that object in the object path. - - // During type-checking, white objects may be assigned a type without - // traversing through objDecl; e.g., when initializing constants and - // variables. Update the colors of those objects here (rather than - // everywhere where we set the type) to satisfy the color invariants. - if obj.color() == white && obj.Type() != nil { - obj.setColor(black) - return - } - - switch obj.color() { - case white: - assert(obj.Type() == nil) - // All color values other than white and black are considered grey. - // Because black and white are < grey, all values >= grey are grey. - // Use those values to encode the object's index into the object path. - obj.setColor(grey + color(check.push(obj))) - defer func() { - check.pop().setColor(black) - }() - - case black: - assert(obj.Type() != nil) - return - - default: - // Color values other than white or black are considered grey. - fallthrough - - case grey: - // We have a (possibly invalid) cycle. - // In the existing code, this is marked by a non-nil type - // for the object except for constants and variables whose - // type may be non-nil (known), or nil if it depends on the - // not-yet known initialization value. - // In the former case, set the type to Typ[Invalid] because - // we have an initialization cycle. The cycle error will be - // reported later, when determining initialization order. - // TODO(gri) Report cycle here and simplify initialization - // order code. + // A black object may only depend on (refer to) to other black objects. White + // and grey objects may depend on white or black objects. A dependency on a + // grey object indicates a (possibly invalid) cycle. + // + // When an object is marked grey, it is pushed onto the object path (a stack) + // and its index in the path is recorded in the path index map. It is popped + // and removed from the map when its type is determined (and marked black). + + // If this object is grey, we have a (possibly invalid) cycle. This is signaled + // by a non-nil type for the object, except for constants and variables whose + // type may be non-nil (known), or nil if it depends on a not-yet known + // initialization value. + // + // In the former case, set the type to Typ[Invalid] because we have an + // initialization cycle. The cycle error will be reported later, when + // determining initialization order. + // + // TODO(gri) Report cycle here and simplify initialization order code. + if _, ok := check.objPathIdx[obj]; ok { switch obj := obj.(type) { - case *Const: - if !check.validCycle(obj) || obj.typ == nil { - obj.typ = Typ[Invalid] - } - - case *Var: - if !check.validCycle(obj) || obj.typ == nil { - obj.typ = Typ[Invalid] + case *Const, *Var: + if !check.validCycle(obj) || obj.Type() == nil { + obj.setType(Typ[Invalid]) } - case *TypeName: if !check.validCycle(obj) { - // break cycle - // (without this, calling underlying() - // below may lead to an endless loop - // if we have a cycle for a defined - // (*Named) type) - obj.typ = Typ[Invalid] + obj.setType(Typ[Invalid]) } - case *Func: if !check.validCycle(obj) { - // Don't set obj.typ to Typ[Invalid] here - // because plenty of code type-asserts that - // functions have a *Signature type. Grey - // functions have their type set to an empty - // signature which makes it impossible to + // Don't set type to Typ[Invalid]; plenty of code asserts that + // functions have a *Signature type. Instead, leave the type + // as an empty signature, which makes it impossible to // initialize a variable with the function. } - default: panic("unreachable") } + assert(obj.Type() != nil) return } + if obj.Type() != nil { // black, meaning it's already type-checked + return + } + + // white, meaning it must be type-checked + + check.push(obj) + defer check.pop() + d := check.objMap[obj] if d == nil { check.dump("%v: %s should have been declared", obj.Pos(), obj) @@ -221,8 +184,8 @@ func (check *Checker) validCycle(obj Object) (valid bool) { } // Count cycle objects. - assert(obj.color() >= grey) - start := obj.color() - grey // index of obj in objPath + start, found := check.objPathIdx[obj] + assert(found) cycle := check.objPath[start:] tparCycle := false // if set, the cycle is through a type parameter list nval := 0 // number of (constant or variable) values in the cycle @@ -764,17 +727,8 @@ func (check *Checker) funcDecl(obj *Func, decl *declInfo) { sig := new(Signature) obj.typ = sig // guard against cycles - // Avoid cycle error when referring to method while type-checking the signature. - // This avoids a nuisance in the best case (non-parameterized receiver type) and - // since the method is not a type, we get an error. If we have a parameterized - // receiver type, instantiating the receiver type leads to the instantiation of - // its methods, and we don't want a cycle error in that case. - // TODO(gri) review if this is correct and/or whether we still need this? - saved := obj.color_ - obj.color_ = black fdecl := decl.fdecl check.funcType(sig, fdecl.Recv, fdecl.TParamList, fdecl.Type) - obj.color_ = saved // Set the scope's extent to the complete "func (...) { ... }" // so that Scope.Innermost works correctly. @@ -921,10 +875,9 @@ func (check *Checker) declStmt(list []syntax.Decl) { // the innermost containing block." scopePos := s.Name.Pos() check.declare(check.scope, s.Name, obj, scopePos) - // mark and unmark type before calling typeDecl; its type is still nil (see Checker.objDecl) - obj.setColor(grey + color(check.push(obj))) + check.push(obj) // mark as grey + defer check.pop() check.typeDecl(obj, s, nil) - check.pop().setColor(black) default: check.errorf(s, InvalidSyntaxTree, "unknown syntax.Decl node %T", s) diff --git a/src/cmd/compile/internal/types2/object.go b/src/cmd/compile/internal/types2/object.go index ce129dbf59..dd2d415790 100644 --- a/src/cmd/compile/internal/types2/object.go +++ b/src/cmd/compile/internal/types2/object.go @@ -42,18 +42,12 @@ type Object interface { // 0 for all other objects (including objects in file scopes). order() uint32 - // color returns the object's color. - color() color - // setType sets the type of the object. setType(Type) // setOrder sets the order number of the object. It must be > 0. setOrder(uint32) - // setColor sets the object's color. It must not be white. - setColor(color color) - // setParent sets the parent scope of the object. setParent(*Scope) @@ -102,41 +96,9 @@ type object struct { name string typ Type order_ uint32 - color_ color scopePos_ syntax.Pos } -// color encodes the color of an object (see Checker.objDecl for details). -type color uint32 - -// An object may be painted in one of three colors. -// Color values other than white or black are considered grey. -const ( - white color = iota - black - grey // must be > white and black -) - -func (c color) String() string { - switch c { - case white: - return "white" - case black: - return "black" - default: - return "grey" - } -} - -// colorFor returns the (initial) color for an object depending on -// whether its type t is known or not. -func colorFor(t Type) color { - if t != nil { - return black - } - return white -} - // Parent returns the scope in which the object is declared. // The result is nil for methods and struct fields. func (obj *object) Parent() *Scope { return obj.parent } @@ -164,13 +126,11 @@ func (obj *object) Id() string { return Id(obj.pkg, obj.name) } func (obj *object) String() string { panic("abstract") } func (obj *object) order() uint32 { return obj.order_ } -func (obj *object) color() color { return obj.color_ } func (obj *object) scopePos() syntax.Pos { return obj.scopePos_ } func (obj *object) setParent(parent *Scope) { obj.parent = parent } func (obj *object) setType(typ Type) { obj.typ = typ } func (obj *object) setOrder(order uint32) { assert(order > 0); obj.order_ = order } -func (obj *object) setColor(color color) { assert(color != white); obj.color_ = color } func (obj *object) setScopePos(pos syntax.Pos) { obj.scopePos_ = pos } func (obj *object) sameId(pkg *Package, name string, foldCase bool) bool { @@ -247,7 +207,7 @@ type PkgName struct { // NewPkgName returns a new PkgName object representing an imported package. // The remaining arguments set the attributes found with all Objects. func NewPkgName(pos syntax.Pos, pkg *Package, name string, imported *Package) *PkgName { - return &PkgName{object{nil, pos, pkg, name, Typ[Invalid], 0, black, nopos}, imported} + return &PkgName{object{nil, pos, pkg, name, Typ[Invalid], 0, nopos}, imported} } // Imported returns the package that was imported. @@ -263,7 +223,7 @@ type Const struct { // NewConst returns a new constant with value val. // The remaining arguments set the attributes found with all Objects. func NewConst(pos syntax.Pos, pkg *Package, name string, typ Type, val constant.Value) *Const { - return &Const{object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}, val} + return &Const{object{nil, pos, pkg, name, typ, 0, nopos}, val} } // Val returns the constant's value. @@ -288,7 +248,7 @@ type TypeName struct { // argument for NewNamed, which will set the TypeName's type as a side- // effect. func NewTypeName(pos syntax.Pos, pkg *Package, name string, typ Type) *TypeName { - return &TypeName{object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}} + return &TypeName{object{nil, pos, pkg, name, typ, 0, nopos}} } // NewTypeNameLazy returns a new defined type like NewTypeName, but it @@ -402,7 +362,7 @@ func NewField(pos syntax.Pos, pkg *Package, name string, typ Type, embedded bool // newVar returns a new variable. // The arguments set the attributes found with all Objects. func newVar(kind VarKind, pos syntax.Pos, pkg *Package, name string, typ Type) *Var { - return &Var{object: object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}, kind: kind} + return &Var{object: object{nil, pos, pkg, name, typ, 0, nopos}, kind: kind} } // Anonymous reports whether the variable is an embedded field. @@ -452,7 +412,7 @@ func NewFunc(pos syntax.Pos, pkg *Package, name string, sig *Signature) *Func { // as this would violate object.{Type,color} invariants. // TODO(adonovan): propose to disallow NewFunc with nil *Signature. } - return &Func{object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}, false, nil} + return &Func{object{nil, pos, pkg, name, typ, 0, nopos}, false, nil} } // Signature returns the signature (type) of the function or method. @@ -534,7 +494,7 @@ type Label struct { // NewLabel returns a new label. func NewLabel(pos syntax.Pos, pkg *Package, name string) *Label { - return &Label{object{pos: pos, pkg: pkg, name: name, typ: Typ[Invalid], color_: black}, false} + return &Label{object{pos: pos, pkg: pkg, name: name, typ: Typ[Invalid]}, false} } // A Builtin represents a built-in function. @@ -545,7 +505,7 @@ type Builtin struct { } func newBuiltin(id builtinId) *Builtin { - return &Builtin{object{name: predeclaredFuncs[id].name, typ: Typ[Invalid], color_: black}, id} + return &Builtin{object{name: predeclaredFuncs[id].name, typ: Typ[Invalid]}, id} } // Nil represents the predeclared value nil. diff --git a/src/cmd/compile/internal/types2/scope.go b/src/cmd/compile/internal/types2/scope.go index 566184df73..10c624be2e 100644 --- a/src/cmd/compile/internal/types2/scope.go +++ b/src/cmd/compile/internal/types2/scope.go @@ -217,10 +217,8 @@ func (*lazyObject) Exported() bool { panic("unreachable") } func (*lazyObject) Id() string { panic("unreachable") } func (*lazyObject) String() string { panic("unreachable") } func (*lazyObject) order() uint32 { panic("unreachable") } -func (*lazyObject) color() color { panic("unreachable") } func (*lazyObject) setType(Type) { panic("unreachable") } func (*lazyObject) setOrder(uint32) { panic("unreachable") } -func (*lazyObject) setColor(color color) { panic("unreachable") } func (*lazyObject) setParent(*Scope) { panic("unreachable") } func (*lazyObject) sameId(*Package, string, bool) bool { panic("unreachable") } func (*lazyObject) scopePos() syntax.Pos { panic("unreachable") } diff --git a/src/cmd/compile/internal/types2/sizeof_test.go b/src/cmd/compile/internal/types2/sizeof_test.go index 092e82318a..f206c12fc3 100644 --- a/src/cmd/compile/internal/types2/sizeof_test.go +++ b/src/cmd/compile/internal/types2/sizeof_test.go @@ -36,14 +36,14 @@ func TestSizeof(t *testing.T) { {term{}, 12, 24}, // Objects - {PkgName{}, 60, 96}, - {Const{}, 64, 104}, - {TypeName{}, 56, 88}, - {Var{}, 64, 104}, - {Func{}, 64, 104}, - {Label{}, 60, 96}, - {Builtin{}, 60, 96}, - {Nil{}, 56, 88}, + {PkgName{}, 56, 96}, + {Const{}, 60, 104}, + {TypeName{}, 52, 88}, + {Var{}, 60, 104}, + {Func{}, 60, 104}, + {Label{}, 56, 96}, + {Builtin{}, 56, 96}, + {Nil{}, 52, 88}, // Misc {Scope{}, 60, 104}, diff --git a/src/cmd/compile/internal/types2/universe.go b/src/cmd/compile/internal/types2/universe.go index 332cd174f9..1ecef97d51 100644 --- a/src/cmd/compile/internal/types2/universe.go +++ b/src/cmd/compile/internal/types2/universe.go @@ -98,7 +98,6 @@ func defPredeclaredTypes() { // interface. { universeAnyNoAlias = NewTypeName(nopos, nil, "any", &Interface{complete: true, tset: &topTypeSet}) - universeAnyNoAlias.setColor(black) // ensure that the any TypeName reports a consistent Parent, after // hijacking Universe.Lookup with gotypesalias=0. universeAnyNoAlias.setParent(Universe) @@ -107,7 +106,6 @@ func defPredeclaredTypes() { // into the Universe, but we lean toward the future and insert the Alias // representation. universeAnyAlias = NewTypeName(nopos, nil, "any", nil) - universeAnyAlias.setColor(black) _ = NewAlias(universeAnyAlias, universeAnyNoAlias.Type().Underlying()) // Link TypeName and Alias def(universeAnyAlias) } @@ -115,7 +113,6 @@ func defPredeclaredTypes() { // type error interface{ Error() string } { obj := NewTypeName(nopos, nil, "error", nil) - obj.setColor(black) typ := (*Checker)(nil).newNamed(obj, nil, nil) // error.Error() string @@ -136,7 +133,6 @@ func defPredeclaredTypes() { // type comparable interface{} // marked as comparable { obj := NewTypeName(nopos, nil, "comparable", nil) - obj.setColor(black) typ := (*Checker)(nil).newNamed(obj, nil, nil) // interface{} // marked as comparable @@ -165,7 +161,7 @@ func defPredeclaredConsts() { } func defPredeclaredNil() { - def(&Nil{object{name: "nil", typ: Typ[UntypedNil], color_: black}}) + def(&Nil{object{name: "nil", typ: Typ[UntypedNil]}}) } // A builtinId is the id of a builtin function. @@ -289,7 +285,7 @@ func init() { // a scope. Objects with exported names are inserted in the unsafe package // scope; other objects are inserted in the universe scope. func def(obj Object) { - assert(obj.color() == black) + assert(obj.Type() != nil) name := obj.Name() if strings.Contains(name, " ") { return // nothing to do diff --git a/src/go/types/check.go b/src/go/types/check.go index 44d3ae5586..638b1f6fcc 100644 --- a/src/go/types/check.go +++ b/src/go/types/check.go @@ -191,12 +191,13 @@ type Checker struct { usedPkgNames map[*PkgName]bool // set of used package names mono monoGraph // graph for detecting non-monomorphizable instantiation loops - firstErr error // first error encountered - methods map[*TypeName][]*Func // maps package scope type names to associated non-blank (non-interface) methods - untyped map[ast.Expr]exprInfo // map of expressions without final type - delayed []action // stack of delayed action segments; segments are processed in FIFO order - objPath []Object // path of object dependencies during type inference (for cycle reporting) - cleaners []cleaner // list of types that may need a final cleanup at the end of type-checking + firstErr error // first error encountered + methods map[*TypeName][]*Func // maps package scope type names to associated non-blank (non-interface) methods + untyped map[ast.Expr]exprInfo // map of expressions without final type + delayed []action // stack of delayed action segments; segments are processed in FIFO order + objPath []Object // path of object dependencies during type-checking (for cycle reporting) + objPathIdx map[Object]int // map of object to object path index during type-checking (for cycle reporting) + cleaners []cleaner // list of types that may need a final cleanup at the end of type-checking // environment within which the current object is type-checked (valid only // for the duration of type-checking a specific object) @@ -268,19 +269,22 @@ func (check *Checker) later(f func()) *action { return &check.delayed[i] } -// push pushes obj onto the object path and returns its index in the path. -func (check *Checker) push(obj Object) int { +// push pushes obj onto the object path and records its index in the path index map. +func (check *Checker) push(obj Object) { + if check.objPathIdx == nil { + check.objPathIdx = make(map[Object]int) + } + check.objPathIdx[obj] = len(check.objPath) check.objPath = append(check.objPath, obj) - return len(check.objPath) - 1 } -// pop pops and returns the topmost object from the object path. -func (check *Checker) pop() Object { +// pop pops an object from the object path and removes it from the path index map. +func (check *Checker) pop() { i := len(check.objPath) - 1 obj := check.objPath[i] - check.objPath[i] = nil + check.objPath[i] = nil // help the garbage collector check.objPath = check.objPath[:i] - return obj + delete(check.objPathIdx, obj) } type cleaner interface { @@ -343,6 +347,7 @@ func (check *Checker) initFiles(files []*ast.File) { check.untyped = nil check.delayed = nil check.objPath = nil + check.objPathIdx = nil check.cleaners = nil // We must initialize usedVars and usedPkgNames both here and in NewChecker, diff --git a/src/go/types/cycles.go b/src/go/types/cycles.go index d90e7de39c..bd894258b1 100644 --- a/src/go/types/cycles.go +++ b/src/go/types/cycles.go @@ -57,7 +57,6 @@ func (check *Checker) directCycle(tname *TypeName, pathIdx map[*TypeName]int) { // tname is marked grey - we have a cycle on the path beginning at start. // Mark tname as invalid. tname.setType(Typ[Invalid]) - tname.setColor(black) // collect type names on cycle var cycle []Object diff --git a/src/go/types/decl.go b/src/go/types/decl.go index c35edd8afa..05cc63e01c 100644 --- a/src/go/types/decl.go +++ b/src/go/types/decl.go @@ -63,114 +63,77 @@ func (check *Checker) objDecl(obj Object, def *TypeName) { if check.indent == 0 { fmt.Println() // empty line between top-level objects for readability } - check.trace(obj.Pos(), "-- checking %s (%s, objPath = %s)", obj, obj.color(), pathString(check.objPath)) + check.trace(obj.Pos(), "-- checking %s (objPath = %s)", obj, pathString(check.objPath)) check.indent++ defer func() { check.indent-- - check.trace(obj.Pos(), "=> %s (%s)", obj, obj.color()) + check.trace(obj.Pos(), "=> %s", obj) }() } - // Checking the declaration of obj means inferring its type - // (and possibly its value, for constants). - // An object's type (and thus the object) may be in one of - // three states which are expressed by colors: + // Checking the declaration of an object means determining its type + // (and also its value for constants). An object (and thus its type) + // may be in 1 of 3 states: // - // - an object whose type is not yet known is painted white (initial color) - // - an object whose type is in the process of being inferred is painted grey - // - an object whose type is fully inferred is painted black + // - not in Checker.objPathIdx and type == nil : type is not yet known (white) + // - in Checker.objPathIdx : type is pending (grey) + // - not in Checker.objPathIdx and type != nil : type is known (black) // - // During type inference, an object's color changes from white to grey - // to black (pre-declared objects are painted black from the start). - // A black object (i.e., its type) can only depend on (refer to) other black - // ones. White and grey objects may depend on white and black objects. - // A dependency on a grey object indicates a cycle which may or may not be - // valid. + // During type-checking, an object changes from white to grey to black. + // Predeclared objects start as black (their type is known without checking). // - // When objects turn grey, they are pushed on the object path (a stack); - // they are popped again when they turn black. Thus, if a grey object (a - // cycle) is encountered, it is on the object path, and all the objects - // it depends on are the remaining objects on that path. Color encoding - // is such that the color value of a grey object indicates the index of - // that object in the object path. - - // During type-checking, white objects may be assigned a type without - // traversing through objDecl; e.g., when initializing constants and - // variables. Update the colors of those objects here (rather than - // everywhere where we set the type) to satisfy the color invariants. - if obj.color() == white && obj.Type() != nil { - obj.setColor(black) - return - } - - switch obj.color() { - case white: - assert(obj.Type() == nil) - // All color values other than white and black are considered grey. - // Because black and white are < grey, all values >= grey are grey. - // Use those values to encode the object's index into the object path. - obj.setColor(grey + color(check.push(obj))) - defer func() { - check.pop().setColor(black) - }() - - case black: - assert(obj.Type() != nil) - return - - default: - // Color values other than white or black are considered grey. - fallthrough - - case grey: - // We have a (possibly invalid) cycle. - // In the existing code, this is marked by a non-nil type - // for the object except for constants and variables whose - // type may be non-nil (known), or nil if it depends on the - // not-yet known initialization value. - // In the former case, set the type to Typ[Invalid] because - // we have an initialization cycle. The cycle error will be - // reported later, when determining initialization order. - // TODO(gri) Report cycle here and simplify initialization - // order code. + // A black object may only depend on (refer to) to other black objects. White + // and grey objects may depend on white or black objects. A dependency on a + // grey object indicates a (possibly invalid) cycle. + // + // When an object is marked grey, it is pushed onto the object path (a stack) + // and its index in the path is recorded in the path index map. It is popped + // and removed from the map when its type is determined (and marked black). + + // If this object is grey, we have a (possibly invalid) cycle. This is signaled + // by a non-nil type for the object, except for constants and variables whose + // type may be non-nil (known), or nil if it depends on a not-yet known + // initialization value. + // + // In the former case, set the type to Typ[Invalid] because we have an + // initialization cycle. The cycle error will be reported later, when + // determining initialization order. + // + // TODO(gri) Report cycle here and simplify initialization order code. + if _, ok := check.objPathIdx[obj]; ok { switch obj := obj.(type) { - case *Const: - if !check.validCycle(obj) || obj.typ == nil { - obj.typ = Typ[Invalid] - } - - case *Var: - if !check.validCycle(obj) || obj.typ == nil { - obj.typ = Typ[Invalid] + case *Const, *Var: + if !check.validCycle(obj) || obj.Type() == nil { + obj.setType(Typ[Invalid]) } - case *TypeName: if !check.validCycle(obj) { - // break cycle - // (without this, calling underlying() - // below may lead to an endless loop - // if we have a cycle for a defined - // (*Named) type) - obj.typ = Typ[Invalid] + obj.setType(Typ[Invalid]) } - case *Func: if !check.validCycle(obj) { - // Don't set obj.typ to Typ[Invalid] here - // because plenty of code type-asserts that - // functions have a *Signature type. Grey - // functions have their type set to an empty - // signature which makes it impossible to + // Don't set type to Typ[Invalid]; plenty of code asserts that + // functions have a *Signature type. Instead, leave the type + // as an empty signature, which makes it impossible to // initialize a variable with the function. } - default: panic("unreachable") } + assert(obj.Type() != nil) return } + if obj.Type() != nil { // black, meaning it's already type-checked + return + } + + // white, meaning it must be type-checked + + check.push(obj) // mark as grey + defer check.pop() + d := check.objMap[obj] if d == nil { check.dump("%v: %s should have been declared", obj.Pos(), obj) @@ -222,8 +185,8 @@ func (check *Checker) validCycle(obj Object) (valid bool) { } // Count cycle objects. - assert(obj.color() >= grey) - start := obj.color() - grey // index of obj in objPath + start, found := check.objPathIdx[obj] + assert(found) cycle := check.objPath[start:] tparCycle := false // if set, the cycle is through a type parameter list nval := 0 // number of (constant or variable) values in the cycle @@ -857,17 +820,8 @@ func (check *Checker) funcDecl(obj *Func, decl *declInfo) { sig := new(Signature) obj.typ = sig // guard against cycles - // Avoid cycle error when referring to method while type-checking the signature. - // This avoids a nuisance in the best case (non-parameterized receiver type) and - // since the method is not a type, we get an error. If we have a parameterized - // receiver type, instantiating the receiver type leads to the instantiation of - // its methods, and we don't want a cycle error in that case. - // TODO(gri) review if this is correct and/or whether we still need this? - saved := obj.color_ - obj.color_ = black fdecl := decl.fdecl check.funcType(sig, fdecl.Recv, fdecl.Type) - obj.color_ = saved // Set the scope's extent to the complete "func (...) { ... }" // so that Scope.Innermost works correctly. @@ -980,10 +934,9 @@ func (check *Checker) declStmt(d ast.Decl) { // the innermost containing block." scopePos := d.spec.Name.Pos() check.declare(check.scope, d.spec.Name, obj, scopePos) - // mark and unmark type before calling typeDecl; its type is still nil (see Checker.objDecl) - obj.setColor(grey + color(check.push(obj))) + check.push(obj) // mark as grey + defer check.pop() check.typeDecl(obj, d.spec, nil) - check.pop().setColor(black) default: check.errorf(d.node(), InvalidSyntaxTree, "unknown ast.Decl node %T", d.node()) } diff --git a/src/go/types/object.go b/src/go/types/object.go index 7bf705cb81..57158c1595 100644 --- a/src/go/types/object.go +++ b/src/go/types/object.go @@ -45,18 +45,12 @@ type Object interface { // 0 for all other objects (including objects in file scopes). order() uint32 - // color returns the object's color. - color() color - // setType sets the type of the object. setType(Type) // setOrder sets the order number of the object. It must be > 0. setOrder(uint32) - // setColor sets the object's color. It must not be white. - setColor(color color) - // setParent sets the parent scope of the object. setParent(*Scope) @@ -105,41 +99,9 @@ type object struct { name string typ Type order_ uint32 - color_ color scopePos_ token.Pos } -// color encodes the color of an object (see Checker.objDecl for details). -type color uint32 - -// An object may be painted in one of three colors. -// Color values other than white or black are considered grey. -const ( - white color = iota - black - grey // must be > white and black -) - -func (c color) String() string { - switch c { - case white: - return "white" - case black: - return "black" - default: - return "grey" - } -} - -// colorFor returns the (initial) color for an object depending on -// whether its type t is known or not. -func colorFor(t Type) color { - if t != nil { - return black - } - return white -} - // Parent returns the scope in which the object is declared. // The result is nil for methods and struct fields. func (obj *object) Parent() *Scope { return obj.parent } @@ -167,13 +129,11 @@ func (obj *object) Id() string { return Id(obj.pkg, obj.name) } func (obj *object) String() string { panic("abstract") } func (obj *object) order() uint32 { return obj.order_ } -func (obj *object) color() color { return obj.color_ } func (obj *object) scopePos() token.Pos { return obj.scopePos_ } func (obj *object) setParent(parent *Scope) { obj.parent = parent } func (obj *object) setType(typ Type) { obj.typ = typ } func (obj *object) setOrder(order uint32) { assert(order > 0); obj.order_ = order } -func (obj *object) setColor(color color) { assert(color != white); obj.color_ = color } func (obj *object) setScopePos(pos token.Pos) { obj.scopePos_ = pos } func (obj *object) sameId(pkg *Package, name string, foldCase bool) bool { @@ -250,7 +210,7 @@ type PkgName struct { // NewPkgName returns a new PkgName object representing an imported package. // The remaining arguments set the attributes found with all Objects. func NewPkgName(pos token.Pos, pkg *Package, name string, imported *Package) *PkgName { - return &PkgName{object{nil, pos, pkg, name, Typ[Invalid], 0, black, nopos}, imported} + return &PkgName{object{nil, pos, pkg, name, Typ[Invalid], 0, nopos}, imported} } // Imported returns the package that was imported. @@ -266,7 +226,7 @@ type Const struct { // NewConst returns a new constant with value val. // The remaining arguments set the attributes found with all Objects. func NewConst(pos token.Pos, pkg *Package, name string, typ Type, val constant.Value) *Const { - return &Const{object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}, val} + return &Const{object{nil, pos, pkg, name, typ, 0, nopos}, val} } // Val returns the constant's value. @@ -291,7 +251,7 @@ type TypeName struct { // argument for NewNamed, which will set the TypeName's type as a side- // effect. func NewTypeName(pos token.Pos, pkg *Package, name string, typ Type) *TypeName { - return &TypeName{object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}} + return &TypeName{object{nil, pos, pkg, name, typ, 0, nopos}} } // NewTypeNameLazy returns a new defined type like NewTypeName, but it @@ -405,7 +365,7 @@ func NewField(pos token.Pos, pkg *Package, name string, typ Type, embedded bool) // newVar returns a new variable. // The arguments set the attributes found with all Objects. func newVar(kind VarKind, pos token.Pos, pkg *Package, name string, typ Type) *Var { - return &Var{object: object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}, kind: kind} + return &Var{object: object{nil, pos, pkg, name, typ, 0, nopos}, kind: kind} } // Anonymous reports whether the variable is an embedded field. @@ -455,7 +415,7 @@ func NewFunc(pos token.Pos, pkg *Package, name string, sig *Signature) *Func { // as this would violate object.{Type,color} invariants. // TODO(adonovan): propose to disallow NewFunc with nil *Signature. } - return &Func{object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}, false, nil} + return &Func{object{nil, pos, pkg, name, typ, 0, nopos}, false, nil} } // Signature returns the signature (type) of the function or method. @@ -537,7 +497,7 @@ type Label struct { // NewLabel returns a new label. func NewLabel(pos token.Pos, pkg *Package, name string) *Label { - return &Label{object{pos: pos, pkg: pkg, name: name, typ: Typ[Invalid], color_: black}, false} + return &Label{object{pos: pos, pkg: pkg, name: name, typ: Typ[Invalid]}, false} } // A Builtin represents a built-in function. @@ -548,7 +508,7 @@ type Builtin struct { } func newBuiltin(id builtinId) *Builtin { - return &Builtin{object{name: predeclaredFuncs[id].name, typ: Typ[Invalid], color_: black}, id} + return &Builtin{object{name: predeclaredFuncs[id].name, typ: Typ[Invalid]}, id} } // Nil represents the predeclared value nil. diff --git a/src/go/types/scope.go b/src/go/types/scope.go index 81366df741..e44b097dc5 100644 --- a/src/go/types/scope.go +++ b/src/go/types/scope.go @@ -220,10 +220,8 @@ func (*lazyObject) Exported() bool { panic("unreachable") } func (*lazyObject) Id() string { panic("unreachable") } func (*lazyObject) String() string { panic("unreachable") } func (*lazyObject) order() uint32 { panic("unreachable") } -func (*lazyObject) color() color { panic("unreachable") } func (*lazyObject) setType(Type) { panic("unreachable") } func (*lazyObject) setOrder(uint32) { panic("unreachable") } -func (*lazyObject) setColor(color color) { panic("unreachable") } func (*lazyObject) setParent(*Scope) { panic("unreachable") } func (*lazyObject) sameId(*Package, string, bool) bool { panic("unreachable") } func (*lazyObject) scopePos() token.Pos { panic("unreachable") } diff --git a/src/go/types/sizeof_test.go b/src/go/types/sizeof_test.go index 4ff255ffa0..694ab32462 100644 --- a/src/go/types/sizeof_test.go +++ b/src/go/types/sizeof_test.go @@ -35,14 +35,14 @@ func TestSizeof(t *testing.T) { {term{}, 12, 24}, // Objects - {PkgName{}, 44, 80}, - {Const{}, 48, 88}, - {TypeName{}, 40, 72}, - {Var{}, 48, 88}, - {Func{}, 48, 88}, - {Label{}, 44, 80}, - {Builtin{}, 44, 80}, - {Nil{}, 40, 72}, + {PkgName{}, 40, 80}, + {Const{}, 44, 88}, + {TypeName{}, 36, 72}, + {Var{}, 44, 88}, + {Func{}, 44, 88}, + {Label{}, 40, 80}, + {Builtin{}, 40, 80}, + {Nil{}, 36, 72}, // Misc {Scope{}, 44, 88}, diff --git a/src/go/types/universe.go b/src/go/types/universe.go index 70935dc35f..8d2b99cf17 100644 --- a/src/go/types/universe.go +++ b/src/go/types/universe.go @@ -101,7 +101,6 @@ func defPredeclaredTypes() { // interface. { universeAnyNoAlias = NewTypeName(nopos, nil, "any", &Interface{complete: true, tset: &topTypeSet}) - universeAnyNoAlias.setColor(black) // ensure that the any TypeName reports a consistent Parent, after // hijacking Universe.Lookup with gotypesalias=0. universeAnyNoAlias.setParent(Universe) @@ -110,7 +109,6 @@ func defPredeclaredTypes() { // into the Universe, but we lean toward the future and insert the Alias // representation. universeAnyAlias = NewTypeName(nopos, nil, "any", nil) - universeAnyAlias.setColor(black) _ = NewAlias(universeAnyAlias, universeAnyNoAlias.Type().Underlying()) // Link TypeName and Alias def(universeAnyAlias) } @@ -118,7 +116,6 @@ func defPredeclaredTypes() { // type error interface{ Error() string } { obj := NewTypeName(nopos, nil, "error", nil) - obj.setColor(black) typ := (*Checker)(nil).newNamed(obj, nil, nil) // error.Error() string @@ -139,7 +136,6 @@ func defPredeclaredTypes() { // type comparable interface{} // marked as comparable { obj := NewTypeName(nopos, nil, "comparable", nil) - obj.setColor(black) typ := (*Checker)(nil).newNamed(obj, nil, nil) // interface{} // marked as comparable @@ -168,7 +164,7 @@ func defPredeclaredConsts() { } func defPredeclaredNil() { - def(&Nil{object{name: "nil", typ: Typ[UntypedNil], color_: black}}) + def(&Nil{object{name: "nil", typ: Typ[UntypedNil]}}) } // A builtinId is the id of a builtin function. @@ -292,7 +288,7 @@ func init() { // a scope. Objects with exported names are inserted in the unsafe package // scope; other objects are inserted in the universe scope. func def(obj Object) { - assert(obj.color() == black) + assert(obj.Type() != nil) name := obj.Name() if strings.Contains(name, " ") { return // nothing to do -- cgit v1.3-5-g9baa From 1e5e6663e958dcc9579fb38ffcd8a1999d75128d Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Fri, 7 Nov 2025 00:00:50 +0000 Subject: cmd/compile: remove unnecessary casts and types from riscv64 rules This change shouldn't have any impact on codegen. It removes some unnecessary int8 and int64 casts from the riscv64 rewrite rules. It also removes explicit typing where the types already match: `(OldOp ) => (NewOp )` is the same as `(OldOp) => (NewOp)`. Change-Id: Ic02b65da8f548c8b9ad9ccb6627a03b7bf6f050f Reviewed-on: https://go-review.googlesource.com/c/go/+/719220 Reviewed-by: Junyang Shao Auto-Submit: Keith Randall LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall Reviewed-by: Keith Randall Commit-Queue: Junyang Shao --- src/cmd/compile/internal/ssa/_gen/RISCV64.rules | 38 +++++------ src/cmd/compile/internal/ssa/rewriteRISCV64.go | 87 +++++++++++-------------- 2 files changed, 57 insertions(+), 68 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules index 646948f2df..6166bd5584 100644 --- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules @@ -689,36 +689,36 @@ (MOVDnop (MOVDconst [c])) => (MOVDconst [c]) // Avoid unnecessary zero and sign extension when right shifting. -(SRAI [x] (MOVWreg y)) && x >= 0 && x <= 31 => (SRAIW [int64(x)] y) -(SRLI [x] (MOVWUreg y)) && x >= 0 && x <= 31 => (SRLIW [int64(x)] y) +(SRAI [x] (MOVWreg y)) && x >= 0 && x <= 31 => (SRAIW [x] y) +(SRLI [x] (MOVWUreg y)) && x >= 0 && x <= 31 => (SRLIW [x] y) // Replace right shifts that exceed size of signed type. (SRAI [x] (MOVBreg y)) && x >= 8 => (SRAI [63] (SLLI [56] y)) (SRAI [x] (MOVHreg y)) && x >= 16 => (SRAI [63] (SLLI [48] y)) -(SRAI [x] (MOVWreg y)) && x >= 32 => (SRAIW [31] y) +(SRAI [x] (MOVWreg y)) && x >= 32 => (SRAIW [31] y) // Eliminate right shifts that exceed size of unsigned type. -(SRLI [x] (MOVBUreg y)) && x >= 8 => (MOVDconst [0]) -(SRLI [x] (MOVHUreg y)) && x >= 16 => (MOVDconst [0]) -(SRLI [x] (MOVWUreg y)) && x >= 32 => (MOVDconst [0]) +(SRLI [x] (MOVBUreg y)) && x >= 8 => (MOVDconst [0]) +(SRLI [x] (MOVHUreg y)) && x >= 16 => (MOVDconst [0]) +(SRLI [x] (MOVWUreg y)) && x >= 32 => (MOVDconst [0]) // Fold constant into immediate instructions where possible. (ADD (MOVDconst [val]) x) && is32Bit(val) && !t.IsPtr() => (ADDI [val] x) (AND (MOVDconst [val]) x) && is32Bit(val) => (ANDI [val] x) (OR (MOVDconst [val]) x) && is32Bit(val) => (ORI [val] x) (XOR (MOVDconst [val]) x) && is32Bit(val) => (XORI [val] x) -(ROL x (MOVDconst [val])) => (RORI [int64(int8(-val)&63)] x) -(ROLW x (MOVDconst [val])) => (RORIW [int64(int8(-val)&31)] x) -(ROR x (MOVDconst [val])) => (RORI [int64(val&63)] x) -(RORW x (MOVDconst [val])) => (RORIW [int64(val&31)] x) -(SLL x (MOVDconst [val])) => (SLLI [int64(val&63)] x) -(SRL x (MOVDconst [val])) => (SRLI [int64(val&63)] x) -(SLLW x (MOVDconst [val])) => (SLLIW [int64(val&31)] x) -(SRLW x (MOVDconst [val])) => (SRLIW [int64(val&31)] x) -(SRA x (MOVDconst [val])) => (SRAI [int64(val&63)] x) -(SRAW x (MOVDconst [val])) => (SRAIW [int64(val&31)] x) -(SLT x (MOVDconst [val])) && val >= -2048 && val <= 2047 => (SLTI [val] x) -(SLTU x (MOVDconst [val])) && val >= -2048 && val <= 2047 => (SLTIU [val] x) +(ROL x (MOVDconst [val])) => (RORI [-val&63] x) +(ROLW x (MOVDconst [val])) => (RORIW [-val&31] x) +(ROR x (MOVDconst [val])) => (RORI [val&63] x) +(RORW x (MOVDconst [val])) => (RORIW [val&31] x) +(SLL x (MOVDconst [val])) => (SLLI [val&63] x) +(SLLW x (MOVDconst [val])) => (SLLIW [val&31] x) +(SRL x (MOVDconst [val])) => (SRLI [val&63] x) +(SRLW x (MOVDconst [val])) => (SRLIW [val&31] x) +(SRA x (MOVDconst [val])) => (SRAI [val&63] x) +(SRAW x (MOVDconst [val])) => (SRAIW [val&31] x) +(SLT x (MOVDconst [val])) && is12Bit(val) => (SLTI [val] x) +(SLTU x (MOVDconst [val])) && is12Bit(val) => (SLTIU [val] x) // Replace negated left rotation with right rotation. (ROL x (NEG y)) => (ROR x y) @@ -782,7 +782,7 @@ (SRAI [x] (MOVDconst [y])) => (MOVDconst [int64(y) >> uint32(x)]) // Combine doubling via addition with shift. -(SLLI [c] (ADD x x)) && c < t.Size() * 8 - 1 => (SLLI [c+1] x) +(SLLI [c] (ADD x x)) && c < t.Size() * 8 - 1 => (SLLI [c+1] x) (SLLI [c] (ADD x x)) && c >= t.Size() * 8 - 1 => (MOVDconst [0]) // SLTI/SLTIU with constants. diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go index 191c7b3d48..24fef3fe72 100644 --- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go +++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go @@ -7027,7 +7027,7 @@ func rewriteValueRISCV64_OpRISCV64ROL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (ROL x (MOVDconst [val])) - // result: (RORI [int64(int8(-val)&63)] x) + // result: (RORI [-val&63] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7035,7 +7035,7 @@ func rewriteValueRISCV64_OpRISCV64ROL(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64RORI) - v.AuxInt = int64ToAuxInt(int64(int8(-val) & 63)) + v.AuxInt = int64ToAuxInt(-val & 63) v.AddArg(x) return true } @@ -7057,7 +7057,7 @@ func rewriteValueRISCV64_OpRISCV64ROLW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (ROLW x (MOVDconst [val])) - // result: (RORIW [int64(int8(-val)&31)] x) + // result: (RORIW [-val&31] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7065,7 +7065,7 @@ func rewriteValueRISCV64_OpRISCV64ROLW(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64RORIW) - v.AuxInt = int64ToAuxInt(int64(int8(-val) & 31)) + v.AuxInt = int64ToAuxInt(-val & 31) v.AddArg(x) return true } @@ -7087,7 +7087,7 @@ func rewriteValueRISCV64_OpRISCV64ROR(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (ROR x (MOVDconst [val])) - // result: (RORI [int64(val&63)] x) + // result: (RORI [val&63] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7095,7 +7095,7 @@ func rewriteValueRISCV64_OpRISCV64ROR(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64RORI) - v.AuxInt = int64ToAuxInt(int64(val & 63)) + v.AuxInt = int64ToAuxInt(val & 63) v.AddArg(x) return true } @@ -7105,7 +7105,7 @@ func rewriteValueRISCV64_OpRISCV64RORW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (RORW x (MOVDconst [val])) - // result: (RORIW [int64(val&31)] x) + // result: (RORIW [val&31] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7113,7 +7113,7 @@ func rewriteValueRISCV64_OpRISCV64RORW(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64RORIW) - v.AuxInt = int64ToAuxInt(int64(val & 31)) + v.AuxInt = int64ToAuxInt(val & 31) v.AddArg(x) return true } @@ -7212,7 +7212,7 @@ func rewriteValueRISCV64_OpRISCV64SLL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SLL x (MOVDconst [val])) - // result: (SLLI [int64(val&63)] x) + // result: (SLLI [val&63] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7220,7 +7220,7 @@ func rewriteValueRISCV64_OpRISCV64SLL(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64SLLI) - v.AuxInt = int64ToAuxInt(int64(val & 63)) + v.AuxInt = int64ToAuxInt(val & 63) v.AddArg(x) return true } @@ -7246,7 +7246,7 @@ func rewriteValueRISCV64_OpRISCV64SLLI(v *Value) bool { } // match: (SLLI [c] (ADD x x)) // cond: c < t.Size() * 8 - 1 - // result: (SLLI [c+1] x) + // result: (SLLI [c+1] x) for { t := v.Type c := auxIntToInt64(v.AuxInt) @@ -7258,7 +7258,6 @@ func rewriteValueRISCV64_OpRISCV64SLLI(v *Value) bool { break } v.reset(OpRISCV64SLLI) - v.Type = t v.AuxInt = int64ToAuxInt(c + 1) v.AddArg(x) return true @@ -7286,7 +7285,7 @@ func rewriteValueRISCV64_OpRISCV64SLLW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SLLW x (MOVDconst [val])) - // result: (SLLIW [int64(val&31)] x) + // result: (SLLIW [val&31] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7294,7 +7293,7 @@ func rewriteValueRISCV64_OpRISCV64SLLW(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64SLLIW) - v.AuxInt = int64ToAuxInt(int64(val & 31)) + v.AuxInt = int64ToAuxInt(val & 31) v.AddArg(x) return true } @@ -7304,7 +7303,7 @@ func rewriteValueRISCV64_OpRISCV64SLT(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SLT x (MOVDconst [val])) - // cond: val >= -2048 && val <= 2047 + // cond: is12Bit(val) // result: (SLTI [val] x) for { x := v_0 @@ -7312,7 +7311,7 @@ func rewriteValueRISCV64_OpRISCV64SLT(v *Value) bool { break } val := auxIntToInt64(v_1.AuxInt) - if !(val >= -2048 && val <= 2047) { + if !(is12Bit(val)) { break } v.reset(OpRISCV64SLTI) @@ -7433,7 +7432,7 @@ func rewriteValueRISCV64_OpRISCV64SLTU(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SLTU x (MOVDconst [val])) - // cond: val >= -2048 && val <= 2047 + // cond: is12Bit(val) // result: (SLTIU [val] x) for { x := v_0 @@ -7441,7 +7440,7 @@ func rewriteValueRISCV64_OpRISCV64SLTU(v *Value) bool { break } val := auxIntToInt64(v_1.AuxInt) - if !(val >= -2048 && val <= 2047) { + if !(is12Bit(val)) { break } v.reset(OpRISCV64SLTIU) @@ -7555,7 +7554,7 @@ func rewriteValueRISCV64_OpRISCV64SRA(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SRA x (MOVDconst [val])) - // result: (SRAI [int64(val&63)] x) + // result: (SRAI [val&63] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7563,7 +7562,7 @@ func rewriteValueRISCV64_OpRISCV64SRA(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64SRAI) - v.AuxInt = int64ToAuxInt(int64(val & 63)) + v.AuxInt = int64ToAuxInt(val & 63) v.AddArg(x) return true } @@ -7572,11 +7571,10 @@ func rewriteValueRISCV64_OpRISCV64SRA(v *Value) bool { func rewriteValueRISCV64_OpRISCV64SRAI(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (SRAI [x] (MOVWreg y)) + // match: (SRAI [x] (MOVWreg y)) // cond: x >= 0 && x <= 31 - // result: (SRAIW [int64(x)] y) + // result: (SRAIW [x] y) for { - t := v.Type x := auxIntToInt64(v.AuxInt) if v_0.Op != OpRISCV64MOVWreg { break @@ -7586,8 +7584,7 @@ func rewriteValueRISCV64_OpRISCV64SRAI(v *Value) bool { break } v.reset(OpRISCV64SRAIW) - v.Type = t - v.AuxInt = int64ToAuxInt(int64(x)) + v.AuxInt = int64ToAuxInt(x) v.AddArg(y) return true } @@ -7633,7 +7630,7 @@ func rewriteValueRISCV64_OpRISCV64SRAI(v *Value) bool { v.AddArg(v0) return true } - // match: (SRAI [x] (MOVWreg y)) + // match: (SRAI [x] (MOVWreg y)) // cond: x >= 32 // result: (SRAIW [31] y) for { @@ -7668,7 +7665,7 @@ func rewriteValueRISCV64_OpRISCV64SRAW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SRAW x (MOVDconst [val])) - // result: (SRAIW [int64(val&31)] x) + // result: (SRAIW [val&31] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7676,7 +7673,7 @@ func rewriteValueRISCV64_OpRISCV64SRAW(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64SRAIW) - v.AuxInt = int64ToAuxInt(int64(val & 31)) + v.AuxInt = int64ToAuxInt(val & 31) v.AddArg(x) return true } @@ -7686,7 +7683,7 @@ func rewriteValueRISCV64_OpRISCV64SRL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SRL x (MOVDconst [val])) - // result: (SRLI [int64(val&63)] x) + // result: (SRLI [val&63] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7694,7 +7691,7 @@ func rewriteValueRISCV64_OpRISCV64SRL(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64SRLI) - v.AuxInt = int64ToAuxInt(int64(val & 63)) + v.AuxInt = int64ToAuxInt(val & 63) v.AddArg(x) return true } @@ -7702,11 +7699,10 @@ func rewriteValueRISCV64_OpRISCV64SRL(v *Value) bool { } func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool { v_0 := v.Args[0] - // match: (SRLI [x] (MOVWUreg y)) + // match: (SRLI [x] (MOVWUreg y)) // cond: x >= 0 && x <= 31 - // result: (SRLIW [int64(x)] y) + // result: (SRLIW [x] y) for { - t := v.Type x := auxIntToInt64(v.AuxInt) if v_0.Op != OpRISCV64MOVWUreg { break @@ -7716,16 +7712,14 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool { break } v.reset(OpRISCV64SRLIW) - v.Type = t - v.AuxInt = int64ToAuxInt(int64(x)) + v.AuxInt = int64ToAuxInt(x) v.AddArg(y) return true } - // match: (SRLI [x] (MOVBUreg y)) + // match: (SRLI [x] (MOVBUreg y)) // cond: x >= 8 - // result: (MOVDconst [0]) + // result: (MOVDconst [0]) for { - t := v.Type x := auxIntToInt64(v.AuxInt) if v_0.Op != OpRISCV64MOVBUreg { break @@ -7734,15 +7728,13 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool { break } v.reset(OpRISCV64MOVDconst) - v.Type = t v.AuxInt = int64ToAuxInt(0) return true } - // match: (SRLI [x] (MOVHUreg y)) + // match: (SRLI [x] (MOVHUreg y)) // cond: x >= 16 - // result: (MOVDconst [0]) + // result: (MOVDconst [0]) for { - t := v.Type x := auxIntToInt64(v.AuxInt) if v_0.Op != OpRISCV64MOVHUreg { break @@ -7751,15 +7743,13 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool { break } v.reset(OpRISCV64MOVDconst) - v.Type = t v.AuxInt = int64ToAuxInt(0) return true } - // match: (SRLI [x] (MOVWUreg y)) + // match: (SRLI [x] (MOVWUreg y)) // cond: x >= 32 - // result: (MOVDconst [0]) + // result: (MOVDconst [0]) for { - t := v.Type x := auxIntToInt64(v.AuxInt) if v_0.Op != OpRISCV64MOVWUreg { break @@ -7768,7 +7758,6 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool { break } v.reset(OpRISCV64MOVDconst) - v.Type = t v.AuxInt = int64ToAuxInt(0) return true } @@ -7790,7 +7779,7 @@ func rewriteValueRISCV64_OpRISCV64SRLW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SRLW x (MOVDconst [val])) - // result: (SRLIW [int64(val&31)] x) + // result: (SRLIW [val&31] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7798,7 +7787,7 @@ func rewriteValueRISCV64_OpRISCV64SRLW(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64SRLIW) - v.AuxInt = int64ToAuxInt(int64(val & 31)) + v.AuxInt = int64ToAuxInt(val & 31) v.AddArg(x) return true } -- cgit v1.3-5-g9baa From 0a569528ea355099af864f7612c3fa1973df30e4 Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Tue, 26 Aug 2025 21:17:36 +0100 Subject: cmd/compile: optimize comparisons with single bit difference Optimize comparisons with constants that only differ by 1 bit (i.e. a power of 2). For example: x == 4 || x == 6 -> x|2 == 6 x != 1 && x != 5 -> x|4 != 5 Change-Id: Ic61719e5118446d21cf15652d9da22f7d95b2a15 Reviewed-on: https://go-review.googlesource.com/c/go/+/719420 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Auto-Submit: Keith Randall Reviewed-by: Keith Randall Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/_gen/generic.rules | 6 + src/cmd/compile/internal/ssa/fuse.go | 8 +- src/cmd/compile/internal/ssa/fuse_comparisons.go | 45 +++ src/cmd/compile/internal/ssa/rewritegeneric.go | 352 +++++++++++++++++++++++ test/codegen/fuse.go | 120 ++++++++ 5 files changed, 530 insertions(+), 1 deletion(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ssa/_gen/generic.rules b/src/cmd/compile/internal/ssa/_gen/generic.rules index 7e3aba1e5e..6efead03ad 100644 --- a/src/cmd/compile/internal/ssa/_gen/generic.rules +++ b/src/cmd/compile/internal/ssa/_gen/generic.rules @@ -337,6 +337,12 @@ (OrB ((Less|Leq)16U (Const16 [c]) x) (Leq16U x (Const16 [d]))) && uint16(c) >= uint16(d+1) && uint16(d+1) > uint16(d) => ((Less|Leq)16U (Const16 [c-d-1]) (Sub16 x (Const16 [d+1]))) (OrB ((Less|Leq)8U (Const8 [c]) x) (Leq8U x (Const8 [d]))) && uint8(c) >= uint8(d+1) && uint8(d+1) > uint8(d) => ((Less|Leq)8U (Const8 [c-d-1]) (Sub8 x (Const8 [d+1]))) +// single bit difference: ( x != c && x != d ) -> ( x|(c^d) != c ) +(AndB (Neq(64|32|16|8) x cv:(Const(64|32|16|8) [c])) (Neq(64|32|16|8) x (Const(64|32|16|8) [d]))) && c|d == c && oneBit(c^d) => (Neq(64|32|16|8) (Or(64|32|16|8) x (Const(64|32|16|8) [c^d])) cv) + +// single bit difference: ( x == c || x == d ) -> ( x|(c^d) == c ) +(OrB (Eq(64|32|16|8) x cv:(Const(64|32|16|8) [c])) (Eq(64|32|16|8) x (Const(64|32|16|8) [d]))) && c|d == c && oneBit(c^d) => (Eq(64|32|16|8) (Or(64|32|16|8) x (Const(64|32|16|8) [c^d])) cv) + // NaN check: ( x != x || x (>|>=|<|<=) c ) -> ( !(c (>=|>|<=|<) x) ) (OrB (Neq64F x x) ((Less|Leq)64F x y:(Const64F [c]))) => (Not ((Leq|Less)64F y x)) (OrB (Neq64F x x) ((Less|Leq)64F y:(Const64F [c]) x)) => (Not ((Leq|Less)64F x y)) diff --git a/src/cmd/compile/internal/ssa/fuse.go b/src/cmd/compile/internal/ssa/fuse.go index 0cee91b532..e95064c1df 100644 --- a/src/cmd/compile/internal/ssa/fuse.go +++ b/src/cmd/compile/internal/ssa/fuse.go @@ -10,7 +10,9 @@ import ( ) // fuseEarly runs fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeNanCheck). -func fuseEarly(f *Func) { fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeNanCheck) } +func fuseEarly(f *Func) { + fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeSingleBitDifference|fuseTypeNanCheck) +} // fuseLate runs fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect). func fuseLate(f *Func) { fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect) } @@ -21,6 +23,7 @@ const ( fuseTypePlain fuseType = 1 << iota fuseTypeIf fuseTypeIntInRange + fuseTypeSingleBitDifference fuseTypeNanCheck fuseTypeBranchRedirect fuseTypeShortCircuit @@ -41,6 +44,9 @@ func fuse(f *Func, typ fuseType) { if typ&fuseTypeIntInRange != 0 { changed = fuseIntInRange(b) || changed } + if typ&fuseTypeSingleBitDifference != 0 { + changed = fuseSingleBitDifference(b) || changed + } if typ&fuseTypeNanCheck != 0 { changed = fuseNanCheck(b) || changed } diff --git a/src/cmd/compile/internal/ssa/fuse_comparisons.go b/src/cmd/compile/internal/ssa/fuse_comparisons.go index b6eb8fcb90..898c034485 100644 --- a/src/cmd/compile/internal/ssa/fuse_comparisons.go +++ b/src/cmd/compile/internal/ssa/fuse_comparisons.go @@ -19,6 +19,14 @@ func fuseNanCheck(b *Block) bool { return fuseComparisons(b, canOptNanCheck) } +// fuseSingleBitDifference replaces the short-circuit operators between equality checks with +// constants that only differ by a single bit. For example, it would convert +// `if x == 4 || x == 6 { ... }` into `if (x == 4) | (x == 6) { ... }`. Rewrite rules can +// then optimize these using a bitwise operation, in this case generating `if x|2 == 6 { ... }`. +func fuseSingleBitDifference(b *Block) bool { + return fuseComparisons(b, canOptSingleBitDifference) +} + // fuseComparisons looks for control graphs that match this pattern: // // p - predecessor @@ -229,3 +237,40 @@ func canOptNanCheck(x, y *Value, op Op) bool { } return false } + +// canOptSingleBitDifference returns true if x op y matches either: +// +// v == c || v == d +// v != c && v != d +// +// Where c and d are constant values that differ by a single bit. +func canOptSingleBitDifference(x, y *Value, op Op) bool { + if x.Op != y.Op { + return false + } + switch x.Op { + case OpEq64, OpEq32, OpEq16, OpEq8: + if op != OpOrB { + return false + } + case OpNeq64, OpNeq32, OpNeq16, OpNeq8: + if op != OpAndB { + return false + } + default: + return false + } + + xi := getConstIntArgIndex(x) + if xi < 0 { + return false + } + yi := getConstIntArgIndex(y) + if yi < 0 { + return false + } + if x.Args[xi^1] != y.Args[yi^1] { + return false + } + return oneBit(x.Args[xi].AuxInt ^ y.Args[yi].AuxInt) +} diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index fd5139c0bb..2428f17947 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -5332,6 +5332,182 @@ func rewriteValuegeneric_OpAndB(v *Value) bool { } break } + // match: (AndB (Neq64 x cv:(Const64 [c])) (Neq64 x (Const64 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Neq64 (Or64 x (Const64 [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst64 { + continue + } + c := auxIntToInt64(cv.AuxInt) + if v_1.Op != OpNeq64 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst64 { + continue + } + d := auxIntToInt64(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpNeq64) + v0 := b.NewValue0(v.Pos, OpOr64, x.Type) + v1 := b.NewValue0(v.Pos, OpConst64, x.Type) + v1.AuxInt = int64ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } + // match: (AndB (Neq32 x cv:(Const32 [c])) (Neq32 x (Const32 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Neq32 (Or32 x (Const32 [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq32 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst32 { + continue + } + c := auxIntToInt32(cv.AuxInt) + if v_1.Op != OpNeq32 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst32 { + continue + } + d := auxIntToInt32(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpNeq32) + v0 := b.NewValue0(v.Pos, OpOr32, x.Type) + v1 := b.NewValue0(v.Pos, OpConst32, x.Type) + v1.AuxInt = int32ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } + // match: (AndB (Neq16 x cv:(Const16 [c])) (Neq16 x (Const16 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Neq16 (Or16 x (Const16 [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq16 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst16 { + continue + } + c := auxIntToInt16(cv.AuxInt) + if v_1.Op != OpNeq16 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst16 { + continue + } + d := auxIntToInt16(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpNeq16) + v0 := b.NewValue0(v.Pos, OpOr16, x.Type) + v1 := b.NewValue0(v.Pos, OpConst16, x.Type) + v1.AuxInt = int16ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } + // match: (AndB (Neq8 x cv:(Const8 [c])) (Neq8 x (Const8 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Neq8 (Or8 x (Const8 [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq8 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst8 { + continue + } + c := auxIntToInt8(cv.AuxInt) + if v_1.Op != OpNeq8 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst8 { + continue + } + d := auxIntToInt8(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpNeq8) + v0 := b.NewValue0(v.Pos, OpOr8, x.Type) + v1 := b.NewValue0(v.Pos, OpConst8, x.Type) + v1.AuxInt = int8ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } return false } func rewriteValuegeneric_OpArraySelect(v *Value) bool { @@ -23242,6 +23418,182 @@ func rewriteValuegeneric_OpOrB(v *Value) bool { } break } + // match: (OrB (Eq64 x cv:(Const64 [c])) (Eq64 x (Const64 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Eq64 (Or64 x (Const64 [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpEq64 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst64 { + continue + } + c := auxIntToInt64(cv.AuxInt) + if v_1.Op != OpEq64 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst64 { + continue + } + d := auxIntToInt64(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpEq64) + v0 := b.NewValue0(v.Pos, OpOr64, x.Type) + v1 := b.NewValue0(v.Pos, OpConst64, x.Type) + v1.AuxInt = int64ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } + // match: (OrB (Eq32 x cv:(Const32 [c])) (Eq32 x (Const32 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Eq32 (Or32 x (Const32 [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpEq32 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst32 { + continue + } + c := auxIntToInt32(cv.AuxInt) + if v_1.Op != OpEq32 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst32 { + continue + } + d := auxIntToInt32(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpEq32) + v0 := b.NewValue0(v.Pos, OpOr32, x.Type) + v1 := b.NewValue0(v.Pos, OpConst32, x.Type) + v1.AuxInt = int32ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } + // match: (OrB (Eq16 x cv:(Const16 [c])) (Eq16 x (Const16 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Eq16 (Or16 x (Const16 [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpEq16 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst16 { + continue + } + c := auxIntToInt16(cv.AuxInt) + if v_1.Op != OpEq16 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst16 { + continue + } + d := auxIntToInt16(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpEq16) + v0 := b.NewValue0(v.Pos, OpOr16, x.Type) + v1 := b.NewValue0(v.Pos, OpConst16, x.Type) + v1.AuxInt = int16ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } + // match: (OrB (Eq8 x cv:(Const8 [c])) (Eq8 x (Const8 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Eq8 (Or8 x (Const8 [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpEq8 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst8 { + continue + } + c := auxIntToInt8(cv.AuxInt) + if v_1.Op != OpEq8 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst8 { + continue + } + d := auxIntToInt8(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpEq8) + v0 := b.NewValue0(v.Pos, OpOr8, x.Type) + v1 := b.NewValue0(v.Pos, OpConst8, x.Type) + v1.AuxInt = int8ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } // match: (OrB (Neq64F x x) (Less64F x y:(Const64F [c]))) // result: (Not (Leq64F y x)) for { diff --git a/test/codegen/fuse.go b/test/codegen/fuse.go index 4fbb03bef8..e5a28549dc 100644 --- a/test/codegen/fuse.go +++ b/test/codegen/fuse.go @@ -198,6 +198,126 @@ func ui4d(c <-chan uint8) { } } +// ------------------------------------ // +// single bit difference (conjunction) // +// ------------------------------------ // + +func sisbc64(c <-chan int64) { + // amd64: "ORQ [$]2," + // riscv64: "ORI [$]2," + for x := <-c; x != 4 && x != 6; x = <-c { + } +} + +func sisbc32(c <-chan int32) { + // amd64: "ORL [$]4," + // riscv64: "ORI [$]4," + for x := <-c; x != -1 && x != -5; x = <-c { + } +} + +func sisbc16(c <-chan int16) { + // amd64: "ORL [$]32," + // riscv64: "ORI [$]32," + for x := <-c; x != 16 && x != 48; x = <-c { + } +} + +func sisbc8(c <-chan int8) { + // amd64: "ORL [$]16," + // riscv64: "ORI [$]16," + for x := <-c; x != -15 && x != -31; x = <-c { + } +} + +func uisbc64(c <-chan uint64) { + // amd64: "ORQ [$]4," + // riscv64: "ORI [$]4," + for x := <-c; x != 1 && x != 5; x = <-c { + } +} + +func uisbc32(c <-chan uint32) { + // amd64: "ORL [$]4," + // riscv64: "ORI [$]4," + for x := <-c; x != 2 && x != 6; x = <-c { + } +} + +func uisbc16(c <-chan uint16) { + // amd64: "ORL [$]32," + // riscv64: "ORI [$]32," + for x := <-c; x != 16 && x != 48; x = <-c { + } +} + +func uisbc8(c <-chan uint8) { + // amd64: "ORL [$]64," + // riscv64: "ORI [$]64," + for x := <-c; x != 64 && x != 0; x = <-c { + } +} + +// ------------------------------------ // +// single bit difference (disjunction) // +// ------------------------------------ // + +func sisbd64(c <-chan int64) { + // amd64: "ORQ [$]2," + // riscv64: "ORI [$]2," + for x := <-c; x == 4 || x == 6; x = <-c { + } +} + +func sisbd32(c <-chan int32) { + // amd64: "ORL [$]4," + // riscv64: "ORI [$]4," + for x := <-c; x == -1 || x == -5; x = <-c { + } +} + +func sisbd16(c <-chan int16) { + // amd64: "ORL [$]32," + // riscv64: "ORI [$]32," + for x := <-c; x == 16 || x == 48; x = <-c { + } +} + +func sisbd8(c <-chan int8) { + // amd64: "ORL [$]16," + // riscv64: "ORI [$]16," + for x := <-c; x == -15 || x == -31; x = <-c { + } +} + +func uisbd64(c <-chan uint64) { + // amd64: "ORQ [$]4," + // riscv64: "ORI [$]4," + for x := <-c; x == 1 || x == 5; x = <-c { + } +} + +func uisbd32(c <-chan uint32) { + // amd64: "ORL [$]4," + // riscv64: "ORI [$]4," + for x := <-c; x == 2 || x == 6; x = <-c { + } +} + +func uisbd16(c <-chan uint16) { + // amd64: "ORL [$]32," + // riscv64: "ORI [$]32," + for x := <-c; x == 16 || x == 48; x = <-c { + } +} + +func uisbd8(c <-chan uint8) { + // amd64: "ORL [$]64," + // riscv64: "ORI [$]64," + for x := <-c; x == 64 || x == 0; x = <-c { + } +} + // -------------------------------------// // merge NaN checks // // ------------------------------------ // -- cgit v1.3-5-g9baa From 2cdcc4150bc577e0b40a9cedaaa7c8301f2860cd Mon Sep 17 00:00:00 2001 From: Meng Zhuo Date: Fri, 14 Nov 2025 12:47:35 +0800 Subject: cmd/compile: fold negation into multiplication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit goos: linux goarch: riscv64 pkg: cmd/compile/internal/test cpu: Spacemit(R) X60 │ /root/mul.base.log │ /root/mul.new.log │ │ sec/op │ sec/op vs base │ MulNeg 6.426µ ± 0% 4.501µ ± 0% -29.96% (p=0.000 n=10) Mul2Neg 9.000µ ± 0% 6.431µ ± 0% -28.54% (p=0.000 n=10) Mul2 1.263µ ± 0% 1.263µ ± 0% ~ (p=1.000 n=10) MulNeg2 1.577µ ± 0% 1.577µ ± 0% ~ (p=0.211 n=10) geomean 3.276µ 2.756µ -15.89% goos: linux goarch: amd64 pkg: cmd/compile/internal/test cpu: AMD EPYC 7532 32-Core Processor │ /root/base │ /root/new │ │ sec/op │ sec/op vs base │ MulNeg 691.9n ± 1% 319.4n ± 0% -53.83% (p=0.000 n=10) Mul2Neg 630.0n ± 0% 629.6n ± 0% -0.07% (p=0.000 n=10) Mul2 438.1n ± 0% 438.1n ± 0% ~ (p=0.728 n=10) MulNeg2 439.3n ± 0% 439.4n ± 0% ~ (p=0.656 n=10) geomean 538.2n 443.6n -17.58% Change-Id: Ice8e6c8d1e8e3009ba8a0b1b689205174e199019 Reviewed-on: https://go-review.googlesource.com/c/go/+/720180 Reviewed-by: abner chenc Reviewed-by: Keith Randall Reviewed-by: Junyang Shao Reviewed-by: Joel Sing Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI Auto-Submit: Keith Randall --- src/cmd/compile/internal/ssa/_gen/LOONG64.rules | 3 - src/cmd/compile/internal/ssa/_gen/generic.rules | 7 +- src/cmd/compile/internal/ssa/rewriteLOONG64.go | 39 ------ src/cmd/compile/internal/ssa/rewritegeneric.go | 156 ++++++++++++++++++++++++ test/codegen/arithmetic.go | 12 +- 5 files changed, 171 insertions(+), 46 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules index 9691296043..2beba0b1c5 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules @@ -743,9 +743,6 @@ (MULV x (MOVVconst [c])) && canMulStrengthReduce(config, c) => {mulStrengthReduce(v, x, c)} -(MULV (NEGV x) (MOVVconst [c])) => (MULV x (MOVVconst [-c])) -(MULV (NEGV x) (NEGV y)) => (MULV x y) - (ADDV x0 x1:(SLLVconst [c] y)) && x1.Uses == 1 && c > 0 && c <= 4 => (ADDshiftLLV x0 y [c]) // fold constant in ADDshift op diff --git a/src/cmd/compile/internal/ssa/_gen/generic.rules b/src/cmd/compile/internal/ssa/_gen/generic.rules index 6efead03ad..e09cd31c31 100644 --- a/src/cmd/compile/internal/ssa/_gen/generic.rules +++ b/src/cmd/compile/internal/ssa/_gen/generic.rules @@ -195,6 +195,11 @@ // Convert x * -1 to -x. (Mul(8|16|32|64) (Const(8|16|32|64) [-1]) x) => (Neg(8|16|32|64) x) +// Convert -x * c to x * -c +(Mul(8|16|32|64) (Const(8|16|32|64) [c]) (Neg(8|16|32|64) x)) => (Mul(8|16|32|64) x (Const(8|16|32|64) [-c])) + +(Mul(8|16|32|64) (Neg(8|16|32|64) x) (Neg(8|16|32|64) y)) => (Mul(8|16|32|64) x y) + // DeMorgan's Laws (And(8|16|32|64) (Com(8|16|32|64) x) (Com(8|16|32|64) y)) => (Com(8|16|32|64) (Or(8|16|32|64) x y)) (Or(8|16|32|64) (Com(8|16|32|64) x) (Com(8|16|32|64) y)) => (Com(8|16|32|64) (And(8|16|32|64) x y)) @@ -2228,4 +2233,4 @@ (Neq(64|32|16) (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [0])) => x (Neq(64|32|16) (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [1])) => (Not x) (Eq(64|32|16) (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [1])) => x -(Eq(64|32|16) (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [0])) => (Not x) \ No newline at end of file +(Eq(64|32|16) (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [0])) => (Not x) diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go index 4262d4e0fb..bf2dd114a9 100644 --- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go +++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go @@ -5866,7 +5866,6 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool { v_0 := v.Args[0] b := v.Block config := b.Func.Config - typ := &b.Func.Config.Types // match: (MULV _ (MOVVconst [0])) // result: (MOVVconst [0]) for { @@ -5911,44 +5910,6 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool { } break } - // match: (MULV (NEGV x) (MOVVconst [c])) - // result: (MULV x (MOVVconst [-c])) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpLOONG64NEGV { - continue - } - x := v_0.Args[0] - if v_1.Op != OpLOONG64MOVVconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpLOONG64MULV) - v0 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v0.AuxInt = int64ToAuxInt(-c) - v.AddArg2(x, v0) - return true - } - break - } - // match: (MULV (NEGV x) (NEGV y)) - // result: (MULV x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpLOONG64NEGV { - continue - } - x := v_0.Args[0] - if v_1.Op != OpLOONG64NEGV { - continue - } - y := v_1.Args[0] - v.reset(OpLOONG64MULV) - v.AddArg2(x, y) - return true - } - break - } // match: (MULV (MOVVconst [c]) (MOVVconst [d])) // result: (MOVVconst [c*d]) for { diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index 2428f17947..1621153b43 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -16786,6 +16786,45 @@ func rewriteValuegeneric_OpMul16(v *Value) bool { } break } + // match: (Mul16 (Const16 [c]) (Neg16 x)) + // result: (Mul16 x (Const16 [-c])) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpConst16 { + continue + } + t := v_0.Type + c := auxIntToInt16(v_0.AuxInt) + if v_1.Op != OpNeg16 { + continue + } + x := v_1.Args[0] + v.reset(OpMul16) + v0 := b.NewValue0(v.Pos, OpConst16, t) + v0.AuxInt = int16ToAuxInt(-c) + v.AddArg2(x, v0) + return true + } + break + } + // match: (Mul16 (Neg16 x) (Neg16 y)) + // result: (Mul16 x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeg16 { + continue + } + x := v_0.Args[0] + if v_1.Op != OpNeg16 { + continue + } + y := v_1.Args[0] + v.reset(OpMul16) + v.AddArg2(x, y) + return true + } + break + } // match: (Mul16 (Const16 [c]) (Add16 (Const16 [d]) x)) // cond: !isPowerOfTwo(c) // result: (Add16 (Const16 [c*d]) (Mul16 (Const16 [c]) x)) @@ -16997,6 +17036,45 @@ func rewriteValuegeneric_OpMul32(v *Value) bool { } break } + // match: (Mul32 (Const32 [c]) (Neg32 x)) + // result: (Mul32 x (Const32 [-c])) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpConst32 { + continue + } + t := v_0.Type + c := auxIntToInt32(v_0.AuxInt) + if v_1.Op != OpNeg32 { + continue + } + x := v_1.Args[0] + v.reset(OpMul32) + v0 := b.NewValue0(v.Pos, OpConst32, t) + v0.AuxInt = int32ToAuxInt(-c) + v.AddArg2(x, v0) + return true + } + break + } + // match: (Mul32 (Neg32 x) (Neg32 y)) + // result: (Mul32 x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeg32 { + continue + } + x := v_0.Args[0] + if v_1.Op != OpNeg32 { + continue + } + y := v_1.Args[0] + v.reset(OpMul32) + v.AddArg2(x, y) + return true + } + break + } // match: (Mul32 (Const32 [c]) (Add32 (Const32 [d]) x)) // cond: !isPowerOfTwo(c) // result: (Add32 (Const32 [c*d]) (Mul32 (Const32 [c]) x)) @@ -17369,6 +17447,45 @@ func rewriteValuegeneric_OpMul64(v *Value) bool { } break } + // match: (Mul64 (Const64 [c]) (Neg64 x)) + // result: (Mul64 x (Const64 [-c])) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpConst64 { + continue + } + t := v_0.Type + c := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpNeg64 { + continue + } + x := v_1.Args[0] + v.reset(OpMul64) + v0 := b.NewValue0(v.Pos, OpConst64, t) + v0.AuxInt = int64ToAuxInt(-c) + v.AddArg2(x, v0) + return true + } + break + } + // match: (Mul64 (Neg64 x) (Neg64 y)) + // result: (Mul64 x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeg64 { + continue + } + x := v_0.Args[0] + if v_1.Op != OpNeg64 { + continue + } + y := v_1.Args[0] + v.reset(OpMul64) + v.AddArg2(x, y) + return true + } + break + } // match: (Mul64 (Const64 [c]) (Add64 (Const64 [d]) x)) // cond: !isPowerOfTwo(c) // result: (Add64 (Const64 [c*d]) (Mul64 (Const64 [c]) x)) @@ -17741,6 +17858,45 @@ func rewriteValuegeneric_OpMul8(v *Value) bool { } break } + // match: (Mul8 (Const8 [c]) (Neg8 x)) + // result: (Mul8 x (Const8 [-c])) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpConst8 { + continue + } + t := v_0.Type + c := auxIntToInt8(v_0.AuxInt) + if v_1.Op != OpNeg8 { + continue + } + x := v_1.Args[0] + v.reset(OpMul8) + v0 := b.NewValue0(v.Pos, OpConst8, t) + v0.AuxInt = int8ToAuxInt(-c) + v.AddArg2(x, v0) + return true + } + break + } + // match: (Mul8 (Neg8 x) (Neg8 y)) + // result: (Mul8 x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeg8 { + continue + } + x := v_0.Args[0] + if v_1.Op != OpNeg8 { + continue + } + y := v_1.Args[0] + v.reset(OpMul8) + v.AddArg2(x, y) + return true + } + break + } // match: (Mul8 (Const8 [c]) (Add8 (Const8 [d]) x)) // cond: !isPowerOfTwo(c) // result: (Add8 (Const8 [c*d]) (Mul8 (Const8 [c]) x)) diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index 42d5d2ef65..6b2c5529e1 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -318,13 +318,19 @@ func MergeMuls5(a, n int) int { // Multiplications folded negation func FoldNegMul(a int) int { - // loong64:"SUBVU" "ALSLV [$]2" "ALSLV [$]1" - return (-a) * 11 + // amd64:"IMUL3Q [$]-11" -"NEGQ" + // arm64:"MOVD [$]-11" "MUL" -"NEG" + // loong64:"ALSLV [$]2" "SUBVU" "ALSLV [$]4" + // riscv64:"MOV [$]-11" "MUL" -"NEG" + return -a * 11 } func Fold2NegMul(a, b int) int { + // amd64:"IMULQ" -"NEGQ" + // arm64:"MUL" -"NEG" // loong64:"MULV" -"SUBVU R[0-9], R0," - return (-a) * (-b) + // riscv64:"MUL" -"NEG" + return -a * -b } // -------------- // -- cgit v1.3-5-g9baa From a0e738c657d33e2a648838546812fb50cf42e41d Mon Sep 17 00:00:00 2001 From: Mark Ryan Date: Thu, 13 Nov 2025 12:28:29 +0100 Subject: cmd/compile/internal: remove incorrect riscv64 SLTI rule The rule (SLTI [x] (ORI [y] _)) && y >= 0 && int64(y) >= int64(x) => (MOVDconst [0]) is incorrect as it only generates correct code if the unknown value being compared is >= 0. If the unknown value is < 0 the rule will incorrectly produce a constant value of 0, whereas the code optimized away by the rule would have produced a value of 1. A new test that causes the faulty rule to generate incorrect code is also added to ensure that the error does not return. Change-Id: I69224e0776596f1b9538acf9dacf9009d305f966 Reviewed-on: https://go-review.googlesource.com/c/go/+/720220 Reviewed-by: Meng Zhuo Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall Reviewed-by: Joel Sing Auto-Submit: Keith Randall Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/_gen/RISCV64.rules | 1 - src/cmd/compile/internal/ssa/rewriteRISCV64.go | 16 ---------------- src/cmd/compile/internal/test/testdata/arith_test.go | 14 ++++++++++++++ 3 files changed, 14 insertions(+), 17 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules index 6166bd5584..13a8cab3b5 100644 --- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules @@ -792,7 +792,6 @@ // SLTI/SLTIU with known outcomes. (SLTI [x] (ANDI [y] _)) && y >= 0 && int64(y) < int64(x) => (MOVDconst [1]) (SLTIU [x] (ANDI [y] _)) && y >= 0 && uint64(y) < uint64(x) => (MOVDconst [1]) -(SLTI [x] (ORI [y] _)) && y >= 0 && int64(y) >= int64(x) => (MOVDconst [0]) (SLTIU [x] (ORI [y] _)) && y >= 0 && uint64(y) >= uint64(x) => (MOVDconst [0]) // SLT/SLTU with known outcomes. diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go index 24fef3fe72..284d88967b 100644 --- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go +++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go @@ -7362,22 +7362,6 @@ func rewriteValueRISCV64_OpRISCV64SLTI(v *Value) bool { v.AuxInt = int64ToAuxInt(1) return true } - // match: (SLTI [x] (ORI [y] _)) - // cond: y >= 0 && int64(y) >= int64(x) - // result: (MOVDconst [0]) - for { - x := auxIntToInt64(v.AuxInt) - if v_0.Op != OpRISCV64ORI { - break - } - y := auxIntToInt64(v_0.AuxInt) - if !(y >= 0 && int64(y) >= int64(x)) { - break - } - v.reset(OpRISCV64MOVDconst) - v.AuxInt = int64ToAuxInt(0) - return true - } return false } func rewriteValueRISCV64_OpRISCV64SLTIU(v *Value) bool { diff --git a/src/cmd/compile/internal/test/testdata/arith_test.go b/src/cmd/compile/internal/test/testdata/arith_test.go index 8984cd3e26..34ac73c068 100644 --- a/src/cmd/compile/internal/test/testdata/arith_test.go +++ b/src/cmd/compile/internal/test/testdata/arith_test.go @@ -444,6 +444,19 @@ func testBitwiseRshU_ssa(a uint32, b, c uint32) uint32 { return a >> b >> c } +//go:noinline +func orLt_ssa(x int) bool { + y := x - x + return (x | 2) < y +} + +// test riscv64 SLTI rules +func testSetIfLessThan(t *testing.T) { + if want, got := true, orLt_ssa(-7); got != want { + t.Errorf("orLt_ssa(-7) = %t want %t", got, want) + } +} + //go:noinline func testShiftCX_ssa() int { v1 := uint8(3) @@ -977,6 +990,7 @@ func TestArithmetic(t *testing.T) { testRegallocCVSpill(t) testSubqToNegq(t) testBitwiseLogic(t) + testSetIfLessThan(t) testOcom(t) testLrot(t) testShiftCX(t) -- cgit v1.3-5-g9baa From 03ed43988ff7f7671094d8c455532de7f2242e70 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Sat, 14 Jun 2025 20:10:50 -0700 Subject: cmd/compile: allow multi-field structs to be stored directly in interfaces If the struct is a bunch of 0-sized fields and one pointer field. Merged revert-of-revert for 4 CLs. original revert 681937 695016 693415 694996 693615 695015 694195 694995 Fixes #74092 Update #74888 Update #74908 Update #74935 (updated issues are bugs in the last attempt at this) Change-Id: I32246d49b8bac3bb080972dc06ab432a5480d560 Reviewed-on: https://go-review.googlesource.com/c/go/+/714421 Auto-Submit: Keith Randall Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/_gen/dec.rules | 9 +++-- src/cmd/compile/internal/ssa/_gen/generic.rules | 6 ++- src/cmd/compile/internal/ssa/expand_calls.go | 9 ++++- src/cmd/compile/internal/ssa/rewrite.go | 14 +++++++ src/cmd/compile/internal/ssa/rewritedec.go | 52 +++++++++++++++++++++---- src/cmd/compile/internal/ssa/rewritegeneric.go | 45 ++++++++++++++++----- src/cmd/compile/internal/types/type.go | 21 +--------- src/internal/abi/type.go | 4 +- src/reflect/type.go | 6 +-- src/reflect/value.go | 11 ++++++ test/fixedbugs/issue74888.go | 20 ++++++++++ test/fixedbugs/issue74908.go | 24 ++++++++++++ test/fixedbugs/issue74935.go | 19 +++++++++ 13 files changed, 191 insertions(+), 49 deletions(-) create mode 100644 test/fixedbugs/issue74888.go create mode 100644 test/fixedbugs/issue74908.go create mode 100644 test/fixedbugs/issue74935.go (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ssa/_gen/dec.rules b/src/cmd/compile/internal/ssa/_gen/dec.rules index 9f6dc36975..fce0026211 100644 --- a/src/cmd/compile/internal/ssa/_gen/dec.rules +++ b/src/cmd/compile/internal/ssa/_gen/dec.rules @@ -97,8 +97,10 @@ // Helpers for expand calls // Some of these are copied from generic.rules -(IMake _typ (StructMake val)) => (IMake _typ val) -(StructSelect [0] (IData x)) => (IData x) +(IMake _typ (StructMake ___)) => imakeOfStructMake(v) +(StructSelect (IData x)) && v.Type.Size() > 0 => (IData x) +(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsStruct() => (StructMake) +(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsArray() => (ArrayMake0) (StructSelect [i] x:(StructMake ___)) => x.Args[i] @@ -109,7 +111,7 @@ // More annoying case: (ArraySelect[0] (StructSelect[0] isAPtr)) // There, result of the StructSelect is an Array (not a pointer) and // the pre-rewrite input to the ArraySelect is a struct, not a pointer. -(StructSelect [0] x) && x.Type.IsPtrShaped() => x +(StructSelect x) && x.Type.IsPtrShaped() => x (ArraySelect [0] x) && x.Type.IsPtrShaped() => x // These, too. Bits is bits. @@ -119,6 +121,7 @@ (Store _ (StructMake ___) _) => rewriteStructStore(v) +(IMake _typ (ArrayMake1 val)) => (IMake _typ val) (ArraySelect (ArrayMake1 x)) => x (ArraySelect [0] (IData x)) => (IData x) diff --git a/src/cmd/compile/internal/ssa/_gen/generic.rules b/src/cmd/compile/internal/ssa/_gen/generic.rules index e09cd31c31..372e89863d 100644 --- a/src/cmd/compile/internal/ssa/_gen/generic.rules +++ b/src/cmd/compile/internal/ssa/_gen/generic.rules @@ -944,8 +944,10 @@ @x.Block (Load (OffPtr [t.FieldOff(int(i))] ptr) mem) // Putting struct{*byte} and similar into direct interfaces. -(IMake _typ (StructMake val)) => (IMake _typ val) -(StructSelect [0] (IData x)) => (IData x) +(IMake _typ (StructMake ___)) => imakeOfStructMake(v) +(StructSelect (IData x)) && v.Type.Size() > 0 => (IData x) +(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsStruct() => (StructMake) +(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsArray() => (ArrayMake0) // un-SSAable values use mem->mem copies (Store {t} dst (Load src mem) mem) && !CanSSA(t) => diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go index 8a5b364c2f..6afce759b2 100644 --- a/src/cmd/compile/internal/ssa/expand_calls.go +++ b/src/cmd/compile/internal/ssa/expand_calls.go @@ -423,7 +423,14 @@ func (x *expandState) decomposeAsNecessary(pos src.XPos, b *Block, a, m0 *Value, if a.Op == OpIMake { data := a.Args[1] for data.Op == OpStructMake || data.Op == OpArrayMake1 { - data = data.Args[0] + // A struct make might have a few zero-sized fields. + // Use the pointer-y one we know is there. + for _, a := range data.Args { + if a.Type.Size() > 0 { + data = a + break + } + } } return x.decomposeAsNecessary(pos, b, data, mem, rc.next(data.Type)) } diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 07308973b1..af2568ae89 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -2772,3 +2772,17 @@ func panicBoundsCCToAux(p PanicBoundsCC) Aux { func isDictArgSym(sym Sym) bool { return sym.(*ir.Name).Sym().Name == typecheck.LocalDictName } + +// When v is (IMake typ (StructMake ...)), convert to +// (IMake typ arg) where arg is the pointer-y argument to +// the StructMake (there must be exactly one). +func imakeOfStructMake(v *Value) *Value { + var arg *Value + for _, a := range v.Args[1].Args { + if a.Type.Size() > 0 { + arg = a + break + } + } + return v.Block.NewValue2(v.Pos, OpIMake, v.Type, v.Args[0], arg) +} diff --git a/src/cmd/compile/internal/ssa/rewritedec.go b/src/cmd/compile/internal/ssa/rewritedec.go index 16d0269210..c45034ead0 100644 --- a/src/cmd/compile/internal/ssa/rewritedec.go +++ b/src/cmd/compile/internal/ssa/rewritedec.go @@ -279,11 +279,20 @@ func rewriteValuedec_OpIData(v *Value) bool { func rewriteValuedec_OpIMake(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (IMake _typ (StructMake val)) + // match: (IMake _typ (StructMake ___)) + // result: imakeOfStructMake(v) + for { + if v_1.Op != OpStructMake { + break + } + v.copyOf(imakeOfStructMake(v)) + return true + } + // match: (IMake _typ (ArrayMake1 val)) // result: (IMake _typ val) for { _typ := v_0 - if v_1.Op != OpStructMake || len(v_1.Args) != 1 { + if v_1.Op != OpArrayMake1 { break } val := v_1.Args[0] @@ -839,17 +848,47 @@ func rewriteValuedec_OpStructMake(v *Value) bool { func rewriteValuedec_OpStructSelect(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (StructSelect [0] (IData x)) + // match: (StructSelect (IData x)) + // cond: v.Type.Size() > 0 // result: (IData x) for { - if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpIData { + if v_0.Op != OpIData { break } x := v_0.Args[0] + if !(v.Type.Size() > 0) { + break + } v.reset(OpIData) v.AddArg(x) return true } + // match: (StructSelect (IData x)) + // cond: v.Type.Size() == 0 && v.Type.IsStruct() + // result: (StructMake) + for { + if v_0.Op != OpIData { + break + } + if !(v.Type.Size() == 0 && v.Type.IsStruct()) { + break + } + v.reset(OpStructMake) + return true + } + // match: (StructSelect (IData x)) + // cond: v.Type.Size() == 0 && v.Type.IsArray() + // result: (ArrayMake0) + for { + if v_0.Op != OpIData { + break + } + if !(v.Type.Size() == 0 && v.Type.IsArray()) { + break + } + v.reset(OpArrayMake0) + return true + } // match: (StructSelect [i] x:(StructMake ___)) // result: x.Args[i] for { @@ -861,13 +900,10 @@ func rewriteValuedec_OpStructSelect(v *Value) bool { v.copyOf(x.Args[i]) return true } - // match: (StructSelect [0] x) + // match: (StructSelect x) // cond: x.Type.IsPtrShaped() // result: x for { - if auxIntToInt64(v.AuxInt) != 0 { - break - } x := v_0 if !(x.Type.IsPtrShaped()) { break diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index 1621153b43..f0560671ac 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -8985,16 +8985,13 @@ func rewriteValuegeneric_OpFloor(v *Value) bool { func rewriteValuegeneric_OpIMake(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (IMake _typ (StructMake val)) - // result: (IMake _typ val) + // match: (IMake _typ (StructMake ___)) + // result: imakeOfStructMake(v) for { - _typ := v_0 - if v_1.Op != OpStructMake || len(v_1.Args) != 1 { + if v_1.Op != OpStructMake { break } - val := v_1.Args[0] - v.reset(OpIMake) - v.AddArg2(_typ, val) + v.copyOf(imakeOfStructMake(v)) return true } // match: (IMake _typ (ArrayMake1 val)) @@ -32109,17 +32106,47 @@ func rewriteValuegeneric_OpStructSelect(v *Value) bool { v0.AddArg2(v1, mem) return true } - // match: (StructSelect [0] (IData x)) + // match: (StructSelect (IData x)) + // cond: v.Type.Size() > 0 // result: (IData x) for { - if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpIData { + if v_0.Op != OpIData { break } x := v_0.Args[0] + if !(v.Type.Size() > 0) { + break + } v.reset(OpIData) v.AddArg(x) return true } + // match: (StructSelect (IData x)) + // cond: v.Type.Size() == 0 && v.Type.IsStruct() + // result: (StructMake) + for { + if v_0.Op != OpIData { + break + } + if !(v.Type.Size() == 0 && v.Type.IsStruct()) { + break + } + v.reset(OpStructMake) + return true + } + // match: (StructSelect (IData x)) + // cond: v.Type.Size() == 0 && v.Type.IsArray() + // result: (ArrayMake0) + for { + if v_0.Op != OpIData { + break + } + if !(v.Type.Size() == 0 && v.Type.IsArray()) { + break + } + v.reset(OpArrayMake0) + return true + } return false } func rewriteValuegeneric_OpSub16(v *Value) bool { diff --git a/src/cmd/compile/internal/types/type.go b/src/cmd/compile/internal/types/type.go index 8de589bae3..2f5de288c2 100644 --- a/src/cmd/compile/internal/types/type.go +++ b/src/cmd/compile/internal/types/type.go @@ -1822,26 +1822,7 @@ func IsReflexive(t *Type) bool { // Can this type be stored directly in an interface word? // Yes, if the representation is a single pointer. func IsDirectIface(t *Type) bool { - switch t.Kind() { - case TPTR: - // Pointers to notinheap types must be stored indirectly. See issue 42076. - return !t.Elem().NotInHeap() - case TCHAN, - TMAP, - TFUNC, - TUNSAFEPTR: - return true - - case TARRAY: - // Array of 1 direct iface type can be direct. - return t.NumElem() == 1 && IsDirectIface(t.Elem()) - - case TSTRUCT: - // Struct with 1 field of direct iface type can be direct. - return t.NumFields() == 1 && IsDirectIface(t.Field(0).Type) - } - - return false + return t.Size() == int64(PtrSize) && PtrDataSize(t) == int64(PtrSize) } // IsInterfaceMethod reports whether (field) m is diff --git a/src/internal/abi/type.go b/src/internal/abi/type.go index 7f44a9de56..243b787cfc 100644 --- a/src/internal/abi/type.go +++ b/src/internal/abi/type.go @@ -121,8 +121,8 @@ const ( TFlagGCMaskOnDemand TFlag = 1 << 4 // TFlagDirectIface means that a value of this type is stored directly - // in the data field of an interface, instead of indirectly. Normally - // this means the type is pointer-ish. + // in the data field of an interface, instead of indirectly. + // This flag is just a cached computation of Size_ == PtrBytes == goarch.PtrSize. TFlagDirectIface TFlag = 1 << 5 // Leaving this breadcrumb behind for dlv. It should not be used, and no diff --git a/src/reflect/type.go b/src/reflect/type.go index 9b8726824e..914b5443f3 100644 --- a/src/reflect/type.go +++ b/src/reflect/type.go @@ -2528,8 +2528,7 @@ func StructOf(fields []StructField) Type { } switch { - case len(fs) == 1 && fs[0].Typ.IsDirectIface(): - // structs of 1 direct iface type can be direct + case typ.Size_ == goarch.PtrSize && typ.PtrBytes == goarch.PtrSize: typ.TFlag |= abi.TFlagDirectIface default: typ.TFlag &^= abi.TFlagDirectIface @@ -2698,8 +2697,7 @@ func ArrayOf(length int, elem Type) Type { } switch { - case length == 1 && typ.IsDirectIface(): - // array of 1 direct iface type can be direct + case array.Size_ == goarch.PtrSize && array.PtrBytes == goarch.PtrSize: array.TFlag |= abi.TFlagDirectIface default: array.TFlag &^= abi.TFlagDirectIface diff --git a/src/reflect/value.go b/src/reflect/value.go index b5d5aa8bf2..a82d976c47 100644 --- a/src/reflect/value.go +++ b/src/reflect/value.go @@ -1279,6 +1279,17 @@ func (v Value) Field(i int) Value { fl |= flagStickyRO } } + if fl&flagIndir == 0 && typ.Size() == 0 { + // Special case for picking a field out of a direct struct. + // A direct struct must have a pointer field and possibly a + // bunch of zero-sized fields. We must return the zero-sized + // fields indirectly, as only ptr-shaped things can be direct. + // See issue 74935. + // We use nil instead of v.ptr as it doesn't matter and + // we can avoid pinning a possibly now-unused object. + return Value{typ, nil, fl | flagIndir} + } + // Either flagIndir is set and v.ptr points at struct, // or flagIndir is not set and v.ptr is the actual struct data. // In the former case, we want v.ptr + offset. diff --git a/test/fixedbugs/issue74888.go b/test/fixedbugs/issue74888.go new file mode 100644 index 0000000000..a0083adb9c --- /dev/null +++ b/test/fixedbugs/issue74888.go @@ -0,0 +1,20 @@ +// compile + +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +type P struct { + q struct{} + p *int +} + +func f(x any) { + h(x.(P)) +} + +//go:noinline +func h(P) { +} diff --git a/test/fixedbugs/issue74908.go b/test/fixedbugs/issue74908.go new file mode 100644 index 0000000000..cb2e951768 --- /dev/null +++ b/test/fixedbugs/issue74908.go @@ -0,0 +1,24 @@ +// compile + +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +type Type struct { + any +} + +type typeObject struct { + e struct{} + b *byte +} + +func f(b *byte) Type { + return Type{ + typeObject{ + b: b, + }, + } +} diff --git a/test/fixedbugs/issue74935.go b/test/fixedbugs/issue74935.go new file mode 100644 index 0000000000..1f6f718b02 --- /dev/null +++ b/test/fixedbugs/issue74935.go @@ -0,0 +1,19 @@ +// run + +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import "reflect" + +type W struct { + E struct{} + X *byte +} + +func main() { + w := reflect.ValueOf(W{}) + _ = w.Field(0).Interface() +} -- cgit v1.3-5-g9baa From aea881230dcc640ad730d3759423104074577756 Mon Sep 17 00:00:00 2001 From: Alan Donovan Date: Fri, 14 Nov 2025 14:59:36 -0500 Subject: std: fix printf("%q", int) mistakes For #72850 Change-Id: I07e64f05c82a34b1dadb9a72e16f5045e68cbd24 Reviewed-on: https://go-review.googlesource.com/c/go/+/720642 Auto-Submit: Alan Donovan Reviewed-by: Dmitri Shuralyov Reviewed-by: Dmitri Shuralyov LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/ir/fmt.go | 2 +- src/cmd/vet/testdata/print/print.go | 2 +- src/crypto/rsa/equal_test.go | 2 +- src/crypto/tls/bogo_shim_test.go | 2 +- src/database/sql/fakedb_test.go | 2 +- src/fmt/scan_test.go | 2 +- src/internal/pkgbits/pkgbits_test.go | 2 +- src/os/os_test.go | 2 +- src/reflect/all_test.go | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ir/fmt.go b/src/cmd/compile/internal/ir/fmt.go index ae4ff62652..eb64cce47b 100644 --- a/src/cmd/compile/internal/ir/fmt.go +++ b/src/cmd/compile/internal/ir/fmt.go @@ -574,7 +574,7 @@ func exprFmt(n Node, s fmt.State, prec int) { // Special case for rune constants. if typ == types.RuneType || typ == types.UntypedRune { if x, ok := constant.Uint64Val(val); ok && x <= utf8.MaxRune { - fmt.Fprintf(s, "%q", x) + fmt.Fprintf(s, "%q", rune(x)) return } } diff --git a/src/cmd/vet/testdata/print/print.go b/src/cmd/vet/testdata/print/print.go index 3761da420b..9145c12fb8 100644 --- a/src/cmd/vet/testdata/print/print.go +++ b/src/cmd/vet/testdata/print/print.go @@ -75,7 +75,7 @@ func PrintfTests() { fmt.Printf("%b %b %b %b", 3e9, x, fslice, c) fmt.Printf("%o %o", 3, i) fmt.Printf("%p", p) - fmt.Printf("%q %q %q %q", 3, i, 'x', r) + fmt.Printf("%q %q %q", rune(3), 'x', r) fmt.Printf("%s %s %s", "hi", s, []byte{65}) fmt.Printf("%t %t", true, b) fmt.Printf("%T %T", 3, i) diff --git a/src/crypto/rsa/equal_test.go b/src/crypto/rsa/equal_test.go index 435429c3d1..39a9cdc86c 100644 --- a/src/crypto/rsa/equal_test.go +++ b/src/crypto/rsa/equal_test.go @@ -21,7 +21,7 @@ func TestEqual(t *testing.T) { t.Errorf("public key is not equal to itself: %v", public) } if !public.Equal(crypto.Signer(private).Public().(*rsa.PublicKey)) { - t.Errorf("private.Public() is not Equal to public: %q", public) + t.Errorf("private.Public() is not Equal to public: %v", public) } if !private.Equal(private) { t.Errorf("private key is not equal to itself: %v", private) diff --git a/src/crypto/tls/bogo_shim_test.go b/src/crypto/tls/bogo_shim_test.go index 8f171d9259..02a943c13c 100644 --- a/src/crypto/tls/bogo_shim_test.go +++ b/src/crypto/tls/bogo_shim_test.go @@ -461,7 +461,7 @@ func bogoShim() { } if *expectVersion != 0 && cs.Version != uint16(*expectVersion) { - log.Fatalf("expected ssl version %q, got %q", uint16(*expectVersion), cs.Version) + log.Fatalf("expected ssl version %d, got %d", *expectVersion, cs.Version) } if *declineALPN && cs.NegotiatedProtocol != "" { log.Fatal("unexpected ALPN protocol") diff --git a/src/database/sql/fakedb_test.go b/src/database/sql/fakedb_test.go index 003e6c6298..e5f0459303 100644 --- a/src/database/sql/fakedb_test.go +++ b/src/database/sql/fakedb_test.go @@ -969,7 +969,7 @@ func (s *fakeStmt) QueryContext(ctx context.Context, args []driver.NamedValue) ( idx := t.columnIndex(wcol.Column) if idx == -1 { t.mu.Unlock() - return nil, fmt.Errorf("fakedb: invalid where clause column %q", wcol) + return nil, fmt.Errorf("fakedb: invalid where clause column %v", wcol) } tcol := trow.cols[idx] if bs, ok := tcol.([]byte); ok { diff --git a/src/fmt/scan_test.go b/src/fmt/scan_test.go index a4f80c23c2..ff9e4f30ca 100644 --- a/src/fmt/scan_test.go +++ b/src/fmt/scan_test.go @@ -998,7 +998,7 @@ func TestScanStateCount(t *testing.T) { t.Errorf("bad scan rune: %q %q %q should be '1' '2' '➂'", a.rune, b.rune, c.rune) } if a.size != 1 || b.size != 1 || c.size != 3 { - t.Errorf("bad scan size: %q %q %q should be 1 1 3", a.size, b.size, c.size) + t.Errorf("bad scan size: %d %d %d should be 1 1 3", a.size, b.size, c.size) } } diff --git a/src/internal/pkgbits/pkgbits_test.go b/src/internal/pkgbits/pkgbits_test.go index a4755bd35a..f67267189f 100644 --- a/src/internal/pkgbits/pkgbits_test.go +++ b/src/internal/pkgbits/pkgbits_test.go @@ -28,7 +28,7 @@ func TestRoundTrip(t *testing.T) { r := pr.NewDecoder(pkgbits.SectionMeta, pkgbits.PublicRootIdx, pkgbits.SyncPublic) if r.Version() != w.Version() { - t.Errorf("Expected reader version %q to be the writer version %q", r.Version(), w.Version()) + t.Errorf("Expected reader version %d to be the writer version %d", r.Version(), w.Version()) } } } diff --git a/src/os/os_test.go b/src/os/os_test.go index 536734901b..29f2e6d3b2 100644 --- a/src/os/os_test.go +++ b/src/os/os_test.go @@ -1192,7 +1192,7 @@ func TestRenameCaseDifference(pt *testing.T) { } if dirNamesLen := len(dirNames); dirNamesLen != 1 { - t.Fatalf("unexpected dirNames len, got %q, want %q", dirNamesLen, 1) + t.Fatalf("unexpected dirNames len, got %d, want %d", dirNamesLen, 1) } if dirNames[0] != to { diff --git a/src/reflect/all_test.go b/src/reflect/all_test.go index 8509f00a5e..30ec3fad51 100644 --- a/src/reflect/all_test.go +++ b/src/reflect/all_test.go @@ -6807,7 +6807,7 @@ func TestMakeFuncStackCopy(t *testing.T) { ValueOf(&concrete).Elem().Set(fn) x := concrete(nil, 7) if x != 9 { - t.Errorf("have %#q want 9", x) + t.Errorf("have %d want 9", x) } } -- cgit v1.3-5-g9baa From bc159638135e751a291fe6753fc8c8c3d61be863 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Fri, 14 Nov 2025 14:57:47 -0800 Subject: cmd/compile: clean up prove pass flowLimit no longer needs to return whether it updated any information, after CL 714920 which got rid of the iterate-until-done paradigm. Change-Id: I0c5f592578ff27c27586c1f8b8a8d9071d94846d Reviewed-on: https://go-review.googlesource.com/c/go/+/720720 Reviewed-by: Youlin Feng Reviewed-by: Mark Freeman Reviewed-by: Jorropo LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/prove.go | 148 ++++++++++++++++------------------ 1 file changed, 71 insertions(+), 77 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ssa/prove.go b/src/cmd/compile/internal/ssa/prove.go index 4919d6ad37..bbcab3efa5 100644 --- a/src/cmd/compile/internal/ssa/prove.go +++ b/src/cmd/compile/internal/ssa/prove.go @@ -466,57 +466,56 @@ func (ft *factsTable) initLimitForNewValue(v *Value) { // signedMin records the fact that we know v is at least // min in the signed domain. -func (ft *factsTable) signedMin(v *Value, min int64) bool { - return ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64}) +func (ft *factsTable) signedMin(v *Value, min int64) { + ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64}) } // signedMax records the fact that we know v is at most // max in the signed domain. -func (ft *factsTable) signedMax(v *Value, max int64) bool { - return ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64}) +func (ft *factsTable) signedMax(v *Value, max int64) { + ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64}) } -func (ft *factsTable) signedMinMax(v *Value, min, max int64) bool { - return ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64}) +func (ft *factsTable) signedMinMax(v *Value, min, max int64) { + ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64}) } // setNonNegative records the fact that v is known to be non-negative. -func (ft *factsTable) setNonNegative(v *Value) bool { - return ft.signedMin(v, 0) +func (ft *factsTable) setNonNegative(v *Value) { + ft.signedMin(v, 0) } // unsignedMin records the fact that we know v is at least // min in the unsigned domain. -func (ft *factsTable) unsignedMin(v *Value, min uint64) bool { - return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64}) +func (ft *factsTable) unsignedMin(v *Value, min uint64) { + ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64}) } // unsignedMax records the fact that we know v is at most // max in the unsigned domain. -func (ft *factsTable) unsignedMax(v *Value, max uint64) bool { - return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max}) +func (ft *factsTable) unsignedMax(v *Value, max uint64) { + ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max}) } -func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) bool { - return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max}) +func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) { + ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max}) } -func (ft *factsTable) booleanFalse(v *Value) bool { - return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0}) +func (ft *factsTable) booleanFalse(v *Value) { + ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0}) } -func (ft *factsTable) booleanTrue(v *Value) bool { - return ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1}) +func (ft *factsTable) booleanTrue(v *Value) { + ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1}) } -func (ft *factsTable) pointerNil(v *Value) bool { - return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0}) +func (ft *factsTable) pointerNil(v *Value) { + ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0}) } -func (ft *factsTable) pointerNonNil(v *Value) bool { +func (ft *factsTable) pointerNonNil(v *Value) { l := noLimit l.umin = 1 - return ft.newLimit(v, l) + ft.newLimit(v, l) } // newLimit adds new limiting information for v. -// Returns true if the new limit added any new information. -func (ft *factsTable) newLimit(v *Value, newLim limit) bool { +func (ft *factsTable) newLimit(v *Value, newLim limit) { oldLim := ft.limits[v.ID] // Merge old and new information. @@ -531,13 +530,12 @@ func (ft *factsTable) newLimit(v *Value, newLim limit) bool { } if lim == oldLim { - return false // nothing new to record + return // nothing new to record } if lim.unsat() { - r := !ft.unsat ft.unsat = true - return r + return } // Check for recursion. This normally happens because in unsatisfiable @@ -548,7 +546,7 @@ func (ft *factsTable) newLimit(v *Value, newLim limit) bool { // the posets will not notice. if ft.recurseCheck[v.ID] { // This should only happen for unsatisfiable cases. TODO: check - return false + return } ft.recurseCheck[v.ID] = true defer func() { @@ -713,8 +711,6 @@ func (ft *factsTable) newLimit(v *Value, newLim limit) bool { } } } - - return true } func (ft *factsTable) addOrdering(v, w *Value, d domain, r relation) { @@ -1825,7 +1821,7 @@ func initLimit(v *Value) limit { return lim } -// flowLimit updates the known limits of v in ft. Returns true if anything changed. +// flowLimit updates the known limits of v in ft. // flowLimit can use the ranges of input arguments. // // Note: this calculation only happens at the point the value is defined. We do not reevaluate @@ -1838,10 +1834,10 @@ func initLimit(v *Value) limit { // block. We could recompute the range of v once we enter the block so // we know that it is 0 <= v <= 8, but we don't have a mechanism to do // that right now. -func (ft *factsTable) flowLimit(v *Value) bool { +func (ft *factsTable) flowLimit(v *Value) { if !v.Type.IsInteger() { // TODO: boolean? - return false + return } // Additional limits based on opcode and argument. @@ -1851,36 +1847,36 @@ func (ft *factsTable) flowLimit(v *Value) bool { // extensions case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16, OpZeroExt16to64, OpZeroExt16to32, OpZeroExt32to64: a := ft.limits[v.Args[0].ID] - return ft.unsignedMinMax(v, a.umin, a.umax) + ft.unsignedMinMax(v, a.umin, a.umax) case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16, OpSignExt16to64, OpSignExt16to32, OpSignExt32to64: a := ft.limits[v.Args[0].ID] - return ft.signedMinMax(v, a.min, a.max) + ft.signedMinMax(v, a.min, a.max) case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8: a := ft.limits[v.Args[0].ID] if a.umax <= 1<<(uint64(v.Type.Size())*8)-1 { - return ft.unsignedMinMax(v, a.umin, a.umax) + ft.unsignedMinMax(v, a.umin, a.umax) } // math/bits case OpCtz64: a := ft.limits[v.Args[0].ID] if a.nonzero() { - return ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1)) + ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1)) } case OpCtz32: a := ft.limits[v.Args[0].ID] if a.nonzero() { - return ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1)) + ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1)) } case OpCtz16: a := ft.limits[v.Args[0].ID] if a.nonzero() { - return ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1)) + ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1)) } case OpCtz8: a := ft.limits[v.Args[0].ID] if a.nonzero() { - return ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1)) + ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1)) } case OpPopCount64, OpPopCount32, OpPopCount16, OpPopCount8: @@ -1889,26 +1885,26 @@ func (ft *factsTable) flowLimit(v *Value) bool { sharedLeadingMask := ^(uint64(1)<>b.min, a.min>>b.max) vmax := max(a.max>>b.min, a.max>>b.max) - return ft.signedMinMax(v, vmin, vmax) + ft.signedMinMax(v, vmin, vmax) } case OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8, OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8, @@ -1988,7 +1983,7 @@ func (ft *factsTable) flowLimit(v *Value) bool { a := ft.limits[v.Args[0].ID] b := ft.limits[v.Args[1].ID] if b.min >= 0 { - return ft.unsignedMinMax(v, a.umin>>b.max, a.umax>>b.min) + ft.unsignedMinMax(v, a.umin>>b.max, a.umax>>b.min) } case OpDiv64, OpDiv32, OpDiv16, OpDiv8: a := ft.limits[v.Args[0].ID] @@ -2008,11 +2003,11 @@ func (ft *factsTable) flowLimit(v *Value) bool { if b.umin > 0 { lim = lim.unsignedMax(a.umax / b.umin) } - return ft.newLimit(v, lim) + ft.newLimit(v, lim) case OpMod64, OpMod32, OpMod16, OpMod8: - return ft.modLimit(true, v, v.Args[0], v.Args[1]) + ft.modLimit(true, v, v.Args[0], v.Args[1]) case OpMod64u, OpMod32u, OpMod16u, OpMod8u: - return ft.modLimit(false, v, v.Args[0], v.Args[1]) + ft.modLimit(false, v, v.Args[0], v.Args[1]) case OpPhi: // Compute the union of all the input phis. @@ -2032,9 +2027,8 @@ func (ft *factsTable) flowLimit(v *Value) bool { l.umin = min(l.umin, l2.umin) l.umax = max(l.umax, l2.umax) } - return ft.newLimit(v, l) + ft.newLimit(v, l) } - return false } // detectSliceLenRelation matches the pattern where @@ -2047,13 +2041,13 @@ func (ft *factsTable) flowLimit(v *Value) bool { // // Note that "index" is not useed for indexing in this pattern, but // in the motivating example (chunked slice iteration) it is. -func (ft *factsTable) detectSliceLenRelation(v *Value) (inferred bool) { +func (ft *factsTable) detectSliceLenRelation(v *Value) { if v.Op != OpSub64 { - return false + return } if !(v.Args[0].Op == OpSliceLen || v.Args[0].Op == OpSliceCap) { - return false + return } slice := v.Args[0].Args[0] @@ -2093,13 +2087,12 @@ func (ft *factsTable) detectSliceLenRelation(v *Value) (inferred bool) { if K < 0 { // We hate thinking about overflow continue } - inferred = inferred || ft.signedMin(v, K) + ft.signedMin(v, K) } - return inferred } // x%d has been rewritten to x - (x/d)*d. -func (ft *factsTable) detectMod(v *Value) bool { +func (ft *factsTable) detectMod(v *Value) { var opDiv, opDivU, opMul, opConst Op switch v.Op { case OpSub64: @@ -2126,36 +2119,37 @@ func (ft *factsTable) detectMod(v *Value) bool { mul := v.Args[1] if mul.Op != opMul { - return false + return } div, con := mul.Args[0], mul.Args[1] if div.Op == opConst { div, con = con, div } if con.Op != opConst || (div.Op != opDiv && div.Op != opDivU) || div.Args[0] != v.Args[0] || div.Args[1].Op != opConst || div.Args[1].AuxInt != con.AuxInt { - return false + return } - return ft.modLimit(div.Op == opDiv, v, v.Args[0], con) + ft.modLimit(div.Op == opDiv, v, v.Args[0], con) } // modLimit sets v with facts derived from v = p % q. -func (ft *factsTable) modLimit(signed bool, v, p, q *Value) bool { +func (ft *factsTable) modLimit(signed bool, v, p, q *Value) { a := ft.limits[p.ID] b := ft.limits[q.ID] if signed { if a.min < 0 && b.min > 0 { - return ft.signedMinMax(v, -(b.max - 1), b.max-1) + ft.signedMinMax(v, -(b.max - 1), b.max-1) + return } if !(a.nonnegative() && b.nonnegative()) { // TODO: we could handle signed limits but I didn't bother. - return false + return } if a.min >= 0 && b.min > 0 { ft.setNonNegative(v) } } // Underflow in the arithmetic below is ok, it gives to MaxUint64 which does nothing to the limit. - return ft.unsignedMax(v, min(a.umax, b.umax-1)) + ft.unsignedMax(v, min(a.umax, b.umax-1)) } // getBranch returns the range restrictions added by p -- cgit v1.3-5-g9baa From c12c33709923907348837e8131122ec4c45d2c83 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Fri, 14 Nov 2025 15:26:36 -0800 Subject: cmd/compile: teach prove about subtract idioms For v = x-y: if y >= 0 then v <= x if y <= x then v >= 0 (With appropriate guards against overflow/underflow.) Fixes #76304 Change-Id: I8f8f1254156c347fa97802bd057a8379676720ae Reviewed-on: https://go-review.googlesource.com/c/go/+/720740 Reviewed-by: Mark Freeman LUCI-TryBot-Result: Go LUCI Reviewed-by: Jorropo Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/prove.go | 43 +++++++++++++++++++++++++++++++++++ test/prove.go | 18 ++++++++++++--- 2 files changed, 58 insertions(+), 3 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ssa/prove.go b/src/cmd/compile/internal/ssa/prove.go index bbcab3efa5..4b2cedc8be 100644 --- a/src/cmd/compile/internal/ssa/prove.go +++ b/src/cmd/compile/internal/ssa/prove.go @@ -1945,6 +1945,7 @@ func (ft *factsTable) flowLimit(v *Value) { ft.newLimit(v, a.sub(b, uint(v.Type.Size())*8)) ft.detectMod(v) ft.detectSliceLenRelation(v) + ft.detectSubRelations(v) case OpNeg64, OpNeg32, OpNeg16, OpNeg8: a := ft.limits[v.Args[0].ID] bitsize := uint(v.Type.Size()) * 8 @@ -2091,6 +2092,48 @@ func (ft *factsTable) detectSliceLenRelation(v *Value) { } } +// v must be Sub{64,32,16,8}. +func (ft *factsTable) detectSubRelations(v *Value) { + // v = x-y + x := v.Args[0] + y := v.Args[1] + if x == y { + ft.signedMinMax(v, 0, 0) + return + } + xLim := ft.limits[x.ID] + yLim := ft.limits[y.ID] + + // Check if we might wrap around. If so, give up. + width := uint(v.Type.Size()) * 8 + if _, ok := safeSub(xLim.min, yLim.max, width); !ok { + return // x-y might underflow + } + if _, ok := safeSub(xLim.max, yLim.min, width); !ok { + return // x-y might overflow + } + + // Subtracting a positive number only makes + // things smaller. + if yLim.min >= 0 { + ft.update(v.Block, v, x, signed, lt|eq) + // TODO: is this worth it? + //if yLim.min > 0 { + // ft.update(v.Block, v, x, signed, lt) + //} + } + + // Subtracting a number from a bigger one + // can't go below 0. + if ft.orderS.OrderedOrEqual(y, x) { + ft.setNonNegative(v) + // TODO: is this worth it? + //if ft.orderS.Ordered(y, x) { + // ft.signedMin(v, 1) + //} + } +} + // x%d has been rewritten to x - (x/d)*d. func (ft *factsTable) detectMod(v *Value) { var opDiv, opDivU, opMul, opConst Op diff --git a/test/prove.go b/test/prove.go index 365e8ba006..3f8990615e 100644 --- a/test/prove.go +++ b/test/prove.go @@ -679,12 +679,12 @@ func natcmp(x, y []uint) (r int) { } func suffix(s, suffix string) bool { - // todo, we're still not able to drop the bound check here in the general case - return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix + // Note: issue 76304 + return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix // ERROR "Proved IsSliceInBounds" } func constsuffix(s string) bool { - return suffix(s, "abc") // ERROR "Proved IsSliceInBounds$" + return suffix(s, "abc") // ERROR "Proved IsSliceInBounds$" "Proved slicemask not needed$" "Proved Eq64$" } func atexit(foobar []func()) { @@ -2639,6 +2639,18 @@ func unsignedRightShiftBounds(v uint, s int) { } } +func subLengths1(b []byte, i int) { + if i >= 0 && i <= len(b) { + _ = b[len(b)-i:] // ERROR "Proved IsSliceInBounds" + } +} + +func subLengths2(b []byte, i int) { + if i >= 0 && i <= len(b) { + _ = b[:len(b)-i] // ERROR "Proved IsSliceInBounds" + } +} + //go:noinline func prove(x int) { } -- cgit v1.3-5-g9baa From e1a12c781f55da85a30fd63471f8adcba908acd2 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Mon, 17 Nov 2025 12:47:04 -0800 Subject: cmd/compile: use 32x32->64 multiplies on arm64 Gets rid of some sign extensions. Change-Id: Ie67ef36b4ca1cd1a2cd9fa5d84578db553578a22 Reviewed-on: https://go-review.googlesource.com/c/go/+/721241 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/_gen/ARM64.rules | 4 +++ src/cmd/compile/internal/ssa/rewriteARM64.go | 48 +++++++++++++++++++++++++++ test/codegen/arithmetic.go | 9 +++++ 3 files changed, 61 insertions(+) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules index f54a692725..04f43f3137 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules @@ -1814,3 +1814,7 @@ (Select0 (Mul64uover x y)) => (MUL x y) (Select1 (Mul64uover x y)) => (NotEqual (CMPconst (UMULH x y) [0])) + +// 32 mul 32 -> 64 +(MUL r:(MOVWUreg x) s:(MOVWUreg y)) && r.Uses == 1 && s.Uses == 1 => (UMULL x y) +(MUL r:(MOVWreg x) s:(MOVWreg y)) && r.Uses == 1 && s.Uses == 1 => (MULL x y) diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 6af1558833..6137ec13a0 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -12556,6 +12556,54 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool { } break } + // match: (MUL r:(MOVWUreg x) s:(MOVWUreg y)) + // cond: r.Uses == 1 && s.Uses == 1 + // result: (UMULL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + r := v_0 + if r.Op != OpARM64MOVWUreg { + continue + } + x := r.Args[0] + s := v_1 + if s.Op != OpARM64MOVWUreg { + continue + } + y := s.Args[0] + if !(r.Uses == 1 && s.Uses == 1) { + continue + } + v.reset(OpARM64UMULL) + v.AddArg2(x, y) + return true + } + break + } + // match: (MUL r:(MOVWreg x) s:(MOVWreg y)) + // cond: r.Uses == 1 && s.Uses == 1 + // result: (MULL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + r := v_0 + if r.Op != OpARM64MOVWreg { + continue + } + x := r.Args[0] + s := v_1 + if s.Op != OpARM64MOVWreg { + continue + } + y := s.Args[0] + if !(r.Uses == 1 && s.Uses == 1) { + continue + } + v.reset(OpARM64MULL) + v.AddArg2(x, y) + return true + } + break + } return false } func rewriteValueARM64_OpARM64MULW(v *Value) bool { diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index 6b2c5529e1..9443d812dc 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -333,6 +333,15 @@ func Fold2NegMul(a, b int) int { return -a * -b } +func Mul32(a, b int32) int64 { + // arm64:"SMULL" -"MOVW" + return int64(a) * int64(b) +} +func Mul32U(a, b uint32) uint64 { + // arm64:"UMULL" -"MOVWU" + return uint64(a) * uint64(b) +} + // -------------- // // Division // // -------------- // -- cgit v1.3-5-g9baa From 9859b436430aac382b337964a1b380bc4bfcda70 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Fri, 26 Sep 2025 05:05:49 +1000 Subject: cmd/asm,cmd/compile,cmd/internal/obj/riscv: use compressed instructions on riscv64 Make use of compressed instructions on riscv64 - add a compress pass to the end of the assembler, which replaces non-compressed instructions with compressed alternatives if possible. Provide a `compressinstructions` compiler and assembler debug flag, such that the compression pass can be disabled via `-asmflags=all=-d=compressinstructions=0` and `-gcflags=all=-d=compressinstructions=0`. Note that this does not prevent the explicit use of compressed instructions via assembly. Note that this does not make use of compressed control transfer instructions - this will be implemented in later changes. Reduces the text size of a hello world binary by ~121KB and reduces the text size of the go binary on riscv64 by ~1.21MB (between 8-10% in both cases). Updates #71105 Cq-Include-Trybots: luci.golang.try:gotip-linux-riscv64 Change-Id: I24258353688554042c2a836deed4830cc673e985 Reviewed-on: https://go-review.googlesource.com/c/go/+/523478 Reviewed-by: Mark Ryan Reviewed-by: Mark Freeman LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui --- src/cmd/asm/internal/flags/flags.go | 7 +- src/cmd/asm/main.go | 1 + src/cmd/compile/internal/base/debug.go | 1 + src/cmd/compile/internal/base/flag.go | 2 + src/cmd/internal/obj/link.go | 61 +++++------ src/cmd/internal/obj/riscv/asm_test.go | 16 +-- src/cmd/internal/obj/riscv/cpu.go | 3 + src/cmd/internal/obj/riscv/obj.go | 178 +++++++++++++++++++++++++++++++-- src/cmd/link/internal/ld/ld_test.go | 4 +- 9 files changed, 225 insertions(+), 48 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/asm/internal/flags/flags.go b/src/cmd/asm/internal/flags/flags.go index e15a062749..19aa65630f 100644 --- a/src/cmd/asm/internal/flags/flags.go +++ b/src/cmd/asm/internal/flags/flags.go @@ -29,8 +29,9 @@ var ( ) var DebugFlags struct { - MayMoreStack string `help:"call named function before all stack growth checks"` - PCTab string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"` + CompressInstructions int `help:"use compressed instructions when possible (if supported by architecture)"` + MayMoreStack string `help:"call named function before all stack growth checks"` + PCTab string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"` } var ( @@ -47,6 +48,8 @@ func init() { flag.Var(objabi.NewDebugFlag(&DebugFlags, nil), "d", "enable debugging settings; try -d help") objabi.AddVersionFlag() // -V objabi.Flagcount("S", "print assembly and machine code", &PrintOut) + + DebugFlags.CompressInstructions = 1 } // MultiFlag allows setting a value multiple times to collect a list, as in -I=dir1 -I=dir2. diff --git a/src/cmd/asm/main.go b/src/cmd/asm/main.go index f2697db516..25cf307140 100644 --- a/src/cmd/asm/main.go +++ b/src/cmd/asm/main.go @@ -40,6 +40,7 @@ func main() { log.Fatalf("unrecognized architecture %s", GOARCH) } ctxt := obj.Linknew(architecture.LinkArch) + ctxt.CompressInstructions = flags.DebugFlags.CompressInstructions != 0 ctxt.Debugasm = flags.PrintOut ctxt.Debugvlog = flags.DebugV ctxt.Flag_dynlink = *flags.Dynlink diff --git a/src/cmd/compile/internal/base/debug.go b/src/cmd/compile/internal/base/debug.go index 9e8ab2f488..b532bf435e 100644 --- a/src/cmd/compile/internal/base/debug.go +++ b/src/cmd/compile/internal/base/debug.go @@ -20,6 +20,7 @@ type DebugFlags struct { Append int `help:"print information about append compilation"` Checkptr int `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation" concurrent:"ok"` Closure int `help:"print information about closure compilation"` + CompressInstructions int `help:"use compressed instructions when possible (if supported by architecture)"` Converthash string `help:"hash value for use in debugging changes to platform-dependent float-to-[u]int conversion" concurrent:"ok"` Defer int `help:"print information about defer compilation"` DisableNil int `help:"disable nil checks" concurrent:"ok"` diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go index 1d211e0a2d..63cae41524 100644 --- a/src/cmd/compile/internal/base/flag.go +++ b/src/cmd/compile/internal/base/flag.go @@ -177,6 +177,7 @@ func ParseFlags() { Flag.WB = true Debug.ConcurrentOk = true + Debug.CompressInstructions = 1 Debug.MaxShapeLen = 500 Debug.AlignHot = 1 Debug.InlFuncsWithClosures = 1 @@ -299,6 +300,7 @@ func ParseFlags() { } parseSpectre(Flag.Spectre) // left as string for RecordFlags + Ctxt.CompressInstructions = Debug.CompressInstructions != 0 Ctxt.Flag_shared = Ctxt.Flag_dynlink || Ctxt.Flag_shared Ctxt.Flag_optimize = Flag.N == 0 Ctxt.Debugasm = int(Flag.S) diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index 85dca33d27..c70c1d9438 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -1153,36 +1153,37 @@ type Func interface { // Link holds the context for writing object code from a compiler // to be linker input or for reading that input into the linker. type Link struct { - Headtype objabi.HeadType - Arch *LinkArch - Debugasm int - Debugvlog bool - Debugpcln string - Flag_shared bool - Flag_dynlink bool - Flag_linkshared bool - Flag_optimize bool - Flag_locationlists bool - Flag_noRefName bool // do not include referenced symbol names in object file - Retpoline bool // emit use of retpoline stubs for indirect jmp/call - Flag_maymorestack string // If not "", call this function before stack checks - Bso *bufio.Writer - Pathname string - Pkgpath string // the current package's import path - hashmu sync.Mutex // protects hash, funchash - hash map[string]*LSym // name -> sym mapping - funchash map[string]*LSym // name -> sym mapping for ABIInternal syms - statichash map[string]*LSym // name -> sym mapping for static syms - PosTable src.PosTable - InlTree InlTree // global inlining tree used by gc/inl.go - DwFixups *DwarfFixupTable - DwTextCount int - Imports []goobj.ImportedPkg - DiagFunc func(string, ...any) - DiagFlush func() - DebugInfo func(ctxt *Link, fn *LSym, info *LSym, curfn Func) ([]dwarf.Scope, dwarf.InlCalls) - GenAbstractFunc func(fn *LSym) - Errors int + Headtype objabi.HeadType + Arch *LinkArch + CompressInstructions bool // use compressed instructions where possible (if supported by architecture) + Debugasm int + Debugvlog bool + Debugpcln string + Flag_shared bool + Flag_dynlink bool + Flag_linkshared bool + Flag_optimize bool + Flag_locationlists bool + Flag_noRefName bool // do not include referenced symbol names in object file + Retpoline bool // emit use of retpoline stubs for indirect jmp/call + Flag_maymorestack string // If not "", call this function before stack checks + Bso *bufio.Writer + Pathname string + Pkgpath string // the current package's import path + hashmu sync.Mutex // protects hash, funchash + hash map[string]*LSym // name -> sym mapping + funchash map[string]*LSym // name -> sym mapping for ABIInternal syms + statichash map[string]*LSym // name -> sym mapping for static syms + PosTable src.PosTable + InlTree InlTree // global inlining tree used by gc/inl.go + DwFixups *DwarfFixupTable + DwTextCount int + Imports []goobj.ImportedPkg + DiagFunc func(string, ...any) + DiagFlush func() + DebugInfo func(ctxt *Link, fn *LSym, info *LSym, curfn Func) ([]dwarf.Scope, dwarf.InlCalls) + GenAbstractFunc func(fn *LSym) + Errors int InParallel bool // parallel backend phase in effect UseBASEntries bool // use Base Address Selection Entries in location lists and PC ranges diff --git a/src/cmd/internal/obj/riscv/asm_test.go b/src/cmd/internal/obj/riscv/asm_test.go index f40e57fa64..5b50d1533a 100644 --- a/src/cmd/internal/obj/riscv/asm_test.go +++ b/src/cmd/internal/obj/riscv/asm_test.go @@ -11,8 +11,8 @@ import ( "os" "os/exec" "path/filepath" + "regexp" "runtime" - "strings" "testing" ) @@ -48,10 +48,10 @@ func genLargeBranch(buf *bytes.Buffer) { fmt.Fprintln(buf, "TEXT f(SB),0,$0-0") fmt.Fprintln(buf, "BEQ X0, X0, label") for i := 0; i < 1<<19; i++ { - fmt.Fprintln(buf, "ADD $0, X0, X0") + fmt.Fprintln(buf, "ADD $0, X5, X0") } fmt.Fprintln(buf, "label:") - fmt.Fprintln(buf, "ADD $0, X0, X0") + fmt.Fprintln(buf, "ADD $0, X5, X0") } // TestLargeCall generates a large function (>1MB of text) with a call to @@ -112,11 +112,11 @@ func genLargeCall(buf *bytes.Buffer) { fmt.Fprintln(buf, "TEXT ·x(SB),0,$0-0") fmt.Fprintln(buf, "CALL ·y(SB)") for i := 0; i < 1<<19; i++ { - fmt.Fprintln(buf, "ADD $0, X0, X0") + fmt.Fprintln(buf, "ADD $0, X5, X0") } fmt.Fprintln(buf, "RET") fmt.Fprintln(buf, "TEXT ·y(SB),0,$0-0") - fmt.Fprintln(buf, "ADD $0, X0, X0") + fmt.Fprintln(buf, "ADD $0, X5, X0") fmt.Fprintln(buf, "RET") } @@ -301,9 +301,9 @@ TEXT _stub(SB),$0-0 // FENCE // NOP // FENCE - // RET - want := "0f 00 f0 0f 13 00 00 00 0f 00 f0 0f 67 80 00 00" - if !strings.Contains(string(out), want) { + // RET (CJALR or JALR) + want := regexp.MustCompile("0x0000 0f 00 f0 0f 13 00 00 00 0f 00 f0 0f (82 80|67 80 00 00) ") + if !want.Match(out) { t.Errorf("PCALIGN test failed - got %s\nwant %s", out, want) } } diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go index 60174a0b3a..a91395dd38 100644 --- a/src/cmd/internal/obj/riscv/cpu.go +++ b/src/cmd/internal/obj/riscv/cpu.go @@ -326,6 +326,9 @@ const ( NEED_GOT_PCREL_ITYPE_RELOC ) +const NEED_RELOC = NEED_JAL_RELOC | NEED_CALL_RELOC | NEED_PCREL_ITYPE_RELOC | + NEED_PCREL_STYPE_RELOC | NEED_GOT_PCREL_ITYPE_RELOC + // RISC-V mnemonics, as defined in the "opcodes" and "opcodes-pseudo" files // at https://github.com/riscv/riscv-opcodes. // diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 8b9be5d78b..043be17c07 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -414,10 +414,10 @@ func containsCall(sym *obj.LSym) bool { // setPCs sets the Pc field in all instructions reachable from p. // It uses pc as the initial value and returns the next available pc. -func setPCs(p *obj.Prog, pc int64) int64 { +func setPCs(p *obj.Prog, pc int64, compress bool) int64 { for ; p != nil; p = p.Link { p.Pc = pc - for _, ins := range instructionsForProg(p) { + for _, ins := range instructionsForProg(p, compress) { pc += int64(ins.length()) } @@ -671,7 +671,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // a fixed point will be reached). No attempt to handle functions > 2GiB. for { big, rescan := false, false - maxPC := setPCs(cursym.Func().Text, 0) + maxPC := setPCs(cursym.Func().Text, 0, ctxt.CompressInstructions) if maxPC+maxTrampSize > (1 << 20) { big = true } @@ -801,7 +801,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // Validate all instructions - this provides nice error messages. for p := cursym.Func().Text; p != nil; p = p.Link { - for _, ins := range instructionsForProg(p) { + for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) { ins.validate(ctxt) } } @@ -1141,6 +1141,14 @@ func wantImmU(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) { } } +func isScaledImmI(imm int64, nbits uint, scale int64) bool { + return immFits(imm, nbits, true) == nil && imm%scale == 0 +} + +func isScaledImmU(imm int64, nbits uint, scale int64) bool { + return immFits(imm, nbits, false) == nil && imm%scale == 0 +} + func wantScaledImm(ctxt *obj.Link, ins *instruction, imm int64, nbits uint, scale int64, signed bool) { if err := immFits(imm, nbits, signed); err != nil { ctxt.Diag("%v: %v", ins, err) @@ -1180,6 +1188,10 @@ func wantIntReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { wantReg(ctxt, ins, pos, "integer", r, REG_X0, REG_X31) } +func isIntPrimeReg(r uint32) bool { + return r >= REG_X8 && r <= REG_X15 +} + // wantIntPrimeReg checks that r is an integer register that can be used // in a prime register field of a compressed instruction. func wantIntPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { @@ -1191,6 +1203,10 @@ func wantFloatReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { wantReg(ctxt, ins, pos, "float", r, REG_F0, REG_F31) } +func isFloatPrimeReg(r uint32) bool { + return r >= REG_F8 && r <= REG_F15 +} + // wantFloatPrimeReg checks that r is an floating-point register that can // be used in a prime register field of a compressed instruction. func wantFloatPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { @@ -3515,6 +3531,147 @@ func (ins *instruction) usesRegTmp() bool { return ins.rd == REG_TMP || ins.rs1 == REG_TMP || ins.rs2 == REG_TMP } +func (ins *instruction) compress() { + switch ins.as { + case ALW: + if ins.rd != REG_X0 && ins.rs1 == REG_SP && isScaledImmU(ins.imm, 8, 4) { + ins.as, ins.rs1, ins.rs2 = ACLWSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) { + ins.as = ACLW + } + + case ALD: + if ins.rs1 == REG_SP && ins.rd != REG_X0 && isScaledImmU(ins.imm, 9, 8) { + ins.as, ins.rs1, ins.rs2 = ACLDSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) { + ins.as = ACLD + } + + case AFLD: + if ins.rs1 == REG_SP && isScaledImmU(ins.imm, 9, 8) { + ins.as, ins.rs1, ins.rs2 = ACFLDSP, obj.REG_NONE, ins.rs1 + } else if isFloatPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) { + ins.as = ACFLD + } + + case ASW: + if ins.rd == REG_SP && isScaledImmU(ins.imm, 8, 4) { + ins.as, ins.rs1, ins.rs2 = ACSWSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) { + ins.as, ins.rd, ins.rs1, ins.rs2 = ACSW, obj.REG_NONE, ins.rd, ins.rs1 + } + + case ASD: + if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) { + ins.as, ins.rs1, ins.rs2 = ACSDSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) { + ins.as, ins.rd, ins.rs1, ins.rs2 = ACSD, obj.REG_NONE, ins.rd, ins.rs1 + } + + case AFSD: + if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) { + ins.as, ins.rs1, ins.rs2 = ACFSDSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isFloatPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) { + ins.as, ins.rd, ins.rs1, ins.rs2 = ACFSD, obj.REG_NONE, ins.rd, ins.rs1 + } + + case AADDI: + if ins.rd == REG_SP && ins.rs1 == REG_SP && ins.imm != 0 && isScaledImmI(ins.imm, 10, 16) { + ins.as = ACADDI16SP + } else if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 && immIFits(ins.imm, 6) == nil { + ins.as = ACADDI + } else if isIntPrimeReg(ins.rd) && ins.rs1 == REG_SP && ins.imm != 0 && isScaledImmU(ins.imm, 10, 4) { + ins.as = ACADDI4SPN + } else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && immIFits(ins.imm, 6) == nil { + ins.as, ins.rs1 = ACLI, obj.REG_NONE + } else if ins.rd != REG_X0 && ins.rs1 != REG_X0 && ins.imm == 0 { + ins.as, ins.rs1, ins.rs2 = ACMV, obj.REG_NONE, ins.rs1 + } else if ins.rd == REG_X0 && ins.rs1 == REG_X0 && ins.imm == 0 { + ins.as, ins.rs1 = ACNOP, ins.rd + } + + case AADDIW: + if ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil { + ins.as = ACADDIW + } + + case ALUI: + if ins.rd != REG_X0 && ins.rd != REG_SP && ins.imm != 0 && immIFits(ins.imm, 6) == nil { + ins.as = ACLUI + } + + case ASLLI: + if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 { + ins.as = ACSLLI + } + + case ASRLI: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 { + ins.as = ACSRLI + } + + case ASRAI: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 { + ins.as = ACSRAI + } + + case AANDI: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil { + ins.as = ACANDI + } + + case AADD: + if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.rs2 != REG_X0 { + ins.as = ACADD + } else if ins.rd != REG_X0 && ins.rd == ins.rs2 && ins.rs1 != REG_X0 { + ins.as, ins.rs1, ins.rs2 = ACADD, ins.rs2, ins.rs1 + } else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && ins.rs2 != REG_X0 { + ins.as = ACMV + } + + case AADDW: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACADDW + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 { + ins.as, ins.rs1, ins.rs2 = ACADDW, ins.rs2, ins.rs1 + } + + case ASUB: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACSUB + } + + case ASUBW: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACSUBW + } + + case AAND: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACAND + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 { + ins.as, ins.rs1, ins.rs2 = ACAND, ins.rs2, ins.rs1 + } + + case AOR: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACOR + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 { + ins.as, ins.rs1, ins.rs2 = ACOR, ins.rs2, ins.rs1 + } + + case AXOR: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACXOR + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 { + ins.as, ins.rs1, ins.rs2 = ACXOR, ins.rs2, ins.rs1 + } + + case AEBREAK: + ins.as, ins.rd, ins.rs1 = ACEBREAK, obj.REG_NONE, obj.REG_NONE + } +} + // instructionForProg returns the default *obj.Prog to instruction mapping. func instructionForProg(p *obj.Prog) *instruction { ins := &instruction{ @@ -4057,7 +4214,7 @@ func instructionsForMinMax(p *obj.Prog, ins *instruction) []*instruction { } // instructionsForProg returns the machine instructions for an *obj.Prog. -func instructionsForProg(p *obj.Prog) []*instruction { +func instructionsForProg(p *obj.Prog, compress bool) []*instruction { ins := instructionForProg(p) inss := []*instruction{ins} @@ -4710,6 +4867,15 @@ func instructionsForProg(p *obj.Prog) []*instruction { ins.rs1, ins.rs2 = obj.REG_NONE, REG_V0 } + // Only compress instructions when there is no relocation, since + // relocation relies on knowledge about the exact instructions that + // are in use. + if compress && p.Mark&NEED_RELOC == 0 { + for _, ins := range inss { + ins.compress() + } + } + for _, ins := range inss { ins.p = p } @@ -4814,7 +4980,7 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } offset := p.Pc - for _, ins := range instructionsForProg(p) { + for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) { if ic, err := ins.encode(); err == nil { cursym.WriteInt(ctxt, offset, ins.length(), int64(ic)) offset += int64(ins.length()) diff --git a/src/cmd/link/internal/ld/ld_test.go b/src/cmd/link/internal/ld/ld_test.go index 9a27ac8c76..64b86f3a0b 100644 --- a/src/cmd/link/internal/ld/ld_test.go +++ b/src/cmd/link/internal/ld/ld_test.go @@ -387,7 +387,7 @@ func TestRISCVTrampolines(t *testing.T) { buf := new(bytes.Buffer) fmt.Fprintf(buf, "TEXT a(SB),$0-0\n") for i := 0; i < 1<<17; i++ { - fmt.Fprintf(buf, "\tADD $0, X0, X0\n") + fmt.Fprintf(buf, "\tADD $0, X5, X0\n") } fmt.Fprintf(buf, "\tCALL b(SB)\n") fmt.Fprintf(buf, "\tRET\n") @@ -398,7 +398,7 @@ func TestRISCVTrampolines(t *testing.T) { fmt.Fprintf(buf, "\tRET\n") fmt.Fprintf(buf, "TEXT ·d(SB),0,$0-0\n") for i := 0; i < 1<<17; i++ { - fmt.Fprintf(buf, "\tADD $0, X0, X0\n") + fmt.Fprintf(buf, "\tADD $0, X5, X0\n") } fmt.Fprintf(buf, "\tCALL a(SB)\n") fmt.Fprintf(buf, "\tCALL c(SB)\n") -- cgit v1.3-5-g9baa From ba634ca5c7f19105c853db5752cc0f6d3ca76e45 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Mon, 17 Nov 2025 22:40:26 -0800 Subject: cmd/compile: fold boolean NOT into branches Gets rid of an EOR $1 instruction. Change-Id: Ib032b0cee9ac484329c978af9b1305446f8d5dac Reviewed-on: https://go-review.googlesource.com/c/go/+/721501 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/_gen/ARM64.rules | 2 ++ src/cmd/compile/internal/ssa/rewriteARM64.go | 31 +++++++++++++++++++++++++++ test/codegen/bool.go | 15 +++++++++++++ 3 files changed, 48 insertions(+) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules index 04f43f3137..53bb35d289 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules @@ -573,6 +573,8 @@ (TBNZ [0] (GreaterThanF cc) yes no) => (FGT cc yes no) (TBNZ [0] (GreaterEqualF cc) yes no) => (FGE cc yes no) +(TB(Z|NZ) [0] (XORconst [1] x) yes no) => (TB(NZ|Z) [0] x yes no) + ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TST x y) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTconst [c] y) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTW x y) yes no) diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 6137ec13a0..b3f790dbda 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -25321,6 +25321,37 @@ func rewriteBlockARM64(b *Block) bool { b.resetWithControl(BlockARM64FGE, cc) return true } + // match: (TBNZ [0] (XORconst [1] x) yes no) + // result: (TBZ [0] x yes no) + for b.Controls[0].Op == OpARM64XORconst { + v_0 := b.Controls[0] + if auxIntToInt64(v_0.AuxInt) != 1 { + break + } + x := v_0.Args[0] + if auxIntToInt64(b.AuxInt) != 0 { + break + } + b.resetWithControl(BlockARM64TBZ, x) + b.AuxInt = int64ToAuxInt(0) + return true + } + case BlockARM64TBZ: + // match: (TBZ [0] (XORconst [1] x) yes no) + // result: (TBNZ [0] x yes no) + for b.Controls[0].Op == OpARM64XORconst { + v_0 := b.Controls[0] + if auxIntToInt64(v_0.AuxInt) != 1 { + break + } + x := v_0.Args[0] + if auxIntToInt64(b.AuxInt) != 0 { + break + } + b.resetWithControl(BlockARM64TBNZ, x) + b.AuxInt = int64ToAuxInt(0) + return true + } case BlockARM64UGE: // match: (UGE (FlagConstant [fc]) yes no) // cond: fc.uge() diff --git a/test/codegen/bool.go b/test/codegen/bool.go index 37f85a45b7..8fe7a94687 100644 --- a/test/codegen/bool.go +++ b/test/codegen/bool.go @@ -313,3 +313,18 @@ func constantWrite(b bool, p *bool) { *p = b } } + +func boolCompare1(p, q *bool) int { + // arm64:-"EOR [$]1" + if *p == *q { + return 5 + } + return 7 +} +func boolCompare2(p, q *bool) int { + // arm64:-"EOR [$]1" + if *p != *q { + return 5 + } + return 7 +} -- cgit v1.3-5-g9baa From 4d0658bb0871806a8c5551063d1ef1d205916ceb Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Mon, 17 Nov 2025 15:33:01 -0800 Subject: cmd/compile: prefer fixed registers for values For this code: func f() (int, int) { return 0, 0 } We currently generate on arm64: MOVD ZR, R0 MOVD R0, R1 This CL changes that to MOVD ZR, R0 MOVD ZR, R1 Probably not a big performance difference, but it makes the generated code clearer. A followup-ish CL from 633075 when the zero fixed register was exposed to the register allocator. Change-Id: I869a92817dcbbca46c900999fab538e76e10ed05 Reviewed-on: https://go-review.googlesource.com/c/go/+/721440 Reviewed-by: Keith Randall Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/ssa/regalloc.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index b5174acbc9..9ed8a0e86c 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -596,17 +596,18 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos var c *Value if vi.regs != 0 { // Copy from a register that v is already in. - r2 := pickReg(vi.regs) var current *Value - if !s.allocatable.contains(r2) { - current = v // v is in a fixed register + if vi.regs&^s.allocatable != 0 { + // v is in a fixed register, prefer that + current = v } else { + r2 := pickReg(vi.regs) if s.regs[r2].v != v { panic("bad register state") } current = s.regs[r2].c + s.usedSinceBlockStart |= regMask(1) << r2 } - s.usedSinceBlockStart |= regMask(1) << r2 c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current) } else if v.rematerializeable() { // Rematerialize instead of loading from the spill location. -- cgit v1.3-5-g9baa From e64023dcbf40af59a637a982cba58ee8272d61c4 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Tue, 18 Nov 2025 01:18:30 +0100 Subject: cmd/compile: cleanup useless if statement in prove Change-Id: Icf5db366b311b5f88809dd07f22cf4bfdead516c Reviewed-on: https://go-review.googlesource.com/c/go/+/721203 Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI Auto-Submit: Jorropo Reviewed-by: Keith Randall Reviewed-by: Mark Freeman --- src/cmd/compile/internal/ssa/prove.go | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ssa/prove.go b/src/cmd/compile/internal/ssa/prove.go index 4b2cedc8be..789d7721d4 100644 --- a/src/cmd/compile/internal/ssa/prove.go +++ b/src/cmd/compile/internal/ssa/prove.go @@ -3030,16 +3030,14 @@ func (ft *factsTable) topoSortValuesInBlock(b *Block) { want := f.NumValues() scores := ft.reusedTopoSortScoresTable - if len(scores) < want { - if want <= cap(scores) { - scores = scores[:want] - } else { - if cap(scores) > 0 { - f.Cache.freeUintSlice(scores) - } - scores = f.Cache.allocUintSlice(want) - ft.reusedTopoSortScoresTable = scores + if want <= cap(scores) { + scores = scores[:want] + } else { + if cap(scores) > 0 { + f.Cache.freeUintSlice(scores) } + scores = f.Cache.allocUintSlice(want) + ft.reusedTopoSortScoresTable = scores } for _, v := range b.Values { -- cgit v1.3-5-g9baa From dc42565a202694731945421c1fd58815f12423b5 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Tue, 18 Nov 2025 01:26:01 +0100 Subject: cmd/compile: fix control flow for unsigned divisions proof relations The continue used to make sense since I first wrote this patch with a loop, for testing the commutativity of the add. This was refactored to just try both but I forgot to fix the continue. Change-Id: I91466a052d5d8ee7193084a71faf69bd27e36d2a Reviewed-on: https://go-review.googlesource.com/c/go/+/721204 Reviewed-by: Keith Randall Reviewed-by: Keith Randall Reviewed-by: Mark Freeman LUCI-TryBot-Result: Go LUCI Auto-Submit: Jorropo --- src/cmd/compile/internal/ssa/prove.go | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ssa/prove.go b/src/cmd/compile/internal/ssa/prove.go index 789d7721d4..d4e7ed14b1 100644 --- a/src/cmd/compile/internal/ssa/prove.go +++ b/src/cmd/compile/internal/ssa/prove.go @@ -2503,15 +2503,13 @@ func addLocalFacts(ft *factsTable, b *Block) { xl := ft.limits[x.ID] y := add.Args[1] yl := ft.limits[y.ID] - if unsignedAddOverflows(xl.umax, yl.umax, add.Type) { - continue - } - - if xl.umax < uminDivisor { - ft.update(b, v, y, unsigned, lt|eq) - } - if yl.umax < uminDivisor { - ft.update(b, v, x, unsigned, lt|eq) + if !unsignedAddOverflows(xl.umax, yl.umax, add.Type) { + if xl.umax < uminDivisor { + ft.update(b, v, y, unsigned, lt|eq) + } + if yl.umax < uminDivisor { + ft.update(b, v, x, unsigned, lt|eq) + } } } ft.update(b, v, v.Args[0], unsigned, lt|eq) -- cgit v1.3-5-g9baa From 4fef9f8b5596d42a2997fd8b74185d53fb7d0e43 Mon Sep 17 00:00:00 2001 From: Mark Freeman Date: Wed, 19 Nov 2025 15:10:24 -0500 Subject: go/types, types2: fix object path for grouped declaration statements CL 715840 deferred popping from the object path during handling of grouped declaration statements, which leaves extra objects on the path since this executes in a loop. Surprisingly, no test exercised this. This change fixes this small bug and adds a supporting test. Fixes #76366 Change-Id: I7fc038b39d3871eea3e60855c46614b463bcfa4f Reviewed-on: https://go-review.googlesource.com/c/go/+/722060 Reviewed-by: Robert Griesemer Auto-Submit: Mark Freeman LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/types2/decl.go | 2 +- src/go/types/decl.go | 2 +- src/internal/types/testdata/fixedbugs/issue76366.go | 12 ++++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 src/internal/types/testdata/fixedbugs/issue76366.go (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/types2/decl.go b/src/cmd/compile/internal/types2/decl.go index 2df34f3b94..60371651ab 100644 --- a/src/cmd/compile/internal/types2/decl.go +++ b/src/cmd/compile/internal/types2/decl.go @@ -876,8 +876,8 @@ func (check *Checker) declStmt(list []syntax.Decl) { scopePos := s.Name.Pos() check.declare(check.scope, s.Name, obj, scopePos) check.push(obj) // mark as grey - defer check.pop() check.typeDecl(obj, s, nil) + check.pop() default: check.errorf(s, InvalidSyntaxTree, "unknown syntax.Decl node %T", s) diff --git a/src/go/types/decl.go b/src/go/types/decl.go index 05cc63e01c..4b374fb66d 100644 --- a/src/go/types/decl.go +++ b/src/go/types/decl.go @@ -935,8 +935,8 @@ func (check *Checker) declStmt(d ast.Decl) { scopePos := d.spec.Name.Pos() check.declare(check.scope, d.spec.Name, obj, scopePos) check.push(obj) // mark as grey - defer check.pop() check.typeDecl(obj, d.spec, nil) + check.pop() default: check.errorf(d.node(), InvalidSyntaxTree, "unknown ast.Decl node %T", d.node()) } diff --git a/src/internal/types/testdata/fixedbugs/issue76366.go b/src/internal/types/testdata/fixedbugs/issue76366.go new file mode 100644 index 0000000000..b78aa4463f --- /dev/null +++ b/src/internal/types/testdata/fixedbugs/issue76366.go @@ -0,0 +1,12 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package p + +func _() { + type ( + A = int + B = []A + ) +} -- cgit v1.3-5-g9baa From c4bb9653ba28cba4bcd3a3cbb64285c495a03ba2 Mon Sep 17 00:00:00 2001 From: Guoqi Chen Date: Mon, 17 Nov 2025 11:33:04 +0800 Subject: cmd/compile: Implement LoweredZeroLoop with LSX Instruction on loong64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit goos: linux goarch: loong64 pkg: runtime cpu: Loongson-3A6000 @ 2500.00MHz | old.txt | new.txt | | sec/op | sec/op vs base | ClearFat256 6.406n ± 0% 3.329n ± 1% -48.03% (p=0.000 n=10) ClearFat512 12.810n ± 0% 7.607n ± 0% -40.62% (p=0.000 n=10) ClearFat1024 25.62n ± 0% 14.01n ± 0% -45.32% (p=0.000 n=10) ClearFat1032 26.02n ± 0% 14.28n ± 0% -45.14% (p=0.000 n=10) ClearFat1040 26.02n ± 0% 14.41n ± 0% -44.62% (p=0.000 n=10) MemclrKnownSize192 4.804n ± 0% 2.827n ± 0% -41.15% (p=0.000 n=10) MemclrKnownSize248 6.561n ± 0% 4.371n ± 0% -33.38% (p=0.000 n=10) MemclrKnownSize256 6.406n ± 0% 3.335n ± 0% -47.94% (p=0.000 n=10) geomean 11.41n 6.453n -43.45% goos: linux goarch: loong64 pkg: runtime cpu: Loongson-3C5000 @ 2200.00MHz | old.txt | new.txt | | sec/op | sec/op vs base | ClearFat256 14.570n ± 0% 7.284n ± 0% -50.01% (p=0.000 n=10) ClearFat512 29.13n ± 0% 14.57n ± 0% -49.98% (p=0.000 n=10) ClearFat1024 58.26n ± 0% 29.15n ± 0% -49.97% (p=0.000 n=10) ClearFat1032 58.73n ± 0% 29.15n ± 0% -50.36% (p=0.000 n=10) ClearFat1040 59.18n ± 0% 29.26n ± 0% -50.56% (p=0.000 n=10) MemclrKnownSize192 10.930n ± 0% 5.466n ± 0% -49.99% (p=0.000 n=10) MemclrKnownSize248 14.110n ± 0% 6.772n ± 0% -52.01% (p=0.000 n=10) MemclrKnownSize256 14.570n ± 0% 7.285n ± 0% -50.00% (p=0.000 n=10) geomean 25.75n 12.78n -50.36% Change-Id: I88d7b6ae2f6fc3f095979f24fb83ff42a9d2d42e Reviewed-on: https://go-review.googlesource.com/c/go/+/720940 Reviewed-by: Meidan Li Reviewed-by: Mark Freeman LUCI-TryBot-Result: Go LUCI Reviewed-by: sophie zhao Reviewed-by: Keith Randall Reviewed-by: Keith Randall --- src/cmd/compile/internal/loong64/ssa.go | 153 +++++++++++++++++------- src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go | 1 + src/cmd/compile/internal/ssa/opGen.go | 1 + 3 files changed, 115 insertions(+), 40 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go index 84bbf9b394..71953109c4 100644 --- a/src/cmd/compile/internal/loong64/ssa.go +++ b/src/cmd/compile/internal/loong64/ssa.go @@ -575,6 +575,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { case ssa.OpLOONG64LoweredZeroLoop: ptrReg := v.Args[0].Reg() countReg := v.RegTmp() + flagReg := int16(loong64.REGTMP) var off int64 n := v.AuxInt loopSize := int64(64) @@ -587,58 +588,119 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { // vs // 16 instuctions in the straightline code // Might as well use straightline code. - v.Fatalf("ZeroLoop size tool small %d", n) + v.Fatalf("ZeroLoop size too small %d", n) } - // Put iteration count in a register. - // MOVV $n/loopSize, countReg - p := s.Prog(loong64.AMOVV) - p.From.Type = obj.TYPE_CONST - p.From.Offset = n / loopSize - p.To.Type = obj.TYPE_REG - p.To.Reg = countReg - cntInit := p + // MOVV $n/loopSize, countReg + // MOVBU ir.Syms.Loong64HasLSX, flagReg + // BNE flagReg, lsxInit + // genericInit: + // for off = 0; off < loopSize; off += 8 { + // zero8(s, ptrReg, off) + // } + // ADDV $loopSize, ptrReg + // SUBV $1, countReg + // BNE countReg, genericInit + // JMP tail + // lsxInit: + // VXORV V31, V31, V31, v31 = 0 + // for off = 0; off < loopSize; off += 16 { + // zero16(s, V31, ptrReg, off) + // } + // ADDV $loopSize, ptrReg + // SUBV $1, countReg + // BNE countReg, lsxInit + // tail: + // n %= loopSize + // for off = 0; n >= 8; off += 8, n -= 8 { + // zero8(s, ptrReg, off) + // } + // + // if n != 0 { + // zero8(s, ptrReg, off+n-8) + // } - // Zero loopSize bytes starting at ptrReg. - for range loopSize / 8 { - // MOVV ZR, off(ptrReg) + p1 := s.Prog(loong64.AMOVV) + p1.From.Type = obj.TYPE_CONST + p1.From.Offset = n / loopSize + p1.To.Type = obj.TYPE_REG + p1.To.Reg = countReg + + p2 := s.Prog(loong64.AMOVBU) + p2.From.Type = obj.TYPE_MEM + p2.From.Name = obj.NAME_EXTERN + p2.From.Sym = ir.Syms.Loong64HasLSX + p2.To.Type = obj.TYPE_REG + p2.To.Reg = flagReg + + p3 := s.Prog(loong64.ABNE) + p3.From.Type = obj.TYPE_REG + p3.From.Reg = flagReg + p3.To.Type = obj.TYPE_BRANCH + + for off = 0; off < loopSize; off += 8 { zero8(s, ptrReg, off) - off += 8 } - // Increment ptrReg by loopSize. - // ADDV $loopSize, ptrReg - p = s.Prog(loong64.AADDV) - p.From.Type = obj.TYPE_CONST - p.From.Offset = loopSize - p.To.Type = obj.TYPE_REG - p.To.Reg = ptrReg + p4 := s.Prog(loong64.AADDV) + p4.From.Type = obj.TYPE_CONST + p4.From.Offset = loopSize + p4.To.Type = obj.TYPE_REG + p4.To.Reg = ptrReg - // Decrement loop count. - // SUBV $1, countReg - p = s.Prog(loong64.ASUBV) - p.From.Type = obj.TYPE_CONST - p.From.Offset = 1 - p.To.Type = obj.TYPE_REG - p.To.Reg = countReg + p5 := s.Prog(loong64.ASUBV) + p5.From.Type = obj.TYPE_CONST + p5.From.Offset = 1 + p5.To.Type = obj.TYPE_REG + p5.To.Reg = countReg - // Jump to loop header if we're not done yet. - // BNE countReg, loop header - p = s.Prog(loong64.ABNE) - p.From.Type = obj.TYPE_REG - p.From.Reg = countReg - p.To.Type = obj.TYPE_BRANCH - p.To.SetTarget(cntInit.Link) + p6 := s.Prog(loong64.ABNE) + p6.From.Type = obj.TYPE_REG + p6.From.Reg = countReg + p6.To.Type = obj.TYPE_BRANCH + p6.To.SetTarget(p3.Link) + + p7 := s.Prog(obj.AJMP) + p7.To.Type = obj.TYPE_BRANCH + + p8 := s.Prog(loong64.AVXORV) + p8.From.Type = obj.TYPE_REG + p8.From.Reg = loong64.REG_V31 + p8.To.Type = obj.TYPE_REG + p8.To.Reg = loong64.REG_V31 + p3.To.SetTarget(p8) + + for off = 0; off < loopSize; off += 16 { + zero16(s, loong64.REG_V31, ptrReg, off) + } + + p9 := s.Prog(loong64.AADDV) + p9.From.Type = obj.TYPE_CONST + p9.From.Offset = loopSize + p9.To.Type = obj.TYPE_REG + p9.To.Reg = ptrReg + + p10 := s.Prog(loong64.ASUBV) + p10.From.Type = obj.TYPE_CONST + p10.From.Offset = 1 + p10.To.Type = obj.TYPE_REG + p10.To.Reg = countReg + + p11 := s.Prog(loong64.ABNE) + p11.From.Type = obj.TYPE_REG + p11.From.Reg = countReg + p11.To.Type = obj.TYPE_BRANCH + p11.To.SetTarget(p8.Link) + + p12 := s.Prog(obj.ANOP) + p7.To.SetTarget(p12) // Multiples of the loop size are now done. n %= loopSize - - off = 0 // Write any fractional portion. - for n >= 8 { - // MOVV ZR, off(ptrReg) + for off = 0; n >= 8; off += 8 { + // MOVV ZR, off(ptrReg) zero8(s, ptrReg, off) - off += 8 n -= 8 } @@ -1333,7 +1395,7 @@ func move8(s *ssagen.State, src, dst, tmp int16, off int64) { // zero8 zeroes 8 bytes at reg+off. func zero8(s *ssagen.State, reg int16, off int64) { - // MOVV ZR, off(reg) + // MOVV ZR, off(reg) p := s.Prog(loong64.AMOVV) p.From.Type = obj.TYPE_REG p.From.Reg = loong64.REGZERO @@ -1341,3 +1403,14 @@ func zero8(s *ssagen.State, reg int16, off int64) { p.To.Reg = reg p.To.Offset = off } + +// zero16 zeroes 16 bytes at reg+off. +func zero16(s *ssagen.State, regZero, regBase int16, off int64) { + // VMOVQ regZero, off(regBase) + p := s.Prog(loong64.AVMOVQ) + p.From.Type = obj.TYPE_REG + p.From.Reg = regZero + p.To.Type = obj.TYPE_MEM + p.To.Reg = regBase + p.To.Offset = off +} diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go index 7e8b8bf497..81d3a3665b 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go @@ -388,6 +388,7 @@ func init() { argLength: 2, reg: regInfo{ inputs: []regMask{gp}, + clobbers: buildReg("F31"), clobbersArg0: true, }, faultOnNilArg0: true, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 264f4b3bf3..944e1d7854 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -26107,6 +26107,7 @@ var opcodeTable = [...]opInfo{ inputs: []inputInfo{ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 }, + clobbers: 2305843009213693952, // F31 clobbersArg0: true, }, }, -- cgit v1.3-5-g9baa From 32f5aadd2ffc60421c62b185fa7668012fb5e73e Mon Sep 17 00:00:00 2001 From: "khr@golang.org" Date: Fri, 12 Sep 2025 14:43:19 -0700 Subject: cmd/compile: stack allocate backing stores during append We can already stack allocate the backing store during append if the resulting backing store doesn't escape. See CL 664299. This CL enables us to often stack allocate the backing store during append *even if* the result escapes. Typically, for code like: func f(n int) []int { var r []int for i := range n { r = append(r, i) } return r } the backing store for r escapes, but only by returning it. Could we operate with r on the stack for most of its lifeime, and only move it to the heap at the return point? The current implementation of append will need to do an allocation each time it calls growslice. This will happen on the 1st, 2nd, 4th, 8th, etc. append calls. The allocations done by all but the last growslice call will then immediately be garbage. We'd like to avoid doing some of those intermediate allocations if possible. We rewrite the above code by introducing a move2heap operation: func f(n int) []int { var r []int for i := range n { r = append(r, i) } r = move2heap(r) return r } Using the move2heap runtime function, which does: move2heap(r): If r is already backed by heap storage, return r. Otherwise, copy r to the heap and return the copy. Now we can treat the backing store of r allocated at the append site as not escaping. Previous stack allocation optimizations now apply, which can use a fixed-size stack-allocated backing store for r when appending. See the description in cmd/compile/internal/slice/slice.go for how we ensure that this optimization is safe. Change-Id: I81f36e58bade2241d07f67967d8d547fff5302b8 Reviewed-on: https://go-review.googlesource.com/c/go/+/707755 Reviewed-by: Keith Randall Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/deadlocals/deadlocals.go | 5 + src/cmd/compile/internal/escape/leaks.go | 18 + src/cmd/compile/internal/gc/main.go | 3 + src/cmd/compile/internal/ir/expr.go | 26 ++ src/cmd/compile/internal/ir/name.go | 2 +- src/cmd/compile/internal/ir/node.go | 1 + src/cmd/compile/internal/ir/node_gen.go | 28 ++ src/cmd/compile/internal/ir/op_string.go | 19 +- src/cmd/compile/internal/ir/stmt.go | 1 + src/cmd/compile/internal/ir/symtab.go | 5 + src/cmd/compile/internal/slice/slice.go | 455 +++++++++++++++++++++ src/cmd/compile/internal/ssa/_gen/AMD64Ops.go | 3 +- src/cmd/compile/internal/ssa/opGen.go | 2 +- src/cmd/compile/internal/ssagen/ssa.go | 266 ++++++++++-- .../compile/internal/typecheck/_builtin/runtime.go | 6 + src/cmd/compile/internal/typecheck/builtin.go | 212 +++++----- src/cmd/compile/internal/walk/expr.go | 5 + src/runtime/slice.go | 104 +++++ src/runtime/slice_test.go | 319 +++++++++++++++ test/codegen/append.go | 190 +++++++++ 20 files changed, 1519 insertions(+), 151 deletions(-) create mode 100644 src/cmd/compile/internal/slice/slice.go create mode 100644 test/codegen/append.go (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/deadlocals/deadlocals.go b/src/cmd/compile/internal/deadlocals/deadlocals.go index 238450416a..55ad0387a4 100644 --- a/src/cmd/compile/internal/deadlocals/deadlocals.go +++ b/src/cmd/compile/internal/deadlocals/deadlocals.go @@ -44,6 +44,11 @@ func Funcs(fns []*ir.Func) { *as.lhs = ir.BlankNode *as.rhs = zero } + if len(assigns) > 0 { + // k.Defn might be pointing at one of the + // assignments we're overwriting. + k.Defn = nil + } } } } diff --git a/src/cmd/compile/internal/escape/leaks.go b/src/cmd/compile/internal/escape/leaks.go index 942f87d2a2..176bccd847 100644 --- a/src/cmd/compile/internal/escape/leaks.go +++ b/src/cmd/compile/internal/escape/leaks.go @@ -124,3 +124,21 @@ func parseLeaks(s string) leaks { copy(l[:], s[4:]) return l } + +func ParseLeaks(s string) leaks { + return parseLeaks(s) +} + +// Any reports whether the value flows anywhere at all. +func (l leaks) Any() bool { + // TODO: do mutator/callee matter? + if l.Heap() >= 0 || l.Mutator() >= 0 || l.Callee() >= 0 { + return true + } + for i := range numEscResults { + if l.Result(i) >= 0 { + return true + } + } + return false +} diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index 42e2afaee4..ef6a5d6017 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -22,6 +22,7 @@ import ( "cmd/compile/internal/pkginit" "cmd/compile/internal/reflectdata" "cmd/compile/internal/rttype" + "cmd/compile/internal/slice" "cmd/compile/internal/ssa" "cmd/compile/internal/ssagen" "cmd/compile/internal/staticinit" @@ -266,6 +267,8 @@ func Main(archInit func(*ssagen.ArchInfo)) { base.Timer.Start("fe", "escapes") escape.Funcs(typecheck.Target.Funcs) + slice.Funcs(typecheck.Target.Funcs) + loopvar.LogTransformations(transformed) // Collect information for go:nowritebarrierrec diff --git a/src/cmd/compile/internal/ir/expr.go b/src/cmd/compile/internal/ir/expr.go index 7a75ff40f2..dd1b94aa0d 100644 --- a/src/cmd/compile/internal/ir/expr.go +++ b/src/cmd/compile/internal/ir/expr.go @@ -192,6 +192,7 @@ type CallExpr struct { IsDDD bool GoDefer bool // whether this call is part of a go or defer statement NoInline bool // whether this call must not be inlined + UseBuf bool // use stack buffer for backing store (OAPPEND only) } func NewCallExpr(pos src.XPos, op Op, fun Node, args []Node) *CallExpr { @@ -1269,3 +1270,28 @@ func MethodExprFunc(n Node) *types.Field { base.Fatalf("unexpected node: %v (%v)", n, n.Op()) panic("unreachable") } + +// A MoveToHeapExpr takes a slice as input and moves it to the +// heap (by copying the backing store if it is not already +// on the heap). +type MoveToHeapExpr struct { + miniExpr + Slice Node + // An expression that evaluates to a *runtime._type + // that represents the slice element type. + RType Node + // If PreserveCapacity is true, the capacity of + // the resulting slice, and all of the elements in + // [len:cap], must be preserved. + // If PreserveCapacity is false, the resulting + // slice may have any capacity >= len, with any + // elements in the resulting [len:cap] range zeroed. + PreserveCapacity bool +} + +func NewMoveToHeapExpr(pos src.XPos, slice Node) *MoveToHeapExpr { + n := &MoveToHeapExpr{Slice: slice} + n.pos = pos + n.op = OMOVE2HEAP + return n +} diff --git a/src/cmd/compile/internal/ir/name.go b/src/cmd/compile/internal/ir/name.go index 01f1c0c502..63f1b1c931 100644 --- a/src/cmd/compile/internal/ir/name.go +++ b/src/cmd/compile/internal/ir/name.go @@ -43,7 +43,7 @@ type Name struct { Func *Func // TODO(austin): nil for I.M Offset_ int64 val constant.Value - Opt any // for use by escape analysis + Opt any // for use by escape or slice analysis Embed *[]Embed // list of embedded files, for ONAME var // For a local variable (not param) or extern, the initializing assignment (OAS or OAS2). diff --git a/src/cmd/compile/internal/ir/node.go b/src/cmd/compile/internal/ir/node.go index 8c61bb6ed5..f26f61cb18 100644 --- a/src/cmd/compile/internal/ir/node.go +++ b/src/cmd/compile/internal/ir/node.go @@ -293,6 +293,7 @@ const ( OLINKSYMOFFSET // offset within a name OJUMPTABLE // A jump table structure for implementing dense expression switches OINTERFACESWITCH // A type switch with interface cases + OMOVE2HEAP // Promote a stack-backed slice to heap // opcodes for generics ODYNAMICDOTTYPE // x = i.(T) where T is a type parameter (or derived from a type parameter) diff --git a/src/cmd/compile/internal/ir/node_gen.go b/src/cmd/compile/internal/ir/node_gen.go index 2221045c93..4298b3a43d 100644 --- a/src/cmd/compile/internal/ir/node_gen.go +++ b/src/cmd/compile/internal/ir/node_gen.go @@ -1175,6 +1175,34 @@ func (n *MakeExpr) editChildrenWithHidden(edit func(Node) Node) { } } +func (n *MoveToHeapExpr) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) } +func (n *MoveToHeapExpr) copy() Node { + c := *n + c.init = copyNodes(c.init) + return &c +} +func (n *MoveToHeapExpr) doChildren(do func(Node) bool) bool { + if doNodes(n.init, do) { + return true + } + if n.Slice != nil && do(n.Slice) { + return true + } + return false +} +func (n *MoveToHeapExpr) doChildrenWithHidden(do func(Node) bool) bool { + return n.doChildren(do) +} +func (n *MoveToHeapExpr) editChildren(edit func(Node) Node) { + editNodes(n.init, edit) + if n.Slice != nil { + n.Slice = edit(n.Slice).(Node) + } +} +func (n *MoveToHeapExpr) editChildrenWithHidden(edit func(Node) Node) { + n.editChildren(edit) +} + func (n *Name) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) } func (n *NilExpr) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) } diff --git a/src/cmd/compile/internal/ir/op_string.go b/src/cmd/compile/internal/ir/op_string.go index 7494beee4c..f042ad84a4 100644 --- a/src/cmd/compile/internal/ir/op_string.go +++ b/src/cmd/compile/internal/ir/op_string.go @@ -151,18 +151,19 @@ func _() { _ = x[OLINKSYMOFFSET-140] _ = x[OJUMPTABLE-141] _ = x[OINTERFACESWITCH-142] - _ = x[ODYNAMICDOTTYPE-143] - _ = x[ODYNAMICDOTTYPE2-144] - _ = x[ODYNAMICTYPE-145] - _ = x[OTAILCALL-146] - _ = x[OGETG-147] - _ = x[OGETCALLERSP-148] - _ = x[OEND-149] + _ = x[OMOVE2HEAP-143] + _ = x[ODYNAMICDOTTYPE-144] + _ = x[ODYNAMICDOTTYPE2-145] + _ = x[ODYNAMICTYPE-146] + _ = x[OTAILCALL-147] + _ = x[OGETG-148] + _ = x[OGETCALLERSP-149] + _ = x[OEND-150] } -const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLEARCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVNOPCOPYDCLDCLFUNCDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTLNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERSTRINGHEADERRECOVERRECVRUNESTRSELRECV2MINMAXREALIMAGCOMPLEXUNSAFEADDUNSAFESLICEUNSAFESLICEDATAUNSAFESTRINGUNSAFESTRINGDATAMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWINLCALLMAKEFACEITABIDATASPTRCFUNCCHECKNILRESULTINLMARKLINKSYMOFFSETJUMPTABLEINTERFACESWITCHDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERSPEND" +const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLEARCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVNOPCOPYDCLDCLFUNCDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTLNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERSTRINGHEADERRECOVERRECVRUNESTRSELRECV2MINMAXREALIMAGCOMPLEXUNSAFEADDUNSAFESLICEUNSAFESLICEDATAUNSAFESTRINGUNSAFESTRINGDATAMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWINLCALLMAKEFACEITABIDATASPTRCFUNCCHECKNILRESULTINLMARKLINKSYMOFFSETJUMPTABLEINTERFACESWITCHMOVE2HEAPDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERSPEND" -var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 129, 141, 143, 146, 156, 163, 170, 177, 181, 185, 193, 201, 210, 213, 218, 223, 230, 237, 243, 252, 260, 268, 274, 278, 287, 294, 298, 301, 308, 314, 317, 323, 330, 338, 342, 349, 357, 359, 361, 363, 365, 367, 369, 374, 379, 387, 390, 399, 402, 406, 414, 421, 430, 443, 446, 449, 452, 455, 458, 461, 467, 470, 473, 479, 483, 486, 490, 495, 500, 507, 512, 516, 521, 529, 537, 543, 552, 563, 575, 582, 586, 593, 601, 604, 607, 611, 615, 622, 631, 642, 657, 669, 685, 693, 702, 707, 712, 716, 724, 729, 733, 736, 740, 742, 747, 749, 754, 760, 766, 772, 778, 785, 793, 797, 802, 806, 811, 819, 825, 832, 845, 854, 869, 883, 898, 909, 917, 921, 932, 935} +var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 129, 141, 143, 146, 156, 163, 170, 177, 181, 185, 193, 201, 210, 213, 218, 223, 230, 237, 243, 252, 260, 268, 274, 278, 287, 294, 298, 301, 308, 314, 317, 323, 330, 338, 342, 349, 357, 359, 361, 363, 365, 367, 369, 374, 379, 387, 390, 399, 402, 406, 414, 421, 430, 443, 446, 449, 452, 455, 458, 461, 467, 470, 473, 479, 483, 486, 490, 495, 500, 507, 512, 516, 521, 529, 537, 543, 552, 563, 575, 582, 586, 593, 601, 604, 607, 611, 615, 622, 631, 642, 657, 669, 685, 693, 702, 707, 712, 716, 724, 729, 733, 736, 740, 742, 747, 749, 754, 760, 766, 772, 778, 785, 793, 797, 802, 806, 811, 819, 825, 832, 845, 854, 869, 878, 892, 907, 918, 926, 930, 941, 944} func (i Op) String() string { if i >= Op(len(_Op_index)-1) { diff --git a/src/cmd/compile/internal/ir/stmt.go b/src/cmd/compile/internal/ir/stmt.go index 0801ecdd9e..affa5f4551 100644 --- a/src/cmd/compile/internal/ir/stmt.go +++ b/src/cmd/compile/internal/ir/stmt.go @@ -42,6 +42,7 @@ func (*Decl) isStmt() {} type Stmt interface { Node isStmt() + PtrInit() *Nodes } // A miniStmt is a miniNode with extra fields common to statements. diff --git a/src/cmd/compile/internal/ir/symtab.go b/src/cmd/compile/internal/ir/symtab.go index f8eb457880..828c3b553a 100644 --- a/src/cmd/compile/internal/ir/symtab.go +++ b/src/cmd/compile/internal/ir/symtab.go @@ -29,6 +29,11 @@ type symsStruct struct { GCWriteBarrier [8]*obj.LSym Goschedguarded *obj.LSym Growslice *obj.LSym + GrowsliceBuf *obj.LSym + MoveSlice *obj.LSym + MoveSliceNoScan *obj.LSym + MoveSliceNoCap *obj.LSym + MoveSliceNoCapNoScan *obj.LSym InterfaceSwitch *obj.LSym MallocGC *obj.LSym MallocGCSmallNoScan [27]*obj.LSym diff --git a/src/cmd/compile/internal/slice/slice.go b/src/cmd/compile/internal/slice/slice.go new file mode 100644 index 0000000000..7a32e7adbd --- /dev/null +++ b/src/cmd/compile/internal/slice/slice.go @@ -0,0 +1,455 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package slice + +// This file implements a stack-allocation optimization +// for the backing store of slices. +// +// Consider the code: +// +// var s []int +// for i := range ... { +// s = append(s, i) +// } +// return s +// +// Some of the append operations will need to do an allocation +// by calling growslice. This will happen on the 1st, 2nd, 4th, +// 8th, etc. append calls. The allocations done by all but the +// last growslice call will then immediately be garbage. +// +// We'd like to avoid doing some of those intermediate +// allocations if possible. +// +// If we can determine that the "return s" statement is the +// *only* way that the backing store for s escapes, then we +// can rewrite the code to something like: +// +// var s []int +// for i := range N { +// s = append(s, i) +// } +// s = move2heap(s) +// return s +// +// Using the move2heap runtime function, which does: +// +// move2heap(s): +// If s is not backed by a stackframe-allocated +// backing store, return s. Otherwise, copy s +// to the heap and return the copy. +// +// Now we can treat the backing store of s allocated at the +// append site as not escaping. Previous stack allocation +// optimizations now apply, which can use a fixed-size +// stack-allocated backing store for s when appending. +// (See ../ssagen/ssa.go:(*state).append) +// +// It is tricky to do this optimization safely. To describe +// our analysis, we first define what an "exclusive" slice +// variable is. +// +// A slice variable (a variable of slice type) is called +// "exclusive" if, when it has a reference to a +// stackframe-allocated backing store, it is the only +// variable with such a reference. +// +// In other words, a slice variable is exclusive if +// any of the following holds: +// 1) It points to a heap-allocated backing store +// 2) It points to a stack-allocated backing store +// for any parent frame. +// 3) It is the only variable that references its +// backing store. +// 4) It is nil. +// +// The nice thing about exclusive slice variables is that +// it is always safe to do +// s = move2heap(s) +// whenever s is an exclusive slice variable. Because no +// one else has a reference to the backing store, no one +// else can tell that we moved the backing store from one +// location to another. +// +// Note that exclusiveness is a dynamic property. A slice +// variable may be exclusive during some parts of execution +// and not exclusive during others. +// +// The following operations set or preserve the exclusivity +// of a slice variable s: +// s = nil +// s = append(s, ...) +// s = s[i:j] +// ... = s[i] +// s[i] = ... +// f(s) where f does not escape its argument +// Other operations destroy exclusivity. A non-exhaustive list includes: +// x = s +// *p = s +// f(s) where f escapes its argument +// return s +// To err on the safe side, we white list exclusivity-preserving +// operations and we asssume that any other operations that mention s +// destroy its exclusivity. +// +// Our strategy is to move the backing store of s to the heap before +// any exclusive->nonexclusive transition. That way, s will only ever +// have a reference to a stack backing store while it is exclusive. +// +// move2heap for a variable s is implemented with: +// if s points to within the stack frame { +// s2 := make([]T, s.len, s.cap) +// copy(s2[:s.cap], s[:s.cap]) +// s = s2 +// } +// Note that in general we need to copy all of s[:cap(s)] elements when +// moving to the heap. As an optimization, we keep track of slice variables +// whose capacity, and the elements in s[len(s):cap(s)], are never accessed. +// For those slice variables, we can allocate to the next size class above +// the length, which saves memory and copying cost. + +import ( + "cmd/compile/internal/base" + "cmd/compile/internal/escape" + "cmd/compile/internal/ir" + "cmd/compile/internal/reflectdata" +) + +func Funcs(all []*ir.Func) { + if base.Flag.N != 0 { + return + } + for _, fn := range all { + analyze(fn) + } +} + +func analyze(fn *ir.Func) { + type sliceInfo struct { + // Slice variable. + s *ir.Name + + // Count of uses that this pass understands. + okUses int32 + // Count of all uses found. + allUses int32 + + // A place where the slice variable transitions from + // exclusive to nonexclusive. + // We could keep track of more than one, but one is enough for now. + // Currently, this can be either a return statement or + // an assignment. + // TODO: other possible transitions? + transition ir.Stmt + + // Each s = append(s, ...) instance we found. + appends []*ir.CallExpr + + // Weight of the number of s = append(s, ...) instances we found. + // The optimizations we do are only really useful if there are at + // least weight 2. (Note: appends in loops have weight >= 2.) + appendWeight int + + // Whether we ever do cap(s), or other operations that use cap(s) + // (possibly implicitly), like s[i:j]. + capUsed bool + } + + // Every variable (*ir.Name) that we are tracking will have + // a non-nil *sliceInfo in its Opt field. + haveLocalSlice := false + maxStackSize := int64(base.Debug.VariableMakeThreshold) + var namedRets []*ir.Name + for _, s := range fn.Dcl { + if !s.Type().IsSlice() { + continue + } + if s.Type().Elem().Size() > maxStackSize { + continue + } + if !base.VariableMakeHash.MatchPos(s.Pos(), nil) { + continue + } + s.Opt = &sliceInfo{s: s} // start tracking s + haveLocalSlice = true + if s.Class == ir.PPARAMOUT { + namedRets = append(namedRets, s) + } + } + if !haveLocalSlice { + return + } + + // Keep track of loop depth while walking. + loopDepth := 0 + + // tracking returns the info for the slice variable if n is a slice + // variable that we're still considering, or nil otherwise. + tracking := func(n ir.Node) *sliceInfo { + if n == nil || n.Op() != ir.ONAME { + return nil + } + s := n.(*ir.Name) + if s.Opt == nil { + return nil + } + return s.Opt.(*sliceInfo) + } + + // addTransition(n, loc) records that s experiences an exclusive->nonexclusive + // transition somewhere within loc. + addTransition := func(i *sliceInfo, loc ir.Stmt) { + if i.transition != nil { + // We only keep track of a single exclusive->nonexclusive transition + // for a slice variable. If we find more than one, give up. + // (More than one transition location would be fine, but we would + // start to get worried about introducing too much additional code.) + i.s.Opt = nil + return + } + i.transition = loc + } + + // Examine an x = y assignment that occurs somewhere within statement stmt. + assign := func(x, y ir.Node, stmt ir.Stmt) { + if i := tracking(x); i != nil { + // s = y. Check for understood patterns for y. + if y == nil || y.Op() == ir.ONIL { + // s = nil is ok. + i.okUses++ + } else if y.Op() == ir.OSLICELIT { + // s = []{...} is ok. + // Note: this reveals capacity. Should it? + i.okUses++ + i.capUsed = true + } else if y.Op() == ir.OSLICE { + y := y.(*ir.SliceExpr) + if y.X == i.s { + // s = s[...:...] is ok + i.okUses += 2 + i.capUsed = true + } + } else if y.Op() == ir.OAPPEND { + y := y.(*ir.CallExpr) + if y.Args[0] == i.s { + // s = append(s, ...) is ok + i.okUses += 2 + i.appends = append(i.appends, y) + i.appendWeight += 1 + loopDepth + } + // TODO: s = append(nil, ...)? + } + // Note that technically s = make([]T, ...) preserves exclusivity, but + // we don't track that because we assume users who wrote that know + // better than the compiler does. + + // TODO: figure out how to handle s = fn(..., s, ...) + // It would be nice to maintain exclusivity of s in this situation. + // But unfortunately, fn can return one of its other arguments, which + // may be a slice with a stack-allocated backing store other than s. + // (which may have preexisting references to its backing store). + // + // Maybe we could do it if s is the only argument? + } + + if i := tracking(y); i != nil { + // ... = s + // Treat this as an exclusive->nonexclusive transition. + i.okUses++ + addTransition(i, stmt) + } + } + + var do func(ir.Node) bool + do = func(n ir.Node) bool { + if n == nil { + return false + } + switch n.Op() { + case ir.ONAME: + if i := tracking(n); i != nil { + // A use of a slice variable. Count it. + i.allUses++ + } + case ir.ODCL: + n := n.(*ir.Decl) + if i := tracking(n.X); i != nil { + i.okUses++ + } + case ir.OINDEX: + n := n.(*ir.IndexExpr) + if i := tracking(n.X); i != nil { + // s[i] is ok. + i.okUses++ + } + case ir.OLEN: + n := n.(*ir.UnaryExpr) + if i := tracking(n.X); i != nil { + // len(s) is ok + i.okUses++ + } + case ir.OCAP: + n := n.(*ir.UnaryExpr) + if i := tracking(n.X); i != nil { + // cap(s) is ok + i.okUses++ + i.capUsed = true + } + case ir.OADDR: + n := n.(*ir.AddrExpr) + if n.X.Op() == ir.OINDEX { + n := n.X.(*ir.IndexExpr) + if i := tracking(n.X); i != nil { + // &s[i] is definitely a nonexclusive transition. + // (We need this case because s[i] is ok, but &s[i] is not.) + i.s.Opt = nil + } + } + case ir.ORETURN: + n := n.(*ir.ReturnStmt) + for _, x := range n.Results { + if i := tracking(x); i != nil { + i.okUses++ + // We go exclusive->nonexclusive here + addTransition(i, n) + } + } + if len(n.Results) == 0 { + // Uses of named result variables are implicit here. + for _, x := range namedRets { + if i := tracking(x); i != nil { + addTransition(i, n) + } + } + } + case ir.OCALLFUNC: + n := n.(*ir.CallExpr) + for idx, arg := range n.Args { + if i := tracking(arg); i != nil { + if !argLeak(n, idx) { + // Passing s to a nonescaping arg is ok. + i.okUses++ + i.capUsed = true + } + } + } + case ir.ORANGE: + // Range over slice is ok. + n := n.(*ir.RangeStmt) + if i := tracking(n.X); i != nil { + i.okUses++ + } + case ir.OAS: + n := n.(*ir.AssignStmt) + assign(n.X, n.Y, n) + case ir.OAS2: + n := n.(*ir.AssignListStmt) + for i := range len(n.Lhs) { + assign(n.Lhs[i], n.Rhs[i], n) + } + case ir.OCLOSURE: + n := n.(*ir.ClosureExpr) + for _, v := range n.Func.ClosureVars { + do(v.Outer) + } + } + if n.Op() == ir.OFOR || n.Op() == ir.ORANGE { + // Note: loopDepth isn't really right for init portion + // of the for statement, but that's ok. Correctness + // does not depend on depth info. + loopDepth++ + defer func() { loopDepth-- }() + } + // Check all the children. + ir.DoChildren(n, do) + return false + } + + // Run the analysis over the whole body. + for _, stmt := range fn.Body { + do(stmt) + } + + // Process accumulated info to find slice variables + // that we can allocate on the stack. + for _, s := range fn.Dcl { + if s.Opt == nil { + continue + } + i := s.Opt.(*sliceInfo) + s.Opt = nil + if i.okUses != i.allUses { + // Some use of i.s that don't understand lurks. Give up. + continue + } + + // At this point, we've decided that we *can* do + // the optimization. + + if i.transition == nil { + // Exclusive for its whole lifetime. That means it + // didn't escape. We can already handle nonescaping + // slices without this pass. + continue + } + if i.appendWeight < 2 { + // This optimization only really helps if there is + // (dynamically) more than one append. + continue + } + + // Commit point - at this point we've decided we *should* + // do the optimization. + + // Insert a move2heap operation before the exclusive->nonexclusive + // transition. + move := ir.NewMoveToHeapExpr(i.transition.Pos(), i.s) + if i.capUsed { + move.PreserveCapacity = true + } + move.RType = reflectdata.AppendElemRType(i.transition.Pos(), i.appends[0]) + move.SetType(i.s.Type()) + move.SetTypecheck(1) + as := ir.NewAssignStmt(i.transition.Pos(), i.s, move) + as.SetTypecheck(1) + i.transition.PtrInit().Prepend(as) + // Note: we prepend because we need to put the move2heap + // operation first, before any other init work, as the transition + // might occur in the init work. + + // Now that we've inserted a move2heap operation before every + // exclusive -> nonexclusive transition, appends can now use + // stack backing stores. + // (This is the whole point of this pass, to enable stack + // allocation of append backing stores.) + for _, a := range i.appends { + a.SetEsc(ir.EscNone) + if i.capUsed { + a.UseBuf = true + } + } + } +} + +// argLeak reports if the idx'th argument to the call n escapes anywhere +// (to the heap, another argument, return value, etc.) +// If unknown returns true. +func argLeak(n *ir.CallExpr, idx int) bool { + if n.Op() != ir.OCALLFUNC { + return true + } + fn := ir.StaticCalleeName(ir.StaticValue(n.Fun)) + if fn == nil { + return true + } + fntype := fn.Type() + if recv := fntype.Recv(); recv != nil { + if idx == 0 { + return escape.ParseLeaks(recv.Note).Any() + } + idx-- + } + return escape.ParseLeaks(fntype.Params()[idx].Note).Any() +} diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go index e42b54398d..b23ccc4a70 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go @@ -118,6 +118,7 @@ func init() { gp11sb = regInfo{inputs: []regMask{gpspsbg}, outputs: gponly} gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly} + gp21sp2 = regInfo{inputs: []regMask{gp, gpsp}, outputs: gponly} gp21sb = regInfo{inputs: []regMask{gpspsbg, gpsp}, outputs: gponly} gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}} gp31shift = regInfo{inputs: []regMask{gp, gp, cx}, outputs: []regMask{gp}} @@ -262,7 +263,7 @@ func init() { {name: "ADDQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "ADDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, - {name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true, clobberFlags: true}, + {name: "SUBQ", argLength: 2, reg: gp21sp2, asm: "SUBQ", resultInArg0: true, clobberFlags: true}, {name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true}, {name: "SUBQconst", argLength: 1, reg: gp11, asm: "SUBQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, {name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 944e1d7854..f13373d2c0 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -7643,7 +7643,7 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 }, outputs: []outputInfo{ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index 3dea733bbd..96be8ddd86 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -124,6 +124,11 @@ func InitConfig() { ir.Syms.GCWriteBarrier[7] = typecheck.LookupRuntimeFunc("gcWriteBarrier8") ir.Syms.Goschedguarded = typecheck.LookupRuntimeFunc("goschedguarded") ir.Syms.Growslice = typecheck.LookupRuntimeFunc("growslice") + ir.Syms.GrowsliceBuf = typecheck.LookupRuntimeFunc("growsliceBuf") + ir.Syms.MoveSlice = typecheck.LookupRuntimeFunc("moveSlice") + ir.Syms.MoveSliceNoScan = typecheck.LookupRuntimeFunc("moveSliceNoScan") + ir.Syms.MoveSliceNoCap = typecheck.LookupRuntimeFunc("moveSliceNoCap") + ir.Syms.MoveSliceNoCapNoScan = typecheck.LookupRuntimeFunc("moveSliceNoCapNoScan") ir.Syms.InterfaceSwitch = typecheck.LookupRuntimeFunc("interfaceSwitch") for i := 1; i < len(ir.Syms.MallocGCSmallNoScan); i++ { ir.Syms.MallocGCSmallNoScan[i] = typecheck.LookupRuntimeFunc(fmt.Sprintf("mallocgcSmallNoScanSC%d", i)) @@ -1091,6 +1096,23 @@ type state struct { // Block starting position, indexed by block id. blockStarts []src.XPos + + // Information for stack allocation. Indexed by the first argument + // to an append call. Normally a slice-typed variable, but not always. + backingStores map[ir.Node]*backingStoreInfo +} + +type backingStoreInfo struct { + // Size of backing store array (in elements) + K int64 + // Stack-allocated backing store variable. + store *ir.Name + // Dynamic boolean variable marking the fact that we used this backing store. + used *ir.Name + // Have we used this variable statically yet? This is just a hint + // to avoid checking the dynamic variable if the answer is obvious. + // (usedStatic == true implies used == true) + usedStatic bool } type funcLine struct { @@ -3673,6 +3695,9 @@ func (s *state) exprCheckPtr(n ir.Node, checkPtrOK bool) *ssa.Value { case ir.OAPPEND: return s.append(n.(*ir.CallExpr), false) + case ir.OMOVE2HEAP: + return s.move2heap(n.(*ir.MoveToHeapExpr)) + case ir.OMIN, ir.OMAX: return s.minMax(n.(*ir.CallExpr)) @@ -3734,6 +3759,68 @@ func (s *state) resultAddrOfCall(c *ssa.Value, which int64, t *types.Type) *ssa. return addr } +// Get backing store information for an append call. +func (s *state) getBackingStoreInfoForAppend(n *ir.CallExpr) *backingStoreInfo { + if n.Esc() != ir.EscNone { + return nil + } + return s.getBackingStoreInfo(n.Args[0]) +} +func (s *state) getBackingStoreInfo(n ir.Node) *backingStoreInfo { + t := n.Type() + et := t.Elem() + maxStackSize := int64(base.Debug.VariableMakeThreshold) + if et.Size() == 0 || et.Size() > maxStackSize { + return nil + } + if base.Flag.N != 0 { + return nil + } + if !base.VariableMakeHash.MatchPos(n.Pos(), nil) { + return nil + } + i := s.backingStores[n] + if i != nil { + return i + } + + // Build type of backing store. + K := maxStackSize / et.Size() // rounds down + KT := types.NewArray(et, K) + KT.SetNoalg(true) + types.CalcArraySize(KT) + // Align more than naturally for the type KT. See issue 73199. + align := types.NewArray(types.Types[types.TUINTPTR], 0) + types.CalcArraySize(align) + storeTyp := types.NewStruct([]*types.Field{ + {Sym: types.BlankSym, Type: align}, + {Sym: types.BlankSym, Type: KT}, + }) + storeTyp.SetNoalg(true) + types.CalcStructSize(storeTyp) + + // Make backing store variable. + backingStore := typecheck.TempAt(n.Pos(), s.curfn, storeTyp) + backingStore.SetAddrtaken(true) + + // Make "used" boolean. + used := typecheck.TempAt(n.Pos(), s.curfn, types.Types[types.TBOOL]) + if s.curBlock == s.f.Entry { + s.vars[used] = s.constBool(false) + } else { + // initialize this variable at end of entry block + s.defvars[s.f.Entry.ID][used] = s.constBool(false) + } + + // Initialize an info structure. + if s.backingStores == nil { + s.backingStores = map[ir.Node]*backingStoreInfo{} + } + i = &backingStoreInfo{K: K, store: backingStore, used: used, usedStatic: false} + s.backingStores[n] = i + return i +} + // append converts an OAPPEND node to SSA. // If inplace is false, it converts the OAPPEND expression n to an ssa.Value, // adds it to s, and returns the Value. @@ -3824,9 +3911,29 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // A stack-allocated backing store could be used at every // append that qualifies, but we limit it in some cases to // avoid wasted code and stack space. - // TODO: handle ... append case. - maxStackSize := int64(base.Debug.VariableMakeThreshold) - if !inplace && n.Esc() == ir.EscNone && et.Size() > 0 && et.Size() <= maxStackSize && base.Flag.N == 0 && base.VariableMakeHash.MatchPos(n.Pos(), nil) && !s.appendTargets[sn] { + // + // Note that we have two different strategies. + // 1. The standard strategy is just to allocate the full + // backing store at the first append. + // 2. An alternate strategy is used when + // a. The backing store eventually escapes via move2heap + // and b. The capacity is used somehow + // In this case, we don't want to just allocate + // the full buffer at the first append, because when + // we move2heap the buffer to the heap when it escapes, + // we might end up wasting memory because we can't + // change the capacity. + // So in this case we use growsliceBuf to reuse the buffer + // and walk one step up the size class ladder each time. + // + // TODO: handle ... append case? Currently we handle only + // a fixed number of appended elements. + var info *backingStoreInfo + if !inplace { + info = s.getBackingStoreInfoForAppend(n) + } + + if !inplace && info != nil && !n.UseBuf && !info.usedStatic { // if l <= K { // if !used { // if oldLen == 0 { @@ -3850,43 +3957,19 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // It is ok to do it more often, but it is probably helpful only for // the first instance. TODO: this could use more tuning. Using ir.Node // as the key works for *ir.Name instances but probably nothing else. - if s.appendTargets == nil { - s.appendTargets = map[ir.Node]bool{} - } - s.appendTargets[sn] = true - - K := maxStackSize / et.Size() // rounds down - KT := types.NewArray(et, K) - KT.SetNoalg(true) - types.CalcArraySize(KT) - // Align more than naturally for the type KT. See issue 73199. - align := types.NewArray(types.Types[types.TUINTPTR], 0) - types.CalcArraySize(align) - storeTyp := types.NewStruct([]*types.Field{ - {Sym: types.BlankSym, Type: align}, - {Sym: types.BlankSym, Type: KT}, - }) - storeTyp.SetNoalg(true) - types.CalcStructSize(storeTyp) + info.usedStatic = true + // TODO: unset usedStatic somehow? usedTestBlock := s.f.NewBlock(ssa.BlockPlain) oldLenTestBlock := s.f.NewBlock(ssa.BlockPlain) bodyBlock := s.f.NewBlock(ssa.BlockPlain) growSlice := s.f.NewBlock(ssa.BlockPlain) - - // Make "used" boolean. - tBool := types.Types[types.TBOOL] - used := typecheck.TempAt(n.Pos(), s.curfn, tBool) - s.defvars[s.f.Entry.ID][used] = s.constBool(false) // initialize this variable at fn entry - - // Make backing store variable. tInt := types.Types[types.TINT] - backingStore := typecheck.TempAt(n.Pos(), s.curfn, storeTyp) - backingStore.SetAddrtaken(true) + tBool := types.Types[types.TBOOL] // if l <= K s.startBlock(grow) - kTest := s.newValue2(s.ssaOp(ir.OLE, tInt), tBool, l, s.constInt(tInt, K)) + kTest := s.newValue2(s.ssaOp(ir.OLE, tInt), tBool, l, s.constInt(tInt, info.K)) b := s.endBlock() b.Kind = ssa.BlockIf b.SetControl(kTest) @@ -3896,7 +3979,7 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // if !used s.startBlock(usedTestBlock) - usedTest := s.newValue1(ssa.OpNot, tBool, s.expr(used)) + usedTest := s.newValue1(ssa.OpNot, tBool, s.expr(info.used)) b = s.endBlock() b.Kind = ssa.BlockIf b.SetControl(usedTest) @@ -3917,18 +4000,18 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // var store struct { _ [0]uintptr; arr [K]T } s.startBlock(bodyBlock) if et.HasPointers() { - s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, backingStore, s.mem()) + s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, info.store, s.mem()) } - addr := s.addr(backingStore) - s.zero(storeTyp, addr) + addr := s.addr(info.store) + s.zero(info.store.Type(), addr) // s = store.arr[:l:K] s.vars[ptrVar] = addr s.vars[lenVar] = l // nargs would also be ok because of the oldLen==0 test. - s.vars[capVar] = s.constInt(tInt, K) + s.vars[capVar] = s.constInt(tInt, info.K) // used = true - s.assign(used, s.constBool(true), false, 0) + s.assign(info.used, s.constBool(true), false, 0) b = s.endBlock() b.AddEdgeTo(assign) @@ -3939,7 +4022,25 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // Call growslice s.startBlock(grow) taddr := s.expr(n.Fun) - r := s.rtcall(ir.Syms.Growslice, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr) + var r []*ssa.Value + if info != nil && n.UseBuf { + // Use stack-allocated buffer as backing store, if we can. + if et.HasPointers() && !info.usedStatic { + // Initialize in the function header. Not the best place, + // but it makes sure we don't scan this area before it is + // initialized. + mem := s.defvars[s.f.Entry.ID][memVar] + mem = s.f.Entry.NewValue1A(n.Pos(), ssa.OpVarDef, types.TypeMem, info.store, mem) + addr := s.f.Entry.NewValue2A(n.Pos(), ssa.OpLocalAddr, types.NewPtr(info.store.Type()), info.store, s.sp, mem) + mem = s.f.Entry.NewValue2I(n.Pos(), ssa.OpZero, types.TypeMem, info.store.Type().Size(), addr, mem) + mem.Aux = info.store.Type() + s.defvars[s.f.Entry.ID][memVar] = mem + info.usedStatic = true + } + r = s.rtcall(ir.Syms.GrowsliceBuf, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr, s.addr(info.store), s.constInt(types.Types[types.TINT], info.K)) + } else { + r = s.rtcall(ir.Syms.Growslice, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr) + } // Decompose output slice p = s.newValue1(ssa.OpSlicePtr, pt, r[0]) @@ -4026,6 +4127,95 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { return s.newValue3(ssa.OpSliceMake, n.Type(), p, l, c) } +func (s *state) move2heap(n *ir.MoveToHeapExpr) *ssa.Value { + // s := n.Slice + // if s.ptr points to current stack frame { + // s2 := make([]T, s.len, s.cap) + // copy(s2[:cap], s[:cap]) + // s = s2 + // } + // return s + + slice := s.expr(n.Slice) + et := slice.Type.Elem() + pt := types.NewPtr(et) + + info := s.getBackingStoreInfo(n) + if info == nil { + // Backing store will never be stack allocated, so + // move2heap is a no-op. + return slice + } + + // Decomposse input slice. + p := s.newValue1(ssa.OpSlicePtr, pt, slice) + l := s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], slice) + c := s.newValue1(ssa.OpSliceCap, types.Types[types.TINT], slice) + + moveBlock := s.f.NewBlock(ssa.BlockPlain) + mergeBlock := s.f.NewBlock(ssa.BlockPlain) + + s.vars[ptrVar] = p + s.vars[lenVar] = l + s.vars[capVar] = c + + // Decide if we need to move the slice backing store. + // It needs to be moved if it is currently on the stack. + sub := ssa.OpSub64 + less := ssa.OpLess64U + if s.config.PtrSize == 4 { + sub = ssa.OpSub32 + less = ssa.OpLess32U + } + callerSP := s.newValue1(ssa.OpGetCallerSP, types.Types[types.TUINTPTR], s.mem()) + frameSize := s.newValue2(sub, types.Types[types.TUINTPTR], callerSP, s.sp) + pInt := s.newValue2(ssa.OpConvert, types.Types[types.TUINTPTR], p, s.mem()) + off := s.newValue2(sub, types.Types[types.TUINTPTR], pInt, s.sp) + cond := s.newValue2(less, types.Types[types.TBOOL], off, frameSize) + + b := s.endBlock() + b.Kind = ssa.BlockIf + b.Likely = ssa.BranchUnlikely // fast path is to not have to call into runtime + b.SetControl(cond) + b.AddEdgeTo(moveBlock) + b.AddEdgeTo(mergeBlock) + + // Move the slice to heap + s.startBlock(moveBlock) + var newSlice *ssa.Value + if et.HasPointers() { + typ := s.expr(n.RType) + if n.PreserveCapacity { + newSlice = s.rtcall(ir.Syms.MoveSlice, true, []*types.Type{slice.Type}, typ, p, l, c)[0] + } else { + newSlice = s.rtcall(ir.Syms.MoveSliceNoCap, true, []*types.Type{slice.Type}, typ, p, l)[0] + } + } else { + elemSize := s.constInt(types.Types[types.TUINTPTR], et.Size()) + if n.PreserveCapacity { + newSlice = s.rtcall(ir.Syms.MoveSliceNoScan, true, []*types.Type{slice.Type}, elemSize, p, l, c)[0] + } else { + newSlice = s.rtcall(ir.Syms.MoveSliceNoCapNoScan, true, []*types.Type{slice.Type}, elemSize, p, l)[0] + } + } + // Decompose output slice + s.vars[ptrVar] = s.newValue1(ssa.OpSlicePtr, pt, newSlice) + s.vars[lenVar] = s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], newSlice) + s.vars[capVar] = s.newValue1(ssa.OpSliceCap, types.Types[types.TINT], newSlice) + b = s.endBlock() + b.AddEdgeTo(mergeBlock) + + // Merge fast path (no moving) and slow path (moved) + s.startBlock(mergeBlock) + p = s.variable(ptrVar, pt) // generates phi for ptr + l = s.variable(lenVar, types.Types[types.TINT]) // generates phi for len + c = s.variable(capVar, types.Types[types.TINT]) // generates phi for cap + delete(s.vars, ptrVar) + delete(s.vars, lenVar) + delete(s.vars, capVar) + return s.newValue3(ssa.OpSliceMake, slice.Type, p, l, c) +} + // minMax converts an OMIN/OMAX builtin call into SSA. func (s *state) minMax(n *ir.CallExpr) *ssa.Value { // The OMIN/OMAX builtin is variadic, but its semantics are diff --git a/src/cmd/compile/internal/typecheck/_builtin/runtime.go b/src/cmd/compile/internal/typecheck/_builtin/runtime.go index fbe8f77abd..7988ebf5b9 100644 --- a/src/cmd/compile/internal/typecheck/_builtin/runtime.go +++ b/src/cmd/compile/internal/typecheck/_builtin/runtime.go @@ -195,6 +195,7 @@ func makeslice(typ *byte, len int, cap int) unsafe.Pointer func makeslice64(typ *byte, len int64, cap int64) unsafe.Pointer func makeslicecopy(typ *byte, tolen int, fromlen int, from unsafe.Pointer) unsafe.Pointer func growslice(oldPtr *any, newLen, oldCap, num int, et *byte) (ary []any) +func growsliceBuf(oldPtr *any, newLen, oldCap, num int, et *byte, buf *any, bufLen int) (ary []any) func unsafeslicecheckptr(typ *byte, ptr unsafe.Pointer, len int64) func panicunsafeslicelen() func panicunsafeslicenilptr() @@ -202,6 +203,11 @@ func unsafestringcheckptr(ptr unsafe.Pointer, len int64) func panicunsafestringlen() func panicunsafestringnilptr() +func moveSlice(typ *byte, old *byte, len, cap int) (*byte, int, int) +func moveSliceNoScan(elemSize uintptr, old *byte, len, cap int) (*byte, int, int) +func moveSliceNoCap(typ *byte, old *byte, len int) (*byte, int, int) +func moveSliceNoCapNoScan(elemSize uintptr, old *byte, len int) (*byte, int, int) + func memmove(to *any, frm *any, length uintptr) func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) func memclrHasPointers(ptr unsafe.Pointer, n uintptr) diff --git a/src/cmd/compile/internal/typecheck/builtin.go b/src/cmd/compile/internal/typecheck/builtin.go index ff72bdcf37..ee892856dd 100644 --- a/src/cmd/compile/internal/typecheck/builtin.go +++ b/src/cmd/compile/internal/typecheck/builtin.go @@ -160,80 +160,85 @@ var runtimeDecls = [...]struct { {"makeslice64", funcTag, 124}, {"makeslicecopy", funcTag, 125}, {"growslice", funcTag, 127}, - {"unsafeslicecheckptr", funcTag, 128}, + {"growsliceBuf", funcTag, 128}, + {"unsafeslicecheckptr", funcTag, 129}, {"panicunsafeslicelen", funcTag, 9}, {"panicunsafeslicenilptr", funcTag, 9}, - {"unsafestringcheckptr", funcTag, 129}, + {"unsafestringcheckptr", funcTag, 130}, {"panicunsafestringlen", funcTag, 9}, {"panicunsafestringnilptr", funcTag, 9}, - {"memmove", funcTag, 130}, - {"memclrNoHeapPointers", funcTag, 131}, - {"memclrHasPointers", funcTag, 131}, - {"memequal", funcTag, 132}, - {"memequal0", funcTag, 133}, - {"memequal8", funcTag, 133}, - {"memequal16", funcTag, 133}, - {"memequal32", funcTag, 133}, - {"memequal64", funcTag, 133}, - {"memequal128", funcTag, 133}, - {"f32equal", funcTag, 134}, - {"f64equal", funcTag, 134}, - {"c64equal", funcTag, 134}, - {"c128equal", funcTag, 134}, - {"strequal", funcTag, 134}, - {"interequal", funcTag, 134}, - {"nilinterequal", funcTag, 134}, - {"memhash", funcTag, 135}, - {"memhash0", funcTag, 136}, - {"memhash8", funcTag, 136}, - {"memhash16", funcTag, 136}, - {"memhash32", funcTag, 136}, - {"memhash64", funcTag, 136}, - {"memhash128", funcTag, 136}, - {"f32hash", funcTag, 137}, - {"f64hash", funcTag, 137}, - {"c64hash", funcTag, 137}, - {"c128hash", funcTag, 137}, - {"strhash", funcTag, 137}, - {"interhash", funcTag, 137}, - {"nilinterhash", funcTag, 137}, - {"int64div", funcTag, 138}, - {"uint64div", funcTag, 139}, - {"int64mod", funcTag, 138}, - {"uint64mod", funcTag, 139}, - {"float64toint64", funcTag, 140}, - {"float64touint64", funcTag, 141}, - {"float64touint32", funcTag, 142}, - {"int64tofloat64", funcTag, 143}, - {"int64tofloat32", funcTag, 144}, - {"uint64tofloat64", funcTag, 145}, - {"uint64tofloat32", funcTag, 146}, - {"uint32tofloat64", funcTag, 147}, - {"complex128div", funcTag, 148}, + {"moveSlice", funcTag, 131}, + {"moveSliceNoScan", funcTag, 132}, + {"moveSliceNoCap", funcTag, 133}, + {"moveSliceNoCapNoScan", funcTag, 134}, + {"memmove", funcTag, 135}, + {"memclrNoHeapPointers", funcTag, 136}, + {"memclrHasPointers", funcTag, 136}, + {"memequal", funcTag, 137}, + {"memequal0", funcTag, 138}, + {"memequal8", funcTag, 138}, + {"memequal16", funcTag, 138}, + {"memequal32", funcTag, 138}, + {"memequal64", funcTag, 138}, + {"memequal128", funcTag, 138}, + {"f32equal", funcTag, 139}, + {"f64equal", funcTag, 139}, + {"c64equal", funcTag, 139}, + {"c128equal", funcTag, 139}, + {"strequal", funcTag, 139}, + {"interequal", funcTag, 139}, + {"nilinterequal", funcTag, 139}, + {"memhash", funcTag, 140}, + {"memhash0", funcTag, 141}, + {"memhash8", funcTag, 141}, + {"memhash16", funcTag, 141}, + {"memhash32", funcTag, 141}, + {"memhash64", funcTag, 141}, + {"memhash128", funcTag, 141}, + {"f32hash", funcTag, 142}, + {"f64hash", funcTag, 142}, + {"c64hash", funcTag, 142}, + {"c128hash", funcTag, 142}, + {"strhash", funcTag, 142}, + {"interhash", funcTag, 142}, + {"nilinterhash", funcTag, 142}, + {"int64div", funcTag, 143}, + {"uint64div", funcTag, 144}, + {"int64mod", funcTag, 143}, + {"uint64mod", funcTag, 144}, + {"float64toint64", funcTag, 145}, + {"float64touint64", funcTag, 146}, + {"float64touint32", funcTag, 147}, + {"int64tofloat64", funcTag, 148}, + {"int64tofloat32", funcTag, 149}, + {"uint64tofloat64", funcTag, 150}, + {"uint64tofloat32", funcTag, 151}, + {"uint32tofloat64", funcTag, 152}, + {"complex128div", funcTag, 153}, {"racefuncenter", funcTag, 33}, {"racefuncexit", funcTag, 9}, {"raceread", funcTag, 33}, {"racewrite", funcTag, 33}, - {"racereadrange", funcTag, 149}, - {"racewriterange", funcTag, 149}, - {"msanread", funcTag, 149}, - {"msanwrite", funcTag, 149}, - {"msanmove", funcTag, 150}, - {"asanread", funcTag, 149}, - {"asanwrite", funcTag, 149}, - {"checkptrAlignment", funcTag, 151}, - {"checkptrArithmetic", funcTag, 153}, - {"libfuzzerTraceCmp1", funcTag, 154}, - {"libfuzzerTraceCmp2", funcTag, 155}, - {"libfuzzerTraceCmp4", funcTag, 156}, - {"libfuzzerTraceCmp8", funcTag, 157}, - {"libfuzzerTraceConstCmp1", funcTag, 154}, - {"libfuzzerTraceConstCmp2", funcTag, 155}, - {"libfuzzerTraceConstCmp4", funcTag, 156}, - {"libfuzzerTraceConstCmp8", funcTag, 157}, - {"libfuzzerHookStrCmp", funcTag, 158}, - {"libfuzzerHookEqualFold", funcTag, 158}, - {"addCovMeta", funcTag, 160}, + {"racereadrange", funcTag, 154}, + {"racewriterange", funcTag, 154}, + {"msanread", funcTag, 154}, + {"msanwrite", funcTag, 154}, + {"msanmove", funcTag, 155}, + {"asanread", funcTag, 154}, + {"asanwrite", funcTag, 154}, + {"checkptrAlignment", funcTag, 156}, + {"checkptrArithmetic", funcTag, 158}, + {"libfuzzerTraceCmp1", funcTag, 159}, + {"libfuzzerTraceCmp2", funcTag, 160}, + {"libfuzzerTraceCmp4", funcTag, 161}, + {"libfuzzerTraceCmp8", funcTag, 162}, + {"libfuzzerTraceConstCmp1", funcTag, 159}, + {"libfuzzerTraceConstCmp2", funcTag, 160}, + {"libfuzzerTraceConstCmp4", funcTag, 161}, + {"libfuzzerTraceConstCmp8", funcTag, 162}, + {"libfuzzerHookStrCmp", funcTag, 163}, + {"libfuzzerHookEqualFold", funcTag, 163}, + {"addCovMeta", funcTag, 165}, {"x86HasPOPCNT", varTag, 6}, {"x86HasSSE41", varTag, 6}, {"x86HasFMA", varTag, 6}, @@ -243,11 +248,11 @@ var runtimeDecls = [...]struct { {"loong64HasLAM_BH", varTag, 6}, {"loong64HasLSX", varTag, 6}, {"riscv64HasZbb", varTag, 6}, - {"asanregisterglobals", funcTag, 131}, + {"asanregisterglobals", funcTag, 136}, } func runtimeTypes() []*types.Type { - var typs [161]*types.Type + var typs [166]*types.Type typs[0] = types.ByteType typs[1] = types.NewPtr(typs[0]) typs[2] = types.Types[types.TANY] @@ -376,39 +381,44 @@ func runtimeTypes() []*types.Type { typs[125] = newSig(params(typs[1], typs[13], typs[13], typs[7]), params(typs[7])) typs[126] = types.NewSlice(typs[2]) typs[127] = newSig(params(typs[3], typs[13], typs[13], typs[13], typs[1]), params(typs[126])) - typs[128] = newSig(params(typs[1], typs[7], typs[22]), nil) - typs[129] = newSig(params(typs[7], typs[22]), nil) - typs[130] = newSig(params(typs[3], typs[3], typs[5]), nil) - typs[131] = newSig(params(typs[7], typs[5]), nil) - typs[132] = newSig(params(typs[3], typs[3], typs[5]), params(typs[6])) - typs[133] = newSig(params(typs[3], typs[3]), params(typs[6])) - typs[134] = newSig(params(typs[7], typs[7]), params(typs[6])) - typs[135] = newSig(params(typs[3], typs[5], typs[5]), params(typs[5])) - typs[136] = newSig(params(typs[7], typs[5]), params(typs[5])) - typs[137] = newSig(params(typs[3], typs[5]), params(typs[5])) - typs[138] = newSig(params(typs[22], typs[22]), params(typs[22])) - typs[139] = newSig(params(typs[24], typs[24]), params(typs[24])) - typs[140] = newSig(params(typs[18]), params(typs[22])) - typs[141] = newSig(params(typs[18]), params(typs[24])) - typs[142] = newSig(params(typs[18]), params(typs[67])) - typs[143] = newSig(params(typs[22]), params(typs[18])) - typs[144] = newSig(params(typs[22]), params(typs[20])) - typs[145] = newSig(params(typs[24]), params(typs[18])) - typs[146] = newSig(params(typs[24]), params(typs[20])) - typs[147] = newSig(params(typs[67]), params(typs[18])) - typs[148] = newSig(params(typs[26], typs[26]), params(typs[26])) - typs[149] = newSig(params(typs[5], typs[5]), nil) - typs[150] = newSig(params(typs[5], typs[5], typs[5]), nil) - typs[151] = newSig(params(typs[7], typs[1], typs[5]), nil) - typs[152] = types.NewSlice(typs[7]) - typs[153] = newSig(params(typs[7], typs[152]), nil) - typs[154] = newSig(params(typs[71], typs[71], typs[15]), nil) - typs[155] = newSig(params(typs[65], typs[65], typs[15]), nil) - typs[156] = newSig(params(typs[67], typs[67], typs[15]), nil) - typs[157] = newSig(params(typs[24], typs[24], typs[15]), nil) - typs[158] = newSig(params(typs[30], typs[30], typs[15]), nil) - typs[159] = types.NewArray(typs[0], 16) - typs[160] = newSig(params(typs[7], typs[67], typs[159], typs[30], typs[13], typs[71], typs[71]), params(typs[67])) + typs[128] = newSig(params(typs[3], typs[13], typs[13], typs[13], typs[1], typs[3], typs[13]), params(typs[126])) + typs[129] = newSig(params(typs[1], typs[7], typs[22]), nil) + typs[130] = newSig(params(typs[7], typs[22]), nil) + typs[131] = newSig(params(typs[1], typs[1], typs[13], typs[13]), params(typs[1], typs[13], typs[13])) + typs[132] = newSig(params(typs[5], typs[1], typs[13], typs[13]), params(typs[1], typs[13], typs[13])) + typs[133] = newSig(params(typs[1], typs[1], typs[13]), params(typs[1], typs[13], typs[13])) + typs[134] = newSig(params(typs[5], typs[1], typs[13]), params(typs[1], typs[13], typs[13])) + typs[135] = newSig(params(typs[3], typs[3], typs[5]), nil) + typs[136] = newSig(params(typs[7], typs[5]), nil) + typs[137] = newSig(params(typs[3], typs[3], typs[5]), params(typs[6])) + typs[138] = newSig(params(typs[3], typs[3]), params(typs[6])) + typs[139] = newSig(params(typs[7], typs[7]), params(typs[6])) + typs[140] = newSig(params(typs[3], typs[5], typs[5]), params(typs[5])) + typs[141] = newSig(params(typs[7], typs[5]), params(typs[5])) + typs[142] = newSig(params(typs[3], typs[5]), params(typs[5])) + typs[143] = newSig(params(typs[22], typs[22]), params(typs[22])) + typs[144] = newSig(params(typs[24], typs[24]), params(typs[24])) + typs[145] = newSig(params(typs[18]), params(typs[22])) + typs[146] = newSig(params(typs[18]), params(typs[24])) + typs[147] = newSig(params(typs[18]), params(typs[67])) + typs[148] = newSig(params(typs[22]), params(typs[18])) + typs[149] = newSig(params(typs[22]), params(typs[20])) + typs[150] = newSig(params(typs[24]), params(typs[18])) + typs[151] = newSig(params(typs[24]), params(typs[20])) + typs[152] = newSig(params(typs[67]), params(typs[18])) + typs[153] = newSig(params(typs[26], typs[26]), params(typs[26])) + typs[154] = newSig(params(typs[5], typs[5]), nil) + typs[155] = newSig(params(typs[5], typs[5], typs[5]), nil) + typs[156] = newSig(params(typs[7], typs[1], typs[5]), nil) + typs[157] = types.NewSlice(typs[7]) + typs[158] = newSig(params(typs[7], typs[157]), nil) + typs[159] = newSig(params(typs[71], typs[71], typs[15]), nil) + typs[160] = newSig(params(typs[65], typs[65], typs[15]), nil) + typs[161] = newSig(params(typs[67], typs[67], typs[15]), nil) + typs[162] = newSig(params(typs[24], typs[24], typs[15]), nil) + typs[163] = newSig(params(typs[30], typs[30], typs[15]), nil) + typs[164] = types.NewArray(typs[0], 16) + typs[165] = newSig(params(typs[7], typs[67], typs[164], typs[30], typs[13], typs[71], typs[71]), params(typs[67])) return typs[:] } diff --git a/src/cmd/compile/internal/walk/expr.go b/src/cmd/compile/internal/walk/expr.go index 989ae0a1db..2794671c73 100644 --- a/src/cmd/compile/internal/walk/expr.go +++ b/src/cmd/compile/internal/walk/expr.go @@ -351,6 +351,11 @@ func walkExpr1(n ir.Node, init *ir.Nodes) ir.Node { case ir.OMETHVALUE: return walkMethodValue(n.(*ir.SelectorExpr), init) + + case ir.OMOVE2HEAP: + n := n.(*ir.MoveToHeapExpr) + n.Slice = walkExpr(n.Slice, init) + return n } // No return! Each case must return (or panic), diff --git a/src/runtime/slice.go b/src/runtime/slice.go index e31d5dccb2..a9e8fc1610 100644 --- a/src/runtime/slice.go +++ b/src/runtime/slice.go @@ -399,3 +399,107 @@ func bytealg_MakeNoZero(len int) []byte { cap := roundupsize(uintptr(len), true) return unsafe.Slice((*byte)(mallocgc(cap, nil, false)), cap)[:len] } + +// moveSlice copies the input slice to the heap and returns it. +// et is the element type of the slice. +func moveSlice(et *_type, old unsafe.Pointer, len, cap int) (unsafe.Pointer, int, int) { + if cap == 0 { + if old != nil { + old = unsafe.Pointer(&zerobase) + } + return old, 0, 0 + } + capmem := uintptr(cap) * et.Size_ + new := mallocgc(capmem, et, true) + bulkBarrierPreWriteSrcOnly(uintptr(new), uintptr(old), capmem, et) + memmove(new, old, capmem) + return new, len, cap +} + +// moveSliceNoScan is like moveSlice except the element type is known to +// not have any pointers. We instead pass in the size of the element. +func moveSliceNoScan(elemSize uintptr, old unsafe.Pointer, len, cap int) (unsafe.Pointer, int, int) { + if cap == 0 { + if old != nil { + old = unsafe.Pointer(&zerobase) + } + return old, 0, 0 + } + capmem := uintptr(cap) * elemSize + new := mallocgc(capmem, nil, false) + memmove(new, old, capmem) + return new, len, cap +} + +// moveSliceNoCap is like moveSlice, but can pick any appropriate capacity +// for the returned slice. +// Elements between len and cap in the returned slice will be zeroed. +func moveSliceNoCap(et *_type, old unsafe.Pointer, len int) (unsafe.Pointer, int, int) { + if len == 0 { + if old != nil { + old = unsafe.Pointer(&zerobase) + } + return old, 0, 0 + } + lenmem := uintptr(len) * et.Size_ + capmem := roundupsize(lenmem, false) + new := mallocgc(capmem, et, true) + bulkBarrierPreWriteSrcOnly(uintptr(new), uintptr(old), lenmem, et) + memmove(new, old, lenmem) + return new, len, int(capmem / et.Size_) +} + +// moveSliceNoCapNoScan is a combination of moveSliceNoScan and moveSliceNoCap. +func moveSliceNoCapNoScan(elemSize uintptr, old unsafe.Pointer, len int) (unsafe.Pointer, int, int) { + if len == 0 { + if old != nil { + old = unsafe.Pointer(&zerobase) + } + return old, 0, 0 + } + lenmem := uintptr(len) * elemSize + capmem := roundupsize(lenmem, true) + new := mallocgc(capmem, nil, false) + memmove(new, old, lenmem) + if capmem > lenmem { + memclrNoHeapPointers(add(new, lenmem), capmem-lenmem) + } + return new, len, int(capmem / elemSize) +} + +// growsliceBuf is like growslice, but we can use the given buffer +// as a backing store if we want. bufPtr must be on the stack. +func growsliceBuf(oldPtr unsafe.Pointer, newLen, oldCap, num int, et *_type, bufPtr unsafe.Pointer, bufLen int) slice { + if newLen > bufLen { + // Doesn't fit, process like a normal growslice. + return growslice(oldPtr, newLen, oldCap, num, et) + } + oldLen := newLen - num + if oldPtr != bufPtr && oldLen != 0 { + // Move data to start of buffer. + // Note: bufPtr is on the stack, so no write barrier needed. + memmove(bufPtr, oldPtr, uintptr(oldLen)*et.Size_) + } + // Pick a new capacity. + // + // Unlike growslice, we don't need to double the size each time. + // The work done here is not proportional to the length of the slice. + // (Unless the memmove happens above, but that is rare, and in any + // case there are not many elements on this path.) + // + // Instead, we try to just bump up to the next size class. + // This will ensure that we don't waste any space when we eventually + // call moveSlice with the resulting slice. + newCap := int(roundupsize(uintptr(newLen)*et.Size_, !et.Pointers()) / et.Size_) + + // Zero slice beyond newLen. + // The buffer is stack memory, so NoHeapPointers is ok. + // Caller will overwrite [oldLen:newLen], so we don't need to zero that portion. + // If et.Pointers(), buffer is at least initialized so we don't need to + // worry about the caller overwriting junk in [oldLen:newLen]. + if newLen < newCap { + memclrNoHeapPointers(add(bufPtr, uintptr(newLen)*et.Size_), uintptr(newCap-newLen)*et.Size_) + } + + return slice{bufPtr, newLen, newCap} +} diff --git a/src/runtime/slice_test.go b/src/runtime/slice_test.go index cd2bc26d1e..5463b6c02f 100644 --- a/src/runtime/slice_test.go +++ b/src/runtime/slice_test.go @@ -6,6 +6,9 @@ package runtime_test import ( "fmt" + "internal/race" + "internal/testenv" + "runtime" "testing" ) @@ -499,3 +502,319 @@ func BenchmarkAppendInPlace(b *testing.B) { }) } + +//go:noinline +func byteSlice(n int) []byte { + var r []byte + for i := range n { + r = append(r, byte(i)) + } + return r +} +func TestAppendByteInLoop(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + if race.Enabled { + t.Skip("skipping in -race mode") + } + for _, test := range [][3]int{ + {0, 0, 0}, + {1, 1, 8}, + {2, 1, 8}, + {8, 1, 8}, + {9, 1, 16}, + {16, 1, 16}, + {17, 1, 24}, + {24, 1, 24}, + {25, 1, 32}, + {32, 1, 32}, + {33, 1, 64}, // If we up the stack buffer size from 32->64, this line and the next would become 48. + {48, 1, 64}, + {49, 1, 64}, + {64, 1, 64}, + {65, 2, 128}, + } { + n := test[0] + want := test[1] + wantCap := test[2] + var r []byte + got := testing.AllocsPerRun(10, func() { + r = byteSlice(n) + }) + if got != float64(want) { + t.Errorf("for size %d, got %f allocs want %d", n, got, want) + } + if cap(r) != wantCap { + t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap) + } + } +} + +//go:noinline +func ptrSlice(n int, p *[]*byte) { + var r []*byte + for range n { + r = append(r, nil) + } + *p = r +} +func TestAppendPtrInLoop(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + if race.Enabled { + t.Skip("skipping in -race mode") + } + var tests [][3]int + if runtime.PtrSize == 8 { + tests = [][3]int{ + {0, 0, 0}, + {1, 1, 1}, + {2, 1, 2}, + {3, 1, 3}, // This is the interesting case, allocates 24 bytes when before it was 32. + {4, 1, 4}, + {5, 1, 8}, + {6, 1, 8}, + {7, 1, 8}, + {8, 1, 8}, + {9, 2, 16}, + } + } else { + tests = [][3]int{ + {0, 0, 0}, + {1, 1, 2}, + {2, 1, 2}, + {3, 1, 4}, + {4, 1, 4}, + {5, 1, 6}, // These two are also 24 bytes instead of 32. + {6, 1, 6}, // + {7, 1, 8}, + {8, 1, 8}, + {9, 1, 16}, + {10, 1, 16}, + {11, 1, 16}, + {12, 1, 16}, + {13, 1, 16}, + {14, 1, 16}, + {15, 1, 16}, + {16, 1, 16}, + {17, 2, 32}, + } + } + for _, test := range tests { + n := test[0] + want := test[1] + wantCap := test[2] + var r []*byte + got := testing.AllocsPerRun(10, func() { + ptrSlice(n, &r) + }) + if got != float64(want) { + t.Errorf("for size %d, got %f allocs want %d", n, got, want) + } + if cap(r) != wantCap { + t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap) + } + } +} + +//go:noinline +func byteCapSlice(n int) ([]byte, int) { + var r []byte + for i := range n { + r = append(r, byte(i)) + } + return r, cap(r) +} +func TestAppendByteCapInLoop(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + if race.Enabled { + t.Skip("skipping in -race mode") + } + for _, test := range [][3]int{ + {0, 0, 0}, + {1, 1, 8}, + {2, 1, 8}, + {8, 1, 8}, + {9, 1, 16}, + {16, 1, 16}, + {17, 1, 24}, + {24, 1, 24}, + {25, 1, 32}, + {32, 1, 32}, + {33, 1, 64}, + {48, 1, 64}, + {49, 1, 64}, + {64, 1, 64}, + {65, 2, 128}, + } { + n := test[0] + want := test[1] + wantCap := test[2] + var r []byte + got := testing.AllocsPerRun(10, func() { + r, _ = byteCapSlice(n) + }) + if got != float64(want) { + t.Errorf("for size %d, got %f allocs want %d", n, got, want) + } + if cap(r) != wantCap { + t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap) + } + } +} + +func TestAppendGeneric(t *testing.T) { + type I *int + r := testAppendGeneric[I](100) + if len(r) != 100 { + t.Errorf("bad length") + } +} + +//go:noinline +func testAppendGeneric[E any](n int) []E { + var r []E + var z E + for range n { + r = append(r, z) + } + return r +} + +func appendSomeBytes(r []byte, s []byte) []byte { + for _, b := range s { + r = append(r, b) + } + return r +} + +func TestAppendOfArg(t *testing.T) { + r := make([]byte, 24) + for i := 0; i < 24; i++ { + r[i] = byte(i) + } + appendSomeBytes(r, []byte{25, 26, 27}) + // Do the same thing, trying to overwrite any + // stack-allocated buffers used above. + s := make([]byte, 24) + for i := 0; i < 24; i++ { + s[i] = 99 + } + appendSomeBytes(s, []byte{99, 99, 99}) + // Check that we still have the right data. + for i, b := range r { + if b != byte(i) { + t.Errorf("r[%d]=%d, want %d", i, b, byte(i)) + } + } + +} + +func BenchmarkAppendInLoop(b *testing.B) { + for _, size := range []int{0, 1, 8, 16, 32, 64, 128} { + b.Run(fmt.Sprintf("%d", size), + func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + byteSlice(size) + } + }) + } +} + +func TestMoveToHeapEarly(t *testing.T) { + // Just checking that this compiles. + var x []int + y := x // causes a move2heap in the entry block + for range 5 { + x = append(x, 5) + } + _ = y +} + +func TestMoveToHeapCap(t *testing.T) { + var c int + r := func() []byte { + var s []byte + for i := range 10 { + s = append(s, byte(i)) + } + c = cap(s) + return s + }() + if c != cap(r) { + t.Errorf("got cap=%d, want %d", c, cap(r)) + } + sinkSlice = r +} + +//go:noinline +func runit(f func()) { + f() +} + +func TestMoveToHeapClosure1(t *testing.T) { + var c int + r := func() []byte { + var s []byte + for i := range 10 { + s = append(s, byte(i)) + } + runit(func() { + c = cap(s) + }) + return s + }() + if c != cap(r) { + t.Errorf("got cap=%d, want %d", c, cap(r)) + } + sinkSlice = r +} +func TestMoveToHeapClosure2(t *testing.T) { + var c int + r := func() []byte { + var s []byte + for i := range 10 { + s = append(s, byte(i)) + } + c = func() int { + return cap(s) + }() + return s + }() + if c != cap(r) { + t.Errorf("got cap=%d, want %d", c, cap(r)) + } + sinkSlice = r +} + +//go:noinline +func buildClosure(t *testing.T) ([]byte, func()) { + var s []byte + for i := range 20 { + s = append(s, byte(i)) + } + c := func() { + for i, b := range s { + if b != byte(i) { + t.Errorf("s[%d]=%d, want %d", i, b, i) + } + } + } + return s, c +} + +func TestMoveToHeapClosure3(t *testing.T) { + _, f := buildClosure(t) + overwriteStack(0) + f() +} + +//go:noinline +func overwriteStack(n int) uint64 { + var x [100]uint64 + for i := range x { + x[i] = 0xabcdabcdabcdabcd + } + return x[n] +} + +var sinkSlice []byte diff --git a/test/codegen/append.go b/test/codegen/append.go new file mode 100644 index 0000000000..0e58a48c45 --- /dev/null +++ b/test/codegen/append.go @@ -0,0 +1,190 @@ +// asmcheck + +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +func Append1(n int) []int { + var r []int + for i := range n { + // amd64:`.*growslice` + r = append(r, i) + } + // amd64:`.*moveSliceNoCapNoScan` + return r +} + +func Append2(n int) (r []int) { + for i := range n { + // amd64:`.*growslice` + r = append(r, i) + } + // amd64:`.*moveSliceNoCapNoScan` + return +} + +func Append3(n int) (r []int) { + for i := range n { + // amd64:`.*growslice` + r = append(r, i) + } + // amd64:`.*moveSliceNoCapNoScan` + return r +} + +func Append4(n int) []int { + var r []int + for i := range n { + // amd64:`.*growsliceBuf` + r = append(r, i) + } + println(cap(r)) + // amd64:`.*moveSliceNoScan` + return r +} + +func Append5(n int) []int { + var r []int + for i := range n { + // amd64:`.*growsliceBuf` + r = append(r, i) + } + useSlice(r) + // amd64:`.*moveSliceNoScan` + return r +} + +func Append6(n int) []*int { + var r []*int + for i := range n { + // amd64:`.*growslice` + r = append(r, new(i)) + } + // amd64:`.*moveSliceNoCap` + return r +} + +func Append7(n int) []*int { + var r []*int + for i := range n { + // amd64:`.*growsliceBuf` + r = append(r, new(i)) + } + println(cap(r)) + // amd64:`.*moveSlice` + return r +} + +func Append8(n int, p *[]int) { + var r []int + for i := range n { + // amd64:`.*growslice` + r = append(r, i) + } + // amd64:`.*moveSliceNoCapNoScan` + *p = r +} + +func Append9(n int) []int { + var r []int + for i := range n { + // amd64:`.*growslice` + r = append(r, i) + } + println(len(r)) + // amd64:`.*moveSliceNoCapNoScan` + return r +} + +func Append10(n int) []int { + var r []int + for i := range n { + // amd64:`.*growslice` + r = append(r, i) + } + println(r[3]) + // amd64:`.*moveSliceNoCapNoScan` + return r +} + +func Append11(n int) []int { + var r []int + for i := range n { + // amd64:`.*growsliceBuf` + r = append(r, i) + } + r = r[3:5] + // amd64:`.*moveSliceNoScan` + return r +} + +func Append12(n int) []int { + var r []int + r = nil + for i := range n { + // amd64:`.*growslice` + r = append(r, i) + } + // amd64:`.*moveSliceNoCapNoScan` + return r +} + +func Append13(n int) []int { + var r []int + r, r = nil, nil + for i := range n { + // amd64:`.*growslice` + r = append(r, i) + } + // amd64:`.*moveSliceNoCapNoScan` + return r +} + +func Append14(n int) []int { + var r []int + r = []int{3, 4, 5} + for i := range n { + // amd64:`.*growsliceBuf` + r = append(r, i) + } + // amd64:`.*moveSliceNoScan` + return r +} + +func Append15(n int) []int { + r := []int{3, 4, 5} + for i := range n { + // amd64:`.*growsliceBuf` + r = append(r, i) + } + // amd64:`.*moveSliceNoScan` + return r +} + +func Append16(r []int, n int) []int { + for i := range n { + // amd64:`.*growslice` + r = append(r, i) + } + // amd64:`.*moveSliceNoCapNoScan` + return r +} + +func Append17(n int) []int { + var r []int + for i := range n { + // amd64:`.*growslice` + r = append(r, i) + } + for i, x := range r { + println(i, x) + } + // amd64:`.*moveSliceNoCapNoScan` + return r +} + +//go:noinline +func useSlice(s []int) { +} -- cgit v1.3-5-g9baa From 790384c6c23f7ce44199ea3cd61c856d632b08aa Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Tue, 18 Nov 2025 15:47:44 -0800 Subject: spec: adjust rule for type parameter on RHS of alias declaration Per discussion on issue #75885, a type parameter on the RHS of an alias declaration must not be declared in the same declaration (but it may be declared by an enclosing function). This relaxes the spec slightly and allows for (pre-existing) test cases. Add a corresponding check to the type checker (there was no check for type parameters on the RHS of alias declarations at all, before). Fixes #75884. Fixes #75885. Change-Id: I1e5675978e6423d626c068829d4bf5e90035ea82 Reviewed-on: https://go-review.googlesource.com/c/go/+/721820 Auto-Submit: Robert Griesemer Reviewed-by: Robert Griesemer Reviewed-by: Mark Freeman Reviewed-by: Robert Findley LUCI-TryBot-Result: Go LUCI --- doc/go_spec.html | 14 +++++++++----- src/cmd/compile/internal/types2/decl.go | 9 +++++++-- src/go/types/decl.go | 9 +++++++-- src/internal/types/testdata/fixedbugs/issue75885.go | 15 +++++++++++++++ 4 files changed, 38 insertions(+), 9 deletions(-) create mode 100644 src/internal/types/testdata/fixedbugs/issue75885.go (limited to 'src/cmd/compile') diff --git a/doc/go_spec.html b/doc/go_spec.html index 77400ad210..b75845372d 100644 --- a/doc/go_spec.html +++ b/doc/go_spec.html @@ -1,6 +1,6 @@ @@ -2487,11 +2487,15 @@ type set[P comparable] = map[P]bool

-In an alias declaration the given type cannot be a type parameter. +In an alias declaration the given type cannot be a type parameter declared in the same declaration.

-type A[P any] = P    // illegal: P is a type parameter
+type A[P any] = P   // illegal: P is a type parameter declared in the declaration of A
+
+func f[P any]() {
+	type A = P  // ok: T is a type parameter declared by the enclosing function
+}
 

Type definitions

@@ -2601,8 +2605,8 @@ In a type definition the given type cannot be a type parameter.
 type T[P any] P    // illegal: P is a type parameter
 
-func f[T any]() {
-	type L T   // illegal: T is a type parameter declared by the enclosing function
+func f[P any]() {
+	type L P   // illegal: P is a type parameter declared by the enclosing function
 }
 
diff --git a/src/cmd/compile/internal/types2/decl.go b/src/cmd/compile/internal/types2/decl.go index 60371651ab..5cb52fdbe4 100644 --- a/src/cmd/compile/internal/types2/decl.go +++ b/src/cmd/compile/internal/types2/decl.go @@ -497,9 +497,14 @@ func (check *Checker) typeDecl(obj *TypeName, tdecl *syntax.TypeDecl, def *TypeN rhs = check.declaredType(tdecl.Type, obj) assert(rhs != nil) - alias.fromRHS = rhs - unalias(alias) // populate alias.actual + + // spec: In an alias declaration the given type cannot be a type parameter declared in the same declaration." + // (see also go.dev/issue/75884, go.dev/issue/#75885) + if tpar, ok := rhs.(*TypeParam); ok && alias.tparams != nil && slices.Index(alias.tparams.list(), tpar) >= 0 { + check.error(tdecl.Type, MisplacedTypeParam, "cannot use type parameter declared in alias declaration as RHS") + alias.fromRHS = Typ[Invalid] + } } else { if !versionErr && tparam0 != nil { check.error(tdecl, UnsupportedFeature, "generic type alias requires GODEBUG=gotypesalias=1 or unset") diff --git a/src/go/types/decl.go b/src/go/types/decl.go index 4b374fb66d..fffcb590e6 100644 --- a/src/go/types/decl.go +++ b/src/go/types/decl.go @@ -572,9 +572,14 @@ func (check *Checker) typeDecl(obj *TypeName, tdecl *ast.TypeSpec, def *TypeName rhs = check.declaredType(tdecl.Type, obj) assert(rhs != nil) - alias.fromRHS = rhs - unalias(alias) // populate alias.actual + + // spec: In an alias declaration the given type cannot be a type parameter declared in the same declaration." + // (see also go.dev/issue/75884, go.dev/issue/#75885) + if tpar, ok := rhs.(*TypeParam); ok && alias.tparams != nil && slices.Index(alias.tparams.list(), tpar) >= 0 { + check.error(tdecl.Type, MisplacedTypeParam, "cannot use type parameter declared in alias declaration as RHS") + alias.fromRHS = Typ[Invalid] + } } else { // With Go1.23, the default behavior is to use Alias nodes, // reflected by check.enableAlias. Signal non-default behavior. diff --git a/src/internal/types/testdata/fixedbugs/issue75885.go b/src/internal/types/testdata/fixedbugs/issue75885.go new file mode 100644 index 0000000000..f0cf4a65ed --- /dev/null +++ b/src/internal/types/testdata/fixedbugs/issue75885.go @@ -0,0 +1,15 @@ +// -gotypesalias=1 + +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package p + +type A[P any] = P // ERROR "cannot use type parameter declared in alias declaration as RHS" + +func _[P any]() { + type A[P any] = P // ERROR "cannot use type parameter declared in alias declaration as RHS" + type B = P + type C[Q any] = P +} -- cgit v1.3-5-g9baa