diff options
| author | Cherry Mui <cherryyz@google.com> | 2025-11-20 14:40:43 -0500 |
|---|---|---|
| committer | Cherry Mui <cherryyz@google.com> | 2025-11-20 14:40:43 -0500 |
| commit | e3d4645693bc030b9ff9b867f1d374a1d72ef2fe (patch) | |
| tree | 5d9c6783b4b1901e072ed253acc6ecdd909b23bc /src/cmd/compile | |
| parent | 95b4ad525fc8d70c881960ab9f75f31548023bed (diff) | |
| parent | ca37d24e0b9369b8086959df5bc230b38bf98636 (diff) | |
| download | go-e3d4645693bc030b9ff9b867f1d374a1d72ef2fe.tar.xz | |
[dev.simd] all: merge master (ca37d24) into dev.simd
Conflicts:
- src/cmd/compile/internal/typecheck/builtin.go
Merge List:
+ 2025-11-20 ca37d24e0b net/http: drop unused "broken" field from persistConn
+ 2025-11-20 4b740af56a cmd/internal/obj/x86: handle global reference in From3 in dynlink mode
+ 2025-11-20 790384c6c2 spec: adjust rule for type parameter on RHS of alias declaration
+ 2025-11-20 a49b0302d0 net/http: correctly close fake net.Conns
+ 2025-11-20 32f5aadd2f cmd/compile: stack allocate backing stores during append
+ 2025-11-20 a18aff8057 runtime: select GC mark workers during start-the-world
+ 2025-11-20 829779f4fe runtime: split findRunnableGCWorker in two
+ 2025-11-20 ab59569099 go/version: use "custom" as an example of a version suffix
+ 2025-11-19 c4bb9653ba cmd/compile: Implement LoweredZeroLoop with LSX Instruction on loong64
+ 2025-11-19 7f2ae21fb4 cmd/internal/obj/loong64: add MULW.D.W[U] instructions
+ 2025-11-19 a2946f2385 crypto: add Encapsulator and Decapsulator interfaces
+ 2025-11-19 6b83bd7146 crypto/ecdh: add KeyExchanger interface
+ 2025-11-19 4fef9f8b55 go/types, types2: fix object path for grouped declaration statements
+ 2025-11-19 33529db142 spec: escape double-ampersands
+ 2025-11-19 dc42565a20 cmd/compile: fix control flow for unsigned divisions proof relations
+ 2025-11-19 e64023dcbf cmd/compile: cleanup useless if statement in prove
+ 2025-11-19 2239520d1c test: go fmt prove.go tests
+ 2025-11-19 489d3dafb7 math: switch s390x math.Pow to generic implementation
+ 2025-11-18 8c41a482f9 runtime: add dlog.hexdump
+ 2025-11-18 e912618bd2 runtime: add hexdumper
+ 2025-11-18 2cf9d4b62f Revert "net/http: do not discard body content when closing it within request handlers"
+ 2025-11-18 4d0658bb08 cmd/compile: prefer fixed registers for values
+ 2025-11-18 ba634ca5c7 cmd/compile: fold boolean NOT into branches
+ 2025-11-18 8806d53c10 cmd/link: align sections, not symbols after DWARF compress
+ 2025-11-18 c93766007d runtime: do not print recovered when double panic with the same value
+ 2025-11-18 9859b43643 cmd/asm,cmd/compile,cmd/internal/obj/riscv: use compressed instructions on riscv64
+ 2025-11-17 b9ef0633f6 cmd/internal/sys,internal/goarch,runtime: enable the use of compressed instructions on riscv64
+ 2025-11-17 a087dea869 debug/elf: sync new loong64 relocation types up to LoongArch ELF psABI v20250521
+ 2025-11-17 e1a12c781f cmd/compile: use 32x32->64 multiplies on arm64
+ 2025-11-17 6caab99026 runtime: relax TestMemoryLimit on darwin a bit more
+ 2025-11-17 eda2e8c683 runtime: clear frame pointer at thread entry points
+ 2025-11-17 6919858338 runtime: rename findrunnable references to findRunnable
+ 2025-11-17 8e734ec954 go/ast: fix BasicLit.End position for raw strings containing \r
+ 2025-11-17 592775ec7d crypto/mlkem: avoid a few unnecessary inverse NTT calls
+ 2025-11-17 590cf18daf crypto/mlkem/mlkemtest: add derandomized Encapsulate768/1024
+ 2025-11-17 c12c337099 cmd/compile: teach prove about subtract idioms
+ 2025-11-17 bc15963813 cmd/compile: clean up prove pass
+ 2025-11-17 1297fae708 go/token: add (*File).End method
+ 2025-11-17 65c09eafdf runtime: hoist invariant code out of heapBitsSmallForAddrInline
+ 2025-11-17 594129b80c internal/runtime/maps: update doc for table.Clear
+ 2025-11-15 c58d075e9a crypto/rsa: deprecate PKCS#1 v1.5 encryption
+ 2025-11-14 d55ecea9e5 runtime: usleep before stealing runnext only if not in syscall
+ 2025-11-14 410ef44f00 cmd: update x/tools to 59ff18c
+ 2025-11-14 50128a2154 runtime: support runtime.freegc in size-specialized mallocs for noscan objects
+ 2025-11-14 c3708350a4 cmd/go: tests: rename git-min-vers->git-sha256
+ 2025-11-14 aea881230d std: fix printf("%q", int) mistakes
+ 2025-11-14 120f1874ef runtime: add more precise test of assist credit handling for runtime.freegc
+ 2025-11-14 fecfcaa4f6 runtime: add runtime.freegc to reduce GC work
+ 2025-11-14 5a347b775e runtime: set GOEXPERIMENT=runtimefreegc to disabled by default
+ 2025-11-14 1a03d0db3f runtime: skip tests for GOEXPERIMENT=arenas that do not handle clobberfree=1
+ 2025-11-14 cb0d9980f5 net/http: do not discard body content when closing it within request handlers
+ 2025-11-14 03ed43988f cmd/compile: allow multi-field structs to be stored directly in interfaces
+ 2025-11-14 1bb1f2bf0c runtime: put AddCleanup cleanup arguments in their own allocation
+ 2025-11-14 9fd2e44439 runtime: add AddCleanup benchmark
+ 2025-11-14 80c91eedbb runtime: ensure weak handles end up in their own allocation
+ 2025-11-14 7a8d0b5d53 runtime: add debug mode to extend _Grunning-without-P windows
+ 2025-11-14 710abf74da internal/runtime/cgobench: add Go function call benchmark for comparison
+ 2025-11-14 b24aec598b doc, cmd/internal/obj/riscv: document the riscv64 assembler
+ 2025-11-14 a0e738c657 cmd/compile/internal: remove incorrect riscv64 SLTI rule
+ 2025-11-14 2cdcc4150b cmd/compile: fold negation into multiplication
+ 2025-11-14 b57962b7c7 bytes: fix panic in bytes.Buffer.Peek
+ 2025-11-14 0a569528ea cmd/compile: optimize comparisons with single bit difference
+ 2025-11-14 1e5e6663e9 cmd/compile: remove unnecessary casts and types from riscv64 rules
+ 2025-11-14 ddd8558e61 go/types, types2: swap object.color for Checker.objPathIdx
+ 2025-11-14 9daaab305c cmd/link/internal/ld: make runtime.buildVersion with experiments valid
+ 2025-11-13 d50a571ddf test: fix tests to work with sizespecializedmalloc turned off
+ 2025-11-13 704f841eab cmd/trace: annotation proc start/stop with thread and proc always
+ 2025-11-13 17a02b9106 net/http: remove unused isLitOrSingle and isNotToken
+ 2025-11-13 ff61991aed cmd/go: fix flaky TestScript/mod_get_direct
+ 2025-11-13 129d0cb543 net/http/cgi: accept INCLUDED as protocol for server side includes
+ 2025-11-13 77c5130100 go/types: minor simplification
+ 2025-11-13 7601cd3880 go/types: generate cycles.go
+ 2025-11-13 7a372affd9 go/types, types2: rename definedType to declaredType and clarify docs
Change-Id: Ibaa9bdb982364892f80e511c1bb12661fcd5fb86
Diffstat (limited to 'src/cmd/compile')
48 files changed, 2102 insertions, 650 deletions
diff --git a/src/cmd/compile/internal/base/debug.go b/src/cmd/compile/internal/base/debug.go index 9e8ab2f488..b532bf435e 100644 --- a/src/cmd/compile/internal/base/debug.go +++ b/src/cmd/compile/internal/base/debug.go @@ -20,6 +20,7 @@ type DebugFlags struct { Append int `help:"print information about append compilation"` Checkptr int `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation" concurrent:"ok"` Closure int `help:"print information about closure compilation"` + CompressInstructions int `help:"use compressed instructions when possible (if supported by architecture)"` Converthash string `help:"hash value for use in debugging changes to platform-dependent float-to-[u]int conversion" concurrent:"ok"` Defer int `help:"print information about defer compilation"` DisableNil int `help:"disable nil checks" concurrent:"ok"` diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go index 1d211e0a2d..63cae41524 100644 --- a/src/cmd/compile/internal/base/flag.go +++ b/src/cmd/compile/internal/base/flag.go @@ -177,6 +177,7 @@ func ParseFlags() { Flag.WB = true Debug.ConcurrentOk = true + Debug.CompressInstructions = 1 Debug.MaxShapeLen = 500 Debug.AlignHot = 1 Debug.InlFuncsWithClosures = 1 @@ -299,6 +300,7 @@ func ParseFlags() { } parseSpectre(Flag.Spectre) // left as string for RecordFlags + Ctxt.CompressInstructions = Debug.CompressInstructions != 0 Ctxt.Flag_shared = Ctxt.Flag_dynlink || Ctxt.Flag_shared Ctxt.Flag_optimize = Flag.N == 0 Ctxt.Debugasm = int(Flag.S) diff --git a/src/cmd/compile/internal/deadlocals/deadlocals.go b/src/cmd/compile/internal/deadlocals/deadlocals.go index 238450416a..55ad0387a4 100644 --- a/src/cmd/compile/internal/deadlocals/deadlocals.go +++ b/src/cmd/compile/internal/deadlocals/deadlocals.go @@ -44,6 +44,11 @@ func Funcs(fns []*ir.Func) { *as.lhs = ir.BlankNode *as.rhs 
= zero } + if len(assigns) > 0 { + // k.Defn might be pointing at one of the + // assignments we're overwriting. + k.Defn = nil + } } } } diff --git a/src/cmd/compile/internal/escape/leaks.go b/src/cmd/compile/internal/escape/leaks.go index 942f87d2a2..176bccd847 100644 --- a/src/cmd/compile/internal/escape/leaks.go +++ b/src/cmd/compile/internal/escape/leaks.go @@ -124,3 +124,21 @@ func parseLeaks(s string) leaks { copy(l[:], s[4:]) return l } + +func ParseLeaks(s string) leaks { + return parseLeaks(s) +} + +// Any reports whether the value flows anywhere at all. +func (l leaks) Any() bool { + // TODO: do mutator/callee matter? + if l.Heap() >= 0 || l.Mutator() >= 0 || l.Callee() >= 0 { + return true + } + for i := range numEscResults { + if l.Result(i) >= 0 { + return true + } + } + return false +} diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index 918d3f3514..6418ab9357 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -22,6 +22,7 @@ import ( "cmd/compile/internal/pkginit" "cmd/compile/internal/reflectdata" "cmd/compile/internal/rttype" + "cmd/compile/internal/slice" "cmd/compile/internal/ssa" "cmd/compile/internal/ssagen" "cmd/compile/internal/staticinit" @@ -271,6 +272,8 @@ func Main(archInit func(*ssagen.ArchInfo)) { base.Timer.Start("fe", "escapes") escape.Funcs(typecheck.Target.Funcs) + slice.Funcs(typecheck.Target.Funcs) + loopvar.LogTransformations(transformed) // Collect information for go:nowritebarrierrec diff --git a/src/cmd/compile/internal/ir/expr.go b/src/cmd/compile/internal/ir/expr.go index 25654ca253..1a3514db6c 100644 --- a/src/cmd/compile/internal/ir/expr.go +++ b/src/cmd/compile/internal/ir/expr.go @@ -192,6 +192,7 @@ type CallExpr struct { IsDDD bool GoDefer bool // whether this call is part of a go or defer statement NoInline bool // whether this call must not be inlined + UseBuf bool // use stack buffer for backing store (OAPPEND only) } func 
NewCallExpr(pos src.XPos, op Op, fun Node, args []Node) *CallExpr { @@ -1280,3 +1281,28 @@ func MethodExprFunc(n Node) *types.Field { base.Fatalf("unexpected node: %v (%v)", n, n.Op()) panic("unreachable") } + +// A MoveToHeapExpr takes a slice as input and moves it to the +// heap (by copying the backing store if it is not already +// on the heap). +type MoveToHeapExpr struct { + miniExpr + Slice Node + // An expression that evaluates to a *runtime._type + // that represents the slice element type. + RType Node + // If PreserveCapacity is true, the capacity of + // the resulting slice, and all of the elements in + // [len:cap], must be preserved. + // If PreserveCapacity is false, the resulting + // slice may have any capacity >= len, with any + // elements in the resulting [len:cap] range zeroed. + PreserveCapacity bool +} + +func NewMoveToHeapExpr(pos src.XPos, slice Node) *MoveToHeapExpr { + n := &MoveToHeapExpr{Slice: slice} + n.pos = pos + n.op = OMOVE2HEAP + return n +} diff --git a/src/cmd/compile/internal/ir/fmt.go b/src/cmd/compile/internal/ir/fmt.go index ae4ff62652..eb64cce47b 100644 --- a/src/cmd/compile/internal/ir/fmt.go +++ b/src/cmd/compile/internal/ir/fmt.go @@ -574,7 +574,7 @@ func exprFmt(n Node, s fmt.State, prec int) { // Special case for rune constants. 
if typ == types.RuneType || typ == types.UntypedRune { if x, ok := constant.Uint64Val(val); ok && x <= utf8.MaxRune { - fmt.Fprintf(s, "%q", x) + fmt.Fprintf(s, "%q", rune(x)) return } } diff --git a/src/cmd/compile/internal/ir/name.go b/src/cmd/compile/internal/ir/name.go index 01f1c0c502..63f1b1c931 100644 --- a/src/cmd/compile/internal/ir/name.go +++ b/src/cmd/compile/internal/ir/name.go @@ -43,7 +43,7 @@ type Name struct { Func *Func // TODO(austin): nil for I.M Offset_ int64 val constant.Value - Opt any // for use by escape analysis + Opt any // for use by escape or slice analysis Embed *[]Embed // list of embedded files, for ONAME var // For a local variable (not param) or extern, the initializing assignment (OAS or OAS2). diff --git a/src/cmd/compile/internal/ir/node.go b/src/cmd/compile/internal/ir/node.go index 8c61bb6ed5..f26f61cb18 100644 --- a/src/cmd/compile/internal/ir/node.go +++ b/src/cmd/compile/internal/ir/node.go @@ -293,6 +293,7 @@ const ( OLINKSYMOFFSET // offset within a name OJUMPTABLE // A jump table structure for implementing dense expression switches OINTERFACESWITCH // A type switch with interface cases + OMOVE2HEAP // Promote a stack-backed slice to heap // opcodes for generics ODYNAMICDOTTYPE // x = i.(T) where T is a type parameter (or derived from a type parameter) diff --git a/src/cmd/compile/internal/ir/node_gen.go b/src/cmd/compile/internal/ir/node_gen.go index 2221045c93..4298b3a43d 100644 --- a/src/cmd/compile/internal/ir/node_gen.go +++ b/src/cmd/compile/internal/ir/node_gen.go @@ -1175,6 +1175,34 @@ func (n *MakeExpr) editChildrenWithHidden(edit func(Node) Node) { } } +func (n *MoveToHeapExpr) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) } +func (n *MoveToHeapExpr) copy() Node { + c := *n + c.init = copyNodes(c.init) + return &c +} +func (n *MoveToHeapExpr) doChildren(do func(Node) bool) bool { + if doNodes(n.init, do) { + return true + } + if n.Slice != nil && do(n.Slice) { + return true + } + return false +} +func (n 
*MoveToHeapExpr) doChildrenWithHidden(do func(Node) bool) bool { + return n.doChildren(do) +} +func (n *MoveToHeapExpr) editChildren(edit func(Node) Node) { + editNodes(n.init, edit) + if n.Slice != nil { + n.Slice = edit(n.Slice).(Node) + } +} +func (n *MoveToHeapExpr) editChildrenWithHidden(edit func(Node) Node) { + n.editChildren(edit) +} + func (n *Name) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) } func (n *NilExpr) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) } diff --git a/src/cmd/compile/internal/ir/op_string.go b/src/cmd/compile/internal/ir/op_string.go index 7494beee4c..f042ad84a4 100644 --- a/src/cmd/compile/internal/ir/op_string.go +++ b/src/cmd/compile/internal/ir/op_string.go @@ -151,18 +151,19 @@ func _() { _ = x[OLINKSYMOFFSET-140] _ = x[OJUMPTABLE-141] _ = x[OINTERFACESWITCH-142] - _ = x[ODYNAMICDOTTYPE-143] - _ = x[ODYNAMICDOTTYPE2-144] - _ = x[ODYNAMICTYPE-145] - _ = x[OTAILCALL-146] - _ = x[OGETG-147] - _ = x[OGETCALLERSP-148] - _ = x[OEND-149] + _ = x[OMOVE2HEAP-143] + _ = x[ODYNAMICDOTTYPE-144] + _ = x[ODYNAMICDOTTYPE2-145] + _ = x[ODYNAMICTYPE-146] + _ = x[OTAILCALL-147] + _ = x[OGETG-148] + _ = x[OGETCALLERSP-149] + _ = x[OEND-150] } -const _Op_name = 
"XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLEARCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVNOPCOPYDCLDCLFUNCDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTLNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERSTRINGHEADERRECOVERRECVRUNESTRSELRECV2MINMAXREALIMAGCOMPLEXUNSAFEADDUNSAFESLICEUNSAFESLICEDATAUNSAFESTRINGUNSAFESTRINGDATAMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWINLCALLMAKEFACEITABIDATASPTRCFUNCCHECKNILRESULTINLMARKLINKSYMOFFSETJUMPTABLEINTERFACESWITCHDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERSPEND" +const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLEARCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVNOPCOPYDCLDCLFUNCDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTLNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERSTRINGHEADERRECOVERRECVRUNESTRSELRECV2MINMAXREALIMAGCOMPLEXUNSAFEADDUNSAFESLICEUNSAFESLICEDATAUNSAFESTRINGUNSAFESTRINGDATAMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWINLCALLMAKEFACEITABIDATASPTRCFUNCCHECKNILRESULTINLMARKLINKSYMOFFSETJUMPTABLEINTERFACESWITCHMOVE2HEAPDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERSPEND" -var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 
90, 99, 111, 120, 129, 141, 143, 146, 156, 163, 170, 177, 181, 185, 193, 201, 210, 213, 218, 223, 230, 237, 243, 252, 260, 268, 274, 278, 287, 294, 298, 301, 308, 314, 317, 323, 330, 338, 342, 349, 357, 359, 361, 363, 365, 367, 369, 374, 379, 387, 390, 399, 402, 406, 414, 421, 430, 443, 446, 449, 452, 455, 458, 461, 467, 470, 473, 479, 483, 486, 490, 495, 500, 507, 512, 516, 521, 529, 537, 543, 552, 563, 575, 582, 586, 593, 601, 604, 607, 611, 615, 622, 631, 642, 657, 669, 685, 693, 702, 707, 712, 716, 724, 729, 733, 736, 740, 742, 747, 749, 754, 760, 766, 772, 778, 785, 793, 797, 802, 806, 811, 819, 825, 832, 845, 854, 869, 883, 898, 909, 917, 921, 932, 935} +var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 129, 141, 143, 146, 156, 163, 170, 177, 181, 185, 193, 201, 210, 213, 218, 223, 230, 237, 243, 252, 260, 268, 274, 278, 287, 294, 298, 301, 308, 314, 317, 323, 330, 338, 342, 349, 357, 359, 361, 363, 365, 367, 369, 374, 379, 387, 390, 399, 402, 406, 414, 421, 430, 443, 446, 449, 452, 455, 458, 461, 467, 470, 473, 479, 483, 486, 490, 495, 500, 507, 512, 516, 521, 529, 537, 543, 552, 563, 575, 582, 586, 593, 601, 604, 607, 611, 615, 622, 631, 642, 657, 669, 685, 693, 702, 707, 712, 716, 724, 729, 733, 736, 740, 742, 747, 749, 754, 760, 766, 772, 778, 785, 793, 797, 802, 806, 811, 819, 825, 832, 845, 854, 869, 878, 892, 907, 918, 926, 930, 941, 944} func (i Op) String() string { if i >= Op(len(_Op_index)-1) { diff --git a/src/cmd/compile/internal/ir/stmt.go b/src/cmd/compile/internal/ir/stmt.go index 0801ecdd9e..affa5f4551 100644 --- a/src/cmd/compile/internal/ir/stmt.go +++ b/src/cmd/compile/internal/ir/stmt.go @@ -42,6 +42,7 @@ func (*Decl) isStmt() {} type Stmt interface { Node isStmt() + PtrInit() *Nodes } // A miniStmt is a miniNode with extra fields common to statements. 
diff --git a/src/cmd/compile/internal/ir/symtab.go b/src/cmd/compile/internal/ir/symtab.go index 344985f7be..4b5bf17a3d 100644 --- a/src/cmd/compile/internal/ir/symtab.go +++ b/src/cmd/compile/internal/ir/symtab.go @@ -29,6 +29,11 @@ type symsStruct struct { GCWriteBarrier [8]*obj.LSym Goschedguarded *obj.LSym Growslice *obj.LSym + GrowsliceBuf *obj.LSym + MoveSlice *obj.LSym + MoveSliceNoScan *obj.LSym + MoveSliceNoCap *obj.LSym + MoveSliceNoCapNoScan *obj.LSym InterfaceSwitch *obj.LSym MallocGC *obj.LSym MallocGCSmallNoScan [27]*obj.LSym diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go index 84bbf9b394..71953109c4 100644 --- a/src/cmd/compile/internal/loong64/ssa.go +++ b/src/cmd/compile/internal/loong64/ssa.go @@ -575,6 +575,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { case ssa.OpLOONG64LoweredZeroLoop: ptrReg := v.Args[0].Reg() countReg := v.RegTmp() + flagReg := int16(loong64.REGTMP) var off int64 n := v.AuxInt loopSize := int64(64) @@ -587,58 +588,119 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { // vs // 16 instuctions in the straightline code // Might as well use straightline code. - v.Fatalf("ZeroLoop size tool small %d", n) + v.Fatalf("ZeroLoop size too small %d", n) } - // Put iteration count in a register. 
- // MOVV $n/loopSize, countReg - p := s.Prog(loong64.AMOVV) - p.From.Type = obj.TYPE_CONST - p.From.Offset = n / loopSize - p.To.Type = obj.TYPE_REG - p.To.Reg = countReg - cntInit := p + // MOVV $n/loopSize, countReg + // MOVBU ir.Syms.Loong64HasLSX, flagReg + // BNE flagReg, lsxInit + // genericInit: + // for off = 0; off < loopSize; off += 8 { + // zero8(s, ptrReg, off) + // } + // ADDV $loopSize, ptrReg + // SUBV $1, countReg + // BNE countReg, genericInit + // JMP tail + // lsxInit: + // VXORV V31, V31, V31, v31 = 0 + // for off = 0; off < loopSize; off += 16 { + // zero16(s, V31, ptrReg, off) + // } + // ADDV $loopSize, ptrReg + // SUBV $1, countReg + // BNE countReg, lsxInit + // tail: + // n %= loopSize + // for off = 0; n >= 8; off += 8, n -= 8 { + // zero8(s, ptrReg, off) + // } + // + // if n != 0 { + // zero8(s, ptrReg, off+n-8) + // } - // Zero loopSize bytes starting at ptrReg. - for range loopSize / 8 { - // MOVV ZR, off(ptrReg) + p1 := s.Prog(loong64.AMOVV) + p1.From.Type = obj.TYPE_CONST + p1.From.Offset = n / loopSize + p1.To.Type = obj.TYPE_REG + p1.To.Reg = countReg + + p2 := s.Prog(loong64.AMOVBU) + p2.From.Type = obj.TYPE_MEM + p2.From.Name = obj.NAME_EXTERN + p2.From.Sym = ir.Syms.Loong64HasLSX + p2.To.Type = obj.TYPE_REG + p2.To.Reg = flagReg + + p3 := s.Prog(loong64.ABNE) + p3.From.Type = obj.TYPE_REG + p3.From.Reg = flagReg + p3.To.Type = obj.TYPE_BRANCH + + for off = 0; off < loopSize; off += 8 { zero8(s, ptrReg, off) - off += 8 } - // Increment ptrReg by loopSize. - // ADDV $loopSize, ptrReg - p = s.Prog(loong64.AADDV) - p.From.Type = obj.TYPE_CONST - p.From.Offset = loopSize - p.To.Type = obj.TYPE_REG - p.To.Reg = ptrReg + p4 := s.Prog(loong64.AADDV) + p4.From.Type = obj.TYPE_CONST + p4.From.Offset = loopSize + p4.To.Type = obj.TYPE_REG + p4.To.Reg = ptrReg - // Decrement loop count. 
- // SUBV $1, countReg - p = s.Prog(loong64.ASUBV) - p.From.Type = obj.TYPE_CONST - p.From.Offset = 1 - p.To.Type = obj.TYPE_REG - p.To.Reg = countReg + p5 := s.Prog(loong64.ASUBV) + p5.From.Type = obj.TYPE_CONST + p5.From.Offset = 1 + p5.To.Type = obj.TYPE_REG + p5.To.Reg = countReg - // Jump to loop header if we're not done yet. - // BNE countReg, loop header - p = s.Prog(loong64.ABNE) - p.From.Type = obj.TYPE_REG - p.From.Reg = countReg - p.To.Type = obj.TYPE_BRANCH - p.To.SetTarget(cntInit.Link) + p6 := s.Prog(loong64.ABNE) + p6.From.Type = obj.TYPE_REG + p6.From.Reg = countReg + p6.To.Type = obj.TYPE_BRANCH + p6.To.SetTarget(p3.Link) + + p7 := s.Prog(obj.AJMP) + p7.To.Type = obj.TYPE_BRANCH + + p8 := s.Prog(loong64.AVXORV) + p8.From.Type = obj.TYPE_REG + p8.From.Reg = loong64.REG_V31 + p8.To.Type = obj.TYPE_REG + p8.To.Reg = loong64.REG_V31 + p3.To.SetTarget(p8) + + for off = 0; off < loopSize; off += 16 { + zero16(s, loong64.REG_V31, ptrReg, off) + } + + p9 := s.Prog(loong64.AADDV) + p9.From.Type = obj.TYPE_CONST + p9.From.Offset = loopSize + p9.To.Type = obj.TYPE_REG + p9.To.Reg = ptrReg + + p10 := s.Prog(loong64.ASUBV) + p10.From.Type = obj.TYPE_CONST + p10.From.Offset = 1 + p10.To.Type = obj.TYPE_REG + p10.To.Reg = countReg + + p11 := s.Prog(loong64.ABNE) + p11.From.Type = obj.TYPE_REG + p11.From.Reg = countReg + p11.To.Type = obj.TYPE_BRANCH + p11.To.SetTarget(p8.Link) + + p12 := s.Prog(obj.ANOP) + p7.To.SetTarget(p12) // Multiples of the loop size are now done. n %= loopSize - - off = 0 // Write any fractional portion. - for n >= 8 { - // MOVV ZR, off(ptrReg) + for off = 0; n >= 8; off += 8 { + // MOVV ZR, off(ptrReg) zero8(s, ptrReg, off) - off += 8 n -= 8 } @@ -1333,7 +1395,7 @@ func move8(s *ssagen.State, src, dst, tmp int16, off int64) { // zero8 zeroes 8 bytes at reg+off. 
func zero8(s *ssagen.State, reg int16, off int64) { - // MOVV ZR, off(reg) + // MOVV ZR, off(reg) p := s.Prog(loong64.AMOVV) p.From.Type = obj.TYPE_REG p.From.Reg = loong64.REGZERO @@ -1341,3 +1403,14 @@ func zero8(s *ssagen.State, reg int16, off int64) { p.To.Reg = reg p.To.Offset = off } + +// zero16 zeroes 16 bytes at reg+off. +func zero16(s *ssagen.State, regZero, regBase int16, off int64) { + // VMOVQ regZero, off(regBase) + p := s.Prog(loong64.AVMOVQ) + p.From.Type = obj.TYPE_REG + p.From.Reg = regZero + p.To.Type = obj.TYPE_MEM + p.To.Reg = regBase + p.To.Offset = off +} diff --git a/src/cmd/compile/internal/slice/slice.go b/src/cmd/compile/internal/slice/slice.go new file mode 100644 index 0000000000..7a32e7adbd --- /dev/null +++ b/src/cmd/compile/internal/slice/slice.go @@ -0,0 +1,455 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package slice + +// This file implements a stack-allocation optimization +// for the backing store of slices. +// +// Consider the code: +// +// var s []int +// for i := range ... { +// s = append(s, i) +// } +// return s +// +// Some of the append operations will need to do an allocation +// by calling growslice. This will happen on the 1st, 2nd, 4th, +// 8th, etc. append calls. The allocations done by all but the +// last growslice call will then immediately be garbage. +// +// We'd like to avoid doing some of those intermediate +// allocations if possible. +// +// If we can determine that the "return s" statement is the +// *only* way that the backing store for s escapes, then we +// can rewrite the code to something like: +// +// var s []int +// for i := range N { +// s = append(s, i) +// } +// s = move2heap(s) +// return s +// +// Using the move2heap runtime function, which does: +// +// move2heap(s): +// If s is not backed by a stackframe-allocated +// backing store, return s. 
Otherwise, copy s +// to the heap and return the copy. +// +// Now we can treat the backing store of s allocated at the +// append site as not escaping. Previous stack allocation +// optimizations now apply, which can use a fixed-size +// stack-allocated backing store for s when appending. +// (See ../ssagen/ssa.go:(*state).append) +// +// It is tricky to do this optimization safely. To describe +// our analysis, we first define what an "exclusive" slice +// variable is. +// +// A slice variable (a variable of slice type) is called +// "exclusive" if, when it has a reference to a +// stackframe-allocated backing store, it is the only +// variable with such a reference. +// +// In other words, a slice variable is exclusive if +// any of the following holds: +// 1) It points to a heap-allocated backing store +// 2) It points to a stack-allocated backing store +// for any parent frame. +// 3) It is the only variable that references its +// backing store. +// 4) It is nil. +// +// The nice thing about exclusive slice variables is that +// it is always safe to do +// s = move2heap(s) +// whenever s is an exclusive slice variable. Because no +// one else has a reference to the backing store, no one +// else can tell that we moved the backing store from one +// location to another. +// +// Note that exclusiveness is a dynamic property. A slice +// variable may be exclusive during some parts of execution +// and not exclusive during others. +// +// The following operations set or preserve the exclusivity +// of a slice variable s: +// s = nil +// s = append(s, ...) +// s = s[i:j] +// ... = s[i] +// s[i] = ... +// f(s) where f does not escape its argument +// Other operations destroy exclusivity. A non-exhaustive list includes: +// x = s +// *p = s +// f(s) where f escapes its argument +// return s +// To err on the safe side, we white list exclusivity-preserving +// operations and we assume that any other operations that mention s +// destroy its exclusivity. 
+// +// Our strategy is to move the backing store of s to the heap before +// any exclusive->nonexclusive transition. That way, s will only ever +// have a reference to a stack backing store while it is exclusive. +// +// move2heap for a variable s is implemented with: +// if s points to within the stack frame { +// s2 := make([]T, s.len, s.cap) +// copy(s2[:s.cap], s[:s.cap]) +// s = s2 +// } +// Note that in general we need to copy all of s[:cap(s)] elements when +// moving to the heap. As an optimization, we keep track of slice variables +// whose capacity, and the elements in s[len(s):cap(s)], are never accessed. +// For those slice variables, we can allocate to the next size class above +// the length, which saves memory and copying cost. + +import ( + "cmd/compile/internal/base" + "cmd/compile/internal/escape" + "cmd/compile/internal/ir" + "cmd/compile/internal/reflectdata" +) + +func Funcs(all []*ir.Func) { + if base.Flag.N != 0 { + return + } + for _, fn := range all { + analyze(fn) + } +} + +func analyze(fn *ir.Func) { + type sliceInfo struct { + // Slice variable. + s *ir.Name + + // Count of uses that this pass understands. + okUses int32 + // Count of all uses found. + allUses int32 + + // A place where the slice variable transitions from + // exclusive to nonexclusive. + // We could keep track of more than one, but one is enough for now. + // Currently, this can be either a return statement or + // an assignment. + // TODO: other possible transitions? + transition ir.Stmt + + // Each s = append(s, ...) instance we found. + appends []*ir.CallExpr + + // Weight of the number of s = append(s, ...) instances we found. + // The optimizations we do are only really useful if there are at + // least weight 2. (Note: appends in loops have weight >= 2.) + appendWeight int + + // Whether we ever do cap(s), or other operations that use cap(s) + // (possibly implicitly), like s[i:j]. 
+ capUsed bool + } + + // Every variable (*ir.Name) that we are tracking will have + // a non-nil *sliceInfo in its Opt field. + haveLocalSlice := false + maxStackSize := int64(base.Debug.VariableMakeThreshold) + var namedRets []*ir.Name + for _, s := range fn.Dcl { + if !s.Type().IsSlice() { + continue + } + if s.Type().Elem().Size() > maxStackSize { + continue + } + if !base.VariableMakeHash.MatchPos(s.Pos(), nil) { + continue + } + s.Opt = &sliceInfo{s: s} // start tracking s + haveLocalSlice = true + if s.Class == ir.PPARAMOUT { + namedRets = append(namedRets, s) + } + } + if !haveLocalSlice { + return + } + + // Keep track of loop depth while walking. + loopDepth := 0 + + // tracking returns the info for the slice variable if n is a slice + // variable that we're still considering, or nil otherwise. + tracking := func(n ir.Node) *sliceInfo { + if n == nil || n.Op() != ir.ONAME { + return nil + } + s := n.(*ir.Name) + if s.Opt == nil { + return nil + } + return s.Opt.(*sliceInfo) + } + + // addTransition(n, loc) records that s experiences an exclusive->nonexclusive + // transition somewhere within loc. + addTransition := func(i *sliceInfo, loc ir.Stmt) { + if i.transition != nil { + // We only keep track of a single exclusive->nonexclusive transition + // for a slice variable. If we find more than one, give up. + // (More than one transition location would be fine, but we would + // start to get worried about introducing too much additional code.) + i.s.Opt = nil + return + } + i.transition = loc + } + + // Examine an x = y assignment that occurs somewhere within statement stmt. + assign := func(x, y ir.Node, stmt ir.Stmt) { + if i := tracking(x); i != nil { + // s = y. Check for understood patterns for y. + if y == nil || y.Op() == ir.ONIL { + // s = nil is ok. + i.okUses++ + } else if y.Op() == ir.OSLICELIT { + // s = []{...} is ok. + // Note: this reveals capacity. Should it? 
+ i.okUses++ + i.capUsed = true + } else if y.Op() == ir.OSLICE { + y := y.(*ir.SliceExpr) + if y.X == i.s { + // s = s[...:...] is ok + i.okUses += 2 + i.capUsed = true + } + } else if y.Op() == ir.OAPPEND { + y := y.(*ir.CallExpr) + if y.Args[0] == i.s { + // s = append(s, ...) is ok + i.okUses += 2 + i.appends = append(i.appends, y) + i.appendWeight += 1 + loopDepth + } + // TODO: s = append(nil, ...)? + } + // Note that technically s = make([]T, ...) preserves exclusivity, but + // we don't track that because we assume users who wrote that know + // better than the compiler does. + + // TODO: figure out how to handle s = fn(..., s, ...) + // It would be nice to maintain exclusivity of s in this situation. + // But unfortunately, fn can return one of its other arguments, which + // may be a slice with a stack-allocated backing store other than s. + // (which may have preexisting references to its backing store). + // + // Maybe we could do it if s is the only argument? + } + + if i := tracking(y); i != nil { + // ... = s + // Treat this as an exclusive->nonexclusive transition. + i.okUses++ + addTransition(i, stmt) + } + } + + var do func(ir.Node) bool + do = func(n ir.Node) bool { + if n == nil { + return false + } + switch n.Op() { + case ir.ONAME: + if i := tracking(n); i != nil { + // A use of a slice variable. Count it. + i.allUses++ + } + case ir.ODCL: + n := n.(*ir.Decl) + if i := tracking(n.X); i != nil { + i.okUses++ + } + case ir.OINDEX: + n := n.(*ir.IndexExpr) + if i := tracking(n.X); i != nil { + // s[i] is ok. + i.okUses++ + } + case ir.OLEN: + n := n.(*ir.UnaryExpr) + if i := tracking(n.X); i != nil { + // len(s) is ok + i.okUses++ + } + case ir.OCAP: + n := n.(*ir.UnaryExpr) + if i := tracking(n.X); i != nil { + // cap(s) is ok + i.okUses++ + i.capUsed = true + } + case ir.OADDR: + n := n.(*ir.AddrExpr) + if n.X.Op() == ir.OINDEX { + n := n.X.(*ir.IndexExpr) + if i := tracking(n.X); i != nil { + // &s[i] is definitely a nonexclusive transition. 
+ // (We need this case because s[i] is ok, but &s[i] is not.) + i.s.Opt = nil + } + } + case ir.ORETURN: + n := n.(*ir.ReturnStmt) + for _, x := range n.Results { + if i := tracking(x); i != nil { + i.okUses++ + // We go exclusive->nonexclusive here + addTransition(i, n) + } + } + if len(n.Results) == 0 { + // Uses of named result variables are implicit here. + for _, x := range namedRets { + if i := tracking(x); i != nil { + addTransition(i, n) + } + } + } + case ir.OCALLFUNC: + n := n.(*ir.CallExpr) + for idx, arg := range n.Args { + if i := tracking(arg); i != nil { + if !argLeak(n, idx) { + // Passing s to a nonescaping arg is ok. + i.okUses++ + i.capUsed = true + } + } + } + case ir.ORANGE: + // Range over slice is ok. + n := n.(*ir.RangeStmt) + if i := tracking(n.X); i != nil { + i.okUses++ + } + case ir.OAS: + n := n.(*ir.AssignStmt) + assign(n.X, n.Y, n) + case ir.OAS2: + n := n.(*ir.AssignListStmt) + for i := range len(n.Lhs) { + assign(n.Lhs[i], n.Rhs[i], n) + } + case ir.OCLOSURE: + n := n.(*ir.ClosureExpr) + for _, v := range n.Func.ClosureVars { + do(v.Outer) + } + } + if n.Op() == ir.OFOR || n.Op() == ir.ORANGE { + // Note: loopDepth isn't really right for init portion + // of the for statement, but that's ok. Correctness + // does not depend on depth info. + loopDepth++ + defer func() { loopDepth-- }() + } + // Check all the children. + ir.DoChildren(n, do) + return false + } + + // Run the analysis over the whole body. + for _, stmt := range fn.Body { + do(stmt) + } + + // Process accumulated info to find slice variables + // that we can allocate on the stack. + for _, s := range fn.Dcl { + if s.Opt == nil { + continue + } + i := s.Opt.(*sliceInfo) + s.Opt = nil + if i.okUses != i.allUses { + // Some use of i.s that don't understand lurks. Give up. + continue + } + + // At this point, we've decided that we *can* do + // the optimization. + + if i.transition == nil { + // Exclusive for its whole lifetime. That means it + // didn't escape. 
We can already handle nonescaping + // slices without this pass. + continue + } + if i.appendWeight < 2 { + // This optimization only really helps if there is + // (dynamically) more than one append. + continue + } + + // Commit point - at this point we've decided we *should* + // do the optimization. + + // Insert a move2heap operation before the exclusive->nonexclusive + // transition. + move := ir.NewMoveToHeapExpr(i.transition.Pos(), i.s) + if i.capUsed { + move.PreserveCapacity = true + } + move.RType = reflectdata.AppendElemRType(i.transition.Pos(), i.appends[0]) + move.SetType(i.s.Type()) + move.SetTypecheck(1) + as := ir.NewAssignStmt(i.transition.Pos(), i.s, move) + as.SetTypecheck(1) + i.transition.PtrInit().Prepend(as) + // Note: we prepend because we need to put the move2heap + // operation first, before any other init work, as the transition + // might occur in the init work. + + // Now that we've inserted a move2heap operation before every + // exclusive -> nonexclusive transition, appends can now use + // stack backing stores. + // (This is the whole point of this pass, to enable stack + // allocation of append backing stores.) + for _, a := range i.appends { + a.SetEsc(ir.EscNone) + if i.capUsed { + a.UseBuf = true + } + } + } +} + +// argLeak reports if the idx'th argument to the call n escapes anywhere +// (to the heap, another argument, return value, etc.) +// If unknown returns true. 
+func argLeak(n *ir.CallExpr, idx int) bool { + if n.Op() != ir.OCALLFUNC { + return true + } + fn := ir.StaticCalleeName(ir.StaticValue(n.Fun)) + if fn == nil { + return true + } + fntype := fn.Type() + if recv := fntype.Recv(); recv != nil { + if idx == 0 { + return escape.ParseLeaks(recv.Note).Any() + } + idx-- + } + return escape.ParseLeaks(fntype.Params()[idx].Note).Any() +} diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go index 1e9eb0146e..e77f55ab5e 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go @@ -156,6 +156,7 @@ func init() { gp11sb = regInfo{inputs: []regMask{gpspsbg}, outputs: gponly} gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly} + gp21sp2 = regInfo{inputs: []regMask{gp, gpsp}, outputs: gponly} gp21sb = regInfo{inputs: []regMask{gpspsbg, gpsp}, outputs: gponly} gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}} gp31shift = regInfo{inputs: []regMask{gp, gp, cx}, outputs: []regMask{gp}} @@ -361,7 +362,7 @@ func init() { {name: "ADDQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "ADDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, - {name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true, clobberFlags: true}, + {name: "SUBQ", argLength: 2, reg: gp21sp2, asm: "SUBQ", resultInArg0: true, clobberFlags: true}, {name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true}, {name: "SUBQconst", argLength: 1, reg: gp11, asm: "SUBQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, {name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, 
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules index f54a692725..53bb35d289 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules @@ -573,6 +573,8 @@ (TBNZ [0] (GreaterThanF cc) yes no) => (FGT cc yes no) (TBNZ [0] (GreaterEqualF cc) yes no) => (FGE cc yes no) +(TB(Z|NZ) [0] (XORconst [1] x) yes no) => (TB(NZ|Z) [0] x yes no) + ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TST x y) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTconst [c] y) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTW x y) yes no) @@ -1814,3 +1816,7 @@ (Select0 (Mul64uover x y)) => (MUL x y) (Select1 (Mul64uover x y)) => (NotEqual (CMPconst (UMULH <typ.UInt64> x y) [0])) + +// 32 mul 32 -> 64 +(MUL r:(MOVWUreg x) s:(MOVWUreg y)) && r.Uses == 1 && s.Uses == 1 => (UMULL x y) +(MUL r:(MOVWreg x) s:(MOVWreg y)) && r.Uses == 1 && s.Uses == 1 => (MULL x y) diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules index 9691296043..2beba0b1c5 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules @@ -743,9 +743,6 @@ (MULV x (MOVVconst [c])) && canMulStrengthReduce(config, c) => {mulStrengthReduce(v, x, c)} -(MULV (NEGV x) (MOVVconst [c])) => (MULV x (MOVVconst [-c])) -(MULV (NEGV x) (NEGV y)) => (MULV x y) - (ADDV x0 x1:(SLLVconst [c] y)) && x1.Uses == 1 && c > 0 && c <= 4 => (ADDshiftLLV x0 y [c]) // fold constant in ADDshift op diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go index 7e8b8bf497..81d3a3665b 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go @@ -388,6 +388,7 @@ 
func init() { argLength: 2, reg: regInfo{ inputs: []regMask{gp}, + clobbers: buildReg("F31"), clobbersArg0: true, }, faultOnNilArg0: true, diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules index 646948f2df..13a8cab3b5 100644 --- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules @@ -689,36 +689,36 @@ (MOVDnop (MOVDconst [c])) => (MOVDconst [c]) // Avoid unnecessary zero and sign extension when right shifting. -(SRAI <t> [x] (MOVWreg y)) && x >= 0 && x <= 31 => (SRAIW <t> [int64(x)] y) -(SRLI <t> [x] (MOVWUreg y)) && x >= 0 && x <= 31 => (SRLIW <t> [int64(x)] y) +(SRAI [x] (MOVWreg y)) && x >= 0 && x <= 31 => (SRAIW [x] y) +(SRLI [x] (MOVWUreg y)) && x >= 0 && x <= 31 => (SRLIW [x] y) // Replace right shifts that exceed size of signed type. (SRAI <t> [x] (MOVBreg y)) && x >= 8 => (SRAI [63] (SLLI <t> [56] y)) (SRAI <t> [x] (MOVHreg y)) && x >= 16 => (SRAI [63] (SLLI <t> [48] y)) -(SRAI <t> [x] (MOVWreg y)) && x >= 32 => (SRAIW [31] y) +(SRAI [x] (MOVWreg y)) && x >= 32 => (SRAIW [31] y) // Eliminate right shifts that exceed size of unsigned type. -(SRLI <t> [x] (MOVBUreg y)) && x >= 8 => (MOVDconst <t> [0]) -(SRLI <t> [x] (MOVHUreg y)) && x >= 16 => (MOVDconst <t> [0]) -(SRLI <t> [x] (MOVWUreg y)) && x >= 32 => (MOVDconst <t> [0]) +(SRLI [x] (MOVBUreg y)) && x >= 8 => (MOVDconst [0]) +(SRLI [x] (MOVHUreg y)) && x >= 16 => (MOVDconst [0]) +(SRLI [x] (MOVWUreg y)) && x >= 32 => (MOVDconst [0]) // Fold constant into immediate instructions where possible. 
(ADD (MOVDconst <t> [val]) x) && is32Bit(val) && !t.IsPtr() => (ADDI [val] x) (AND (MOVDconst [val]) x) && is32Bit(val) => (ANDI [val] x) (OR (MOVDconst [val]) x) && is32Bit(val) => (ORI [val] x) (XOR (MOVDconst [val]) x) && is32Bit(val) => (XORI [val] x) -(ROL x (MOVDconst [val])) => (RORI [int64(int8(-val)&63)] x) -(ROLW x (MOVDconst [val])) => (RORIW [int64(int8(-val)&31)] x) -(ROR x (MOVDconst [val])) => (RORI [int64(val&63)] x) -(RORW x (MOVDconst [val])) => (RORIW [int64(val&31)] x) -(SLL x (MOVDconst [val])) => (SLLI [int64(val&63)] x) -(SRL x (MOVDconst [val])) => (SRLI [int64(val&63)] x) -(SLLW x (MOVDconst [val])) => (SLLIW [int64(val&31)] x) -(SRLW x (MOVDconst [val])) => (SRLIW [int64(val&31)] x) -(SRA x (MOVDconst [val])) => (SRAI [int64(val&63)] x) -(SRAW x (MOVDconst [val])) => (SRAIW [int64(val&31)] x) -(SLT x (MOVDconst [val])) && val >= -2048 && val <= 2047 => (SLTI [val] x) -(SLTU x (MOVDconst [val])) && val >= -2048 && val <= 2047 => (SLTIU [val] x) +(ROL x (MOVDconst [val])) => (RORI [-val&63] x) +(ROLW x (MOVDconst [val])) => (RORIW [-val&31] x) +(ROR x (MOVDconst [val])) => (RORI [val&63] x) +(RORW x (MOVDconst [val])) => (RORIW [val&31] x) +(SLL x (MOVDconst [val])) => (SLLI [val&63] x) +(SLLW x (MOVDconst [val])) => (SLLIW [val&31] x) +(SRL x (MOVDconst [val])) => (SRLI [val&63] x) +(SRLW x (MOVDconst [val])) => (SRLIW [val&31] x) +(SRA x (MOVDconst [val])) => (SRAI [val&63] x) +(SRAW x (MOVDconst [val])) => (SRAIW [val&31] x) +(SLT x (MOVDconst [val])) && is12Bit(val) => (SLTI [val] x) +(SLTU x (MOVDconst [val])) && is12Bit(val) => (SLTIU [val] x) // Replace negated left rotation with right rotation. (ROL x (NEG y)) => (ROR x y) @@ -782,7 +782,7 @@ (SRAI [x] (MOVDconst [y])) => (MOVDconst [int64(y) >> uint32(x)]) // Combine doubling via addition with shift. 
-(SLLI <t> [c] (ADD x x)) && c < t.Size() * 8 - 1 => (SLLI <t> [c+1] x) +(SLLI <t> [c] (ADD x x)) && c < t.Size() * 8 - 1 => (SLLI [c+1] x) (SLLI <t> [c] (ADD x x)) && c >= t.Size() * 8 - 1 => (MOVDconst [0]) // SLTI/SLTIU with constants. @@ -792,7 +792,6 @@ // SLTI/SLTIU with known outcomes. (SLTI [x] (ANDI [y] _)) && y >= 0 && int64(y) < int64(x) => (MOVDconst [1]) (SLTIU [x] (ANDI [y] _)) && y >= 0 && uint64(y) < uint64(x) => (MOVDconst [1]) -(SLTI [x] (ORI [y] _)) && y >= 0 && int64(y) >= int64(x) => (MOVDconst [0]) (SLTIU [x] (ORI [y] _)) && y >= 0 && uint64(y) >= uint64(x) => (MOVDconst [0]) // SLT/SLTU with known outcomes. diff --git a/src/cmd/compile/internal/ssa/_gen/dec.rules b/src/cmd/compile/internal/ssa/_gen/dec.rules index 9f6dc36975..fce0026211 100644 --- a/src/cmd/compile/internal/ssa/_gen/dec.rules +++ b/src/cmd/compile/internal/ssa/_gen/dec.rules @@ -97,8 +97,10 @@ // Helpers for expand calls // Some of these are copied from generic.rules -(IMake _typ (StructMake val)) => (IMake _typ val) -(StructSelect [0] (IData x)) => (IData x) +(IMake _typ (StructMake ___)) => imakeOfStructMake(v) +(StructSelect (IData x)) && v.Type.Size() > 0 => (IData x) +(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsStruct() => (StructMake) +(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsArray() => (ArrayMake0) (StructSelect [i] x:(StructMake ___)) => x.Args[i] @@ -109,7 +111,7 @@ // More annoying case: (ArraySelect[0] (StructSelect[0] isAPtr)) // There, result of the StructSelect is an Array (not a pointer) and // the pre-rewrite input to the ArraySelect is a struct, not a pointer. -(StructSelect [0] x) && x.Type.IsPtrShaped() => x +(StructSelect x) && x.Type.IsPtrShaped() => x (ArraySelect [0] x) && x.Type.IsPtrShaped() => x // These, too. Bits is bits. 
@@ -119,6 +121,7 @@ (Store _ (StructMake ___) _) => rewriteStructStore(v) +(IMake _typ (ArrayMake1 val)) => (IMake _typ val) (ArraySelect (ArrayMake1 x)) => x (ArraySelect [0] (IData x)) => (IData x) diff --git a/src/cmd/compile/internal/ssa/_gen/generic.rules b/src/cmd/compile/internal/ssa/_gen/generic.rules index ccdf0bf50d..6a213cd03a 100644 --- a/src/cmd/compile/internal/ssa/_gen/generic.rules +++ b/src/cmd/compile/internal/ssa/_gen/generic.rules @@ -195,6 +195,11 @@ // Convert x * -1 to -x. (Mul(8|16|32|64) (Const(8|16|32|64) [-1]) x) => (Neg(8|16|32|64) x) +// Convert -x * c to x * -c +(Mul(8|16|32|64) (Const(8|16|32|64) <t> [c]) (Neg(8|16|32|64) x)) => (Mul(8|16|32|64) x (Const(8|16|32|64) <t> [-c])) + +(Mul(8|16|32|64) (Neg(8|16|32|64) x) (Neg(8|16|32|64) y)) => (Mul(8|16|32|64) x y) + // DeMorgan's Laws (And(8|16|32|64) <t> (Com(8|16|32|64) x) (Com(8|16|32|64) y)) => (Com(8|16|32|64) (Or(8|16|32|64) <t> x y)) (Or(8|16|32|64) <t> (Com(8|16|32|64) x) (Com(8|16|32|64) y)) => (Com(8|16|32|64) (And(8|16|32|64) <t> x y)) @@ -337,6 +342,12 @@ (OrB ((Less|Leq)16U (Const16 [c]) x) (Leq16U x (Const16 [d]))) && uint16(c) >= uint16(d+1) && uint16(d+1) > uint16(d) => ((Less|Leq)16U (Const16 <x.Type> [c-d-1]) (Sub16 <x.Type> x (Const16 <x.Type> [d+1]))) (OrB ((Less|Leq)8U (Const8 [c]) x) (Leq8U x (Const8 [d]))) && uint8(c) >= uint8(d+1) && uint8(d+1) > uint8(d) => ((Less|Leq)8U (Const8 <x.Type> [c-d-1]) (Sub8 <x.Type> x (Const8 <x.Type> [d+1]))) +// single bit difference: ( x != c && x != d ) -> ( x|(c^d) != c ) +(AndB (Neq(64|32|16|8) x cv:(Const(64|32|16|8) [c])) (Neq(64|32|16|8) x (Const(64|32|16|8) [d]))) && c|d == c && oneBit(c^d) => (Neq(64|32|16|8) (Or(64|32|16|8) <x.Type> x (Const(64|32|16|8) <x.Type> [c^d])) cv) + +// single bit difference: ( x == c || x == d ) -> ( x|(c^d) == c ) +(OrB (Eq(64|32|16|8) x cv:(Const(64|32|16|8) [c])) (Eq(64|32|16|8) x (Const(64|32|16|8) [d]))) && c|d == c && oneBit(c^d) => (Eq(64|32|16|8) (Or(64|32|16|8) <x.Type> x 
(Const(64|32|16|8) <x.Type> [c^d])) cv) + // NaN check: ( x != x || x (>|>=|<|<=) c ) -> ( !(c (>=|>|<=|<) x) ) (OrB (Neq64F x x) ((Less|Leq)64F x y:(Const64F [c]))) => (Not ((Leq|Less)64F y x)) (OrB (Neq64F x x) ((Less|Leq)64F y:(Const64F [c]) x)) => (Not ((Leq|Less)64F x y)) @@ -933,8 +944,10 @@ @x.Block (Load <v.Type> (OffPtr <v.Type.PtrTo()> [t.FieldOff(int(i))] ptr) mem) // Putting struct{*byte} and similar into direct interfaces. -(IMake _typ (StructMake val)) => (IMake _typ val) -(StructSelect [0] (IData x)) => (IData x) +(IMake _typ (StructMake ___)) => imakeOfStructMake(v) +(StructSelect (IData x)) && v.Type.Size() > 0 => (IData x) +(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsStruct() => (StructMake) +(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsArray() => (ArrayMake0) // un-SSAable values use mem->mem copies (Store {t} dst (Load src mem) mem) && !CanSSA(t) => @@ -2222,4 +2235,4 @@ (Neq(64|32|16) (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [0])) => x (Neq(64|32|16) (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [1])) => (Not x) (Eq(64|32|16) (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [1])) => x -(Eq(64|32|16) (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [0])) => (Not x)
\ No newline at end of file +(Eq(64|32|16) (SignExt8to(64|32|16) (CvtBoolToUint8 x)) (Const(64|32|16) [0])) => (Not x) diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go index c1726b2797..1a2985d5af 100644 --- a/src/cmd/compile/internal/ssa/expand_calls.go +++ b/src/cmd/compile/internal/ssa/expand_calls.go @@ -426,7 +426,14 @@ func (x *expandState) decomposeAsNecessary(pos src.XPos, b *Block, a, m0 *Value, if a.Op == OpIMake { data := a.Args[1] for data.Op == OpStructMake || data.Op == OpArrayMake1 { - data = data.Args[0] + // A struct make might have a few zero-sized fields. + // Use the pointer-y one we know is there. + for _, a := range data.Args { + if a.Type.Size() > 0 { + data = a + break + } + } } return x.decomposeAsNecessary(pos, b, data, mem, rc.next(data.Type)) } diff --git a/src/cmd/compile/internal/ssa/fuse.go b/src/cmd/compile/internal/ssa/fuse.go index 0cee91b532..e95064c1df 100644 --- a/src/cmd/compile/internal/ssa/fuse.go +++ b/src/cmd/compile/internal/ssa/fuse.go @@ -10,7 +10,9 @@ import ( ) // fuseEarly runs fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeNanCheck). -func fuseEarly(f *Func) { fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeNanCheck) } +func fuseEarly(f *Func) { + fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeSingleBitDifference|fuseTypeNanCheck) +} // fuseLate runs fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect). 
func fuseLate(f *Func) { fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect) } @@ -21,6 +23,7 @@ const ( fuseTypePlain fuseType = 1 << iota fuseTypeIf fuseTypeIntInRange + fuseTypeSingleBitDifference fuseTypeNanCheck fuseTypeBranchRedirect fuseTypeShortCircuit @@ -41,6 +44,9 @@ func fuse(f *Func, typ fuseType) { if typ&fuseTypeIntInRange != 0 { changed = fuseIntInRange(b) || changed } + if typ&fuseTypeSingleBitDifference != 0 { + changed = fuseSingleBitDifference(b) || changed + } if typ&fuseTypeNanCheck != 0 { changed = fuseNanCheck(b) || changed } diff --git a/src/cmd/compile/internal/ssa/fuse_comparisons.go b/src/cmd/compile/internal/ssa/fuse_comparisons.go index b6eb8fcb90..898c034485 100644 --- a/src/cmd/compile/internal/ssa/fuse_comparisons.go +++ b/src/cmd/compile/internal/ssa/fuse_comparisons.go @@ -19,6 +19,14 @@ func fuseNanCheck(b *Block) bool { return fuseComparisons(b, canOptNanCheck) } +// fuseSingleBitDifference replaces the short-circuit operators between equality checks with +// constants that only differ by a single bit. For example, it would convert +// `if x == 4 || x == 6 { ... }` into `if (x == 4) | (x == 6) { ... }`. Rewrite rules can +// then optimize these using a bitwise operation, in this case generating `if x|2 == 6 { ... }`. +func fuseSingleBitDifference(b *Block) bool { + return fuseComparisons(b, canOptSingleBitDifference) +} + // fuseComparisons looks for control graphs that match this pattern: // // p - predecessor @@ -229,3 +237,40 @@ func canOptNanCheck(x, y *Value, op Op) bool { } return false } + +// canOptSingleBitDifference returns true if x op y matches either: +// +// v == c || v == d +// v != c && v != d +// +// Where c and d are constant values that differ by a single bit. 
+func canOptSingleBitDifference(x, y *Value, op Op) bool { + if x.Op != y.Op { + return false + } + switch x.Op { + case OpEq64, OpEq32, OpEq16, OpEq8: + if op != OpOrB { + return false + } + case OpNeq64, OpNeq32, OpNeq16, OpNeq8: + if op != OpAndB { + return false + } + default: + return false + } + + xi := getConstIntArgIndex(x) + if xi < 0 { + return false + } + yi := getConstIntArgIndex(y) + if yi < 0 { + return false + } + if x.Args[xi^1] != y.Args[yi^1] { + return false + } + return oneBit(x.Args[xi].AuxInt ^ y.Args[yi].AuxInt) +} diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 9c5d79fa56..ea5491362f 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -11481,7 +11481,7 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 }, outputs: []outputInfo{ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 @@ -68770,6 +68770,7 @@ var opcodeTable = [...]opInfo{ inputs: []inputInfo{ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 }, + clobbers: 2305843009213693952, // F31 clobbersArg0: true, }, }, diff --git a/src/cmd/compile/internal/ssa/prove.go b/src/cmd/compile/internal/ssa/prove.go index 4919d6ad37..d4e7ed14b1 100644 --- a/src/cmd/compile/internal/ssa/prove.go +++ b/src/cmd/compile/internal/ssa/prove.go @@ -466,57 +466,56 @@ func (ft *factsTable) initLimitForNewValue(v *Value) { // signedMin records the fact that we know v is at least // min in the signed domain. 
-func (ft *factsTable) signedMin(v *Value, min int64) bool { - return ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64}) +func (ft *factsTable) signedMin(v *Value, min int64) { + ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64}) } // signedMax records the fact that we know v is at most // max in the signed domain. -func (ft *factsTable) signedMax(v *Value, max int64) bool { - return ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64}) +func (ft *factsTable) signedMax(v *Value, max int64) { + ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64}) } -func (ft *factsTable) signedMinMax(v *Value, min, max int64) bool { - return ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64}) +func (ft *factsTable) signedMinMax(v *Value, min, max int64) { + ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64}) } // setNonNegative records the fact that v is known to be non-negative. -func (ft *factsTable) setNonNegative(v *Value) bool { - return ft.signedMin(v, 0) +func (ft *factsTable) setNonNegative(v *Value) { + ft.signedMin(v, 0) } // unsignedMin records the fact that we know v is at least // min in the unsigned domain. -func (ft *factsTable) unsignedMin(v *Value, min uint64) bool { - return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64}) +func (ft *factsTable) unsignedMin(v *Value, min uint64) { + ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64}) } // unsignedMax records the fact that we know v is at most // max in the unsigned domain. 
-func (ft *factsTable) unsignedMax(v *Value, max uint64) bool { - return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max}) +func (ft *factsTable) unsignedMax(v *Value, max uint64) { + ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max}) } -func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) bool { - return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max}) +func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) { + ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max}) } -func (ft *factsTable) booleanFalse(v *Value) bool { - return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0}) +func (ft *factsTable) booleanFalse(v *Value) { + ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0}) } -func (ft *factsTable) booleanTrue(v *Value) bool { - return ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1}) +func (ft *factsTable) booleanTrue(v *Value) { + ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1}) } -func (ft *factsTable) pointerNil(v *Value) bool { - return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0}) +func (ft *factsTable) pointerNil(v *Value) { + ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0}) } -func (ft *factsTable) pointerNonNil(v *Value) bool { +func (ft *factsTable) pointerNonNil(v *Value) { l := noLimit l.umin = 1 - return ft.newLimit(v, l) + ft.newLimit(v, l) } // newLimit adds new limiting information for v. -// Returns true if the new limit added any new information. -func (ft *factsTable) newLimit(v *Value, newLim limit) bool { +func (ft *factsTable) newLimit(v *Value, newLim limit) { oldLim := ft.limits[v.ID] // Merge old and new information. 
@@ -531,13 +530,12 @@ func (ft *factsTable) newLimit(v *Value, newLim limit) bool { } if lim == oldLim { - return false // nothing new to record + return // nothing new to record } if lim.unsat() { - r := !ft.unsat ft.unsat = true - return r + return } // Check for recursion. This normally happens because in unsatisfiable @@ -548,7 +546,7 @@ func (ft *factsTable) newLimit(v *Value, newLim limit) bool { // the posets will not notice. if ft.recurseCheck[v.ID] { // This should only happen for unsatisfiable cases. TODO: check - return false + return } ft.recurseCheck[v.ID] = true defer func() { @@ -713,8 +711,6 @@ func (ft *factsTable) newLimit(v *Value, newLim limit) bool { } } } - - return true } func (ft *factsTable) addOrdering(v, w *Value, d domain, r relation) { @@ -1825,7 +1821,7 @@ func initLimit(v *Value) limit { return lim } -// flowLimit updates the known limits of v in ft. Returns true if anything changed. +// flowLimit updates the known limits of v in ft. // flowLimit can use the ranges of input arguments. // // Note: this calculation only happens at the point the value is defined. We do not reevaluate @@ -1838,10 +1834,10 @@ func initLimit(v *Value) limit { // block. We could recompute the range of v once we enter the block so // we know that it is 0 <= v <= 8, but we don't have a mechanism to do // that right now. -func (ft *factsTable) flowLimit(v *Value) bool { +func (ft *factsTable) flowLimit(v *Value) { if !v.Type.IsInteger() { // TODO: boolean? - return false + return } // Additional limits based on opcode and argument. 
@@ -1851,36 +1847,36 @@ func (ft *factsTable) flowLimit(v *Value) bool { // extensions case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16, OpZeroExt16to64, OpZeroExt16to32, OpZeroExt32to64: a := ft.limits[v.Args[0].ID] - return ft.unsignedMinMax(v, a.umin, a.umax) + ft.unsignedMinMax(v, a.umin, a.umax) case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16, OpSignExt16to64, OpSignExt16to32, OpSignExt32to64: a := ft.limits[v.Args[0].ID] - return ft.signedMinMax(v, a.min, a.max) + ft.signedMinMax(v, a.min, a.max) case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8: a := ft.limits[v.Args[0].ID] if a.umax <= 1<<(uint64(v.Type.Size())*8)-1 { - return ft.unsignedMinMax(v, a.umin, a.umax) + ft.unsignedMinMax(v, a.umin, a.umax) } // math/bits case OpCtz64: a := ft.limits[v.Args[0].ID] if a.nonzero() { - return ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1)) + ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1)) } case OpCtz32: a := ft.limits[v.Args[0].ID] if a.nonzero() { - return ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1)) + ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1)) } case OpCtz16: a := ft.limits[v.Args[0].ID] if a.nonzero() { - return ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1)) + ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1)) } case OpCtz8: a := ft.limits[v.Args[0].ID] if a.nonzero() { - return ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1)) + ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1)) } case OpPopCount64, OpPopCount32, OpPopCount16, OpPopCount8: @@ -1889,26 +1885,26 @@ func (ft *factsTable) flowLimit(v *Value) bool { sharedLeadingMask := ^(uint64(1)<<changingBitsCount - 1) fixedBits := a.umax & sharedLeadingMask min := uint64(bits.OnesCount64(fixedBits)) - return ft.unsignedMinMax(v, min, min+changingBitsCount) + ft.unsignedMinMax(v, min, min+changingBitsCount) case OpBitLen64: a := ft.limits[v.Args[0].ID] - return ft.unsignedMinMax(v, + ft.unsignedMinMax(v, 
uint64(bits.Len64(a.umin)), uint64(bits.Len64(a.umax))) case OpBitLen32: a := ft.limits[v.Args[0].ID] - return ft.unsignedMinMax(v, + ft.unsignedMinMax(v, uint64(bits.Len32(uint32(a.umin))), uint64(bits.Len32(uint32(a.umax)))) case OpBitLen16: a := ft.limits[v.Args[0].ID] - return ft.unsignedMinMax(v, + ft.unsignedMinMax(v, uint64(bits.Len16(uint16(a.umin))), uint64(bits.Len16(uint16(a.umax)))) case OpBitLen8: a := ft.limits[v.Args[0].ID] - return ft.unsignedMinMax(v, + ft.unsignedMinMax(v, uint64(bits.Len8(uint8(a.umin))), uint64(bits.Len8(uint8(a.umax)))) @@ -1921,43 +1917,43 @@ func (ft *factsTable) flowLimit(v *Value) bool { // AND can only make the value smaller. a := ft.limits[v.Args[0].ID] b := ft.limits[v.Args[1].ID] - return ft.unsignedMax(v, min(a.umax, b.umax)) + ft.unsignedMax(v, min(a.umax, b.umax)) case OpOr64, OpOr32, OpOr16, OpOr8: // OR can only make the value bigger and can't flip bits proved to be zero in both inputs. a := ft.limits[v.Args[0].ID] b := ft.limits[v.Args[1].ID] - return ft.unsignedMinMax(v, + ft.unsignedMinMax(v, max(a.umin, b.umin), 1<<bits.Len64(a.umax|b.umax)-1) case OpXor64, OpXor32, OpXor16, OpXor8: // XOR can't flip bits that are proved to be zero in both inputs. a := ft.limits[v.Args[0].ID] b := ft.limits[v.Args[1].ID] - return ft.unsignedMax(v, 1<<bits.Len64(a.umax|b.umax)-1) + ft.unsignedMax(v, 1<<bits.Len64(a.umax|b.umax)-1) case OpCom64, OpCom32, OpCom16, OpCom8: a := ft.limits[v.Args[0].ID] - return ft.newLimit(v, a.com(uint(v.Type.Size())*8)) + ft.newLimit(v, a.com(uint(v.Type.Size())*8)) // Arithmetic. 
case OpAdd64, OpAdd32, OpAdd16, OpAdd8: a := ft.limits[v.Args[0].ID] b := ft.limits[v.Args[1].ID] - return ft.newLimit(v, a.add(b, uint(v.Type.Size())*8)) + ft.newLimit(v, a.add(b, uint(v.Type.Size())*8)) case OpSub64, OpSub32, OpSub16, OpSub8: a := ft.limits[v.Args[0].ID] b := ft.limits[v.Args[1].ID] - sub := ft.newLimit(v, a.sub(b, uint(v.Type.Size())*8)) - mod := ft.detectMod(v) - inferred := ft.detectSliceLenRelation(v) - return sub || mod || inferred + ft.newLimit(v, a.sub(b, uint(v.Type.Size())*8)) + ft.detectMod(v) + ft.detectSliceLenRelation(v) + ft.detectSubRelations(v) case OpNeg64, OpNeg32, OpNeg16, OpNeg8: a := ft.limits[v.Args[0].ID] bitsize := uint(v.Type.Size()) * 8 - return ft.newLimit(v, a.com(bitsize).add(limit{min: 1, max: 1, umin: 1, umax: 1}, bitsize)) + ft.newLimit(v, a.com(bitsize).add(limit{min: 1, max: 1, umin: 1, umax: 1}, bitsize)) case OpMul64, OpMul32, OpMul16, OpMul8: a := ft.limits[v.Args[0].ID] b := ft.limits[v.Args[1].ID] - return ft.newLimit(v, a.mul(b, uint(v.Type.Size())*8)) + ft.newLimit(v, a.mul(b, uint(v.Type.Size())*8)) case OpLsh64x64, OpLsh64x32, OpLsh64x16, OpLsh64x8, OpLsh32x64, OpLsh32x32, OpLsh32x16, OpLsh32x8, OpLsh16x64, OpLsh16x32, OpLsh16x16, OpLsh16x8, @@ -1965,7 +1961,7 @@ func (ft *factsTable) flowLimit(v *Value) bool { a := ft.limits[v.Args[0].ID] b := ft.limits[v.Args[1].ID] bitsize := uint(v.Type.Size()) * 8 - return ft.newLimit(v, a.mul(b.exp2(bitsize), bitsize)) + ft.newLimit(v, a.mul(b.exp2(bitsize), bitsize)) case OpRsh64x64, OpRsh64x32, OpRsh64x16, OpRsh64x8, OpRsh32x64, OpRsh32x32, OpRsh32x16, OpRsh32x8, OpRsh16x64, OpRsh16x32, OpRsh16x16, OpRsh16x8, @@ -1979,7 +1975,7 @@ func (ft *factsTable) flowLimit(v *Value) bool { // Easier to compute min and max of both than to write sign logic. 
vmin := min(a.min>>b.min, a.min>>b.max) vmax := max(a.max>>b.min, a.max>>b.max) - return ft.signedMinMax(v, vmin, vmax) + ft.signedMinMax(v, vmin, vmax) } case OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8, OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8, @@ -1988,7 +1984,7 @@ func (ft *factsTable) flowLimit(v *Value) bool { a := ft.limits[v.Args[0].ID] b := ft.limits[v.Args[1].ID] if b.min >= 0 { - return ft.unsignedMinMax(v, a.umin>>b.max, a.umax>>b.min) + ft.unsignedMinMax(v, a.umin>>b.max, a.umax>>b.min) } case OpDiv64, OpDiv32, OpDiv16, OpDiv8: a := ft.limits[v.Args[0].ID] @@ -2008,11 +2004,11 @@ func (ft *factsTable) flowLimit(v *Value) bool { if b.umin > 0 { lim = lim.unsignedMax(a.umax / b.umin) } - return ft.newLimit(v, lim) + ft.newLimit(v, lim) case OpMod64, OpMod32, OpMod16, OpMod8: - return ft.modLimit(true, v, v.Args[0], v.Args[1]) + ft.modLimit(true, v, v.Args[0], v.Args[1]) case OpMod64u, OpMod32u, OpMod16u, OpMod8u: - return ft.modLimit(false, v, v.Args[0], v.Args[1]) + ft.modLimit(false, v, v.Args[0], v.Args[1]) case OpPhi: // Compute the union of all the input phis. @@ -2032,9 +2028,8 @@ func (ft *factsTable) flowLimit(v *Value) bool { l.umin = min(l.umin, l2.umin) l.umax = max(l.umax, l2.umax) } - return ft.newLimit(v, l) + ft.newLimit(v, l) } - return false } // detectSliceLenRelation matches the pattern where @@ -2047,13 +2042,13 @@ func (ft *factsTable) flowLimit(v *Value) bool { // // Note that "index" is not useed for indexing in this pattern, but // in the motivating example (chunked slice iteration) it is. 
-func (ft *factsTable) detectSliceLenRelation(v *Value) (inferred bool) { +func (ft *factsTable) detectSliceLenRelation(v *Value) { if v.Op != OpSub64 { - return false + return } if !(v.Args[0].Op == OpSliceLen || v.Args[0].Op == OpSliceCap) { - return false + return } slice := v.Args[0].Args[0] @@ -2093,13 +2088,54 @@ func (ft *factsTable) detectSliceLenRelation(v *Value) (inferred bool) { if K < 0 { // We hate thinking about overflow continue } - inferred = inferred || ft.signedMin(v, K) + ft.signedMin(v, K) + } +} + +// v must be Sub{64,32,16,8}. +func (ft *factsTable) detectSubRelations(v *Value) { + // v = x-y + x := v.Args[0] + y := v.Args[1] + if x == y { + ft.signedMinMax(v, 0, 0) + return + } + xLim := ft.limits[x.ID] + yLim := ft.limits[y.ID] + + // Check if we might wrap around. If so, give up. + width := uint(v.Type.Size()) * 8 + if _, ok := safeSub(xLim.min, yLim.max, width); !ok { + return // x-y might underflow + } + if _, ok := safeSub(xLim.max, yLim.min, width); !ok { + return // x-y might overflow + } + + // Subtracting a positive number only makes + // things smaller. + if yLim.min >= 0 { + ft.update(v.Block, v, x, signed, lt|eq) + // TODO: is this worth it? + //if yLim.min > 0 { + // ft.update(v.Block, v, x, signed, lt) + //} + } + + // Subtracting a number from a bigger one + // can't go below 0. + if ft.orderS.OrderedOrEqual(y, x) { + ft.setNonNegative(v) + // TODO: is this worth it? + //if ft.orderS.Ordered(y, x) { + // ft.signedMin(v, 1) + //} } - return inferred } // x%d has been rewritten to x - (x/d)*d. 
-func (ft *factsTable) detectMod(v *Value) bool { +func (ft *factsTable) detectMod(v *Value) { var opDiv, opDivU, opMul, opConst Op switch v.Op { case OpSub64: @@ -2126,36 +2162,37 @@ func (ft *factsTable) detectMod(v *Value) bool { mul := v.Args[1] if mul.Op != opMul { - return false + return } div, con := mul.Args[0], mul.Args[1] if div.Op == opConst { div, con = con, div } if con.Op != opConst || (div.Op != opDiv && div.Op != opDivU) || div.Args[0] != v.Args[0] || div.Args[1].Op != opConst || div.Args[1].AuxInt != con.AuxInt { - return false + return } - return ft.modLimit(div.Op == opDiv, v, v.Args[0], con) + ft.modLimit(div.Op == opDiv, v, v.Args[0], con) } // modLimit sets v with facts derived from v = p % q. -func (ft *factsTable) modLimit(signed bool, v, p, q *Value) bool { +func (ft *factsTable) modLimit(signed bool, v, p, q *Value) { a := ft.limits[p.ID] b := ft.limits[q.ID] if signed { if a.min < 0 && b.min > 0 { - return ft.signedMinMax(v, -(b.max - 1), b.max-1) + ft.signedMinMax(v, -(b.max - 1), b.max-1) + return } if !(a.nonnegative() && b.nonnegative()) { // TODO: we could handle signed limits but I didn't bother. - return false + return } if a.min >= 0 && b.min > 0 { ft.setNonNegative(v) } } // Underflow in the arithmetic below is ok, it gives to MaxUint64 which does nothing to the limit. 
- return ft.unsignedMax(v, min(a.umax, b.umax-1)) + ft.unsignedMax(v, min(a.umax, b.umax-1)) } // getBranch returns the range restrictions added by p @@ -2466,15 +2503,13 @@ func addLocalFacts(ft *factsTable, b *Block) { xl := ft.limits[x.ID] y := add.Args[1] yl := ft.limits[y.ID] - if unsignedAddOverflows(xl.umax, yl.umax, add.Type) { - continue - } - - if xl.umax < uminDivisor { - ft.update(b, v, y, unsigned, lt|eq) - } - if yl.umax < uminDivisor { - ft.update(b, v, x, unsigned, lt|eq) + if !unsignedAddOverflows(xl.umax, yl.umax, add.Type) { + if xl.umax < uminDivisor { + ft.update(b, v, y, unsigned, lt|eq) + } + if yl.umax < uminDivisor { + ft.update(b, v, x, unsigned, lt|eq) + } } } ft.update(b, v, v.Args[0], unsigned, lt|eq) @@ -2993,16 +3028,14 @@ func (ft *factsTable) topoSortValuesInBlock(b *Block) { want := f.NumValues() scores := ft.reusedTopoSortScoresTable - if len(scores) < want { - if want <= cap(scores) { - scores = scores[:want] - } else { - if cap(scores) > 0 { - f.Cache.freeUintSlice(scores) - } - scores = f.Cache.allocUintSlice(want) - ft.reusedTopoSortScoresTable = scores + if want <= cap(scores) { + scores = scores[:want] + } else { + if cap(scores) > 0 { + f.Cache.freeUintSlice(scores) } + scores = f.Cache.allocUintSlice(want) + ft.reusedTopoSortScoresTable = scores } for _, v := range b.Values { diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index 4d022555b7..11dd53bfc7 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -596,17 +596,18 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos var c *Value if vi.regs != 0 { // Copy from a register that v is already in. 
- r2 := pickReg(vi.regs) var current *Value - if !s.allocatable.contains(r2) { - current = v // v is in a fixed register + if vi.regs&^s.allocatable != 0 { + // v is in a fixed register, prefer that + current = v } else { + r2 := pickReg(vi.regs) if s.regs[r2].v != v { panic("bad register state") } current = s.regs[r2].c + s.usedSinceBlockStart |= regMask(1) << r2 } - s.usedSinceBlockStart |= regMask(1) << r2 c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current) } else if v.rematerializeable() { // Rematerialize instead of loading from the spill location. diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 07308973b1..af2568ae89 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -2772,3 +2772,17 @@ func panicBoundsCCToAux(p PanicBoundsCC) Aux { func isDictArgSym(sym Sym) bool { return sym.(*ir.Name).Sym().Name == typecheck.LocalDictName } + +// When v is (IMake typ (StructMake ...)), convert to +// (IMake typ arg) where arg is the pointer-y argument to +// the StructMake (there must be exactly one). 
+func imakeOfStructMake(v *Value) *Value { + var arg *Value + for _, a := range v.Args[1].Args { + if a.Type.Size() > 0 { + arg = a + break + } + } + return v.Block.NewValue2(v.Pos, OpIMake, v.Type, v.Args[0], arg) +} diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 6af1558833..b3f790dbda 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -12556,6 +12556,54 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool { } break } + // match: (MUL r:(MOVWUreg x) s:(MOVWUreg y)) + // cond: r.Uses == 1 && s.Uses == 1 + // result: (UMULL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + r := v_0 + if r.Op != OpARM64MOVWUreg { + continue + } + x := r.Args[0] + s := v_1 + if s.Op != OpARM64MOVWUreg { + continue + } + y := s.Args[0] + if !(r.Uses == 1 && s.Uses == 1) { + continue + } + v.reset(OpARM64UMULL) + v.AddArg2(x, y) + return true + } + break + } + // match: (MUL r:(MOVWreg x) s:(MOVWreg y)) + // cond: r.Uses == 1 && s.Uses == 1 + // result: (MULL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + r := v_0 + if r.Op != OpARM64MOVWreg { + continue + } + x := r.Args[0] + s := v_1 + if s.Op != OpARM64MOVWreg { + continue + } + y := s.Args[0] + if !(r.Uses == 1 && s.Uses == 1) { + continue + } + v.reset(OpARM64MULL) + v.AddArg2(x, y) + return true + } + break + } return false } func rewriteValueARM64_OpARM64MULW(v *Value) bool { @@ -25273,6 +25321,37 @@ func rewriteBlockARM64(b *Block) bool { b.resetWithControl(BlockARM64FGE, cc) return true } + // match: (TBNZ [0] (XORconst [1] x) yes no) + // result: (TBZ [0] x yes no) + for b.Controls[0].Op == OpARM64XORconst { + v_0 := b.Controls[0] + if auxIntToInt64(v_0.AuxInt) != 1 { + break + } + x := v_0.Args[0] + if auxIntToInt64(b.AuxInt) != 0 { + break + } + b.resetWithControl(BlockARM64TBZ, x) + b.AuxInt = int64ToAuxInt(0) + return true + } + case BlockARM64TBZ: 
+ // match: (TBZ [0] (XORconst [1] x) yes no) + // result: (TBNZ [0] x yes no) + for b.Controls[0].Op == OpARM64XORconst { + v_0 := b.Controls[0] + if auxIntToInt64(v_0.AuxInt) != 1 { + break + } + x := v_0.Args[0] + if auxIntToInt64(b.AuxInt) != 0 { + break + } + b.resetWithControl(BlockARM64TBNZ, x) + b.AuxInt = int64ToAuxInt(0) + return true + } case BlockARM64UGE: // match: (UGE (FlagConstant [fc]) yes no) // cond: fc.uge() diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go index 4262d4e0fb..bf2dd114a9 100644 --- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go +++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go @@ -5866,7 +5866,6 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool { v_0 := v.Args[0] b := v.Block config := b.Func.Config - typ := &b.Func.Config.Types // match: (MULV _ (MOVVconst [0])) // result: (MOVVconst [0]) for { @@ -5911,44 +5910,6 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool { } break } - // match: (MULV (NEGV x) (MOVVconst [c])) - // result: (MULV x (MOVVconst [-c])) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpLOONG64NEGV { - continue - } - x := v_0.Args[0] - if v_1.Op != OpLOONG64MOVVconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpLOONG64MULV) - v0 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v0.AuxInt = int64ToAuxInt(-c) - v.AddArg2(x, v0) - return true - } - break - } - // match: (MULV (NEGV x) (NEGV y)) - // result: (MULV x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpLOONG64NEGV { - continue - } - x := v_0.Args[0] - if v_1.Op != OpLOONG64NEGV { - continue - } - y := v_1.Args[0] - v.reset(OpLOONG64MULV) - v.AddArg2(x, y) - return true - } - break - } // match: (MULV (MOVVconst [c]) (MOVVconst [d])) // result: (MOVVconst [c*d]) for { diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go index 
191c7b3d48..284d88967b 100644 --- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go +++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go @@ -7027,7 +7027,7 @@ func rewriteValueRISCV64_OpRISCV64ROL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (ROL x (MOVDconst [val])) - // result: (RORI [int64(int8(-val)&63)] x) + // result: (RORI [-val&63] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7035,7 +7035,7 @@ func rewriteValueRISCV64_OpRISCV64ROL(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64RORI) - v.AuxInt = int64ToAuxInt(int64(int8(-val) & 63)) + v.AuxInt = int64ToAuxInt(-val & 63) v.AddArg(x) return true } @@ -7057,7 +7057,7 @@ func rewriteValueRISCV64_OpRISCV64ROLW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (ROLW x (MOVDconst [val])) - // result: (RORIW [int64(int8(-val)&31)] x) + // result: (RORIW [-val&31] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7065,7 +7065,7 @@ func rewriteValueRISCV64_OpRISCV64ROLW(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64RORIW) - v.AuxInt = int64ToAuxInt(int64(int8(-val) & 31)) + v.AuxInt = int64ToAuxInt(-val & 31) v.AddArg(x) return true } @@ -7087,7 +7087,7 @@ func rewriteValueRISCV64_OpRISCV64ROR(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (ROR x (MOVDconst [val])) - // result: (RORI [int64(val&63)] x) + // result: (RORI [val&63] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7095,7 +7095,7 @@ func rewriteValueRISCV64_OpRISCV64ROR(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64RORI) - v.AuxInt = int64ToAuxInt(int64(val & 63)) + v.AuxInt = int64ToAuxInt(val & 63) v.AddArg(x) return true } @@ -7105,7 +7105,7 @@ func rewriteValueRISCV64_OpRISCV64RORW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (RORW x (MOVDconst [val])) - // result: (RORIW [int64(val&31)] x) + // result: (RORIW [val&31] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7113,7 +7113,7 @@ func 
rewriteValueRISCV64_OpRISCV64RORW(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64RORIW) - v.AuxInt = int64ToAuxInt(int64(val & 31)) + v.AuxInt = int64ToAuxInt(val & 31) v.AddArg(x) return true } @@ -7212,7 +7212,7 @@ func rewriteValueRISCV64_OpRISCV64SLL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SLL x (MOVDconst [val])) - // result: (SLLI [int64(val&63)] x) + // result: (SLLI [val&63] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7220,7 +7220,7 @@ func rewriteValueRISCV64_OpRISCV64SLL(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64SLLI) - v.AuxInt = int64ToAuxInt(int64(val & 63)) + v.AuxInt = int64ToAuxInt(val & 63) v.AddArg(x) return true } @@ -7246,7 +7246,7 @@ func rewriteValueRISCV64_OpRISCV64SLLI(v *Value) bool { } // match: (SLLI <t> [c] (ADD x x)) // cond: c < t.Size() * 8 - 1 - // result: (SLLI <t> [c+1] x) + // result: (SLLI [c+1] x) for { t := v.Type c := auxIntToInt64(v.AuxInt) @@ -7258,7 +7258,6 @@ func rewriteValueRISCV64_OpRISCV64SLLI(v *Value) bool { break } v.reset(OpRISCV64SLLI) - v.Type = t v.AuxInt = int64ToAuxInt(c + 1) v.AddArg(x) return true @@ -7286,7 +7285,7 @@ func rewriteValueRISCV64_OpRISCV64SLLW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SLLW x (MOVDconst [val])) - // result: (SLLIW [int64(val&31)] x) + // result: (SLLIW [val&31] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7294,7 +7293,7 @@ func rewriteValueRISCV64_OpRISCV64SLLW(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64SLLIW) - v.AuxInt = int64ToAuxInt(int64(val & 31)) + v.AuxInt = int64ToAuxInt(val & 31) v.AddArg(x) return true } @@ -7304,7 +7303,7 @@ func rewriteValueRISCV64_OpRISCV64SLT(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SLT x (MOVDconst [val])) - // cond: val >= -2048 && val <= 2047 + // cond: is12Bit(val) // result: (SLTI [val] x) for { x := v_0 @@ -7312,7 +7311,7 @@ func rewriteValueRISCV64_OpRISCV64SLT(v *Value) bool { 
break } val := auxIntToInt64(v_1.AuxInt) - if !(val >= -2048 && val <= 2047) { + if !(is12Bit(val)) { break } v.reset(OpRISCV64SLTI) @@ -7363,22 +7362,6 @@ func rewriteValueRISCV64_OpRISCV64SLTI(v *Value) bool { v.AuxInt = int64ToAuxInt(1) return true } - // match: (SLTI [x] (ORI [y] _)) - // cond: y >= 0 && int64(y) >= int64(x) - // result: (MOVDconst [0]) - for { - x := auxIntToInt64(v.AuxInt) - if v_0.Op != OpRISCV64ORI { - break - } - y := auxIntToInt64(v_0.AuxInt) - if !(y >= 0 && int64(y) >= int64(x)) { - break - } - v.reset(OpRISCV64MOVDconst) - v.AuxInt = int64ToAuxInt(0) - return true - } return false } func rewriteValueRISCV64_OpRISCV64SLTIU(v *Value) bool { @@ -7433,7 +7416,7 @@ func rewriteValueRISCV64_OpRISCV64SLTU(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SLTU x (MOVDconst [val])) - // cond: val >= -2048 && val <= 2047 + // cond: is12Bit(val) // result: (SLTIU [val] x) for { x := v_0 @@ -7441,7 +7424,7 @@ func rewriteValueRISCV64_OpRISCV64SLTU(v *Value) bool { break } val := auxIntToInt64(v_1.AuxInt) - if !(val >= -2048 && val <= 2047) { + if !(is12Bit(val)) { break } v.reset(OpRISCV64SLTIU) @@ -7555,7 +7538,7 @@ func rewriteValueRISCV64_OpRISCV64SRA(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SRA x (MOVDconst [val])) - // result: (SRAI [int64(val&63)] x) + // result: (SRAI [val&63] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7563,7 +7546,7 @@ func rewriteValueRISCV64_OpRISCV64SRA(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64SRAI) - v.AuxInt = int64ToAuxInt(int64(val & 63)) + v.AuxInt = int64ToAuxInt(val & 63) v.AddArg(x) return true } @@ -7572,11 +7555,10 @@ func rewriteValueRISCV64_OpRISCV64SRA(v *Value) bool { func rewriteValueRISCV64_OpRISCV64SRAI(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (SRAI <t> [x] (MOVWreg y)) + // match: (SRAI [x] (MOVWreg y)) // cond: x >= 0 && x <= 31 - // result: (SRAIW <t> [int64(x)] y) + // result: (SRAIW [x] y) for { - t := 
v.Type x := auxIntToInt64(v.AuxInt) if v_0.Op != OpRISCV64MOVWreg { break @@ -7586,8 +7568,7 @@ func rewriteValueRISCV64_OpRISCV64SRAI(v *Value) bool { break } v.reset(OpRISCV64SRAIW) - v.Type = t - v.AuxInt = int64ToAuxInt(int64(x)) + v.AuxInt = int64ToAuxInt(x) v.AddArg(y) return true } @@ -7633,7 +7614,7 @@ func rewriteValueRISCV64_OpRISCV64SRAI(v *Value) bool { v.AddArg(v0) return true } - // match: (SRAI <t> [x] (MOVWreg y)) + // match: (SRAI [x] (MOVWreg y)) // cond: x >= 32 // result: (SRAIW [31] y) for { @@ -7668,7 +7649,7 @@ func rewriteValueRISCV64_OpRISCV64SRAW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SRAW x (MOVDconst [val])) - // result: (SRAIW [int64(val&31)] x) + // result: (SRAIW [val&31] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7676,7 +7657,7 @@ func rewriteValueRISCV64_OpRISCV64SRAW(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64SRAIW) - v.AuxInt = int64ToAuxInt(int64(val & 31)) + v.AuxInt = int64ToAuxInt(val & 31) v.AddArg(x) return true } @@ -7686,7 +7667,7 @@ func rewriteValueRISCV64_OpRISCV64SRL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SRL x (MOVDconst [val])) - // result: (SRLI [int64(val&63)] x) + // result: (SRLI [val&63] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7694,7 +7675,7 @@ func rewriteValueRISCV64_OpRISCV64SRL(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64SRLI) - v.AuxInt = int64ToAuxInt(int64(val & 63)) + v.AuxInt = int64ToAuxInt(val & 63) v.AddArg(x) return true } @@ -7702,11 +7683,10 @@ func rewriteValueRISCV64_OpRISCV64SRL(v *Value) bool { } func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool { v_0 := v.Args[0] - // match: (SRLI <t> [x] (MOVWUreg y)) + // match: (SRLI [x] (MOVWUreg y)) // cond: x >= 0 && x <= 31 - // result: (SRLIW <t> [int64(x)] y) + // result: (SRLIW [x] y) for { - t := v.Type x := auxIntToInt64(v.AuxInt) if v_0.Op != OpRISCV64MOVWUreg { break @@ -7716,16 +7696,14 @@ func 
rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool { break } v.reset(OpRISCV64SRLIW) - v.Type = t - v.AuxInt = int64ToAuxInt(int64(x)) + v.AuxInt = int64ToAuxInt(x) v.AddArg(y) return true } - // match: (SRLI <t> [x] (MOVBUreg y)) + // match: (SRLI [x] (MOVBUreg y)) // cond: x >= 8 - // result: (MOVDconst <t> [0]) + // result: (MOVDconst [0]) for { - t := v.Type x := auxIntToInt64(v.AuxInt) if v_0.Op != OpRISCV64MOVBUreg { break @@ -7734,15 +7712,13 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool { break } v.reset(OpRISCV64MOVDconst) - v.Type = t v.AuxInt = int64ToAuxInt(0) return true } - // match: (SRLI <t> [x] (MOVHUreg y)) + // match: (SRLI [x] (MOVHUreg y)) // cond: x >= 16 - // result: (MOVDconst <t> [0]) + // result: (MOVDconst [0]) for { - t := v.Type x := auxIntToInt64(v.AuxInt) if v_0.Op != OpRISCV64MOVHUreg { break @@ -7751,15 +7727,13 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool { break } v.reset(OpRISCV64MOVDconst) - v.Type = t v.AuxInt = int64ToAuxInt(0) return true } - // match: (SRLI <t> [x] (MOVWUreg y)) + // match: (SRLI [x] (MOVWUreg y)) // cond: x >= 32 - // result: (MOVDconst <t> [0]) + // result: (MOVDconst [0]) for { - t := v.Type x := auxIntToInt64(v.AuxInt) if v_0.Op != OpRISCV64MOVWUreg { break @@ -7768,7 +7742,6 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool { break } v.reset(OpRISCV64MOVDconst) - v.Type = t v.AuxInt = int64ToAuxInt(0) return true } @@ -7790,7 +7763,7 @@ func rewriteValueRISCV64_OpRISCV64SRLW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (SRLW x (MOVDconst [val])) - // result: (SRLIW [int64(val&31)] x) + // result: (SRLIW [val&31] x) for { x := v_0 if v_1.Op != OpRISCV64MOVDconst { @@ -7798,7 +7771,7 @@ func rewriteValueRISCV64_OpRISCV64SRLW(v *Value) bool { } val := auxIntToInt64(v_1.AuxInt) v.reset(OpRISCV64SRLIW) - v.AuxInt = int64ToAuxInt(int64(val & 31)) + v.AuxInt = int64ToAuxInt(val & 31) v.AddArg(x) return true } diff --git a/src/cmd/compile/internal/ssa/rewritedec.go 
b/src/cmd/compile/internal/ssa/rewritedec.go index 16d0269210..c45034ead0 100644 --- a/src/cmd/compile/internal/ssa/rewritedec.go +++ b/src/cmd/compile/internal/ssa/rewritedec.go @@ -279,11 +279,20 @@ func rewriteValuedec_OpIData(v *Value) bool { func rewriteValuedec_OpIMake(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (IMake _typ (StructMake val)) + // match: (IMake _typ (StructMake ___)) + // result: imakeOfStructMake(v) + for { + if v_1.Op != OpStructMake { + break + } + v.copyOf(imakeOfStructMake(v)) + return true + } + // match: (IMake _typ (ArrayMake1 val)) // result: (IMake _typ val) for { _typ := v_0 - if v_1.Op != OpStructMake || len(v_1.Args) != 1 { + if v_1.Op != OpArrayMake1 { break } val := v_1.Args[0] @@ -839,17 +848,47 @@ func rewriteValuedec_OpStructMake(v *Value) bool { func rewriteValuedec_OpStructSelect(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (StructSelect [0] (IData x)) + // match: (StructSelect (IData x)) + // cond: v.Type.Size() > 0 // result: (IData x) for { - if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpIData { + if v_0.Op != OpIData { break } x := v_0.Args[0] + if !(v.Type.Size() > 0) { + break + } v.reset(OpIData) v.AddArg(x) return true } + // match: (StructSelect (IData x)) + // cond: v.Type.Size() == 0 && v.Type.IsStruct() + // result: (StructMake) + for { + if v_0.Op != OpIData { + break + } + if !(v.Type.Size() == 0 && v.Type.IsStruct()) { + break + } + v.reset(OpStructMake) + return true + } + // match: (StructSelect (IData x)) + // cond: v.Type.Size() == 0 && v.Type.IsArray() + // result: (ArrayMake0) + for { + if v_0.Op != OpIData { + break + } + if !(v.Type.Size() == 0 && v.Type.IsArray()) { + break + } + v.reset(OpArrayMake0) + return true + } // match: (StructSelect [i] x:(StructMake ___)) // result: x.Args[i] for { @@ -861,13 +900,10 @@ func rewriteValuedec_OpStructSelect(v *Value) bool { v.copyOf(x.Args[i]) return true } - // match: (StructSelect [0] x) + // match: (StructSelect x) // 
cond: x.Type.IsPtrShaped() // result: x for { - if auxIntToInt64(v.AuxInt) != 0 { - break - } x := v_0 if !(x.Type.IsPtrShaped()) { break diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index 5b5494f43a..5c183fc2a6 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -5332,6 +5332,182 @@ func rewriteValuegeneric_OpAndB(v *Value) bool { } break } + // match: (AndB (Neq64 x cv:(Const64 [c])) (Neq64 x (Const64 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Neq64 (Or64 <x.Type> x (Const64 <x.Type> [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst64 { + continue + } + c := auxIntToInt64(cv.AuxInt) + if v_1.Op != OpNeq64 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst64 { + continue + } + d := auxIntToInt64(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpNeq64) + v0 := b.NewValue0(v.Pos, OpOr64, x.Type) + v1 := b.NewValue0(v.Pos, OpConst64, x.Type) + v1.AuxInt = int64ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } + // match: (AndB (Neq32 x cv:(Const32 [c])) (Neq32 x (Const32 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Neq32 (Or32 <x.Type> x (Const32 <x.Type> [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq32 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst32 { + continue + } 
+ c := auxIntToInt32(cv.AuxInt) + if v_1.Op != OpNeq32 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst32 { + continue + } + d := auxIntToInt32(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpNeq32) + v0 := b.NewValue0(v.Pos, OpOr32, x.Type) + v1 := b.NewValue0(v.Pos, OpConst32, x.Type) + v1.AuxInt = int32ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } + // match: (AndB (Neq16 x cv:(Const16 [c])) (Neq16 x (Const16 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Neq16 (Or16 <x.Type> x (Const16 <x.Type> [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq16 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst16 { + continue + } + c := auxIntToInt16(cv.AuxInt) + if v_1.Op != OpNeq16 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst16 { + continue + } + d := auxIntToInt16(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpNeq16) + v0 := b.NewValue0(v.Pos, OpOr16, x.Type) + v1 := b.NewValue0(v.Pos, OpConst16, x.Type) + v1.AuxInt = int16ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } + // match: (AndB (Neq8 x cv:(Const8 [c])) (Neq8 x (Const8 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Neq8 (Or8 <x.Type> x (Const8 <x.Type> [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq8 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = 
_i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst8 { + continue + } + c := auxIntToInt8(cv.AuxInt) + if v_1.Op != OpNeq8 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst8 { + continue + } + d := auxIntToInt8(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpNeq8) + v0 := b.NewValue0(v.Pos, OpOr8, x.Type) + v1 := b.NewValue0(v.Pos, OpConst8, x.Type) + v1.AuxInt = int8ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } return false } func rewriteValuegeneric_OpArraySelect(v *Value) bool { @@ -8809,16 +8985,13 @@ func rewriteValuegeneric_OpFloor(v *Value) bool { func rewriteValuegeneric_OpIMake(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (IMake _typ (StructMake val)) - // result: (IMake _typ val) + // match: (IMake _typ (StructMake ___)) + // result: imakeOfStructMake(v) for { - _typ := v_0 - if v_1.Op != OpStructMake || len(v_1.Args) != 1 { + if v_1.Op != OpStructMake { break } - val := v_1.Args[0] - v.reset(OpIMake) - v.AddArg2(_typ, val) + v.copyOf(imakeOfStructMake(v)) return true } // match: (IMake _typ (ArrayMake1 val)) @@ -16610,6 +16783,45 @@ func rewriteValuegeneric_OpMul16(v *Value) bool { } break } + // match: (Mul16 (Const16 <t> [c]) (Neg16 x)) + // result: (Mul16 x (Const16 <t> [-c])) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpConst16 { + continue + } + t := v_0.Type + c := auxIntToInt16(v_0.AuxInt) + if v_1.Op != OpNeg16 { + continue + } + x := v_1.Args[0] + v.reset(OpMul16) + v0 := b.NewValue0(v.Pos, OpConst16, t) + v0.AuxInt = int16ToAuxInt(-c) + v.AddArg2(x, v0) + return true + } + break + } + // match: (Mul16 (Neg16 x) (Neg16 y)) + // result: (Mul16 x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeg16 { + continue + } + x := 
v_0.Args[0] + if v_1.Op != OpNeg16 { + continue + } + y := v_1.Args[0] + v.reset(OpMul16) + v.AddArg2(x, y) + return true + } + break + } // match: (Mul16 (Const16 <t> [c]) (Add16 <t> (Const16 <t> [d]) x)) // cond: !isPowerOfTwo(c) // result: (Add16 (Const16 <t> [c*d]) (Mul16 <t> (Const16 <t> [c]) x)) @@ -16821,6 +17033,45 @@ func rewriteValuegeneric_OpMul32(v *Value) bool { } break } + // match: (Mul32 (Const32 <t> [c]) (Neg32 x)) + // result: (Mul32 x (Const32 <t> [-c])) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpConst32 { + continue + } + t := v_0.Type + c := auxIntToInt32(v_0.AuxInt) + if v_1.Op != OpNeg32 { + continue + } + x := v_1.Args[0] + v.reset(OpMul32) + v0 := b.NewValue0(v.Pos, OpConst32, t) + v0.AuxInt = int32ToAuxInt(-c) + v.AddArg2(x, v0) + return true + } + break + } + // match: (Mul32 (Neg32 x) (Neg32 y)) + // result: (Mul32 x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeg32 { + continue + } + x := v_0.Args[0] + if v_1.Op != OpNeg32 { + continue + } + y := v_1.Args[0] + v.reset(OpMul32) + v.AddArg2(x, y) + return true + } + break + } // match: (Mul32 (Const32 <t> [c]) (Add32 <t> (Const32 <t> [d]) x)) // cond: !isPowerOfTwo(c) // result: (Add32 (Const32 <t> [c*d]) (Mul32 <t> (Const32 <t> [c]) x)) @@ -17193,6 +17444,45 @@ func rewriteValuegeneric_OpMul64(v *Value) bool { } break } + // match: (Mul64 (Const64 <t> [c]) (Neg64 x)) + // result: (Mul64 x (Const64 <t> [-c])) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpConst64 { + continue + } + t := v_0.Type + c := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpNeg64 { + continue + } + x := v_1.Args[0] + v.reset(OpMul64) + v0 := b.NewValue0(v.Pos, OpConst64, t) + v0.AuxInt = int64ToAuxInt(-c) + v.AddArg2(x, v0) + return true + } + break + } + // match: (Mul64 (Neg64 x) (Neg64 y)) + // result: (Mul64 x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op 
!= OpNeg64 { + continue + } + x := v_0.Args[0] + if v_1.Op != OpNeg64 { + continue + } + y := v_1.Args[0] + v.reset(OpMul64) + v.AddArg2(x, y) + return true + } + break + } // match: (Mul64 (Const64 <t> [c]) (Add64 <t> (Const64 <t> [d]) x)) // cond: !isPowerOfTwo(c) // result: (Add64 (Const64 <t> [c*d]) (Mul64 <t> (Const64 <t> [c]) x)) @@ -17565,6 +17855,45 @@ func rewriteValuegeneric_OpMul8(v *Value) bool { } break } + // match: (Mul8 (Const8 <t> [c]) (Neg8 x)) + // result: (Mul8 x (Const8 <t> [-c])) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpConst8 { + continue + } + t := v_0.Type + c := auxIntToInt8(v_0.AuxInt) + if v_1.Op != OpNeg8 { + continue + } + x := v_1.Args[0] + v.reset(OpMul8) + v0 := b.NewValue0(v.Pos, OpConst8, t) + v0.AuxInt = int8ToAuxInt(-c) + v.AddArg2(x, v0) + return true + } + break + } + // match: (Mul8 (Neg8 x) (Neg8 y)) + // result: (Mul8 x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeg8 { + continue + } + x := v_0.Args[0] + if v_1.Op != OpNeg8 { + continue + } + y := v_1.Args[0] + v.reset(OpMul8) + v.AddArg2(x, y) + return true + } + break + } // match: (Mul8 (Const8 <t> [c]) (Add8 <t> (Const8 <t> [d]) x)) // cond: !isPowerOfTwo(c) // result: (Add8 (Const8 <t> [c*d]) (Mul8 <t> (Const8 <t> [c]) x)) @@ -23242,6 +23571,182 @@ func rewriteValuegeneric_OpOrB(v *Value) bool { } break } + // match: (OrB (Eq64 x cv:(Const64 [c])) (Eq64 x (Const64 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Eq64 (Or64 <x.Type> x (Const64 <x.Type> [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpEq64 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst64 { + continue + } + c := auxIntToInt64(cv.AuxInt) + if v_1.Op != OpEq64 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] 
+ v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst64 { + continue + } + d := auxIntToInt64(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpEq64) + v0 := b.NewValue0(v.Pos, OpOr64, x.Type) + v1 := b.NewValue0(v.Pos, OpConst64, x.Type) + v1.AuxInt = int64ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } + // match: (OrB (Eq32 x cv:(Const32 [c])) (Eq32 x (Const32 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Eq32 (Or32 <x.Type> x (Const32 <x.Type> [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpEq32 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst32 { + continue + } + c := auxIntToInt32(cv.AuxInt) + if v_1.Op != OpEq32 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst32 { + continue + } + d := auxIntToInt32(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpEq32) + v0 := b.NewValue0(v.Pos, OpOr32, x.Type) + v1 := b.NewValue0(v.Pos, OpConst32, x.Type) + v1.AuxInt = int32ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } + // match: (OrB (Eq16 x cv:(Const16 [c])) (Eq16 x (Const16 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Eq16 (Or16 <x.Type> x (Const16 <x.Type> [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpEq16 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst16 { + continue + } + c := auxIntToInt16(cv.AuxInt) 
+ if v_1.Op != OpEq16 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst16 { + continue + } + d := auxIntToInt16(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpEq16) + v0 := b.NewValue0(v.Pos, OpOr16, x.Type) + v1 := b.NewValue0(v.Pos, OpConst16, x.Type) + v1.AuxInt = int16ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } + // match: (OrB (Eq8 x cv:(Const8 [c])) (Eq8 x (Const8 [d]))) + // cond: c|d == c && oneBit(c^d) + // result: (Eq8 (Or8 <x.Type> x (Const8 <x.Type> [c^d])) cv) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpEq8 { + continue + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + cv := v_0_1 + if cv.Op != OpConst8 { + continue + } + c := auxIntToInt8(cv.AuxInt) + if v_1.Op != OpEq8 { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 { + if x != v_1_0 || v_1_1.Op != OpConst8 { + continue + } + d := auxIntToInt8(v_1_1.AuxInt) + if !(c|d == c && oneBit(c^d)) { + continue + } + v.reset(OpEq8) + v0 := b.NewValue0(v.Pos, OpOr8, x.Type) + v1 := b.NewValue0(v.Pos, OpConst8, x.Type) + v1.AuxInt = int8ToAuxInt(c ^ d) + v0.AddArg2(x, v1) + v.AddArg2(v0, cv) + return true + } + } + } + break + } // match: (OrB (Neq64F x x) (Less64F x y:(Const64F [c]))) // result: (Not (Leq64F y x)) for { @@ -31601,17 +32106,47 @@ func rewriteValuegeneric_OpStructSelect(v *Value) bool { v0.AddArg2(v1, mem) return true } - // match: (StructSelect [0] (IData x)) + // match: (StructSelect (IData x)) + // cond: v.Type.Size() > 0 // result: (IData x) for { - if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpIData { + if v_0.Op != OpIData { break } x := 
v_0.Args[0] + if !(v.Type.Size() > 0) { + break + } v.reset(OpIData) v.AddArg(x) return true } + // match: (StructSelect (IData x)) + // cond: v.Type.Size() == 0 && v.Type.IsStruct() + // result: (StructMake) + for { + if v_0.Op != OpIData { + break + } + if !(v.Type.Size() == 0 && v.Type.IsStruct()) { + break + } + v.reset(OpStructMake) + return true + } + // match: (StructSelect (IData x)) + // cond: v.Type.Size() == 0 && v.Type.IsArray() + // result: (ArrayMake0) + for { + if v_0.Op != OpIData { + break + } + if !(v.Type.Size() == 0 && v.Type.IsArray()) { + break + } + v.reset(OpArrayMake0) + return true + } return false } func rewriteValuegeneric_OpSub16(v *Value) bool { diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index e854cd9895..e59451c773 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -124,6 +124,11 @@ func InitConfig() { ir.Syms.GCWriteBarrier[7] = typecheck.LookupRuntimeFunc("gcWriteBarrier8") ir.Syms.Goschedguarded = typecheck.LookupRuntimeFunc("goschedguarded") ir.Syms.Growslice = typecheck.LookupRuntimeFunc("growslice") + ir.Syms.GrowsliceBuf = typecheck.LookupRuntimeFunc("growsliceBuf") + ir.Syms.MoveSlice = typecheck.LookupRuntimeFunc("moveSlice") + ir.Syms.MoveSliceNoScan = typecheck.LookupRuntimeFunc("moveSliceNoScan") + ir.Syms.MoveSliceNoCap = typecheck.LookupRuntimeFunc("moveSliceNoCap") + ir.Syms.MoveSliceNoCapNoScan = typecheck.LookupRuntimeFunc("moveSliceNoCapNoScan") ir.Syms.InterfaceSwitch = typecheck.LookupRuntimeFunc("interfaceSwitch") for i := 1; i < len(ir.Syms.MallocGCSmallNoScan); i++ { ir.Syms.MallocGCSmallNoScan[i] = typecheck.LookupRuntimeFunc(fmt.Sprintf("mallocgcSmallNoScanSC%d", i)) @@ -1096,6 +1101,23 @@ type state struct { // Block starting position, indexed by block id. blockStarts []src.XPos + + // Information for stack allocation. Indexed by the first argument + // to an append call. 
Normally a slice-typed variable, but not always. + backingStores map[ir.Node]*backingStoreInfo +} + +type backingStoreInfo struct { + // Size of backing store array (in elements) + K int64 + // Stack-allocated backing store variable. + store *ir.Name + // Dynamic boolean variable marking the fact that we used this backing store. + used *ir.Name + // Have we used this variable statically yet? This is just a hint + // to avoid checking the dynamic variable if the answer is obvious. + // (usedStatic == true implies used == true) + usedStatic bool } type funcLine struct { @@ -3683,6 +3705,9 @@ func (s *state) exprCheckPtr(n ir.Node, checkPtrOK bool) *ssa.Value { case ir.OAPPEND: return s.append(n.(*ir.CallExpr), false) + case ir.OMOVE2HEAP: + return s.move2heap(n.(*ir.MoveToHeapExpr)) + case ir.OMIN, ir.OMAX: return s.minMax(n.(*ir.CallExpr)) @@ -3744,6 +3769,68 @@ func (s *state) resultAddrOfCall(c *ssa.Value, which int64, t *types.Type) *ssa. return addr } +// Get backing store information for an append call. +func (s *state) getBackingStoreInfoForAppend(n *ir.CallExpr) *backingStoreInfo { + if n.Esc() != ir.EscNone { + return nil + } + return s.getBackingStoreInfo(n.Args[0]) +} +func (s *state) getBackingStoreInfo(n ir.Node) *backingStoreInfo { + t := n.Type() + et := t.Elem() + maxStackSize := int64(base.Debug.VariableMakeThreshold) + if et.Size() == 0 || et.Size() > maxStackSize { + return nil + } + if base.Flag.N != 0 { + return nil + } + if !base.VariableMakeHash.MatchPos(n.Pos(), nil) { + return nil + } + i := s.backingStores[n] + if i != nil { + return i + } + + // Build type of backing store. + K := maxStackSize / et.Size() // rounds down + KT := types.NewArray(et, K) + KT.SetNoalg(true) + types.CalcArraySize(KT) + // Align more than naturally for the type KT. See issue 73199. 
+ align := types.NewArray(types.Types[types.TUINTPTR], 0) + types.CalcArraySize(align) + storeTyp := types.NewStruct([]*types.Field{ + {Sym: types.BlankSym, Type: align}, + {Sym: types.BlankSym, Type: KT}, + }) + storeTyp.SetNoalg(true) + types.CalcStructSize(storeTyp) + + // Make backing store variable. + backingStore := typecheck.TempAt(n.Pos(), s.curfn, storeTyp) + backingStore.SetAddrtaken(true) + + // Make "used" boolean. + used := typecheck.TempAt(n.Pos(), s.curfn, types.Types[types.TBOOL]) + if s.curBlock == s.f.Entry { + s.vars[used] = s.constBool(false) + } else { + // initialize this variable at end of entry block + s.defvars[s.f.Entry.ID][used] = s.constBool(false) + } + + // Initialize an info structure. + if s.backingStores == nil { + s.backingStores = map[ir.Node]*backingStoreInfo{} + } + i = &backingStoreInfo{K: K, store: backingStore, used: used, usedStatic: false} + s.backingStores[n] = i + return i +} + // append converts an OAPPEND node to SSA. // If inplace is false, it converts the OAPPEND expression n to an ssa.Value, // adds it to s, and returns the Value. @@ -3834,9 +3921,29 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // A stack-allocated backing store could be used at every // append that qualifies, but we limit it in some cases to // avoid wasted code and stack space. - // TODO: handle ... append case. - maxStackSize := int64(base.Debug.VariableMakeThreshold) - if !inplace && n.Esc() == ir.EscNone && et.Size() > 0 && et.Size() <= maxStackSize && base.Flag.N == 0 && base.VariableMakeHash.MatchPos(n.Pos(), nil) && !s.appendTargets[sn] { + // + // Note that we have two different strategies. + // 1. The standard strategy is just to allocate the full + // backing store at the first append. + // 2. An alternate strategy is used when + // a. The backing store eventually escapes via move2heap + // and b. 
The capacity is used somehow + // In this case, we don't want to just allocate + // the full buffer at the first append, because when + // we move2heap the buffer to the heap when it escapes, + // we might end up wasting memory because we can't + // change the capacity. + // So in this case we use growsliceBuf to reuse the buffer + // and walk one step up the size class ladder each time. + // + // TODO: handle ... append case? Currently we handle only + // a fixed number of appended elements. + var info *backingStoreInfo + if !inplace { + info = s.getBackingStoreInfoForAppend(n) + } + + if !inplace && info != nil && !n.UseBuf && !info.usedStatic { // if l <= K { // if !used { // if oldLen == 0 { @@ -3860,43 +3967,19 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // It is ok to do it more often, but it is probably helpful only for // the first instance. TODO: this could use more tuning. Using ir.Node // as the key works for *ir.Name instances but probably nothing else. - if s.appendTargets == nil { - s.appendTargets = map[ir.Node]bool{} - } - s.appendTargets[sn] = true - - K := maxStackSize / et.Size() // rounds down - KT := types.NewArray(et, K) - KT.SetNoalg(true) - types.CalcArraySize(KT) - // Align more than naturally for the type KT. See issue 73199. - align := types.NewArray(types.Types[types.TUINTPTR], 0) - types.CalcArraySize(align) - storeTyp := types.NewStruct([]*types.Field{ - {Sym: types.BlankSym, Type: align}, - {Sym: types.BlankSym, Type: KT}, - }) - storeTyp.SetNoalg(true) - types.CalcStructSize(storeTyp) + info.usedStatic = true + // TODO: unset usedStatic somehow? usedTestBlock := s.f.NewBlock(ssa.BlockPlain) oldLenTestBlock := s.f.NewBlock(ssa.BlockPlain) bodyBlock := s.f.NewBlock(ssa.BlockPlain) growSlice := s.f.NewBlock(ssa.BlockPlain) - - // Make "used" boolean. 
- tBool := types.Types[types.TBOOL] - used := typecheck.TempAt(n.Pos(), s.curfn, tBool) - s.defvars[s.f.Entry.ID][used] = s.constBool(false) // initialize this variable at fn entry - - // Make backing store variable. tInt := types.Types[types.TINT] - backingStore := typecheck.TempAt(n.Pos(), s.curfn, storeTyp) - backingStore.SetAddrtaken(true) + tBool := types.Types[types.TBOOL] // if l <= K s.startBlock(grow) - kTest := s.newValue2(s.ssaOp(ir.OLE, tInt), tBool, l, s.constInt(tInt, K)) + kTest := s.newValue2(s.ssaOp(ir.OLE, tInt), tBool, l, s.constInt(tInt, info.K)) b := s.endBlock() b.Kind = ssa.BlockIf b.SetControl(kTest) @@ -3906,7 +3989,7 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // if !used s.startBlock(usedTestBlock) - usedTest := s.newValue1(ssa.OpNot, tBool, s.expr(used)) + usedTest := s.newValue1(ssa.OpNot, tBool, s.expr(info.used)) b = s.endBlock() b.Kind = ssa.BlockIf b.SetControl(usedTest) @@ -3927,18 +4010,18 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // var store struct { _ [0]uintptr; arr [K]T } s.startBlock(bodyBlock) if et.HasPointers() { - s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, backingStore, s.mem()) + s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, info.store, s.mem()) } - addr := s.addr(backingStore) - s.zero(storeTyp, addr) + addr := s.addr(info.store) + s.zero(info.store.Type(), addr) // s = store.arr[:l:K] s.vars[ptrVar] = addr s.vars[lenVar] = l // nargs would also be ok because of the oldLen==0 test. 
- s.vars[capVar] = s.constInt(tInt, K) + s.vars[capVar] = s.constInt(tInt, info.K) // used = true - s.assign(used, s.constBool(true), false, 0) + s.assign(info.used, s.constBool(true), false, 0) b = s.endBlock() b.AddEdgeTo(assign) @@ -3949,7 +4032,25 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // Call growslice s.startBlock(grow) taddr := s.expr(n.Fun) - r := s.rtcall(ir.Syms.Growslice, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr) + var r []*ssa.Value + if info != nil && n.UseBuf { + // Use stack-allocated buffer as backing store, if we can. + if et.HasPointers() && !info.usedStatic { + // Initialize in the function header. Not the best place, + // but it makes sure we don't scan this area before it is + // initialized. + mem := s.defvars[s.f.Entry.ID][memVar] + mem = s.f.Entry.NewValue1A(n.Pos(), ssa.OpVarDef, types.TypeMem, info.store, mem) + addr := s.f.Entry.NewValue2A(n.Pos(), ssa.OpLocalAddr, types.NewPtr(info.store.Type()), info.store, s.sp, mem) + mem = s.f.Entry.NewValue2I(n.Pos(), ssa.OpZero, types.TypeMem, info.store.Type().Size(), addr, mem) + mem.Aux = info.store.Type() + s.defvars[s.f.Entry.ID][memVar] = mem + info.usedStatic = true + } + r = s.rtcall(ir.Syms.GrowsliceBuf, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr, s.addr(info.store), s.constInt(types.Types[types.TINT], info.K)) + } else { + r = s.rtcall(ir.Syms.Growslice, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr) + } // Decompose output slice p = s.newValue1(ssa.OpSlicePtr, pt, r[0]) @@ -4036,6 +4137,95 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { return s.newValue3(ssa.OpSliceMake, n.Type(), p, l, c) } +func (s *state) move2heap(n *ir.MoveToHeapExpr) *ssa.Value { + // s := n.Slice + // if s.ptr points to current stack frame { + // s2 := make([]T, s.len, s.cap) + // copy(s2[:cap], s[:cap]) + // s = s2 + // } + // return s + + slice := s.expr(n.Slice) + et := slice.Type.Elem() + pt := types.NewPtr(et) + + info := 
s.getBackingStoreInfo(n) + if info == nil { + // Backing store will never be stack allocated, so + // move2heap is a no-op. + return slice + } + + // Decomposse input slice. + p := s.newValue1(ssa.OpSlicePtr, pt, slice) + l := s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], slice) + c := s.newValue1(ssa.OpSliceCap, types.Types[types.TINT], slice) + + moveBlock := s.f.NewBlock(ssa.BlockPlain) + mergeBlock := s.f.NewBlock(ssa.BlockPlain) + + s.vars[ptrVar] = p + s.vars[lenVar] = l + s.vars[capVar] = c + + // Decide if we need to move the slice backing store. + // It needs to be moved if it is currently on the stack. + sub := ssa.OpSub64 + less := ssa.OpLess64U + if s.config.PtrSize == 4 { + sub = ssa.OpSub32 + less = ssa.OpLess32U + } + callerSP := s.newValue1(ssa.OpGetCallerSP, types.Types[types.TUINTPTR], s.mem()) + frameSize := s.newValue2(sub, types.Types[types.TUINTPTR], callerSP, s.sp) + pInt := s.newValue2(ssa.OpConvert, types.Types[types.TUINTPTR], p, s.mem()) + off := s.newValue2(sub, types.Types[types.TUINTPTR], pInt, s.sp) + cond := s.newValue2(less, types.Types[types.TBOOL], off, frameSize) + + b := s.endBlock() + b.Kind = ssa.BlockIf + b.Likely = ssa.BranchUnlikely // fast path is to not have to call into runtime + b.SetControl(cond) + b.AddEdgeTo(moveBlock) + b.AddEdgeTo(mergeBlock) + + // Move the slice to heap + s.startBlock(moveBlock) + var newSlice *ssa.Value + if et.HasPointers() { + typ := s.expr(n.RType) + if n.PreserveCapacity { + newSlice = s.rtcall(ir.Syms.MoveSlice, true, []*types.Type{slice.Type}, typ, p, l, c)[0] + } else { + newSlice = s.rtcall(ir.Syms.MoveSliceNoCap, true, []*types.Type{slice.Type}, typ, p, l)[0] + } + } else { + elemSize := s.constInt(types.Types[types.TUINTPTR], et.Size()) + if n.PreserveCapacity { + newSlice = s.rtcall(ir.Syms.MoveSliceNoScan, true, []*types.Type{slice.Type}, elemSize, p, l, c)[0] + } else { + newSlice = s.rtcall(ir.Syms.MoveSliceNoCapNoScan, true, []*types.Type{slice.Type}, elemSize, p, l)[0] + } 
+ } + // Decompose output slice + s.vars[ptrVar] = s.newValue1(ssa.OpSlicePtr, pt, newSlice) + s.vars[lenVar] = s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], newSlice) + s.vars[capVar] = s.newValue1(ssa.OpSliceCap, types.Types[types.TINT], newSlice) + b = s.endBlock() + b.AddEdgeTo(mergeBlock) + + // Merge fast path (no moving) and slow path (moved) + s.startBlock(mergeBlock) + p = s.variable(ptrVar, pt) // generates phi for ptr + l = s.variable(lenVar, types.Types[types.TINT]) // generates phi for len + c = s.variable(capVar, types.Types[types.TINT]) // generates phi for cap + delete(s.vars, ptrVar) + delete(s.vars, lenVar) + delete(s.vars, capVar) + return s.newValue3(ssa.OpSliceMake, slice.Type, p, l, c) +} + // minMax converts an OMIN/OMAX builtin call into SSA. func (s *state) minMax(n *ir.CallExpr) *ssa.Value { // The OMIN/OMAX builtin is variadic, but its semantics are diff --git a/src/cmd/compile/internal/test/testdata/arith_test.go b/src/cmd/compile/internal/test/testdata/arith_test.go index 8984cd3e26..34ac73c068 100644 --- a/src/cmd/compile/internal/test/testdata/arith_test.go +++ b/src/cmd/compile/internal/test/testdata/arith_test.go @@ -445,6 +445,19 @@ func testBitwiseRshU_ssa(a uint32, b, c uint32) uint32 { } //go:noinline +func orLt_ssa(x int) bool { + y := x - x + return (x | 2) < y +} + +// test riscv64 SLTI rules +func testSetIfLessThan(t *testing.T) { + if want, got := true, orLt_ssa(-7); got != want { + t.Errorf("orLt_ssa(-7) = %t want %t", got, want) + } +} + +//go:noinline func testShiftCX_ssa() int { v1 := uint8(3) v4 := (v1 * v1) ^ v1 | v1 - v1 - v1&v1 ^ uint8(3+2) + v1*1>>0 - v1 | 1 | v1<<(2*3|0-0*0^1) @@ -977,6 +990,7 @@ func TestArithmetic(t *testing.T) { testRegallocCVSpill(t) testSubqToNegq(t) testBitwiseLogic(t) + testSetIfLessThan(t) testOcom(t) testLrot(t) testShiftCX(t) diff --git a/src/cmd/compile/internal/typecheck/_builtin/runtime.go b/src/cmd/compile/internal/typecheck/_builtin/runtime.go index 3c9707252e..3e2324b528 
100644 --- a/src/cmd/compile/internal/typecheck/_builtin/runtime.go +++ b/src/cmd/compile/internal/typecheck/_builtin/runtime.go @@ -195,6 +195,7 @@ func makeslice(typ *byte, len int, cap int) unsafe.Pointer func makeslice64(typ *byte, len int64, cap int64) unsafe.Pointer func makeslicecopy(typ *byte, tolen int, fromlen int, from unsafe.Pointer) unsafe.Pointer func growslice(oldPtr *any, newLen, oldCap, num int, et *byte) (ary []any) +func growsliceBuf(oldPtr *any, newLen, oldCap, num int, et *byte, buf *any, bufLen int) (ary []any) func unsafeslicecheckptr(typ *byte, ptr unsafe.Pointer, len int64) func panicunsafeslicelen() func panicunsafeslicenilptr() @@ -202,6 +203,11 @@ func unsafestringcheckptr(ptr unsafe.Pointer, len int64) func panicunsafestringlen() func panicunsafestringnilptr() +func moveSlice(typ *byte, old *byte, len, cap int) (*byte, int, int) +func moveSliceNoScan(elemSize uintptr, old *byte, len, cap int) (*byte, int, int) +func moveSliceNoCap(typ *byte, old *byte, len int) (*byte, int, int) +func moveSliceNoCapNoScan(elemSize uintptr, old *byte, len int) (*byte, int, int) + func memmove(to *any, frm *any, length uintptr) func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) func memclrHasPointers(ptr unsafe.Pointer, n uintptr) diff --git a/src/cmd/compile/internal/typecheck/builtin.go b/src/cmd/compile/internal/typecheck/builtin.go index eea7fd7d05..537de2cbe9 100644 --- a/src/cmd/compile/internal/typecheck/builtin.go +++ b/src/cmd/compile/internal/typecheck/builtin.go @@ -160,80 +160,85 @@ var runtimeDecls = [...]struct { {"makeslice64", funcTag, 124}, {"makeslicecopy", funcTag, 125}, {"growslice", funcTag, 127}, - {"unsafeslicecheckptr", funcTag, 128}, + {"growsliceBuf", funcTag, 128}, + {"unsafeslicecheckptr", funcTag, 129}, {"panicunsafeslicelen", funcTag, 9}, {"panicunsafeslicenilptr", funcTag, 9}, - {"unsafestringcheckptr", funcTag, 129}, + {"unsafestringcheckptr", funcTag, 130}, {"panicunsafestringlen", funcTag, 9}, 
{"panicunsafestringnilptr", funcTag, 9}, - {"memmove", funcTag, 130}, - {"memclrNoHeapPointers", funcTag, 131}, - {"memclrHasPointers", funcTag, 131}, - {"memequal", funcTag, 132}, - {"memequal0", funcTag, 133}, - {"memequal8", funcTag, 133}, - {"memequal16", funcTag, 133}, - {"memequal32", funcTag, 133}, - {"memequal64", funcTag, 133}, - {"memequal128", funcTag, 133}, - {"f32equal", funcTag, 134}, - {"f64equal", funcTag, 134}, - {"c64equal", funcTag, 134}, - {"c128equal", funcTag, 134}, - {"strequal", funcTag, 134}, - {"interequal", funcTag, 134}, - {"nilinterequal", funcTag, 134}, - {"memhash", funcTag, 135}, - {"memhash0", funcTag, 136}, - {"memhash8", funcTag, 136}, - {"memhash16", funcTag, 136}, - {"memhash32", funcTag, 136}, - {"memhash64", funcTag, 136}, - {"memhash128", funcTag, 136}, - {"f32hash", funcTag, 137}, - {"f64hash", funcTag, 137}, - {"c64hash", funcTag, 137}, - {"c128hash", funcTag, 137}, - {"strhash", funcTag, 137}, - {"interhash", funcTag, 137}, - {"nilinterhash", funcTag, 137}, - {"int64div", funcTag, 138}, - {"uint64div", funcTag, 139}, - {"int64mod", funcTag, 138}, - {"uint64mod", funcTag, 139}, - {"float64toint64", funcTag, 140}, - {"float64touint64", funcTag, 141}, - {"float64touint32", funcTag, 142}, - {"int64tofloat64", funcTag, 143}, - {"int64tofloat32", funcTag, 144}, - {"uint64tofloat64", funcTag, 145}, - {"uint64tofloat32", funcTag, 146}, - {"uint32tofloat64", funcTag, 147}, - {"complex128div", funcTag, 148}, + {"moveSlice", funcTag, 131}, + {"moveSliceNoScan", funcTag, 132}, + {"moveSliceNoCap", funcTag, 133}, + {"moveSliceNoCapNoScan", funcTag, 134}, + {"memmove", funcTag, 135}, + {"memclrNoHeapPointers", funcTag, 136}, + {"memclrHasPointers", funcTag, 136}, + {"memequal", funcTag, 137}, + {"memequal0", funcTag, 138}, + {"memequal8", funcTag, 138}, + {"memequal16", funcTag, 138}, + {"memequal32", funcTag, 138}, + {"memequal64", funcTag, 138}, + {"memequal128", funcTag, 138}, + {"f32equal", funcTag, 139}, + {"f64equal", funcTag, 
139}, + {"c64equal", funcTag, 139}, + {"c128equal", funcTag, 139}, + {"strequal", funcTag, 139}, + {"interequal", funcTag, 139}, + {"nilinterequal", funcTag, 139}, + {"memhash", funcTag, 140}, + {"memhash0", funcTag, 141}, + {"memhash8", funcTag, 141}, + {"memhash16", funcTag, 141}, + {"memhash32", funcTag, 141}, + {"memhash64", funcTag, 141}, + {"memhash128", funcTag, 141}, + {"f32hash", funcTag, 142}, + {"f64hash", funcTag, 142}, + {"c64hash", funcTag, 142}, + {"c128hash", funcTag, 142}, + {"strhash", funcTag, 142}, + {"interhash", funcTag, 142}, + {"nilinterhash", funcTag, 142}, + {"int64div", funcTag, 143}, + {"uint64div", funcTag, 144}, + {"int64mod", funcTag, 143}, + {"uint64mod", funcTag, 144}, + {"float64toint64", funcTag, 145}, + {"float64touint64", funcTag, 146}, + {"float64touint32", funcTag, 147}, + {"int64tofloat64", funcTag, 148}, + {"int64tofloat32", funcTag, 149}, + {"uint64tofloat64", funcTag, 150}, + {"uint64tofloat32", funcTag, 151}, + {"uint32tofloat64", funcTag, 152}, + {"complex128div", funcTag, 153}, {"racefuncenter", funcTag, 33}, {"racefuncexit", funcTag, 9}, {"raceread", funcTag, 33}, {"racewrite", funcTag, 33}, - {"racereadrange", funcTag, 149}, - {"racewriterange", funcTag, 149}, - {"msanread", funcTag, 149}, - {"msanwrite", funcTag, 149}, - {"msanmove", funcTag, 150}, - {"asanread", funcTag, 149}, - {"asanwrite", funcTag, 149}, - {"checkptrAlignment", funcTag, 151}, - {"checkptrArithmetic", funcTag, 153}, - {"libfuzzerTraceCmp1", funcTag, 154}, - {"libfuzzerTraceCmp2", funcTag, 155}, - {"libfuzzerTraceCmp4", funcTag, 156}, - {"libfuzzerTraceCmp8", funcTag, 157}, - {"libfuzzerTraceConstCmp1", funcTag, 154}, - {"libfuzzerTraceConstCmp2", funcTag, 155}, - {"libfuzzerTraceConstCmp4", funcTag, 156}, - {"libfuzzerTraceConstCmp8", funcTag, 157}, - {"libfuzzerHookStrCmp", funcTag, 158}, - {"libfuzzerHookEqualFold", funcTag, 158}, - {"addCovMeta", funcTag, 160}, + {"racereadrange", funcTag, 154}, + {"racewriterange", funcTag, 154}, + 
{"msanread", funcTag, 154}, + {"msanwrite", funcTag, 154}, + {"msanmove", funcTag, 155}, + {"asanread", funcTag, 154}, + {"asanwrite", funcTag, 154}, + {"checkptrAlignment", funcTag, 156}, + {"checkptrArithmetic", funcTag, 158}, + {"libfuzzerTraceCmp1", funcTag, 159}, + {"libfuzzerTraceCmp2", funcTag, 160}, + {"libfuzzerTraceCmp4", funcTag, 161}, + {"libfuzzerTraceCmp8", funcTag, 162}, + {"libfuzzerTraceConstCmp1", funcTag, 159}, + {"libfuzzerTraceConstCmp2", funcTag, 160}, + {"libfuzzerTraceConstCmp4", funcTag, 161}, + {"libfuzzerTraceConstCmp8", funcTag, 162}, + {"libfuzzerHookStrCmp", funcTag, 163}, + {"libfuzzerHookEqualFold", funcTag, 163}, + {"addCovMeta", funcTag, 165}, {"x86HasAVX", varTag, 6}, {"x86HasFMA", varTag, 6}, {"x86HasPOPCNT", varTag, 6}, @@ -244,11 +249,11 @@ var runtimeDecls = [...]struct { {"loong64HasLAM_BH", varTag, 6}, {"loong64HasLSX", varTag, 6}, {"riscv64HasZbb", varTag, 6}, - {"asanregisterglobals", funcTag, 131}, + {"asanregisterglobals", funcTag, 136}, } func runtimeTypes() []*types.Type { - var typs [161]*types.Type + var typs [166]*types.Type typs[0] = types.ByteType typs[1] = types.NewPtr(typs[0]) typs[2] = types.Types[types.TANY] @@ -377,39 +382,44 @@ func runtimeTypes() []*types.Type { typs[125] = newSig(params(typs[1], typs[13], typs[13], typs[7]), params(typs[7])) typs[126] = types.NewSlice(typs[2]) typs[127] = newSig(params(typs[3], typs[13], typs[13], typs[13], typs[1]), params(typs[126])) - typs[128] = newSig(params(typs[1], typs[7], typs[22]), nil) - typs[129] = newSig(params(typs[7], typs[22]), nil) - typs[130] = newSig(params(typs[3], typs[3], typs[5]), nil) - typs[131] = newSig(params(typs[7], typs[5]), nil) - typs[132] = newSig(params(typs[3], typs[3], typs[5]), params(typs[6])) - typs[133] = newSig(params(typs[3], typs[3]), params(typs[6])) - typs[134] = newSig(params(typs[7], typs[7]), params(typs[6])) - typs[135] = newSig(params(typs[3], typs[5], typs[5]), params(typs[5])) - typs[136] = newSig(params(typs[7], 
typs[5]), params(typs[5])) - typs[137] = newSig(params(typs[3], typs[5]), params(typs[5])) - typs[138] = newSig(params(typs[22], typs[22]), params(typs[22])) - typs[139] = newSig(params(typs[24], typs[24]), params(typs[24])) - typs[140] = newSig(params(typs[18]), params(typs[22])) - typs[141] = newSig(params(typs[18]), params(typs[24])) - typs[142] = newSig(params(typs[18]), params(typs[67])) - typs[143] = newSig(params(typs[22]), params(typs[18])) - typs[144] = newSig(params(typs[22]), params(typs[20])) - typs[145] = newSig(params(typs[24]), params(typs[18])) - typs[146] = newSig(params(typs[24]), params(typs[20])) - typs[147] = newSig(params(typs[67]), params(typs[18])) - typs[148] = newSig(params(typs[26], typs[26]), params(typs[26])) - typs[149] = newSig(params(typs[5], typs[5]), nil) - typs[150] = newSig(params(typs[5], typs[5], typs[5]), nil) - typs[151] = newSig(params(typs[7], typs[1], typs[5]), nil) - typs[152] = types.NewSlice(typs[7]) - typs[153] = newSig(params(typs[7], typs[152]), nil) - typs[154] = newSig(params(typs[71], typs[71], typs[15]), nil) - typs[155] = newSig(params(typs[65], typs[65], typs[15]), nil) - typs[156] = newSig(params(typs[67], typs[67], typs[15]), nil) - typs[157] = newSig(params(typs[24], typs[24], typs[15]), nil) - typs[158] = newSig(params(typs[30], typs[30], typs[15]), nil) - typs[159] = types.NewArray(typs[0], 16) - typs[160] = newSig(params(typs[7], typs[67], typs[159], typs[30], typs[13], typs[71], typs[71]), params(typs[67])) + typs[128] = newSig(params(typs[3], typs[13], typs[13], typs[13], typs[1], typs[3], typs[13]), params(typs[126])) + typs[129] = newSig(params(typs[1], typs[7], typs[22]), nil) + typs[130] = newSig(params(typs[7], typs[22]), nil) + typs[131] = newSig(params(typs[1], typs[1], typs[13], typs[13]), params(typs[1], typs[13], typs[13])) + typs[132] = newSig(params(typs[5], typs[1], typs[13], typs[13]), params(typs[1], typs[13], typs[13])) + typs[133] = newSig(params(typs[1], typs[1], typs[13]), 
params(typs[1], typs[13], typs[13])) + typs[134] = newSig(params(typs[5], typs[1], typs[13]), params(typs[1], typs[13], typs[13])) + typs[135] = newSig(params(typs[3], typs[3], typs[5]), nil) + typs[136] = newSig(params(typs[7], typs[5]), nil) + typs[137] = newSig(params(typs[3], typs[3], typs[5]), params(typs[6])) + typs[138] = newSig(params(typs[3], typs[3]), params(typs[6])) + typs[139] = newSig(params(typs[7], typs[7]), params(typs[6])) + typs[140] = newSig(params(typs[3], typs[5], typs[5]), params(typs[5])) + typs[141] = newSig(params(typs[7], typs[5]), params(typs[5])) + typs[142] = newSig(params(typs[3], typs[5]), params(typs[5])) + typs[143] = newSig(params(typs[22], typs[22]), params(typs[22])) + typs[144] = newSig(params(typs[24], typs[24]), params(typs[24])) + typs[145] = newSig(params(typs[18]), params(typs[22])) + typs[146] = newSig(params(typs[18]), params(typs[24])) + typs[147] = newSig(params(typs[18]), params(typs[67])) + typs[148] = newSig(params(typs[22]), params(typs[18])) + typs[149] = newSig(params(typs[22]), params(typs[20])) + typs[150] = newSig(params(typs[24]), params(typs[18])) + typs[151] = newSig(params(typs[24]), params(typs[20])) + typs[152] = newSig(params(typs[67]), params(typs[18])) + typs[153] = newSig(params(typs[26], typs[26]), params(typs[26])) + typs[154] = newSig(params(typs[5], typs[5]), nil) + typs[155] = newSig(params(typs[5], typs[5], typs[5]), nil) + typs[156] = newSig(params(typs[7], typs[1], typs[5]), nil) + typs[157] = types.NewSlice(typs[7]) + typs[158] = newSig(params(typs[7], typs[157]), nil) + typs[159] = newSig(params(typs[71], typs[71], typs[15]), nil) + typs[160] = newSig(params(typs[65], typs[65], typs[15]), nil) + typs[161] = newSig(params(typs[67], typs[67], typs[15]), nil) + typs[162] = newSig(params(typs[24], typs[24], typs[15]), nil) + typs[163] = newSig(params(typs[30], typs[30], typs[15]), nil) + typs[164] = types.NewArray(typs[0], 16) + typs[165] = newSig(params(typs[7], typs[67], typs[164], typs[30], 
typs[13], typs[71], typs[71]), params(typs[67])) return typs[:] } diff --git a/src/cmd/compile/internal/types/type.go b/src/cmd/compile/internal/types/type.go index e8aca90081..6663c49dd8 100644 --- a/src/cmd/compile/internal/types/type.go +++ b/src/cmd/compile/internal/types/type.go @@ -1842,26 +1842,7 @@ func IsReflexive(t *Type) bool { // Can this type be stored directly in an interface word? // Yes, if the representation is a single pointer. func IsDirectIface(t *Type) bool { - switch t.Kind() { - case TPTR: - // Pointers to notinheap types must be stored indirectly. See issue 42076. - return !t.Elem().NotInHeap() - case TCHAN, - TMAP, - TFUNC, - TUNSAFEPTR: - return true - - case TARRAY: - // Array of 1 direct iface type can be direct. - return t.NumElem() == 1 && IsDirectIface(t.Elem()) - - case TSTRUCT: - // Struct with 1 field of direct iface type can be direct. - return t.NumFields() == 1 && IsDirectIface(t.Field(0).Type) - } - - return false + return t.Size() == int64(PtrSize) && PtrDataSize(t) == int64(PtrSize) } // IsInterfaceMethod reports whether (field) m is diff --git a/src/cmd/compile/internal/types2/check.go b/src/cmd/compile/internal/types2/check.go index 25cda4f73d..42218b4caf 100644 --- a/src/cmd/compile/internal/types2/check.go +++ b/src/cmd/compile/internal/types2/check.go @@ -171,12 +171,13 @@ type Checker struct { usedPkgNames map[*PkgName]bool // set of used package names mono monoGraph // graph for detecting non-monomorphizable instantiation loops - firstErr error // first error encountered - methods map[*TypeName][]*Func // maps package scope type names to associated non-blank (non-interface) methods - untyped map[syntax.Expr]exprInfo // map of expressions without final type - delayed []action // stack of delayed action segments; segments are processed in FIFO order - objPath []Object // path of object dependencies during type inference (for cycle reporting) - cleaners []cleaner // list of types that may need a final cleanup at the end 
of type-checking + firstErr error // first error encountered + methods map[*TypeName][]*Func // maps package scope type names to associated non-blank (non-interface) methods + untyped map[syntax.Expr]exprInfo // map of expressions without final type + delayed []action // stack of delayed action segments; segments are processed in FIFO order + objPath []Object // path of object dependencies during type-checking (for cycle reporting) + objPathIdx map[Object]int // map of object to object path index during type-checking (for cycle reporting) + cleaners []cleaner // list of types that may need a final cleanup at the end of type-checking // environment within which the current object is type-checked (valid only // for the duration of type-checking a specific object) @@ -248,19 +249,22 @@ func (check *Checker) later(f func()) *action { return &check.delayed[i] } -// push pushes obj onto the object path and returns its index in the path. -func (check *Checker) push(obj Object) int { +// push pushes obj onto the object path and records its index in the path index map. +func (check *Checker) push(obj Object) { + if check.objPathIdx == nil { + check.objPathIdx = make(map[Object]int) + } + check.objPathIdx[obj] = len(check.objPath) check.objPath = append(check.objPath, obj) - return len(check.objPath) - 1 } -// pop pops and returns the topmost object from the object path. -func (check *Checker) pop() Object { +// pop pops an object from the object path and removes it from the path index map. 
+func (check *Checker) pop() { i := len(check.objPath) - 1 obj := check.objPath[i] - check.objPath[i] = nil + check.objPath[i] = nil // help the garbage collector check.objPath = check.objPath[:i] - return obj + delete(check.objPathIdx, obj) } type cleaner interface { @@ -319,6 +323,7 @@ func (check *Checker) initFiles(files []*syntax.File) { check.untyped = nil check.delayed = nil check.objPath = nil + check.objPathIdx = nil check.cleaners = nil // We must initialize usedVars and usedPkgNames both here and in NewChecker, diff --git a/src/cmd/compile/internal/types2/cycles.go b/src/cmd/compile/internal/types2/cycles.go index fa739a2b84..b916219c97 100644 --- a/src/cmd/compile/internal/types2/cycles.go +++ b/src/cmd/compile/internal/types2/cycles.go @@ -54,7 +54,6 @@ func (check *Checker) directCycle(tname *TypeName, pathIdx map[*TypeName]int) { // tname is marked grey - we have a cycle on the path beginning at start. // Mark tname as invalid. tname.setType(Typ[Invalid]) - tname.setColor(black) // collect type names on cycle var cycle []Object diff --git a/src/cmd/compile/internal/types2/decl.go b/src/cmd/compile/internal/types2/decl.go index 91d2492a53..5cb52fdbe4 100644 --- a/src/cmd/compile/internal/types2/decl.go +++ b/src/cmd/compile/internal/types2/decl.go @@ -62,114 +62,77 @@ func (check *Checker) objDecl(obj Object, def *TypeName) { if check.indent == 0 { fmt.Println() // empty line between top-level objects for readability } - check.trace(obj.Pos(), "-- checking %s (%s, objPath = %s)", obj, obj.color(), pathString(check.objPath)) + check.trace(obj.Pos(), "-- checking %s (objPath = %s)", obj, pathString(check.objPath)) check.indent++ defer func() { check.indent-- - check.trace(obj.Pos(), "=> %s (%s)", obj, obj.color()) + check.trace(obj.Pos(), "=> %s", obj) }() } - // Checking the declaration of obj means inferring its type - // (and possibly its value, for constants). 
- // An object's type (and thus the object) may be in one of - // three states which are expressed by colors: + // Checking the declaration of an object means determining its type + // (and also its value for constants). An object (and thus its type) + // may be in 1 of 3 states: // - // - an object whose type is not yet known is painted white (initial color) - // - an object whose type is in the process of being inferred is painted grey - // - an object whose type is fully inferred is painted black + // - not in Checker.objPathIdx and type == nil : type is not yet known (white) + // - in Checker.objPathIdx : type is pending (grey) + // - not in Checker.objPathIdx and type != nil : type is known (black) // - // During type inference, an object's color changes from white to grey - // to black (pre-declared objects are painted black from the start). - // A black object (i.e., its type) can only depend on (refer to) other black - // ones. White and grey objects may depend on white and black objects. - // A dependency on a grey object indicates a cycle which may or may not be - // valid. + // During type-checking, an object changes from white to grey to black. + // Predeclared objects start as black (their type is known without checking). // - // When objects turn grey, they are pushed on the object path (a stack); - // they are popped again when they turn black. Thus, if a grey object (a - // cycle) is encountered, it is on the object path, and all the objects - // it depends on are the remaining objects on that path. Color encoding - // is such that the color value of a grey object indicates the index of - // that object in the object path. - - // During type-checking, white objects may be assigned a type without - // traversing through objDecl; e.g., when initializing constants and - // variables. Update the colors of those objects here (rather than - // everywhere where we set the type) to satisfy the color invariants. 
- if obj.color() == white && obj.Type() != nil { - obj.setColor(black) - return - } - - switch obj.color() { - case white: - assert(obj.Type() == nil) - // All color values other than white and black are considered grey. - // Because black and white are < grey, all values >= grey are grey. - // Use those values to encode the object's index into the object path. - obj.setColor(grey + color(check.push(obj))) - defer func() { - check.pop().setColor(black) - }() - - case black: - assert(obj.Type() != nil) - return - - default: - // Color values other than white or black are considered grey. - fallthrough + // A black object may only depend on (refer to) to other black objects. White + // and grey objects may depend on white or black objects. A dependency on a + // grey object indicates a (possibly invalid) cycle. + // + // When an object is marked grey, it is pushed onto the object path (a stack) + // and its index in the path is recorded in the path index map. It is popped + // and removed from the map when its type is determined (and marked black). - case grey: - // We have a (possibly invalid) cycle. - // In the existing code, this is marked by a non-nil type - // for the object except for constants and variables whose - // type may be non-nil (known), or nil if it depends on the - // not-yet known initialization value. - // In the former case, set the type to Typ[Invalid] because - // we have an initialization cycle. The cycle error will be - // reported later, when determining initialization order. - // TODO(gri) Report cycle here and simplify initialization - // order code. + // If this object is grey, we have a (possibly invalid) cycle. This is signaled + // by a non-nil type for the object, except for constants and variables whose + // type may be non-nil (known), or nil if it depends on a not-yet known + // initialization value. + // + // In the former case, set the type to Typ[Invalid] because we have an + // initialization cycle. 
The cycle error will be reported later, when + // determining initialization order. + // + // TODO(gri) Report cycle here and simplify initialization order code. + if _, ok := check.objPathIdx[obj]; ok { switch obj := obj.(type) { - case *Const: - if !check.validCycle(obj) || obj.typ == nil { - obj.typ = Typ[Invalid] - } - - case *Var: - if !check.validCycle(obj) || obj.typ == nil { - obj.typ = Typ[Invalid] + case *Const, *Var: + if !check.validCycle(obj) || obj.Type() == nil { + obj.setType(Typ[Invalid]) } - case *TypeName: if !check.validCycle(obj) { - // break cycle - // (without this, calling underlying() - // below may lead to an endless loop - // if we have a cycle for a defined - // (*Named) type) - obj.typ = Typ[Invalid] + obj.setType(Typ[Invalid]) } - case *Func: if !check.validCycle(obj) { - // Don't set obj.typ to Typ[Invalid] here - // because plenty of code type-asserts that - // functions have a *Signature type. Grey - // functions have their type set to an empty - // signature which makes it impossible to + // Don't set type to Typ[Invalid]; plenty of code asserts that + // functions have a *Signature type. Instead, leave the type + // as an empty signature, which makes it impossible to // initialize a variable with the function. } - default: panic("unreachable") } + assert(obj.Type() != nil) return } + if obj.Type() != nil { // black, meaning it's already type-checked + return + } + + // white, meaning it must be type-checked + + check.push(obj) + defer check.pop() + d := check.objMap[obj] if d == nil { check.dump("%v: %s should have been declared", obj.Pos(), obj) @@ -221,8 +184,8 @@ func (check *Checker) validCycle(obj Object) (valid bool) { } // Count cycle objects. 
- assert(obj.color() >= grey) - start := obj.color() - grey // index of obj in objPath + start, found := check.objPathIdx[obj] + assert(found) cycle := check.objPath[start:] tparCycle := false // if set, the cycle is through a type parameter list nval := 0 // number of (constant or variable) values in the cycle @@ -532,11 +495,16 @@ func (check *Checker) typeDecl(obj *TypeName, tdecl *syntax.TypeDecl, def *TypeN check.collectTypeParams(&alias.tparams, tdecl.TParamList) } - rhs = check.definedType(tdecl.Type, obj) + rhs = check.declaredType(tdecl.Type, obj) assert(rhs != nil) - alias.fromRHS = rhs - unalias(alias) // populate alias.actual + + // spec: In an alias declaration the given type cannot be a type parameter declared in the same declaration." + // (see also go.dev/issue/75884, go.dev/issue/#75885) + if tpar, ok := rhs.(*TypeParam); ok && alias.tparams != nil && slices.Index(alias.tparams.list(), tpar) >= 0 { + check.error(tdecl.Type, MisplacedTypeParam, "cannot use type parameter declared in alias declaration as RHS") + alias.fromRHS = Typ[Invalid] + } } else { if !versionErr && tparam0 != nil { check.error(tdecl, UnsupportedFeature, "generic type alias requires GODEBUG=gotypesalias=1 or unset") @@ -576,7 +544,7 @@ func (check *Checker) typeDecl(obj *TypeName, tdecl *syntax.TypeDecl, def *TypeN check.collectTypeParams(&named.tparams, tdecl.TParamList) } - rhs = check.definedType(tdecl.Type, obj) + rhs = check.declaredType(tdecl.Type, obj) assert(rhs != nil) named.fromRHS = rhs @@ -764,17 +732,8 @@ func (check *Checker) funcDecl(obj *Func, decl *declInfo) { sig := new(Signature) obj.typ = sig // guard against cycles - // Avoid cycle error when referring to method while type-checking the signature. - // This avoids a nuisance in the best case (non-parameterized receiver type) and - // since the method is not a type, we get an error. 
If we have a parameterized - // receiver type, instantiating the receiver type leads to the instantiation of - // its methods, and we don't want a cycle error in that case. - // TODO(gri) review if this is correct and/or whether we still need this? - saved := obj.color_ - obj.color_ = black fdecl := decl.fdecl check.funcType(sig, fdecl.Recv, fdecl.TParamList, fdecl.Type) - obj.color_ = saved // Set the scope's extent to the complete "func (...) { ... }" // so that Scope.Innermost works correctly. @@ -921,10 +880,9 @@ func (check *Checker) declStmt(list []syntax.Decl) { // the innermost containing block." scopePos := s.Name.Pos() check.declare(check.scope, s.Name, obj, scopePos) - // mark and unmark type before calling typeDecl; its type is still nil (see Checker.objDecl) - obj.setColor(grey + color(check.push(obj))) + check.push(obj) // mark as grey check.typeDecl(obj, s, nil) - check.pop().setColor(black) + check.pop() default: check.errorf(s, InvalidSyntaxTree, "unknown syntax.Decl node %T", s) diff --git a/src/cmd/compile/internal/types2/object.go b/src/cmd/compile/internal/types2/object.go index ce129dbf59..dd2d415790 100644 --- a/src/cmd/compile/internal/types2/object.go +++ b/src/cmd/compile/internal/types2/object.go @@ -42,18 +42,12 @@ type Object interface { // 0 for all other objects (including objects in file scopes). order() uint32 - // color returns the object's color. - color() color - // setType sets the type of the object. setType(Type) // setOrder sets the order number of the object. It must be > 0. setOrder(uint32) - // setColor sets the object's color. It must not be white. - setColor(color color) - // setParent sets the parent scope of the object. setParent(*Scope) @@ -102,41 +96,9 @@ type object struct { name string typ Type order_ uint32 - color_ color scopePos_ syntax.Pos } -// color encodes the color of an object (see Checker.objDecl for details). -type color uint32 - -// An object may be painted in one of three colors. 
-// Color values other than white or black are considered grey. -const ( - white color = iota - black - grey // must be > white and black -) - -func (c color) String() string { - switch c { - case white: - return "white" - case black: - return "black" - default: - return "grey" - } -} - -// colorFor returns the (initial) color for an object depending on -// whether its type t is known or not. -func colorFor(t Type) color { - if t != nil { - return black - } - return white -} - // Parent returns the scope in which the object is declared. // The result is nil for methods and struct fields. func (obj *object) Parent() *Scope { return obj.parent } @@ -164,13 +126,11 @@ func (obj *object) Id() string { return Id(obj.pkg, obj.name) } func (obj *object) String() string { panic("abstract") } func (obj *object) order() uint32 { return obj.order_ } -func (obj *object) color() color { return obj.color_ } func (obj *object) scopePos() syntax.Pos { return obj.scopePos_ } func (obj *object) setParent(parent *Scope) { obj.parent = parent } func (obj *object) setType(typ Type) { obj.typ = typ } func (obj *object) setOrder(order uint32) { assert(order > 0); obj.order_ = order } -func (obj *object) setColor(color color) { assert(color != white); obj.color_ = color } func (obj *object) setScopePos(pos syntax.Pos) { obj.scopePos_ = pos } func (obj *object) sameId(pkg *Package, name string, foldCase bool) bool { @@ -247,7 +207,7 @@ type PkgName struct { // NewPkgName returns a new PkgName object representing an imported package. // The remaining arguments set the attributes found with all Objects. func NewPkgName(pos syntax.Pos, pkg *Package, name string, imported *Package) *PkgName { - return &PkgName{object{nil, pos, pkg, name, Typ[Invalid], 0, black, nopos}, imported} + return &PkgName{object{nil, pos, pkg, name, Typ[Invalid], 0, nopos}, imported} } // Imported returns the package that was imported. 
@@ -263,7 +223,7 @@ type Const struct { // NewConst returns a new constant with value val. // The remaining arguments set the attributes found with all Objects. func NewConst(pos syntax.Pos, pkg *Package, name string, typ Type, val constant.Value) *Const { - return &Const{object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}, val} + return &Const{object{nil, pos, pkg, name, typ, 0, nopos}, val} } // Val returns the constant's value. @@ -288,7 +248,7 @@ type TypeName struct { // argument for NewNamed, which will set the TypeName's type as a side- // effect. func NewTypeName(pos syntax.Pos, pkg *Package, name string, typ Type) *TypeName { - return &TypeName{object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}} + return &TypeName{object{nil, pos, pkg, name, typ, 0, nopos}} } // NewTypeNameLazy returns a new defined type like NewTypeName, but it @@ -402,7 +362,7 @@ func NewField(pos syntax.Pos, pkg *Package, name string, typ Type, embedded bool // newVar returns a new variable. // The arguments set the attributes found with all Objects. func newVar(kind VarKind, pos syntax.Pos, pkg *Package, name string, typ Type) *Var { - return &Var{object: object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}, kind: kind} + return &Var{object: object{nil, pos, pkg, name, typ, 0, nopos}, kind: kind} } // Anonymous reports whether the variable is an embedded field. @@ -452,7 +412,7 @@ func NewFunc(pos syntax.Pos, pkg *Package, name string, sig *Signature) *Func { // as this would violate object.{Type,color} invariants. // TODO(adonovan): propose to disallow NewFunc with nil *Signature. } - return &Func{object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}, false, nil} + return &Func{object{nil, pos, pkg, name, typ, 0, nopos}, false, nil} } // Signature returns the signature (type) of the function or method. @@ -534,7 +494,7 @@ type Label struct { // NewLabel returns a new label. 
func NewLabel(pos syntax.Pos, pkg *Package, name string) *Label { - return &Label{object{pos: pos, pkg: pkg, name: name, typ: Typ[Invalid], color_: black}, false} + return &Label{object{pos: pos, pkg: pkg, name: name, typ: Typ[Invalid]}, false} } // A Builtin represents a built-in function. @@ -545,7 +505,7 @@ type Builtin struct { } func newBuiltin(id builtinId) *Builtin { - return &Builtin{object{name: predeclaredFuncs[id].name, typ: Typ[Invalid], color_: black}, id} + return &Builtin{object{name: predeclaredFuncs[id].name, typ: Typ[Invalid]}, id} } // Nil represents the predeclared value nil. diff --git a/src/cmd/compile/internal/types2/scope.go b/src/cmd/compile/internal/types2/scope.go index 566184df73..10c624be2e 100644 --- a/src/cmd/compile/internal/types2/scope.go +++ b/src/cmd/compile/internal/types2/scope.go @@ -217,10 +217,8 @@ func (*lazyObject) Exported() bool { panic("unreachable") } func (*lazyObject) Id() string { panic("unreachable") } func (*lazyObject) String() string { panic("unreachable") } func (*lazyObject) order() uint32 { panic("unreachable") } -func (*lazyObject) color() color { panic("unreachable") } func (*lazyObject) setType(Type) { panic("unreachable") } func (*lazyObject) setOrder(uint32) { panic("unreachable") } -func (*lazyObject) setColor(color color) { panic("unreachable") } func (*lazyObject) setParent(*Scope) { panic("unreachable") } func (*lazyObject) sameId(*Package, string, bool) bool { panic("unreachable") } func (*lazyObject) scopePos() syntax.Pos { panic("unreachable") } diff --git a/src/cmd/compile/internal/types2/sizeof_test.go b/src/cmd/compile/internal/types2/sizeof_test.go index 092e82318a..f206c12fc3 100644 --- a/src/cmd/compile/internal/types2/sizeof_test.go +++ b/src/cmd/compile/internal/types2/sizeof_test.go @@ -36,14 +36,14 @@ func TestSizeof(t *testing.T) { {term{}, 12, 24}, // Objects - {PkgName{}, 60, 96}, - {Const{}, 64, 104}, - {TypeName{}, 56, 88}, - {Var{}, 64, 104}, - {Func{}, 64, 104}, - {Label{}, 60, 
96}, - {Builtin{}, 60, 96}, - {Nil{}, 56, 88}, + {PkgName{}, 56, 96}, + {Const{}, 60, 104}, + {TypeName{}, 52, 88}, + {Var{}, 60, 104}, + {Func{}, 60, 104}, + {Label{}, 56, 96}, + {Builtin{}, 56, 96}, + {Nil{}, 52, 88}, // Misc {Scope{}, 60, 104}, diff --git a/src/cmd/compile/internal/types2/typexpr.go b/src/cmd/compile/internal/types2/typexpr.go index 8601ce6277..303f782ac4 100644 --- a/src/cmd/compile/internal/types2/typexpr.go +++ b/src/cmd/compile/internal/types2/typexpr.go @@ -16,7 +16,7 @@ import ( // ident type-checks identifier e and initializes x with the value or type of e. // If an error occurred, x.mode is set to invalid. -// For the meaning of def, see Checker.definedType, below. +// For the meaning of def, see Checker.declaredType, below. // If wantType is set, the identifier e is expected to denote a type. func (check *Checker) ident(x *operand, e *syntax.Name, def *TypeName, wantType bool) { x.mode = invalid @@ -149,14 +149,14 @@ func (check *Checker) ident(x *operand, e *syntax.Name, def *TypeName, wantType // typ type-checks the type expression e and returns its type, or Typ[Invalid]. // The type must not be an (uninstantiated) generic type. func (check *Checker) typ(e syntax.Expr) Type { - return check.definedType(e, nil) + return check.declaredType(e, nil) } // varType type-checks the type expression e and returns its type, or Typ[Invalid]. // The type must not be an (uninstantiated) generic type and it must not be a // constraint interface. func (check *Checker) varType(e syntax.Expr) Type { - typ := check.definedType(e, nil) + typ := check.declaredType(e, nil) check.validVarType(e, typ) return typ } @@ -187,11 +187,11 @@ func (check *Checker) validVarType(e syntax.Expr, typ Type) { }).describef(e, "check var type %s", typ) } -// definedType is like typ but also accepts a type name def. 
-// If def != nil, e is the type specification for the type named def, declared -// in a type declaration, and def.typ.underlying will be set to the type of e -// before any components of e are type-checked. -func (check *Checker) definedType(e syntax.Expr, def *TypeName) Type { +// declaredType is like typ but also accepts a type name def. +// If def != nil, e is the type specification for the [Alias] or [Named] type +// named def, and def.typ.fromRHS will be set to the [Type] of e immediately +// after its creation. +func (check *Checker) declaredType(e syntax.Expr, def *TypeName) Type { typ := check.typInternal(e, def) assert(isTyped(typ)) if isGeneric(typ) { @@ -230,7 +230,7 @@ func goTypeName(typ Type) string { } // typInternal drives type checking of types. -// Must only be called by definedType or genericType. +// Must only be called by declaredType or genericType. func (check *Checker) typInternal(e0 syntax.Expr, def *TypeName) (T Type) { if check.conf.Trace { check.trace(e0.Pos(), "-- type %s", e0) @@ -296,7 +296,7 @@ func (check *Checker) typInternal(e0 syntax.Expr, def *TypeName) (T Type) { case *syntax.ParenExpr: // Generic types must be instantiated before they can be used in any form. // Consequently, generic types cannot be parenthesized. - return check.definedType(e.X, def) + return check.declaredType(e.X, def) case *syntax.ArrayType: typ := new(Array) diff --git a/src/cmd/compile/internal/types2/universe.go b/src/cmd/compile/internal/types2/universe.go index 332cd174f9..1ecef97d51 100644 --- a/src/cmd/compile/internal/types2/universe.go +++ b/src/cmd/compile/internal/types2/universe.go @@ -98,7 +98,6 @@ func defPredeclaredTypes() { // interface. { universeAnyNoAlias = NewTypeName(nopos, nil, "any", &Interface{complete: true, tset: &topTypeSet}) - universeAnyNoAlias.setColor(black) // ensure that the any TypeName reports a consistent Parent, after // hijacking Universe.Lookup with gotypesalias=0. 
universeAnyNoAlias.setParent(Universe) @@ -107,7 +106,6 @@ func defPredeclaredTypes() { // into the Universe, but we lean toward the future and insert the Alias // representation. universeAnyAlias = NewTypeName(nopos, nil, "any", nil) - universeAnyAlias.setColor(black) _ = NewAlias(universeAnyAlias, universeAnyNoAlias.Type().Underlying()) // Link TypeName and Alias def(universeAnyAlias) } @@ -115,7 +113,6 @@ func defPredeclaredTypes() { // type error interface{ Error() string } { obj := NewTypeName(nopos, nil, "error", nil) - obj.setColor(black) typ := (*Checker)(nil).newNamed(obj, nil, nil) // error.Error() string @@ -136,7 +133,6 @@ func defPredeclaredTypes() { // type comparable interface{} // marked as comparable { obj := NewTypeName(nopos, nil, "comparable", nil) - obj.setColor(black) typ := (*Checker)(nil).newNamed(obj, nil, nil) // interface{} // marked as comparable @@ -165,7 +161,7 @@ func defPredeclaredConsts() { } func defPredeclaredNil() { - def(&Nil{object{name: "nil", typ: Typ[UntypedNil], color_: black}}) + def(&Nil{object{name: "nil", typ: Typ[UntypedNil]}}) } // A builtinId is the id of a builtin function. @@ -289,7 +285,7 @@ func init() { // a scope. Objects with exported names are inserted in the unsafe package // scope; other objects are inserted in the universe scope. func def(obj Object) { - assert(obj.color() == black) + assert(obj.Type() != nil) name := obj.Name() if strings.Contains(name, " ") { return // nothing to do diff --git a/src/cmd/compile/internal/walk/expr.go b/src/cmd/compile/internal/walk/expr.go index 989ae0a1db..2794671c73 100644 --- a/src/cmd/compile/internal/walk/expr.go +++ b/src/cmd/compile/internal/walk/expr.go @@ -351,6 +351,11 @@ func walkExpr1(n ir.Node, init *ir.Nodes) ir.Node { case ir.OMETHVALUE: return walkMethodValue(n.(*ir.SelectorExpr), init) + + case ir.OMOVE2HEAP: + n := n.(*ir.MoveToHeapExpr) + n.Slice = walkExpr(n.Slice, init) + return n } // No return! Each case must return (or panic), |
