From c3fe874f25ff55f73e4422bea7aa0b0e0e268f3e Mon Sep 17 00:00:00 2001
From: David Chase <drchase@google.com>
Date: Mon, 17 Aug 2020 16:57:22 -0400
Subject: cmd/compile: avoid generating CSEs; do all aggregates; maintain debug
 names

This adds a pass to detect common selection operations,
to avoid generating duplicates.  Duplicate offsets are
also detected.

All aggregate types are now handled; there is some freedom in where
expand_calls is run, though it must run before softfloat.

Debug-name-maintenance is now incremental both in decompose builtin
and in expand_calls; it might be good to push this into all the
decompose passes.

(this is a smash of 5 CLs that rewrote some of the same code several
times to deal with phase-ordering problems, and included an abandoned
attempt.)

For #40724.

Change-Id: I2a0c32f20660bf8b99e2bcecd33545d97d2bd3c6
Reviewed-on: https://go-review.googlesource.com/c/go/+/249458
Trust: David Chase <drchase@google.com>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
---
 src/cmd/compile/fmtmap_test.go | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/cmd/compile/fmtmap_test.go')

diff --git a/src/cmd/compile/fmtmap_test.go b/src/cmd/compile/fmtmap_test.go
index 179c60187f..f8c33ec1f9 100644
--- a/src/cmd/compile/fmtmap_test.go
+++ b/src/cmd/compile/fmtmap_test.go
@@ -136,6 +136,7 @@ var knownFormats = map[string]string{
 	"cmd/compile/internal/types.EType %s":             "",
 	"cmd/compile/internal/types.EType %v":             "",
 	"cmd/internal/obj.ABI %v":                         "",
+	"cmd/internal/src.XPos %v":                        "",
 	"error %v":                                        "",
 	"float64 %.2f":                                    "",
 	"float64 %.3f":                                    "",
-- 
cgit v1.3


From 15f01d6ae9853fd51ee8842d9af93d04ce25458c Mon Sep 17 00:00:00 2001
From: David Chase <drchase@google.com>
Date: Tue, 13 Oct 2020 19:24:04 -0400
Subject: cmd/compile: delay expansion of OpArg until expand_calls

As it says, delay expansion of OpArg to the expand_calls phase,
to enable (eventually) interprocedural SSA optimizations, and
(sooner) change to a register ABI.

Includes a round of cleanup to function names and comments,
largely to match the expanded scope of the functions.

This CL removes the per-function dependence on GOSSAHASH,
but the go116lateCallExpansion kill switch remains (and was
tested locally to ensure it worked).

Two functions in expand_calls.go that performed overlapping
things were combined into a single function that is called
twice.

Fixes #42236.
For #40724.

Change-Id: Icbb78947eaa39f17f2c1210d5c2caef20abd6571
Reviewed-on: https://go-review.googlesource.com/c/go/+/262117
Trust: David Chase <drchase@google.com>
Run-TryBot: David Chase <drchase@google.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
---
 src/cmd/compile/fmtmap_test.go               |   1 -
 src/cmd/compile/internal/gc/ssa.go           |  12 +-
 src/cmd/compile/internal/ssa/compile.go      |   2 +-
 src/cmd/compile/internal/ssa/config.go       |   4 +-
 src/cmd/compile/internal/ssa/expand_calls.go | 406 ++++++++++++++++++++-------
 src/cmd/compile/internal/ssa/gen/dec64.rules |   9 +-
 src/cmd/compile/internal/ssa/rewritedec64.go |  16 +-
 src/cmd/compile/internal/ssa/stackalloc.go   |   3 +
 8 files changed, 329 insertions(+), 124 deletions(-)

(limited to 'src/cmd/compile/fmtmap_test.go')

diff --git a/src/cmd/compile/fmtmap_test.go b/src/cmd/compile/fmtmap_test.go
index f8c33ec1f9..179c60187f 100644
--- a/src/cmd/compile/fmtmap_test.go
+++ b/src/cmd/compile/fmtmap_test.go
@@ -136,7 +136,6 @@ var knownFormats = map[string]string{
 	"cmd/compile/internal/types.EType %s":             "",
 	"cmd/compile/internal/types.EType %v":             "",
 	"cmd/internal/obj.ABI %v":                         "",
-	"cmd/internal/src.XPos %v":                        "",
 	"error %v":                                        "",
 	"float64 %.2f":                                    "",
 	"float64 %.3f":                                    "",
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index fb9d3e811a..45d628cc5e 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -409,11 +409,17 @@ func buildssa(fn *Node, worker int) *ssa.Func {
 
 	// Generate addresses of local declarations
 	s.decladdrs = map[*Node]*ssa.Value{}
+	var args []ssa.Param
+	var results []ssa.Param
 	for _, n := range fn.Func.Dcl {
 		switch n.Class() {
-		case PPARAM, PPARAMOUT:
+		case PPARAM:
+			s.decladdrs[n] = s.entryNewValue2A(ssa.OpLocalAddr, types.NewPtr(n.Type), n, s.sp, s.startmem)
+			args = append(args, ssa.Param{Type: n.Type, Offset: int32(n.Xoffset)})
+		case PPARAMOUT:
 			s.decladdrs[n] = s.entryNewValue2A(ssa.OpLocalAddr, types.NewPtr(n.Type), n, s.sp, s.startmem)
-			if n.Class() == PPARAMOUT && s.canSSA(n) {
+			results = append(results, ssa.Param{Type: n.Type, Offset: int32(n.Xoffset)})
+			if s.canSSA(n) {
 				// Save ssa-able PPARAMOUT variables so we can
 				// store them back to the stack at the end of
 				// the function.
@@ -4909,7 +4915,7 @@ func (s *state) canSSA(n *Node) bool {
 	if n.Class() == PPARAM && n.Sym != nil && n.Sym.Name == ".this" {
 		// wrappers generated by genwrapper need to update
 		// the .this pointer in place.
-		// TODO: treat as a PPARMOUT?
+		// TODO: treat as a PPARAMOUT?
 		return false
 	}
 	return canSSAType(n.Type)
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index bddd271273..9ddc53060c 100644
--- a/src/cmd/compile/internal/ssa/compile.go
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -429,7 +429,7 @@ var passes = [...]pass{
 	{name: "early copyelim", fn: copyelim},
 	{name: "early deadcode", fn: deadcode}, // remove generated dead code to avoid doing pointless work during opt
 	{name: "short circuit", fn: shortcircuit},
-	{name: "decompose args", fn: decomposeArgs, required: true},
+	{name: "decompose args", fn: decomposeArgs, required: !go116lateCallExpansion, disabled: go116lateCallExpansion}, // handled by late call lowering
 	{name: "decompose user", fn: decomposeUser, required: true},
 	{name: "pre-opt deadcode", fn: deadcode},
 	{name: "opt", fn: opt, required: true},               // NB: some generic rules know the name of the opt pass. TODO: split required rules and optimizing rules
diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
index cb6f6fe7a1..0fe0337ddf 100644
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -199,9 +199,9 @@ const (
 const go116lateCallExpansion = true
 
 // LateCallExpansionEnabledWithin returns true if late call expansion should be tested
-// within compilation of a function/method triggered by GOSSAHASH (defaults to "yes").
+// within compilation of a function/method.
 func LateCallExpansionEnabledWithin(f *Func) bool {
-	return go116lateCallExpansion && f.DebugTest // Currently set up for GOSSAHASH bug searches
+	return go116lateCallExpansion
 }
 
 // NewConfig returns a new configuration object for the given architecture.
diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go
index 3e3573ff39..fbde19d94c 100644
--- a/src/cmd/compile/internal/ssa/expand_calls.go
+++ b/src/cmd/compile/internal/ssa/expand_calls.go
@@ -15,7 +15,7 @@ type selKey struct {
 	from   *Value
 	offset int64
 	size   int64
-	typ    types.EType
+	typ    *types.Type
 }
 
 type offsetKey struct {
@@ -27,7 +27,8 @@ type offsetKey struct {
 // expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form
 // that is more oriented to a platform's ABI.  The SelectN operations that extract results are rewritten into
 // more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are
-// reached.
+// reached.  On the callee side, OpArg nodes are not decomposed until this phase is run.
+// TODO results should not be lowered until this phase.
 func expandCalls(f *Func) {
 	// Calls that need lowering have some number of inputs, including a memory input,
 	// and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
@@ -42,6 +43,10 @@ func expandCalls(f *Func) {
 	}
 	debug := f.pass.debug > 0
 
+	if debug {
+		fmt.Printf("\nexpandsCalls(%s)\n", f.Name)
+	}
+
 	canSSAType := f.fe.CanSSA
 	regSize := f.Config.RegSize
 	sp, _ := f.spSb()
@@ -58,6 +63,10 @@ func expandCalls(f *Func) {
 
 	namedSelects := make(map[*Value][]namedVal)
 
+	sdom := f.Sdom()
+
+	common := make(map[selKey]*Value)
+
 	// intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
 	// that has no 64-bit integer registers.
 	intPairTypes := func(et types.EType) (tHi, tLo *types.Type) {
@@ -107,6 +116,7 @@ func expandCalls(f *Func) {
 		return v
 	}
 
+	// splitSlots splits one "field" (specified by sfx, offset, and ty) out of the LocalSlots in ls and returns the new LocalSlots this generates.
 	splitSlots := func(ls []LocalSlot, sfx string, offset int64, ty *types.Type) []LocalSlot {
 		var locs []LocalSlot
 		for i := range ls {
@@ -147,21 +157,103 @@ func expandCalls(f *Func) {
 	// With the current ABI, the outputs need to be converted to loads, which will all use the call's
 	// memory output as their input.
 
-	// rewriteSelect recursively walks leaf selector to a root (OpSelectN) through
-	// a chain of Struct/Array Select operations.  If the chain of selectors does not
-	// end in OpSelectN, it does nothing (this can happen depending on compiler phase ordering).
-	// It emits the code necessary to implement the leaf select operation that leads to the call.
+	// rewriteSelect recursively walks from leaf selector to a root (OpSelectN, OpLoad, OpArg)
+	// through a chain of Struct/Array/builtin Select operations.  If the chain of selectors does not
+	// end in an expected root, it does nothing (this can happen depending on compiler phase ordering).
+	// The "leaf" provides the type, the root supplies the container, and the leaf-to-root path
+	// accumulates the offset.
+	// It emits the code necessary to implement the leaf select operation that leads to the root.
+	//
 	// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
 	var rewriteSelect func(leaf *Value, selector *Value, offset int64) []LocalSlot
 	rewriteSelect = func(leaf *Value, selector *Value, offset int64) []LocalSlot {
+		if debug {
+			fmt.Printf("rewriteSelect(%s, %s, %d)\n", leaf.LongString(), selector.LongString(), offset)
+		}
 		var locs []LocalSlot
 		leafType := leaf.Type
+		if len(selector.Args) > 0 {
+			w := selector.Args[0]
+			if w.Op == OpCopy {
+				for w.Op == OpCopy {
+					w = w.Args[0]
+				}
+				selector.SetArg(0, w)
+			}
+		}
 		switch selector.Op {
-		case OpSelectN:
-			// TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
+		case OpArg:
+			if !isAlreadyExpandedAggregateType(selector.Type) {
+				if leafType == selector.Type { // OpIData leads us here, sometimes.
+					leaf.copyOf(selector)
+				} else {
+					f.Fatalf("Unexpected OpArg type, selector=%s, leaf=%s\n", selector.LongString(), leaf.LongString())
+				}
+				if debug {
+					fmt.Printf("\tOpArg, break\n")
+				}
+				break
+			}
+			if leaf.Op == OpIData {
+				leafType = removeTrivialWrapperTypes(leaf.Type)
+			}
+			aux := selector.Aux
+			auxInt := selector.AuxInt + offset
+			if leaf.Block == selector.Block {
+				leaf.reset(OpArg)
+				leaf.Aux = aux
+				leaf.AuxInt = auxInt
+				leaf.Type = leafType
+			} else {
+				w := selector.Block.NewValue0IA(leaf.Pos, OpArg, leafType, auxInt, aux)
+				leaf.copyOf(w)
+				if debug {
+					fmt.Printf("\tnew %s\n", w.LongString())
+				}
+			}
 			for _, s := range namedSelects[selector] {
 				locs = append(locs, f.Names[s.locIndex])
 			}
+
+		case OpLoad: // We end up here because of IData of immediate structures.
+			// Failure case:
+			// (note the failure case is very rare; w/o this case, make.bash and run.bash both pass, as well as
+			// the hard cases of building {syscall,math,math/cmplx,math/bits,go/constant} on ppc64le and mips-softfloat).
+			//
+			// GOSSAFUNC='(*dumper).dump' go build -gcflags=-l -tags=math_big_pure_go cmd/compile/internal/gc
+			// cmd/compile/internal/gc/dump.go:136:14: internal compiler error: '(*dumper).dump': not lowered: v827, StructSelect PTR PTR
+			// b2: ← b1
+			// v20 (+142) = StaticLECall <interface {},mem> {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v8 v1
+			// v21 (142) = SelectN <mem> [1] v20
+			// v22 (142) = SelectN <interface {}> [0] v20
+			// b15: ← b8
+			// v71 (+143) = IData <Nodes> v22 (v[Nodes])
+			// v73 (+146) = StaticLECall <[]*Node,mem> {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v71 v21
+			//
+			// translates (w/o the "case OpLoad:" above) to:
+			//
+			// b2: ← b1
+			// v20 (+142) = StaticCall <mem> {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v715
+			// v23 (142) = Load <*uintptr> v19 v20
+			// v823 (142) = IsNonNil <bool> v23
+			// v67 (+143) = Load <*[]*Node> v880 v20
+			// b15: ← b8
+			// v827 (146) = StructSelect <*[]*Node> [0] v67
+			// v846 (146) = Store <mem> {*[]*Node} v769 v827 v20
+			// v73 (+146) = StaticCall <mem> {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v846
+			// i.e., the struct select is generated and remains in because it is not applied to an actual structure.
+			// The OpLoad was created to load the single field of the IData
+			// This case removes that StructSelect.
+			if leafType != selector.Type {
+				f.Fatalf("Unexpected Load as selector, leaf=%s, selector=%s\n", leaf.LongString(), selector.LongString())
+			}
+			leaf.copyOf(selector)
+			for _, s := range namedSelects[selector] {
+				locs = append(locs, f.Names[s.locIndex])
+			}
+
+		case OpSelectN:
+			// TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
 			call := selector.Args[0]
 			aux := call.Aux.(*AuxCall)
 			which := selector.AuxInt
@@ -171,10 +263,6 @@ func expandCalls(f *Func) {
 			} else {
 				leafType := removeTrivialWrapperTypes(leaf.Type)
 				if canSSAType(leafType) {
-					for leafType.Etype == types.TSTRUCT && leafType.NumFields() == 1 {
-						// This may not be adequately general -- consider [1]etc but this is caused by immediate IDATA
-						leafType = leafType.Field(0).Type
-					}
 					pt := types.NewPtr(leafType)
 					off := offsetFrom(sp, offset+aux.OffsetOfResult(which), pt)
 					// Any selection right out of the arg area/registers has to be same Block as call, use call as mem input.
@@ -185,22 +273,29 @@ func expandCalls(f *Func) {
 					} else {
 						w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call)
 						leaf.copyOf(w)
+						if debug {
+							fmt.Printf("\tnew %s\n", w.LongString())
+						}
+					}
+					for _, s := range namedSelects[selector] {
+						locs = append(locs, f.Names[s.locIndex])
 					}
 				} else {
 					f.Fatalf("Should not have non-SSA-able OpSelectN, selector=%s", selector.LongString())
 				}
 			}
+
 		case OpStructSelect:
 			w := selector.Args[0]
 			var ls []LocalSlot
-			if w.Type.Etype != types.TSTRUCT {
-				f.Fatalf("Bad type for w: v=%v; sel=%v; w=%v; ,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name)
-				// Artifact of immediate interface idata
+			if w.Type.Etype != types.TSTRUCT { // IData artifact
 				ls = rewriteSelect(leaf, w, offset)
 			} else {
 				ls = rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
-				for _, l := range ls {
-					locs = append(locs, f.fe.SplitStruct(l, int(selector.AuxInt)))
+				if w.Op != OpIData {
+					for _, l := range ls {
+						locs = append(locs, f.fe.SplitStruct(l, int(selector.AuxInt)))
+					}
 				}
 			}
 
@@ -221,9 +316,7 @@ func expandCalls(f *Func) {
 		case OpStringPtr:
 			ls := rewriteSelect(leaf, selector.Args[0], offset)
 			locs = splitSlots(ls, ".ptr", 0, typ.BytePtr)
-			//for i := range ls {
-			//	locs = append(locs, f.fe.SplitSlot(&ls[i], ".ptr", 0, typ.BytePtr))
-			//}
+
 		case OpSlicePtr:
 			w := selector.Args[0]
 			ls := rewriteSelect(leaf, w, offset)
@@ -272,32 +365,130 @@ func expandCalls(f *Func) {
 		return locs
 	}
 
-	// storeArg converts stores of SSA-able aggregate arguments (passed to a call) into a series of stores of
-	// smaller types into individual parameter slots.
-	var storeArg func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value
-	storeArg = func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value {
+	// storeArgOrLoad converts stores of SSA-able aggregate arguments (passed to a call) into a series of primitive-typed
+	// stores of non-aggregate types.  It recursively walks up a chain of selectors until it reaches a Load or an Arg.
+	// If it does not reach a Load or an Arg, nothing happens; this allows a little freedom in phase ordering.
+	var storeArgOrLoad func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64) *Value
+
+	// decomposeArgOrLoad is a helper for storeArgOrLoad.
+	// It decomposes a Load or an Arg into smaller parts, parameterized by the decomposeOne and decomposeTwo functions
+	// passed to it, and returns the new mem. If the type does not match one of the expected aggregate types, it returns nil instead.
+	decomposeArgOrLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64,
+		decomposeOne func(pos src.XPos, b *Block, base, source, mem *Value, t1 *types.Type, offArg, offStore int64) *Value,
+		decomposeTwo func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value) *Value {
+		u := source.Type
+		switch u.Etype {
+		case types.TARRAY:
+			elem := u.Elem()
+			for i := int64(0); i < u.NumElem(); i++ {
+				elemOff := i * elem.Size()
+				mem = decomposeOne(pos, b, base, source, mem, elem, source.AuxInt+elemOff, offset+elemOff)
+				pos = pos.WithNotStmt()
+			}
+			return mem
+		case types.TSTRUCT:
+			for i := 0; i < u.NumFields(); i++ {
+				fld := u.Field(i)
+				mem = decomposeOne(pos, b, base, source, mem, fld.Type, source.AuxInt+fld.Offset, offset+fld.Offset)
+				pos = pos.WithNotStmt()
+			}
+			return mem
+		case types.TINT64, types.TUINT64:
+			if t.Width == regSize {
+				break
+			}
+			tHi, tLo := intPairTypes(t.Etype)
+			mem = decomposeOne(pos, b, base, source, mem, tHi, source.AuxInt+hiOffset, offset+hiOffset)
+			pos = pos.WithNotStmt()
+			return decomposeOne(pos, b, base, source, mem, tLo, source.AuxInt+lowOffset, offset+lowOffset)
+		case types.TINTER:
+			return decomposeTwo(pos, b, base, source, mem, typ.Uintptr, typ.BytePtr, source.AuxInt, offset)
+		case types.TSTRING:
+			return decomposeTwo(pos, b, base, source, mem, typ.BytePtr, typ.Int, source.AuxInt, offset)
+		case types.TCOMPLEX64:
+			return decomposeTwo(pos, b, base, source, mem, typ.Float32, typ.Float32, source.AuxInt, offset)
+		case types.TCOMPLEX128:
+			return decomposeTwo(pos, b, base, source, mem, typ.Float64, typ.Float64, source.AuxInt, offset)
+		case types.TSLICE:
+			mem = decomposeTwo(pos, b, base, source, mem, typ.BytePtr, typ.Int, source.AuxInt, offset)
+			return decomposeOne(pos, b, base, source, mem, typ.Int, source.AuxInt+2*ptrSize, offset+2*ptrSize)
+		}
+		return nil
+	}
+
+	// storeOneArg creates a decomposed (one step) arg that is then stored.
+	// pos and b locate the store instruction, base is the base of the store target, source is the "base" of the value input,
+	// mem is the input mem, t is the type in question, and offArg and offStore are the offsets from the respective bases.
+	storeOneArg := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offArg, offStore int64) *Value {
+		w := common[selKey{source, offArg, t.Width, t}]
+		if w == nil {
+			w = source.Block.NewValue0IA(source.Pos, OpArg, t, offArg, source.Aux)
+			common[selKey{source, offArg, t.Width, t}] = w
+		}
+		return storeArgOrLoad(pos, b, base, w, mem, t, offStore)
+	}
+
+	// storeOneLoad creates a decomposed (one step) load that is then stored.
+	storeOneLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offArg, offStore int64) *Value {
+		from := offsetFrom(source.Args[0], offArg, types.NewPtr(t))
+		w := source.Block.NewValue2(source.Pos, OpLoad, t, from, mem)
+		return storeArgOrLoad(pos, b, base, w, mem, t, offStore)
+	}
+
+	storeTwoArg := func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value {
+		mem = storeOneArg(pos, b, base, source, mem, t1, offArg, offStore)
+		pos = pos.WithNotStmt()
+		t1Size := t1.Size()
+		return storeOneArg(pos, b, base, source, mem, t2, offArg+t1Size, offStore+t1Size)
+	}
+
+	storeTwoLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value {
+		mem = storeOneLoad(pos, b, base, source, mem, t1, offArg, offStore)
+		pos = pos.WithNotStmt()
+		t1Size := t1.Size()
+		return storeOneLoad(pos, b, base, source, mem, t2, offArg+t1Size, offStore+t1Size)
+	}
+
+	storeArgOrLoad = func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64) *Value {
 		if debug {
-			fmt.Printf("\tstoreArg(%s;  %s;  %v;  %d;  %s)\n", b, a.LongString(), t, offset, mem.String())
+			fmt.Printf("\tstoreArgOrLoad(%s;  %s;  %s;  %s; %d)\n", base.LongString(), source.LongString(), mem.String(), t.String(), offset)
 		}
 
-		switch a.Op {
+		switch source.Op {
+		case OpCopy:
+			return storeArgOrLoad(pos, b, base, source.Args[0], mem, t, offset)
+
+		case OpLoad:
+			ret := decomposeArgOrLoad(pos, b, base, source, mem, t, offset, storeOneLoad, storeTwoLoad)
+			if ret != nil {
+				return ret
+			}
+
+		case OpArg:
+			ret := decomposeArgOrLoad(pos, b, base, source, mem, t, offset, storeOneArg, storeTwoArg)
+			if ret != nil {
+				return ret
+			}
+
 		case OpArrayMake0, OpStructMake0:
 			return mem
 
 		case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4:
 			for i := 0; i < t.NumFields(); i++ {
 				fld := t.Field(i)
-				mem = storeArg(pos, b, a.Args[i], fld.Type, offset+fld.Offset, mem)
+				mem = storeArgOrLoad(pos, b, base, source.Args[i], mem, fld.Type, offset+fld.Offset)
+				pos = pos.WithNotStmt()
 			}
 			return mem
 
 		case OpArrayMake1:
-			return storeArg(pos, b, a.Args[0], t.Elem(), offset, mem)
+			return storeArgOrLoad(pos, b, base, source.Args[0], mem, t.Elem(), offset)
 
 		case OpInt64Make:
 			tHi, tLo := intPairTypes(t.Etype)
-			mem = storeArg(pos, b, a.Args[0], tHi, offset+hiOffset, mem)
-			return storeArg(pos, b, a.Args[1], tLo, offset+lowOffset, mem)
+			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, tHi, offset+hiOffset)
+			pos = pos.WithNotStmt()
+			return storeArgOrLoad(pos, b, base, source.Args[1], mem, tLo, offset+lowOffset)
 
 		case OpComplexMake:
 			tPart := typ.Float32
@@ -305,59 +496,45 @@ func expandCalls(f *Func) {
 			if wPart == 8 {
 				tPart = typ.Float64
 			}
-			mem = storeArg(pos, b, a.Args[0], tPart, offset, mem)
-			return storeArg(pos, b, a.Args[1], tPart, offset+wPart, mem)
+			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, tPart, offset)
+			pos = pos.WithNotStmt()
+			return storeArgOrLoad(pos, b, base, source.Args[1], mem, tPart, offset+wPart)
 
 		case OpIMake:
-			mem = storeArg(pos, b, a.Args[0], typ.Uintptr, offset, mem)
-			return storeArg(pos, b, a.Args[1], typ.BytePtr, offset+ptrSize, mem)
+			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.Uintptr, offset)
+			pos = pos.WithNotStmt()
+			return storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.BytePtr, offset+ptrSize)
 
 		case OpStringMake:
-			mem = storeArg(pos, b, a.Args[0], typ.BytePtr, offset, mem)
-			return storeArg(pos, b, a.Args[1], typ.Int, offset+ptrSize, mem)
+			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.BytePtr, offset)
+			pos = pos.WithNotStmt()
+			return storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.Int, offset+ptrSize)
 
 		case OpSliceMake:
-			mem = storeArg(pos, b, a.Args[0], typ.BytePtr, offset, mem)
-			mem = storeArg(pos, b, a.Args[1], typ.Int, offset+ptrSize, mem)
-			return storeArg(pos, b, a.Args[2], typ.Int, offset+2*ptrSize, mem)
+			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.BytePtr, offset)
+			pos = pos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.Int, offset+ptrSize)
+			return storeArgOrLoad(pos, b, base, source.Args[2], mem, typ.Int, offset+2*ptrSize)
 		}
 
-		dst := offsetFrom(sp, offset, types.NewPtr(t))
-		x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem)
-		if debug {
-			fmt.Printf("\t\tstoreArg returns %s\n", x.LongString())
-		}
-		return x
-	}
-
-	// splitStore converts a store of an SSA-able aggregate into a series of smaller stores, emitting
-	// appropriate Struct/Array Select operations (which will soon go dead) to obtain the parts.
-	// This has to handle aggregate types that have already been lowered by an earlier phase.
-	var splitStore func(dest, source, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value
-	splitStore = func(dest, source, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value {
-		if debug {
-			fmt.Printf("\tsplitStore(%s;  %s;  %s;  %s;  %v;  %d;  %v)\n", dest.LongString(), source.LongString(), mem.String(), v.LongString(), t, offset, firstStorePos)
-		}
-		pos := v.Pos.WithNotStmt()
+		// For nodes that cannot be taken apart -- OpSelectN, other structure selectors.
 		switch t.Etype {
 		case types.TARRAY:
 			elt := t.Elem()
-			if t.NumElem() == 1 && t.Width == regSize && elt.Width == regSize {
+			if source.Type != t && t.NumElem() == 1 && elt.Width == t.Width && t.Width == regSize {
 				t = removeTrivialWrapperTypes(t)
-				if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
-					f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it")
-				}
-				break // handle the leaf type.
+				// it could be a leaf type, but the "leaf" could be complex64 (for example)
+				return storeArgOrLoad(pos, b, base, source, mem, t, offset)
 			}
 			for i := int64(0); i < t.NumElem(); i++ {
 				sel := source.Block.NewValue1I(pos, OpArraySelect, elt, i, source)
-				mem = splitStore(dest, sel, mem, v, elt, offset+i*elt.Width, firstStorePos)
-				firstStorePos = firstStorePos.WithNotStmt()
+				mem = storeArgOrLoad(pos, b, base, sel, mem, elt, offset+i*elt.Width)
+				pos = pos.WithNotStmt()
 			}
 			return mem
 
 		case types.TSTRUCT:
-			if t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
+			if source.Type != t && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
 				// This peculiar test deals with accesses to immediate interface data.
 				// It works okay because everything is the same size.
 				// Example code that triggers this can be found in go/constant/value.go, function ToComplex
@@ -377,16 +554,15 @@ func expandCalls(f *Func) {
 				// v139 is later stored as an intVal == struct{val *big.Int} which naively requires the fields of
 				// of a *uint8, which does not succeed.
 				t = removeTrivialWrapperTypes(t)
-
 				// it could be a leaf type, but the "leaf" could be complex64 (for example)
-				return splitStore(dest, source, mem, v, t, offset, firstStorePos)
+				return storeArgOrLoad(pos, b, base, source, mem, t, offset)
 			}
 
 			for i := 0; i < t.NumFields(); i++ {
 				fld := t.Field(i)
 				sel := source.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), source)
-				mem = splitStore(dest, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos)
-				firstStorePos = firstStorePos.WithNotStmt()
+				mem = storeArgOrLoad(pos, b, base, sel, mem, fld.Type, offset+fld.Offset)
+				pos = pos.WithNotStmt()
 			}
 			return mem
 
@@ -396,56 +572,55 @@ func expandCalls(f *Func) {
 			}
 			tHi, tLo := intPairTypes(t.Etype)
 			sel := source.Block.NewValue1(pos, OpInt64Hi, tHi, source)
-			mem = splitStore(dest, sel, mem, v, tHi, offset+hiOffset, firstStorePos)
-			firstStorePos = firstStorePos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, sel, mem, tHi, offset+hiOffset)
+			pos = pos.WithNotStmt()
 			sel = source.Block.NewValue1(pos, OpInt64Lo, tLo, source)
-			return splitStore(dest, sel, mem, v, tLo, offset+lowOffset, firstStorePos)
+			return storeArgOrLoad(pos, b, base, sel, mem, tLo, offset+lowOffset)
 
 		case types.TINTER:
 			sel := source.Block.NewValue1(pos, OpITab, typ.BytePtr, source)
-			mem = splitStore(dest, sel, mem, v, typ.BytePtr, offset, firstStorePos)
-			firstStorePos = firstStorePos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset)
+			pos = pos.WithNotStmt()
 			sel = source.Block.NewValue1(pos, OpIData, typ.BytePtr, source)
-			return splitStore(dest, sel, mem, v, typ.BytePtr, offset+ptrSize, firstStorePos)
+			return storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset+ptrSize)
 
 		case types.TSTRING:
 			sel := source.Block.NewValue1(pos, OpStringPtr, typ.BytePtr, source)
-			mem = splitStore(dest, sel, mem, v, typ.BytePtr, offset, firstStorePos)
-			firstStorePos = firstStorePos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset)
+			pos = pos.WithNotStmt()
 			sel = source.Block.NewValue1(pos, OpStringLen, typ.Int, source)
-			return splitStore(dest, sel, mem, v, typ.Int, offset+ptrSize, firstStorePos)
+			return storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+ptrSize)
 
 		case types.TSLICE:
 			et := types.NewPtr(t.Elem())
 			sel := source.Block.NewValue1(pos, OpSlicePtr, et, source)
-			mem = splitStore(dest, sel, mem, v, et, offset, firstStorePos)
-			firstStorePos = firstStorePos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, sel, mem, et, offset)
+			pos = pos.WithNotStmt()
 			sel = source.Block.NewValue1(pos, OpSliceLen, typ.Int, source)
-			mem = splitStore(dest, sel, mem, v, typ.Int, offset+ptrSize, firstStorePos)
+			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+ptrSize)
 			sel = source.Block.NewValue1(pos, OpSliceCap, typ.Int, source)
-			return splitStore(dest, sel, mem, v, typ.Int, offset+2*ptrSize, firstStorePos)
+			return storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+2*ptrSize)
 
 		case types.TCOMPLEX64:
 			sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float32, source)
-			mem = splitStore(dest, sel, mem, v, typ.Float32, offset, firstStorePos)
-			firstStorePos = firstStorePos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Float32, offset)
+			pos = pos.WithNotStmt()
 			sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float32, source)
-			return splitStore(dest, sel, mem, v, typ.Float32, offset+4, firstStorePos)
+			return storeArgOrLoad(pos, b, base, sel, mem, typ.Float32, offset+4)
 
 		case types.TCOMPLEX128:
 			sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float64, source)
-			mem = splitStore(dest, sel, mem, v, typ.Float64, offset, firstStorePos)
-			firstStorePos = firstStorePos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Float64, offset)
+			pos = pos.WithNotStmt()
 			sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float64, source)
-			return splitStore(dest, sel, mem, v, typ.Float64, offset+8, firstStorePos)
+			return storeArgOrLoad(pos, b, base, sel, mem, typ.Float64, offset+8)
 		}
-		// Default, including for aggregates whose single element exactly fills their container
-		// TODO this will be a problem for cast interfaces containing floats when we move to registers.
-		x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dest, offset, types.NewPtr(t)), source, mem)
+
+		dst := offsetFrom(base, offset, types.NewPtr(t))
+		x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, source, mem)
 		if debug {
-			fmt.Printf("\t\tsplitStore returns %s\n", x.LongString())
+			fmt.Printf("\t\tstoreArg returns %s\n", x.LongString())
 		}
-
 		return x
 	}
 
@@ -490,7 +665,7 @@ func expandCalls(f *Func) {
 				if debug {
 					fmt.Printf("storeArg %s, %v, %d\n", a.LongString(), aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI))
 				}
-				mem = storeArg(pos, v.Block, a, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI), mem)
+				mem = storeArgOrLoad(pos, v.Block, sp, a, mem, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI))
 			}
 		}
 		v.resetArgs()
@@ -523,7 +698,7 @@ func expandCalls(f *Func) {
 		t := name.Type
 		if isAlreadyExpandedAggregateType(t) {
 			for j, v := range f.NamedValues[name] {
-				if v.Op == OpSelectN {
+				if v.Op == OpSelectN || v.Op == OpArg && isAlreadyExpandedAggregateType(v.Type) {
 					ns := namedSelects[v]
 					namedSelects[v] = append(ns, namedVal{locIndex: i, valIndex: j})
 				}
@@ -531,17 +706,19 @@ func expandCalls(f *Func) {
 		}
 	}
 
-	// Step 1: any stores of aggregates remaining are believed to be sourced from call results.
+	// Step 1: any stores of aggregates remaining are believed to be sourced from call results or args.
 	// Decompose those stores into a series of smaller stores, adding selection ops as necessary.
 	for _, b := range f.Blocks {
 		for _, v := range b.Values {
 			if v.Op == OpStore {
 				t := v.Aux.(*types.Type)
+				source := v.Args[1]
+				tSrc := source.Type
 				iAEATt := isAlreadyExpandedAggregateType(t)
+
 				if !iAEATt {
 					// guarding against store immediate struct into interface data field -- store type is *uint8
 					// TODO can this happen recursively?
-					tSrc := v.Args[1].Type
 					iAEATt = isAlreadyExpandedAggregateType(tSrc)
 					if iAEATt {
 						t = tSrc
@@ -551,8 +728,8 @@ func expandCalls(f *Func) {
 					if debug {
 						fmt.Printf("Splitting store %s\n", v.LongString())
 					}
-					dst, source, mem := v.Args[0], v.Args[1], v.Args[2]
-					mem = splitStore(dst, source, mem, v, t, 0, v.Pos)
+					dst, mem := v.Args[0], v.Args[2]
+					mem = storeArgOrLoad(v.Pos, b, dst, source, mem, t, 0)
 					v.copyOf(mem)
 				}
 			}
@@ -579,7 +756,7 @@ func expandCalls(f *Func) {
 				OpInt64Hi, OpInt64Lo:
 				w := v.Args[0]
 				switch w.Op {
-				case OpStructSelect, OpArraySelect, OpSelectN:
+				case OpStructSelect, OpArraySelect, OpSelectN, OpArg:
 					val2Preds[w] += 1
 					if debug {
 						fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
@@ -595,6 +772,17 @@ func expandCalls(f *Func) {
 					}
 				}
 
+			case OpArg:
+				if !isAlreadyExpandedAggregateType(v.Type) {
+					continue
+				}
+				if _, ok := val2Preds[v]; !ok {
+					val2Preds[v] = 0
+					if debug {
+						fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
+					}
+				}
+
 			case OpSelectNAddr:
 				// Do these directly, there are no chains of selectors.
 				call := v.Args[0]
@@ -612,7 +800,6 @@ func expandCalls(f *Func) {
 	// then forwards to rewrite selectors.
 	//
 	// All chains of selectors end up in same block as the call.
-	sdom := f.Sdom()
 
 	// Compilation must be deterministic, so sort after extracting first zeroes from map.
 	// Sorting allows dominators-last order within each batch,
@@ -640,8 +827,11 @@ func expandCalls(f *Func) {
 		last = len(allOrdered)
 		sort.SliceStable(toProcess, less)
 		for _, v := range toProcess {
-			w := v.Args[0]
 			delete(val2Preds, v)
+			if v.Op == OpArg {
+				continue // no Args[0], hence done.
+			}
+			w := v.Args[0]
 			n, ok := val2Preds[w]
 			if !ok {
 				continue
@@ -655,13 +845,19 @@ func expandCalls(f *Func) {
 		}
 	}
 
-	common := make(map[selKey]*Value)
+	common = make(map[selKey]*Value)
 	// Rewrite duplicate selectors as copies where possible.
 	for i := len(allOrdered) - 1; i >= 0; i-- {
 		v := allOrdered[i]
+		if v.Op == OpArg {
+			continue
+		}
 		w := v.Args[0]
-		for w.Op == OpCopy {
-			w = w.Args[0]
+		if w.Op == OpCopy {
+			for w.Op == OpCopy {
+				w = w.Args[0]
+			}
+			v.SetArg(0, w)
 		}
 		typ := v.Type
 		if typ.IsMemory() {
@@ -691,7 +887,7 @@ func expandCalls(f *Func) {
 		case OpComplexImag:
 			offset = size
 		}
-		sk := selKey{from: w, size: size, offset: offset, typ: typ.Etype}
+		sk := selKey{from: w, size: size, offset: offset, typ: typ}
 		dupe := common[sk]
 		if dupe == nil {
 			common[sk] = v
diff --git a/src/cmd/compile/internal/ssa/gen/dec64.rules b/src/cmd/compile/internal/ssa/gen/dec64.rules
index 07607960fa..9297ed8d2e 100644
--- a/src/cmd/compile/internal/ssa/gen/dec64.rules
+++ b/src/cmd/compile/internal/ssa/gen/dec64.rules
@@ -41,20 +41,21 @@
 		lo
 		(Store {hi.Type} dst hi mem))
 
-(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() =>
+// These rules are disabled during decomposeBuiltin when late call expansion is enabled, but they are always enabled for softFloat.
+(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin") =>
   (Int64Make
     (Arg <typ.Int32> {n} [off+4])
     (Arg <typ.UInt32> {n} [off]))
-(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() =>
+(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")  =>
   (Int64Make
     (Arg <typ.UInt32> {n} [off+4])
     (Arg <typ.UInt32> {n} [off]))
 
-(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() =>
+(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin") =>
   (Int64Make
     (Arg <typ.Int32> {n} [off])
     (Arg <typ.UInt32> {n} [off+4]))
-(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() =>
+(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin") =>
   (Int64Make
     (Arg <typ.UInt32> {n} [off])
     (Arg <typ.UInt32> {n} [off+4]))
diff --git a/src/cmd/compile/internal/ssa/rewritedec64.go b/src/cmd/compile/internal/ssa/rewritedec64.go
index 8b9753414f..c49bc8043e 100644
--- a/src/cmd/compile/internal/ssa/rewritedec64.go
+++ b/src/cmd/compile/internal/ssa/rewritedec64.go
@@ -184,12 +184,12 @@ func rewriteValuedec64_OpArg(v *Value) bool {
 	config := b.Func.Config
 	typ := &b.Func.Config.Types
 	// match: (Arg {n} [off])
-	// cond: is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned()
+	// cond: is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")
 	// result: (Int64Make (Arg <typ.Int32> {n} [off+4]) (Arg <typ.UInt32> {n} [off]))
 	for {
 		off := auxIntToInt32(v.AuxInt)
 		n := auxToSym(v.Aux)
-		if !(is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned()) {
+		if !(is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")) {
 			break
 		}
 		v.reset(OpInt64Make)
@@ -203,12 +203,12 @@ func rewriteValuedec64_OpArg(v *Value) bool {
 		return true
 	}
 	// match: (Arg {n} [off])
-	// cond: is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned()
+	// cond: is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")
 	// result: (Int64Make (Arg <typ.UInt32> {n} [off+4]) (Arg <typ.UInt32> {n} [off]))
 	for {
 		off := auxIntToInt32(v.AuxInt)
 		n := auxToSym(v.Aux)
-		if !(is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned()) {
+		if !(is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")) {
 			break
 		}
 		v.reset(OpInt64Make)
@@ -222,12 +222,12 @@ func rewriteValuedec64_OpArg(v *Value) bool {
 		return true
 	}
 	// match: (Arg {n} [off])
-	// cond: is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned()
+	// cond: is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")
 	// result: (Int64Make (Arg <typ.Int32> {n} [off]) (Arg <typ.UInt32> {n} [off+4]))
 	for {
 		off := auxIntToInt32(v.AuxInt)
 		n := auxToSym(v.Aux)
-		if !(is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned()) {
+		if !(is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")) {
 			break
 		}
 		v.reset(OpInt64Make)
@@ -241,12 +241,12 @@ func rewriteValuedec64_OpArg(v *Value) bool {
 		return true
 	}
 	// match: (Arg {n} [off])
-	// cond: is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned()
+	// cond: is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")
 	// result: (Int64Make (Arg <typ.UInt32> {n} [off]) (Arg <typ.UInt32> {n} [off+4]))
 	for {
 		off := auxIntToInt32(v.AuxInt)
 		n := auxToSym(v.Aux)
-		if !(is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned()) {
+		if !(is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")) {
 			break
 		}
 		v.reset(OpInt64Make)
diff --git a/src/cmd/compile/internal/ssa/stackalloc.go b/src/cmd/compile/internal/ssa/stackalloc.go
index 7612585136..406a3c3ea5 100644
--- a/src/cmd/compile/internal/ssa/stackalloc.go
+++ b/src/cmd/compile/internal/ssa/stackalloc.go
@@ -153,6 +153,9 @@ func (s *stackAllocState) stackalloc() {
 		if v.Op != OpArg {
 			continue
 		}
+		if v.Aux == nil {
+			f.Fatalf("%s has nil Aux\n", v.LongString())
+		}
 		loc := LocalSlot{N: v.Aux.(GCNode), Type: v.Type, Off: v.AuxInt}
 		if f.pass.debug > stackDebug {
 			fmt.Printf("stackalloc %s to %s\n", v, loc)
-- 
cgit v1.3


From 84d7a85089009332756c18e876ec91f96b362ebf Mon Sep 17 00:00:00 2001
From: Cherry Zhang <cherryyz@google.com>
Date: Wed, 21 Oct 2020 20:15:48 -0400
Subject: cmd/compile: delete register maps, completely

Remove the go115ReduceLiveness feature gating flag, along with code
that is only needed when go115ReduceLiveness is false.

Change-Id: I7571913cc74cbd17b330a0ee0160fefc9eeee66e
Reviewed-on: https://go-review.googlesource.com/c/go/+/264338
Trust: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Austin Clements <austin@google.com>
---
 src/cmd/compile/fmtmap_test.go       |   2 -
 src/cmd/compile/internal/gc/gsubr.go |  40 +---
 src/cmd/compile/internal/gc/obj.go   |   2 +-
 src/cmd/compile/internal/gc/plive.go | 421 ++++++-----------------------------
 src/cmd/compile/internal/gc/ssa.go   |   2 +-
 src/cmd/internal/obj/link.go         |   1 -
 src/cmd/internal/obj/plist.go        |  22 +-
 src/cmd/internal/objabi/funcdata.go  |   9 +-
 8 files changed, 87 insertions(+), 412 deletions(-)

(limited to 'src/cmd/compile/fmtmap_test.go')

diff --git a/src/cmd/compile/fmtmap_test.go b/src/cmd/compile/fmtmap_test.go
index 179c60187f..0811df7f7b 100644
--- a/src/cmd/compile/fmtmap_test.go
+++ b/src/cmd/compile/fmtmap_test.go
@@ -105,10 +105,8 @@ var knownFormats = map[string]string{
 	"cmd/compile/internal/ssa.GCNode %v":              "",
 	"cmd/compile/internal/ssa.ID %d":                  "",
 	"cmd/compile/internal/ssa.ID %v":                  "",
-	"cmd/compile/internal/ssa.LocPair %s":             "",
 	"cmd/compile/internal/ssa.LocalSlot %s":           "",
 	"cmd/compile/internal/ssa.LocalSlot %v":           "",
-	"cmd/compile/internal/ssa.Location %T":            "",
 	"cmd/compile/internal/ssa.Location %s":            "",
 	"cmd/compile/internal/ssa.Op %s":                  "",
 	"cmd/compile/internal/ssa.Op %v":                  "",
diff --git a/src/cmd/compile/internal/gc/gsubr.go b/src/cmd/compile/internal/gc/gsubr.go
index ce5182f203..864ada1d3c 100644
--- a/src/cmd/compile/internal/gc/gsubr.go
+++ b/src/cmd/compile/internal/gc/gsubr.go
@@ -70,12 +70,8 @@ func newProgs(fn *Node, worker int) *Progs {
 	pp.pos = fn.Pos
 	pp.settext(fn)
 	// PCDATA tables implicitly start with index -1.
-	pp.prevLive = LivenessIndex{-1, -1, false}
-	if go115ReduceLiveness {
-		pp.nextLive = pp.prevLive
-	} else {
-		pp.nextLive = LivenessInvalid
-	}
+	pp.prevLive = LivenessIndex{-1, false}
+	pp.nextLive = pp.prevLive
 	return pp
 }
 
@@ -120,31 +116,15 @@ func (pp *Progs) Prog(as obj.As) *obj.Prog {
 		Addrconst(&p.From, objabi.PCDATA_StackMapIndex)
 		Addrconst(&p.To, int64(idx))
 	}
-	if !go115ReduceLiveness {
+	if pp.nextLive.isUnsafePoint != pp.prevLive.isUnsafePoint {
+		// Emit unsafe-point marker.
+		pp.prevLive.isUnsafePoint = pp.nextLive.isUnsafePoint
+		p := pp.Prog(obj.APCDATA)
+		Addrconst(&p.From, objabi.PCDATA_UnsafePoint)
 		if pp.nextLive.isUnsafePoint {
-			// Unsafe points are encoded as a special value in the
-			// register map.
-			pp.nextLive.regMapIndex = objabi.PCDATA_RegMapUnsafe
-		}
-		if pp.nextLive.regMapIndex != pp.prevLive.regMapIndex {
-			// Emit register map index change.
-			idx := pp.nextLive.regMapIndex
-			pp.prevLive.regMapIndex = idx
-			p := pp.Prog(obj.APCDATA)
-			Addrconst(&p.From, objabi.PCDATA_RegMapIndex)
-			Addrconst(&p.To, int64(idx))
-		}
-	} else {
-		if pp.nextLive.isUnsafePoint != pp.prevLive.isUnsafePoint {
-			// Emit unsafe-point marker.
-			pp.prevLive.isUnsafePoint = pp.nextLive.isUnsafePoint
-			p := pp.Prog(obj.APCDATA)
-			Addrconst(&p.From, objabi.PCDATA_UnsafePoint)
-			if pp.nextLive.isUnsafePoint {
-				Addrconst(&p.To, objabi.PCDATA_UnsafePointUnsafe)
-			} else {
-				Addrconst(&p.To, objabi.PCDATA_UnsafePointSafe)
-			}
+			Addrconst(&p.To, objabi.PCDATA_UnsafePointUnsafe)
+		} else {
+			Addrconst(&p.To, objabi.PCDATA_UnsafePointSafe)
 		}
 	}
 
diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go
index 226eb45252..32aa7c5bb1 100644
--- a/src/cmd/compile/internal/gc/obj.go
+++ b/src/cmd/compile/internal/gc/obj.go
@@ -312,7 +312,7 @@ func addGCLocals() {
 		if fn == nil {
 			continue
 		}
-		for _, gcsym := range []*obj.LSym{fn.GCArgs, fn.GCLocals, fn.GCRegs} {
+		for _, gcsym := range []*obj.LSym{fn.GCArgs, fn.GCLocals} {
 			if gcsym != nil && !gcsym.OnList() {
 				ggloblsym(gcsym, int32(len(gcsym.P)), obj.RODATA|obj.DUPOK)
 			}
diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go
index b471accb65..a48173e0d6 100644
--- a/src/cmd/compile/internal/gc/plive.go
+++ b/src/cmd/compile/internal/gc/plive.go
@@ -24,16 +24,6 @@ import (
 	"strings"
 )
 
-// go115ReduceLiveness disables register maps and only produces stack
-// maps at call sites.
-//
-// In Go 1.15, we changed debug call injection to use conservative
-// scanning instead of precise pointer maps, so these are no longer
-// necessary.
-//
-// Keep in sync with runtime/preempt.go:go115ReduceLiveness.
-const go115ReduceLiveness = true
-
 // OpVarDef is an annotation for the liveness analysis, marking a place
 // where a complete initialization (definition) of a variable begins.
 // Since the liveness analysis can see initialization of single-word
@@ -96,15 +86,15 @@ type BlockEffects struct {
 	//
 	//	uevar: upward exposed variables (used before set in block)
 	//	varkill: killed variables (set in block)
-	uevar   varRegVec
-	varkill varRegVec
+	uevar   bvec
+	varkill bvec
 
 	// Computed during Liveness.solve using control flow information:
 	//
 	//	livein: variables live at block entry
 	//	liveout: variables live at block exit
-	livein  varRegVec
-	liveout varRegVec
+	livein  bvec
+	liveout bvec
 }
 
 // A collection of global state used by liveness analysis.
@@ -128,16 +118,14 @@ type Liveness struct {
 	// current Block during Liveness.epilogue. Indexed in Value
 	// order for that block. Additionally, for the entry block
 	// livevars[0] is the entry bitmap. Liveness.compact moves
-	// these to stackMaps and regMaps.
-	livevars []varRegVec
+	// these to stackMaps.
+	livevars []bvec
 
 	// livenessMap maps from safe points (i.e., CALLs) to their
 	// liveness map indexes.
 	livenessMap LivenessMap
 	stackMapSet bvecSet
 	stackMaps   []bvec
-	regMapSet   map[liveRegMask]int
-	regMaps     []liveRegMask
 
 	cache progeffectscache
 }
@@ -158,7 +146,7 @@ func (m *LivenessMap) reset() {
 			delete(m.vals, k)
 		}
 	}
-	m.deferreturn = LivenessInvalid
+	m.deferreturn = LivenessDontCare
 }
 
 func (m *LivenessMap) set(v *ssa.Value, i LivenessIndex) {
@@ -166,27 +154,17 @@ func (m *LivenessMap) set(v *ssa.Value, i LivenessIndex) {
 }
 
 func (m LivenessMap) Get(v *ssa.Value) LivenessIndex {
-	if !go115ReduceLiveness {
-		// All safe-points are in the map, so if v isn't in
-		// the map, it's an unsafe-point.
-		if idx, ok := m.vals[v.ID]; ok {
-			return idx
-		}
-		return LivenessInvalid
-	}
-
 	// If v isn't in the map, then it's a "don't care" and not an
 	// unsafe-point.
 	if idx, ok := m.vals[v.ID]; ok {
 		return idx
 	}
-	return LivenessIndex{StackMapDontCare, StackMapDontCare, false}
+	return LivenessIndex{StackMapDontCare, false}
 }
 
 // LivenessIndex stores the liveness map information for a Value.
 type LivenessIndex struct {
 	stackMapIndex int
-	regMapIndex   int // only for !go115ReduceLiveness
 
 	// isUnsafePoint indicates that this is an unsafe-point.
 	//
@@ -197,8 +175,10 @@ type LivenessIndex struct {
 	isUnsafePoint bool
 }
 
-// LivenessInvalid indicates an unsafe point with no stack map.
-var LivenessInvalid = LivenessIndex{StackMapDontCare, StackMapDontCare, true} // only for !go115ReduceLiveness
+// LivenessDontCare indicates that the liveness information doesn't
+// matter. Currently it is used in deferreturn liveness when we don't
+// actually need it. It should never be emitted to the PCDATA stream.
+var LivenessDontCare = LivenessIndex{StackMapDontCare, true}
 
 // StackMapDontCare indicates that the stack map index at a Value
 // doesn't matter.
@@ -212,46 +192,12 @@ func (idx LivenessIndex) StackMapValid() bool {
 	return idx.stackMapIndex != StackMapDontCare
 }
 
-func (idx LivenessIndex) RegMapValid() bool {
-	return idx.regMapIndex != StackMapDontCare
-}
-
 type progeffectscache struct {
 	retuevar    []int32
 	tailuevar   []int32
 	initialized bool
 }
 
-// varRegVec contains liveness bitmaps for variables and registers.
-type varRegVec struct {
-	vars bvec
-	regs liveRegMask
-}
-
-func (v *varRegVec) Eq(v2 varRegVec) bool {
-	return v.vars.Eq(v2.vars) && v.regs == v2.regs
-}
-
-func (v *varRegVec) Copy(v2 varRegVec) {
-	v.vars.Copy(v2.vars)
-	v.regs = v2.regs
-}
-
-func (v *varRegVec) Clear() {
-	v.vars.Clear()
-	v.regs = 0
-}
-
-func (v *varRegVec) Or(v1, v2 varRegVec) {
-	v.vars.Or(v1.vars, v2.vars)
-	v.regs = v1.regs | v2.regs
-}
-
-func (v *varRegVec) AndNot(v1, v2 varRegVec) {
-	v.vars.AndNot(v1.vars, v2.vars)
-	v.regs = v1.regs &^ v2.regs
-}
-
 // livenessShouldTrack reports whether the liveness analysis
 // should track the variable n.
 // We don't care about variables that have no pointers,
@@ -400,110 +346,6 @@ func affectedNode(v *ssa.Value) (*Node, ssa.SymEffect) {
 	}
 }
 
-// regEffects returns the registers affected by v.
-func (lv *Liveness) regEffects(v *ssa.Value) (uevar, kill liveRegMask) {
-	if go115ReduceLiveness {
-		return 0, 0
-	}
-	if v.Op == ssa.OpPhi {
-		// All phi node arguments must come from the same
-		// register and the result must also go to that
-		// register, so there's no overall effect.
-		return 0, 0
-	}
-	addLocs := func(mask liveRegMask, v *ssa.Value, ptrOnly bool) liveRegMask {
-		if int(v.ID) >= len(lv.f.RegAlloc) {
-			// v has no allocated registers.
-			return mask
-		}
-		loc := lv.f.RegAlloc[v.ID]
-		if loc == nil {
-			// v has no allocated registers.
-			return mask
-		}
-		if v.Op == ssa.OpGetG {
-			// GetG represents the G register, which is a
-			// pointer, but not a valid GC register. The
-			// current G is always reachable, so it's okay
-			// to ignore this register.
-			return mask
-		}
-
-		// Collect registers and types from v's location.
-		var regs [2]*ssa.Register
-		nreg := 0
-		switch loc := loc.(type) {
-		case ssa.LocalSlot:
-			return mask
-		case *ssa.Register:
-			if ptrOnly && !v.Type.HasPointers() {
-				return mask
-			}
-			regs[0] = loc
-			nreg = 1
-		case ssa.LocPair:
-			// The value will have TTUPLE type, and the
-			// children are nil or *ssa.Register.
-			if v.Type.Etype != types.TTUPLE {
-				v.Fatalf("location pair %s has non-tuple type %v", loc, v.Type)
-			}
-			for i, loc1 := range &loc {
-				if loc1 == nil {
-					continue
-				}
-				if ptrOnly && !v.Type.FieldType(i).HasPointers() {
-					continue
-				}
-				regs[nreg] = loc1.(*ssa.Register)
-				nreg++
-			}
-		default:
-			v.Fatalf("weird RegAlloc location: %s (%T)", loc, loc)
-		}
-
-		// Add register locations to vars.
-		for _, reg := range regs[:nreg] {
-			if reg.GCNum() == -1 {
-				if ptrOnly {
-					v.Fatalf("pointer in non-pointer register %v", reg)
-				} else {
-					continue
-				}
-			}
-			mask |= 1 << uint(reg.GCNum())
-		}
-		return mask
-	}
-
-	// v clobbers all registers it writes to (whether or not the
-	// write is pointer-typed).
-	kill = addLocs(0, v, false)
-	for _, arg := range v.Args {
-		// v uses all registers is reads from, but we only
-		// care about marking those containing pointers.
-		uevar = addLocs(uevar, arg, true)
-	}
-	return uevar, kill
-}
-
-type liveRegMask uint32 // only if !go115ReduceLiveness
-
-func (m liveRegMask) niceString(config *ssa.Config) string {
-	if m == 0 {
-		return "<none>"
-	}
-	str := ""
-	for i, reg := range config.GCRegMap {
-		if m&(1<<uint(i)) != 0 {
-			if str != "" {
-				str += ","
-			}
-			str += reg.String()
-		}
-	}
-	return str
-}
-
 type livenessFuncCache struct {
 	be          []BlockEffects
 	livenessMap LivenessMap
@@ -519,8 +361,6 @@ func newliveness(fn *Node, f *ssa.Func, vars []*Node, idx map[*Node]int32, stkpt
 		vars:       vars,
 		idx:        idx,
 		stkptrsize: stkptrsize,
-
-		regMapSet: make(map[liveRegMask]int),
 	}
 
 	// Significant sources of allocation are kept in the ssa.Cache
@@ -533,7 +373,7 @@ func newliveness(fn *Node, f *ssa.Func, vars []*Node, idx map[*Node]int32, stkpt
 		if cap(lc.be) >= f.NumBlocks() {
 			lv.be = lc.be[:f.NumBlocks()]
 		}
-		lv.livenessMap = LivenessMap{vals: lc.livenessMap.vals, deferreturn: LivenessInvalid}
+		lv.livenessMap = LivenessMap{vals: lc.livenessMap.vals, deferreturn: LivenessDontCare}
 		lc.livenessMap.vals = nil
 	}
 	if lv.be == nil {
@@ -546,10 +386,10 @@ func newliveness(fn *Node, f *ssa.Func, vars []*Node, idx map[*Node]int32, stkpt
 	for _, b := range f.Blocks {
 		be := lv.blockEffects(b)
 
-		be.uevar = varRegVec{vars: bulk.next()}
-		be.varkill = varRegVec{vars: bulk.next()}
-		be.livein = varRegVec{vars: bulk.next()}
-		be.liveout = varRegVec{vars: bulk.next()}
+		be.uevar = bulk.next()
+		be.varkill = bulk.next()
+		be.livein = bulk.next()
+		be.liveout = bulk.next()
 	}
 	lv.livenessMap.reset()
 
@@ -637,20 +477,6 @@ func onebitwalktype1(t *types.Type, off int64, bv bvec) {
 	}
 }
 
-// usedRegs returns the maximum width of the live register map.
-func (lv *Liveness) usedRegs() int32 {
-	var any liveRegMask
-	for _, live := range lv.regMaps {
-		any |= live
-	}
-	i := int32(0)
-	for any != 0 {
-		any >>= 1
-		i++
-	}
-	return i
-}
-
 // Generates live pointer value maps for arguments and local variables. The
 // this argument and the in arguments are always assumed live. The vars
 // argument is a slice of *Nodes.
@@ -851,31 +677,16 @@ func (lv *Liveness) markUnsafePoints() {
 // particular, call Values can have a stack map in case the callee
 // grows the stack, but not themselves be a safe-point.
 func (lv *Liveness) hasStackMap(v *ssa.Value) bool {
-	// The runtime only has safe-points in function prologues, so
-	// we only need stack maps at call sites. go:nosplit functions
-	// are similar.
-	if go115ReduceLiveness || compiling_runtime || lv.f.NoSplit {
-		if !v.Op.IsCall() {
-			return false
-		}
-		// typedmemclr and typedmemmove are write barriers and
-		// deeply non-preemptible. They are unsafe points and
-		// hence should not have liveness maps.
-		if sym, ok := v.Aux.(*ssa.AuxCall); ok && (sym.Fn == typedmemclr || sym.Fn == typedmemmove) {
-			return false
-		}
-		return true
+	if !v.Op.IsCall() {
+		return false
 	}
-
-	switch v.Op {
-	case ssa.OpInitMem, ssa.OpArg, ssa.OpSP, ssa.OpSB,
-		ssa.OpSelect0, ssa.OpSelect1, ssa.OpGetG,
-		ssa.OpVarDef, ssa.OpVarLive, ssa.OpKeepAlive,
-		ssa.OpPhi:
-		// These don't produce code (see genssa).
+	// typedmemclr and typedmemmove are write barriers and
+	// deeply non-preemptible. They are unsafe points and
+	// hence should not have liveness maps.
+	if sym, ok := v.Aux.(*ssa.AuxCall); ok && (sym.Fn == typedmemclr || sym.Fn == typedmemmove) {
 		return false
 	}
-	return !lv.unsafePoints.Get(int32(v.ID))
+	return true
 }
 
 // Initializes the sets for solving the live variables. Visits all the
@@ -891,17 +702,13 @@ func (lv *Liveness) prologue() {
 		// effects with the each prog effects.
 		for j := len(b.Values) - 1; j >= 0; j-- {
 			pos, e := lv.valueEffects(b.Values[j])
-			regUevar, regKill := lv.regEffects(b.Values[j])
 			if e&varkill != 0 {
-				be.varkill.vars.Set(pos)
-				be.uevar.vars.Unset(pos)
+				be.varkill.Set(pos)
+				be.uevar.Unset(pos)
 			}
-			be.varkill.regs |= regKill
-			be.uevar.regs &^= regKill
 			if e&uevar != 0 {
-				be.uevar.vars.Set(pos)
+				be.uevar.Set(pos)
 			}
-			be.uevar.regs |= regUevar
 		}
 	}
 }
@@ -911,8 +718,8 @@ func (lv *Liveness) solve() {
 	// These temporary bitvectors exist to avoid successive allocations and
 	// frees within the loop.
 	nvars := int32(len(lv.vars))
-	newlivein := varRegVec{vars: bvalloc(nvars)}
-	newliveout := varRegVec{vars: bvalloc(nvars)}
+	newlivein := bvalloc(nvars)
+	newliveout := bvalloc(nvars)
 
 	// Walk blocks in postorder ordering. This improves convergence.
 	po := lv.f.Postorder()
@@ -930,11 +737,11 @@ func (lv *Liveness) solve() {
 			switch b.Kind {
 			case ssa.BlockRet:
 				for _, pos := range lv.cache.retuevar {
-					newliveout.vars.Set(pos)
+					newliveout.Set(pos)
 				}
 			case ssa.BlockRetJmp:
 				for _, pos := range lv.cache.tailuevar {
-					newliveout.vars.Set(pos)
+					newliveout.Set(pos)
 				}
 			case ssa.BlockExit:
 				// panic exit - nothing to do
@@ -969,7 +776,7 @@ func (lv *Liveness) solve() {
 // variables at each safe point locations.
 func (lv *Liveness) epilogue() {
 	nvars := int32(len(lv.vars))
-	liveout := varRegVec{vars: bvalloc(nvars)}
+	liveout := bvalloc(nvars)
 	livedefer := bvalloc(nvars) // always-live variables
 
 	// If there is a defer (that could recover), then all output
@@ -1025,12 +832,11 @@ func (lv *Liveness) epilogue() {
 	{
 		// Reserve an entry for function entry.
 		live := bvalloc(nvars)
-		lv.livevars = append(lv.livevars, varRegVec{vars: live})
+		lv.livevars = append(lv.livevars, live)
 	}
 
 	for _, b := range lv.f.Blocks {
 		be := lv.blockEffects(b)
-		firstBitmapIndex := len(lv.livevars)
 
 		// Walk forward through the basic block instructions and
 		// allocate liveness maps for those instructions that need them.
@@ -1040,7 +846,7 @@ func (lv *Liveness) epilogue() {
 			}
 
 			live := bvalloc(nvars)
-			lv.livevars = append(lv.livevars, varRegVec{vars: live})
+			lv.livevars = append(lv.livevars, live)
 		}
 
 		// walk backward, construct maps at each safe point
@@ -1056,21 +862,18 @@ func (lv *Liveness) epilogue() {
 
 				live := &lv.livevars[index]
 				live.Or(*live, liveout)
-				live.vars.Or(live.vars, livedefer) // only for non-entry safe points
+				live.Or(*live, livedefer) // only for non-entry safe points
 				index--
 			}
 
 			// Update liveness information.
 			pos, e := lv.valueEffects(v)
-			regUevar, regKill := lv.regEffects(v)
 			if e&varkill != 0 {
-				liveout.vars.Unset(pos)
+				liveout.Unset(pos)
 			}
-			liveout.regs &^= regKill
 			if e&uevar != 0 {
-				liveout.vars.Set(pos)
+				liveout.Set(pos)
 			}
-			liveout.regs |= regUevar
 		}
 
 		if b == lv.f.Entry {
@@ -1080,7 +883,7 @@ func (lv *Liveness) epilogue() {
 
 			// Check to make sure only input variables are live.
 			for i, n := range lv.vars {
-				if !liveout.vars.Get(int32(i)) {
+				if !liveout.Get(int32(i)) {
 					continue
 				}
 				if n.Class() == PPARAM {
@@ -1094,32 +897,16 @@ func (lv *Liveness) epilogue() {
 			live.Or(*live, liveout)
 		}
 
-		// Check that no registers are live across calls.
-		// For closure calls, the CALLclosure is the last use
-		// of the context register, so it's dead after the call.
-		index = int32(firstBitmapIndex)
-		for _, v := range b.Values {
-			if lv.hasStackMap(v) {
-				live := lv.livevars[index]
-				if v.Op.IsCall() && live.regs != 0 {
-					lv.printDebug()
-					v.Fatalf("%v register %s recorded as live at call", lv.fn.Func.Nname, live.regs.niceString(lv.f.Config))
-				}
-				index++
-			}
-		}
-
 		// The liveness maps for this block are now complete. Compact them.
 		lv.compact(b)
 	}
 
 	// If we have an open-coded deferreturn call, make a liveness map for it.
 	if lv.fn.Func.OpenCodedDeferDisallowed() {
-		lv.livenessMap.deferreturn = LivenessInvalid
+		lv.livenessMap.deferreturn = LivenessDontCare
 	} else {
 		lv.livenessMap.deferreturn = LivenessIndex{
 			stackMapIndex: lv.stackMapSet.add(livedefer),
-			regMapIndex:   0, // entry regMap, containing no live registers
 			isUnsafePoint: false,
 		}
 	}
@@ -1136,20 +923,10 @@ func (lv *Liveness) epilogue() {
 			lv.f.Fatalf("%v %L recorded as live on entry", lv.fn.Func.Nname, n)
 		}
 	}
-	if !go115ReduceLiveness {
-		// Check that no registers are live at function entry.
-		// The context register, if any, comes from a
-		// LoweredGetClosurePtr operation first thing in the function,
-		// so it doesn't appear live at entry.
-		if regs := lv.regMaps[0]; regs != 0 {
-			lv.printDebug()
-			lv.f.Fatalf("%v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config))
-		}
-	}
 }
 
 // Compact coalesces identical bitmaps from lv.livevars into the sets
-// lv.stackMapSet and lv.regMaps.
+// lv.stackMapSet.
 //
 // Compact clears lv.livevars.
 //
@@ -1165,45 +942,23 @@ func (lv *Liveness) epilogue() {
 // PCDATA tables cost about 100k. So for now we keep using a single index for
 // both bitmap lists.
 func (lv *Liveness) compact(b *ssa.Block) {
-	add := func(live varRegVec, isUnsafePoint bool) LivenessIndex { // only if !go115ReduceLiveness
-		// Deduplicate the stack map.
-		stackIndex := lv.stackMapSet.add(live.vars)
-		// Deduplicate the register map.
-		regIndex, ok := lv.regMapSet[live.regs]
-		if !ok {
-			regIndex = len(lv.regMapSet)
-			lv.regMapSet[live.regs] = regIndex
-			lv.regMaps = append(lv.regMaps, live.regs)
-		}
-		return LivenessIndex{stackIndex, regIndex, isUnsafePoint}
-	}
 	pos := 0
 	if b == lv.f.Entry {
 		// Handle entry stack map.
-		if !go115ReduceLiveness {
-			add(lv.livevars[0], false)
-		} else {
-			lv.stackMapSet.add(lv.livevars[0].vars)
-		}
+		lv.stackMapSet.add(lv.livevars[0])
 		pos++
 	}
 	for _, v := range b.Values {
-		if go115ReduceLiveness {
-			hasStackMap := lv.hasStackMap(v)
-			isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
-			idx := LivenessIndex{StackMapDontCare, StackMapDontCare, isUnsafePoint}
-			if hasStackMap {
-				idx.stackMapIndex = lv.stackMapSet.add(lv.livevars[pos].vars)
-				pos++
-			}
-			if hasStackMap || isUnsafePoint {
-				lv.livenessMap.set(v, idx)
-			}
-		} else if lv.hasStackMap(v) {
-			isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
-			lv.livenessMap.set(v, add(lv.livevars[pos], isUnsafePoint))
+		hasStackMap := lv.hasStackMap(v)
+		isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
+		idx := LivenessIndex{StackMapDontCare, isUnsafePoint}
+		if hasStackMap {
+			idx.stackMapIndex = lv.stackMapSet.add(lv.livevars[pos])
 			pos++
 		}
+		if hasStackMap || isUnsafePoint {
+			lv.livenessMap.set(v, idx)
+		}
 	}
 
 	// Reset livevars.
@@ -1250,8 +1005,8 @@ func (lv *Liveness) showlive(v *ssa.Value, live bvec) {
 	Warnl(pos, s)
 }
 
-func (lv *Liveness) printbvec(printed bool, name string, live varRegVec) bool {
-	if live.vars.IsEmpty() && live.regs == 0 {
+func (lv *Liveness) printbvec(printed bool, name string, live bvec) bool {
+	if live.IsEmpty() {
 		return printed
 	}
 
@@ -1264,19 +1019,18 @@ func (lv *Liveness) printbvec(printed bool, name string, live varRegVec) bool {
 
 	comma := ""
 	for i, n := range lv.vars {
-		if !live.vars.Get(int32(i)) {
+		if !live.Get(int32(i)) {
 			continue
 		}
 		fmt.Printf("%s%s", comma, n.Sym.Name)
 		comma = ","
 	}
-	fmt.Printf("%s%s", comma, live.regs.niceString(lv.f.Config))
 	return true
 }
 
-// printeffect is like printbvec, but for valueEffects and regEffects.
-func (lv *Liveness) printeffect(printed bool, name string, pos int32, x bool, regMask liveRegMask) bool {
-	if !x && regMask == 0 {
+// printeffect is like printbvec, but for valueEffects.
+func (lv *Liveness) printeffect(printed bool, name string, pos int32, x bool) bool {
+	if !x {
 		return printed
 	}
 	if !printed {
@@ -1288,15 +1042,7 @@ func (lv *Liveness) printeffect(printed bool, name string, pos int32, x bool, re
 	if x {
 		fmt.Printf("%s", lv.vars[pos].Sym.Name)
 	}
-	for j, reg := range lv.f.Config.GCRegMap {
-		if regMask&(1<<uint(j)) != 0 {
-			if x {
-				fmt.Printf(",")
-			}
-			x = true
-			fmt.Printf("%v", reg)
-		}
-	}
+
 	return true
 }
 
@@ -1364,15 +1110,14 @@ func (lv *Liveness) printDebug() {
 			pcdata := lv.livenessMap.Get(v)
 
 			pos, effect := lv.valueEffects(v)
-			regUevar, regKill := lv.regEffects(v)
 			printed = false
-			printed = lv.printeffect(printed, "uevar", pos, effect&uevar != 0, regUevar)
-			printed = lv.printeffect(printed, "varkill", pos, effect&varkill != 0, regKill)
+			printed = lv.printeffect(printed, "uevar", pos, effect&uevar != 0)
+			printed = lv.printeffect(printed, "varkill", pos, effect&varkill != 0)
 			if printed {
 				fmt.Printf("\n")
 			}
 
-			if pcdata.StackMapValid() || pcdata.RegMapValid() {
+			if pcdata.StackMapValid() {
 				fmt.Printf("\tlive=")
 				printed = false
 				if pcdata.StackMapValid() {
@@ -1388,16 +1133,6 @@ func (lv *Liveness) printDebug() {
 						printed = true
 					}
 				}
-				if pcdata.RegMapValid() { // only if !go115ReduceLiveness
-					regLive := lv.regMaps[pcdata.regMapIndex]
-					if regLive != 0 {
-						if printed {
-							fmt.Printf(",")
-						}
-						fmt.Printf("%s", regLive.niceString(lv.f.Config))
-						printed = true
-					}
-				}
 				fmt.Printf("\n")
 			}
 
@@ -1423,7 +1158,7 @@ func (lv *Liveness) printDebug() {
 // first word dumped is the total number of bitmaps. The second word is the
 // length of the bitmaps. All bitmaps are assumed to be of equal length. The
 // remaining bytes are the raw bitmaps.
-func (lv *Liveness) emit() (argsSym, liveSym, regsSym *obj.LSym) {
+func (lv *Liveness) emit() (argsSym, liveSym *obj.LSym) {
 	// Size args bitmaps to be just large enough to hold the largest pointer.
 	// First, find the largest Xoffset node we care about.
 	// (Nodes without pointers aren't in lv.vars; see livenessShouldTrack.)
@@ -1452,7 +1187,7 @@ func (lv *Liveness) emit() (argsSym, liveSym, regsSym *obj.LSym) {
 	maxLocals := lv.stkptrsize
 
 	// Temporary symbols for encoding bitmaps.
-	var argsSymTmp, liveSymTmp, regsSymTmp obj.LSym
+	var argsSymTmp, liveSymTmp obj.LSym
 
 	args := bvalloc(int32(maxArgs / int64(Widthptr)))
 	aoff := duint32(&argsSymTmp, 0, uint32(len(lv.stackMaps))) // number of bitmaps
@@ -1472,24 +1207,6 @@ func (lv *Liveness) emit() (argsSym, liveSym, regsSym *obj.LSym) {
 		loff = dbvec(&liveSymTmp, loff, locals)
 	}
 
-	if !go115ReduceLiveness {
-		regs := bvalloc(lv.usedRegs())
-		roff := duint32(&regsSymTmp, 0, uint32(len(lv.regMaps))) // number of bitmaps
-		roff = duint32(&regsSymTmp, roff, uint32(regs.n))        // number of bits in each bitmap
-		if regs.n > 32 {
-			// Our uint32 conversion below won't work.
-			Fatalf("GP registers overflow uint32")
-		}
-
-		if regs.n > 0 {
-			for _, live := range lv.regMaps {
-				regs.Clear()
-				regs.b[0] = uint32(live)
-				roff = dbvec(&regsSymTmp, roff, regs)
-			}
-		}
-	}
-
 	// Give these LSyms content-addressable names,
 	// so that they can be de-duplicated.
 	// This provides significant binary size savings.
@@ -1502,11 +1219,7 @@ func (lv *Liveness) emit() (argsSym, liveSym, regsSym *obj.LSym) {
 			lsym.Set(obj.AttrContentAddressable, true)
 		})
 	}
-	if !go115ReduceLiveness {
-		return makeSym(&argsSymTmp), makeSym(&liveSymTmp), makeSym(&regsSymTmp)
-	}
-	// TODO(go115ReduceLiveness): Remove regsSym result
-	return makeSym(&argsSymTmp), makeSym(&liveSymTmp), nil
+	return makeSym(&argsSymTmp), makeSym(&liveSymTmp)
 }
 
 // Entry pointer for liveness analysis. Solves for the liveness of
@@ -1553,7 +1266,7 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap {
 	// Emit the live pointer map data structures
 	ls := e.curfn.Func.lsym
 	fninfo := ls.Func()
-	fninfo.GCArgs, fninfo.GCLocals, fninfo.GCRegs = lv.emit()
+	fninfo.GCArgs, fninfo.GCLocals = lv.emit()
 
 	p := pp.Prog(obj.AFUNCDATA)
 	Addrconst(&p.From, objabi.FUNCDATA_ArgsPointerMaps)
@@ -1567,14 +1280,6 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap {
 	p.To.Name = obj.NAME_EXTERN
 	p.To.Sym = fninfo.GCLocals
 
-	if !go115ReduceLiveness {
-		p = pp.Prog(obj.AFUNCDATA)
-		Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = fninfo.GCRegs
-	}
-
 	return lv.livenessMap
 }
 
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 7388e4e3e8..67484904a9 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -6265,7 +6265,7 @@ func genssa(f *ssa.Func, pp *Progs) {
 		// instruction. We won't use the actual liveness map on a
 		// control instruction. Just mark it something that is
 		// preemptible, unless this function is "all unsafe".
-		s.pp.nextLive = LivenessIndex{-1, -1, allUnsafe(f)}
+		s.pp.nextLive = LivenessIndex{-1, allUnsafe(f)}
 
 		// Emit values in block
 		thearch.SSAMarkMoves(&s, b)
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go
index c652e3adbb..8c8ff587ff 100644
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -460,7 +460,6 @@ type FuncInfo struct {
 
 	GCArgs             *LSym
 	GCLocals           *LSym
-	GCRegs             *LSym // Only if !go115ReduceLiveness
 	StackObjects       *LSym
 	OpenCodedDeferInfo *LSym
 
diff --git a/src/cmd/internal/obj/plist.go b/src/cmd/internal/obj/plist.go
index eb54c67f6a..2b096996f7 100644
--- a/src/cmd/internal/obj/plist.go
+++ b/src/cmd/internal/obj/plist.go
@@ -178,7 +178,7 @@ func (ctxt *Link) Globl(s *LSym, size int64, flag int) {
 // Prog generated.
 func (ctxt *Link) EmitEntryLiveness(s *LSym, p *Prog, newprog ProgAlloc) *Prog {
 	pcdata := ctxt.EmitEntryStackMap(s, p, newprog)
-	pcdata = ctxt.EmitEntryRegMap(s, pcdata, newprog)
+	pcdata = ctxt.EmitEntryUnsafePoint(s, pcdata, newprog)
 	return pcdata
 }
 
@@ -195,13 +195,13 @@ func (ctxt *Link) EmitEntryStackMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog {
 	return pcdata
 }
 
-// Similar to EmitEntryLiveness, but just emit register map.
-func (ctxt *Link) EmitEntryRegMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog {
+// EmitEntryUnsafePoint is like EmitEntryLiveness, but emits only the unsafe-point PCDATA.
+func (ctxt *Link) EmitEntryUnsafePoint(s *LSym, p *Prog, newprog ProgAlloc) *Prog {
 	pcdata := Appendp(p, newprog)
 	pcdata.Pos = s.Func().Text.Pos
 	pcdata.As = APCDATA
 	pcdata.From.Type = TYPE_CONST
-	pcdata.From.Offset = objabi.PCDATA_RegMapIndex
+	pcdata.From.Offset = objabi.PCDATA_UnsafePoint
 	pcdata.To.Type = TYPE_CONST
 	pcdata.To.Offset = -1
 
@@ -216,9 +216,9 @@ func (ctxt *Link) StartUnsafePoint(p *Prog, newprog ProgAlloc) *Prog {
 	pcdata := Appendp(p, newprog)
 	pcdata.As = APCDATA
 	pcdata.From.Type = TYPE_CONST
-	pcdata.From.Offset = objabi.PCDATA_RegMapIndex
+	pcdata.From.Offset = objabi.PCDATA_UnsafePoint
 	pcdata.To.Type = TYPE_CONST
-	pcdata.To.Offset = objabi.PCDATA_RegMapUnsafe
+	pcdata.To.Offset = objabi.PCDATA_UnsafePointUnsafe
 
 	return pcdata
 }
@@ -231,7 +231,7 @@ func (ctxt *Link) EndUnsafePoint(p *Prog, newprog ProgAlloc, oldval int64) *Prog
 	pcdata := Appendp(p, newprog)
 	pcdata.As = APCDATA
 	pcdata.From.Type = TYPE_CONST
-	pcdata.From.Offset = objabi.PCDATA_RegMapIndex
+	pcdata.From.Offset = objabi.PCDATA_UnsafePoint
 	pcdata.To.Type = TYPE_CONST
 	pcdata.To.Offset = oldval
 
@@ -257,11 +257,11 @@ func MarkUnsafePoints(ctxt *Link, p0 *Prog, newprog ProgAlloc, isUnsafePoint, is
 	prevPcdata := int64(-1) // entry PC data value
 	prevRestart := int64(0)
 	for p := prev.Link; p != nil; p, prev = p.Link, p {
-		if p.As == APCDATA && p.From.Offset == objabi.PCDATA_RegMapIndex {
+		if p.As == APCDATA && p.From.Offset == objabi.PCDATA_UnsafePoint {
 			prevPcdata = p.To.Offset
 			continue
 		}
-		if prevPcdata == objabi.PCDATA_RegMapUnsafe {
+		if prevPcdata == objabi.PCDATA_UnsafePointUnsafe {
 			continue // already unsafe
 		}
 		if isUnsafePoint(p) {
@@ -288,7 +288,7 @@ func MarkUnsafePoints(ctxt *Link, p0 *Prog, newprog ProgAlloc, isUnsafePoint, is
 			q := Appendp(prev, newprog)
 			q.As = APCDATA
 			q.From.Type = TYPE_CONST
-			q.From.Offset = objabi.PCDATA_RegMapIndex
+			q.From.Offset = objabi.PCDATA_UnsafePoint
 			q.To.Type = TYPE_CONST
 			q.To.Offset = val
 			q.Pc = p.Pc
@@ -305,7 +305,7 @@ func MarkUnsafePoints(ctxt *Link, p0 *Prog, newprog ProgAlloc, isUnsafePoint, is
 			p = Appendp(p, newprog)
 			p.As = APCDATA
 			p.From.Type = TYPE_CONST
-			p.From.Offset = objabi.PCDATA_RegMapIndex
+			p.From.Offset = objabi.PCDATA_UnsafePoint
 			p.To.Type = TYPE_CONST
 			p.To.Offset = prevPcdata
 			p.Pc = p.Link.Pc
diff --git a/src/cmd/internal/objabi/funcdata.go b/src/cmd/internal/objabi/funcdata.go
index c9480bf2f0..1c5e5e1c8c 100644
--- a/src/cmd/internal/objabi/funcdata.go
+++ b/src/cmd/internal/objabi/funcdata.go
@@ -11,14 +11,12 @@ package objabi
 // ../../../runtime/symtab.go.
 
 const (
-	PCDATA_RegMapIndex   = 0 // if !go115ReduceLiveness
-	PCDATA_UnsafePoint   = 0 // if go115ReduceLiveness
+	PCDATA_UnsafePoint   = 0
 	PCDATA_StackMapIndex = 1
 	PCDATA_InlTreeIndex  = 2
 
 	FUNCDATA_ArgsPointerMaps    = 0
 	FUNCDATA_LocalsPointerMaps  = 1
-	FUNCDATA_RegPointerMaps     = 2 // if !go115ReduceLiveness
 	FUNCDATA_StackObjects       = 3
 	FUNCDATA_InlTree            = 4
 	FUNCDATA_OpenCodedDeferInfo = 5
@@ -32,11 +30,6 @@ const (
 
 // Special PCDATA values.
 const (
-	// PCDATA_RegMapIndex values.
-	//
-	// Only if !go115ReduceLiveness.
-	PCDATA_RegMapUnsafe = PCDATA_UnsafePointUnsafe // Unsafe for async preemption
-
 	// PCDATA_UnsafePoint values.
 	PCDATA_UnsafePointSafe   = -1 // Safe for async preemption
 	PCDATA_UnsafePointUnsafe = -2 // Unsafe for async preemption
-- 
cgit v1.3