cmd/internal/obj: use prefix insn in MOV* opcodes for GOPPC64=power10

As background, Power10 adds prefixed load, store, and add immediate instructions which encode 34b signed displacements. Likewise, they also give the option to compute addresses against the PC. This enables using simpler PC relative (PC-rel) relocations instead of maintaining a dedicated pointer (the TOC) to the code/data blob on PPC64/linux. Similary, there are several Go opcodes where it can be advantageous to use prefixed instructions instead of composite sequences like oris/ori/add to implement "MOVD <big const>, Rx" or "ADD <big const>, Rx, Ry", or large offset load/stores like "MOVD <big constant>(Rx), Ry" using the same framework which dynamically configures optab. When selecting prefixed instruction forms, the assembler must also use new relocations. These new relocations are always PC-rel by design, thus code assembled as such has no implicit requirement to maintain a TOC pointer when assembling shared objects. Thus, we can safely avoid situations where some Go objects use a TOC pointer, and some do not. This greatly simplifies linking Go objects. For more details about the challenges of linking TOC and PC-rel compiled code, see the PPC64 ELFv2 ABI. The TOC pointer in R2 is still maintained in those build configurations which previously required it (e.x buildmode=pie). However, Go code built with PC-rel relocations does not require the TOC pointer. A future change could remove the overhead of maintaining a TOC pointer in those build configurations. This is enabled only for power10/ppc64le/linux. A final noteworthy difference between the prefixed and regular load/store instruction forms is the removal of the DS/DQ form restrictions. That is, the immediate operand does not need to be aligned. Updates #44549 Change-Id: If59c216d203c3eed963bfa08855e21771e6ed669 Reviewed-on: https://go-review.googlesource.com/c/go/+/355150 Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com> Run-TryBot: Paul Murphy <murp@ibm.com>
author: Paul E. Murphy <murp@ibm.com> 2021-03-09 16:55:22 -0600
committer: Paul Murphy <murp@ibm.com> 2023-02-28 15:06:46 +0000
commit: 72301a9863fb43ff26e9779a086e02cf02031ceb (patch)
tree: b7e140db740e0b104df15ba90f1de8a74b930d69 /src/cmd/internal
parent: e8fbad5de87f34d2e7632f94cac418c7436174ce (diff)
download: go-72301a9863fb43ff26e9779a086e02cf02031ceb.tar.xz
2 files changed, 269 insertions, 56 deletions
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go
index 9469edaf4c..40258ca0b2 100644
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -34,6 +34,7 @@ import (
 	"cmd/internal/objabi"
 	"encoding/binary"
 	"fmt"
+	"internal/buildcfg"
 	"log"
 	"math"
 	"math/bits"
@@ -58,6 +59,12 @@ const (
 	r0iszero = 1
 )
 
+const (
+	// R bit option in prefixed load/store/add D-form operations
+	PFX_R_ABS   = 0 // Offset is absolute
+	PFX_R_PCREL = 1 // Offset is relative to PC, RA should be 0
+)
+
 type Optab struct {
 	as    obj.As // Opcode
 	a1    uint8  // p.From argument (obj.Addr). p is of type obj.Prog.
@@ -108,8 +115,6 @@ var optab = []Optab{
 	{as: AADD, a1: C_UCON, a6: C_REG, type_: 20, size: 4},
 	{as: AADD, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 22, size: 8},
 	{as: AADD, a1: C_ANDCON, a6: C_REG, type_: 22, size: 8},
-	{as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12},
-	{as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12},
 	{as: AADDIS, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 20, size: 4},
 	{as: AADDIS, a1: C_ADDCON, a6: C_REG, type_: 20, size: 4},
 	{as: AADDC, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4},
@@ -211,64 +216,42 @@ var optab = []Optab{
 	{as: AMOVHBR, a1: C_REG, a6: C_XOREG, type_: 44, size: 4},
 	{as: AMOVHBR, a1: C_XOREG, a6: C_REG, type_: 45, size: 4},
 
-	{as: AMOVB, a1: C_ADDR, a6: C_REG, type_: 75, size: 12},
-	{as: AMOVB, a1: C_LOREG, a6: C_REG, type_: 36, size: 12},
 	{as: AMOVB, a1: C_SOREG, a6: C_REG, type_: 8, size: 8},
 	{as: AMOVB, a1: C_XOREG, a6: C_REG, type_: 109, size: 8},
-	{as: AMOVB, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
 	{as: AMOVB, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
-	{as: AMOVB, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
 	{as: AMOVB, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
 	{as: AMOVB, a1: C_REG, a6: C_REG, type_: 13, size: 4},
 
-	{as: AMOVBZ, a1: C_ADDR, a6: C_REG, type_: 75, size: 8},
-	{as: AMOVBZ, a1: C_LOREG, a6: C_REG, type_: 36, size: 8},
 	{as: AMOVBZ, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
 	{as: AMOVBZ, a1: C_XOREG, a6: C_REG, type_: 109, size: 4},
-	{as: AMOVBZ, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
 	{as: AMOVBZ, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
-	{as: AMOVBZ, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
 	{as: AMOVBZ, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
 	{as: AMOVBZ, a1: C_REG, a6: C_REG, type_: 13, size: 4},
 
 	{as: AMOVD, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4},
 	{as: AMOVD, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4},
 	{as: AMOVD, a1: C_UCON, a6: C_REG, type_: 3, size: 4},
-	{as: AMOVD, a1: C_LCON, a6: C_REG, type_: 19, size: 8},
 	{as: AMOVD, a1: C_SACON, a6: C_REG, type_: 3, size: 4},
-	{as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8},
-	{as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8},
 	{as: AMOVD, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
 	{as: AMOVD, a1: C_XOREG, a6: C_REG, type_: 109, size: 4},
 	{as: AMOVD, a1: C_SOREG, a6: C_SPR, type_: 107, size: 8},
-	{as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8},
-	{as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8},
-	{as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12},
 	{as: AMOVD, a1: C_SPR, a6: C_REG, type_: 66, size: 4},
-	{as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
 	{as: AMOVD, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
 	{as: AMOVD, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
 	{as: AMOVD, a1: C_SPR, a6: C_SOREG, type_: 106, size: 8},
-	{as: AMOVD, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
 	{as: AMOVD, a1: C_REG, a6: C_SPR, type_: 66, size: 4},
 	{as: AMOVD, a1: C_REG, a6: C_REG, type_: 13, size: 4},
 
 	{as: AMOVW, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4},
 	{as: AMOVW, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4},
 	{as: AMOVW, a1: C_UCON, a6: C_REG, type_: 3, size: 4},
-	{as: AMOVW, a1: C_LCON, a6: C_REG, type_: 19, size: 8},
 	{as: AMOVW, a1: C_SACON, a6: C_REG, type_: 3, size: 4},
-	{as: AMOVW, a1: C_LACON, a6: C_REG, type_: 26, size: 8},
-	{as: AMOVW, a1: C_ADDR, a6: C_REG, type_: 75, size: 8},
 	{as: AMOVW, a1: C_CREG, a6: C_REG, type_: 68, size: 4},
 	{as: AMOVW, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
-	{as: AMOVW, a1: C_LOREG, a6: C_REG, type_: 36, size: 8},
 	{as: AMOVW, a1: C_XOREG, a6: C_REG, type_: 109, size: 4},
 	{as: AMOVW, a1: C_SPR, a6: C_REG, type_: 66, size: 4},
-	{as: AMOVW, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
 	{as: AMOVW, a1: C_REG, a6: C_CREG, type_: 69, size: 4},
 	{as: AMOVW, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
-	{as: AMOVW, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
 	{as: AMOVW, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
 	{as: AMOVW, a1: C_REG, a6: C_SPR, type_: 66, size: 4},
 	{as: AMOVW, a1: C_REG, a6: C_REG, type_: 13, size: 4},
@@ -276,14 +259,10 @@ var optab = []Optab{
 	{as: AFMOVD, a1: C_ADDCON, a6: C_FREG, type_: 24, size: 8},
 	{as: AFMOVD, a1: C_SOREG, a6: C_FREG, type_: 8, size: 4},
 	{as: AFMOVD, a1: C_XOREG, a6: C_FREG, type_: 109, size: 4},
-	{as: AFMOVD, a1: C_LOREG, a6: C_FREG, type_: 36, size: 8},
 	{as: AFMOVD, a1: C_ZCON, a6: C_FREG, type_: 24, size: 4},
-	{as: AFMOVD, a1: C_ADDR, a6: C_FREG, type_: 75, size: 8},
 	{as: AFMOVD, a1: C_FREG, a6: C_FREG, type_: 33, size: 4},
 	{as: AFMOVD, a1: C_FREG, a6: C_SOREG, type_: 7, size: 4},
 	{as: AFMOVD, a1: C_FREG, a6: C_XOREG, type_: 108, size: 4},
-	{as: AFMOVD, a1: C_FREG, a6: C_LOREG, type_: 35, size: 8},
-	{as: AFMOVD, a1: C_FREG, a6: C_ADDR, type_: 74, size: 8},
 
 	{as: AFMOVSX, a1: C_XOREG, a6: C_FREG, type_: 45, size: 4},
 	{as: AFMOVSX, a1: C_FREG, a6: C_XOREG, type_: 44, size: 4},
@@ -535,10 +514,71 @@ var optab = []Optab{
 	{as: obj.APCALIGN, a1: C_LCON, type_: 0, size: 0},   // align code
 }
 
+// These are opcodes above which may generate different sequences depending on whether prefix opcode support
+// is available
+type PrefixableOptab struct {
+	Optab
+	minGOPPC64 int  // Minimum GOPPC64 required to support this.
+	pfxsize    int8 // Instruction sequence size when prefixed opcodes are used
+}
+
+// The prefixable optab entry contains the pseudo-opcodes which generate relocations, or may generate
+// a more efficient sequence of instructions if a prefixed version exists (ex. paddi instead of oris/ori/add).
+//
+// This table is meant to transform all sequences which might be TOC-relative into an equivalent PC-relative
+// sequence. It also encompasses several transformations which do not involve relocations, those could be
+// separated and applied to AIX and other non-ELF targets. Likewise, the prefixed forms do not have encoding
+// restrictions on the offset, so they are also used for static binary to allow better code generation. e.x
+//
+//	MOVD something-byte-aligned(Rx), Ry
+//	MOVD 3(Rx), Ry
+//
+// is allowed when the prefixed forms are used.
+//
+// This requires an ISA 3.1 compatible cpu (e.g Power10), and when linking externally an ELFv2 1.5 compliant.
+var prefixableOptab = []PrefixableOptab{
+	{Optab: Optab{as: AMOVD, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12}, minGOPPC64: 10, pfxsize: 12},
+	{Optab: Optab{as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVD, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+	{Optab: Optab{as: AMOVW, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVW, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVW, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVW, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVW, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVW, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+	{Optab: Optab{as: AMOVB, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVB, a1: C_LOREG, a6: C_REG, type_: 36, size: 12}, minGOPPC64: 10, pfxsize: 12},
+	{Optab: Optab{as: AMOVB, a1: C_ADDR, a6: C_REG, type_: 75, size: 12}, minGOPPC64: 10, pfxsize: 12},
+	{Optab: Optab{as: AMOVB, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+	{Optab: Optab{as: AMOVBZ, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVBZ, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVBZ, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AMOVBZ, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+	{Optab: Optab{as: AFMOVD, a1: C_LOREG, a6: C_FREG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AFMOVD, a1: C_ADDR, a6: C_FREG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AFMOVD, a1: C_FREG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AFMOVD, a1: C_FREG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+	{Optab: Optab{as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8},
+	{Optab: Optab{as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8},
+}
+
 var oprange [ALAST & obj.AMask][]Optab
 
 var xcmp [C_NCLASS][C_NCLASS]bool
 
+var pfxEnabled = false // ISA 3.1 prefixed instructions are supported.
+var buildOpCfg = ""    // Save the os/cpu/arch tuple used to configure the assembler in buildop
+
 // padding bytes to add to align code as requested.
 func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int {
 	// For 16 and 32 byte alignment, there is a tradeoff
@@ -1256,12 +1296,34 @@ func opset(a, b0 obj.As) {
 
 // Build the opcode table
 func buildop(ctxt *obj.Link) {
-	if oprange[AANDN&obj.AMask] != nil {
-		// Already initialized; stop now.
+	// PC-rel relocation support is available only for targets which support
+	// ELFv2 1.5 (only power10/ppc64le/linux today).
+	pfxEnabled = buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux" && buildcfg.GOARCH == "ppc64le"
+	cfg := fmt.Sprintf("power%d/%s/%s", buildcfg.GOPPC64, buildcfg.GOARCH, buildcfg.GOOS)
+	if cfg == buildOpCfg {
+		// Already initialized to correct OS/cpu; stop now.
 		// This happens in the cmd/asm tests,
 		// each of which re-initializes the arch.
 		return
 	}
+	buildOpCfg = cfg
+
+	// Configure the optab entries which may generate prefix opcodes.
+	prefixOptab := make([]Optab, 0, len(prefixableOptab))
+	for _, entry := range prefixableOptab {
+		entry := entry
+		if pfxEnabled && buildcfg.GOPPC64 >= entry.minGOPPC64 {
+			// Enable prefix opcode generation and resize.
+			entry.ispfx = true
+			entry.size = entry.pfxsize
+		}
+		// Use the legacy assembler function if none provided.
+		if entry.asmout == nil {
+			entry.asmout = asmout
+		}
+		prefixOptab = append(prefixOptab, entry.Optab)
+
+	}
 
 	for i := 0; i < C_NCLASS; i++ {
 		for n := 0; n < C_NCLASS; n++ {
@@ -1278,7 +1340,9 @@ func buildop(ctxt *obj.Link) {
 	}
 	// Append the generated entries, sort, and fill out oprange.
 	optab = append(optab, optabGen...)
+	optab = append(optab, prefixOptab...)
 	sort.Slice(optab, optabLess)
+
 	for i := 0; i < len(optab); {
 		r := optab[i].as
 		r0 := r & obj.AMask
@@ -2227,6 +2291,13 @@ func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 {
 	return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6
 }
 
+func AOP_PFX_00_8LS(r, ie uint32) uint32 {
+	return 1<<26 | 0<<24 | 0<<23 | (r&1)<<20 | (ie & 0x3FFFF)
+}
+func AOP_PFX_10_MLS(r, ie uint32) uint32 {
+	return 1<<26 | 2<<24 | 0<<23 | (r&1)<<20 | (ie & 0x3FFFF)
+}
+
 const (
 	/* each rhs is OPVCC(_, _, _, _) */
 	OP_ADD      = 31<<26 | 266<<1 | 0<<10 | 0
@@ -2266,6 +2337,52 @@ const (
 	OP_EXTSWSLI = 31<<26 | 445<<2
 )
 
+func pfxadd(rt, ra int16, r uint32, imm32 int64) (uint32, uint32) {
+	return AOP_PFX_10_MLS(r, uint32(imm32>>16)), AOP_IRR(14<<26, uint32(rt), uint32(ra), uint32(imm32))
+}
+
+func pfxload(a obj.As, reg int16, base int16, r uint32) (uint32, uint32) {
+	switch a {
+	case AMOVH:
+		return AOP_PFX_10_MLS(r, 0), AOP_IRR(42<<26, uint32(reg), uint32(base), 0)
+	case AMOVW:
+		return AOP_PFX_00_8LS(r, 0), AOP_IRR(41<<26, uint32(reg), uint32(base), 0)
+	case AMOVD:
+		return AOP_PFX_00_8LS(r, 0), AOP_IRR(57<<26, uint32(reg), uint32(base), 0)
+	case AMOVBZ, AMOVB:
+		return AOP_PFX_10_MLS(r, 0), AOP_IRR(34<<26, uint32(reg), uint32(base), 0)
+	case AMOVHZ:
+		return AOP_PFX_10_MLS(r, 0), AOP_IRR(40<<26, uint32(reg), uint32(base), 0)
+	case AMOVWZ:
+		return AOP_PFX_10_MLS(r, 0), AOP_IRR(32<<26, uint32(reg), uint32(base), 0)
+	case AFMOVS:
+		return AOP_PFX_10_MLS(r, 0), AOP_IRR(48<<26, uint32(reg), uint32(base), 0)
+	case AFMOVD:
+		return AOP_PFX_10_MLS(r, 0), AOP_IRR(50<<26, uint32(reg), uint32(base), 0)
+	}
+	log.Fatalf("Error no pfxload for %v\n", a)
+	return 0, 0
+}
+
+func pfxstore(a obj.As, reg int16, base int16, r uint32) (uint32, uint32) {
+	switch a {
+	case AMOVD:
+		return AOP_PFX_00_8LS(r, 0), AOP_IRR(61<<26, uint32(reg), uint32(base), 0)
+	case AMOVBZ, AMOVB:
+		return AOP_PFX_10_MLS(r, 0), AOP_IRR(38<<26, uint32(reg), uint32(base), 0)
+	case AMOVHZ, AMOVH:
+		return AOP_PFX_10_MLS(r, 0), AOP_IRR(44<<26, uint32(reg), uint32(base), 0)
+	case AMOVWZ, AMOVW:
+		return AOP_PFX_10_MLS(r, 0), AOP_IRR(36<<26, uint32(reg), uint32(base), 0)
+	case AFMOVS:
+		return AOP_PFX_10_MLS(r, 0), AOP_IRR(52<<26, uint32(reg), uint32(base), 0)
+	case AFMOVD:
+		return AOP_PFX_10_MLS(r, 0), AOP_IRR(54<<26, uint32(reg), uint32(base), 0)
+	}
+	log.Fatalf("Error no pfxstore for %v\n", a)
+	return 0, 0
+}
+
 func oclass(a *obj.Addr) int {
 	return int(a.Class) - 1
 }
@@ -2324,7 +2441,7 @@ func (c *ctxt9) opform(insn uint32) int {
 
 // Encode instructions and create relocation for accessing s+d according to the
 // instruction op with source or destination (as appropriate) register reg.
-func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse bool) (o1, o2 uint32) {
+func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse bool) (o1, o2 uint32, rel *obj.Reloc) {
 	if c.ctxt.Headtype == objabi.Haix {
 		// Every symbol access must be made via a TOC anchor.
 		c.ctxt.Diag("symbolAccess called for %s", s.Name)
@@ -2345,7 +2462,7 @@ func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse b
 		o1 = AOP_IRR(OP_ADDIS, uint32(reg), base, 0)
 		o2 = AOP_IRR(op, uint32(reg), uint32(reg), 0)
 	}
-	rel := obj.Addrel(c.cursym)
+	rel = obj.Addrel(c.cursym)
 	rel.Off = int32(c.pc)
 	rel.Siz = 8
 	rel.Sym = s
@@ -2841,8 +2958,12 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
 
 	case 19: /* mov $lcon,r ==> cau+or */
 		d := c.vregoff(&p.From)
-		o1 = loadu32(int(p.To.Reg), d)
-		o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d)))
+		if o.ispfx {
+			o1, o2 = pfxadd(p.To.Reg, REG_R0, PFX_R_ABS, d)
+		} else {
+			o1 = loadu32(int(p.To.Reg), d)
+			o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d)))
+		}
 
 	case 20: /* add $ucon,,r | addis $addcon,r,r */
 		v := c.regoff(&p.From)
@@ -2883,6 +3004,10 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
 			o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
 		}
 
+		if o.ispfx {
+			o1, o2 = pfxadd(int16(p.To.Reg), int16(r), PFX_R_ABS, d)
+		}
+
 	case 23: /* and $lcon/$addcon,r1,r2 ==> oris+ori+and/addi+and */
 		if p.To.Reg == REGTMP || p.Reg == REGTMP {
 			c.ctxt.Diag("can't synthesize large constant\n%v", p)
@@ -2962,11 +3087,12 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
 	case 26: /* mov $lsext/auto/oreg,,r2 ==> addis+addi */
 		v := c.vregoff(&p.From)
 		r := int(p.From.Reg)
+		var rel *obj.Reloc
 
 		switch p.From.Name {
 		case obj.NAME_EXTERN, obj.NAME_STATIC:
 			// Load a 32 bit constant, or relocation depending on if a symbol is attached
-			o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, OP_ADDI, true)
+			o1, o2, rel = c.symbolAccess(p.From.Sym, v, p.To.Reg, OP_ADDI, true)
 		default:
 			if r == 0 {
 				r = c.getimpliedreg(&p.From, p)
@@ -2976,6 +3102,15 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
 			o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(v))
 		}
 
+		if o.ispfx {
+			if rel == nil {
+				o1, o2 = pfxadd(int16(p.To.Reg), int16(r), PFX_R_ABS, v)
+			} else {
+				o1, o2 = pfxadd(int16(p.To.Reg), REG_R0, PFX_R_PCREL, 0)
+				rel.Type = objabi.R_ADDRPOWER_PCREL34
+			}
+		}
+
 	case 27: /* subc ra,$simm,rd => subfic rd,ra,$simm */
 		v := c.regoff(p.GetFrom3())
 
@@ -3118,12 +3253,18 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
 			r = c.getimpliedreg(&p.To, p)
 		}
 		// Offsets in DS form stores must be a multiple of 4
-		inst := c.opstore(p.As)
-		if c.opform(inst) == DS_FORM && v&0x3 != 0 {
-			log.Fatalf("invalid offset for DS form load/store %v", p)
+		if o.ispfx {
+			o1, o2 = pfxstore(p.As, p.From.Reg, int16(r), PFX_R_ABS)
+			o1 |= uint32((v >> 16) & 0x3FFFF)
+			o2 |= uint32(v & 0xFFFF)
+		} else {
+			inst := c.opstore(p.As)
+			if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+				log.Fatalf("invalid offset for DS form load/store %v", p)
+			}
+			o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v)))
+			o2 = AOP_IRR(inst, uint32(p.From.Reg), REGTMP, uint32(v))
 		}
-		o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v)))
-		o2 = AOP_IRR(inst, uint32(p.From.Reg), REGTMP, uint32(v))
 
 	case 36: /* mov b/bz/h/hz lext/lauto/lreg,r ==> lbz+extsb/lbz/lha/lhz etc */
 		v := c.regoff(&p.From)
@@ -3132,8 +3273,15 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
 		if r == 0 {
 			r = c.getimpliedreg(&p.From, p)
 		}
-		o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(v)))
-		o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(p.To.Reg), uint32(v))
+
+		if o.ispfx {
+			o1, o2 = pfxload(p.As, p.To.Reg, int16(r), PFX_R_ABS)
+			o1 |= uint32((v >> 16) & 0x3FFFF)
+			o2 |= uint32(v & 0xFFFF)
+		} else {
+			o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(v)))
+			o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(p.To.Reg), uint32(v))
+		}
 
 		// Sign extend MOVB if needed
 		o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0)
@@ -3484,23 +3632,28 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
 
 	/* relocation operations */
 	case 74:
+		var rel *obj.Reloc
 		v := c.vregoff(&p.To)
 		// Offsets in DS form stores must be a multiple of 4
 		inst := c.opstore(p.As)
-		if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+
+		// Can't reuse base for store instructions.
+		o1, o2, rel = c.symbolAccess(p.To.Sym, v, p.From.Reg, inst, false)
+
+		// Rewrite as a prefixed store if supported.
+		if o.ispfx {
+			o1, o2 = pfxstore(p.As, p.From.Reg, REG_R0, PFX_R_PCREL)
+			rel.Type = objabi.R_ADDRPOWER_PCREL34
+		} else if c.opform(inst) == DS_FORM && v&0x3 != 0 {
 			log.Fatalf("invalid offset for DS form load/store %v", p)
 		}
-		// Can't reuse base for store instructions.
-		o1, o2 = c.symbolAccess(p.To.Sym, v, p.From.Reg, inst, false)
 
 	case 75: // 32 bit offset symbol loads (got/toc/addr)
+		var rel *obj.Reloc
 		v := p.From.Offset
 
 		// Offsets in DS form loads must be a multiple of 4
 		inst := c.opload(p.As)
-		if c.opform(inst) == DS_FORM && v&0x3 != 0 {
-			log.Fatalf("invalid offset for DS form load/store %v", p)
-		}
 		switch p.From.Name {
 		case obj.NAME_GOTREF, obj.NAME_TOCREF:
 			if v != 0 {
@@ -3508,7 +3661,7 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
 			}
 			o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
 			o2 = AOP_IRR(inst, uint32(p.To.Reg), uint32(p.To.Reg), 0)
-			rel := obj.Addrel(c.cursym)
+			rel = obj.Addrel(c.cursym)
 			rel.Off = int32(c.pc)
 			rel.Siz = 8
 			rel.Sym = p.From.Sym
@@ -3521,7 +3674,28 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
 		default:
 			reuseBaseReg := p.As != AFMOVD && p.As != AFMOVS
 			// Reuse To.Reg as base register if not FP move.
-			o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst, reuseBaseReg)
+			o1, o2, rel = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst, reuseBaseReg)
+		}
+
+		// Convert to prefixed forms if supported.
+		if o.ispfx {
+			switch rel.Type {
+			case objabi.R_ADDRPOWER, objabi.R_ADDRPOWER_DS,
+				objabi.R_ADDRPOWER_TOCREL, objabi.R_ADDRPOWER_TOCREL_DS:
+				o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+				rel.Type = objabi.R_ADDRPOWER_PCREL34
+			case objabi.R_POWER_TLS_IE:
+				o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+				rel.Type = objabi.R_POWER_TLS_IE_PCREL34
+			case objabi.R_ADDRPOWER_GOT:
+				o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+				rel.Type = objabi.R_ADDRPOWER_GOT_PCREL34
+			default:
+				// We've failed to convert a TOC-relative relocation to a PC-relative one.
+				log.Fatalf("Unable convert TOC-relative relocation %v to PC-relative", rel.Type)
+			}
+		} else if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+			log.Fatalf("invalid offset for DS form load/store %v", p)
 		}
 
 		o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0)
@@ -3530,26 +3704,36 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
 		if p.From.Offset != 0 {
 			c.ctxt.Diag("invalid offset against tls var %v", p)
 		}
-		o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0)
-		o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0)
 		rel := obj.Addrel(c.cursym)
 		rel.Off = int32(c.pc)
 		rel.Siz = 8
 		rel.Sym = p.From.Sym
-		rel.Type = objabi.R_POWER_TLS_LE
+		if !o.ispfx {
+			o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0)
+			o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0)
+			rel.Type = objabi.R_POWER_TLS_LE
+		} else {
+			o1, o2 = pfxadd(p.To.Reg, REG_R13, PFX_R_ABS, 0)
+			rel.Type = objabi.R_POWER_TLS_LE_TPREL34
+		}
 
 	case 80:
 		if p.From.Offset != 0 {
 			c.ctxt.Diag("invalid offset against tls var %v", p)
 		}
-		o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
-		o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0)
-		o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13)
 		rel := obj.Addrel(c.cursym)
 		rel.Off = int32(c.pc)
 		rel.Siz = 8
 		rel.Sym = p.From.Sym
 		rel.Type = objabi.R_POWER_TLS_IE
+		if !o.ispfx {
+			o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
+			o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0)
+		} else {
+			o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+			rel.Type = objabi.R_POWER_TLS_IE_PCREL34
+		}
+		o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13)
 		rel = obj.Addrel(c.cursym)
 		rel.Off = int32(c.pc) + 8
 		rel.Siz = 4
diff --git a/src/cmd/internal/obj/ppc64/doc.go b/src/cmd/internal/obj/ppc64/doc.go
index 28340e425d..835182bcc6 100644
--- a/src/cmd/internal/obj/ppc64/doc.go
+++ b/src/cmd/internal/obj/ppc64/doc.go
@@ -250,5 +250,34 @@ Register names:
 	CRnGT represents CR bit 1 of CR field n. (0-7)
 	CRnEQ represents CR bit 2 of CR field n. (0-7)
 	CRnSO represents CR bit 3 of CR field n. (0-7)
+
+# GOPPC64 >= power10 and its effects on Go asm
+
+When GOPPC64=power10 is used to compile a Go program for ppc64le/linux, MOV*, FMOV*, and ADD
+opcodes which would require 2 or more machine instructions to emulate a 32 bit constant, or
+symbolic reference are implemented using prefixed instructions.
+
+A user who wishes granular control over the generated machine code is advised to use Go asm
+opcodes which explicitly translate to one PPC64 machine instruction. Most common opcodes
+are supported.
+
+Some examples of how pseudo-op assembly changes with GOPPC64:
+
+	Go asm                       GOPPC64 <= power9          GOPPC64 >= power10
+	MOVD mypackage·foo(SB), R3   addis r2, r3, ...          pld r3, ...
+	                             ld    r3, r3, ...
+
+	MOVD 131072(R3), R4          addis r31, r4, 2           pld r4, 131072(r3)
+	                             ld    r4, 0(R3)
+
+	ADD $131073, R3              lis  r31, 2                paddi r3, r3, 131073
+	                             addi r31, 1
+	                             add  r3,r31,r3
+
+	MOVD $131073, R3             lis  r3, 2                 pli r3, 131073
+	                             addi r3, 1
+
+	MOVD $mypackage·foo(SB), R3  addis r2, r3, ...          pla r3, ...
+	                             addi  r3, r3, ...
 */
 package ppc64
author	Paul E. Murphy <murp@ibm.com>	2021-03-09 16:55:22 -0600
committer	Paul Murphy <murp@ibm.com>	2023-02-28 15:06:46 +0000
commit	72301a9863fb43ff26e9779a086e02cf02031ceb (patch)
tree	b7e140db740e0b104df15ba90f1de8a74b930d69 /src/cmd/internal
parent	e8fbad5de87f34d2e7632f94cac418c7436174ce (diff)
download	go-72301a9863fb43ff26e9779a086e02cf02031ceb.tar.xz