diff options
| author | Paul E. Murphy <murp@ibm.com> | 2021-03-09 16:55:22 -0600 |
|---|---|---|
| committer | Paul Murphy <murp@ibm.com> | 2023-02-28 15:06:46 +0000 |
| commit | 72301a9863fb43ff26e9779a086e02cf02031ceb (patch) | |
| tree | b7e140db740e0b104df15ba90f1de8a74b930d69 /src/cmd/internal | |
| parent | e8fbad5de87f34d2e7632f94cac418c7436174ce (diff) | |
| download | go-72301a9863fb43ff26e9779a086e02cf02031ceb.tar.xz | |
cmd/internal/obj: use prefix insn in MOV* opcodes for GOPPC64=power10
As background, Power10 adds prefixed load, store, and add immediate
instructions which encode 34-bit signed displacements. Likewise, they
also give the option to compute addresses against the PC. This enables
using simpler PC relative (PC-rel) relocations instead of maintaining a
dedicated pointer (the TOC) to the code/data blob on PPC64/linux.
Similarly, there are several Go opcodes where it can be advantageous to
use prefixed instructions instead of composite sequences like oris/ori/add
to implement "MOVD <big const>, Rx" or "ADD <big const>, Rx, Ry", or
large offset load/stores like "MOVD <big constant>(Rx), Ry" using the same
framework which dynamically configures optab.
When selecting prefixed instruction forms, the assembler must also use
new relocations. These new relocations are always PC-rel by design, thus
code assembled as such has no implicit requirement to maintain a TOC
pointer when assembling shared objects. Thus, we can safely avoid
situations where some Go objects use a TOC pointer, and some do not. This
greatly simplifies linking Go objects. For more details about the
challenges of linking TOC and PC-rel compiled code, see the PPC64 ELFv2
ABI.
The TOC pointer in R2 is still maintained in those build configurations
which previously required it (e.g. buildmode=pie). However, Go code built
with PC-rel relocations does not require the TOC pointer. A future
change could remove the overhead of maintaining a TOC pointer in those
build configurations.
This is enabled only for power10/ppc64le/linux.
A final noteworthy difference between the prefixed and regular load/store
instruction forms is the removal of the DS/DQ form restrictions. That
is, the immediate operand does not need to be aligned.
Updates #44549
Change-Id: If59c216d203c3eed963bfa08855e21771e6ed669
Reviewed-on: https://go-review.googlesource.com/c/go/+/355150
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Run-TryBot: Paul Murphy <murp@ibm.com>
Diffstat (limited to 'src/cmd/internal')
| -rw-r--r-- | src/cmd/internal/obj/ppc64/asm9.go | 296 | ||||
| -rw-r--r-- | src/cmd/internal/obj/ppc64/doc.go | 29 |
2 files changed, 269 insertions, 56 deletions
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 9469edaf4c..40258ca0b2 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -34,6 +34,7 @@ import ( "cmd/internal/objabi" "encoding/binary" "fmt" + "internal/buildcfg" "log" "math" "math/bits" @@ -58,6 +59,12 @@ const ( r0iszero = 1 ) +const ( + // R bit option in prefixed load/store/add D-form operations + PFX_R_ABS = 0 // Offset is absolute + PFX_R_PCREL = 1 // Offset is relative to PC, RA should be 0 +) + type Optab struct { as obj.As // Opcode a1 uint8 // p.From argument (obj.Addr). p is of type obj.Prog. @@ -108,8 +115,6 @@ var optab = []Optab{ {as: AADD, a1: C_UCON, a6: C_REG, type_: 20, size: 4}, {as: AADD, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 22, size: 8}, {as: AADD, a1: C_ANDCON, a6: C_REG, type_: 22, size: 8}, - {as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12}, - {as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12}, {as: AADDIS, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 20, size: 4}, {as: AADDIS, a1: C_ADDCON, a6: C_REG, type_: 20, size: 4}, {as: AADDC, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4}, @@ -211,64 +216,42 @@ var optab = []Optab{ {as: AMOVHBR, a1: C_REG, a6: C_XOREG, type_: 44, size: 4}, {as: AMOVHBR, a1: C_XOREG, a6: C_REG, type_: 45, size: 4}, - {as: AMOVB, a1: C_ADDR, a6: C_REG, type_: 75, size: 12}, - {as: AMOVB, a1: C_LOREG, a6: C_REG, type_: 36, size: 12}, {as: AMOVB, a1: C_SOREG, a6: C_REG, type_: 8, size: 8}, {as: AMOVB, a1: C_XOREG, a6: C_REG, type_: 109, size: 8}, - {as: AMOVB, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, {as: AMOVB, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, - {as: AMOVB, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, {as: AMOVB, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, {as: AMOVB, a1: C_REG, a6: C_REG, type_: 13, size: 4}, - {as: AMOVBZ, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, - {as: AMOVBZ, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, {as: AMOVBZ, 
a1: C_SOREG, a6: C_REG, type_: 8, size: 4}, {as: AMOVBZ, a1: C_XOREG, a6: C_REG, type_: 109, size: 4}, - {as: AMOVBZ, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, {as: AMOVBZ, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, - {as: AMOVBZ, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, {as: AMOVBZ, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, {as: AMOVBZ, a1: C_REG, a6: C_REG, type_: 13, size: 4}, {as: AMOVD, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4}, {as: AMOVD, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4}, {as: AMOVD, a1: C_UCON, a6: C_REG, type_: 3, size: 4}, - {as: AMOVD, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, {as: AMOVD, a1: C_SACON, a6: C_REG, type_: 3, size: 4}, - {as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, - {as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, {as: AMOVD, a1: C_SOREG, a6: C_REG, type_: 8, size: 4}, {as: AMOVD, a1: C_XOREG, a6: C_REG, type_: 109, size: 4}, {as: AMOVD, a1: C_SOREG, a6: C_SPR, type_: 107, size: 8}, - {as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, - {as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8}, - {as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12}, {as: AMOVD, a1: C_SPR, a6: C_REG, type_: 66, size: 4}, - {as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, {as: AMOVD, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, {as: AMOVD, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, {as: AMOVD, a1: C_SPR, a6: C_SOREG, type_: 106, size: 8}, - {as: AMOVD, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, {as: AMOVD, a1: C_REG, a6: C_SPR, type_: 66, size: 4}, {as: AMOVD, a1: C_REG, a6: C_REG, type_: 13, size: 4}, {as: AMOVW, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4}, {as: AMOVW, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4}, {as: AMOVW, a1: C_UCON, a6: C_REG, type_: 3, size: 4}, - {as: AMOVW, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, {as: AMOVW, a1: C_SACON, a6: C_REG, type_: 3, size: 4}, - {as: AMOVW, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, - {as: AMOVW, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, 
{as: AMOVW, a1: C_CREG, a6: C_REG, type_: 68, size: 4}, {as: AMOVW, a1: C_SOREG, a6: C_REG, type_: 8, size: 4}, - {as: AMOVW, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, {as: AMOVW, a1: C_XOREG, a6: C_REG, type_: 109, size: 4}, {as: AMOVW, a1: C_SPR, a6: C_REG, type_: 66, size: 4}, - {as: AMOVW, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, {as: AMOVW, a1: C_REG, a6: C_CREG, type_: 69, size: 4}, {as: AMOVW, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, - {as: AMOVW, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, {as: AMOVW, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, {as: AMOVW, a1: C_REG, a6: C_SPR, type_: 66, size: 4}, {as: AMOVW, a1: C_REG, a6: C_REG, type_: 13, size: 4}, @@ -276,14 +259,10 @@ var optab = []Optab{ {as: AFMOVD, a1: C_ADDCON, a6: C_FREG, type_: 24, size: 8}, {as: AFMOVD, a1: C_SOREG, a6: C_FREG, type_: 8, size: 4}, {as: AFMOVD, a1: C_XOREG, a6: C_FREG, type_: 109, size: 4}, - {as: AFMOVD, a1: C_LOREG, a6: C_FREG, type_: 36, size: 8}, {as: AFMOVD, a1: C_ZCON, a6: C_FREG, type_: 24, size: 4}, - {as: AFMOVD, a1: C_ADDR, a6: C_FREG, type_: 75, size: 8}, {as: AFMOVD, a1: C_FREG, a6: C_FREG, type_: 33, size: 4}, {as: AFMOVD, a1: C_FREG, a6: C_SOREG, type_: 7, size: 4}, {as: AFMOVD, a1: C_FREG, a6: C_XOREG, type_: 108, size: 4}, - {as: AFMOVD, a1: C_FREG, a6: C_LOREG, type_: 35, size: 8}, - {as: AFMOVD, a1: C_FREG, a6: C_ADDR, type_: 74, size: 8}, {as: AFMOVSX, a1: C_XOREG, a6: C_FREG, type_: 45, size: 4}, {as: AFMOVSX, a1: C_FREG, a6: C_XOREG, type_: 44, size: 4}, @@ -535,10 +514,71 @@ var optab = []Optab{ {as: obj.APCALIGN, a1: C_LCON, type_: 0, size: 0}, // align code } +// These are opcodes above which may generate different sequences depending on whether prefix opcode support +// is available +type PrefixableOptab struct { + Optab + minGOPPC64 int // Minimum GOPPC64 required to support this. 
+ pfxsize int8 // Instruction sequence size when prefixed opcodes are used +} + +// The prefixable optab entry contains the pseudo-opcodes which generate relocations, or may generate +// a more efficient sequence of instructions if a prefixed version exists (ex. paddi instead of oris/ori/add). +// +// This table is meant to transform all sequences which might be TOC-relative into an equivalent PC-relative +// sequence. It also encompasses several transformations which do not involve relocations, those could be +// separated and applied to AIX and other non-ELF targets. Likewise, the prefixed forms do not have encoding +// restrictions on the offset, so they are also used for static binary to allow better code generation. e.x +// +// MOVD something-byte-aligned(Rx), Ry +// MOVD 3(Rx), Ry +// +// is allowed when the prefixed forms are used. +// +// This requires an ISA 3.1 compatible cpu (e.g Power10), and when linking externally an ELFv2 1.5 compliant. +var prefixableOptab = []PrefixableOptab{ + {Optab: Optab{as: AMOVD, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12}, minGOPPC64: 10, pfxsize: 12}, + {Optab: Optab{as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVD, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8}, + + {Optab: Optab{as: AMOVW, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVW, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, 
minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVW, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVW, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVW, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVW, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8}, + + {Optab: Optab{as: AMOVB, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVB, a1: C_LOREG, a6: C_REG, type_: 36, size: 12}, minGOPPC64: 10, pfxsize: 12}, + {Optab: Optab{as: AMOVB, a1: C_ADDR, a6: C_REG, type_: 75, size: 12}, minGOPPC64: 10, pfxsize: 12}, + {Optab: Optab{as: AMOVB, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8}, + + {Optab: Optab{as: AMOVBZ, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVBZ, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVBZ, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVBZ, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8}, + + {Optab: Optab{as: AFMOVD, a1: C_LOREG, a6: C_FREG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AFMOVD, a1: C_ADDR, a6: C_FREG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AFMOVD, a1: C_FREG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AFMOVD, a1: C_FREG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8}, + + {Optab: Optab{as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8}, +} + var oprange [ALAST & obj.AMask][]Optab var xcmp [C_NCLASS][C_NCLASS]bool +var pfxEnabled = false // ISA 3.1 prefixed 
instructions are supported. +var buildOpCfg = "" // Save the os/cpu/arch tuple used to configure the assembler in buildop + // padding bytes to add to align code as requested. func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { // For 16 and 32 byte alignment, there is a tradeoff @@ -1256,12 +1296,34 @@ func opset(a, b0 obj.As) { // Build the opcode table func buildop(ctxt *obj.Link) { - if oprange[AANDN&obj.AMask] != nil { - // Already initialized; stop now. + // PC-rel relocation support is available only for targets which support + // ELFv2 1.5 (only power10/ppc64le/linux today). + pfxEnabled = buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux" && buildcfg.GOARCH == "ppc64le" + cfg := fmt.Sprintf("power%d/%s/%s", buildcfg.GOPPC64, buildcfg.GOARCH, buildcfg.GOOS) + if cfg == buildOpCfg { + // Already initialized to correct OS/cpu; stop now. // This happens in the cmd/asm tests, // each of which re-initializes the arch. return } + buildOpCfg = cfg + + // Configure the optab entries which may generate prefix opcodes. + prefixOptab := make([]Optab, 0, len(prefixableOptab)) + for _, entry := range prefixableOptab { + entry := entry + if pfxEnabled && buildcfg.GOPPC64 >= entry.minGOPPC64 { + // Enable prefix opcode generation and resize. + entry.ispfx = true + entry.size = entry.pfxsize + } + // Use the legacy assembler function if none provided. + if entry.asmout == nil { + entry.asmout = asmout + } + prefixOptab = append(prefixOptab, entry.Optab) + + } for i := 0; i < C_NCLASS; i++ { for n := 0; n < C_NCLASS; n++ { @@ -1278,7 +1340,9 @@ func buildop(ctxt *obj.Link) { } // Append the generated entries, sort, and fill out oprange. optab = append(optab, optabGen...) + optab = append(optab, prefixOptab...) 
sort.Slice(optab, optabLess) + for i := 0; i < len(optab); { r := optab[i].as r0 := r & obj.AMask @@ -2227,6 +2291,13 @@ func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 { return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6 } +func AOP_PFX_00_8LS(r, ie uint32) uint32 { + return 1<<26 | 0<<24 | 0<<23 | (r&1)<<20 | (ie & 0x3FFFF) +} +func AOP_PFX_10_MLS(r, ie uint32) uint32 { + return 1<<26 | 2<<24 | 0<<23 | (r&1)<<20 | (ie & 0x3FFFF) +} + const ( /* each rhs is OPVCC(_, _, _, _) */ OP_ADD = 31<<26 | 266<<1 | 0<<10 | 0 @@ -2266,6 +2337,52 @@ const ( OP_EXTSWSLI = 31<<26 | 445<<2 ) +func pfxadd(rt, ra int16, r uint32, imm32 int64) (uint32, uint32) { + return AOP_PFX_10_MLS(r, uint32(imm32>>16)), AOP_IRR(14<<26, uint32(rt), uint32(ra), uint32(imm32)) +} + +func pfxload(a obj.As, reg int16, base int16, r uint32) (uint32, uint32) { + switch a { + case AMOVH: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(42<<26, uint32(reg), uint32(base), 0) + case AMOVW: + return AOP_PFX_00_8LS(r, 0), AOP_IRR(41<<26, uint32(reg), uint32(base), 0) + case AMOVD: + return AOP_PFX_00_8LS(r, 0), AOP_IRR(57<<26, uint32(reg), uint32(base), 0) + case AMOVBZ, AMOVB: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(34<<26, uint32(reg), uint32(base), 0) + case AMOVHZ: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(40<<26, uint32(reg), uint32(base), 0) + case AMOVWZ: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(32<<26, uint32(reg), uint32(base), 0) + case AFMOVS: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(48<<26, uint32(reg), uint32(base), 0) + case AFMOVD: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(50<<26, uint32(reg), uint32(base), 0) + } + log.Fatalf("Error no pfxload for %v\n", a) + return 0, 0 +} + +func pfxstore(a obj.As, reg int16, base int16, r uint32) (uint32, uint32) { + switch a { + case AMOVD: + return AOP_PFX_00_8LS(r, 0), AOP_IRR(61<<26, uint32(reg), uint32(base), 0) + case AMOVBZ, AMOVB: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(38<<26, uint32(reg), uint32(base), 0) + case AMOVHZ, AMOVH: + 
return AOP_PFX_10_MLS(r, 0), AOP_IRR(44<<26, uint32(reg), uint32(base), 0) + case AMOVWZ, AMOVW: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(36<<26, uint32(reg), uint32(base), 0) + case AFMOVS: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(52<<26, uint32(reg), uint32(base), 0) + case AFMOVD: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(54<<26, uint32(reg), uint32(base), 0) + } + log.Fatalf("Error no pfxstore for %v\n", a) + return 0, 0 +} + func oclass(a *obj.Addr) int { return int(a.Class) - 1 } @@ -2324,7 +2441,7 @@ func (c *ctxt9) opform(insn uint32) int { // Encode instructions and create relocation for accessing s+d according to the // instruction op with source or destination (as appropriate) register reg. -func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse bool) (o1, o2 uint32) { +func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse bool) (o1, o2 uint32, rel *obj.Reloc) { if c.ctxt.Headtype == objabi.Haix { // Every symbol access must be made via a TOC anchor. 
c.ctxt.Diag("symbolAccess called for %s", s.Name) @@ -2345,7 +2462,7 @@ func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse b o1 = AOP_IRR(OP_ADDIS, uint32(reg), base, 0) o2 = AOP_IRR(op, uint32(reg), uint32(reg), 0) } - rel := obj.Addrel(c.cursym) + rel = obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 8 rel.Sym = s @@ -2841,8 +2958,12 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { case 19: /* mov $lcon,r ==> cau+or */ d := c.vregoff(&p.From) - o1 = loadu32(int(p.To.Reg), d) - o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d))) + if o.ispfx { + o1, o2 = pfxadd(p.To.Reg, REG_R0, PFX_R_ABS, d) + } else { + o1 = loadu32(int(p.To.Reg), d) + o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d))) + } case 20: /* add $ucon,,r | addis $addcon,r,r */ v := c.regoff(&p.From) @@ -2883,6 +3004,10 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) } + if o.ispfx { + o1, o2 = pfxadd(int16(p.To.Reg), int16(r), PFX_R_ABS, d) + } + case 23: /* and $lcon/$addcon,r1,r2 ==> oris+ori+and/addi+and */ if p.To.Reg == REGTMP || p.Reg == REGTMP { c.ctxt.Diag("can't synthesize large constant\n%v", p) @@ -2962,11 +3087,12 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { case 26: /* mov $lsext/auto/oreg,,r2 ==> addis+addi */ v := c.vregoff(&p.From) r := int(p.From.Reg) + var rel *obj.Reloc switch p.From.Name { case obj.NAME_EXTERN, obj.NAME_STATIC: // Load a 32 bit constant, or relocation depending on if a symbol is attached - o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, OP_ADDI, true) + o1, o2, rel = c.symbolAccess(p.From.Sym, v, p.To.Reg, OP_ADDI, true) default: if r == 0 { r = c.getimpliedreg(&p.From, p) @@ -2976,6 +3102,15 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(v)) } + if o.ispfx { + if rel == nil { + o1, o2 
= pfxadd(int16(p.To.Reg), int16(r), PFX_R_ABS, v) + } else { + o1, o2 = pfxadd(int16(p.To.Reg), REG_R0, PFX_R_PCREL, 0) + rel.Type = objabi.R_ADDRPOWER_PCREL34 + } + } + case 27: /* subc ra,$simm,rd => subfic rd,ra,$simm */ v := c.regoff(p.GetFrom3()) @@ -3118,12 +3253,18 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { r = c.getimpliedreg(&p.To, p) } // Offsets in DS form stores must be a multiple of 4 - inst := c.opstore(p.As) - if c.opform(inst) == DS_FORM && v&0x3 != 0 { - log.Fatalf("invalid offset for DS form load/store %v", p) + if o.ispfx { + o1, o2 = pfxstore(p.As, p.From.Reg, int16(r), PFX_R_ABS) + o1 |= uint32((v >> 16) & 0x3FFFF) + o2 |= uint32(v & 0xFFFF) + } else { + inst := c.opstore(p.As) + if c.opform(inst) == DS_FORM && v&0x3 != 0 { + log.Fatalf("invalid offset for DS form load/store %v", p) + } + o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v))) + o2 = AOP_IRR(inst, uint32(p.From.Reg), REGTMP, uint32(v)) } - o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v))) - o2 = AOP_IRR(inst, uint32(p.From.Reg), REGTMP, uint32(v)) case 36: /* mov b/bz/h/hz lext/lauto/lreg,r ==> lbz+extsb/lbz/lha/lhz etc */ v := c.regoff(&p.From) @@ -3132,8 +3273,15 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { if r == 0 { r = c.getimpliedreg(&p.From, p) } - o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(v))) - o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(p.To.Reg), uint32(v)) + + if o.ispfx { + o1, o2 = pfxload(p.As, p.To.Reg, int16(r), PFX_R_ABS) + o1 |= uint32((v >> 16) & 0x3FFFF) + o2 |= uint32(v & 0xFFFF) + } else { + o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(v))) + o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(p.To.Reg), uint32(v)) + } // Sign extend MOVB if needed o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0) @@ -3484,23 +3632,28 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { /* relocation 
operations */ case 74: + var rel *obj.Reloc v := c.vregoff(&p.To) // Offsets in DS form stores must be a multiple of 4 inst := c.opstore(p.As) - if c.opform(inst) == DS_FORM && v&0x3 != 0 { + + // Can't reuse base for store instructions. + o1, o2, rel = c.symbolAccess(p.To.Sym, v, p.From.Reg, inst, false) + + // Rewrite as a prefixed store if supported. + if o.ispfx { + o1, o2 = pfxstore(p.As, p.From.Reg, REG_R0, PFX_R_PCREL) + rel.Type = objabi.R_ADDRPOWER_PCREL34 + } else if c.opform(inst) == DS_FORM && v&0x3 != 0 { log.Fatalf("invalid offset for DS form load/store %v", p) } - // Can't reuse base for store instructions. - o1, o2 = c.symbolAccess(p.To.Sym, v, p.From.Reg, inst, false) case 75: // 32 bit offset symbol loads (got/toc/addr) + var rel *obj.Reloc v := p.From.Offset // Offsets in DS form loads must be a multiple of 4 inst := c.opload(p.As) - if c.opform(inst) == DS_FORM && v&0x3 != 0 { - log.Fatalf("invalid offset for DS form load/store %v", p) - } switch p.From.Name { case obj.NAME_GOTREF, obj.NAME_TOCREF: if v != 0 { @@ -3508,7 +3661,7 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { } o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0) o2 = AOP_IRR(inst, uint32(p.To.Reg), uint32(p.To.Reg), 0) - rel := obj.Addrel(c.cursym) + rel = obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 8 rel.Sym = p.From.Sym @@ -3521,7 +3674,28 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { default: reuseBaseReg := p.As != AFMOVD && p.As != AFMOVS // Reuse To.Reg as base register if not FP move. - o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst, reuseBaseReg) + o1, o2, rel = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst, reuseBaseReg) + } + + // Convert to prefixed forms if supported. 
+ if o.ispfx { + switch rel.Type { + case objabi.R_ADDRPOWER, objabi.R_ADDRPOWER_DS, + objabi.R_ADDRPOWER_TOCREL, objabi.R_ADDRPOWER_TOCREL_DS: + o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL) + rel.Type = objabi.R_ADDRPOWER_PCREL34 + case objabi.R_POWER_TLS_IE: + o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL) + rel.Type = objabi.R_POWER_TLS_IE_PCREL34 + case objabi.R_ADDRPOWER_GOT: + o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL) + rel.Type = objabi.R_ADDRPOWER_GOT_PCREL34 + default: + // We've failed to convert a TOC-relative relocation to a PC-relative one. + log.Fatalf("Unable convert TOC-relative relocation %v to PC-relative", rel.Type) + } + } else if c.opform(inst) == DS_FORM && v&0x3 != 0 { + log.Fatalf("invalid offset for DS form load/store %v", p) } o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0) @@ -3530,26 +3704,36 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { if p.From.Offset != 0 { c.ctxt.Diag("invalid offset against tls var %v", p) } - o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0) - o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0) rel := obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 8 rel.Sym = p.From.Sym - rel.Type = objabi.R_POWER_TLS_LE + if !o.ispfx { + o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0) + o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0) + rel.Type = objabi.R_POWER_TLS_LE + } else { + o1, o2 = pfxadd(p.To.Reg, REG_R13, PFX_R_ABS, 0) + rel.Type = objabi.R_POWER_TLS_LE_TPREL34 + } case 80: if p.From.Offset != 0 { c.ctxt.Diag("invalid offset against tls var %v", p) } - o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0) - o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0) - o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13) rel := obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 8 rel.Sym = p.From.Sym rel.Type = objabi.R_POWER_TLS_IE + if !o.ispfx { + o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0) + 
o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0) + } else { + o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL) + rel.Type = objabi.R_POWER_TLS_IE_PCREL34 + } + o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13) rel = obj.Addrel(c.cursym) rel.Off = int32(c.pc) + 8 rel.Siz = 4 diff --git a/src/cmd/internal/obj/ppc64/doc.go b/src/cmd/internal/obj/ppc64/doc.go index 28340e425d..835182bcc6 100644 --- a/src/cmd/internal/obj/ppc64/doc.go +++ b/src/cmd/internal/obj/ppc64/doc.go @@ -250,5 +250,34 @@ Register names: CRnGT represents CR bit 1 of CR field n. (0-7) CRnEQ represents CR bit 2 of CR field n. (0-7) CRnSO represents CR bit 3 of CR field n. (0-7) + +# GOPPC64 >= power10 and its effects on Go asm + +When GOPPC64=power10 is used to compile a Go program for ppc64le/linux, MOV*, FMOV*, and ADD +opcodes which would require 2 or more machine instructions to emulate a 32 bit constant, or +symbolic reference are implemented using prefixed instructions. + +A user who wishes granular control over the generated machine code is advised to use Go asm +opcodes which explicitly translate to one PPC64 machine instruction. Most common opcodes +are supported. + +Some examples of how pseudo-op assembly changes with GOPPC64: + + Go asm GOPPC64 <= power9 GOPPC64 >= power10 + MOVD mypackage·foo(SB), R3 addis r2, r3, ... pld r3, ... + ld r3, r3, ... + + MOVD 131072(R3), R4 addis r31, r4, 2 pld r4, 131072(r3) + ld r4, 0(R3) + + ADD $131073, R3 lis r31, 2 paddi r3, r3, 131073 + addi r31, 1 + add r3,r31,r3 + + MOVD $131073, R3 lis r3, 2 pli r3, 131073 + addi r3, 1 + + MOVD $mypackage·foo(SB), R3 addis r2, r3, ... pla r3, ... + addi r3, r3, ... */ package ppc64 |
