From b24aec598b9777de6044cf4f52042d3b3acc0f35 Mon Sep 17 00:00:00 2001 From: Mark Ryan Date: Wed, 26 Feb 2025 08:57:30 +0100 Subject: doc, cmd/internal/obj/riscv: document the riscv64 assembler Add documentation for the riscv64 assembler with a link to the documentation from asm.html. Architecture specific assembler documentation is provided for the other architectures but has been missing for riscv64 until now. Change-Id: I62ed7e6a2a4b52e0720d869e964b29e2a980223a Reviewed-on: https://go-review.googlesource.com/c/go/+/652717 Reviewed-by: Joel Sing Reviewed-by: Michael Pratt Reviewed-by: Meng Zhuo Auto-Submit: Joel Sing Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- src/cmd/internal/obj/riscv/doc.go | 297 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 297 insertions(+) create mode 100644 src/cmd/internal/obj/riscv/doc.go (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/riscv/doc.go b/src/cmd/internal/obj/riscv/doc.go new file mode 100644 index 0000000000..365bedd299 --- /dev/null +++ b/src/cmd/internal/obj/riscv/doc.go @@ -0,0 +1,297 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package riscv implements the riscv64 assembler. + +# Register naming + +The integer registers are named X0 through to X31, however X4 must be accessed +through its RISC-V ABI name, TP, and X27, which holds a pointer to the Go +routine structure, must be referred to as g. Additionally, when building in +shared mode, X3 is unavailable and must be accessed via its RISC-V ABI name, +GP. + +The floating-point registers are named F0 through to F31. + +The vector registers are named V0 through to V31. + +Both integer and floating-point registers can be referred to by their RISC-V +ABI names, e.g., A0 or FT0, with the exception that X27 cannot be referred to +by its RISC-V ABI name, S11. It must be referred to as g. 
+ +Some of the integer registers are used by the Go runtime and assembler - X26 is +the closure pointer, X27 points to the Go routine structure and X31 is a +temporary register used by the Go assembler. Use of X31 should be avoided in +hand written assembly code as its value could be altered by the instruction +sequences emitted by the assembler. + +# Instruction naming + +Many RISC-V instructions contain one or more suffixes in their names. In the +[RISC-V ISA Manual] these suffixes are separated from each other and from the +name of the instruction mnemonic with a dot ('.'). In the Go assembler, the +separators are omitted and the suffixes are written in upper case. + +Example: + + FMVWX <=> fmv.w.x + +# Rounding modes + +The Go toolchain does not set the FCSR register and requires the desired +rounding mode to be explicitly encoded within floating-point instructions. +The syntax the Go assembler uses to specify the rounding modes differs +from the syntax in the RISC-V specifications. In the [RISC-V ISA Manual] +the rounding mode is given as an extra operand at the end of an +assembly language instruction. In the Go assembler, the rounding modes are +converted to uppercase and follow the instruction mnemonic from which they +are separated with a dot ('.'). + +Example: + + FCVTLUS.RNE F0, X5 <=> fcvt.lu.s x5, f0, rne + +RTZ is assumed if the rounding mode is omitted. + +# RISC-V extensions + +By default the Go compiler targets the [rva20u64] profile. This profile mandates +all the general RISC-V instructions, allowing Go to use integer, multiplication, +division, floating-point and atomic instructions without having to +perform compile time or runtime checks to verify that their use is appropriate +for the target hardware. All widely available riscv64 devices support at least +[rva20u64]. The Go toolchain can be instructed to target later RISC-V profiles, +including [rva22u64] and [rva23u64], via the GORISCV64 environment variable. 
+Instructions that are provided by newer profiles cannot typically be used in +handwritten assembly code without compile time guards (or runtime checks) +that ensure they are hardware supported. + +The file asm_riscv64.h defines macros for each RISC-V extension that is enabled +by setting the GORISCV64 environment variable to a value other than [rva20u64]. +For example, if GORISCV64=rva22u64 the macros hasZba, hasZbb and hasZbs will be +defined. If GORISCV64=rva23u64 hasV will be defined in addition to hasZba, +hasZbb and hasZbs. These macros can be used to determine whether it's safe +to use an instruction in hand-written assembly. + +It is not always necessary to include asm_riscv64.h and use #ifdefs in your +code to safely take advantage of instructions present in the [rva22u64] +profile. In some cases the assembler can generate [rva20u64] compatible code +even when an [rva22u64] instruction is used in an assembly source file. When +GORISCV64=rva20u64 the assembler will synthesize certain [rva22u64] +instructions, e.g., ANDN, using multiple [rva20u64] instructions. Instructions +such as ANDN can then be freely used in assembly code without checking to see +whether the instruction is supported by the target profile. When building a +source file containing the ANDN instruction with GORISCV64=rva22u64 the +assembler will emit the Zbb ANDN instruction directly. When building the same +source file with GORISCV64=rva20u64 the assembler will emit multiple [rva20u64] +instructions to synthesize ANDN. + +The assembler will also use [rva22u64] instructions to implement the zero and +sign extension instructions, e.g., MOVB and MOVHU, when GORISCV64=rva22u64 or +greater. 
+ +The instructions not implemented in the default profile ([rva20u64]) that can +be safely used in assembly code without compile time checks are: + + - ANDN + - MAX + - MAXU + - MIN + - MINU + - MOVB + - MOVH + - MOVHU + - MOVWU + - ORN + - ROL + - ROLW + - ROR + - RORI + - RORIW + - RORW + - XNOR + +# Operand ordering + +The ordering used for instruction operands in the Go assembler differs from the +ordering defined in the [RISC-V ISA Manual]. + +1. R-Type instructions + +R-Type instructions are written in the reverse order to that given in the +[RISC-V ISA Manual], with the register order being rs2, rs1, rd. + +Examples: + + ADD X10, X11, X12 <=> add x12, x11, x10 + FADDD F10, F11, F12 <=> fadd.d f12, f11, f10 + +2. I-Type arithmetic instructions + +I-Type arithmetic instructions (not loads, fences, ebreak, ecall) use the same +ordering as the R-Type instructions, typically, imm12, rs1, rd. + +Examples: + + ADDI $1, X11, X12 <=> addi x12, x11, 1 + SLTI $1, X11, X12 <=> slti x12, x11, 1 + +3. Loads and Stores + +Load and store instructions are written with the source operand (whether it be a register +or a memory address) first, followed by the destination operand. + +Examples: + + MOV 16(X2), X10 <=> ld x10, 16(x2) + MOV X10, (X2) <=> sd x10, 0(x2) + +4. Branch instructions + +The branch instructions use the same operand ordering as is given in the +[RISC-V ISA Manual], e.g., rs1, rs2, label. + +Example: + + BLT X12, X23, loop1 <=> blt x12, x23, loop1 + +BLT X12, X23, label will jump to label if X12 < X23. Note this is not the +same ordering as is used for the SLT instructions. + +5. FMA instructions + +The Go assembler uses a different ordering for the RISC-V FMA operands to +the ordering given in the [RISC-V ISA Manual]. The operands are rotated one +place to the left, so that the destination operand comes last. + +Example: + + FMADDS F1, F2, F3, F4 <=> fmadd.s f4, f1, f2, f3 + +6. 
AMO instructions + +The ordering used for the AMO operations is rs2, rs1, rd, i.e., the operands +as specified in the [RISC-V ISA Manual] are rotated one place to the left. + +Example: + + AMOSWAPW X5, (X6), X7 <=> amoswap.w x7, x5, (x6) + +7. Vector instructions + +The VSETVLI instruction uses the same symbolic names as the [RISC-V ISA Manual] +to represent the components of vtype, with the exception +that they are written in upper case. The ordering of the operands in the Go +assembler differs from the [RISC-V ISA Manual] in that the operands are +rotated one place to the left so that the destination register, the register +that holds the new vl, is the last operand. + +Example: + + VSETVLI X10, E8, M1, TU, MU, X12 <=> vsetvli x12, x10, e8, m1, tu, mu + +Vector load and store instructions follow the pattern set by scalar loads and +stores, i.e., the source is always the first operand and the destination the +last. However, the ordering of the operands of these instructions is +complicated by the optional mask register and, in some cases, the use of an +additional stride or index register. In the Go assembler the index and stride +registers appear as the second operand in indexed or strided loads and stores, +while the mask register, if present, is always the penultimate operand. 
+ +Examples: + + VLE8V (X10), V3 <=> vle8.v v3, (x10) + VSE8V V3, (X10) <=> vse8.v v3, (x10) + VLE8V (X10), V0, V3 <=> vle8.v v3, (x10), v0.t + VSE8V V3, V0, (X10) <=> vse8.v v3, (x10), v0.t + VLSE8V (X10), X11, V3 <=> vlse8.v v3, (x10), x11 + VSSE8V V3, X11, (X10) <=> vsse8.v v3, (x10), x11 + VLSE8V (X10), X11, V0, V3 <=> vlse8.v v3, (x10), x11, v0.t + VSSE8V V3, X11, V0, (X10) <=> vsse8.v v3, (x10), x11, v0.t + VLUXEI8V (X10), V2, V3 <=> vluxei8.v v3, (x10), v2 + VSUXEI8V V3, V2, (X10) <=> vsuxei8.v v3, (x10), v2 + VLUXEI8V (X10), V2, V0, V3 <=> vluxei8.v v3, (x10), v2, v0.t + VSUXEI8V V3, V2, V0, (X10) <=> vsuxei8.v v3, (x10), v2, v0.t + VL1RE8V (X10), V3 <=> vl1re8.v v3, (x10) + VS1RV V3, (X11) <=> vs1r.v v3, (x11) + +The ordering of operands for two and three argument vector arithmetic instructions is +reversed in the Go assembler. + +Examples: + + VMVVV V2, V3 <=> vmv.v.v v3, v2 + VADDVV V1, V2, V3 <=> vadd.vv v3, v2, v1 + VADDVX X10, V2, V3 <=> vadd.vx v3, v2, x10 + VMADCVI $15, V2, V3 <=> vmadc.vi v3, v2, 15 + +The mask register, when specified, is always the penultimate operand in a vector +arithmetic instruction, appearing before the destination register. + +Examples: + + VANDVV V1, V2, V0, V3 <=> vand.vv v3, v2, v1, v0.t + +# Ternary instructions + +The Go assembler allows the second operand to be omitted from most ternary +instructions if it matches the third (destination) operand. + +Examples: + + ADD X10, X12, X12 <=> ADD X10, X12 + ANDI $3, X12, X12 <=> ANDI $3, X12 + +The use of this abbreviated syntax is encouraged. + +# Ordering of atomic instructions + +It is not possible to specify the ordering bits in the FENCE, LR, SC or AMO +instructions. The FENCE instruction is always emitted as a full fence, the +acquire and release bits are always set for the AMO instructions, the acquire +bit is always set for the LR instructions while the release bit is set for +the SC instructions. 
+ +# Immediate operands + +In many cases, where an R-Type instruction has a corresponding I-Type +instruction, the R-Type mnemonic can be used in place of the I-Type mnemonic. +The assembler assumes that the immediate form of the instruction was intended +when the first operand is given as an immediate value rather than a register. + +Example: + + AND $3, X12, X13 <=> ANDI $3, X12, X13 + +# Integer constant materialization + +The MOV instruction can be used to set a register to the value of any 64 bit +constant literal. The way this is achieved by the assembler varies depending +on the value of the constant. Where possible the assembler will synthesize the +constant using one or more RISC-V arithmetic instructions. If it is unable +to easily materialize the constant it will load the 64 bit literal from memory. + +A 32 bit constant literal can be specified as an argument to ADDI, ANDI, ORI and +XORI. If the specified literal does not fit into 12 bits the assembler will +generate extra instructions to synthesize it. + +Integer constants provided as operands to all other instructions must fit into +the number of bits allowed by the instructions' encodings for immediate values. +Otherwise, an error will be generated. + +# Floating point constant materialization + +The MOVF and MOVD instructions can be used to set a register to the value +of any 32 bit or 64 bit floating point constant literal, respectively. Unless +the constant literal is 0.0, MOVF and MOVD will be encoded as FLW and FLD +instructions that load the constant from a location within the program's +binary. 
+ +[RISC-V ISA Manual]: https://github.com/riscv/riscv-isa-manual +[rva20u64]: https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc#51-rva20u64-profile +[rva22u64]: https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc#rva22u64-profile +[rva23u64]: https://github.com/riscv/riscv-profiles/blob/main/src/rva23-profile.adoc#rva23u64-profile +*/ +package riscv -- cgit v1.3 From b9ef0633f6117c74fabcd7247a76b4feb86df086 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Sun, 27 Aug 2023 19:35:33 +1000 Subject: cmd/internal/sys,internal/goarch,runtime: enable the use of compressed instructions on riscv64 Enable the use of compressed instructions on riscv64 by reducing the PC quantum to two bytes and reducing the minimum instruction length to two bytes. Change gostartcall on riscv64 to land at two times the PC quantum into goexit, so that we retain four byte alignment and revise the NOP instructions in goexit to ensure that they are never compressed. Additionally, adjust PCALIGN so that it correctly handles two byte offsets. 
Fixes #47560 Updates #71105 Cq-Include-Trybots: luci.golang.try:gotip-linux-riscv64 Change-Id: I4329a8fbfcb4de636aadaeadabb826bc22698640 Reviewed-on: https://go-review.googlesource.com/c/go/+/523477 Reviewed-by: Junyang Shao Reviewed-by: Mark Freeman LUCI-TryBot-Result: Go LUCI Reviewed-by: Mark Ryan --- src/cmd/internal/obj/riscv/obj.go | 11 +++++++++-- src/cmd/internal/sys/arch.go | 2 +- src/internal/goarch/goarch_riscv64.go | 2 +- src/runtime/asm_riscv64.s | 10 +++++----- src/runtime/sys_riscv64.go | 11 +++++++++-- 5 files changed, 25 insertions(+), 11 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 3deab34d31..8b9be5d78b 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -4799,10 +4799,17 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { v := pcAlignPadLength(p.Pc, alignedValue) offset := p.Pc for ; v >= 4; v -= 4 { - // NOP - cursym.WriteBytes(ctxt, offset, []byte{0x13, 0, 0, 0}) + // NOP (ADDI $0, X0, X0) + cursym.WriteBytes(ctxt, offset, []byte{0x13, 0x00, 0x00, 0x00}) offset += 4 } + if v == 2 { + // CNOP + cursym.WriteBytes(ctxt, offset, []byte{0x01, 0x00}) + offset += 2 + } else if v != 0 { + ctxt.Diag("bad PCALIGN pad length") + } continue } diff --git a/src/cmd/internal/sys/arch.go b/src/cmd/internal/sys/arch.go index 3c92a6bbf2..14b1cde22b 100644 --- a/src/cmd/internal/sys/arch.go +++ b/src/cmd/internal/sys/arch.go @@ -236,7 +236,7 @@ var ArchRISCV64 = &Arch{ ByteOrder: binary.LittleEndian, PtrSize: 8, RegSize: 8, - MinLC: 4, + MinLC: 2, Alignment: 8, // riscv unaligned loads work, but are really slow (trap + simulated by OS) CanMergeLoads: false, HasLR: true, diff --git a/src/internal/goarch/goarch_riscv64.go b/src/internal/goarch/goarch_riscv64.go index 3b6da1e02f..468f9a6374 100644 --- a/src/internal/goarch/goarch_riscv64.go +++ b/src/internal/goarch/goarch_riscv64.go @@ -7,7 +7,7 @@ package 
goarch const ( _ArchFamily = RISCV64 _DefaultPhysPageSize = 4096 - _PCQuantum = 4 + _PCQuantum = 2 _MinFrameSize = 8 _StackAlign = PtrSize ) diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s index 5bd16181ee..428701a503 100644 --- a/src/runtime/asm_riscv64.s +++ b/src/runtime/asm_riscv64.s @@ -623,14 +623,14 @@ TEXT _cgo_topofstack(SB),NOSPLIT,$8 RET // func goexit(neverCallThisFunction) -// The top-most function running on a goroutine -// returns to goexit+PCQuantum. +// The top-most function running on a goroutine, returns to goexit+PCQuantum*2. +// Note that the NOPs are written in a manner that will not be compressed, +// since the offset must be known by the runtime. TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0 - MOV ZERO, ZERO // NOP + WORD $0x00000013 // NOP JMP runtime·goexit1(SB) // does not return // traceback from goexit1 must hit code range of goexit - MOV ZERO, ZERO // NOP - + WORD $0x00000013 // NOP // This is called from .init_array and follows the platform, not the Go ABI. TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0 diff --git a/src/runtime/sys_riscv64.go b/src/runtime/sys_riscv64.go index e710840819..65dc684c33 100644 --- a/src/runtime/sys_riscv64.go +++ b/src/runtime/sys_riscv64.go @@ -4,7 +4,12 @@ package runtime -import "unsafe" +import ( + "unsafe" + + "internal/abi" + "internal/runtime/sys" +) // adjust Gobuf as if it executed a call to fn with context ctxt // and then did an immediate Gosave. @@ -12,7 +17,9 @@ func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) { if buf.lr != 0 { throw("invalid use of gostartcall") } - buf.lr = buf.pc + // Use double the PC quantum on riscv64, so that we retain + // four byte alignment and use non-compressed instructions. 
+ buf.lr = abi.FuncPCABI0(goexit) + sys.PCQuantum*2 buf.pc = uintptr(fn) buf.ctxt = ctxt } -- cgit v1.3 From 9859b436430aac382b337964a1b380bc4bfcda70 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Fri, 26 Sep 2025 05:05:49 +1000 Subject: cmd/asm,cmd/compile,cmd/internal/obj/riscv: use compressed instructions on riscv64 Make use of compressed instructions on riscv64 - add a compress pass to the end of the assembler, which replaces non-compressed instructions with compressed alternatives if possible. Provide a `compressinstructions` compiler and assembler debug flag, such that the compression pass can be disabled via `-asmflags=all=-d=compressinstructions=0` and `-gcflags=all=-d=compressinstructions=0`. Note that this does not prevent the explicit use of compressed instructions via assembly. Note that this does not make use of compressed control transfer instructions - this will be implemented in later changes. Reduces the text size of a hello world binary by ~121KB and reduces the text size of the go binary on riscv64 by ~1.21MB (between 8-10% in both cases). 
Updates #71105 Cq-Include-Trybots: luci.golang.try:gotip-linux-riscv64 Change-Id: I24258353688554042c2a836deed4830cc673e985 Reviewed-on: https://go-review.googlesource.com/c/go/+/523478 Reviewed-by: Mark Ryan Reviewed-by: Mark Freeman LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui --- src/cmd/asm/internal/flags/flags.go | 7 +- src/cmd/asm/main.go | 1 + src/cmd/compile/internal/base/debug.go | 1 + src/cmd/compile/internal/base/flag.go | 2 + src/cmd/internal/obj/link.go | 61 +++++------ src/cmd/internal/obj/riscv/asm_test.go | 16 +-- src/cmd/internal/obj/riscv/cpu.go | 3 + src/cmd/internal/obj/riscv/obj.go | 178 +++++++++++++++++++++++++++++++-- src/cmd/link/internal/ld/ld_test.go | 4 +- 9 files changed, 225 insertions(+), 48 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/flags/flags.go b/src/cmd/asm/internal/flags/flags.go index e15a062749..19aa65630f 100644 --- a/src/cmd/asm/internal/flags/flags.go +++ b/src/cmd/asm/internal/flags/flags.go @@ -29,8 +29,9 @@ var ( ) var DebugFlags struct { - MayMoreStack string `help:"call named function before all stack growth checks"` - PCTab string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"` + CompressInstructions int `help:"use compressed instructions when possible (if supported by architecture)"` + MayMoreStack string `help:"call named function before all stack growth checks"` + PCTab string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"` } var ( @@ -47,6 +48,8 @@ func init() { flag.Var(objabi.NewDebugFlag(&DebugFlags, nil), "d", "enable debugging settings; try -d help") objabi.AddVersionFlag() // -V objabi.Flagcount("S", "print assembly and machine code", &PrintOut) + + DebugFlags.CompressInstructions = 1 } // MultiFlag allows setting a value multiple times to collect a list, as in -I=dir1 -I=dir2. 
diff --git a/src/cmd/asm/main.go b/src/cmd/asm/main.go index f2697db516..25cf307140 100644 --- a/src/cmd/asm/main.go +++ b/src/cmd/asm/main.go @@ -40,6 +40,7 @@ func main() { log.Fatalf("unrecognized architecture %s", GOARCH) } ctxt := obj.Linknew(architecture.LinkArch) + ctxt.CompressInstructions = flags.DebugFlags.CompressInstructions != 0 ctxt.Debugasm = flags.PrintOut ctxt.Debugvlog = flags.DebugV ctxt.Flag_dynlink = *flags.Dynlink diff --git a/src/cmd/compile/internal/base/debug.go b/src/cmd/compile/internal/base/debug.go index 9e8ab2f488..b532bf435e 100644 --- a/src/cmd/compile/internal/base/debug.go +++ b/src/cmd/compile/internal/base/debug.go @@ -20,6 +20,7 @@ type DebugFlags struct { Append int `help:"print information about append compilation"` Checkptr int `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation" concurrent:"ok"` Closure int `help:"print information about closure compilation"` + CompressInstructions int `help:"use compressed instructions when possible (if supported by architecture)"` Converthash string `help:"hash value for use in debugging changes to platform-dependent float-to-[u]int conversion" concurrent:"ok"` Defer int `help:"print information about defer compilation"` DisableNil int `help:"disable nil checks" concurrent:"ok"` diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go index 1d211e0a2d..63cae41524 100644 --- a/src/cmd/compile/internal/base/flag.go +++ b/src/cmd/compile/internal/base/flag.go @@ -177,6 +177,7 @@ func ParseFlags() { Flag.WB = true Debug.ConcurrentOk = true + Debug.CompressInstructions = 1 Debug.MaxShapeLen = 500 Debug.AlignHot = 1 Debug.InlFuncsWithClosures = 1 @@ -299,6 +300,7 @@ func ParseFlags() { } parseSpectre(Flag.Spectre) // left as string for RecordFlags + Ctxt.CompressInstructions = Debug.CompressInstructions != 0 Ctxt.Flag_shared = 
Ctxt.Flag_dynlink || Ctxt.Flag_shared Ctxt.Flag_optimize = Flag.N == 0 Ctxt.Debugasm = int(Flag.S) diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index 85dca33d27..c70c1d9438 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -1153,36 +1153,37 @@ type Func interface { // Link holds the context for writing object code from a compiler // to be linker input or for reading that input into the linker. type Link struct { - Headtype objabi.HeadType - Arch *LinkArch - Debugasm int - Debugvlog bool - Debugpcln string - Flag_shared bool - Flag_dynlink bool - Flag_linkshared bool - Flag_optimize bool - Flag_locationlists bool - Flag_noRefName bool // do not include referenced symbol names in object file - Retpoline bool // emit use of retpoline stubs for indirect jmp/call - Flag_maymorestack string // If not "", call this function before stack checks - Bso *bufio.Writer - Pathname string - Pkgpath string // the current package's import path - hashmu sync.Mutex // protects hash, funchash - hash map[string]*LSym // name -> sym mapping - funchash map[string]*LSym // name -> sym mapping for ABIInternal syms - statichash map[string]*LSym // name -> sym mapping for static syms - PosTable src.PosTable - InlTree InlTree // global inlining tree used by gc/inl.go - DwFixups *DwarfFixupTable - DwTextCount int - Imports []goobj.ImportedPkg - DiagFunc func(string, ...any) - DiagFlush func() - DebugInfo func(ctxt *Link, fn *LSym, info *LSym, curfn Func) ([]dwarf.Scope, dwarf.InlCalls) - GenAbstractFunc func(fn *LSym) - Errors int + Headtype objabi.HeadType + Arch *LinkArch + CompressInstructions bool // use compressed instructions where possible (if supported by architecture) + Debugasm int + Debugvlog bool + Debugpcln string + Flag_shared bool + Flag_dynlink bool + Flag_linkshared bool + Flag_optimize bool + Flag_locationlists bool + Flag_noRefName bool // do not include referenced symbol names in object file + Retpoline bool // emit 
use of retpoline stubs for indirect jmp/call + Flag_maymorestack string // If not "", call this function before stack checks + Bso *bufio.Writer + Pathname string + Pkgpath string // the current package's import path + hashmu sync.Mutex // protects hash, funchash + hash map[string]*LSym // name -> sym mapping + funchash map[string]*LSym // name -> sym mapping for ABIInternal syms + statichash map[string]*LSym // name -> sym mapping for static syms + PosTable src.PosTable + InlTree InlTree // global inlining tree used by gc/inl.go + DwFixups *DwarfFixupTable + DwTextCount int + Imports []goobj.ImportedPkg + DiagFunc func(string, ...any) + DiagFlush func() + DebugInfo func(ctxt *Link, fn *LSym, info *LSym, curfn Func) ([]dwarf.Scope, dwarf.InlCalls) + GenAbstractFunc func(fn *LSym) + Errors int InParallel bool // parallel backend phase in effect UseBASEntries bool // use Base Address Selection Entries in location lists and PC ranges diff --git a/src/cmd/internal/obj/riscv/asm_test.go b/src/cmd/internal/obj/riscv/asm_test.go index f40e57fa64..5b50d1533a 100644 --- a/src/cmd/internal/obj/riscv/asm_test.go +++ b/src/cmd/internal/obj/riscv/asm_test.go @@ -11,8 +11,8 @@ import ( "os" "os/exec" "path/filepath" + "regexp" "runtime" - "strings" "testing" ) @@ -48,10 +48,10 @@ func genLargeBranch(buf *bytes.Buffer) { fmt.Fprintln(buf, "TEXT f(SB),0,$0-0") fmt.Fprintln(buf, "BEQ X0, X0, label") for i := 0; i < 1<<19; i++ { - fmt.Fprintln(buf, "ADD $0, X0, X0") + fmt.Fprintln(buf, "ADD $0, X5, X0") } fmt.Fprintln(buf, "label:") - fmt.Fprintln(buf, "ADD $0, X0, X0") + fmt.Fprintln(buf, "ADD $0, X5, X0") } // TestLargeCall generates a large function (>1MB of text) with a call to @@ -112,11 +112,11 @@ func genLargeCall(buf *bytes.Buffer) { fmt.Fprintln(buf, "TEXT ·x(SB),0,$0-0") fmt.Fprintln(buf, "CALL ·y(SB)") for i := 0; i < 1<<19; i++ { - fmt.Fprintln(buf, "ADD $0, X0, X0") + fmt.Fprintln(buf, "ADD $0, X5, X0") } fmt.Fprintln(buf, "RET") fmt.Fprintln(buf, "TEXT ·y(SB),0,$0-0") 
- fmt.Fprintln(buf, "ADD $0, X0, X0") + fmt.Fprintln(buf, "ADD $0, X5, X0") fmt.Fprintln(buf, "RET") } @@ -301,9 +301,9 @@ TEXT _stub(SB),$0-0 // FENCE // NOP // FENCE - // RET - want := "0f 00 f0 0f 13 00 00 00 0f 00 f0 0f 67 80 00 00" - if !strings.Contains(string(out), want) { + // RET (CJALR or JALR) + want := regexp.MustCompile("0x0000 0f 00 f0 0f 13 00 00 00 0f 00 f0 0f (82 80|67 80 00 00) ") + if !want.Match(out) { t.Errorf("PCALIGN test failed - got %s\nwant %s", out, want) } } diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go index 60174a0b3a..a91395dd38 100644 --- a/src/cmd/internal/obj/riscv/cpu.go +++ b/src/cmd/internal/obj/riscv/cpu.go @@ -326,6 +326,9 @@ const ( NEED_GOT_PCREL_ITYPE_RELOC ) +const NEED_RELOC = NEED_JAL_RELOC | NEED_CALL_RELOC | NEED_PCREL_ITYPE_RELOC | + NEED_PCREL_STYPE_RELOC | NEED_GOT_PCREL_ITYPE_RELOC + // RISC-V mnemonics, as defined in the "opcodes" and "opcodes-pseudo" files // at https://github.com/riscv/riscv-opcodes. // diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 8b9be5d78b..043be17c07 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -414,10 +414,10 @@ func containsCall(sym *obj.LSym) bool { // setPCs sets the Pc field in all instructions reachable from p. // It uses pc as the initial value and returns the next available pc. -func setPCs(p *obj.Prog, pc int64) int64 { +func setPCs(p *obj.Prog, pc int64, compress bool) int64 { for ; p != nil; p = p.Link { p.Pc = pc - for _, ins := range instructionsForProg(p) { + for _, ins := range instructionsForProg(p, compress) { pc += int64(ins.length()) } @@ -671,7 +671,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // a fixed point will be reached). No attempt to handle functions > 2GiB. 
for { big, rescan := false, false - maxPC := setPCs(cursym.Func().Text, 0) + maxPC := setPCs(cursym.Func().Text, 0, ctxt.CompressInstructions) if maxPC+maxTrampSize > (1 << 20) { big = true } @@ -801,7 +801,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // Validate all instructions - this provides nice error messages. for p := cursym.Func().Text; p != nil; p = p.Link { - for _, ins := range instructionsForProg(p) { + for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) { ins.validate(ctxt) } } @@ -1141,6 +1141,14 @@ func wantImmU(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) { } } +func isScaledImmI(imm int64, nbits uint, scale int64) bool { + return immFits(imm, nbits, true) == nil && imm%scale == 0 +} + +func isScaledImmU(imm int64, nbits uint, scale int64) bool { + return immFits(imm, nbits, false) == nil && imm%scale == 0 +} + func wantScaledImm(ctxt *obj.Link, ins *instruction, imm int64, nbits uint, scale int64, signed bool) { if err := immFits(imm, nbits, signed); err != nil { ctxt.Diag("%v: %v", ins, err) @@ -1180,6 +1188,10 @@ func wantIntReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { wantReg(ctxt, ins, pos, "integer", r, REG_X0, REG_X31) } +func isIntPrimeReg(r uint32) bool { + return r >= REG_X8 && r <= REG_X15 +} + // wantIntPrimeReg checks that r is an integer register that can be used // in a prime register field of a compressed instruction. func wantIntPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { @@ -1191,6 +1203,10 @@ func wantFloatReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { wantReg(ctxt, ins, pos, "float", r, REG_F0, REG_F31) } +func isFloatPrimeReg(r uint32) bool { + return r >= REG_F8 && r <= REG_F15 +} + // wantFloatPrimeReg checks that r is an floating-point register that can // be used in a prime register field of a compressed instruction. 
func wantFloatPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { @@ -3515,6 +3531,147 @@ func (ins *instruction) usesRegTmp() bool { return ins.rd == REG_TMP || ins.rs1 == REG_TMP || ins.rs2 == REG_TMP } +func (ins *instruction) compress() { + switch ins.as { + case ALW: + if ins.rd != REG_X0 && ins.rs1 == REG_SP && isScaledImmU(ins.imm, 8, 4) { + ins.as, ins.rs1, ins.rs2 = ACLWSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) { + ins.as = ACLW + } + + case ALD: + if ins.rs1 == REG_SP && ins.rd != REG_X0 && isScaledImmU(ins.imm, 9, 8) { + ins.as, ins.rs1, ins.rs2 = ACLDSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) { + ins.as = ACLD + } + + case AFLD: + if ins.rs1 == REG_SP && isScaledImmU(ins.imm, 9, 8) { + ins.as, ins.rs1, ins.rs2 = ACFLDSP, obj.REG_NONE, ins.rs1 + } else if isFloatPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) { + ins.as = ACFLD + } + + case ASW: + if ins.rd == REG_SP && isScaledImmU(ins.imm, 8, 4) { + ins.as, ins.rs1, ins.rs2 = ACSWSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) { + ins.as, ins.rd, ins.rs1, ins.rs2 = ACSW, obj.REG_NONE, ins.rd, ins.rs1 + } + + case ASD: + if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) { + ins.as, ins.rs1, ins.rs2 = ACSDSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) { + ins.as, ins.rd, ins.rs1, ins.rs2 = ACSD, obj.REG_NONE, ins.rd, ins.rs1 + } + + case AFSD: + if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) { + ins.as, ins.rs1, ins.rs2 = ACFSDSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isFloatPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) { + ins.as, ins.rd, ins.rs1, ins.rs2 = ACFSD, obj.REG_NONE, ins.rd, ins.rs1 + } + + case AADDI: + if ins.rd == REG_SP && ins.rs1 == 
REG_SP && ins.imm != 0 && isScaledImmI(ins.imm, 10, 16) { + ins.as = ACADDI16SP + } else if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 && immIFits(ins.imm, 6) == nil { + ins.as = ACADDI + } else if isIntPrimeReg(ins.rd) && ins.rs1 == REG_SP && ins.imm != 0 && isScaledImmU(ins.imm, 10, 4) { + ins.as = ACADDI4SPN + } else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && immIFits(ins.imm, 6) == nil { + ins.as, ins.rs1 = ACLI, obj.REG_NONE + } else if ins.rd != REG_X0 && ins.rs1 != REG_X0 && ins.imm == 0 { + ins.as, ins.rs1, ins.rs2 = ACMV, obj.REG_NONE, ins.rs1 + } else if ins.rd == REG_X0 && ins.rs1 == REG_X0 && ins.imm == 0 { + ins.as, ins.rs1 = ACNOP, ins.rd + } + + case AADDIW: + if ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil { + ins.as = ACADDIW + } + + case ALUI: + if ins.rd != REG_X0 && ins.rd != REG_SP && ins.imm != 0 && immIFits(ins.imm, 6) == nil { + ins.as = ACLUI + } + + case ASLLI: + if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 { + ins.as = ACSLLI + } + + case ASRLI: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 { + ins.as = ACSRLI + } + + case ASRAI: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 { + ins.as = ACSRAI + } + + case AANDI: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil { + ins.as = ACANDI + } + + case AADD: + if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.rs2 != REG_X0 { + ins.as = ACADD + } else if ins.rd != REG_X0 && ins.rd == ins.rs2 && ins.rs1 != REG_X0 { + ins.as, ins.rs1, ins.rs2 = ACADD, ins.rs2, ins.rs1 + } else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && ins.rs2 != REG_X0 { + ins.as = ACMV + } + + case AADDW: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACADDW + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 { + ins.as, ins.rs1, ins.rs2 = ACADDW, ins.rs2, ins.rs1 + } + + case ASUB: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = 
ACSUB + } + + case ASUBW: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACSUBW + } + + case AAND: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACAND + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 { + ins.as, ins.rs1, ins.rs2 = ACAND, ins.rs2, ins.rs1 + } + + case AOR: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACOR + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 { + ins.as, ins.rs1, ins.rs2 = ACOR, ins.rs2, ins.rs1 + } + + case AXOR: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACXOR + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 { + ins.as, ins.rs1, ins.rs2 = ACXOR, ins.rs2, ins.rs1 + } + + case AEBREAK: + ins.as, ins.rd, ins.rs1 = ACEBREAK, obj.REG_NONE, obj.REG_NONE + } +} + // instructionForProg returns the default *obj.Prog to instruction mapping. func instructionForProg(p *obj.Prog) *instruction { ins := &instruction{ @@ -4057,7 +4214,7 @@ func instructionsForMinMax(p *obj.Prog, ins *instruction) []*instruction { } // instructionsForProg returns the machine instructions for an *obj.Prog. -func instructionsForProg(p *obj.Prog) []*instruction { +func instructionsForProg(p *obj.Prog, compress bool) []*instruction { ins := instructionForProg(p) inss := []*instruction{ins} @@ -4710,6 +4867,15 @@ func instructionsForProg(p *obj.Prog) []*instruction { ins.rs1, ins.rs2 = obj.REG_NONE, REG_V0 } + // Only compress instructions when there is no relocation, since + // relocation relies on knowledge about the exact instructions that + // are in use. 
+ if compress && p.Mark&NEED_RELOC == 0 { + for _, ins := range inss { + ins.compress() + } + } + for _, ins := range inss { ins.p = p } @@ -4814,7 +4980,7 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } offset := p.Pc - for _, ins := range instructionsForProg(p) { + for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) { if ic, err := ins.encode(); err == nil { cursym.WriteInt(ctxt, offset, ins.length(), int64(ic)) offset += int64(ins.length()) diff --git a/src/cmd/link/internal/ld/ld_test.go b/src/cmd/link/internal/ld/ld_test.go index 9a27ac8c76..64b86f3a0b 100644 --- a/src/cmd/link/internal/ld/ld_test.go +++ b/src/cmd/link/internal/ld/ld_test.go @@ -387,7 +387,7 @@ func TestRISCVTrampolines(t *testing.T) { buf := new(bytes.Buffer) fmt.Fprintf(buf, "TEXT a(SB),$0-0\n") for i := 0; i < 1<<17; i++ { - fmt.Fprintf(buf, "\tADD $0, X0, X0\n") + fmt.Fprintf(buf, "\tADD $0, X5, X0\n") } fmt.Fprintf(buf, "\tCALL b(SB)\n") fmt.Fprintf(buf, "\tRET\n") @@ -398,7 +398,7 @@ func TestRISCVTrampolines(t *testing.T) { fmt.Fprintf(buf, "\tRET\n") fmt.Fprintf(buf, "TEXT ·d(SB),0,$0-0\n") for i := 0; i < 1<<17; i++ { - fmt.Fprintf(buf, "\tADD $0, X0, X0\n") + fmt.Fprintf(buf, "\tADD $0, X5, X0\n") } fmt.Fprintf(buf, "\tCALL a(SB)\n") fmt.Fprintf(buf, "\tCALL c(SB)\n") -- cgit v1.3 From 7f2ae21fb481e527086aafee6da3dafdca444f7a Mon Sep 17 00:00:00 2001 From: Xiaolin Zhao Date: Tue, 18 Nov 2025 10:55:19 +0800 Subject: cmd/internal/obj/loong64: add MULW.D.W[U] instructions Go asm syntax: MULWVW RK, RJ, RD MULWVWU RK, RJ, RD Equivalent platform assembler syntax: mulw.d.w rd, rj, rk mulw.d.wu rd, rj, rk Change-Id: Ie46a21904a4c25d04200b0663f83072c38a76c6f Reviewed-on: https://go-review.googlesource.com/c/go/+/721521 LUCI-TryBot-Result: Go LUCI Reviewed-by: abner chenc Reviewed-by: Meidan Li Reviewed-by: Mark Freeman Reviewed-by: Keith Randall --- src/cmd/asm/internal/asm/testdata/loong64enc1.s | 6 ++++++ src/cmd/internal/obj/loong64/a.out.go 
| 4 ++++ src/cmd/internal/obj/loong64/anames.go | 2 ++ src/cmd/internal/obj/loong64/asm.go | 6 ++++++ 4 files changed, 18 insertions(+) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s index c820a0a5a1..277396bf27 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s @@ -212,6 +212,12 @@ lable2: SRLV $32, R4, R5 // 85804500 SRLV $32, R4 // 84804500 + // MULW.D.W[U] instructions + MULWVW R4, R5 // a5101f00 + MULWVW R4, R5, R6 // a6101f00 + MULWVWU R4, R5 // a5901f00 + MULWVWU R4, R5, R6 // a6901f00 + MASKEQZ R4, R5, R6 // a6101300 MASKNEZ R4, R5, R6 // a6901300 diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go index 73f145df14..5b8bffc9f1 100644 --- a/src/cmd/internal/obj/loong64/a.out.go +++ b/src/cmd/internal/obj/loong64/a.out.go @@ -589,6 +589,10 @@ const ( AORN AANDN + // 2.2.1.12 + AMULWVW + AMULWVWU + // 2.2.7. 
Atomic Memory Access Instructions AAMSWAPB AAMSWAPH diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go index ab85c52a21..1749b43bf6 100644 --- a/src/cmd/internal/obj/loong64/anames.go +++ b/src/cmd/internal/obj/loong64/anames.go @@ -131,6 +131,8 @@ var Anames = []string{ "ALSLV", "ORN", "ANDN", + "MULWVW", + "MULWVWU", "AMSWAPB", "AMSWAPH", "AMSWAPW", diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 38b075d77e..b35e49a1b6 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -1503,6 +1503,8 @@ func buildop(ctxt *obj.Link) { opset(AREMU, r0) opset(ADIV, r0) opset(ADIVU, r0) + opset(AMULWVW, r0) + opset(AMULWVWU, r0) case AMULV: opset(AMULVU, r0) @@ -3230,6 +3232,10 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { return 0x3c << 15 // mulh.d case AMULHVU: return 0x3d << 15 // mulhu.d + case AMULWVW: + return 0x3e << 15 // mulw.d.w + case AMULWVWU: + return 0x3f << 15 // mulw.d.wu case ADIV: return 0x40 << 15 // div.w case ADIVU: -- cgit v1.3 From 4b740af56a864eeaf3504d7f873c3d3cb3fcd72e Mon Sep 17 00:00:00 2001 From: Cherry Mui Date: Wed, 19 Nov 2025 17:32:12 -0500 Subject: cmd/internal/obj/x86: handle global reference in From3 in dynlink mode In dynlink mode, we rewrite reference to a global variable to a load from the GOT. Currently this code does not handle the case that the global reference is in From3 of a Prog. Most instructions don't expect a memory operand in From3, but some do, like VGF2P8AFFINEQB. Handle this case. 
Change-Id: Ibb6773606e6967bcc629d9ef5dac6e050f4008ef Reviewed-on: https://go-review.googlesource.com/c/go/+/722181 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- .../asm/internal/asm/testdata/amd64dynlinkerror.s | 5 +++++ src/cmd/internal/obj/x86/obj6.go | 25 +++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/amd64dynlinkerror.s b/src/cmd/asm/internal/asm/testdata/amd64dynlinkerror.s index 4bf58a39a4..8b104307cd 100644 --- a/src/cmd/asm/internal/asm/testdata/amd64dynlinkerror.s +++ b/src/cmd/asm/internal/asm/testdata/amd64dynlinkerror.s @@ -169,3 +169,8 @@ TEXT ·a34(SB), 0, $0-0 SHLXQ AX, CX, R15 ADDQ $1, R15 RET + +// Ensure from3 get GOT-rewritten without errors. +TEXT ·a35(SB), 0, $0-0 + VGF2P8AFFINEQB $0, runtime·writeBarrier(SB), Z1, Z1 + RET diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index 48287546b3..b2023ee48f 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -423,8 +423,12 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { q.From.Reg = reg } } - if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN { - ctxt.Diag("don't know how to handle %v with -dynlink", p) + from3 := p.GetFrom3() + for i := range p.RestArgs { + a := &p.RestArgs[i].Addr + if a != from3 && a.Name == obj.NAME_EXTERN && !a.Sym.Local() { + ctxt.Diag("don't know how to handle %v with -dynlink", p) + } } var source *obj.Addr // MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry @@ -434,9 +438,17 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p) } + if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() { + ctxt.Diag("cannot handle NAME_EXTERN on multiple operands in %v with -dynlink", p) + } 
source = &p.From } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() { + ctxt.Diag("cannot handle NAME_EXTERN on multiple operands in %v with -dynlink", p) + } source = &p.To + } else if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() { + source = from3 } else { return } @@ -501,9 +513,7 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { p2.As = p.As p2.From = p.From p2.To = p.To - if from3 := p.GetFrom3(); from3 != nil { - p2.AddRestSource(*from3) - } + p2.RestArgs = p.RestArgs if p.From.Name == obj.NAME_EXTERN { p2.From.Reg = reg p2.From.Name = obj.NAME_NONE @@ -512,6 +522,11 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { p2.To.Reg = reg p2.To.Name = obj.NAME_NONE p2.To.Sym = nil + } else if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN { + from3 = p2.GetFrom3() + from3.Reg = reg + from3.Name = obj.NAME_NONE + from3.Sym = nil } else { return } -- cgit v1.3