aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/internal/obj
diff options
context:
space:
mode:
authorCherry Mui <cherryyz@google.com>2025-11-20 14:40:43 -0500
committerCherry Mui <cherryyz@google.com>2025-11-20 14:40:43 -0500
commite3d4645693bc030b9ff9b867f1d374a1d72ef2fe (patch)
tree5d9c6783b4b1901e072ed253acc6ecdd909b23bc /src/cmd/internal/obj
parent95b4ad525fc8d70c881960ab9f75f31548023bed (diff)
parentca37d24e0b9369b8086959df5bc230b38bf98636 (diff)
downloadgo-e3d4645693bc030b9ff9b867f1d374a1d72ef2fe.tar.xz
[dev.simd] all: merge master (ca37d24) into dev.simd
Conflicts: - src/cmd/compile/internal/typecheck/builtin.go Merge List: + 2025-11-20 ca37d24e0b net/http: drop unused "broken" field from persistConn + 2025-11-20 4b740af56a cmd/internal/obj/x86: handle global reference in From3 in dynlink mode + 2025-11-20 790384c6c2 spec: adjust rule for type parameter on RHS of alias declaration + 2025-11-20 a49b0302d0 net/http: correctly close fake net.Conns + 2025-11-20 32f5aadd2f cmd/compile: stack allocate backing stores during append + 2025-11-20 a18aff8057 runtime: select GC mark workers during start-the-world + 2025-11-20 829779f4fe runtime: split findRunnableGCWorker in two + 2025-11-20 ab59569099 go/version: use "custom" as an example of a version suffix + 2025-11-19 c4bb9653ba cmd/compile: Implement LoweredZeroLoop with LSX Instruction on loong64 + 2025-11-19 7f2ae21fb4 cmd/internal/obj/loong64: add MULW.D.W[U] instructions + 2025-11-19 a2946f2385 crypto: add Encapsulator and Decapsulator interfaces + 2025-11-19 6b83bd7146 crypto/ecdh: add KeyExchanger interface + 2025-11-19 4fef9f8b55 go/types, types2: fix object path for grouped declaration statements + 2025-11-19 33529db142 spec: escape double-ampersands + 2025-11-19 dc42565a20 cmd/compile: fix control flow for unsigned divisions proof relations + 2025-11-19 e64023dcbf cmd/compile: cleanup useless if statement in prove + 2025-11-19 2239520d1c test: go fmt prove.go tests + 2025-11-19 489d3dafb7 math: switch s390x math.Pow to generic implementation + 2025-11-18 8c41a482f9 runtime: add dlog.hexdump + 2025-11-18 e912618bd2 runtime: add hexdumper + 2025-11-18 2cf9d4b62f Revert "net/http: do not discard body content when closing it within request handlers" + 2025-11-18 4d0658bb08 cmd/compile: prefer fixed registers for values + 2025-11-18 ba634ca5c7 cmd/compile: fold boolean NOT into branches + 2025-11-18 8806d53c10 cmd/link: align sections, not symbols after DWARF compress + 2025-11-18 c93766007d runtime: do not print recovered when double panic with the same value + 
2025-11-18 9859b43643 cmd/asm,cmd/compile,cmd/internal/obj/riscv: use compressed instructions on riscv64 + 2025-11-17 b9ef0633f6 cmd/internal/sys,internal/goarch,runtime: enable the use of compressed instructions on riscv64 + 2025-11-17 a087dea869 debug/elf: sync new loong64 relocation types up to LoongArch ELF psABI v20250521 + 2025-11-17 e1a12c781f cmd/compile: use 32x32->64 multiplies on arm64 + 2025-11-17 6caab99026 runtime: relax TestMemoryLimit on darwin a bit more + 2025-11-17 eda2e8c683 runtime: clear frame pointer at thread entry points + 2025-11-17 6919858338 runtime: rename findrunnable references to findRunnable + 2025-11-17 8e734ec954 go/ast: fix BasicLit.End position for raw strings containing \r + 2025-11-17 592775ec7d crypto/mlkem: avoid a few unnecessary inverse NTT calls + 2025-11-17 590cf18daf crypto/mlkem/mlkemtest: add derandomized Encapsulate768/1024 + 2025-11-17 c12c337099 cmd/compile: teach prove about subtract idioms + 2025-11-17 bc15963813 cmd/compile: clean up prove pass + 2025-11-17 1297fae708 go/token: add (*File).End method + 2025-11-17 65c09eafdf runtime: hoist invariant code out of heapBitsSmallForAddrInline + 2025-11-17 594129b80c internal/runtime/maps: update doc for table.Clear + 2025-11-15 c58d075e9a crypto/rsa: deprecate PKCS#1 v1.5 encryption + 2025-11-14 d55ecea9e5 runtime: usleep before stealing runnext only if not in syscall + 2025-11-14 410ef44f00 cmd: update x/tools to 59ff18c + 2025-11-14 50128a2154 runtime: support runtime.freegc in size-specialized mallocs for noscan objects + 2025-11-14 c3708350a4 cmd/go: tests: rename git-min-vers->git-sha256 + 2025-11-14 aea881230d std: fix printf("%q", int) mistakes + 2025-11-14 120f1874ef runtime: add more precise test of assist credit handling for runtime.freegc + 2025-11-14 fecfcaa4f6 runtime: add runtime.freegc to reduce GC work + 2025-11-14 5a347b775e runtime: set GOEXPERIMENT=runtimefreegc to disabled by default + 2025-11-14 1a03d0db3f runtime: skip tests for 
GOEXPERIMENT=arenas that do not handle clobberfree=1 + 2025-11-14 cb0d9980f5 net/http: do not discard body content when closing it within request handlers + 2025-11-14 03ed43988f cmd/compile: allow multi-field structs to be stored directly in interfaces + 2025-11-14 1bb1f2bf0c runtime: put AddCleanup cleanup arguments in their own allocation + 2025-11-14 9fd2e44439 runtime: add AddCleanup benchmark + 2025-11-14 80c91eedbb runtime: ensure weak handles end up in their own allocation + 2025-11-14 7a8d0b5d53 runtime: add debug mode to extend _Grunning-without-P windows + 2025-11-14 710abf74da internal/runtime/cgobench: add Go function call benchmark for comparison + 2025-11-14 b24aec598b doc, cmd/internal/obj/riscv: document the riscv64 assembler + 2025-11-14 a0e738c657 cmd/compile/internal: remove incorrect riscv64 SLTI rule + 2025-11-14 2cdcc4150b cmd/compile: fold negation into multiplication + 2025-11-14 b57962b7c7 bytes: fix panic in bytes.Buffer.Peek + 2025-11-14 0a569528ea cmd/compile: optimize comparisons with single bit difference + 2025-11-14 1e5e6663e9 cmd/compile: remove unnecessary casts and types from riscv64 rules + 2025-11-14 ddd8558e61 go/types, types2: swap object.color for Checker.objPathIdx + 2025-11-14 9daaab305c cmd/link/internal/ld: make runtime.buildVersion with experiments valid + 2025-11-13 d50a571ddf test: fix tests to work with sizespecializedmalloc turned off + 2025-11-13 704f841eab cmd/trace: annotation proc start/stop with thread and proc always + 2025-11-13 17a02b9106 net/http: remove unused isLitOrSingle and isNotToken + 2025-11-13 ff61991aed cmd/go: fix flaky TestScript/mod_get_direct + 2025-11-13 129d0cb543 net/http/cgi: accept INCLUDED as protocol for server side includes + 2025-11-13 77c5130100 go/types: minor simplification + 2025-11-13 7601cd3880 go/types: generate cycles.go + 2025-11-13 7a372affd9 go/types, types2: rename definedType to declaredType and clarify docs Change-Id: Ibaa9bdb982364892f80e511c1bb12661fcd5fb86
Diffstat (limited to 'src/cmd/internal/obj')
-rw-r--r--src/cmd/internal/obj/link.go61
-rw-r--r--src/cmd/internal/obj/loong64/a.out.go4
-rw-r--r--src/cmd/internal/obj/loong64/anames.go2
-rw-r--r--src/cmd/internal/obj/loong64/asm.go6
-rw-r--r--src/cmd/internal/obj/riscv/asm_test.go16
-rw-r--r--src/cmd/internal/obj/riscv/cpu.go3
-rw-r--r--src/cmd/internal/obj/riscv/doc.go297
-rw-r--r--src/cmd/internal/obj/riscv/obj.go189
-rw-r--r--src/cmd/internal/obj/x86/obj6.go25
9 files changed, 552 insertions, 51 deletions
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go
index 85dca33d27..c70c1d9438 100644
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -1153,36 +1153,37 @@ type Func interface {
// Link holds the context for writing object code from a compiler
// to be linker input or for reading that input into the linker.
type Link struct {
- Headtype objabi.HeadType
- Arch *LinkArch
- Debugasm int
- Debugvlog bool
- Debugpcln string
- Flag_shared bool
- Flag_dynlink bool
- Flag_linkshared bool
- Flag_optimize bool
- Flag_locationlists bool
- Flag_noRefName bool // do not include referenced symbol names in object file
- Retpoline bool // emit use of retpoline stubs for indirect jmp/call
- Flag_maymorestack string // If not "", call this function before stack checks
- Bso *bufio.Writer
- Pathname string
- Pkgpath string // the current package's import path
- hashmu sync.Mutex // protects hash, funchash
- hash map[string]*LSym // name -> sym mapping
- funchash map[string]*LSym // name -> sym mapping for ABIInternal syms
- statichash map[string]*LSym // name -> sym mapping for static syms
- PosTable src.PosTable
- InlTree InlTree // global inlining tree used by gc/inl.go
- DwFixups *DwarfFixupTable
- DwTextCount int
- Imports []goobj.ImportedPkg
- DiagFunc func(string, ...any)
- DiagFlush func()
- DebugInfo func(ctxt *Link, fn *LSym, info *LSym, curfn Func) ([]dwarf.Scope, dwarf.InlCalls)
- GenAbstractFunc func(fn *LSym)
- Errors int
+ Headtype objabi.HeadType
+ Arch *LinkArch
+ CompressInstructions bool // use compressed instructions where possible (if supported by architecture)
+ Debugasm int
+ Debugvlog bool
+ Debugpcln string
+ Flag_shared bool
+ Flag_dynlink bool
+ Flag_linkshared bool
+ Flag_optimize bool
+ Flag_locationlists bool
+ Flag_noRefName bool // do not include referenced symbol names in object file
+ Retpoline bool // emit use of retpoline stubs for indirect jmp/call
+ Flag_maymorestack string // If not "", call this function before stack checks
+ Bso *bufio.Writer
+ Pathname string
+ Pkgpath string // the current package's import path
+ hashmu sync.Mutex // protects hash, funchash
+ hash map[string]*LSym // name -> sym mapping
+ funchash map[string]*LSym // name -> sym mapping for ABIInternal syms
+ statichash map[string]*LSym // name -> sym mapping for static syms
+ PosTable src.PosTable
+ InlTree InlTree // global inlining tree used by gc/inl.go
+ DwFixups *DwarfFixupTable
+ DwTextCount int
+ Imports []goobj.ImportedPkg
+ DiagFunc func(string, ...any)
+ DiagFlush func()
+ DebugInfo func(ctxt *Link, fn *LSym, info *LSym, curfn Func) ([]dwarf.Scope, dwarf.InlCalls)
+ GenAbstractFunc func(fn *LSym)
+ Errors int
InParallel bool // parallel backend phase in effect
UseBASEntries bool // use Base Address Selection Entries in location lists and PC ranges
diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go
index 73f145df14..5b8bffc9f1 100644
--- a/src/cmd/internal/obj/loong64/a.out.go
+++ b/src/cmd/internal/obj/loong64/a.out.go
@@ -589,6 +589,10 @@ const (
AORN
AANDN
+ // 2.2.1.12
+ AMULWVW
+ AMULWVWU
+
// 2.2.7. Atomic Memory Access Instructions
AAMSWAPB
AAMSWAPH
diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go
index ab85c52a21..1749b43bf6 100644
--- a/src/cmd/internal/obj/loong64/anames.go
+++ b/src/cmd/internal/obj/loong64/anames.go
@@ -131,6 +131,8 @@ var Anames = []string{
"ALSLV",
"ORN",
"ANDN",
+ "MULWVW",
+ "MULWVWU",
"AMSWAPB",
"AMSWAPH",
"AMSWAPW",
diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go
index 38b075d77e..b35e49a1b6 100644
--- a/src/cmd/internal/obj/loong64/asm.go
+++ b/src/cmd/internal/obj/loong64/asm.go
@@ -1503,6 +1503,8 @@ func buildop(ctxt *obj.Link) {
opset(AREMU, r0)
opset(ADIV, r0)
opset(ADIVU, r0)
+ opset(AMULWVW, r0)
+ opset(AMULWVWU, r0)
case AMULV:
opset(AMULVU, r0)
@@ -3230,6 +3232,10 @@ func (c *ctxt0) oprrr(a obj.As) uint32 {
return 0x3c << 15 // mulh.d
case AMULHVU:
return 0x3d << 15 // mulhu.d
+ case AMULWVW:
+ return 0x3e << 15 // mulw.d.w
+ case AMULWVWU:
+ return 0x3f << 15 // mulw.d.wu
case ADIV:
return 0x40 << 15 // div.w
case ADIVU:
diff --git a/src/cmd/internal/obj/riscv/asm_test.go b/src/cmd/internal/obj/riscv/asm_test.go
index f40e57fa64..5b50d1533a 100644
--- a/src/cmd/internal/obj/riscv/asm_test.go
+++ b/src/cmd/internal/obj/riscv/asm_test.go
@@ -11,8 +11,8 @@ import (
"os"
"os/exec"
"path/filepath"
+ "regexp"
"runtime"
- "strings"
"testing"
)
@@ -48,10 +48,10 @@ func genLargeBranch(buf *bytes.Buffer) {
fmt.Fprintln(buf, "TEXT f(SB),0,$0-0")
fmt.Fprintln(buf, "BEQ X0, X0, label")
for i := 0; i < 1<<19; i++ {
- fmt.Fprintln(buf, "ADD $0, X0, X0")
+ fmt.Fprintln(buf, "ADD $0, X5, X0")
}
fmt.Fprintln(buf, "label:")
- fmt.Fprintln(buf, "ADD $0, X0, X0")
+ fmt.Fprintln(buf, "ADD $0, X5, X0")
}
// TestLargeCall generates a large function (>1MB of text) with a call to
@@ -112,11 +112,11 @@ func genLargeCall(buf *bytes.Buffer) {
fmt.Fprintln(buf, "TEXT ·x(SB),0,$0-0")
fmt.Fprintln(buf, "CALL ·y(SB)")
for i := 0; i < 1<<19; i++ {
- fmt.Fprintln(buf, "ADD $0, X0, X0")
+ fmt.Fprintln(buf, "ADD $0, X5, X0")
}
fmt.Fprintln(buf, "RET")
fmt.Fprintln(buf, "TEXT ·y(SB),0,$0-0")
- fmt.Fprintln(buf, "ADD $0, X0, X0")
+ fmt.Fprintln(buf, "ADD $0, X5, X0")
fmt.Fprintln(buf, "RET")
}
@@ -301,9 +301,9 @@ TEXT _stub(SB),$0-0
// FENCE
// NOP
// FENCE
- // RET
- want := "0f 00 f0 0f 13 00 00 00 0f 00 f0 0f 67 80 00 00"
- if !strings.Contains(string(out), want) {
+ // RET (CJALR or JALR)
+ want := regexp.MustCompile("0x0000 0f 00 f0 0f 13 00 00 00 0f 00 f0 0f (82 80|67 80 00 00) ")
+ if !want.Match(out) {
t.Errorf("PCALIGN test failed - got %s\nwant %s", out, want)
}
}
diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go
index 60174a0b3a..a91395dd38 100644
--- a/src/cmd/internal/obj/riscv/cpu.go
+++ b/src/cmd/internal/obj/riscv/cpu.go
@@ -326,6 +326,9 @@ const (
NEED_GOT_PCREL_ITYPE_RELOC
)
+// NEED_RELOC is the bitwise OR of the individual NEED_*_RELOC mark bits
+// named in its definition, so that p.Mark&NEED_RELOC != 0 reports whether
+// any of those relocation marks is set on a Prog.
+const NEED_RELOC = NEED_JAL_RELOC | NEED_CALL_RELOC | NEED_PCREL_ITYPE_RELOC |
+ NEED_PCREL_STYPE_RELOC | NEED_GOT_PCREL_ITYPE_RELOC
+
// RISC-V mnemonics, as defined in the "opcodes" and "opcodes-pseudo" files
// at https://github.com/riscv/riscv-opcodes.
//
diff --git a/src/cmd/internal/obj/riscv/doc.go b/src/cmd/internal/obj/riscv/doc.go
new file mode 100644
index 0000000000..365bedd299
--- /dev/null
+++ b/src/cmd/internal/obj/riscv/doc.go
@@ -0,0 +1,297 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package riscv implements the riscv64 assembler.
+
+# Register naming
+
+The integer registers are named X0 through to X31; however, X4 must be accessed
+through its RISC-V ABI name, TP, and X27, which holds a pointer to the
+goroutine structure, must be referred to as g. Additionally, when building in
+shared mode, X3 is unavailable and must be accessed via its RISC-V ABI name,
+GP.
+
+The floating-point registers are named F0 through to F31.
+
+The vector registers are named V0 through to V31.
+
+Both integer and floating-point registers can be referred to by their RISC-V
+ABI names, e.g., A0 or FT0, with the exception that X27 cannot be referred to
+by its RISC-V ABI name, S11. It must be referred to as g.
+
+Some of the integer registers are used by the Go runtime and assembler - X26 is
+the closure pointer, X27 points to the goroutine structure and X31 is a
+temporary register used by the Go assembler. Use of X31 should be avoided in
+hand-written assembly code as its value could be altered by the instruction
+sequences emitted by the assembler.
+
+# Instruction naming
+
+Many RISC-V instructions contain one or more suffixes in their names. In the
+[RISC-V ISA Manual] these suffixes are separated from each other and from the
+instruction mnemonic with a dot ('.'). In the Go assembler, the
+separators are omitted and the suffixes are written in upper case.
+
+Example:
+
+ FMVWX <=> fmv.w.x
+
+# Rounding modes
+
+The Go toolchain does not set the FCSR register and requires the desired
+rounding mode to be explicitly encoded within floating-point instructions.
+The syntax the Go assembler uses to specify the rounding modes differs
+from the syntax in the RISC-V specifications. In the [RISC-V ISA Manual]
+the rounding mode is given as an extra operand at the end of an
+assembly language instruction. In the Go assembler, the rounding modes are
+converted to uppercase and follow the instruction mnemonic from which they
+are separated with a dot ('.').
+
+Example:
+
+ FCVTLUS.RNE F0, X5 <=> fcvt.lu.s x5, f0, rne
+
+RTZ is assumed if the rounding mode is omitted.
+
+# RISC-V extensions
+
+By default the Go compiler targets the [rva20u64] profile. This profile mandates
+all the general RISC-V instructions, allowing Go to use integer, multiplication,
+division, floating-point and atomic instructions without having to
+perform compile time or runtime checks to verify that their use is appropriate
+for the target hardware. All widely available riscv64 devices support at least
+[rva20u64]. The Go toolchain can be instructed to target later RISC-V profiles,
+including [rva22u64] and [rva23u64], via the GORISCV64 environment variable.
+Instructions that are provided by newer profiles cannot typically be used in
+handwritten assembly code without compile time guards (or runtime checks)
+that ensure they are hardware supported.
+
+The file asm_riscv64.h defines macros for each RISC-V extension that is enabled
+by setting the GORISCV64 environment variable to a value other than [rva20u64].
+For example, if GORISCV64=rva22u64 the macros hasZba, hasZbb and hasZbs will be
+defined. If GORISCV64=rva23u64 hasV will be defined in addition to hasZba,
+hasZbb and hasZbs. These macros can be used to determine whether it's safe
+to use an instruction in hand-written assembly.
+
+It is not always necessary to include asm_riscv64.h and use #ifdefs in your
+code to safely take advantage of instructions present in the [rva22u64]
+profile. In some cases the assembler can generate [rva20u64] compatible code
+even when an [rva22u64] instruction is used in an assembly source file. When
+GORISCV64=rva20u64 the assembler will synthesize certain [rva22u64]
+instructions, e.g., ANDN, using multiple [rva20u64] instructions. Instructions
+such as ANDN can then be freely used in assembly code without checking to see
+whether the instruction is supported by the target profile. When building a
+source file containing the ANDN instruction with GORISCV64=rva22u64 the
+assembler will emit the Zbb ANDN instruction directly. When building the same
+source file with GORISCV64=rva20u64 the assembler will emit multiple [rva20u64]
+instructions to synthesize ANDN.
+
+The assembler will also use [rva22u64] instructions to implement the zero and
+sign extension instructions, e.g., MOVB and MOVHU, when GORISCV64=rva22u64 or
+greater.
+
+The instructions not implemented in the default profile ([rva20u64]) that can
+be safely used in assembly code without compile time checks are:
+
+ - ANDN
+ - MAX
+ - MAXU
+ - MIN
+ - MINU
+ - MOVB
+ - MOVH
+ - MOVHU
+ - MOVWU
+ - ORN
+ - ROL
+ - ROLW
+ - ROR
+ - RORI
+ - RORIW
+ - RORW
+ - XNOR
+
+# Operand ordering
+
+The ordering used for instruction operands in the Go assembler differs from the
+ordering defined in the [RISC-V ISA Manual].
+
+1. R-Type instructions
+
+R-Type instructions are written in the reverse order to that given in the
+[RISC-V ISA Manual], with the register order being rs2, rs1, rd.
+
+Examples:
+
+ ADD X10, X11, X12 <=> add x12, x11, x10
+ FADDD F10, F11, F12 <=> fadd.d f12, f11, f10
+
+2. I-Type arithmetic instructions
+
+I-Type arithmetic instructions (not loads, fences, ebreak, ecall) use the same
+ordering as the R-Type instructions, typically, imm12, rs1, rd.
+
+Examples:
+
+ ADDI $1, X11, X12 <=> addi x12, x11, 1
+ SLTI $1, X11, X12 <=> slti x12, x11, 1
+
+3. Loads and Stores
+
+Load and store instructions are written with the source operand (whether it be
+a register or a memory address) first, followed by the destination operand.
+
+Examples:
+
+ MOV 16(X2), X10 <=> ld x10, 16(x2)
+ MOV X10, (X2) <=> sd x10, 0(x2)
+
+4. Branch instructions
+
+The branch instructions use the same operand ordering as is given in the
+[RISC-V ISA Manual], e.g., rs1, rs2, label.
+
+Example:
+
+ BLT X12, X23, loop1 <=> blt x12, x23, loop1
+
+BLT X12, X23, label will jump to label if X12 < X23. Note this is not the
+same ordering as is used for the SLT instructions.
+
+5. FMA instructions
+
+The Go assembler uses a different ordering for the RISC-V FMA operands to
+the ordering given in the [RISC-V ISA Manual]. The operands are rotated one
+place to the left, so that the destination operand comes last.
+
+Example:
+
+ FMADDS F1, F2, F3, F4 <=> fmadd.s f4, f1, f2, f3
+
+6. AMO instructions
+
+The ordering used for the AMO operations is rs2, rs1, rd, i.e., the operands
+as specified in the [RISC-V ISA Manual] are rotated one place to the left.
+
+Example:
+
+ AMOSWAPW X5, (X6), X7 <=> amoswap.w x7, x5, (x6)
+
+7. Vector instructions
+
+The VSETVLI instruction uses the same symbolic names as the [RISC-V ISA Manual]
+to represent the components of vtype, with the exception
+that they are written in upper case. The ordering of the operands in the Go
+assembler differs from the [RISC-V ISA Manual] in that the operands are
+rotated one place to the left so that the destination register, the register
+that holds the new vl, is the last operand.
+
+Example:
+
+ VSETVLI X10, E8, M1, TU, MU, X12 <=> vsetvli x12, x10, e8, m1, tu, mu
+
+Vector load and store instructions follow the pattern set by scalar loads and
+stores, i.e., the source is always the first operand and the destination the
+last. However, the ordering of the operands of these instructions is
+complicated by the optional mask register and, in some cases, the use of an
+additional stride or index register. In the Go assembler the index and stride
+registers appear as the second operand in indexed or strided loads and stores,
+while the mask register, if present, is always the penultimate operand.
+
+Examples:
+
+ VLE8V (X10), V3 <=> vle8.v v3, (x10)
+ VSE8V V3, (X10) <=> vse8.v v3, (x10)
+ VLE8V (X10), V0, V3 <=> vle8.v v3, (x10), v0.t
+ VSE8V V3, V0, (X10) <=> vse8.v v3, (x10), v0.t
+ VLSE8V (X10), X11, V3 <=> vlse8.v v3, (x10), x11
+ VSSE8V V3, X11, (X10) <=> vsse8.v v3, (x10), x11
+ VLSE8V (X10), X11, V0, V3 <=> vlse8.v v3, (x10), x11, v0.t
+ VSSE8V V3, X11, V0, (X10) <=> vsse8.v v3, (x10), x11, v0.t
+ VLUXEI8V (X10), V2, V3 <=> vluxei8.v v3, (x10), v2
+ VSUXEI8V V3, V2, (X10) <=> vsuxei8.v v3, (x10), v2
+ VLUXEI8V (X10), V2, V0, V3 <=> vluxei8.v v3, (x10), v2, v0.t
+ VSUXEI8V V3, V2, V0, (X10) <=> vsuxei8.v v3, (x10), v2, v0.t
+ VL1RE8V (X10), V3 <=> vl1re8.v v3, (x10)
+ VS1RV V3, (X11) <=> vs1r.v v3, (x11)
+
+The ordering of operands for two and three argument vector arithmetic instructions is
+reversed in the Go assembler.
+
+Examples:
+
+ VMVVV V2, V3 <=> vmv.v.v v3, v2
+ VADDVV V1, V2, V3 <=> vadd.vv v3, v2, v1
+ VADDVX X10, V2, V3 <=> vadd.vx v3, v2, x10
+ VMADCVI $15, V2, V3 <=> vmadc.vi v3, v2, 15
+
+The mask register, when specified, is always the penultimate operand in a vector
+arithmetic instruction, appearing before the destination register.
+
+Examples:
+
+ VANDVV V1, V2, V0, V3 <=> vand.vv v3, v2, v1, v0.t
+
+# Ternary instructions
+
+The Go assembler allows the second operand to be omitted from most ternary
+instructions if it matches the third (destination) operand.
+
+Examples:
+
+ ADD X10, X12, X12 <=> ADD X10, X12
+ ANDI $3, X12, X12 <=> ANDI $3, X12
+
+The use of this abbreviated syntax is encouraged.
+
+# Ordering of atomic instructions
+
+It is not possible to specify the ordering bits in the FENCE, LR, SC or AMO
+instructions. The FENCE instruction is always emitted as a full fence, the
+acquire and release bits are always set for the AMO instructions, the acquire
+bit is always set for the LR instructions while the release bit is set for
+the SC instructions.
+
+# Immediate operands
+
+In many cases, where an R-Type instruction has a corresponding I-Type
+instruction, the R-Type mnemonic can be used in place of the I-Type mnemonic.
+The assembler assumes that the immediate form of the instruction was intended
+when the first operand is given as an immediate value rather than a register.
+
+Example:
+
+ AND $3, X12, X13 <=> ANDI $3, X12, X13
+
+# Integer constant materialization
+
+The MOV instruction can be used to set a register to the value of any 64 bit
+constant literal. The way this is achieved by the assembler varies depending
+on the value of the constant. Where possible the assembler will synthesize the
+constant using one or more RISC-V arithmetic instructions. If it is unable
+to easily materialize the constant it will load the 64 bit literal from memory.
+
+A 32 bit constant literal can be specified as an argument to ADDI, ANDI, ORI and
+XORI. If the specified literal does not fit into 12 bits the assembler will
+generate extra instructions to synthesize it.
+
+Integer constants provided as operands to all other instructions must fit into
+the number of bits allowed by the instructions' encodings for immediate values.
+Otherwise, an error will be generated.
+
+# Floating point constant materialization
+
+The MOVF and MOVD instructions can be used to set a register to the value
+of any 32 bit or 64 bit floating point constant literal, respectively. Unless
+the constant literal is 0.0, MOVF and MOVD will be encoded as FLW and FLD
+instructions that load the constant from a location within the program's
+binary.
+
+[RISC-V ISA Manual]: https://github.com/riscv/riscv-isa-manual
+[rva20u64]: https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc#51-rva20u64-profile
+[rva22u64]: https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc#rva22u64-profile
+[rva23u64]: https://github.com/riscv/riscv-profiles/blob/main/src/rva23-profile.adoc#rva23u64-profile
+*/
+package riscv
diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go
index 3deab34d31..043be17c07 100644
--- a/src/cmd/internal/obj/riscv/obj.go
+++ b/src/cmd/internal/obj/riscv/obj.go
@@ -414,10 +414,10 @@ func containsCall(sym *obj.LSym) bool {
// setPCs sets the Pc field in all instructions reachable from p.
// It uses pc as the initial value and returns the next available pc.
-func setPCs(p *obj.Prog, pc int64) int64 {
+func setPCs(p *obj.Prog, pc int64, compress bool) int64 {
for ; p != nil; p = p.Link {
p.Pc = pc
- for _, ins := range instructionsForProg(p) {
+ for _, ins := range instructionsForProg(p, compress) {
pc += int64(ins.length())
}
@@ -671,7 +671,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
// a fixed point will be reached). No attempt to handle functions > 2GiB.
for {
big, rescan := false, false
- maxPC := setPCs(cursym.Func().Text, 0)
+ maxPC := setPCs(cursym.Func().Text, 0, ctxt.CompressInstructions)
if maxPC+maxTrampSize > (1 << 20) {
big = true
}
@@ -801,7 +801,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
// Validate all instructions - this provides nice error messages.
for p := cursym.Func().Text; p != nil; p = p.Link {
- for _, ins := range instructionsForProg(p) {
+ for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) {
ins.validate(ctxt)
}
}
@@ -1141,6 +1141,14 @@ func wantImmU(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) {
}
}
+// isScaledImmI reports whether imm fits in a signed immediate of nbits bits
+// (as judged by immFits) and is an exact multiple of scale.
+func isScaledImmI(imm int64, nbits uint, scale int64) bool {
+ return immFits(imm, nbits, true) == nil && imm%scale == 0
+}
+
+// isScaledImmU reports whether imm fits in an unsigned immediate of nbits
+// bits (as judged by immFits) and is an exact multiple of scale.
+func isScaledImmU(imm int64, nbits uint, scale int64) bool {
+ return immFits(imm, nbits, false) == nil && imm%scale == 0
+}
+
func wantScaledImm(ctxt *obj.Link, ins *instruction, imm int64, nbits uint, scale int64, signed bool) {
if err := immFits(imm, nbits, signed); err != nil {
ctxt.Diag("%v: %v", ins, err)
@@ -1180,6 +1188,10 @@ func wantIntReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
wantReg(ctxt, ins, pos, "integer", r, REG_X0, REG_X31)
}
+// isIntPrimeReg reports whether r lies in the range REG_X8 through REG_X15,
+// i.e. whether it is an integer register that can be used in a prime
+// register field of a compressed instruction.
+func isIntPrimeReg(r uint32) bool {
+ return r >= REG_X8 && r <= REG_X15
+}
+
// wantIntPrimeReg checks that r is an integer register that can be used
// in a prime register field of a compressed instruction.
func wantIntPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
@@ -1191,6 +1203,10 @@ func wantFloatReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
wantReg(ctxt, ins, pos, "float", r, REG_F0, REG_F31)
}
+// isFloatPrimeReg reports whether r lies in the range REG_F8 through REG_F15,
+// i.e. whether it is a floating-point register that can be used in a prime
+// register field of a compressed instruction.
+func isFloatPrimeReg(r uint32) bool {
+ return r >= REG_F8 && r <= REG_F15
+}
+
// wantFloatPrimeReg checks that r is a floating-point register that can
// be used in a prime register field of a compressed instruction.
func wantFloatPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
@@ -3515,6 +3531,147 @@ func (ins *instruction) usesRegTmp() bool {
return ins.rd == REG_TMP || ins.rs1 == REG_TMP || ins.rs2 == REG_TMP
}
+// compress rewrites ins in place into an equivalent compressed instruction
+// when its opcode, registers and immediate satisfy the constraints checked
+// below. Instructions that match no case, or whose operands fail every
+// check for their case, are left unchanged.
+//
+// "Prime" registers are those accepted by isIntPrimeReg and isFloatPrimeReg
+// (X8-X15 and F8-F15). Immediates are tested with isScaledImmU/isScaledImmI
+// (width in bits plus required multiple) or immIFits (signed width only).
+func (ins *instruction) compress() {
+ switch ins.as {
+ case ALW:
+ // SP-relative form first (any rd except X0), then the
+ // prime-register form.
+ if ins.rd != REG_X0 && ins.rs1 == REG_SP && isScaledImmU(ins.imm, 8, 4) {
+ ins.as, ins.rs1, ins.rs2 = ACLWSP, obj.REG_NONE, ins.rs1
+ } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) {
+ ins.as = ACLW
+ }
+
+ case ALD:
+ if ins.rs1 == REG_SP && ins.rd != REG_X0 && isScaledImmU(ins.imm, 9, 8) {
+ ins.as, ins.rs1, ins.rs2 = ACLDSP, obj.REG_NONE, ins.rs1
+ } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
+ ins.as = ACLD
+ }
+
+ case AFLD:
+ if ins.rs1 == REG_SP && isScaledImmU(ins.imm, 9, 8) {
+ ins.as, ins.rs1, ins.rs2 = ACFLDSP, obj.REG_NONE, ins.rs1
+ } else if isFloatPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
+ ins.as = ACFLD
+ }
+
+ case ASW:
+ // NOTE(review): stores appear to carry the base register in rd
+ // and the value to store in rs1 - confirm against
+ // instructionForProg.
+ if ins.rd == REG_SP && isScaledImmU(ins.imm, 8, 4) {
+ ins.as, ins.rs1, ins.rs2 = ACSWSP, obj.REG_NONE, ins.rs1
+ } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) {
+ ins.as, ins.rd, ins.rs1, ins.rs2 = ACSW, obj.REG_NONE, ins.rd, ins.rs1
+ }
+
+ case ASD:
+ if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) {
+ ins.as, ins.rs1, ins.rs2 = ACSDSP, obj.REG_NONE, ins.rs1
+ } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
+ ins.as, ins.rd, ins.rs1, ins.rs2 = ACSD, obj.REG_NONE, ins.rd, ins.rs1
+ }
+
+ case AFSD:
+ if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) {
+ ins.as, ins.rs1, ins.rs2 = ACFSDSP, obj.REG_NONE, ins.rs1
+ } else if isIntPrimeReg(ins.rd) && isFloatPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
+ ins.as, ins.rd, ins.rs1, ins.rs2 = ACFSD, obj.REG_NONE, ins.rd, ins.rs1
+ }
+
+ case AADDI:
+ // The candidates are tried in a fixed order; the first whose
+ // constraints hold wins.
+ if ins.rd == REG_SP && ins.rs1 == REG_SP && ins.imm != 0 && isScaledImmI(ins.imm, 10, 16) {
+ ins.as = ACADDI16SP
+ } else if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 && immIFits(ins.imm, 6) == nil {
+ ins.as = ACADDI
+ } else if isIntPrimeReg(ins.rd) && ins.rs1 == REG_SP && ins.imm != 0 && isScaledImmU(ins.imm, 10, 4) {
+ ins.as = ACADDI4SPN
+ } else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && immIFits(ins.imm, 6) == nil {
+ // Adding an immediate to X0 loads that immediate.
+ ins.as, ins.rs1 = ACLI, obj.REG_NONE
+ } else if ins.rd != REG_X0 && ins.rs1 != REG_X0 && ins.imm == 0 {
+ // Adding zero is a register move; the source moves to rs2.
+ ins.as, ins.rs1, ins.rs2 = ACMV, obj.REG_NONE, ins.rs1
+ } else if ins.rd == REG_X0 && ins.rs1 == REG_X0 && ins.imm == 0 {
+ ins.as, ins.rs1 = ACNOP, ins.rd
+ }
+
+ case AADDIW:
+ // C.ADDIW is reserved when rd is X0, so such instructions are
+ // left uncompressed. Unlike C.ADDI, a zero immediate is allowed.
+ if ins.rd != REG_X0 && ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil {
+ ins.as = ACADDIW
+ }
+
+ case ALUI:
+ if ins.rd != REG_X0 && ins.rd != REG_SP && ins.imm != 0 && immIFits(ins.imm, 6) == nil {
+ ins.as = ACLUI
+ }
+
+ case ASLLI:
+ if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 {
+ ins.as = ACSLLI
+ }
+
+ case ASRLI:
+ if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 {
+ ins.as = ACSRLI
+ }
+
+ case ASRAI:
+ if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 {
+ ins.as = ACSRAI
+ }
+
+ case AANDI:
+ if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil {
+ ins.as = ACANDI
+ }
+
+ case AADD:
+ if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.rs2 != REG_X0 {
+ ins.as = ACADD
+ } else if ins.rd != REG_X0 && ins.rd == ins.rs2 && ins.rs1 != REG_X0 {
+ // Addition commutes, so swap the sources to make rd == rs1.
+ ins.as, ins.rs1, ins.rs2 = ACADD, ins.rs2, ins.rs1
+ } else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && ins.rs2 != REG_X0 {
+ // Adding X0 to rs2 is a register move.
+ // NOTE(review): rs1 stays X0 here, whereas the ADDI path above
+ // clears it to REG_NONE - confirm the CMV encoder ignores rs1.
+ ins.as = ACMV
+ }
+
+ case AADDW:
+ if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
+ ins.as = ACADDW
+ } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
+ // Commutative: swap the sources to make rd == rs1.
+ ins.as, ins.rs1, ins.rs2 = ACADDW, ins.rs2, ins.rs1
+ }
+
+ case ASUB:
+ // Subtraction does not commute, so only rd == rs1 qualifies.
+ if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
+ ins.as = ACSUB
+ }
+
+ case ASUBW:
+ if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
+ ins.as = ACSUBW
+ }
+
+ case AAND:
+ if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
+ ins.as = ACAND
+ } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
+ ins.as, ins.rs1, ins.rs2 = ACAND, ins.rs2, ins.rs1
+ }
+
+ case AOR:
+ if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
+ ins.as = ACOR
+ } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
+ ins.as, ins.rs1, ins.rs2 = ACOR, ins.rs2, ins.rs1
+ }
+
+ case AXOR:
+ if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
+ ins.as = ACXOR
+ } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
+ ins.as, ins.rs1, ins.rs2 = ACXOR, ins.rs2, ins.rs1
+ }
+
+ case AEBREAK:
+ // EBREAK takes no operands, so it is always compressed.
+ ins.as, ins.rd, ins.rs1 = ACEBREAK, obj.REG_NONE, obj.REG_NONE
+ }
+}
+
// instructionForProg returns the default *obj.Prog to instruction mapping.
func instructionForProg(p *obj.Prog) *instruction {
ins := &instruction{
@@ -4057,7 +4214,7 @@ func instructionsForMinMax(p *obj.Prog, ins *instruction) []*instruction {
}
// instructionsForProg returns the machine instructions for an *obj.Prog.
-func instructionsForProg(p *obj.Prog) []*instruction {
+func instructionsForProg(p *obj.Prog, compress bool) []*instruction {
ins := instructionForProg(p)
inss := []*instruction{ins}
@@ -4710,6 +4867,15 @@ func instructionsForProg(p *obj.Prog) []*instruction {
ins.rs1, ins.rs2 = obj.REG_NONE, REG_V0
}
+ // Only compress instructions when there is no relocation, since
+ // relocation relies on knowledge about the exact instructions that
+ // are in use.
+ if compress && p.Mark&NEED_RELOC == 0 {
+ for _, ins := range inss {
+ ins.compress()
+ }
+ }
+
for _, ins := range inss {
ins.p = p
}
@@ -4799,15 +4965,22 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
v := pcAlignPadLength(p.Pc, alignedValue)
offset := p.Pc
for ; v >= 4; v -= 4 {
- // NOP
- cursym.WriteBytes(ctxt, offset, []byte{0x13, 0, 0, 0})
+ // NOP (ADDI $0, X0, X0)
+ cursym.WriteBytes(ctxt, offset, []byte{0x13, 0x00, 0x00, 0x00})
offset += 4
}
+ if v == 2 {
+ // CNOP
+ cursym.WriteBytes(ctxt, offset, []byte{0x01, 0x00})
+ offset += 2
+ } else if v != 0 {
+ ctxt.Diag("bad PCALIGN pad length")
+ }
continue
}
offset := p.Pc
- for _, ins := range instructionsForProg(p) {
+ for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) {
if ic, err := ins.encode(); err == nil {
cursym.WriteInt(ctxt, offset, ins.length(), int64(ic))
offset += int64(ins.length())
diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go
index 9c8e5e96f8..ed41d81388 100644
--- a/src/cmd/internal/obj/x86/obj6.go
+++ b/src/cmd/internal/obj/x86/obj6.go
@@ -423,8 +423,12 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
q.From.Reg = reg
}
}
- if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
- ctxt.Diag("don't know how to handle %v with -dynlink", p)
+ from3 := p.GetFrom3()
+ for i := range p.RestArgs {
+ a := &p.RestArgs[i].Addr
+ if a != from3 && a.Name == obj.NAME_EXTERN && !a.Sym.Local() {
+ ctxt.Diag("don't know how to handle %v with -dynlink", p)
+ }
}
var source *obj.Addr
// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
@@ -434,9 +438,17 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
}
+ if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() {
+ ctxt.Diag("cannot handle NAME_EXTERN on multiple operands in %v with -dynlink", p)
+ }
source = &p.From
} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
+ if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() {
+ ctxt.Diag("cannot handle NAME_EXTERN on multiple operands in %v with -dynlink", p)
+ }
source = &p.To
+ } else if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() {
+ source = from3
} else {
return
}
@@ -501,9 +513,7 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
p2.As = p.As
p2.From = p.From
p2.To = p.To
- if from3 := p.GetFrom3(); from3 != nil {
- p2.AddRestSource(*from3)
- }
+ p2.RestArgs = p.RestArgs
if p.From.Name == obj.NAME_EXTERN {
p2.From.Reg = reg
p2.From.Name = obj.NAME_NONE
@@ -512,6 +522,11 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
p2.To.Reg = reg
p2.To.Name = obj.NAME_NONE
p2.To.Sym = nil
+ } else if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
+ from3 = p2.GetFrom3()
+ from3.Reg = reg
+ from3.Name = obj.NAME_NONE
+ from3.Sym = nil
} else {
return
}