diff options
Diffstat (limited to 'src/cmd/internal/obj')
| -rw-r--r-- | src/cmd/internal/obj/link.go | 61 | ||||
| -rw-r--r-- | src/cmd/internal/obj/loong64/a.out.go | 4 | ||||
| -rw-r--r-- | src/cmd/internal/obj/loong64/anames.go | 2 | ||||
| -rw-r--r-- | src/cmd/internal/obj/loong64/asm.go | 6 | ||||
| -rw-r--r-- | src/cmd/internal/obj/riscv/asm_test.go | 16 | ||||
| -rw-r--r-- | src/cmd/internal/obj/riscv/cpu.go | 3 | ||||
| -rw-r--r-- | src/cmd/internal/obj/riscv/doc.go | 297 | ||||
| -rw-r--r-- | src/cmd/internal/obj/riscv/obj.go | 189 | ||||
| -rw-r--r-- | src/cmd/internal/obj/x86/obj6.go | 25 |
9 files changed, 552 insertions, 51 deletions
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index 85dca33d27..c70c1d9438 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -1153,36 +1153,37 @@ type Func interface { // Link holds the context for writing object code from a compiler // to be linker input or for reading that input into the linker. type Link struct { - Headtype objabi.HeadType - Arch *LinkArch - Debugasm int - Debugvlog bool - Debugpcln string - Flag_shared bool - Flag_dynlink bool - Flag_linkshared bool - Flag_optimize bool - Flag_locationlists bool - Flag_noRefName bool // do not include referenced symbol names in object file - Retpoline bool // emit use of retpoline stubs for indirect jmp/call - Flag_maymorestack string // If not "", call this function before stack checks - Bso *bufio.Writer - Pathname string - Pkgpath string // the current package's import path - hashmu sync.Mutex // protects hash, funchash - hash map[string]*LSym // name -> sym mapping - funchash map[string]*LSym // name -> sym mapping for ABIInternal syms - statichash map[string]*LSym // name -> sym mapping for static syms - PosTable src.PosTable - InlTree InlTree // global inlining tree used by gc/inl.go - DwFixups *DwarfFixupTable - DwTextCount int - Imports []goobj.ImportedPkg - DiagFunc func(string, ...any) - DiagFlush func() - DebugInfo func(ctxt *Link, fn *LSym, info *LSym, curfn Func) ([]dwarf.Scope, dwarf.InlCalls) - GenAbstractFunc func(fn *LSym) - Errors int + Headtype objabi.HeadType + Arch *LinkArch + CompressInstructions bool // use compressed instructions where possible (if supported by architecture) + Debugasm int + Debugvlog bool + Debugpcln string + Flag_shared bool + Flag_dynlink bool + Flag_linkshared bool + Flag_optimize bool + Flag_locationlists bool + Flag_noRefName bool // do not include referenced symbol names in object file + Retpoline bool // emit use of retpoline stubs for indirect jmp/call + Flag_maymorestack string // If not "", call this 
function before stack checks + Bso *bufio.Writer + Pathname string + Pkgpath string // the current package's import path + hashmu sync.Mutex // protects hash, funchash + hash map[string]*LSym // name -> sym mapping + funchash map[string]*LSym // name -> sym mapping for ABIInternal syms + statichash map[string]*LSym // name -> sym mapping for static syms + PosTable src.PosTable + InlTree InlTree // global inlining tree used by gc/inl.go + DwFixups *DwarfFixupTable + DwTextCount int + Imports []goobj.ImportedPkg + DiagFunc func(string, ...any) + DiagFlush func() + DebugInfo func(ctxt *Link, fn *LSym, info *LSym, curfn Func) ([]dwarf.Scope, dwarf.InlCalls) + GenAbstractFunc func(fn *LSym) + Errors int InParallel bool // parallel backend phase in effect UseBASEntries bool // use Base Address Selection Entries in location lists and PC ranges diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go index 73f145df14..5b8bffc9f1 100644 --- a/src/cmd/internal/obj/loong64/a.out.go +++ b/src/cmd/internal/obj/loong64/a.out.go @@ -589,6 +589,10 @@ const ( AORN AANDN + // 2.2.1.12 + AMULWVW + AMULWVWU + // 2.2.7. 
Atomic Memory Access Instructions AAMSWAPB AAMSWAPH diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go index ab85c52a21..1749b43bf6 100644 --- a/src/cmd/internal/obj/loong64/anames.go +++ b/src/cmd/internal/obj/loong64/anames.go @@ -131,6 +131,8 @@ var Anames = []string{ "ALSLV", "ORN", "ANDN", + "MULWVW", + "MULWVWU", "AMSWAPB", "AMSWAPH", "AMSWAPW", diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 38b075d77e..b35e49a1b6 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -1503,6 +1503,8 @@ func buildop(ctxt *obj.Link) { opset(AREMU, r0) opset(ADIV, r0) opset(ADIVU, r0) + opset(AMULWVW, r0) + opset(AMULWVWU, r0) case AMULV: opset(AMULVU, r0) @@ -3230,6 +3232,10 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { return 0x3c << 15 // mulh.d case AMULHVU: return 0x3d << 15 // mulhu.d + case AMULWVW: + return 0x3e << 15 // mulw.d.w + case AMULWVWU: + return 0x3f << 15 // mulw.d.wu case ADIV: return 0x40 << 15 // div.w case ADIVU: diff --git a/src/cmd/internal/obj/riscv/asm_test.go b/src/cmd/internal/obj/riscv/asm_test.go index f40e57fa64..5b50d1533a 100644 --- a/src/cmd/internal/obj/riscv/asm_test.go +++ b/src/cmd/internal/obj/riscv/asm_test.go @@ -11,8 +11,8 @@ import ( "os" "os/exec" "path/filepath" + "regexp" "runtime" - "strings" "testing" ) @@ -48,10 +48,10 @@ func genLargeBranch(buf *bytes.Buffer) { fmt.Fprintln(buf, "TEXT f(SB),0,$0-0") fmt.Fprintln(buf, "BEQ X0, X0, label") for i := 0; i < 1<<19; i++ { - fmt.Fprintln(buf, "ADD $0, X0, X0") + fmt.Fprintln(buf, "ADD $0, X5, X0") } fmt.Fprintln(buf, "label:") - fmt.Fprintln(buf, "ADD $0, X0, X0") + fmt.Fprintln(buf, "ADD $0, X5, X0") } // TestLargeCall generates a large function (>1MB of text) with a call to @@ -112,11 +112,11 @@ func genLargeCall(buf *bytes.Buffer) { fmt.Fprintln(buf, "TEXT ·x(SB),0,$0-0") fmt.Fprintln(buf, "CALL ·y(SB)") for i := 0; i < 1<<19; i++ { - fmt.Fprintln(buf, "ADD 
$0, X0, X0") + fmt.Fprintln(buf, "ADD $0, X5, X0") } fmt.Fprintln(buf, "RET") fmt.Fprintln(buf, "TEXT ·y(SB),0,$0-0") - fmt.Fprintln(buf, "ADD $0, X0, X0") + fmt.Fprintln(buf, "ADD $0, X5, X0") fmt.Fprintln(buf, "RET") } @@ -301,9 +301,9 @@ TEXT _stub(SB),$0-0 // FENCE // NOP // FENCE - // RET - want := "0f 00 f0 0f 13 00 00 00 0f 00 f0 0f 67 80 00 00" - if !strings.Contains(string(out), want) { + // RET (CJALR or JALR) + want := regexp.MustCompile("0x0000 0f 00 f0 0f 13 00 00 00 0f 00 f0 0f (82 80|67 80 00 00) ") + if !want.Match(out) { t.Errorf("PCALIGN test failed - got %s\nwant %s", out, want) } } diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go index 60174a0b3a..a91395dd38 100644 --- a/src/cmd/internal/obj/riscv/cpu.go +++ b/src/cmd/internal/obj/riscv/cpu.go @@ -326,6 +326,9 @@ const ( NEED_GOT_PCREL_ITYPE_RELOC ) +const NEED_RELOC = NEED_JAL_RELOC | NEED_CALL_RELOC | NEED_PCREL_ITYPE_RELOC | + NEED_PCREL_STYPE_RELOC | NEED_GOT_PCREL_ITYPE_RELOC + // RISC-V mnemonics, as defined in the "opcodes" and "opcodes-pseudo" files // at https://github.com/riscv/riscv-opcodes. // diff --git a/src/cmd/internal/obj/riscv/doc.go b/src/cmd/internal/obj/riscv/doc.go new file mode 100644 index 0000000000..365bedd299 --- /dev/null +++ b/src/cmd/internal/obj/riscv/doc.go @@ -0,0 +1,297 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package riscv implements the riscv64 assembler. + +# Register naming + +The integer registers are named X0 through to X31, however X4 must be accessed +through its RISC-V ABI name, TP, and X27, which holds a pointer to the Go +routine structure, must be referred to as g. Additionally, when building in +shared mode, X3 is unavailable and must be accessed via its RISC-V ABI name, +GP. + +The floating-point registers are named F0 through to F31. + +The vector registers are named V0 through to V31. 
+ +Both integer and floating-point registers can be referred to by their RISC-V +ABI names, e.g., A0 or FT0, with the exception that X27 cannot be referred to +by its RISC-V ABI name, S11. It must be referred to as g. + +Some of the integer registers are used by the Go runtime and assembler - X26 is +the closure pointer, X27 points to the Go routine structure and X31 is a +temporary register used by the Go assembler. Use of X31 should be avoided in +hand-written assembly code as its value could be altered by the instruction +sequences emitted by the assembler. + +# Instruction naming + +Many RISC-V instructions contain one or more suffixes in their names. In the +[RISC-V ISA Manual] these suffixes are separated from each other and from the +instruction mnemonic by a dot ('.'). In the Go assembler, the +separators are omitted and the suffixes are written in upper case. + +Example: + + FMVWX <=> fmv.w.x + +# Rounding modes + +The Go toolchain does not set the FCSR register and requires the desired +rounding mode to be explicitly encoded within floating-point instructions. +The syntax the Go assembler uses to specify the rounding modes differs +from the syntax in the RISC-V specifications. In the [RISC-V ISA Manual] +the rounding mode is given as an extra operand at the end of an +assembly language instruction. In the Go assembler, the rounding modes are +converted to uppercase and follow the instruction mnemonic from which they +are separated with a dot ('.'). + +Example: + + FCVTLUS.RNE F0, X5 <=> fcvt.lu.s x5, f0, rne + +RTZ is assumed if the rounding mode is omitted. + +# RISC-V extensions + +By default the Go compiler targets the [rva20u64] profile. This profile mandates +all the general RISC-V instructions, allowing Go to use integer, multiplication, +division, floating-point and atomic instructions without having to +perform compile time or runtime checks to verify that their use is appropriate +for the target hardware. 
All widely available riscv64 devices support at least +[rva20u64]. The Go toolchain can be instructed to target later RISC-V profiles, +including [rva22u64] and [rva23u64], via the GORISCV64 environment variable. +Instructions that are provided by newer profiles cannot typically be used in +hand-written assembly code without compile time guards (or runtime checks) +that ensure the hardware supports them. + +The file asm_riscv64.h defines macros for each RISC-V extension that is enabled +by setting the GORISCV64 environment variable to a value other than [rva20u64]. +For example, if GORISCV64=rva22u64 the macros hasZba, hasZbb and hasZbs will be +defined. If GORISCV64=rva23u64 hasV will be defined in addition to hasZba, +hasZbb and hasZbs. These macros can be used to determine whether it's safe +to use an instruction in hand-written assembly. + +It is not always necessary to include asm_riscv64.h and use #ifdefs in your +code to safely take advantage of instructions present in the [rva22u64] +profile. In some cases the assembler can generate [rva20u64] compatible code +even when an [rva22u64] instruction is used in an assembly source file. When +GORISCV64=rva20u64 the assembler will synthesize certain [rva22u64] +instructions, e.g., ANDN, using multiple [rva20u64] instructions. Instructions +such as ANDN can then be freely used in assembly code without checking to see +whether the instruction is supported by the target profile. When building a +source file containing the ANDN instruction with GORISCV64=rva22u64 the +assembler will emit the Zbb ANDN instruction directly. When building the same +source file with GORISCV64=rva20u64 the assembler will emit multiple [rva20u64] +instructions to synthesize ANDN. + +The assembler will also use [rva22u64] instructions to implement the zero and +sign extension instructions, e.g., MOVB and MOVHU, when GORISCV64=rva22u64 or +greater. 
+ +The instructions not implemented in the default profile ([rva20u64]) that can +be safely used in assembly code without compile time checks are: + + - ANDN + - MAX + - MAXU + - MIN + - MINU + - MOVB + - MOVH + - MOVHU + - MOVWU + - ORN + - ROL + - ROLW + - ROR + - RORI + - RORIW + - RORW + - XNOR + +# Operand ordering + +The ordering used for instruction operands in the Go assembler differs from the +ordering defined in the [RISC-V ISA Manual]. + +1. R-Type instructions + +R-Type instructions are written in the reverse order to that given in the +[RISC-V ISA Manual], with the register order being rs2, rs1, rd. + +Examples: + + ADD X10, X11, X12 <=> add x12, x11, x10 + FADDD F10, F11, F12 <=> fadd.d f12, f11, f10 + +2. I-Type arithmetic instructions + +I-Type arithmetic instructions (not loads, fences, ebreak, ecall) use the same +ordering as the R-Type instructions, typically, imm12, rs1, rd. + +Examples: + + ADDI $1, X11, X12 <=> addi x12, x11, 1 + SLTI $1, X11, X12 <=> slti x12, x11, 1 + +3. Loads and Stores + +Load and store instructions are written with the source operand (whether it be a +register or a memory address) first, followed by the destination operand. + +Examples: + + MOV 16(X2), X10 <=> ld x10, 16(x2) + MOV X10, (X2) <=> sd x10, 0(x2) + +4. Branch instructions + +The branch instructions use the same operand ordering as is given in the +[RISC-V ISA Manual], e.g., rs1, rs2, label. + +Example: + + BLT X12, X23, loop1 <=> blt x12, x23, loop1 + +BLT X12, X23, label will jump to label if X12 < X23. Note this is not the +same ordering as is used for the SLT instructions. + +5. FMA instructions + +The Go assembler uses a different ordering for the RISC-V FMA operands to +the ordering given in the [RISC-V ISA Manual]. The operands are rotated one +place to the left, so that the destination operand comes last. + +Example: + + FMADDS F1, F2, F3, F4 <=> fmadd.s f4, f1, f2, f3 + +6. 
AMO instructions + +The ordering used for the AMO operations is rs2, rs1, rd, i.e., the operands +as specified in the [RISC-V ISA Manual] are rotated one place to the left. + +Example: + + AMOSWAPW X5, (X6), X7 <=> amoswap.w x7, x5, (x6) + +7. Vector instructions + +The VSETVLI instruction uses the same symbolic names as the [RISC-V ISA Manual] +to represent the components of vtype, with the exception +that they are written in upper case. The ordering of the operands in the Go +assembler differs from the [RISC-V ISA Manual] in that the operands are +rotated one place to the left so that the destination register, the register +that holds the new vl, is the last operand. + +Example: + + VSETVLI X10, E8, M1, TU, MU, X12 <=> vsetvli x12, x10, e8, m1, tu, mu + +Vector load and store instructions follow the pattern set by scalar loads and +stores, i.e., the source is always the first operand and the destination the +last. However, the ordering of the operands of these instructions is +complicated by the optional mask register and, in some cases, the use of an +additional stride or index register. In the Go assembler the index and stride +registers appear as the second operand in indexed or strided loads and stores, +while the mask register, if present, is always the penultimate operand. 
+ +Examples: + + VLE8V (X10), V3 <=> vle8.v v3, (x10) + VSE8V V3, (X10) <=> vse8.v v3, (x10) + VLE8V (X10), V0, V3 <=> vle8.v v3, (x10), v0.t + VSE8V V3, V0, (X10) <=> vse8.v v3, (x10), v0.t + VLSE8V (X10), X11, V3 <=> vlse8.v v3, (x10), x11 + VSSE8V V3, X11, (X10) <=> vsse8.v v3, (x10), x11 + VLSE8V (X10), X11, V0, V3 <=> vlse8.v v3, (x10), x11, v0.t + VSSE8V V3, X11, V0, (X10) <=> vsse8.v v3, (x10), x11, v0.t + VLUXEI8V (X10), V2, V3 <=> vluxei8.v v3, (x10), v2 + VSUXEI8V V3, V2, (X10) <=> vsuxei8.v v3, (x10), v2 + VLUXEI8V (X10), V2, V0, V3 <=> vluxei8.v v3, (x10), v2, v0.t + VSUXEI8V V3, V2, V0, (X10) <=> vsuxei8.v v3, (x10), v2, v0.t + VL1RE8V (X10), V3 <=> vl1re8.v v3, (x10) + VS1RV V3, (X11) <=> vs1r.v v3, (x11) + +The ordering of operands for two and three argument vector arithmetic instructions is +reversed in the Go assembler. + +Examples: + + VMVVV V2, V3 <=> vmv.v.v v3, v2 + VADDVV V1, V2, V3 <=> vadd.vv v3, v2, v1 + VADDVX X10, V2, V3 <=> vadd.vx v3, v2, x10 + VMADCVI $15, V2, V3 <=> vmadc.vi v3, v2, 15 + +The mask register, when specified, is always the penultimate operand in a vector +arithmetic instruction, appearing before the destination register. + +Examples: + + VANDVV V1, V2, V0, V3 <=> vand.vv v3, v2, v1, v0.t + +# Ternary instructions + +The Go assembler allows the second operand to be omitted from most ternary +instructions if it matches the third (destination) operand. + +Examples: + + ADD X10, X12, X12 <=> ADD X10, X12 + ANDI $3, X12, X12 <=> ANDI $3, X12 + +The use of this abbreviated syntax is encouraged. + +# Ordering of atomic instructions + +It is not possible to specify the ordering bits in the FENCE, LR, SC or AMO +instructions. The FENCE instruction is always emitted as a full fence, the +acquire and release bits are always set for the AMO instructions, the acquire +bit is always set for the LR instructions while the release bit is set for +the SC instructions. 
+ +# Immediate operands + +In many cases, where an R-Type instruction has a corresponding I-Type +instruction, the R-Type mnemonic can be used in place of the I-Type mnemonic. +The assembler assumes that the immediate form of the instruction was intended +when the first operand is given as an immediate value rather than a register. + +Example: + + AND $3, X12, X13 <=> ANDI $3, X12, X13 + +# Integer constant materialization + +The MOV instruction can be used to set a register to the value of any 64 bit +constant literal. The way this is achieved by the assembler varies depending +on the value of the constant. Where possible the assembler will synthesize the +constant using one or more RISC-V arithmetic instructions. If it is unable +to easily materialize the constant it will load the 64 bit literal from memory. + +A 32 bit constant literal can be specified as an argument to ADDI, ANDI, ORI and +XORI. If the specified literal does not fit into 12 bits the assembler will +generate extra instructions to synthesize it. + +Integer constants provided as operands to all other instructions must fit into +the number of bits allowed by the instructions' encodings for immediate values. +Otherwise, an error will be generated. + +# Floating point constant materialization + +The MOVF and MOVD instructions can be used to set a register to the value +of any 32 bit or 64 bit floating point constant literal, respectively. Unless +the constant literal is 0.0, MOVF and MOVD will be encoded as FLW and FLD +instructions that load the constant from a location within the program's +binary. 
+ +[RISC-V ISA Manual]: https://github.com/riscv/riscv-isa-manual +[rva20u64]: https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc#51-rva20u64-profile +[rva22u64]: https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc#rva22u64-profile +[rva23u64]: https://github.com/riscv/riscv-profiles/blob/main/src/rva23-profile.adoc#rva23u64-profile +*/ +package riscv diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 3deab34d31..043be17c07 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -414,10 +414,10 @@ func containsCall(sym *obj.LSym) bool { // setPCs sets the Pc field in all instructions reachable from p. // It uses pc as the initial value and returns the next available pc. -func setPCs(p *obj.Prog, pc int64) int64 { +func setPCs(p *obj.Prog, pc int64, compress bool) int64 { for ; p != nil; p = p.Link { p.Pc = pc - for _, ins := range instructionsForProg(p) { + for _, ins := range instructionsForProg(p, compress) { pc += int64(ins.length()) } @@ -671,7 +671,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // a fixed point will be reached). No attempt to handle functions > 2GiB. for { big, rescan := false, false - maxPC := setPCs(cursym.Func().Text, 0) + maxPC := setPCs(cursym.Func().Text, 0, ctxt.CompressInstructions) if maxPC+maxTrampSize > (1 << 20) { big = true } @@ -801,7 +801,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // Validate all instructions - this provides nice error messages. 
for p := cursym.Func().Text; p != nil; p = p.Link { - for _, ins := range instructionsForProg(p) { + for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) { ins.validate(ctxt) } } @@ -1141,6 +1141,14 @@ func wantImmU(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) { } } +func isScaledImmI(imm int64, nbits uint, scale int64) bool { + return immFits(imm, nbits, true) == nil && imm%scale == 0 +} + +func isScaledImmU(imm int64, nbits uint, scale int64) bool { + return immFits(imm, nbits, false) == nil && imm%scale == 0 +} + func wantScaledImm(ctxt *obj.Link, ins *instruction, imm int64, nbits uint, scale int64, signed bool) { if err := immFits(imm, nbits, signed); err != nil { ctxt.Diag("%v: %v", ins, err) @@ -1180,6 +1188,10 @@ func wantIntReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { wantReg(ctxt, ins, pos, "integer", r, REG_X0, REG_X31) } +func isIntPrimeReg(r uint32) bool { + return r >= REG_X8 && r <= REG_X15 +} + // wantIntPrimeReg checks that r is an integer register that can be used // in a prime register field of a compressed instruction. func wantIntPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { @@ -1191,6 +1203,10 @@ func wantFloatReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { wantReg(ctxt, ins, pos, "float", r, REG_F0, REG_F31) } +func isFloatPrimeReg(r uint32) bool { + return r >= REG_F8 && r <= REG_F15 +} + // wantFloatPrimeReg checks that r is an floating-point register that can // be used in a prime register field of a compressed instruction. 
func wantFloatPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { @@ -3515,6 +3531,147 @@ func (ins *instruction) usesRegTmp() bool { return ins.rd == REG_TMP || ins.rs1 == REG_TMP || ins.rs2 == REG_TMP } +func (ins *instruction) compress() { + switch ins.as { + case ALW: + if ins.rd != REG_X0 && ins.rs1 == REG_SP && isScaledImmU(ins.imm, 8, 4) { + ins.as, ins.rs1, ins.rs2 = ACLWSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) { + ins.as = ACLW + } + + case ALD: + if ins.rs1 == REG_SP && ins.rd != REG_X0 && isScaledImmU(ins.imm, 9, 8) { + ins.as, ins.rs1, ins.rs2 = ACLDSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) { + ins.as = ACLD + } + + case AFLD: + if ins.rs1 == REG_SP && isScaledImmU(ins.imm, 9, 8) { + ins.as, ins.rs1, ins.rs2 = ACFLDSP, obj.REG_NONE, ins.rs1 + } else if isFloatPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) { + ins.as = ACFLD + } + + case ASW: + if ins.rd == REG_SP && isScaledImmU(ins.imm, 8, 4) { + ins.as, ins.rs1, ins.rs2 = ACSWSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) { + ins.as, ins.rd, ins.rs1, ins.rs2 = ACSW, obj.REG_NONE, ins.rd, ins.rs1 + } + + case ASD: + if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) { + ins.as, ins.rs1, ins.rs2 = ACSDSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) { + ins.as, ins.rd, ins.rs1, ins.rs2 = ACSD, obj.REG_NONE, ins.rd, ins.rs1 + } + + case AFSD: + if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) { + ins.as, ins.rs1, ins.rs2 = ACFSDSP, obj.REG_NONE, ins.rs1 + } else if isIntPrimeReg(ins.rd) && isFloatPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) { + ins.as, ins.rd, ins.rs1, ins.rs2 = ACFSD, obj.REG_NONE, ins.rd, ins.rs1 + } + + case AADDI: + if ins.rd == REG_SP && ins.rs1 == 
REG_SP && ins.imm != 0 && isScaledImmI(ins.imm, 10, 16) { + ins.as = ACADDI16SP + } else if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 && immIFits(ins.imm, 6) == nil { + ins.as = ACADDI + } else if isIntPrimeReg(ins.rd) && ins.rs1 == REG_SP && ins.imm != 0 && isScaledImmU(ins.imm, 10, 4) { + ins.as = ACADDI4SPN + } else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && immIFits(ins.imm, 6) == nil { + ins.as, ins.rs1 = ACLI, obj.REG_NONE + } else if ins.rd != REG_X0 && ins.rs1 != REG_X0 && ins.imm == 0 { + ins.as, ins.rs1, ins.rs2 = ACMV, obj.REG_NONE, ins.rs1 + } else if ins.rd == REG_X0 && ins.rs1 == REG_X0 && ins.imm == 0 { + ins.as, ins.rs1 = ACNOP, ins.rd + } + + case AADDIW: + if ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil { + ins.as = ACADDIW + } + + case ALUI: + if ins.rd != REG_X0 && ins.rd != REG_SP && ins.imm != 0 && immIFits(ins.imm, 6) == nil { + ins.as = ACLUI + } + + case ASLLI: + if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 { + ins.as = ACSLLI + } + + case ASRLI: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 { + ins.as = ACSRLI + } + + case ASRAI: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 { + ins.as = ACSRAI + } + + case AANDI: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil { + ins.as = ACANDI + } + + case AADD: + if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.rs2 != REG_X0 { + ins.as = ACADD + } else if ins.rd != REG_X0 && ins.rd == ins.rs2 && ins.rs1 != REG_X0 { + ins.as, ins.rs1, ins.rs2 = ACADD, ins.rs2, ins.rs1 + } else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && ins.rs2 != REG_X0 { + ins.as = ACMV + } + + case AADDW: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACADDW + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 { + ins.as, ins.rs1, ins.rs2 = ACADDW, ins.rs2, ins.rs1 + } + + case ASUB: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = 
ACSUB + } + + case ASUBW: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACSUBW + } + + case AAND: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACAND + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 { + ins.as, ins.rs1, ins.rs2 = ACAND, ins.rs2, ins.rs1 + } + + case AOR: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACOR + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 { + ins.as, ins.rs1, ins.rs2 = ACOR, ins.rs2, ins.rs1 + } + + case AXOR: + if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) { + ins.as = ACXOR + } else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 { + ins.as, ins.rs1, ins.rs2 = ACXOR, ins.rs2, ins.rs1 + } + + case AEBREAK: + ins.as, ins.rd, ins.rs1 = ACEBREAK, obj.REG_NONE, obj.REG_NONE + } +} + // instructionForProg returns the default *obj.Prog to instruction mapping. func instructionForProg(p *obj.Prog) *instruction { ins := &instruction{ @@ -4057,7 +4214,7 @@ func instructionsForMinMax(p *obj.Prog, ins *instruction) []*instruction { } // instructionsForProg returns the machine instructions for an *obj.Prog. -func instructionsForProg(p *obj.Prog) []*instruction { +func instructionsForProg(p *obj.Prog, compress bool) []*instruction { ins := instructionForProg(p) inss := []*instruction{ins} @@ -4710,6 +4867,15 @@ func instructionsForProg(p *obj.Prog) []*instruction { ins.rs1, ins.rs2 = obj.REG_NONE, REG_V0 } + // Only compress instructions when there is no relocation, since + // relocation relies on knowledge about the exact instructions that + // are in use. 
+ if compress && p.Mark&NEED_RELOC == 0 { + for _, ins := range inss { + ins.compress() + } + } + for _, ins := range inss { ins.p = p } @@ -4799,15 +4965,22 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { v := pcAlignPadLength(p.Pc, alignedValue) offset := p.Pc for ; v >= 4; v -= 4 { - // NOP - cursym.WriteBytes(ctxt, offset, []byte{0x13, 0, 0, 0}) + // NOP (ADDI $0, X0, X0) + cursym.WriteBytes(ctxt, offset, []byte{0x13, 0x00, 0x00, 0x00}) offset += 4 } + if v == 2 { + // CNOP + cursym.WriteBytes(ctxt, offset, []byte{0x01, 0x00}) + offset += 2 + } else if v != 0 { + ctxt.Diag("bad PCALIGN pad length") + } continue } offset := p.Pc - for _, ins := range instructionsForProg(p) { + for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) { if ic, err := ins.encode(); err == nil { cursym.WriteInt(ctxt, offset, ins.length(), int64(ic)) offset += int64(ins.length()) diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index 9c8e5e96f8..ed41d81388 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -423,8 +423,12 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { q.From.Reg = reg } } - if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN { - ctxt.Diag("don't know how to handle %v with -dynlink", p) + from3 := p.GetFrom3() + for i := range p.RestArgs { + a := &p.RestArgs[i].Addr + if a != from3 && a.Name == obj.NAME_EXTERN && !a.Sym.Local() { + ctxt.Diag("don't know how to handle %v with -dynlink", p) + } } var source *obj.Addr // MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry @@ -434,9 +438,17 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p) } + if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() { + ctxt.Diag("cannot handle NAME_EXTERN on multiple 
operands in %v with -dynlink", p) + } source = &p.From } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() { + ctxt.Diag("cannot handle NAME_EXTERN on multiple operands in %v with -dynlink", p) + } source = &p.To + } else if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() { + source = from3 } else { return } @@ -501,9 +513,7 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { p2.As = p.As p2.From = p.From p2.To = p.To - if from3 := p.GetFrom3(); from3 != nil { - p2.AddRestSource(*from3) - } + p2.RestArgs = p.RestArgs if p.From.Name == obj.NAME_EXTERN { p2.From.Reg = reg p2.From.Name = obj.NAME_NONE @@ -512,6 +522,11 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { p2.To.Reg = reg p2.To.Name = obj.NAME_NONE p2.To.Sym = nil + } else if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN { + from3 = p2.GetFrom3() + from3.Reg = reg + from3.Name = obj.NAME_NONE + from3.Sym = nil } else { return } |
