From 967465da2975fe4322080703ce5a77ea90752829 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Mon, 31 Aug 2020 09:43:40 -0400 Subject: cmd/compile: use combined shifts to improve array addressing on ppc64x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds rules to find pairs of instructions that can be combined into a single shift. These instruction sequences are common in array addressing within loops. Improvements can be seen in many crypto packages and the hash packages. These are based on the extended mnemonics found in the ISA sections C.8.1 and C.8.2. Some rules in PPC64.rules were moved because the ordering prevented some matching. The following results were generated on power9. hash/crc32: CRC32/poly=Koopman/size=40/align=0 195ns ± 0% 163ns ± 0% -16.41% CRC32/poly=Koopman/size=40/align=1 200ns ± 0% 163ns ± 0% -18.50% CRC32/poly=Koopman/size=512/align=0 1.98µs ± 0% 1.67µs ± 0% -15.46% CRC32/poly=Koopman/size=512/align=1 1.98µs ± 0% 1.69µs ± 0% -14.80% CRC32/poly=Koopman/size=1kB/align=0 3.90µs ± 0% 3.31µs ± 0% -15.27% CRC32/poly=Koopman/size=1kB/align=1 3.85µs ± 0% 3.31µs ± 0% -14.15% CRC32/poly=Koopman/size=4kB/align=0 15.3µs ± 0% 13.1µs ± 0% -14.22% CRC32/poly=Koopman/size=4kB/align=1 15.4µs ± 0% 13.1µs ± 0% -14.79% CRC32/poly=Koopman/size=32kB/align=0 137µs ± 0% 105µs ± 0% -23.56% CRC32/poly=Koopman/size=32kB/align=1 137µs ± 0% 105µs ± 0% -23.53% crypto/rc4: RC4_128 733ns ± 0% 650ns ± 0% -11.32% (p=1.000 n=1+1) RC4_1K 5.80µs ± 0% 5.17µs ± 0% -10.89% (p=1.000 n=1+1) RC4_8K 45.7µs ± 0% 40.8µs ± 0% -10.73% (p=1.000 n=1+1) crypto/sha1: Hash8Bytes 635ns ± 0% 613ns ± 0% -3.46% (p=1.000 n=1+1) Hash320Bytes 2.30µs ± 0% 2.18µs ± 0% -5.38% (p=1.000 n=1+1) Hash1K 5.88µs ± 0% 5.38µs ± 0% -8.62% (p=1.000 n=1+1) Hash8K 42.0µs ± 0% 37.9µs ± 0% -9.75% (p=1.000 n=1+1) There are other improvements found in golang.org/x/crypto which are all in the range of 5-15%. 
Change-Id: I193471fbcf674151ffe2edab212799d9b08dfb8c Reviewed-on: https://go-review.googlesource.com/c/go/+/252097 Trust: Lynn Boger Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Carlos Eduardo Seo --- src/cmd/internal/obj/ppc64/a.out.go | 4 ++ src/cmd/internal/obj/ppc64/anames.go | 4 ++ src/cmd/internal/obj/ppc64/asm9.go | 74 ++++++++++++++++++++++++++++++------ 3 files changed, 71 insertions(+), 11 deletions(-) (limited to 'src/cmd/internal/obj/ppc64') diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go index 8b32692778..f438803fb5 100644 --- a/src/cmd/internal/obj/ppc64/a.out.go +++ b/src/cmd/internal/obj/ppc64/a.out.go @@ -575,6 +575,7 @@ const ( ARLWMICC ARLWNM ARLWNMCC + ACLRLSLWI ASLW ASLWCC ASRW @@ -716,6 +717,9 @@ const ( ARLDCLCC ARLDICL ARLDICLCC + ARLDIC + ARLDICCC + ACLRLSLDI AROTL AROTLW ASLBIA diff --git a/src/cmd/internal/obj/ppc64/anames.go b/src/cmd/internal/obj/ppc64/anames.go index 287011877c..accd87fe00 100644 --- a/src/cmd/internal/obj/ppc64/anames.go +++ b/src/cmd/internal/obj/ppc64/anames.go @@ -180,6 +180,7 @@ var Anames = []string{ "RLWMICC", "RLWNM", "RLWNMCC", + "CLRLSLWI", "SLW", "SLWCC", "SRW", @@ -312,6 +313,9 @@ var Anames = []string{ "RLDCLCC", "RLDICL", "RLDICLCC", + "RLDIC", + "RLDICCC", + "CLRLSLDI", "ROTL", "ROTLW", "SLBIA", diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 98b453de6c..60dda72507 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -1904,6 +1904,7 @@ func buildop(ctxt *obj.Link) { opset(ARLWMICC, r0) opset(ARLWNM, r0) opset(ARLWNMCC, r0) + opset(ACLRLSLWI, r0) case ARLDMI: opset(ARLDMICC, r0) @@ -1922,6 +1923,9 @@ func buildop(ctxt *obj.Link) { opset(ARLDICLCC, r0) opset(ARLDICR, r0) opset(ARLDICRCC, r0) + opset(ARLDIC, r0) + opset(ARLDICCC, r0) + opset(ACLRLSLDI, r0) case AFMOVD: opset(AFMOVDCC, r0) @@ -2734,13 +2738,31 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { 
case ARLDICR, ARLDICRCC: me := int(d) sh := c.regoff(&p.From) + if me < 0 || me > 63 || sh > 63 { + c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh) + } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(me)) - case ARLDICL, ARLDICLCC: + case ARLDICL, ARLDICLCC, ARLDIC, ARLDICCC: mb := int(d) sh := c.regoff(&p.From) + if mb < 0 || mb > 63 || sh > 63 { + c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh) + } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(mb)) + case ACLRLSLDI: + // This is an extended mnemonic defined in the ISA section C.8.1 + // clrlsldi ra,rs,n,b --> rldic ra,rs,n,b-n + // It maps onto RLDIC so is directly generated here based on the operands from + // the clrlsldi. + b := int(d) + n := c.regoff(&p.From) + if n > int32(b) || b > 63 { + c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b) + } + o1 = AOP_RLDIC(OP_RLDIC, uint32(p.To.Reg), uint32(r), uint32(n), uint32(b)-uint32(n)) + default: c.ctxt.Diag("unexpected op in rldc case\n%v", p) a = 0 @@ -3354,18 +3376,43 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { case 62: /* rlwmi $sh,s,$mask,a */ v := c.regoff(&p.From) - - var mask [2]uint8 - c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) - o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v)) - o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + switch p.As { + case ACLRLSLWI: + b := c.regoff(p.GetFrom3()) + // This is an extended mnemonic described in the ISA C.8.2 + // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // It maps onto rlwinm which is directly generated here. 
+ if v < 0 || v > 32 || b > 32 { + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + } + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v)) + default: + var mask [2]uint8 + c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) + o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v)) + o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + } case 63: /* rlwmi b,s,$mask,a */ - var mask [2]uint8 - c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) - - o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(p.From.Reg)) - o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + v := c.regoff(&p.From) + switch p.As { + case ACLRLSLWI: + b := c.regoff(p.GetFrom3()) + if v > b || b > 32 { + // Message will match operands from the ISA even though in the + // code it uses 'v' + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + } + // This is an extended mnemonic described in the ISA C.8.2 + // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // It generates the rlwinm directly here. + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v)) + default: + var mask [2]uint8 + c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) + o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v)) + o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + } case 64: /* mtfsf fr[, $m] {,fpcsr} */ var v int32 @@ -4277,6 +4324,11 @@ func (c *ctxt9) oprrr(a obj.As) uint32 { case ARLDICRCC: return OPVCC(30, 0, 0, 1) | 2<<1 // rldicr. + case ARLDIC: + return OPVCC(30, 0, 0, 0) | 4<<1 // rldic + case ARLDICCC: + return OPVCC(30, 0, 0, 1) | 4<<1 // rldic. 
+ case ASYSCALL: return OPVCC(17, 1, 0, 0) -- cgit v1.3 From a424f6e45e29960c933a7ccc1cd8fc9bb2766f15 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Wed, 23 Sep 2020 11:06:39 -0400 Subject: cmd/asm,cmd/compile,cmd/internal/obj/ppc64: add extswsli support on power9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for the extswsli instruction which combines extsw followed by a shift. New benchmark demonstrates the improvement: name old time/op new time/op delta ExtShift 1.34µs ± 0% 1.30µs ± 0% -3.15% (p=0.057 n=4+3) Change-Id: I21b410676fdf15d20e0cbbaa75d7c6dcd3bbb7b0 Reviewed-on: https://go-review.googlesource.com/c/go/+/257017 Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Carlos Eduardo Seo Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 1 + src/cmd/compile/internal/gc/bench_test.go | 12 ++++ src/cmd/compile/internal/ppc64/ssa.go | 2 +- src/cmd/compile/internal/ssa/gen/PPC64.rules | 2 + src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 1 + src/cmd/compile/internal/ssa/opGen.go | 15 ++++ src/cmd/compile/internal/ssa/rewritePPC64.go | 36 ++++++++++ src/cmd/internal/obj/ppc64/a.out.go | 2 + src/cmd/internal/obj/ppc64/anames.go | 2 + src/cmd/internal/obj/ppc64/asm9.go | 104 +++++++++++++++++---------- test/codegen/shift.go | 7 +- 11 files changed, 142 insertions(+), 42 deletions(-) (limited to 'src/cmd/internal/obj/ppc64') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s index e26f6f8933..88a7609ba8 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64enc.s @@ -266,6 +266,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 SRDCC R3, R4 // 7c841c37 ROTLW $16, R3, R4 // 5464803e ROTLW R3, R4, R5 // 5c85183e + EXTSWSLI $3, R4, R5 // 7c851ef4 RLWMI $7, R3, $65535, R6 // 50663c3e RLWMICC $7, R3, $65535, R6 // 50663c3f RLWNM $3, R4, $7, R6 // 54861f7e diff --git 
a/src/cmd/compile/internal/gc/bench_test.go b/src/cmd/compile/internal/gc/bench_test.go index 09aaf428c3..a2887f2f7b 100644 --- a/src/cmd/compile/internal/gc/bench_test.go +++ b/src/cmd/compile/internal/gc/bench_test.go @@ -20,6 +20,18 @@ func BenchmarkLoadAdd(b *testing.B) { } } +// Added for ppc64 extswsli on power9 +func BenchmarkExtShift(b *testing.B) { + x := make([]int32, 1024) + for i := 0; i < b.N; i++ { + var s int64 + for i := range x { + s ^= int64(x[i]+32) * 8 + } + globl = s + } +} + func BenchmarkModify(b *testing.B) { a := make([]int64, 1024) v := globl diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index 4a83a0bdd7..a5fbdaffba 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -677,7 +677,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.From.Reg = v.Args[0].Reg() case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst, - ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst: + ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst: p := s.Prog(v.Op.Asm()) p.Reg = v.Args[0].Reg() p.From.Type = obj.TYPE_CONST diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index 774d5096de..de30d003e6 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -1025,6 +1025,8 @@ (SLWconst [c] z:(MOVWZreg x)) && z.Uses == 1 && c < 24 => (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) (SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) (SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI 
[newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +// special case for power9 +(SL(W|D)const [c] z:(MOVWreg x)) && c < 32 && objabi.GOPPC64 >= 9 => (EXTSWSLconst [c] x) // Lose widening ops fed to stores (MOVBstore [off] {sym} ptr (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) => (MOVBstore [off] {sym} ptr x mem) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index ed99c40cd2..28317928a8 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -223,6 +223,7 @@ func init() { {name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"}, // arg0 rotate left by auxInt bits {name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits + {name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"}, {name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros {name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit) diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 1fc0f7ea79..1fe00c7026 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1865,6 +1865,7 @@ const ( OpPPC64SLWconst OpPPC64ROTLconst OpPPC64ROTLWconst + OpPPC64EXTSWSLconst OpPPC64CNTLZD OpPPC64CNTLZW OpPPC64CNTTZD @@ -24849,6 +24850,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "EXTSWSLconst", + auxType: auxInt64, + argLen: 1, + asm: ppc64.AEXTSWSLI, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, { name: "CNTLZD", argLen: 1, diff --git 
a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 12b08824b5..29ec3992f2 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -12877,6 +12877,24 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { } break } + // match: (SLDconst [c] z:(MOVWreg x)) + // cond: c < 32 && objabi.GOPPC64 >= 9 + // result: (EXTSWSLconst [c] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWreg { + break + } + x := z.Args[0] + if !(c < 32 && objabi.GOPPC64 >= 9) { + break + } + v.reset(OpPPC64EXTSWSLconst) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } return false } func rewriteValuePPC64_OpPPC64SLW(v *Value) bool { @@ -13000,6 +13018,24 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { } break } + // match: (SLWconst [c] z:(MOVWreg x)) + // cond: c < 32 && objabi.GOPPC64 >= 9 + // result: (EXTSWSLconst [c] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWreg { + break + } + x := z.Args[0] + if !(c < 32 && objabi.GOPPC64 >= 9) { + break + } + v.reset(OpPPC64EXTSWSLconst) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } return false } func rewriteValuePPC64_OpPPC64SRAD(v *Value) bool { diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go index f438803fb5..4c97302f83 100644 --- a/src/cmd/internal/obj/ppc64/a.out.go +++ b/src/cmd/internal/obj/ppc64/a.out.go @@ -733,6 +733,8 @@ const ( ASRAD ASRADCC ASRDCC + AEXTSWSLI + AEXTSWSLICC ASTDCCC ATD diff --git a/src/cmd/internal/obj/ppc64/anames.go b/src/cmd/internal/obj/ppc64/anames.go index accd87fe00..fca4b3e355 100644 --- a/src/cmd/internal/obj/ppc64/anames.go +++ b/src/cmd/internal/obj/ppc64/anames.go @@ -329,6 +329,8 @@ var Anames = []string{ "SRAD", "SRADCC", "SRDCC", + "EXTSWSLI", + "EXTSWSLICC", "STDCCC", "TD", "DWORD", diff --git a/src/cmd/internal/obj/ppc64/asm9.go 
b/src/cmd/internal/obj/ppc64/asm9.go index 60dda72507..9f06bdf8b3 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -160,6 +160,8 @@ var optab = []Optab{ {ASLD, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0}, {ASLD, C_SCON, C_REG, C_NONE, C_REG, 25, 4, 0}, {ASLD, C_SCON, C_NONE, C_NONE, C_REG, 25, 4, 0}, + {AEXTSWSLI, C_SCON, C_NONE, C_NONE, C_REG, 25, 4, 0}, + {AEXTSWSLI, C_SCON, C_REG, C_NONE, C_REG, 25, 4, 0}, {ASLW, C_SCON, C_REG, C_NONE, C_REG, 57, 4, 0}, {ASLW, C_SCON, C_NONE, C_NONE, C_REG, 57, 4, 0}, {ASRAW, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0}, @@ -1877,6 +1879,9 @@ func buildop(ctxt *obj.Link) { case ASRAW: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */ opset(ASRAWCC, r0) + case AEXTSWSLI: + opset(AEXTSWSLICC, r0) + case ASRAD: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */ opset(ASRADCC, r0) @@ -2189,49 +2194,54 @@ func AOP_RLDIC(op uint32, a uint32, s uint32, sh uint32, m uint32) uint32 { return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 | (m&31)<<6 | ((m&32)>>5)<<5 } +func AOP_EXTSWSLI(op uint32, a uint32, s uint32, sh uint32) uint32 { + return op | (a&31)<<21 | (s&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 +} + func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 { return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6 } const ( /* each rhs is OPVCC(_, _, _, _) */ - OP_ADD = 31<<26 | 266<<1 | 0<<10 | 0 - OP_ADDI = 14<<26 | 0<<1 | 0<<10 | 0 - OP_ADDIS = 15<<26 | 0<<1 | 0<<10 | 0 - OP_ANDI = 28<<26 | 0<<1 | 0<<10 | 0 - OP_EXTSB = 31<<26 | 954<<1 | 0<<10 | 0 - OP_EXTSH = 31<<26 | 922<<1 | 0<<10 | 0 - OP_EXTSW = 31<<26 | 986<<1 | 0<<10 | 0 - OP_ISEL = 31<<26 | 15<<1 | 0<<10 | 0 - OP_MCRF = 19<<26 | 0<<1 | 0<<10 | 0 - OP_MCRFS = 63<<26 | 64<<1 | 0<<10 | 0 - OP_MCRXR = 31<<26 | 512<<1 | 0<<10 | 0 - OP_MFCR = 31<<26 | 19<<1 | 0<<10 | 0 - OP_MFFS = 63<<26 | 583<<1 | 0<<10 | 0 - OP_MFMSR = 31<<26 | 83<<1 | 0<<10 | 0 - OP_MFSPR = 31<<26 | 339<<1 | 0<<10 | 0 - OP_MFSR = 31<<26 | 595<<1 | 0<<10 | 0 - 
OP_MFSRIN = 31<<26 | 659<<1 | 0<<10 | 0 - OP_MTCRF = 31<<26 | 144<<1 | 0<<10 | 0 - OP_MTFSF = 63<<26 | 711<<1 | 0<<10 | 0 - OP_MTFSFI = 63<<26 | 134<<1 | 0<<10 | 0 - OP_MTMSR = 31<<26 | 146<<1 | 0<<10 | 0 - OP_MTMSRD = 31<<26 | 178<<1 | 0<<10 | 0 - OP_MTSPR = 31<<26 | 467<<1 | 0<<10 | 0 - OP_MTSR = 31<<26 | 210<<1 | 0<<10 | 0 - OP_MTSRIN = 31<<26 | 242<<1 | 0<<10 | 0 - OP_MULLW = 31<<26 | 235<<1 | 0<<10 | 0 - OP_MULLD = 31<<26 | 233<<1 | 0<<10 | 0 - OP_OR = 31<<26 | 444<<1 | 0<<10 | 0 - OP_ORI = 24<<26 | 0<<1 | 0<<10 | 0 - OP_ORIS = 25<<26 | 0<<1 | 0<<10 | 0 - OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0 - OP_RLWNM = 23<<26 | 0<<1 | 0<<10 | 0 - OP_SUBF = 31<<26 | 40<<1 | 0<<10 | 0 - OP_RLDIC = 30<<26 | 4<<1 | 0<<10 | 0 - OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0 - OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0 - OP_RLDCL = 30<<26 | 8<<1 | 0<<10 | 0 + OP_ADD = 31<<26 | 266<<1 | 0<<10 | 0 + OP_ADDI = 14<<26 | 0<<1 | 0<<10 | 0 + OP_ADDIS = 15<<26 | 0<<1 | 0<<10 | 0 + OP_ANDI = 28<<26 | 0<<1 | 0<<10 | 0 + OP_EXTSB = 31<<26 | 954<<1 | 0<<10 | 0 + OP_EXTSH = 31<<26 | 922<<1 | 0<<10 | 0 + OP_EXTSW = 31<<26 | 986<<1 | 0<<10 | 0 + OP_ISEL = 31<<26 | 15<<1 | 0<<10 | 0 + OP_MCRF = 19<<26 | 0<<1 | 0<<10 | 0 + OP_MCRFS = 63<<26 | 64<<1 | 0<<10 | 0 + OP_MCRXR = 31<<26 | 512<<1 | 0<<10 | 0 + OP_MFCR = 31<<26 | 19<<1 | 0<<10 | 0 + OP_MFFS = 63<<26 | 583<<1 | 0<<10 | 0 + OP_MFMSR = 31<<26 | 83<<1 | 0<<10 | 0 + OP_MFSPR = 31<<26 | 339<<1 | 0<<10 | 0 + OP_MFSR = 31<<26 | 595<<1 | 0<<10 | 0 + OP_MFSRIN = 31<<26 | 659<<1 | 0<<10 | 0 + OP_MTCRF = 31<<26 | 144<<1 | 0<<10 | 0 + OP_MTFSF = 63<<26 | 711<<1 | 0<<10 | 0 + OP_MTFSFI = 63<<26 | 134<<1 | 0<<10 | 0 + OP_MTMSR = 31<<26 | 146<<1 | 0<<10 | 0 + OP_MTMSRD = 31<<26 | 178<<1 | 0<<10 | 0 + OP_MTSPR = 31<<26 | 467<<1 | 0<<10 | 0 + OP_MTSR = 31<<26 | 210<<1 | 0<<10 | 0 + OP_MTSRIN = 31<<26 | 242<<1 | 0<<10 | 0 + OP_MULLW = 31<<26 | 235<<1 | 0<<10 | 0 + OP_MULLD = 31<<26 | 233<<1 | 0<<10 | 0 + OP_OR = 31<<26 | 444<<1 | 0<<10 | 0 + OP_ORI = 24<<26 | 0<<1 | 
0<<10 | 0 + OP_ORIS = 25<<26 | 0<<1 | 0<<10 | 0 + OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0 + OP_RLWNM = 23<<26 | 0<<1 | 0<<10 | 0 + OP_SUBF = 31<<26 | 40<<1 | 0<<10 | 0 + OP_RLDIC = 30<<26 | 4<<1 | 0<<10 | 0 + OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0 + OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0 + OP_RLDCL = 30<<26 | 8<<1 | 0<<10 | 0 + OP_EXTSWSLI = 31<<26 | 445<<2 ) func oclass(a *obj.Addr) int { @@ -2965,14 +2975,21 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { case AROTL: a = int(0) op = OP_RLDICL + case AEXTSWSLI: + a = int(v) default: c.ctxt.Diag("unexpected op in sldi case\n%v", p) a = 0 o1 = 0 } - o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a)) - if p.As == ASLDCC || p.As == ASRDCC { + if p.As == AEXTSWSLI || p.As == AEXTSWSLICC { + o1 = AOP_EXTSWSLI(OP_EXTSWSLI, uint32(r), uint32(p.To.Reg), uint32(v)) + + } else { + o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a)) + } + if p.As == ASLDCC || p.As == ASRDCC || p.As == AEXTSWSLICC { o1 |= 1 // Set the condition code bit } @@ -4350,6 +4367,11 @@ func (c *ctxt9) oprrr(a obj.As) uint32 { case ASRADCC: return OPVCC(31, 794, 0, 1) + case AEXTSWSLI: + return OPVCC(31, 445, 0, 0) + case AEXTSWSLICC: + return OPVCC(31, 445, 0, 1) + case ASRW: return OPVCC(31, 536, 0, 0) case ASRWCC: @@ -5013,6 +5035,10 @@ func (c *ctxt9) opirr(a obj.As) uint32 { return OPVCC(31, (413 << 1), 0, 0) case ASRADCC: return OPVCC(31, (413 << 1), 0, 1) + case AEXTSWSLI: + return OPVCC(31, 445, 0, 0) + case AEXTSWSLICC: + return OPVCC(31, 445, 0, 1) case ASTSW: return OPVCC(31, 725, 0, 0) diff --git a/test/codegen/shift.go b/test/codegen/shift.go index 32214851b5..abc4b091c9 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -182,7 +182,7 @@ func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byt return f, g } -func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, uint16, uint32, uint64) { +func checkCombinedShifts(v8 uint8, v16 
uint16, v32 uint32, x32 int32, v64 uint64) (uint8, uint16, uint32, uint64, int64) { // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" @@ -202,7 +202,10 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, u // ppc64le:-"AND","CLRLSLDI" // ppc64:-"AND","CLRLSLDI" i := (v64 & 0xFFFFFFFF) << 5 - return f, g, h, i + // ppc64le/power9:-"SLD","EXTSWSLI" + // ppc64/power9:-"SLD","EXTSWSLI" + j := int64(x32+32)*8 + return f, g, h, i, j } func checkWidenAfterShift(v int64, u uint64) (int64, uint64) { -- cgit v1.3 From cc2a5cf4b8b0aeaccd3dd439f8d3d68f25eef358 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Mon, 28 Sep 2020 18:20:12 -0400 Subject: cmd/compile,cmd/internal/obj/ppc64: fix some shift rules due to a regression A recent change to improve shifts was generating some invalid cases when the rule was based on an AND. The extended mnemonics CLRLSLDI and CLRLSLWI only allow certain values for the operands and in the mask case those values were not being checked properly. This adds a check to those rules to verify that the 'b' and 'n' values used when an AND was part of the rule have correct values. There was a bug in some diag messages in asm9. The message expected 3 values but only provided 2. Those are corrected here also. The test/codegen/shift.go was updated to add a few more cases to check for the case mentioned here. Some of the comments that mention the order of operands in these extended mnemonics were wrong and those have been corrected. Fixes #41683. 
Change-Id: If5bb860acaa5051b9e0cd80784b2868b85898c31 Reviewed-on: https://go-review.googlesource.com/c/go/+/258138 Run-TryBot: Lynn Boger Reviewed-by: Paul Murphy Reviewed-by: Carlos Eduardo Seo TryBot-Result: Go Bot Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 4 ++-- src/cmd/compile/internal/ppc64/ssa.go | 12 +++++----- src/cmd/compile/internal/ssa/gen/PPC64.rules | 9 ++++--- src/cmd/compile/internal/ssa/rewrite.go | 4 ++-- src/cmd/compile/internal/ssa/rewritePPC64.go | 34 +++++++-------------------- src/cmd/internal/obj/ppc64/asm9.go | 35 ++++++++++++++-------------- test/codegen/shift.go | 17 ++++++++------ 7 files changed, 50 insertions(+), 65 deletions(-) (limited to 'src/cmd/internal/obj/ppc64') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s index 88a7609ba8..869f8c2d4f 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64enc.s @@ -287,8 +287,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 RLDICRCC $0, R4, $15, R6 // 788603c5 RLDIC $0, R4, $15, R6 // 788603c8 RLDICCC $0, R4, $15, R6 // 788603c9 - CLRLSLWI $16, R5, $8, R4 // 54a4861e - CLRLSLDI $2, R4, $24, R3 // 78831588 + CLRLSLWI $8, R5, $6, R4 // 54a430b2 + CLRLSLDI $24, R4, $4, R3 // 78832508 BEQ 0(PC) // 41820000 BGE 0(PC) // 40800000 diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index a5fbdaffba..d83b2df379 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -570,9 +570,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { r1 := v.Args[0].Reg() shifts := v.AuxInt p := s.Prog(v.Op.Asm()) - // clrlslwi ra,rs,sh,mb will become rlwinm ra,rs,sh,mb-sh,31-n as described in ISA - p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} - p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + // clrlslwi ra,rs,mb,sh will become rlwinm 
ra,rs,sh,mb-sh,31-sh as described in ISA + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}) p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r @@ -582,9 +582,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { r1 := v.Args[0].Reg() shifts := v.AuxInt p := s.Prog(v.Op.Asm()) - // clrlsldi ra,rs,sh,mb will become rldic ra,rs,sh,mb-sh - p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} - p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}) p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index de30d003e6..83ee4c499b 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -1018,13 +1018,12 @@ (SLDconst [c] z:(MOVHZreg x)) && c < 16 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,48,63,64)] x) (SLDconst [c] z:(MOVWZreg x)) && c < 32 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,32,63,64)] x) -(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) -(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) +(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d)) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) +(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(64-getPPC64ShiftMaskLength(d)) => 
(CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) (SLWconst [c] z:(MOVBZreg x)) && z.Uses == 1 && c < 8 => (CLRLSLWI [newPPC64ShiftAuxInt(c,24,31,32)] x) (SLWconst [c] z:(MOVHZreg x)) && z.Uses == 1 && c < 16 => (CLRLSLWI [newPPC64ShiftAuxInt(c,16,31,32)] x) -(SLWconst [c] z:(MOVWZreg x)) && z.Uses == 1 && c < 24 => (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) -(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) -(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) // special case for power9 (SL(W|D)const [c] z:(MOVWreg x)) && c < 32 && objabi.GOPPC64 >= 9 => (EXTSWSLconst [c] x) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 9f4de83a77..5d8b3ddc4e 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1380,8 +1380,8 @@ func GetPPC64Shiftme(auxint int64) int64 { return int64(int8(auxint)) } -// Catch the simple ones first -// TODO: Later catch more cases +// This verifies that the mask occupies the +// rightmost bits. 
func isPPC64ValidShiftMask(v int64) bool { if ((v + 1) & v) == 0 { return true diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 29ec3992f2..9822637b05 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -12831,7 +12831,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { return true } // match: (SLDconst [c] z:(ANDconst [d] x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d)) // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) for { c := auxIntToInt64(v.AuxInt) @@ -12841,7 +12841,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { } d := auxIntToInt64(z.AuxInt) x := z.Args[0] - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d))) { break } v.reset(OpPPC64CLRLSLDI) @@ -12850,7 +12850,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { return true } // match: (SLDconst [c] z:(AND (MOVDconst [d]) x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(64-getPPC64ShiftMaskLength(d)) // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) for { c := auxIntToInt64(v.AuxInt) @@ -12867,7 +12867,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { } d := auxIntToInt64(z_0.AuxInt) x := z_1 - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d))) { continue } v.reset(OpPPC64CLRLSLDI) @@ -12953,26 +12953,8 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { v.AddArg(x) return true } - // match: (SLWconst [c] z:(MOVWZreg x)) - // cond: z.Uses == 1 && c < 24 - // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) - for { - c := 
auxIntToInt64(v.AuxInt) - z := v_0 - if z.Op != OpPPC64MOVWZreg { - break - } - x := z.Args[0] - if !(z.Uses == 1 && c < 24) { - break - } - v.reset(OpPPC64CLRLSLWI) - v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 8, 31, 32)) - v.AddArg(x) - return true - } // match: (SLWconst [c] z:(ANDconst [d] x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) for { c := auxIntToInt64(v.AuxInt) @@ -12982,7 +12964,7 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { } d := auxIntToInt64(z.AuxInt) x := z.Args[0] - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (32-getPPC64ShiftMaskLength(d))) { break } v.reset(OpPPC64CLRLSLWI) @@ -12991,7 +12973,7 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { return true } // match: (SLWconst [c] z:(AND (MOVDconst [d]) x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) for { c := auxIntToInt64(v.AuxInt) @@ -13008,7 +12990,7 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { } d := auxIntToInt64(z_0.AuxInt) x := z_1 - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (32-getPPC64ShiftMaskLength(d))) { continue } v.reset(OpPPC64CLRLSLWI) diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 9f06bdf8b3..928e299f43 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -2749,7 +2749,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { me := int(d) sh := c.regoff(&p.From) if me < 0 || me > 63 || sh > 63 { - c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh) + 
c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh, p) } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(me)) @@ -2757,19 +2757,19 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { mb := int(d) sh := c.regoff(&p.From) if mb < 0 || mb > 63 || sh > 63 { - c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh) + c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh, p) } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(mb)) case ACLRLSLDI: // This is an extended mnemonic defined in the ISA section C.8.1 - // clrlsldi ra,rs,n,b --> rldic ra,rs,n,b-n + // clrlsldi ra,rs,b,n --> rldic ra,rs,n,b-n // It maps onto RLDIC so is directly generated here based on the operands from // the clrlsldi. - b := int(d) - n := c.regoff(&p.From) - if n > int32(b) || b > 63 { - c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b) + n := int32(d) + b := c.regoff(&p.From) + if n > b || b > 63 { + c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b, p) } o1 = AOP_RLDIC(OP_RLDIC, uint32(p.To.Reg), uint32(r), uint32(n), uint32(b)-uint32(n)) @@ -3395,14 +3395,15 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { v := c.regoff(&p.From) switch p.As { case ACLRLSLWI: - b := c.regoff(p.GetFrom3()) + n := c.regoff(p.GetFrom3()) // This is an extended mnemonic described in the ISA C.8.2 - // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n // It maps onto rlwinm which is directly generated here. 
- if v < 0 || v > 32 || b > 32 { - c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + if n > v || v >= 32 { + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p) } - o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v)) + + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n)) default: var mask [2]uint8 c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) @@ -3414,16 +3415,16 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { v := c.regoff(&p.From) switch p.As { case ACLRLSLWI: - b := c.regoff(p.GetFrom3()) - if v > b || b > 32 { + n := c.regoff(p.GetFrom3()) + if n > v || v >= 32 { // Message will match operands from the ISA even though in the // code it uses 'v' - c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p) } // This is an extended mnemonic described in the ISA C.8.2 - // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n // It generates the rlwinm directly here. 
- o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v)) + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n)) default: var mask [2]uint8 c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) diff --git a/test/codegen/shift.go b/test/codegen/shift.go index abc4b091c9..bbfc85ffbb 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -187,8 +187,8 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64 // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" f := (v8 &0xF) << 2 - // ppc64le:-"AND","CLRLSLWI" - // ppc64:-"AND","CLRLSLWI" + // ppc64le:"CLRLSLWI" + // ppc64:"CLRLSLWI" f += byte(v16)<<3 // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" @@ -196,12 +196,15 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64 // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" h := (v32 & 0xFFFFF) << 2 - // ppc64le:-"AND","CLRLSLWI" - // ppc64:-"AND","CLRLSLWI" - h += uint32(v64)<<4 - // ppc64le:-"AND","CLRLSLDI" - // ppc64:-"AND","CLRLSLDI" + // ppc64le:"CLRLSLDI" + // ppc64:"CLRLSLDI" i := (v64 & 0xFFFFFFFF) << 5 + // ppc64le:-"CLRLSLDI" + // ppc64:-"CLRLSLDI" + i += (v64 & 0xFFFFFFF) << 38 + // ppc64le/power9:-"CLRLSLDI" + // ppc64/power9:-"CLRLSLDI" + i += (v64 & 0xFFFF00) << 10 // ppc64le/power9:-"SLD","EXTSWSLI" // ppc64/power9:-"SLD","EXTSWSLI" j := int64(x32+32)*8 -- cgit v1.3 From bdab5df40f474c7768a945ef4fcf5aab634f7af5 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Fri, 2 Oct 2020 17:51:13 -0400 Subject: cmd/compile,cmd/internal/obj/ppc64: use mulli where possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support to allow the use of mulli when one of the multiply operands is a constant that fits in 16 bits. This especially helps in the case where this instruction appears in a loop since the load of the constant is not being moved out of the loop. 
Some improvements seen in compress/flate on power9: Decode/Digits/Huffman/1e4 259µs ± 0% 261µs ± 0% +0.57% (p=1.000 n=1+1) Decode/Digits/Huffman/1e5 2.43ms ± 0% 2.45ms ± 0% +0.79% (p=1.000 n=1+1) Decode/Digits/Huffman/1e6 23.9ms ± 0% 24.2ms ± 0% +0.86% (p=1.000 n=1+1) Decode/Digits/Speed/1e4 278µs ± 0% 279µs ± 0% +0.34% (p=1.000 n=1+1) Decode/Digits/Speed/1e5 2.80ms ± 0% 2.81ms ± 0% +0.29% (p=1.000 n=1+1) Decode/Digits/Speed/1e6 28.0ms ± 0% 28.1ms ± 0% +0.28% (p=1.000 n=1+1) Decode/Digits/Default/1e4 278µs ± 0% 278µs ± 0% +0.28% (p=1.000 n=1+1) Decode/Digits/Default/1e5 2.68ms ± 0% 2.69ms ± 0% +0.19% (p=1.000 n=1+1) Decode/Digits/Default/1e6 26.6ms ± 0% 26.6ms ± 0% +0.21% (p=1.000 n=1+1) Decode/Digits/Compression/1e4 278µs ± 0% 278µs ± 0% +0.00% (p=1.000 n=1+1) Decode/Digits/Compression/1e5 2.68ms ± 0% 2.69ms ± 0% +0.21% (p=1.000 n=1+1) Decode/Digits/Compression/1e6 26.6ms ± 0% 26.6ms ± 0% +0.07% (p=1.000 n=1+1) Decode/Newton/Huffman/1e4 322µs ± 0% 312µs ± 0% -2.84% (p=1.000 n=1+1) Decode/Newton/Huffman/1e5 3.11ms ± 0% 2.91ms ± 0% -6.41% (p=1.000 n=1+1) Decode/Newton/Huffman/1e6 31.4ms ± 0% 29.3ms ± 0% -6.85% (p=1.000 n=1+1) Decode/Newton/Speed/1e4 282µs ± 0% 269µs ± 0% -4.69% (p=1.000 n=1+1) Decode/Newton/Speed/1e5 2.29ms ± 0% 2.20ms ± 0% -4.13% (p=1.000 n=1+1) Decode/Newton/Speed/1e6 22.7ms ± 0% 21.3ms ± 0% -6.06% (p=1.000 n=1+1) Decode/Newton/Default/1e4 254µs ± 0% 237µs ± 0% -6.60% (p=1.000 n=1+1) Decode/Newton/Default/1e5 1.86ms ± 0% 1.75ms ± 0% -5.99% (p=1.000 n=1+1) Decode/Newton/Default/1e6 18.1ms ± 0% 17.4ms ± 0% -4.10% (p=1.000 n=1+1) Decode/Newton/Compression/1e4 254µs ± 0% 244µs ± 0% -3.91% (p=1.000 n=1+1) Decode/Newton/Compression/1e5 1.85ms ± 0% 1.79ms ± 0% -3.10% (p=1.000 n=1+1) Decode/Newton/Compression/1e6 18.0ms ± 0% 17.3ms ± 0% -3.88% (p=1.000 n=1+1) Change-Id: I840320fab1c4bf64c76b001c2651ab79f23df4eb Reviewed-on: https://go-review.googlesource.com/c/go/+/259444 Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Paul Murphy Reviewed-by: 
Carlos Eduardo Seo Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 4 +++ src/cmd/compile/internal/gc/bench_test.go | 12 +++++++ src/cmd/compile/internal/ppc64/ssa.go | 3 +- src/cmd/compile/internal/ssa/gen/PPC64.rules | 2 ++ src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 2 ++ src/cmd/compile/internal/ssa/opGen.go | 30 ++++++++++++++++ src/cmd/compile/internal/ssa/rewritePPC64.go | 54 ++++++++++++++++++++++++++++ src/cmd/internal/obj/ppc64/asm9.go | 9 ++--- 8 files changed, 111 insertions(+), 5 deletions(-) (limited to 'src/cmd/internal/obj/ppc64') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s index 869f8c2d4f..c6d7b59aad 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64enc.s @@ -204,12 +204,16 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 MULLW R3, R4 // 7c8419d6 MULLW R3, R4, R5 // 7ca419d6 + MULLW $10, R3 // 1c63000a + MULLW $10000000, R3 // 641f009863ff96807c7f19d6 MULLWCC R3, R4, R5 // 7ca419d7 MULHW R3, R4, R5 // 7ca41896 MULHWU R3, R4, R5 // 7ca41816 MULLD R3, R4 // 7c8419d2 MULLD R4, R4, R5 // 7ca421d2 + MULLD $20, R4 // 1c840014 + MULLD $200000000, R4 // 641f0beb63ffc2007c9f21d2 MULLDCC R3, R4, R5 // 7ca419d3 MULHD R3, R4, R5 // 7ca41892 MULHDCC R3, R4, R5 // 7ca41893 diff --git a/src/cmd/compile/internal/gc/bench_test.go b/src/cmd/compile/internal/gc/bench_test.go index a2887f2f7b..8c4288128f 100644 --- a/src/cmd/compile/internal/gc/bench_test.go +++ b/src/cmd/compile/internal/gc/bench_test.go @@ -7,6 +7,7 @@ package gc import "testing" var globl int64 +var globl32 int32 func BenchmarkLoadAdd(b *testing.B) { x := make([]int64, 1024) @@ -42,6 +43,17 @@ func BenchmarkModify(b *testing.B) { } } +func BenchmarkMullImm(b *testing.B) { + x := make([]int32, 1024) + for i := 0; i < b.N; i++ { + var s int32 + for i := range x { + s += x[i] * 100 + } + globl32 = s + } +} + func BenchmarkConstModify(b *testing.B) { a := make([]int64, 1024) for 
i := 0; i < b.N; i++ { diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index d83b2df379..1ece4d999f 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -677,7 +677,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.From.Reg = v.Args[0].Reg() case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst, - ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst: + ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, + ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst: p := s.Prog(v.Op.Asm()) p.Reg = v.Args[0].Reg() p.From.Type = obj.TYPE_CONST diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index 83ee4c499b..a05cfee654 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -821,6 +821,8 @@ (ADDconst [c] (MOVDaddr [d] {sym} x)) && is32Bit(c+int64(d)) => (MOVDaddr [int32(c+int64(d))] {sym} x) +(MULL(W|D) x (MOVDconst [c])) && is16Bit(c) => (MULL(W|D)const [int32(c)] x) + // Subtract from (with carry, but ignored) constant. // Note, these clobber the carry bit. 
(SUB (MOVDconst [c]) x) && is32Bit(c) => (SUBFCconst [c] x) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index 28317928a8..5885660597 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -181,6 +181,8 @@ func init() { {name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit) {name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit) + {name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit) + {name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit) {name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"}, // (arg0*arg1)+arg2 (signed 64-bit) {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true}, // (arg0 * arg1) >> 64, signed diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index d7d2b24a48..051550fb17 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1832,6 +1832,8 @@ const ( OpPPC64FSUBS OpPPC64MULLD OpPPC64MULLW + OpPPC64MULLDconst + OpPPC64MULLWconst OpPPC64MADDLD OpPPC64MULHD OpPPC64MULHW @@ -24377,6 +24379,34 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "MULLDconst", + auxType: auxInt32, + argLen: 1, + asm: ppc64.AMULLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, + { + name: "MULLWconst", + auxType: auxInt32, + argLen: 1, + asm: ppc64.AMULLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 
1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, { name: "MADDLD", argLen: 3, diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 9822637b05..1b8a5a78ca 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -568,6 +568,10 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64MOVWstorezero(v) case OpPPC64MTVSRD: return rewriteValuePPC64_OpPPC64MTVSRD(v) + case OpPPC64MULLD: + return rewriteValuePPC64_OpPPC64MULLD(v) + case OpPPC64MULLW: + return rewriteValuePPC64_OpPPC64MULLW(v) case OpPPC64NEG: return rewriteValuePPC64_OpPPC64NEG(v) case OpPPC64NOR: @@ -11003,6 +11007,56 @@ func rewriteValuePPC64_OpPPC64MTVSRD(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64MULLD(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MULLD x (MOVDconst [c])) + // cond: is16Bit(c) + // result: (MULLDconst [int32(c)] x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpPPC64MOVDconst { + continue + } + c := auxIntToInt64(v_1.AuxInt) + if !(is16Bit(c)) { + continue + } + v.reset(OpPPC64MULLDconst) + v.AuxInt = int32ToAuxInt(int32(c)) + v.AddArg(x) + return true + } + break + } + return false +} +func rewriteValuePPC64_OpPPC64MULLW(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MULLW x (MOVDconst [c])) + // cond: is16Bit(c) + // result: (MULLWconst [int32(c)] x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpPPC64MOVDconst { + continue + } + c := auxIntToInt64(v_1.AuxInt) + if !(is16Bit(c)) { + continue + } + v.reset(OpPPC64MULLWconst) + v.AuxInt = int32ToAuxInt(int32(c)) + 
v.AddArg(x) + return true + } + break + } + return false +} func rewriteValuePPC64_OpPPC64NEG(v *Value) bool { v_0 := v.Args[0] // match: (NEG (ADDconst [c] x)) diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 928e299f43..c2e8e9e9d0 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -1279,6 +1279,9 @@ func buildop(ctxt *obj.Link) { case AREMD: opset(AREMDU, r0) + case AMULLW: + opset(AMULLD, r0) + case ADIVW: /* op Rb[,Ra],Rd */ opset(AMULHW, r0) @@ -1312,7 +1315,6 @@ func buildop(ctxt *obj.Link) { opset(AMULHDCC, r0) opset(AMULHDU, r0) opset(AMULHDUCC, r0) - opset(AMULLD, r0) opset(AMULLDCC, r0) opset(AMULLDVCC, r0) opset(AMULLDV, r0) @@ -1996,7 +1998,6 @@ func buildop(ctxt *obj.Link) { AMOVB, /* macro: move byte with sign extension */ AMOVBU, /* macro: move byte with sign extension & update */ AMOVFL, - AMULLW, /* op $s[,r2],r3; op r1[,r2],r3; no cc/v */ ASUBC, /* op r1,$s,r3; op r1[,r2],r3 */ ASTSW, @@ -4990,8 +4991,8 @@ func (c *ctxt9) opirr(a obj.As) uint32 { case ADARN: return OPVCC(31, 755, 0, 0) /* darn - v3.00 */ - case AMULLW: - return OPVCC(7, 0, 0, 0) + case AMULLW, AMULLD: + return OPVCC(7, 0, 0, 0) /* mulli works with MULLW or MULLD */ case AOR: return OPVCC(24, 0, 0, 0) -- cgit v1.3 From 912262b806a432a29302e0cee45e4f42ef7038a2 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Sun, 19 Jul 2020 00:30:12 -0400 Subject: cmd/internal/obj: move LSym.Func into LSym.Extra This creates space for a different kind of extension field in LSym without making the struct any larger. (There are many LSym, so we care about keeping the struct small.) 
Change-Id: Ib16edb9e15f54c2a7351c8b875e19684058711e5 Reviewed-on: https://go-review.googlesource.com/c/go/+/243943 Trust: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/asm.go | 2 +- src/cmd/compile/internal/gc/dwinl.go | 4 +- src/cmd/compile/internal/gc/gsubr.go | 2 +- src/cmd/compile/internal/gc/obj.go | 9 ++-- src/cmd/compile/internal/gc/pgen.go | 18 ++++---- src/cmd/compile/internal/gc/plive.go | 9 ++-- src/cmd/compile/internal/gc/scope.go | 4 +- src/cmd/compile/internal/gc/ssa.go | 10 ++-- src/cmd/internal/obj/arm/asm5.go | 8 ++-- src/cmd/internal/obj/arm/obj5.go | 34 +++++++------- src/cmd/internal/obj/arm64/asm7.go | 12 ++--- src/cmd/internal/obj/arm64/obj7.go | 36 +++++++-------- src/cmd/internal/obj/dwarf.go | 31 +++++++------ src/cmd/internal/obj/ld.go | 2 +- src/cmd/internal/obj/link.go | 23 ++++++++- src/cmd/internal/obj/mips/asm0.go | 10 ++-- src/cmd/internal/obj/mips/obj0.go | 42 ++++++++--------- src/cmd/internal/obj/objfile.go | 90 ++++++++++++++++++------------------ src/cmd/internal/obj/pass.go | 6 +-- src/cmd/internal/obj/pcln.go | 28 +++++------ src/cmd/internal/obj/plist.go | 14 +++--- src/cmd/internal/obj/ppc64/asm9.go | 10 ++-- src/cmd/internal/obj/ppc64/obj9.go | 28 +++++------ src/cmd/internal/obj/riscv/obj.go | 38 +++++++-------- src/cmd/internal/obj/s390x/asmz.go | 6 +-- src/cmd/internal/obj/s390x/objz.go | 28 +++++------ src/cmd/internal/obj/sym.go | 6 ++- src/cmd/internal/obj/wasm/wasmobj.go | 34 +++++++------- src/cmd/internal/obj/x86/asm6.go | 8 ++-- src/cmd/internal/obj/x86/obj6.go | 26 +++++------ 30 files changed, 304 insertions(+), 274 deletions(-) (limited to 'src/cmd/internal/obj/ppc64') diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index 7878d74549..b9efa454ed 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -181,7 +181,7 @@ func (p *Parser) asmText(operands [][]lex.Token) { // Argsize set below. 
}, } - nameAddr.Sym.Func.Text = prog + nameAddr.Sym.Func().Text = prog prog.To.Val = int32(argSize) p.append(prog, "", true) } diff --git a/src/cmd/compile/internal/gc/dwinl.go b/src/cmd/compile/internal/gc/dwinl.go index 27e2cbcd98..5120fa1166 100644 --- a/src/cmd/compile/internal/gc/dwinl.go +++ b/src/cmd/compile/internal/gc/dwinl.go @@ -34,7 +34,7 @@ func assembleInlines(fnsym *obj.LSym, dwVars []*dwarf.Var) dwarf.InlCalls { // Walk progs to build up the InlCalls data structure var prevpos src.XPos - for p := fnsym.Func.Text; p != nil; p = p.Link { + for p := fnsym.Func().Text; p != nil; p = p.Link { if p.Pos == prevpos { continue } @@ -150,7 +150,7 @@ func assembleInlines(fnsym *obj.LSym, dwVars []*dwarf.Var) dwarf.InlCalls { start := int64(-1) curii := -1 var prevp *obj.Prog - for p := fnsym.Func.Text; p != nil; prevp, p = p, p.Link { + for p := fnsym.Func().Text; p != nil; prevp, p = p, p.Link { if prevp != nil && p.Pos == prevp.Pos { continue } diff --git a/src/cmd/compile/internal/gc/gsubr.go b/src/cmd/compile/internal/gc/gsubr.go index 480d411f49..14c217ff3b 100644 --- a/src/cmd/compile/internal/gc/gsubr.go +++ b/src/cmd/compile/internal/gc/gsubr.go @@ -199,7 +199,7 @@ func (pp *Progs) settext(fn *Node) { ptxt := pp.Prog(obj.ATEXT) pp.Text = ptxt - fn.Func.lsym.Func.Text = ptxt + fn.Func.lsym.Func().Text = ptxt ptxt.From.Type = obj.TYPE_MEM ptxt.From.Name = obj.NAME_EXTERN ptxt.From.Sym = fn.Func.lsym diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go index d7f4a94041..f6557e2d15 100644 --- a/src/cmd/compile/internal/gc/obj.go +++ b/src/cmd/compile/internal/gc/obj.go @@ -305,20 +305,21 @@ func dumpglobls() { // global symbols can't be declared during parallel compilation. 
func addGCLocals() { for _, s := range Ctxt.Text { - if s.Func == nil { + fn := s.Func() + if fn == nil { continue } - for _, gcsym := range []*obj.LSym{s.Func.GCArgs, s.Func.GCLocals, s.Func.GCRegs} { + for _, gcsym := range []*obj.LSym{fn.GCArgs, fn.GCLocals, fn.GCRegs} { if gcsym != nil && !gcsym.OnList() { ggloblsym(gcsym, int32(len(gcsym.P)), obj.RODATA|obj.DUPOK) } } - if x := s.Func.StackObjects; x != nil { + if x := fn.StackObjects; x != nil { attr := int16(obj.RODATA) ggloblsym(x, int32(len(x.P)), attr) x.Set(obj.AttrStatic, true) } - if x := s.Func.OpenCodedDeferInfo; x != nil { + if x := fn.OpenCodedDeferInfo; x != nil { ggloblsym(x, int32(len(x.P)), obj.RODATA|obj.DUPOK) } } diff --git a/src/cmd/compile/internal/gc/pgen.go b/src/cmd/compile/internal/gc/pgen.go index 52b1ed351d..353f4b08c9 100644 --- a/src/cmd/compile/internal/gc/pgen.go +++ b/src/cmd/compile/internal/gc/pgen.go @@ -266,8 +266,8 @@ func compile(fn *Node) { dtypesym(n.Type) // Also make sure we allocate a linker symbol // for the stack object data, for the same reason. 
- if fn.Func.lsym.Func.StackObjects == nil { - fn.Func.lsym.Func.StackObjects = Ctxt.Lookup(fn.Func.lsym.Name + ".stkobj") + if fn.Func.lsym.Func().StackObjects == nil { + fn.Func.lsym.Func().StackObjects = Ctxt.Lookup(fn.Func.lsym.Name + ".stkobj") } } } @@ -415,7 +415,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S case PAUTO: if !n.Name.Used() { // Text == nil -> generating abstract function - if fnsym.Func.Text != nil { + if fnsym.Func().Text != nil { Fatalf("debuginfo unused node (AllocFrame should truncate fn.Func.Dcl)") } continue @@ -425,7 +425,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S continue } apdecls = append(apdecls, n) - fnsym.Func.RecordAutoType(ngotype(n).Linksym()) + fnsym.Func().RecordAutoType(ngotype(n).Linksym()) } decls, dwarfVars := createDwarfVars(fnsym, fn.Func, apdecls) @@ -435,7 +435,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S // the function symbol to insure that the type included in DWARF // processing during linking. 
typesyms := []*obj.LSym{} - for t, _ := range fnsym.Func.Autot { + for t, _ := range fnsym.Func().Autot { typesyms = append(typesyms, t) } sort.Sort(obj.BySymName(typesyms)) @@ -444,7 +444,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S r.Sym = sym r.Type = objabi.R_USETYPE } - fnsym.Func.Autot = nil + fnsym.Func().Autot = nil var varScopes []ScopeID for _, decl := range decls { @@ -522,7 +522,7 @@ func createSimpleVar(fnsym *obj.LSym, n *Node) *dwarf.Var { } typename := dwarf.InfoPrefix + typesymname(n.Type) - delete(fnsym.Func.Autot, ngotype(n).Linksym()) + delete(fnsym.Func().Autot, ngotype(n).Linksym()) inlIndex := 0 if genDwarfInline > 1 { if n.Name.InlFormal() || n.Name.InlLocal() { @@ -667,7 +667,7 @@ func createDwarfVars(fnsym *obj.LSym, fn *Func, apDecls []*Node) ([]*Node, []*dw ChildIndex: -1, }) // Record go type of to insure that it gets emitted by the linker. - fnsym.Func.RecordAutoType(ngotype(n).Linksym()) + fnsym.Func().RecordAutoType(ngotype(n).Linksym()) } return decls, vars @@ -731,7 +731,7 @@ func createComplexVar(fnsym *obj.LSym, fn *Func, varID ssa.VarID) *dwarf.Var { } gotype := ngotype(n).Linksym() - delete(fnsym.Func.Autot, gotype) + delete(fnsym.Func().Autot, gotype) typename := dwarf.InfoPrefix + gotype.Name[len("type."):] inlIndex := 0 if genDwarfInline > 1 { diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go index a9ea37701e..b471accb65 100644 --- a/src/cmd/compile/internal/gc/plive.go +++ b/src/cmd/compile/internal/gc/plive.go @@ -1552,26 +1552,27 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap { // Emit the live pointer map data structures ls := e.curfn.Func.lsym - ls.Func.GCArgs, ls.Func.GCLocals, ls.Func.GCRegs = lv.emit() + fninfo := ls.Func() + fninfo.GCArgs, fninfo.GCLocals, fninfo.GCRegs = lv.emit() p := pp.Prog(obj.AFUNCDATA) Addrconst(&p.From, objabi.FUNCDATA_ArgsPointerMaps) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN - 
p.To.Sym = ls.Func.GCArgs + p.To.Sym = fninfo.GCArgs p = pp.Prog(obj.AFUNCDATA) Addrconst(&p.From, objabi.FUNCDATA_LocalsPointerMaps) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN - p.To.Sym = ls.Func.GCLocals + p.To.Sym = fninfo.GCLocals if !go115ReduceLiveness { p = pp.Prog(obj.AFUNCDATA) Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN - p.To.Sym = ls.Func.GCRegs + p.To.Sym = fninfo.GCRegs } return lv.livenessMap diff --git a/src/cmd/compile/internal/gc/scope.go b/src/cmd/compile/internal/gc/scope.go index d7239d5693..e66b859e10 100644 --- a/src/cmd/compile/internal/gc/scope.go +++ b/src/cmd/compile/internal/gc/scope.go @@ -62,9 +62,9 @@ func scopePCs(fnsym *obj.LSym, marks []Mark, dwarfScopes []dwarf.Scope) { if len(marks) == 0 { return } - p0 := fnsym.Func.Text + p0 := fnsym.Func().Text scope := findScope(marks, p0.Pos) - for p := fnsym.Func.Text; p != nil; p = p.Link { + for p := p0; p != nil; p = p.Link { if p.Pos == p0.Pos { continue } diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 3d5fa4cd0a..d8f627c213 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -240,7 +240,7 @@ func dvarint(x *obj.LSym, off int, v int64) int { // - Offset of where argument should be placed in the args frame when making call func (s *state) emitOpenDeferInfo() { x := Ctxt.Lookup(s.curfn.Func.lsym.Name + ".opendefer") - s.curfn.Func.lsym.Func.OpenCodedDeferInfo = x + s.curfn.Func.lsym.Func().OpenCodedDeferInfo = x off := 0 // Compute maxargsize (max size of arguments for all defers) @@ -6108,7 +6108,7 @@ func emitStackObjects(e *ssafn, pp *Progs) { // Populate the stack object data. // Format must match runtime/stack.go:stackObjectRecord. 
- x := e.curfn.Func.lsym.Func.StackObjects + x := e.curfn.Func.lsym.Func().StackObjects off := 0 off = duintptr(x, off, uint64(len(vars))) for _, v := range vars { @@ -6145,7 +6145,7 @@ func genssa(f *ssa.Func, pp *Progs) { s.livenessMap = liveness(e, f, pp) emitStackObjects(e, pp) - openDeferInfo := e.curfn.Func.lsym.Func.OpenCodedDeferInfo + openDeferInfo := e.curfn.Func.lsym.Func().OpenCodedDeferInfo if openDeferInfo != nil { // This function uses open-coded defers -- write out the funcdata // info that we computed at the end of genssa. @@ -6350,7 +6350,7 @@ func genssa(f *ssa.Func, pp *Progs) { // some of the inline marks. // Use this instruction instead. p.Pos = p.Pos.WithIsStmt() // promote position to a statement - pp.curfn.Func.lsym.Func.AddInlMark(p, inlMarks[m]) + pp.curfn.Func.lsym.Func().AddInlMark(p, inlMarks[m]) // Make the inline mark a real nop, so it doesn't generate any code. m.As = obj.ANOP m.Pos = src.NoXPos @@ -6362,7 +6362,7 @@ func genssa(f *ssa.Func, pp *Progs) { // Any unmatched inline marks now need to be added to the inlining tree (and will generate a nop instruction). for _, p := range inlMarkList { if p.As != obj.ANOP { - pp.curfn.Func.lsym.Func.AddInlMark(p, inlMarks[p]) + pp.curfn.Func.lsym.Func().AddInlMark(p, inlMarks[p]) } } } diff --git a/src/cmd/internal/obj/arm/asm5.go b/src/cmd/internal/obj/arm/asm5.go index 269a4223d5..ebb98b4859 100644 --- a/src/cmd/internal/obj/arm/asm5.go +++ b/src/cmd/internal/obj/arm/asm5.go @@ -390,7 +390,7 @@ func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var p *obj.Prog var op *obj.Prog - p = cursym.Func.Text + p = cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -482,8 +482,8 @@ func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { bflag = 0 pc = 0 times++ - c.cursym.Func.Text.Pc = 0 // force re-layout the code. 
- for p = c.cursym.Func.Text; p != nil; p = p.Link { + c.cursym.Func().Text.Pc = 0 // force re-layout the code. + for p = c.cursym.Func().Text; p != nil; p = p.Link { o = c.oplook(p) if int64(pc) > p.Pc { p.Pc = int64(pc) @@ -558,7 +558,7 @@ func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { * perhaps we'd be able to parallelize the span loop above. */ - p = c.cursym.Func.Text + p = c.cursym.Func().Text c.autosize = p.To.Offset + 4 c.cursym.Grow(c.cursym.Size) diff --git a/src/cmd/internal/obj/arm/obj5.go b/src/cmd/internal/obj/arm/obj5.go index 4d9187b530..f2bfb9679f 100644 --- a/src/cmd/internal/obj/arm/obj5.go +++ b/src/cmd/internal/obj/arm/obj5.go @@ -249,13 +249,13 @@ const ( func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize := int32(0) - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxt5{ctxt: ctxt, cursym: cursym, newprog: newprog} - p := c.cursym.Func.Text + p := c.cursym.Func().Text autoffset := int32(p.To.Offset) if autoffset == -4 { // Historical way to mark NOFRAME. 
@@ -271,30 +271,30 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - cursym.Func.Locals = autoffset - cursym.Func.Args = p.To.Val.(int32) + cursym.Func().Locals = autoffset + cursym.Func().Args = p.To.Val.(int32) /* * find leaf subroutines */ - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ATEXT: p.Mark |= LEAF case ADIV, ADIVU, AMOD, AMODU: - cursym.Func.Text.Mark &^= LEAF + cursym.Func().Text.Mark &^= LEAF case ABL, ABX, obj.ADUFFZERO, obj.ADUFFCOPY: - cursym.Func.Text.Mark &^= LEAF + cursym.Func().Text.Mark &^= LEAF } } var q2 *obj.Prog - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: @@ -311,20 +311,20 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize += 4 } - if autosize == 0 && cursym.Func.Text.Mark&LEAF == 0 { + if autosize == 0 && cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // are not identified as leaves but still have no frame. if ctxt.Debugvlog { ctxt.Logf("save suppressed in: %s\n", cursym.Name) } - cursym.Func.Text.Mark |= LEAF + cursym.Func().Text.Mark |= LEAF } // FP offsets need an updated p.To.Offset. 
p.To.Offset = int64(autosize) - 4 - if cursym.Func.Text.Mark&LEAF != 0 { + if cursym.Func().Text.Mark&LEAF != 0 { cursym.Set(obj.AttrLeaf, true) if p.From.Sym.NoFrame() { break @@ -347,7 +347,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Reg = REGSP p.Spadj = autosize - if cursym.Func.Text.From.Sym.Wrapper() { + if cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVW g_panic(g), R1 @@ -460,7 +460,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case obj.ARET: nocache(p) - if cursym.Func.Text.Mark&LEAF != 0 { + if cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 { p.As = AB p.From = obj.Addr{} @@ -508,7 +508,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } case ADIV, ADIVU, AMOD, AMODU: - if cursym.Func.Text.From.Sym.NoSplit() { + if cursym.Func().Text.From.Sym.NoSplit() { ctxt.Diag("cannot divide in NOSPLIT function") } const debugdivmod = false @@ -720,7 +720,7 @@ func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { end := c.ctxt.EndUnsafePoint(bls, c.newprog, -1) var last *obj.Prog - for last = c.cursym.Func.Text; last.Link != nil; last = last.Link { + for last = c.cursym.Func().Text; last.Link != nil; last = last.Link { } // Now we are at the end of the function, but logically @@ -751,7 +751,7 @@ func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { switch { case c.cursym.CFunc(): morestack = "runtime.morestackc" - case !c.cursym.Func.Text.From.Sym.NeedCtxt(): + case !c.cursym.Func().Text.From.Sym.NeedCtxt(): morestack = "runtime.morestack_noctxt" } call.To.Sym = c.ctxt.Lookup(morestack) @@ -762,7 +762,7 @@ func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { b := obj.Appendp(pcdata, c.newprog) b.As = obj.AJMP b.To.Type = obj.TYPE_BRANCH - b.To.SetTarget(c.cursym.Func.Text.Link) + b.To.SetTarget(c.cursym.Func().Text.Link) b.Spadj = 
+framesize return end diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index c46066313e..6b9fe27c05 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -913,7 +913,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Retpoline = false // don't keep printing } - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -943,8 +943,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { alignedValue := p.From.Offset m = pcAlignPadLength(pc, alignedValue, ctxt) // Update the current text symbol alignment value. - if int32(alignedValue) > cursym.Func.Align { - cursym.Func.Align = int32(alignedValue) + if int32(alignedValue) > cursym.Func().Align { + cursym.Func().Align = int32(alignedValue) } break case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: @@ -983,7 +983,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for bflag != 0 { bflag = 0 pc = 0 - for p = c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { if p.As == ADWORD && (pc&7) != 0 { pc += 4 } @@ -1047,7 +1047,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { psz := int32(0) var i int var out [6]uint32 - for p := c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { c.pc = p.Pc o = c.oplook(p) @@ -1088,7 +1088,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // We use REGTMP as a scratch register during call injection, // so instruction sequences that use REGTMP are unsafe to // preempt asynchronously. 
- obj.MarkUnsafePoints(c.ctxt, c.cursym.Func.Text, c.newprog, c.isUnsafePoint, c.isRestartable) + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable) } // isUnsafePoint returns whether p is an unsafe point. diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go index f1bc2583cb..0baf51973a 100644 --- a/src/cmd/internal/obj/arm64/obj7.go +++ b/src/cmd/internal/obj/arm64/obj7.go @@ -166,7 +166,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { end := c.ctxt.EndUnsafePoint(bls, c.newprog, -1) var last *obj.Prog - for last = c.cursym.Func.Text; last.Link != nil; last = last.Link { + for last = c.cursym.Func().Text; last.Link != nil; last = last.Link { } // Now we are at the end of the function, but logically @@ -209,7 +209,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { switch { case c.cursym.CFunc(): morestack = "runtime.morestackc" - case !c.cursym.Func.Text.From.Sym.NeedCtxt(): + case !c.cursym.Func().Text.From.Sym.NeedCtxt(): morestack = "runtime.morestack_noctxt" } call.To.Sym = c.ctxt.Lookup(morestack) @@ -220,7 +220,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { jmp := obj.Appendp(pcdata, c.newprog) jmp.As = AB jmp.To.Type = obj.TYPE_BRANCH - jmp.To.SetTarget(c.cursym.Func.Text.Link) + jmp.To.SetTarget(c.cursym.Func().Text.Link) jmp.Spadj = +framesize return end @@ -441,13 +441,13 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) { } func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym} - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -8 { // Historical way to mark NOFRAME. 
@@ -463,13 +463,13 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines */ - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ATEXT: p.Mark |= LEAF @@ -477,18 +477,18 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case ABL, obj.ADUFFZERO, obj.ADUFFCOPY: - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF } } var q *obj.Prog var q1 *obj.Prog var retjmp *obj.LSym - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: - c.cursym.Func.Text = p + c.cursym.Func().Text = p c.autosize = int32(textstksiz) if p.Mark&LEAF != 0 && c.autosize == 0 { @@ -514,7 +514,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8) } c.autosize += extrasize - c.cursym.Func.Locals += extrasize + c.cursym.Func().Locals += extrasize // low 32 bits for autosize // high 32 bits for extrasize @@ -524,14 +524,14 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Offset = 0 } - if c.autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 { + if c.autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 { if c.ctxt.Debugvlog { - c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func.Text.From.Sym.Name) + c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func().Text.From.Sym.Name) } - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } - if cursym.Func.Text.Mark&LEAF != 0 { + if cursym.Func().Text.Mark&LEAF != 0 { cursym.Set(obj.AttrLeaf, true) if p.From.Sym.NoFrame() { break @@ -641,7 +641,7 @@ func preprocess(ctxt 
*obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q1.To.Reg = REGFP } - if c.cursym.Func.Text.From.Sym.Wrapper() { + if c.cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOV g_panic(g), R1 @@ -755,7 +755,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { retjmp = p.To.Sym p.To = obj.Addr{} - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if c.autosize != 0 { p.As = AADD p.From.Type = obj.TYPE_CONST diff --git a/src/cmd/internal/obj/dwarf.go b/src/cmd/internal/obj/dwarf.go index 9abb31b558..328fb03b24 100644 --- a/src/cmd/internal/obj/dwarf.go +++ b/src/cmd/internal/obj/dwarf.go @@ -46,12 +46,12 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) { // we expect at the start of a new sequence. stmt := true line := int64(1) - pc := s.Func.Text.Pc + pc := s.Func().Text.Pc var lastpc int64 // last PC written to line table, not last PC in func name := "" prologue, wrotePrologue := false, false // Walk the progs, generating the DWARF table. - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { prologue = prologue || (p.Pos.Xlogue() == src.PosPrologueEnd) // If we're not at a real instruction, keep looping! if p.Pos.Line() == 0 || (p.Link != nil && p.Link.Pc == p.Pc) { @@ -103,7 +103,7 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) { // text address before the end sequence op. If this isn't done, // GDB will assign a line number of zero the last row in the line // table, which we don't want. 
- lastlen := uint64(s.Size - (lastpc - s.Func.Text.Pc)) + lastlen := uint64(s.Size - (lastpc - s.Func().Text.Pc)) putpclcdelta(ctxt, dctxt, lines, lastlen, 0) dctxt.AddUint8(lines, 0) // start extended opcode dwarf.Uleb128put(dctxt, lines, 1) @@ -301,26 +301,27 @@ func (ctxt *Link) dwarfSym(s *LSym) (dwarfInfoSym, dwarfLocSym, dwarfRangesSym, if s.Type != objabi.STEXT { ctxt.Diag("dwarfSym of non-TEXT %v", s) } - if s.Func.dwarfInfoSym == nil { - s.Func.dwarfInfoSym = &LSym{ + fn := s.Func() + if fn.dwarfInfoSym == nil { + fn.dwarfInfoSym = &LSym{ Type: objabi.SDWARFFCN, } if ctxt.Flag_locationlists { - s.Func.dwarfLocSym = &LSym{ + fn.dwarfLocSym = &LSym{ Type: objabi.SDWARFLOC, } } - s.Func.dwarfRangesSym = &LSym{ + fn.dwarfRangesSym = &LSym{ Type: objabi.SDWARFRANGE, } - s.Func.dwarfDebugLinesSym = &LSym{ + fn.dwarfDebugLinesSym = &LSym{ Type: objabi.SDWARFLINES, } if s.WasInlined() { - s.Func.dwarfAbsFnSym = ctxt.DwFixups.AbsFuncDwarfSym(s) + fn.dwarfAbsFnSym = ctxt.DwFixups.AbsFuncDwarfSym(s) } } - return s.Func.dwarfInfoSym, s.Func.dwarfLocSym, s.Func.dwarfRangesSym, s.Func.dwarfAbsFnSym, s.Func.dwarfDebugLinesSym + return fn.dwarfInfoSym, fn.dwarfLocSym, fn.dwarfRangesSym, fn.dwarfAbsFnSym, fn.dwarfDebugLinesSym } func (s *LSym) Length(dwarfContext interface{}) int64 { @@ -331,7 +332,7 @@ func (s *LSym) Length(dwarfContext interface{}) int64 { // first instruction (prog) of the specified function. This will // presumably be the file in which the function is defined. 
func (ctxt *Link) fileSymbol(fn *LSym) *LSym { - p := fn.Func.Text + p := fn.Func().Text if p != nil { f, _ := linkgetlineFromPos(ctxt, p.Pos) fsym := ctxt.Lookup(f) @@ -405,8 +406,8 @@ func (ctxt *Link) DwarfAbstractFunc(curfn interface{}, s *LSym, myimportpath str if absfn.Size != 0 { ctxt.Diag("internal error: DwarfAbstractFunc double process %v", s) } - if s.Func == nil { - s.Func = new(FuncInfo) + if s.Func() == nil { + s.NewFuncInfo() } scopes, _ := ctxt.DebugInfo(s, absfn, curfn) dwctxt := dwCtxt{ctxt} @@ -527,8 +528,8 @@ func (ft *DwarfFixupTable) SetPrecursorFunc(s *LSym, fn interface{}) { // wrapper generation as opposed to the main inlining phase) it's // possible that we didn't cache the abstract function sym for the // text symbol -- do so now if needed. See issue 38068. - if s.Func != nil && s.Func.dwarfAbsFnSym == nil { - s.Func.dwarfAbsFnSym = absfn + if fn := s.Func(); fn != nil && fn.dwarfAbsFnSym == nil { + fn.dwarfAbsFnSym = absfn } ft.precursor[s] = fnState{precursor: fn, absfn: absfn} diff --git a/src/cmd/internal/obj/ld.go b/src/cmd/internal/obj/ld.go index 4ba52c7785..5d6c000dc6 100644 --- a/src/cmd/internal/obj/ld.go +++ b/src/cmd/internal/obj/ld.go @@ -59,7 +59,7 @@ func mkfwd(sym *LSym) { } i := 0 - for p := sym.Func.Text; p != nil && p.Link != nil; p = p.Link { + for p := sym.Func().Text; p != nil && p.Link != nil; p = p.Link { i-- if i < 0 { i = LOG - 1 diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index f14b691802..ae85dbbe4e 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -38,6 +38,7 @@ import ( "cmd/internal/src" "cmd/internal/sys" "fmt" + "log" "sync" ) @@ -400,7 +401,7 @@ type LSym struct { P []byte R []Reloc - Func *FuncInfo + Extra *interface{} // *FuncInfo if present Pkg string PkgIdx int32 @@ -433,6 +434,26 @@ type FuncInfo struct { FuncInfoSym *LSym } +// NewFuncInfo allocates and returns a FuncInfo for LSym. 
+func (s *LSym) NewFuncInfo() *FuncInfo { + if s.Extra != nil { + log.Fatalf("invalid use of LSym - NewFuncInfo with Extra of type %T", *s.Extra) + } + f := new(FuncInfo) + s.Extra = new(interface{}) + *s.Extra = f + return f +} + +// Func returns the *FuncInfo associated with s, or else nil. +func (s *LSym) Func() *FuncInfo { + if s.Extra == nil { + return nil + } + f, _ := (*s.Extra).(*FuncInfo) + return f +} + type InlMark struct { // When unwinding from an instruction in an inlined body, mark // where we should unwind to. diff --git a/src/cmd/internal/obj/mips/asm0.go b/src/cmd/internal/obj/mips/asm0.go index 6107974745..fd29f9fa21 100644 --- a/src/cmd/internal/obj/mips/asm0.go +++ b/src/cmd/internal/obj/mips/asm0.go @@ -410,7 +410,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Retpoline = false // don't keep printing } - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -455,7 +455,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for bflag != 0 { bflag = 0 pc = 0 - for p = c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { p.Pc = pc o = c.oplook(p) @@ -512,7 +512,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { bp := c.cursym.P var i int32 var out [4]uint32 - for p := c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { c.pc = p.Pc o = c.oplook(p) if int(o.size) > 4*len(out) { @@ -529,7 +529,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // We use REGTMP as a scratch register during call injection, // so instruction sequences that use REGTMP are unsafe to // preempt asynchronously. 
- obj.MarkUnsafePoints(c.ctxt, c.cursym.Func.Text, c.newprog, c.isUnsafePoint, c.isRestartable) + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable) } // isUnsafePoint returns whether p is an unsafe point. @@ -1302,7 +1302,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 = OP_JMP(c.opirr(p.As), uint32(v)) if p.To.Sym == nil { - p.To.Sym = c.cursym.Func.Text.From.Sym + p.To.Sym = c.cursym.Func().Text.From.Sym p.To.Offset = p.To.Target().Pc } rel := obj.Addrel(c.cursym) diff --git a/src/cmd/internal/obj/mips/obj0.go b/src/cmd/internal/obj/mips/obj0.go index f19facc00c..135a8df3aa 100644 --- a/src/cmd/internal/obj/mips/obj0.go +++ b/src/cmd/internal/obj/mips/obj0.go @@ -133,11 +133,11 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // a switch for enabling/disabling instruction scheduling nosched := true - if c.cursym.Func.Text == nil || c.cursym.Func.Text.Link == nil { + if c.cursym.Func().Text == nil || c.cursym.Func().Text.Link == nil { return } - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -ctxt.FixedFrameSize() { // Historical way to mark NOFRAME. 
@@ -153,8 +153,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines @@ -162,7 +162,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { * expand BECOME pseudo */ - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { /* too hard, just leave alone */ case obj.ATEXT: @@ -203,7 +203,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { AJAL, obj.ADUFFZERO, obj.ADUFFCOPY: - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF fallthrough case AJMP, @@ -267,7 +267,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize := int32(0) var p1 *obj.Prog var p2 *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: @@ -288,19 +288,19 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize += 4 } - if autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 { - if c.cursym.Func.Text.From.Sym.NoSplit() { + if autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 { + if c.cursym.Func().Text.From.Sym.NoSplit() { if ctxt.Debugvlog { ctxt.Logf("save suppressed in: %s\n", c.cursym.Name) } - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } } p.To.Offset = int64(autosize) - ctxt.FixedFrameSize() - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { c.cursym.Set(obj.AttrLeaf, true) if p.From.Sym.NoFrame() { break @@ -344,7 +344,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) } - if c.cursym.Func.Text.From.Sym.Wrapper() && 
c.cursym.Func.Text.Mark&LEAF == 0 { + if c.cursym.Func().Text.From.Sym.Wrapper() && c.cursym.Func().Text.Mark&LEAF == 0 { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOV g_panic(g), R1 @@ -438,7 +438,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Name = obj.NAME_NONE // clear fields as we may modify p to other instruction p.To.Sym = nil - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 { p.As = AJMP p.From = obj.Addr{} @@ -540,7 +540,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if c.ctxt.Arch.Family == sys.MIPS { // rewrite MOVD into two MOVF in 32-bit mode to avoid unaligned memory access - for p = c.cursym.Func.Text; p != nil; p = p1 { + for p = c.cursym.Func().Text; p != nil; p = p1 { p1 = p.Link if p.As != AMOVD { @@ -580,7 +580,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if nosched { // if we don't do instruction scheduling, simply add // NOP after each branch instruction. 
- for p = c.cursym.Func.Text; p != nil; p = p.Link { + for p = c.cursym.Func().Text; p != nil; p = p.Link { if p.Mark&BRANCH != 0 { c.addnop(p) } @@ -589,10 +589,10 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // instruction scheduling - q = nil // p - 1 - q1 = c.cursym.Func.Text // top of block - o := 0 // count of instructions - for p = c.cursym.Func.Text; p != nil; p = p1 { + q = nil // p - 1 + q1 = c.cursym.Func().Text // top of block + o := 0 // count of instructions + for p = c.cursym.Func().Text; p != nil; p = p1 { p1 = p.Link o++ if p.Mark&NOSCHED != 0 { @@ -791,7 +791,7 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { p.To.Type = obj.TYPE_BRANCH if c.cursym.CFunc() { p.To.Sym = c.ctxt.Lookup("runtime.morestackc") - } else if !c.cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt") } else { p.To.Sym = c.ctxt.Lookup("runtime.morestack") @@ -805,7 +805,7 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { p.As = AJMP p.To.Type = obj.TYPE_BRANCH - p.To.SetTarget(c.cursym.Func.Text.Link) + p.To.SetTarget(c.cursym.Func().Text.Link) p.Mark |= BRANCH // placeholder for q1's jump target diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index fa60c9ad6d..a08de891d3 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -189,8 +189,8 @@ func WriteObjFile(ctxt *Link, b *bio.Writer) { // object file, and the Pcln variables haven't been filled in. As such, we // need to check that Pcsp exists, and assume the other pcln variables exist // as well. Tests like test/fixedbugs/issue22200.go demonstrate this issue. 
- if s.Func != nil && s.Func.Pcln.Pcsp != nil { - pc := &s.Func.Pcln + if fn := s.Func(); fn != nil && fn.Pcln.Pcsp != nil { + pc := &fn.Pcln w.Bytes(pc.Pcsp.P) w.Bytes(pc.Pcfile.P) w.Bytes(pc.Pcline.P) @@ -303,8 +303,8 @@ func (w *writer) Sym(s *LSym) { name = filepath.ToSlash(name) } var align uint32 - if s.Func != nil { - align = uint32(s.Func.Align) + if fn := s.Func(); fn != nil { + align = uint32(fn.Align) } if s.ContentAddressable() { // We generally assume data symbols are natually aligned, @@ -470,38 +470,38 @@ func (w *writer) Aux(s *LSym) { if s.Gotype != nil { w.aux1(goobj.AuxGotype, s.Gotype) } - if s.Func != nil { - w.aux1(goobj.AuxFuncInfo, s.Func.FuncInfoSym) + if fn := s.Func(); fn != nil { + w.aux1(goobj.AuxFuncInfo, fn.FuncInfoSym) - for _, d := range s.Func.Pcln.Funcdata { + for _, d := range fn.Pcln.Funcdata { w.aux1(goobj.AuxFuncdata, d) } - if s.Func.dwarfInfoSym != nil && s.Func.dwarfInfoSym.Size != 0 { - w.aux1(goobj.AuxDwarfInfo, s.Func.dwarfInfoSym) + if fn.dwarfInfoSym != nil && fn.dwarfInfoSym.Size != 0 { + w.aux1(goobj.AuxDwarfInfo, fn.dwarfInfoSym) } - if s.Func.dwarfLocSym != nil && s.Func.dwarfLocSym.Size != 0 { - w.aux1(goobj.AuxDwarfLoc, s.Func.dwarfLocSym) + if fn.dwarfLocSym != nil && fn.dwarfLocSym.Size != 0 { + w.aux1(goobj.AuxDwarfLoc, fn.dwarfLocSym) } - if s.Func.dwarfRangesSym != nil && s.Func.dwarfRangesSym.Size != 0 { - w.aux1(goobj.AuxDwarfRanges, s.Func.dwarfRangesSym) + if fn.dwarfRangesSym != nil && fn.dwarfRangesSym.Size != 0 { + w.aux1(goobj.AuxDwarfRanges, fn.dwarfRangesSym) } - if s.Func.dwarfDebugLinesSym != nil && s.Func.dwarfDebugLinesSym.Size != 0 { - w.aux1(goobj.AuxDwarfLines, s.Func.dwarfDebugLinesSym) + if fn.dwarfDebugLinesSym != nil && fn.dwarfDebugLinesSym.Size != 0 { + w.aux1(goobj.AuxDwarfLines, fn.dwarfDebugLinesSym) } - if s.Func.Pcln.Pcsp != nil && s.Func.Pcln.Pcsp.Size != 0 { - w.aux1(goobj.AuxPcsp, s.Func.Pcln.Pcsp) + if fn.Pcln.Pcsp != nil && fn.Pcln.Pcsp.Size != 0 { + w.aux1(goobj.AuxPcsp, 
fn.Pcln.Pcsp) } - if s.Func.Pcln.Pcfile != nil && s.Func.Pcln.Pcfile.Size != 0 { - w.aux1(goobj.AuxPcfile, s.Func.Pcln.Pcfile) + if fn.Pcln.Pcfile != nil && fn.Pcln.Pcfile.Size != 0 { + w.aux1(goobj.AuxPcfile, fn.Pcln.Pcfile) } - if s.Func.Pcln.Pcline != nil && s.Func.Pcln.Pcline.Size != 0 { - w.aux1(goobj.AuxPcline, s.Func.Pcln.Pcline) + if fn.Pcln.Pcline != nil && fn.Pcln.Pcline.Size != 0 { + w.aux1(goobj.AuxPcline, fn.Pcln.Pcline) } - if s.Func.Pcln.Pcinline != nil && s.Func.Pcln.Pcinline.Size != 0 { - w.aux1(goobj.AuxPcinline, s.Func.Pcln.Pcinline) + if fn.Pcln.Pcinline != nil && fn.Pcln.Pcinline.Size != 0 { + w.aux1(goobj.AuxPcinline, fn.Pcln.Pcinline) } - for _, pcSym := range s.Func.Pcln.Pcdata { + for _, pcSym := range fn.Pcln.Pcdata { w.aux1(goobj.AuxPcdata, pcSym) } @@ -571,34 +571,34 @@ func nAuxSym(s *LSym) int { if s.Gotype != nil { n++ } - if s.Func != nil { + if fn := s.Func(); fn != nil { // FuncInfo is an aux symbol, each Funcdata is an aux symbol - n += 1 + len(s.Func.Pcln.Funcdata) - if s.Func.dwarfInfoSym != nil && s.Func.dwarfInfoSym.Size != 0 { + n += 1 + len(fn.Pcln.Funcdata) + if fn.dwarfInfoSym != nil && fn.dwarfInfoSym.Size != 0 { n++ } - if s.Func.dwarfLocSym != nil && s.Func.dwarfLocSym.Size != 0 { + if fn.dwarfLocSym != nil && fn.dwarfLocSym.Size != 0 { n++ } - if s.Func.dwarfRangesSym != nil && s.Func.dwarfRangesSym.Size != 0 { + if fn.dwarfRangesSym != nil && fn.dwarfRangesSym.Size != 0 { n++ } - if s.Func.dwarfDebugLinesSym != nil && s.Func.dwarfDebugLinesSym.Size != 0 { + if fn.dwarfDebugLinesSym != nil && fn.dwarfDebugLinesSym.Size != 0 { n++ } - if s.Func.Pcln.Pcsp != nil && s.Func.Pcln.Pcsp.Size != 0 { + if fn.Pcln.Pcsp != nil && fn.Pcln.Pcsp.Size != 0 { n++ } - if s.Func.Pcln.Pcfile != nil && s.Func.Pcln.Pcfile.Size != 0 { + if fn.Pcln.Pcfile != nil && fn.Pcln.Pcfile.Size != 0 { n++ } - if s.Func.Pcln.Pcline != nil && s.Func.Pcln.Pcline.Size != 0 { + if fn.Pcln.Pcline != nil && fn.Pcln.Pcline.Size != 0 { n++ } - if 
s.Func.Pcln.Pcinline != nil && s.Func.Pcln.Pcinline.Size != 0 { + if fn.Pcln.Pcinline != nil && fn.Pcln.Pcinline.Size != 0 { n++ } - n += len(s.Func.Pcln.Pcdata) + n += len(fn.Pcln.Pcdata) } return n } @@ -620,15 +620,16 @@ func genFuncInfoSyms(ctxt *Link) { var b bytes.Buffer symidx := int32(len(ctxt.defs)) for _, s := range ctxt.Text { - if s.Func == nil { + fn := s.Func() + if fn == nil { continue } o := goobj.FuncInfo{ - Args: uint32(s.Func.Args), - Locals: uint32(s.Func.Locals), - FuncID: objabi.FuncID(s.Func.FuncID), + Args: uint32(fn.Args), + Locals: uint32(fn.Locals), + FuncID: objabi.FuncID(fn.FuncID), } - pc := &s.Func.Pcln + pc := &fn.Pcln o.Pcsp = makeSymRef(preparePcSym(pc.Pcsp)) o.Pcfile = makeSymRef(preparePcSym(pc.Pcfile)) o.Pcline = makeSymRef(preparePcSym(pc.Pcline)) @@ -670,10 +671,10 @@ func genFuncInfoSyms(ctxt *Link) { isym.Set(AttrIndexed, true) symidx++ infosyms = append(infosyms, isym) - s.Func.FuncInfoSym = isym + fn.FuncInfoSym = isym b.Reset() - dwsyms := []*LSym{s.Func.dwarfRangesSym, s.Func.dwarfLocSym, s.Func.dwarfDebugLinesSym, s.Func.dwarfInfoSym} + dwsyms := []*LSym{fn.dwarfRangesSym, fn.dwarfLocSym, fn.dwarfDebugLinesSym, fn.dwarfInfoSym} for _, s := range dwsyms { if s == nil || s.Size == 0 { continue @@ -744,14 +745,15 @@ func (ctxt *Link) writeSymDebugNamed(s *LSym, name string) { } fmt.Fprintf(ctxt.Bso, "size=%d", s.Size) if s.Type == objabi.STEXT { - fmt.Fprintf(ctxt.Bso, " args=%#x locals=%#x funcid=%#x", uint64(s.Func.Args), uint64(s.Func.Locals), uint64(s.Func.FuncID)) + fn := s.Func() + fmt.Fprintf(ctxt.Bso, " args=%#x locals=%#x funcid=%#x", uint64(fn.Args), uint64(fn.Locals), uint64(fn.FuncID)) if s.Leaf() { fmt.Fprintf(ctxt.Bso, " leaf") } } fmt.Fprintf(ctxt.Bso, "\n") if s.Type == objabi.STEXT { - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { fmt.Fprintf(ctxt.Bso, "\t%#04x ", uint(int(p.Pc))) if ctxt.Debugasm > 1 { io.WriteString(ctxt.Bso, p.String()) diff --git 
a/src/cmd/internal/obj/pass.go b/src/cmd/internal/obj/pass.go index 09d520b4e9..01657dd4f6 100644 --- a/src/cmd/internal/obj/pass.go +++ b/src/cmd/internal/obj/pass.go @@ -118,7 +118,7 @@ func checkaddr(ctxt *Link, p *Prog, a *Addr) { } func linkpatch(ctxt *Link, sym *LSym, newprog ProgAlloc) { - for p := sym.Func.Text; p != nil; p = p.Link { + for p := sym.Func().Text; p != nil; p = p.Link { checkaddr(ctxt, p, &p.From) if p.GetFrom3() != nil { checkaddr(ctxt, p, p.GetFrom3()) @@ -138,7 +138,7 @@ func linkpatch(ctxt *Link, sym *LSym, newprog ProgAlloc) { if p.To.Sym != nil { continue } - q := sym.Func.Text + q := sym.Func().Text for q != nil && p.To.Offset != q.Pc { if q.Forwd != nil && p.To.Offset >= q.Forwd.Pc { q = q.Forwd @@ -164,7 +164,7 @@ func linkpatch(ctxt *Link, sym *LSym, newprog ProgAlloc) { } // Collapse series of jumps to jumps. - for p := sym.Func.Text; p != nil; p = p.Link { + for p := sym.Func().Text; p != nil; p = p.Link { if p.To.Target() == nil { continue } diff --git a/src/cmd/internal/obj/pcln.go b/src/cmd/internal/obj/pcln.go index ce0d3714c0..67c4f9a62b 100644 --- a/src/cmd/internal/obj/pcln.go +++ b/src/cmd/internal/obj/pcln.go @@ -35,20 +35,21 @@ func funcpctab(ctxt *Link, func_ *LSym, desc string, valfunc func(*Link, *LSym, val := int32(-1) oldval := val - if func_.Func.Text == nil { + fn := func_.Func() + if fn.Text == nil { // Return the emtpy symbol we've built so far. return sym } - pc := func_.Func.Text.Pc + pc := fn.Text.Pc if dbg { - ctxt.Logf("%6x %6d %v\n", uint64(pc), val, func_.Func.Text) + ctxt.Logf("%6x %6d %v\n", uint64(pc), val, fn.Text) } buf := make([]byte, binary.MaxVarintLen32) started := false - for p := func_.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { // Update val. If it's not changing, keep going. 
val = valfunc(ctxt, func_, val, p, 0, arg) @@ -107,7 +108,7 @@ func funcpctab(ctxt *Link, func_ *LSym, desc string, valfunc func(*Link, *LSym, if started { if dbg { - ctxt.Logf("%6x done\n", uint64(func_.Func.Text.Pc+func_.Size)) + ctxt.Logf("%6x done\n", uint64(fn.Text.Pc+func_.Size)) } v := (func_.Size - pc) / int64(ctxt.Arch.MinLC) if v < 0 { @@ -257,12 +258,12 @@ func pctopcdata(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg i } func linkpcln(ctxt *Link, cursym *LSym) { - pcln := &cursym.Func.Pcln + pcln := &cursym.Func().Pcln pcln.UsedFiles = make(map[goobj.CUFileIndex]struct{}) npcdata := 0 nfuncdata := 0 - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { // Find the highest ID of any used PCDATA table. This ignores PCDATA table // that consist entirely of "-1", since that's the assumed default value. // From.Offset is table ID @@ -288,11 +289,12 @@ func linkpcln(ctxt *Link, cursym *LSym) { // Check that all the Progs used as inline markers are still reachable. // See issue #40473. - inlMarkProgs := make(map[*Prog]struct{}, len(cursym.Func.InlMarks)) - for _, inlMark := range cursym.Func.InlMarks { + fn := cursym.Func() + inlMarkProgs := make(map[*Prog]struct{}, len(fn.InlMarks)) + for _, inlMark := range fn.InlMarks { inlMarkProgs[inlMark.p] = struct{}{} } - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { if _, ok := inlMarkProgs[p]; ok { delete(inlMarkProgs, p) } @@ -303,7 +305,7 @@ func linkpcln(ctxt *Link, cursym *LSym) { pcinlineState := new(pcinlineState) pcln.Pcinline = funcpctab(ctxt, cursym, "pctoinline", pcinlineState.pctoinline, nil) - for _, inlMark := range cursym.Func.InlMarks { + for _, inlMark := range fn.InlMarks { pcinlineState.setParentPC(ctxt, int(inlMark.id), int32(inlMark.p.Pc)) } pcln.InlTree = pcinlineState.localTree @@ -316,7 +318,7 @@ func linkpcln(ctxt *Link, cursym *LSym) { // tabulate which pc and func data we have. 
havepc := make([]uint32, (npcdata+31)/32) havefunc := make([]uint32, (nfuncdata+31)/32) - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { if p.As == AFUNCDATA { if (havefunc[p.From.Offset/32]>>uint64(p.From.Offset%32))&1 != 0 { ctxt.Diag("multiple definitions for FUNCDATA $%d", p.From.Offset) @@ -344,7 +346,7 @@ func linkpcln(ctxt *Link, cursym *LSym) { // funcdata if nfuncdata > 0 { - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { if p.As != AFUNCDATA { continue } diff --git a/src/cmd/internal/obj/plist.go b/src/cmd/internal/obj/plist.go index 6e33f29959..eb54c67f6a 100644 --- a/src/cmd/internal/obj/plist.go +++ b/src/cmd/internal/obj/plist.go @@ -81,7 +81,7 @@ func Flushplist(ctxt *Link, plist *Plist, newprog ProgAlloc, myimportpath string continue } found := false - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { if p.As == AFUNCDATA && p.From.Type == TYPE_CONST && p.From.Offset == objabi.FUNCDATA_ArgsPointerMaps { found = true break @@ -89,7 +89,7 @@ func Flushplist(ctxt *Link, plist *Plist, newprog ProgAlloc, myimportpath string } if !found { - p := Appendp(s.Func.Text, newprog) + p := Appendp(s.Func().Text, newprog) p.As = AFUNCDATA p.From.Type = TYPE_CONST p.From.Offset = objabi.FUNCDATA_ArgsPointerMaps @@ -120,15 +120,15 @@ func (ctxt *Link) InitTextSym(s *LSym, flag int) { // func _() { } return } - if s.Func != nil { + if s.Func() != nil { ctxt.Diag("InitTextSym double init for %s", s.Name) } - s.Func = new(FuncInfo) + s.NewFuncInfo() if s.OnList() { ctxt.Diag("symbol %s listed multiple times", s.Name) } name := strings.Replace(s.Name, "\"\"", ctxt.Pkgpath, -1) - s.Func.FuncID = objabi.GetFuncID(name, flag&WRAPPER != 0) + s.Func().FuncID = objabi.GetFuncID(name, flag&WRAPPER != 0) s.Set(AttrOnList, true) s.Set(AttrDuplicateOK, flag&DUPOK != 0) s.Set(AttrNoSplit, flag&NOSPLIT != 0) @@ -185,7 +185,7 @@ func (ctxt *Link) 
EmitEntryLiveness(s *LSym, p *Prog, newprog ProgAlloc) *Prog { // Similar to EmitEntryLiveness, but just emit stack map. func (ctxt *Link) EmitEntryStackMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { pcdata := Appendp(p, newprog) - pcdata.Pos = s.Func.Text.Pos + pcdata.Pos = s.Func().Text.Pos pcdata.As = APCDATA pcdata.From.Type = TYPE_CONST pcdata.From.Offset = objabi.PCDATA_StackMapIndex @@ -198,7 +198,7 @@ func (ctxt *Link) EmitEntryStackMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { // Similar to EmitEntryLiveness, but just emit register map. func (ctxt *Link) EmitEntryRegMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { pcdata := Appendp(p, newprog) - pcdata.Pos = s.Func.Text.Pos + pcdata.Pos = s.Func().Text.Pos pcdata.As = APCDATA pcdata.From.Type = TYPE_CONST pcdata.From.Offset = objabi.PCDATA_RegMapIndex diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index c2e8e9e9d0..dcabb3cd6a 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -663,8 +663,8 @@ func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { // the function alignment is not changed which might // result in 16 byte alignment but that is still fine. 
// TODO: alignment on AIX - if ctxt.Headtype != objabi.Haix && cursym.Func.Align < 32 { - cursym.Func.Align = 32 + if ctxt.Headtype != objabi.Haix && cursym.Func().Align < 32 { + cursym.Func().Align = 32 } default: ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", a) @@ -673,7 +673,7 @@ func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { } func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -722,7 +722,7 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for bflag != 0 { bflag = 0 pc = 0 - for p = c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { p.Pc = pc o = c.oplook(p) @@ -784,7 +784,7 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { bp := c.cursym.P var i int32 var out [6]uint32 - for p := c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { c.pc = p.Pc o = c.oplook(p) if int(o.size) > 4*len(out) { diff --git a/src/cmd/internal/obj/ppc64/obj9.go b/src/cmd/internal/obj/ppc64/obj9.go index c012762a18..3ab19de602 100644 --- a/src/cmd/internal/obj/ppc64/obj9.go +++ b/src/cmd/internal/obj/ppc64/obj9.go @@ -402,13 +402,13 @@ func (c *ctxt9) rewriteToUseGot(p *obj.Prog) { func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // TODO(minux): add morestack short-cuts with small fixed frame-size. - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxt9{ctxt: ctxt, cursym: cursym, newprog: newprog} - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -8 { // Compatibility hack. 
@@ -424,8 +424,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines @@ -435,7 +435,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var q *obj.Prog var q1 *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { /* too hard, just leave alone */ case obj.ATEXT: @@ -541,7 +541,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ABCL, obj.ADUFFZERO, obj.ADUFFCOPY: - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF fallthrough case ABC, @@ -598,7 +598,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize := int32(0) var p1 *obj.Prog var p2 *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: @@ -664,7 +664,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { rel.Type = objabi.R_ADDRPOWER_PCREL } - if !c.cursym.Func.Text.From.Sym.NoSplit() { + if !c.cursym.Func().Text.From.Sym.NoSplit() { q = c.stacksplit(q, autosize) // emit split check } @@ -732,14 +732,14 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) } - } else if c.cursym.Func.Text.Mark&LEAF == 0 { + } else if c.cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // (e.g. gogo) are not identified as leaves but still have // no frame. 
- c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { c.cursym.Set(obj.AttrLeaf, true) break } @@ -755,7 +755,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.To.Offset = 24 } - if c.cursym.Func.Text.From.Sym.Wrapper() { + if c.cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVD g_panic(g), R3 @@ -853,7 +853,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { retTarget := p.To.Sym - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 || c.cursym.Name == "runtime.racecallbackthunk" { p.As = ABR p.From = obj.Addr{} @@ -1161,7 +1161,7 @@ func (c *ctxt9) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { var morestacksym *obj.LSym if c.cursym.CFunc() { morestacksym = c.ctxt.Lookup("runtime.morestackc") - } else if !c.cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { morestacksym = c.ctxt.Lookup("runtime.morestack_noctxt") } else { morestacksym = c.ctxt.Lookup("runtime.morestack") diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 841b30d85c..045c2250b5 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -427,7 +427,7 @@ func InvertBranch(as obj.As) obj.As { // instruction. Must be called after progedit. func containsCall(sym *obj.LSym) bool { // CALLs are CALL or JAL(R) with link register LR. - for p := sym.Func.Text; p != nil; p = p.Link { + for p := sym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ACALL: return true @@ -499,12 +499,12 @@ func stackOffset(a *obj.Addr, stacksize int64) { // concrete, real RISC-V instructions or directive pseudo-ops like TEXT, // PCDATA, and FUNCDATA. 
func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } // Generate the prologue. - text := cursym.Func.Text + text := cursym.Func().Text if text.As != obj.ATEXT { ctxt.Diag("preprocess: found symbol that does not start with TEXT directive") return @@ -538,12 +538,12 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { stacksize += ctxt.FixedFrameSize() } - cursym.Func.Args = text.To.Val.(int32) - cursym.Func.Locals = int32(stacksize) + cursym.Func().Args = text.To.Val.(int32) + cursym.Func().Locals = int32(stacksize) prologue := text - if !cursym.Func.Text.From.Sym.NoSplit() { + if !cursym.Func().Text.From.Sym.NoSplit() { prologue = stacksplit(ctxt, prologue, cursym, newprog, stacksize) // emit split check } @@ -567,7 +567,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { prologue = ctxt.EndUnsafePoint(prologue, newprog, -1) } - if cursym.Func.Text.From.Sym.Wrapper() { + if cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOV g_panic(g), X11 @@ -647,13 +647,13 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // Update stack-based offsets. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { stackOffset(&p.From, stacksize) stackOffset(&p.To, stacksize) } // Additional instruction rewriting. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.AGETCALLERPC: if cursym.Leaf() { @@ -733,7 +733,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // Rewrite MOV pseudo-instructions. This cannot be done in // progedit, as SP offsets need to be applied before we split // up some of the Addrs. 
- for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: rewriteMOV(ctxt, newprog, p) @@ -741,7 +741,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // Split immediates larger than 12-bits. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { // $imm, REG, TO case AADDI, AANDI, AORI, AXORI: @@ -858,9 +858,9 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // a fixed point will be reached). No attempt to handle functions > 2GiB. for { rescan := false - setPCs(cursym.Func.Text, 0) + setPCs(cursym.Func().Text, 0) - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ: if p.To.Type != obj.TYPE_BRANCH { @@ -917,7 +917,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // Now that there are no long branches, resolve branch and jump targets. // At this point, instruction rewriting which changes the number of // instructions will break everything--don't do it! - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ, AJAL: switch p.To.Type { @@ -940,7 +940,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // Validate all instructions - this provides nice error messages. 
- for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { for _, ins := range instructionsForProg(p) { ins.validate(ctxt) } @@ -1068,7 +1068,7 @@ func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgA p.To.Type = obj.TYPE_BRANCH if cursym.CFunc() { p.To.Sym = ctxt.Lookup("runtime.morestackc") - } else if !cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !cursym.Func().Text.From.Sym.NeedCtxt() { p.To.Sym = ctxt.Lookup("runtime.morestack_noctxt") } else { p.To.Sym = ctxt.Lookup("runtime.morestack") @@ -1083,7 +1083,7 @@ func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgA p.As = AJAL p.To = obj.Addr{Type: obj.TYPE_BRANCH} p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} - p.To.SetTarget(cursym.Func.Text.Link) + p.To.SetTarget(cursym.Func().Text.Link) // placeholder for to_done's jump target p = obj.Appendp(p, newprog) @@ -1926,7 +1926,7 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } var symcode []uint32 - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case AJALR: if p.To.Sym != nil { @@ -1981,7 +1981,7 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Arch.ByteOrder.PutUint32(p, symcode[i]) } - obj.MarkUnsafePoints(ctxt, cursym.Func.Text, newprog, isUnsafePoint, nil) + obj.MarkUnsafePoints(ctxt, cursym.Func().Text, newprog, isUnsafePoint, nil) } func isUnsafePoint(p *obj.Prog) bool { diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go index cb3a2c3196..da14dd3c41 100644 --- a/src/cmd/internal/obj/s390x/asmz.go +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -447,7 +447,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Retpoline = false // don't keep printing } - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and 
ELF section symbols return } @@ -473,7 +473,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.cursym.R[nrelocs0+i] = obj.Reloc{} } c.cursym.R = c.cursym.R[:nrelocs0] // preserve marker relocations generated by the compiler - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { pc := int64(len(buffer)) if pc != p.Pc { changed = true @@ -504,7 +504,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // We use REGTMP as a scratch register during call injection, // so instruction sequences that use REGTMP are unsafe to // preempt asynchronously. - obj.MarkUnsafePoints(c.ctxt, c.cursym.Func.Text, c.newprog, c.isUnsafePoint, nil) + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, nil) } // Return whether p is an unsafe point. diff --git a/src/cmd/internal/obj/s390x/objz.go b/src/cmd/internal/obj/s390x/objz.go index 625bb0f7b4..3af5425b36 100644 --- a/src/cmd/internal/obj/s390x/objz.go +++ b/src/cmd/internal/obj/s390x/objz.go @@ -205,13 +205,13 @@ func (c *ctxtz) rewriteToUseGot(p *obj.Prog) { func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // TODO(minux): add morestack short-cuts with small fixed frame-size. - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxtz{ctxt: ctxt, cursym: cursym, newprog: newprog} - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -8 { // Compatibility hack. 
@@ -227,8 +227,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines @@ -237,7 +237,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { */ var q *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ATEXT: q = p @@ -245,7 +245,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case ABL, ABCL: q = p - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF fallthrough case ABC, @@ -294,7 +294,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var pPre *obj.Prog var pPreempt *obj.Prog wasSplit := false - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { pLast = p switch p.As { case obj.ATEXT: @@ -356,19 +356,19 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.Spadj = autosize q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) - } else if c.cursym.Func.Text.Mark&LEAF == 0 { + } else if c.cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // (e.g. gogo) are not identified as leaves but still have // no frame. 
- c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { c.cursym.Set(obj.AttrLeaf, true) break } - if c.cursym.Func.Text.From.Sym.Wrapper() { + if c.cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVD g_panic(g), R3 @@ -461,7 +461,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case obj.ARET: retTarget := p.To.Sym - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 { p.As = ABR p.From = obj.Addr{} @@ -696,7 +696,7 @@ func (c *ctxtz) stacksplitPost(p *obj.Prog, pPre *obj.Prog, pPreempt *obj.Prog, p.To.Type = obj.TYPE_BRANCH if c.cursym.CFunc() { p.To.Sym = c.ctxt.Lookup("runtime.morestackc") - } else if !c.cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt") } else { p.To.Sym = c.ctxt.Lookup("runtime.morestack") @@ -709,7 +709,7 @@ func (c *ctxtz) stacksplitPost(p *obj.Prog, pPre *obj.Prog, pPreempt *obj.Prog, p.As = ABR p.To.Type = obj.TYPE_BRANCH - p.To.SetTarget(c.cursym.Func.Text.Link) + p.To.SetTarget(c.cursym.Func().Text.Link) return p } diff --git a/src/cmd/internal/obj/sym.go b/src/cmd/internal/obj/sym.go index e5d7b2cbfd..0182773f8e 100644 --- a/src/cmd/internal/obj/sym.go +++ b/src/cmd/internal/obj/sym.go @@ -358,7 +358,8 @@ func (ctxt *Link) traverseSyms(flag traverseFlag, fn func(*LSym)) { } func (ctxt *Link) traverseFuncAux(flag traverseFlag, fsym *LSym, fn func(parent *LSym, aux *LSym)) { - pc := &fsym.Func.Pcln + fninfo := fsym.Func() + pc := &fninfo.Pcln if flag&traverseAux == 0 { // NB: should it become necessary to walk aux sym reloc references // without walking the aux syms themselves, this can be changed. 
@@ -389,7 +390,8 @@ func (ctxt *Link) traverseFuncAux(flag traverseFlag, fsym *LSym, fn func(parent fn(fsym, filesym) } } - dwsyms := []*LSym{fsym.Func.dwarfRangesSym, fsym.Func.dwarfLocSym, fsym.Func.dwarfDebugLinesSym, fsym.Func.dwarfInfoSym} + + dwsyms := []*LSym{fninfo.dwarfRangesSym, fninfo.dwarfLocSym, fninfo.dwarfDebugLinesSym, fninfo.dwarfInfoSym} for _, dws := range dwsyms { if dws == nil || dws.Size == 0 { continue diff --git a/src/cmd/internal/obj/wasm/wasmobj.go b/src/cmd/internal/obj/wasm/wasmobj.go index a9e093a8ad..f7f66a1255 100644 --- a/src/cmd/internal/obj/wasm/wasmobj.go +++ b/src/cmd/internal/obj/wasm/wasmobj.go @@ -182,14 +182,14 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { return p } - framesize := s.Func.Text.To.Offset + framesize := s.Func().Text.To.Offset if framesize < 0 { panic("bad framesize") } - s.Func.Args = s.Func.Text.To.Val.(int32) - s.Func.Locals = int32(framesize) + s.Func().Args = s.Func().Text.To.Val.(int32) + s.Func().Locals = int32(framesize) - if s.Func.Text.From.Sym.Wrapper() { + if s.Func().Text.From.Sym.Wrapper() { // if g._panic != nil && g._panic.argp == FP { // g._panic.argp = bottom-of-frame // } @@ -222,7 +222,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { Offset: 0, // panic.argp } - p := s.Func.Text + p := s.Func().Text p = appendp(p, AMOVD, gpanic, regAddr(REG_R0)) p = appendp(p, AGet, regAddr(REG_R0)) @@ -245,7 +245,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } if framesize > 0 { - p := s.Func.Text + p := s.Func().Text p = appendp(p, AGet, regAddr(REG_SP)) p = appendp(p, AI32Const, constAddr(framesize)) p = appendp(p, AI32Sub) @@ -260,8 +260,8 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { pc := int64(0) // pc is only incremented when necessary, this avoids bloat of the BrTable instruction var tableIdxs []uint64 tablePC := int64(0) - base := ctxt.PosTable.Pos(s.Func.Text.Pos).Base() - for p := 
s.Func.Text; p != nil; p = p.Link { + base := ctxt.PosTable.Pos(s.Func().Text.Pos).Base() + for p := s.Func().Text; p != nil; p = p.Link { prevBase := base base = ctxt.PosTable.Pos(p.Pos).Base() switch p.As { @@ -313,8 +313,8 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { tableIdxs = append(tableIdxs, uint64(numResumePoints)) s.Size = pc + 1 - if !s.Func.Text.From.Sym.NoSplit() { - p := s.Func.Text + if !s.Func().Text.From.Sym.NoSplit() { + p := s.Func().Text if framesize <= objabi.StackSmall { // small stack: SP <= stackguard @@ -352,7 +352,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { p = appendp(p, AIf) p = appendp(p, obj.ACALL, constAddr(0)) - if s.Func.Text.From.Sym.NeedCtxt() { + if s.Func().Text.From.Sym.NeedCtxt() { p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestack} } else { p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestackNoCtxt} @@ -365,7 +365,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { var entryPointLoopBranches []*obj.Prog var unwindExitBranches []*obj.Prog currentDepth := 0 - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.As { case ABlock, ALoop, AIf: currentDepth++ @@ -562,7 +562,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } } - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.From.Name { case obj.NAME_AUTO: p.From.Offset += int64(framesize) @@ -712,7 +712,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } { - p := s.Func.Text + p := s.Func().Text if len(unwindExitBranches) > 0 { p = appendp(p, ABlock) // unwindExit, used to return 1 when unwinding the stack for _, b := range unwindExitBranches { @@ -749,7 +749,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { currentDepth = 0 blockDepths := make(map[*obj.Prog]int) - for p := 
s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.As { case ABlock, ALoop, AIf: currentDepth++ @@ -850,7 +850,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { hasLocalSP = true var regUsed [MAXREG - MINREG]bool - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { if p.From.Reg != 0 { regUsed[p.From.Reg-MINREG] = true } @@ -896,7 +896,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { updateLocalSP(w) } - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.As { case AGet: if p.From.Type != obj.TYPE_REG { diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index 4940c79eaa..c412f4945d 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -2050,7 +2050,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { ctxt.Diag("x86 tables not initialized, call x86.instinit first") } - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil { p.To.SetTarget(p) } @@ -2085,7 +2085,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } var count int64 // rough count of number of instructions - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { count++ p.Back = branchShort // use short branches first time through if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) { @@ -2113,7 +2113,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { c = 0 var pPrev *obj.Prog nops = nops[:0] - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { c0 := c c = pjc.padJump(ctxt, s, p, c) @@ -2227,7 +2227,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { // the first instruction.) 
return p.From.Index == REG_TLS } - obj.MarkUnsafePoints(ctxt, s.Func.Text, newprog, useTLS, nil) + obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil) } } diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index 18a6afcd77..e11fa13f65 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -563,11 +563,11 @@ func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { } func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } - p := cursym.Func.Text + p := cursym.Func().Text autoffset := int32(p.To.Offset) if autoffset < 0 { autoffset = 0 @@ -602,12 +602,12 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } textarg := int64(p.To.Val.(int32)) - cursym.Func.Args = int32(textarg) - cursym.Func.Locals = int32(p.To.Offset) + cursym.Func().Args = int32(textarg) + cursym.Func().Locals = int32(p.To.Offset) // TODO(rsc): Remove. - if ctxt.Arch.Family == sys.I386 && cursym.Func.Locals < 0 { - cursym.Func.Locals = 0 + if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 { + cursym.Func().Locals = 0 } // TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'. 
@@ -642,7 +642,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p = load_g_cx(ctxt, p, newprog) // load g into CX } - if !cursym.Func.Text.From.Sym.NoSplit() { + if !cursym.Func().Text.From.Sym.NoSplit() { p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg)) // emit split check } @@ -690,7 +690,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Reg = REG_BP } - if cursym.Func.Text.From.Sym.Wrapper() { + if cursym.Func().Text.From.Sym.Wrapper() { // if g._panic != nil && g._panic.argp == FP { // g._panic.argp = bottom-of-frame // } @@ -808,7 +808,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } var deltasp int32 - for p = cursym.Func.Text; p != nil; p = p.Link { + for p = cursym.Func().Text; p != nil; p = p.Link { pcsize := ctxt.Arch.RegSize switch p.From.Name { case obj.NAME_AUTO: @@ -1103,7 +1103,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA end := ctxt.EndUnsafePoint(jls, newprog, -1) var last *obj.Prog - for last = cursym.Func.Text; last.Link != nil; last = last.Link { + for last = cursym.Func().Text; last.Link != nil; last = last.Link { } // Now we are at the end of the function, but logically @@ -1117,7 +1117,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA pcdata = ctxt.StartUnsafePoint(pcdata, newprog) call := obj.Appendp(pcdata, newprog) - call.Pos = cursym.Func.Text.Pos + call.Pos = cursym.Func().Text.Pos call.As = obj.ACALL call.To.Type = obj.TYPE_BRANCH call.To.Name = obj.NAME_EXTERN @@ -1125,7 +1125,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA switch { case cursym.CFunc(): morestack = "runtime.morestackc" - case !cursym.Func.Text.From.Sym.NeedCtxt(): + case !cursym.Func().Text.From.Sym.NeedCtxt(): morestack = "runtime.morestack_noctxt" } call.To.Sym = ctxt.Lookup(morestack) @@ -1144,7 +1144,7 @@ func stacksplit(ctxt *obj.Link, 
cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA jmp := obj.Appendp(pcdata, newprog) jmp.As = obj.AJMP jmp.To.Type = obj.TYPE_BRANCH - jmp.To.SetTarget(cursym.Func.Text.Link) + jmp.To.SetTarget(cursym.Func().Text.Link) jmp.Spadj = +framesize jls.To.SetTarget(call) -- cgit v1.3 From e981936855383883edb5fcc85a196c485b15f0f9 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Tue, 6 Oct 2020 17:08:31 -0400 Subject: cmd/internal/obj/ppc64,cmd/asm/internal/asm/testdata: fix up ppc64 testcases When a fix was made at the end of the last release related to NOPs, it was discovered that the ppc64.s testcase was out of date and contained comments that weren't being processed. Essentially the instructions in that test were being assembled but there was no verification that the encodings were correct. The ppc64enc.s file was mostly complete and included the valid encodings for verification. This change moves ppc64enc.s to ppc64.s and adds the instructions that were missing. This also adds a minor fix to asm9.go on the assembly of the addex that was discovered during this testing. 
Change-Id: Iaada1563b137849ad195fa88f32ecc9ab3e1e95f Reviewed-on: https://go-review.googlesource.com/c/go/+/260217 Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Keith Randall Trust: Lynn Boger --- src/cmd/asm/internal/asm/endtoend_test.go | 4 - src/cmd/asm/internal/asm/testdata/ppc64.s | 2006 +++++++++----------------- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 642 --------- src/cmd/internal/obj/ppc64/asm9.go | 2 +- 4 files changed, 707 insertions(+), 1947 deletions(-) delete mode 100644 src/cmd/asm/internal/asm/testdata/ppc64enc.s (limited to 'src/cmd/internal/obj/ppc64') diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go index b21e3156ae..decf5391db 100644 --- a/src/cmd/asm/internal/asm/endtoend_test.go +++ b/src/cmd/asm/internal/asm/endtoend_test.go @@ -442,10 +442,6 @@ func TestPPC64EndToEnd(t *testing.T) { testEndToEnd(t, "ppc64", "ppc64") } -func TestPPC64Encoder(t *testing.T) { - testEndToEnd(t, "ppc64", "ppc64enc") -} - func TestRISCVEncoder(t *testing.T) { testEndToEnd(t, "riscv64", "riscvenc") } diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s index ba64d84a35..2b1191c44b 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64.s @@ -2,1311 +2,717 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// This input was created by taking the instruction productions in -// the old assembler's (9a's) grammar and hand-writing complete -// instructions for each rule, to guarantee we cover the same space. +// This contains the majority of valid opcode combinations +// available in cmd/internal/obj/ppc64/asm9.go with +// their valid instruction encodings. 
#include "../../../../../runtime/textflag.h" -TEXT foo(SB),DUPOK|NOSPLIT,$0 +TEXT asmtest(SB),DUPOK|NOSPLIT,$0 + // move constants + MOVD $1, R3 // 38600001 + MOVD $-1, R4 // 3880ffff + MOVD $65535, R5 // 6005ffff + MOVD $65536, R6 // 64060001 + MOVD $-32767, R5 // 38a08001 + MOVD $-32768, R6 // 38c08000 + MOVD $1234567, R5 // 6405001260a5d687 + MOVW $1, R3 // 38600001 + MOVW $-1, R4 // 3880ffff + MOVW $65535, R5 // 6005ffff + MOVW $65536, R6 // 64060001 + MOVW $-32767, R5 // 38a08001 + MOVW $-32768, R6 // 38c08000 + MOVW $1234567, R5 // 6405001260a5d687 + MOVD 8(R3), R4 // e8830008 + MOVD (R3)(R4), R5 // 7ca4182a + MOVW 4(R3), R4 // e8830006 + MOVW (R3)(R4), R5 // 7ca41aaa + MOVWZ 4(R3), R4 // 80830004 + MOVWZ (R3)(R4), R5 // 7ca4182e + MOVH 4(R3), R4 // a8830004 + MOVH (R3)(R4), R5 // 7ca41aae + MOVHZ 2(R3), R4 // a0830002 + MOVHZ (R3)(R4), R5 // 7ca41a2e + MOVB 1(R3), R4 // 888300017c840774 + MOVB (R3)(R4), R5 // 7ca418ae7ca50774 + MOVBZ 1(R3), R4 // 88830001 + MOVBZ (R3)(R4), R5 // 7ca418ae + MOVDBR (R3)(R4), R5 // 7ca41c28 + MOVWBR (R3)(R4), R5 // 7ca41c2c + MOVHBR (R3)(R4), R5 // 7ca41e2c + + MOVDU 8(R3), R4 // e8830009 + MOVDU (R3)(R4), R5 // 7ca4186a + MOVWU (R3)(R4), R5 // 7ca41aea + MOVWZU 4(R3), R4 // 84830004 + MOVWZU (R3)(R4), R5 // 7ca4186e + MOVHU 2(R3), R4 // ac830002 + MOVHU (R3)(R4), R5 // 7ca41aee + MOVHZU 2(R3), R4 // a4830002 + MOVHZU (R3)(R4), R5 // 7ca41a6e + MOVBU 1(R3), R4 // 8c8300017c840774 + MOVBU (R3)(R4), R5 // 7ca418ee7ca50774 + MOVBZU 1(R3), R4 // 8c830001 + MOVBZU (R3)(R4), R5 // 7ca418ee + + MOVD R4, 8(R3) // f8830008 + MOVD R5, (R3)(R4) // 7ca4192a + MOVW R4, 4(R3) // 90830004 + MOVW R5, (R3)(R4) // 7ca4192e + MOVH R4, 2(R3) // b0830002 + MOVH R5, (R3)(R4) // 7ca41b2e + MOVB R4, 1(R3) // 98830001 + MOVB R5, (R3)(R4) // 7ca419ae + MOVDBR R5, (R3)(R4) // 7ca41d28 + MOVWBR R5, (R3)(R4) // 7ca41d2c + MOVHBR R5, (R3)(R4) // 7ca41f2c + + MOVDU R4, 8(R3) // f8830009 + MOVDU R5, (R3)(R4) // 7ca4196a + MOVWU R4, 4(R3) // 94830004 + MOVWU 
R5, (R3)(R4) // 7ca4196e + MOVHU R4, 2(R3) // b4830002 + MOVHU R5, (R3)(R4) // 7ca41b6e + MOVBU R4, 1(R3) // 9c830001 + MOVBU R5, (R3)(R4) // 7ca419ee + + ADD $1, R3 // 38630001 + ADD $1, R3, R4 // 38830001 + ADD $-1, R4 // 3884ffff + ADD $-1, R4, R5 // 38a4ffff + ADD $65535, R5 // 601fffff7cbf2a14 + ADD $65535, R5, R6 // 601fffff7cdf2a14 + ADD $65536, R6 // 3cc60001 + ADD $65536, R6, R7 // 3ce60001 + ADD $-32767, R5 // 38a58001 + ADD $-32767, R5, R4 // 38858001 + ADD $-32768, R6 // 38c68000 + ADD $-32768, R6, R5 // 38a68000 + ADD $1234567, R5 // 641f001263ffd6877cbf2a14 + ADD $1234567, R5, R6 // 641f001263ffd6877cdf2a14 + ADDEX R3, R5, $3, R6 // 7cc32f54 + ADDIS $8, R3 // 3c630008 + ADDIS $1000, R3, R4 // 3c8303e8 + + ANDCC $1, R3 // 70630001 + ANDCC $1, R3, R4 // 70640001 + ANDCC $-1, R4 // 3be0ffff7fe42039 + ANDCC $-1, R4, R5 // 3be0ffff7fe52039 + ANDCC $65535, R5 // 70a5ffff + ANDCC $65535, R5, R6 // 70a6ffff + ANDCC $65536, R6 // 74c60001 + ANDCC $65536, R6, R7 // 74c70001 + ANDCC $-32767, R5 // 3be080017fe52839 + ANDCC $-32767, R5, R4 // 3be080017fe42839 + ANDCC $-32768, R6 // 3be080007fe63039 + ANDCC $-32768, R5, R6 // 3be080007fe62839 + ANDCC $1234567, R5 // 641f001263ffd6877fe52839 + ANDCC $1234567, R5, R6 // 641f001263ffd6877fe62839 + ANDISCC $1, R3 // 74630001 + ANDISCC $1000, R3, R4 // 746403e8 + + OR $1, R3 // 60630001 + OR $1, R3, R4 // 60640001 + OR $-1, R4 // 3be0ffff7fe42378 + OR $-1, R4, R5 // 3be0ffff7fe52378 + OR $65535, R5 // 60a5ffff + OR $65535, R5, R6 // 60a6ffff + OR $65536, R6 // 64c60001 + OR $65536, R6, R7 // 64c70001 + OR $-32767, R5 // 3be080017fe52b78 + OR $-32767, R5, R6 // 3be080017fe62b78 + OR $-32768, R6 // 3be080007fe63378 + OR $-32768, R6, R7 // 3be080007fe73378 + OR $1234567, R5 // 641f001263ffd6877fe52b78 + OR $1234567, R5, R3 // 641f001263ffd6877fe32b78 + ORIS $255, R3, R4 + + XOR $1, R3 // 68630001 + XOR $1, R3, R4 // 68640001 + XOR $-1, R4 // 3be0ffff7fe42278 + XOR $-1, R4, R5 // 3be0ffff7fe52278 + XOR $65535, R5 // 
68a5ffff + XOR $65535, R5, R6 // 68a6ffff + XOR $65536, R6 // 6cc60001 + XOR $65536, R6, R7 // 6cc70001 + XOR $-32767, R5 // 3be080017fe52a78 + XOR $-32767, R5, R6 // 3be080017fe62a78 + XOR $-32768, R6 // 3be080007fe63278 + XOR $-32768, R6, R7 // 3be080007fe73278 + XOR $1234567, R5 // 641f001263ffd6877fe52a78 + XOR $1234567, R5, R3 // 641f001263ffd6877fe32a78 + XORIS $15, R3, R4 + + // TODO: the order of CR operands don't match + CMP R3, R4 // 7c232000 + CMPU R3, R4 // 7c232040 + CMPW R3, R4 // 7c032000 + CMPWU R3, R4 // 7c032040 + CMPB R3,R4,R4 // 7c6423f8 + CMPEQB R3,R4,CR6 // 7f0321c0 + + // TODO: constants for ADDC? + ADD R3, R4 // 7c841a14 + ADD R3, R4, R5 // 7ca41a14 + ADDC R3, R4 // 7c841814 + ADDC R3, R4, R5 // 7ca41814 + ADDE R3, R4 // 7c841914 + ADDECC R3, R4 // 7c841915 + ADDEV R3, R4 // 7c841d14 + ADDEVCC R3, R4 // 7c841d15 + ADDV R3, R4 // 7c841e14 + ADDVCC R3, R4 // 7c841e15 + ADDCCC R3, R4, R5 // 7ca41815 + ADDME R3, R4 // 7c8301d4 + ADDMECC R3, R4 // 7c8301d5 + ADDMEV R3, R4 // 7c8305d4 + ADDMEVCC R3, R4 // 7c8305d5 + ADDCV R3, R4 // 7c841c14 + ADDCVCC R3, R4 // 7c841c15 + ADDZE R3, R4 // 7c830194 + ADDZECC R3, R4 // 7c830195 + ADDZEV R3, R4 // 7c830594 + ADDZEVCC R3, R4 // 7c830595 + SUBME R3, R4 // 7c8301d0 + SUBMECC R3, R4 // 7c8301d1 + SUBMEV R3, R4 // 7c8305d0 + SUBZE R3, R4 // 7c830190 + SUBZECC R3, R4 // 7c830191 + SUBZEV R3, R4 // 7c830590 + SUBZEVCC R3, R4 // 7c830591 + + AND R3, R4 // 7c841838 + AND R3, R4, R5 // 7c851838 + ANDN R3, R4, R5 // 7c851878 + ANDCC R3, R4, R5 // 7c851839 + OR R3, R4 // 7c841b78 + OR R3, R4, R5 // 7c851b78 + ORN R3, R4, R5 // 7c851b38 + ORCC R3, R4, R5 // 7c851b79 + XOR R3, R4 // 7c841a78 + XOR R3, R4, R5 // 7c851a78 + XORCC R3, R4, R5 // 7c851a79 + NAND R3, R4, R5 // 7c851bb8 + NANDCC R3, R4, R5 // 7c851bb9 + EQV R3, R4, R5 // 7c851a38 + EQVCC R3, R4, R5 // 7c851a39 + NOR R3, R4, R5 // 7c8518f8 + NORCC R3, R4, R5 // 7c8518f9 + + SUB R3, R4 // 7c832050 + SUB R3, R4, R5 // 7ca32050 + SUBC R3, R4 // 7c832010 + SUBC 
R3, R4, R5 // 7ca32010 + + MULLW R3, R4 // 7c8419d6 + MULLW R3, R4, R5 // 7ca419d6 + MULLW $10, R3 // 1c63000a + MULLW $10000000, R3 // 641f009863ff96807c7f19d6 + + MULLWCC R3, R4, R5 // 7ca419d7 + MULHW R3, R4, R5 // 7ca41896 + + MULHWU R3, R4, R5 // 7ca41816 + MULLD R3, R4 // 7c8419d2 + MULLD R4, R4, R5 // 7ca421d2 + MULLD $20, R4 // 1c840014 + MULLD $200000000, R4 // 641f0beb63ffc2007c9f21d2 + + MULLDCC R3, R4, R5 // 7ca419d3 + MULHD R3, R4, R5 // 7ca41892 + MULHDCC R3, R4, R5 // 7ca41893 + + MULLWV R3, R4 // 7c841dd6 + MULLWV R3, R4, R5 // 7ca41dd6 + MULLWVCC R3, R4, R5 // 7ca41dd7 + MULHWUCC R3, R4, R5 // 7ca41817 + MULLDV R3, R4, R5 // 7ca41dd2 + MULLDVCC R3, R4, R5 // 7ca41dd3 + + DIVD R3,R4 // 7c841bd2 + DIVD R3, R4, R5 // 7ca41bd2 + DIVDCC R3,R4, R5 // 7ca41bd3 + DIVDU R3, R4, R5 // 7ca41b92 + DIVDV R3, R4, R5 // 7ca41fd2 + DIVDUCC R3, R4, R5 // 7ca41b93 + DIVDVCC R3, R4, R5 // 7ca41fd3 + DIVDUV R3, R4, R5 // 7ca41f92 + DIVDUVCC R3, R4, R5 // 7ca41f93 + DIVDE R3, R4, R5 // 7ca41b52 + DIVDECC R3, R4, R5 // 7ca41b53 + DIVDEU R3, R4, R5 // 7ca41b12 + DIVDEUCC R3, R4, R5 // 7ca41b13 + + REM R3, R4, R5 // 7fe41bd67fff19d67cbf2050 + REMU R3, R4, R5 // 7fe41b967fff19d67bff00287cbf2050 + REMD R3, R4, R5 // 7fe41bd27fff19d27cbf2050 + REMDU R3, R4, R5 // 7fe41b927fff19d27cbf2050 + + MADDHD R3,R4,R5,R6 // 10c32170 + MADDHDU R3,R4,R5,R6 // 10c32171 + + MODUD R3, R4, R5 // 7ca41a12 + MODUW R3, R4, R5 // 7ca41a16 + MODSD R3, R4, R5 // 7ca41e12 + MODSW R3, R4, R5 // 7ca41e16 + + SLW $8, R3, R4 // 5464402e + SLW R3, R4, R5 // 7c851830 + SLWCC R3, R4 // 7c841831 + SLD $16, R3, R4 // 786483e4 + SLD R3, R4, R5 // 7c851836 + SLDCC R3, R4 // 7c841837 + + SRW $8, R3, R4 // 5464c23e + SRW R3, R4, R5 // 7c851c30 + SRWCC R3, R4 // 7c841c31 + SRAW $8, R3, R4 // 7c644670 + SRAW R3, R4, R5 // 7c851e30 + SRAWCC R3, R4 // 7c841e31 + SRD $16, R3, R4 // 78648402 + SRD R3, R4, R5 // 7c851c36 + SRDCC R3, R4 // 7c841c37 + SRAD $16, R3, R4 // 7c648674 + SRAD R3, R4, R5 // 7c851e34 + SRDCC 
R3, R4 // 7c841c37 + ROTLW $16, R3, R4 // 5464803e + ROTLW R3, R4, R5 // 5c85183e + EXTSWSLI $3, R4, R5 // 7c851ef4 + RLWMI $7, R3, $65535, R6 // 50663c3e + RLWMICC $7, R3, $65535, R6 // 50663c3f + RLWNM $3, R4, $7, R6 // 54861f7e + RLWNMCC $3, R4, $7, R6 // 54861f7f + RLDMI $0, R4, $7, R6 // 7886076c + RLDMICC $0, R4, $7, R6 // 7886076d + RLDIMI $0, R4, $7, R6 // 788601cc + RLDIMICC $0, R4, $7, R6 // 788601cd + RLDC $0, R4, $15, R6 // 78860728 + RLDCCC $0, R4, $15, R6 // 78860729 + RLDCL $0, R4, $7, R6 // 78860770 + RLDCLCC $0, R4, $15, R6 // 78860721 + RLDCR $0, R4, $-16, R6 // 788606f2 + RLDCRCC $0, R4, $-16, R6 // 788606f3 + RLDICL $0, R4, $15, R6 // 788603c0 + RLDICLCC $0, R4, $15, R6 // 788603c1 + RLDICR $0, R4, $15, R6 // 788603c4 + RLDICRCC $0, R4, $15, R6 // 788603c5 + RLDIC $0, R4, $15, R6 // 788603c8 + RLDICCC $0, R4, $15, R6 // 788603c9 + CLRLSLWI $16, R5, $8, R4 // 54a4422e + CLRLSLDI $24, R4, $2, R3 // 78831588 + + BEQ 0(PC) // 41820000 + BEQ CR1,0(PC) // 41860000 + BGE 0(PC) // 40800000 + BGE CR2,0(PC) // 40880000 + BGT 4(PC) // 41810010 + BGT CR3,4(PC) // 418d0010 + BLE 0(PC) // 40810000 + BLE CR4,0(PC) // 40910000 + BLT 0(PC) // 41800000 + BLT CR5,0(PC) // 41940000 + BNE 0(PC) // 40820000 + BLT CR6,0(PC) // 41980000 + JMP 8(PC) // 48000010 -//inst: -// -// load ints and bytes -// -// LMOVW rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, R2 - -// LMOVW addr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW foo<>+4(SB), R2 - MOVW 16(R1), R2 - -// LMOVW regaddr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW (R1), R2 - MOVW (R1+R2), R3 // MOVW (R1)(R2*1), R3 - -// LMOVB rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, R2 - -// LMOVB addr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVB foo<>+3(SB), R2 - MOVB 16(R1), R2 - -// LMOVB regaddr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVB (R1), R2 - MOVB (R1+R2), R3 // MOVB (R1)(R2*1), R3 - -// -// 
load floats -// -// LFMOV addr ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD foo<>+4(SB), F2 - FMOVD 16(R1), F2 - -// LFMOV regaddr ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD (R1), F2 - -// LFMOV fimm ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD $0.1, F2 // FMOVD $(0.10000000000000001), F2 - -// LFMOV freg ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F1, F2 - -// LFMOV freg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F2, foo<>+4(SB) - FMOVD F2, 16(R1) - -// LFMOV freg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F2, (R1) - -// -// store ints and bytes -// -// LMOVW rreg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, foo<>+3(SB) - MOVW R1, 16(R2) - -// LMOVW rreg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, (R1) - MOVW R1, (R2+R3) // MOVW R1, (R2)(R3*1) - -// LMOVB rreg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVB R1, foo<>+3(SB) - MOVB R1, 16(R2) - -// LMOVB rreg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVB R1, (R1) - MOVB R1, (R2+R3) // MOVB R1, (R2)(R3*1) -// -// store floats -// -// LMOVW freg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F1, foo<>+4(SB) - FMOVD F1, 16(R2) - -// LMOVW freg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F1, (R1) - -// -// floating point status -// -// LMOVW fpscr ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVFL FPSCR, F1 - -// LMOVW freg ',' fpscr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVFL F1, FPSCR - -// LMOVW freg ',' imm ',' fpscr -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } - MOVFL F1, $4, FPSCR - -// LMOVW fpscr ',' creg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVFL FPSCR, CR0 - -// LMTFSB imm ',' con -// { -// outcode(int($1), &$2, int($4), &nullgen); -// } -//TODO 9a doesn't work MTFSB0 $4, 4 - -// -// field moves (mtcrf) -// -// LMOVW rreg 
',' imm ',' lcr -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } -// TODO 9a doesn't work MOVFL R1,$4,CR - -// LMOVW rreg ',' creg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, CR1 - -// LMOVW rreg ',' lcr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, CR - -// -// integer operations -// logical instructions -// shift instructions -// unary instructions -// -// LADDW rreg ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - ADD R1, R2, R3 - -// LADDW imm ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - ADD $1, R2, R3 - -// LADDW rreg ',' imm ',' rreg -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } -//TODO 9a trouble ADD R1, $2, R3 maybe swap rreg and imm - -// LADDW rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - ADD R1, R2 - -// LADDW imm ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - ADD $4, R1 - -// LLOGW rreg ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - ADDE R1, R2, R3 - -// LLOGW rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - ADDE R1, R2 - -// LSHW rreg ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - SLW R1, R2, R3 - -// LSHW rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - SLW R1, R2 - -// LSHW imm ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - SLW $4, R1, R2 - -// LSHW imm ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - SLW $4, R1 - -// LABS rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - SLW $4, R1 - -// LABS rreg -// { -// outcode(int($1), &$2, 0, &$2); -// } - SUBME R1 // SUBME R1, R1 - -// -// multiply-accumulate -// -// LMA rreg ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } -//TODO this instruction is undefined in lex.go LMA R1, R2, R3 NOT SUPPORTED (called MAC) - -// -// move immediate: macro for cau+or, addi, addis, and other combinations -// -// LMOVW imm ',' rreg -// { -// outcode(int($1), 
&$2, 0, &$4); -// } - MOVW $1, R1 - -// LMOVW ximm ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW $1, R1 - MOVW $foo(SB), R1 - -// condition register operations -// -// LCROP cbit ',' cbit -// { -// outcode(int($1), &$2, int($4.Reg), &$4); -// } -//TODO 9a trouble CREQV 1, 2 delete? liblink encodes like a divide (maybe wrong too) - -// LCROP cbit ',' con ',' cbit -// { -// outcode(int($1), &$2, int($4), &$6); -// } -//TODO 9a trouble CREQV 1, 2, 3 - -// -// condition register moves -// move from machine state register -// -// LMOVW creg ',' creg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVFL CR0, CR1 - -// LMOVW psr ',' creg // TODO: should psr should be fpscr -// { -// outcode(int($1), &$2, 0, &$4); -// } -//TODO 9a trouble MOVW FPSCR, CR1 - -// LMOVW lcr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW CR, R1 - -// LMOVW psr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW SPR(0), R1 - MOVW SPR(7), R1 - -// LMOVW xlreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW LR, R1 - MOVW CTR, R1 - -// LMOVW rreg ',' xlreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, LR - MOVW R1, CTR - -// LMOVW creg ',' psr // TODO doesn't exist -// { -// outcode(int($1), &$2, 0, &$4); -// } -//TODO 9a trouble MOVW CR1, SPR(7) - -// LMOVW rreg ',' psr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, SPR(7) - -// -// branch, branch conditional -// branch conditional register -// branch conditional to count register -// -// LBRA rel -// { -// outcode(int($1), &nullgen, 0, &$2); -// } - BEQ CR1, 2(PC) -label0: - BR 1(PC) // JMP 1(PC) - BEQ CR1, 2(PC) - BR label0+0 // JMP 62 - -// LBRA addr -// { -// outcode(int($1), &nullgen, 0, &$2); -// } - BEQ CR1, 2(PC) - BR LR // JMP LR - BEQ CR1, 2(PC) -// BR 0(R1) // TODO should work - BEQ CR1, 2(PC) - BR foo+0(SB) // JMP foo(SB) - -// LBRA '(' xlreg ')' -// { -// outcode(int($1), &nullgen, 0, &$3); -// } - BEQ CR1, 2(PC) - BR (CTR) // JMP CTR - -// LBRA ',' rel 
// asm doesn't support the leading comma -// { -// outcode(int($1), &nullgen, 0, &$3); -// } -// LBRA ',' addr // asm doesn't support the leading comma -// { -// outcode(int($1), &nullgen, 0, &$3); -// } -// LBRA ',' '(' xlreg ')' // asm doesn't support the leading comma -// { -// outcode(int($1), &nullgen, 0, &$4); -// } -// LBRA creg ',' rel -// { -// outcode(int($1), &$2, 0, &$4); -// } -label1: - BEQ CR1, 1(PC) - BEQ CR1, label1 // BEQ CR1, 72 - -// LBRA creg ',' addr // TODO DOES NOT WORK in 9a -// { -// outcode(int($1), &$2, 0, &$4); -// } - -// LBRA creg ',' '(' xlreg ')' // TODO DOES NOT WORK in 9a -// { -// outcode(int($1), &$2, 0, &$5); -// } - -// LBRA con ',' rel // TODO DOES NOT WORK in 9a -// { -// outcode(int($1), &nullgen, int($2), &$4); -// } - -// LBRA con ',' addr // TODO DOES NOT WORK in 9a -// { -// outcode(int($1), &nullgen, int($2), &$4); -// } - -// LBRA con ',' '(' xlreg ')' -// { -// outcode(int($1), &nullgen, int($2), &$5); -// } -// BC 4, (CTR) // TODO - should work - -// LBRA con ',' con ',' rel -// { -// var g obj.Addr -// g = nullgen; -// g.Type = obj.TYPE_CONST; -// g.Offset = $2; -// outcode(int($1), &g, int(REG_R0+$4), &$6); -// } -// BC 3, 4, label1 // TODO - should work - -// LBRA con ',' con ',' addr // TODO mystery -// { -// var g obj.Addr -// g = nullgen; -// g.Type = obj.TYPE_CONST; -// g.Offset = $2; -// outcode(int($1), &g, int(REG_R0+$4), &$6); -// } -//TODO 9a trouble BC 3, 3, 4(R1) - -// LBRA con ',' con ',' '(' xlreg ')' -// { -// var g obj.Addr -// g = nullgen; -// g.Type = obj.TYPE_CONST; -// g.Offset = $2; -// outcode(int($1), &g, int(REG_R0+$4), &$7); -// } - BC 3, 3, (LR) // BC $3, R3, LR - -// -// conditional trap // TODO NOT DEFINED -// TODO these instructions are not in lex.go -// -// LTRAP rreg ',' sreg -// { -// outcode(int($1), &$2, int($4), &nullgen); -// } -// LTRAP imm ',' sreg -// { -// outcode(int($1), &$2, int($4), &nullgen); -// } -// LTRAP rreg comma -// { -// outcode(int($1), &$2, 0, &nullgen); -// } 
-// LTRAP comma -// { -// outcode(int($1), &nullgen, 0, &nullgen); -// } - -// -// floating point operate -// -// LFCONV freg ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FABS F1, F2 - -// LFADD freg ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FADD F1, F2 - -// LFADD freg ',' freg ',' freg -// { -// outcode(int($1), &$2, int($4.Reg), &$6); -// } - FADD F1, F2, F3 - -// LFMA freg ',' freg ',' freg ',' freg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - FMADD F1, F2, F3, F4 - -// LFCMP freg ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FCMPU F1, F2 - -// LFCMP freg ',' freg ',' creg -// { -// outcode(int($1), &$2, int($6.Reg), &$4); -// } -// FCMPU F1, F2, CR0 - -// FTDIV FRA, FRB, BF produces -// ftdiv BF, FRA, FRB - FTDIV F1,F2,$7 - -// FTSQRT FRB, BF produces -// ftsqrt BF, FRB - FTSQRT F2,$7 - -// FCFID -// FCFIDS - - FCFID F2,F3 - FCFIDCC F3,F3 - FCFIDS F2,F3 - FCFIDSCC F2,F3 - -// -// CMP -// -// LCMP rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - CMP R1, R2 - -// LCMP rreg ',' imm -// { -// outcode(int($1), &$2, 0, &$4); -// } - CMP R1, $4 - -// LCMP rreg ',' rreg ',' creg -// { -// outcode(int($1), &$2, int($6.Reg), &$4); -// } - CMP R1, R2, CR0 // CMP R1, CR0, R2 - -// LCMP rreg ',' imm ',' creg -// { -// outcode(int($1), &$2, int($6.Reg), &$4); -// } - CMP R1, $4, CR0 // CMP R1, CR0, $4 - -// CMPB RS,RB,RA produces -// cmpb RA,RS,RB - CMPB R2,R2,R1 - -// CMPEQB RA,RB,BF produces -// cmpeqb BF,RA,RB - CMPEQB R1, R2, CR0 - -// -// rotate extended mnemonics map onto other shift instructions -// - - ROTL $12,R2,R3 - ROTL R2,R3,R4 - ROTLW $9,R2,R3 - ROTLW R2,R3,R4 - -// -// rotate and mask -// -// LRLWM imm ',' rreg ',' imm ',' rreg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - RLDC $4, R1, $16, R2 - -// LRLWM imm ',' rreg ',' mask ',' rreg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - RLDC $26, R1, 4, 5, R2 // RLDC $26, R1, $201326592, R2 - -// LRLWM 
rreg ',' rreg ',' imm ',' rreg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - RLDCL R1, R2, $7, R3 - -// LRLWM rreg ',' rreg ',' mask ',' rreg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - RLWMI R1, R2, 4, 5, R3 // RLWMI R1, R2, $201326592, R3 - - -// opcodes added with constant shift counts, not masks - - RLDICR $3, R2, $24, R4 - - RLDICL $1, R2, $61, R6 - - RLDIMI $7, R2, $52, R7 - -// opcodes for right and left shifts, const and reg shift counts - - SLD $4, R3, R4 - SLD R2, R3, R4 - SLW $4, R3, R4 - SLW R2, R3, R4 - SRD $8, R3, R4 - SRD R2, R3, R4 - SRW $8, R3, R4 - SRW R2, R3, R4 - -// -// load/store multiple -// -// LMOVMW addr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } -// MOVMW foo+0(SB), R2 // TODO TLS broke this! - MOVMW 4(R1), R2 - -// LMOVMW rreg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } -// MOVMW R1, foo+0(SB) // TODO TLS broke this! - MOVMW R1, 4(R2) - -// -// various indexed load/store -// indexed unary (eg, cache clear) -// -// LXLD regaddr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - LSW (R1), R2 - LSW (R1+R2), R3 // LSW (R1)(R2*1), R3 - -// LXLD regaddr ',' imm ',' rreg -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } - LSW (R1), $1, R2 - LSW (R1+R2), $1, R3 // LSW (R1)(R2*1), $1, R3 - -// LXST rreg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - STSW R1, (R2) - STSW R1, (R2+R3) // STSW R1, (R2)(R3*1) - -// LXST rreg ',' imm ',' regaddr -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } - STSW R1, $1, (R2) - STSW R1, $1, (R2+R3) // STSW R1, $1, (R2)(R3*1) - -// LXMV regaddr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVHBR (R1), R2 - MOVHBR (R1+R2), R3 // MOVHBR (R1)(R2*1), R3 - -// LXMV rreg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVHBR R1, (R2) - MOVHBR R1, (R2+R3) // MOVHBR R1, (R2)(R3*1) - -// LXOP regaddr -// { -// outcode(int($1), &$2, 0, &nullgen); -// } - DCBF (R1) - DCBF (R1+R2) // DCBF (R1)(R2*1) - DCBF 
(R1), $1 - DCBF (R1)(R2*1), $1 - DCBT (R1), $1 - DCBT (R1)(R2*1), $1 - -// LDMX (RB)(RA*1),RT produces -// ldmx RT,RA,RB - LDMX (R2)(R1*1), R3 - -// Population count, X-form -// RS,RA produces -// RA,RS - POPCNTD R1,R2 - POPCNTW R1,R2 - POPCNTB R1,R2 - -// Copysign - FCPSGN F1,F2,F3 - -// Random number generator, X-form -// DARN L,RT produces -// darn RT,L - DARN $1, R1 - -// Copy/Paste facility -// RB,RA produces -// RA,RB - COPY R2,R1 - PASTECC R2,R1 - -// Modulo signed/unsigned double/word X-form -// RA,RB,RT produces -// RT,RA,RB - MODUD R3,R4,R5 - MODUW R3,R4,R5 - MODSD R3,R4,R5 - MODSW R3,R4,R5 - -// VMX instructions - -// Described as: -// , -// produces -// - -// Vector load, VX-form -// (RB)(RA*1),VRT produces -// VRT,RA,RB - LVEBX (R1)(R2*1), V0 - LVEHX (R3)(R4*1), V1 - LVEWX (R5)(R6*1), V2 - LVX (R7)(R8*1), V3 - LVXL (R9)(R10*1), V4 - LVSL (R11)(R12*1), V5 - LVSR (R14)(R15*1), V6 - -// Vector store, VX-form -// VRT,(RB)(RA*1) produces -// VRT,RA,RB - STVEBX V31, (R1)(R2*1) - STVEHX V30, (R2)(R3*1) - STVEWX V29, (R4)(R5*1) - STVX V28, (R6)(R7*1) - STVXL V27, (R9)(R9*1) - -// Vector AND, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VAND V10, V9, V8 - VANDC V15, V14, V13 - VNAND V19, V18, V17 - -// Vector OR, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VOR V26, V25, V24 - VORC V23, V22, V21 - VNOR V20, V19, V18 - VXOR V17, V16, V15 - VEQV V14, V13, V12 - -// Vector ADD, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VADDUBM V3, V2, V1 - VADDUHM V3, V2, V1 - VADDUWM V3, V2, V1 - VADDUDM V3, V2, V1 - VADDUQM V3, V2, V1 - VADDCUQ V3, V2, V1 - VADDCUW V3, V2, V1 - VADDUBS V3, V2, V1 - VADDUHS V3, V2, V1 - VADDUWS V3, V2, V1 - VADDSBS V3, V2, V1 - VADDSHS V3, V2, V1 - VADDSWS V3, V2, V1 - -// Vector ADD extended, VA-form -// VRA,VRB,VRC,VRT produces -// VRT,VRA,VRB,VRC - VADDEUQM V4, V3, V2, V1 - VADDECUQ V4, V3, V2, V1 - -// Vector multiply, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VMULESB V2, V3, V1 - VMULOSB V2, V3, V1 - VMULEUB V2, 
V3, V1 - VMULOUB V2, V3, V1 - VMULESH V2, V3, V1 - VMULOSH V2, V3, V1 - VMULEUH V2, V3, V1 - VMULOUH V2, V3, V1 - VMULESW V2, V3, V1 - VMULOSW V2, V3, V1 - VMULEUW V2, V3, V1 - VMULOUW V2, V3, V1 - VMULUWM V2, V3, V1 - -// Vector polynomial multiply-sum, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VPMSUMB V2, V3, V1 - VPMSUMH V2, V3, V1 - VPMSUMW V2, V3, V1 - VPMSUMD V2, V3, V1 - -// Vector multiply-sum, VA-form -// VRA, VRB, VRC, VRT produces -// VRT, VRA, VRB, VRC - VMSUMUDM V4, V3, V2, V1 - -// Vector SUB, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VSUBUBM V3, V2, V1 - VSUBUHM V3, V2, V1 - VSUBUWM V3, V2, V1 - VSUBUDM V3, V2, V1 - VSUBUQM V3, V2, V1 - VSUBCUQ V3, V2, V1 - VSUBCUW V3, V2, V1 - VSUBUBS V3, V2, V1 - VSUBUHS V3, V2, V1 - VSUBUWS V3, V2, V1 - VSUBSBS V3, V2, V1 - VSUBSHS V3, V2, V1 - VSUBSWS V3, V2, V1 - -// Vector SUB extended, VA-form -// VRA,VRB,VRC,VRT produces -// VRT,VRA,VRB,VRC - VSUBEUQM V4, V3, V2, V1 - VSUBECUQ V4, V3, V2, V1 - -// Vector rotate, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VRLB V2, V1, V0 - VRLH V2, V1, V0 - VRLW V2, V1, V0 - VRLD V2, V1, V0 - -// Vector shift, VX-form -// VRA,VRB,VRT -// VRT,VRA,VRB - VSLB V2, V1, V0 - VSLH V2, V1, V0 - VSLW V2, V1, V0 - VSL V2, V1, V0 - VSLO V2, V1, V0 - VSRB V2, V1, V0 - VSRH V2, V1, V0 - VSRW V2, V1, V0 - VSR V2, V1, V0 - VSRO V2, V1, V0 - VSLD V2, V1, V0 - VSRD V2, V1, V0 - VSRAB V2, V1, V0 - VSRAH V2, V1, V0 - VSRAW V2, V1, V0 - VSRAD V2, V1, V0 - -// Vector shift by octect immediate, VA-form with SHB 4-bit field -// SHB,VRA,VRB,VRT produces -// VRT,VRA,VRB,SHB - VSLDOI $4, V2, V1, V0 - -// Vector merge odd and even word -// VRA,VRB,VRT produces -// VRT,VRA,VRB - - VMRGOW V4,V5,V6 - VMRGEW V4,V5,V6 - -// Vector count, VX-form -// VRB,VRT produces -// VRT,VRB - VCLZB V4, V5 - VCLZH V4, V5 - VCLZW V4, V5 - VCLZD V4, V5 - VPOPCNTB V4, V5 - VPOPCNTH V4, V5 - VPOPCNTW V4, V5 - VPOPCNTD V4, V5 - -// Vector compare, VC-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB -// * 
Note: 'CC' suffix denotes Rc=1 -// i.e. vcmpequb. v3,v1,v2 equals VCMPEQUBCC V1,V2,V3 - VCMPEQUB V3, V2, V1 - VCMPEQUBCC V3, V2, V1 - VCMPEQUH V3, V2, V1 - VCMPEQUHCC V3, V2, V1 - VCMPEQUW V3, V2, V1 - VCMPEQUWCC V3, V2, V1 - VCMPEQUD V3, V2, V1 - VCMPEQUDCC V3, V2, V1 - VCMPGTUB V3, V2, V1 - VCMPGTUBCC V3, V2, V1 - VCMPGTUH V3, V2, V1 - VCMPGTUHCC V3, V2, V1 - VCMPGTUW V3, V2, V1 - VCMPGTUWCC V3, V2, V1 - VCMPGTUD V3, V2, V1 - VCMPGTUDCC V3, V2, V1 - VCMPGTSB V3, V2, V1 - VCMPGTSBCC V3, V2, V1 - VCMPGTSH V3, V2, V1 - VCMPGTSHCC V3, V2, V1 - VCMPGTSW V3, V2, V1 - VCMPGTSWCC V3, V2, V1 - VCMPGTSD V3, V2, V1 - VCMPGTSDCC V3, V2, V1 - VCMPNEZB V3, V2, V1 - VCMPNEZBCC V3, V2, V1 - VCMPNEB V3, V2, V1 - VCMPNEBCC V3, V2, V1 - VCMPNEH V3, V2, V1 - VCMPNEHCC V3, V2, V1 - VCMPNEW V3, V2, V1 - VCMPNEWCC V3, V2, V1 - -// Vector permute, VA-form -// VRA,VRB,VRC,VRT produces -// VRT,VRA,VRB,VRC - VPERM V3, V2, V1, V0 - VPERMXOR V3, V2, V1, V0 - VPERMR V3, V2, V1, V0 - -// Vector bit permute, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VBPERMQ V3,V1,V2 - VBPERMD V3,V1,V2 - -// Vector select, VA-form -// VRA,VRB,VRC,VRT produces -// VRT,VRA,VRB,VRC - VSEL V3, V2, V1, V0 - -// Vector splat, VX-form with 4-bit UIM field -// UIM,VRB,VRT produces -// VRT,VRB,UIM - VSPLTB $15, V1, V0 - VSPLTH $7, V1, V0 - VSPLTW $3, V1, V0 - -// Vector splat immediate signed, VX-form with 5-bit SIM field -// SIM,VRT produces -// VRT,SIM - VSPLTISB $31, V4 - VSPLTISH $31, V4 - VSPLTISW $31, V4 - -// Vector AES cipher, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VCIPHER V3, V2, V1 - VCIPHERLAST V3, V2, V1 - VNCIPHER V3, V2, V1 - VNCIPHERLAST V3, V2, V1 - -// Vector AES subbytes, VX-form -// VRA,VRT produces -// VRT,VRA - VSBOX V2, V1 - -// Vector SHA, VX-form with ST bit field and 4-bit SIX field -// SIX,VRA,ST,VRT produces -// VRT,VRA,ST,SIX - VSHASIGMAW $15, V1, $1, V0 - VSHASIGMAD $15, V1, $1, V0 - -// VSX instructions -// Described as: -// , -// produces -// - -// VSX load, XX1-form 
-// (RB)(RA*1),XT produces -// XT,RA,RB - LXVD2X (R1)(R2*1), VS0 - LXVW4X (R1)(R2*1), VS0 - LXVH8X (R1)(R2*1), VS0 - LXVB16X (R1)(R2*1), VS0 - LXVDSX (R1)(R2*1), VS0 - LXSDX (R1)(R2*1), VS0 - LXSIWAX (R1)(R2*1), VS0 - LXSIWZX (R1)(R2*1), VS0 - -// VSX load with length X-form (also left-justified) - LXVL R3,R4, VS0 - LXVLL R3,R4, VS0 - LXVX R3,R4, VS0 -// VSX load, DQ-form -// DQ(RA), XS produces -// XS, DQ(RA) - LXV 32752(R1), VS0 - -// VSX store, XX1-form -// XS,(RB)(RA*1) produces -// XS,RA,RB - STXVD2X VS63, (R1)(R2*1) - STXVW4X VS63, (R1)(R2*1) - STXVH8X VS63, (R1)(R2*1) - STXVB16X VS63, (R1)(R2*1) - STXSDX VS63, (R1)(R2*1) - STXSIWX VS63, (R1)(R2*1) - -// VSX store, DQ-form -// DQ(RA), XS produces -// XS, DQ(RA) - STXV VS63, -32752(R1) - -// VSX store with length, X-form (also left-justified) - STXVL VS0, R3,R4 - STXVLL VS0, R3,R4 - STXVX VS0, R3,R4 - -// VSX move from VSR, XX1-form -// XS,RA produces -// RA,XS -// Extended mnemonics accept VMX and FP registers as sources - MFVSRD VS0, R1 - MFVSRWZ VS33, R1 - MFVSRLD VS63, R1 - MFVRD V0, R1 - MFFPRD F0, R1 - -// VSX move to VSR, XX1-form -// RA,XT produces -// XT,RA -// Extended mnemonics accept VMX and FP registers as targets - MTVSRD R1, VS0 - MTVSRWA R1, VS31 - MTVSRWZ R1, VS63 - MTVSRDD R1, R2, VS0 - MTVSRWS R1, VS32 - MTVRD R1, V13 - MTFPRD R1, F24 - -// VSX AND, XX3-form -// XA,XB,XT produces -// XT,XA,XB - XXLAND VS0,VS1,VS32 - XXLANDC VS0,VS1,VS32 - XXLEQV VS0,VS1,VS32 - XXLNAND VS0,VS1,VS32 - -// VSX OR, XX3-form -// XA,XB,XT produces -// XT,XA,XB - XXLORC VS0,VS1,VS32 - XXLNOR VS0,VS1,VS32 - XXLORQ VS0,VS1,VS32 - XXLXOR VS0,VS1,VS32 - XXLOR VS0,VS1,VS32 - -// VSX select, XX4-form -// XA,XB,XC,XT produces -// XT,XA,XB,XC - XXSEL VS0,VS1,VS3,VS32 - -// VSX merge, XX3-form -// XA,XB,XT produces -// XT,XA,XB - XXMRGHW VS0,VS1,VS32 - XXMRGLW VS0,VS1,VS32 - -// VSX splat, XX2-form -// XB,UIM,XT produces -// XT,XB,UIM - XXSPLTW VS0,$3,VS32 - XXSPLTIB $26,VS0 - -// VSX permute, XX3-form -// XA,XB,XT produces 
-// XT,XA,XB - XXPERM VS0,VS1,VS32 - -// VSX permute, XX3-form -// XA,XB,DM,XT produces -// XT,XA,XB,DM - XXPERMDI VS0,VS1,$3,VS32 - -// VSX shift, XX3-form -// XA,XB,SHW,XT produces -// XT,XA,XB,SHW - XXSLDWI VS0,VS1,$3,VS32 - -// VSX byte-reverse XX2-form -// XB,XT produces -// XT,XB - XXBRQ VS0,VS1 - XXBRD VS0,VS1 - XXBRW VS0,VS1 - XXBRH VS0,VS1 - -// VSX scalar FP-FP conversion, XX2-form -// XB,XT produces -// XT,XB - XSCVDPSP VS0,VS32 - XSCVSPDP VS0,VS32 - XSCVDPSPN VS0,VS32 - XSCVSPDPN VS0,VS32 - -// VSX vector FP-FP conversion, XX2-form -// XB,XT produces -// XT,XB - XVCVDPSP VS0,VS32 - XVCVSPDP VS0,VS32 - -// VSX scalar FP-integer conversion, XX2-form -// XB,XT produces -// XT,XB - XSCVDPSXDS VS0,VS32 - XSCVDPSXWS VS0,VS32 - XSCVDPUXDS VS0,VS32 - XSCVDPUXWS VS0,VS32 - -// VSX scalar integer-FP conversion, XX2-form -// XB,XT produces -// XT,XB - XSCVSXDDP VS0,VS32 - XSCVUXDDP VS0,VS32 - XSCVSXDSP VS0,VS32 - XSCVUXDSP VS0,VS32 - -// VSX vector FP-integer conversion, XX2-form -// XB,XT produces -// XT,XB - XVCVDPSXDS VS0,VS32 - XVCVDPSXWS VS0,VS32 - XVCVDPUXDS VS0,VS32 - XVCVDPUXWS VS0,VS32 - XVCVSPSXDS VS0,VS32 - XVCVSPSXWS VS0,VS32 - XVCVSPUXDS VS0,VS32 - XVCVSPUXWS VS0,VS32 - -// VSX scalar integer-FP conversion, XX2-form -// XB,XT produces -// XT,XB - XVCVSXDDP VS0,VS32 - XVCVSXWDP VS0,VS32 - XVCVUXDDP VS0,VS32 - XVCVUXWDP VS0,VS32 - XVCVSXDSP VS0,VS32 - XVCVSXWSP VS0,VS32 - XVCVUXDSP VS0,VS32 - XVCVUXWSP VS0,VS32 - -// Multiply-Add High Doubleword -// RA,RB,RC,RT produces -// RT,RA,RB,RC - MADDHD R1,R2,R3,R4 - MADDHDU R1,R2,R3,R4 - -// Add Extended using alternate carry bit -// ADDEX RA,RB,CY,RT produces -// addex RT, RA, RB, CY - ADDEX R1, R2, $0, R3 - -// Immediate-shifted operations -// ADDIS SI, RA, RT produces -// addis RT, RA, SI - ADDIS $8, R3, R4 - ADDIS $-1, R3, R4 - -// ANDISCC UI, RS, RA produces -// andis. 
RA, RS, UI - ANDISCC $7, R4, R5 - -// ORIS UI, RS, RA produces -// oris RA, RS, UI - ORIS $4, R2, R3 - -// XORIS UI, RS, RA produces -// xoris RA, RS, UI - XORIS $1, R1, R2 - -// -// NOP -// -// LNOP comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &nullgen, 0, &nullgen); -// } NOP - -// LNOP rreg comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &$2, 0, &nullgen); -// } NOP R2 + NOP F2 + NOP $4 + + CRAND CR1, CR2, CR3 // 4c620a02 + CRANDN CR1, CR2, CR3 // 4c620902 + CREQV CR1, CR2, CR3 // 4c620a42 + CRNAND CR1, CR2, CR3 // 4c6209c2 + CRNOR CR1, CR2, CR3 // 4c620842 + CROR CR1, CR2, CR3 // 4c620b82 + CRORN CR1, CR2, CR3 // 4c620b42 + CRXOR CR1, CR2, CR3 // 4c620982 + + ISEL $1, R3, R4, R5 // 7ca3205e + ISEL $0, R3, R4, R5 // 7ca3201e + ISEL $2, R3, R4, R5 // 7ca3209e + ISEL $3, R3, R4, R5 // 7ca320de + ISEL $4, R3, R4, R5 // 7ca3211e + POPCNTB R3, R4 // 7c6400f4 + POPCNTW R3, R4 // 7c6402f4 + POPCNTD R3, R4 // 7c6403f4 + + PASTECC R3, R4 // 7c23270d + COPY R3, R4 // 7c23260c + + // load-and-reserve + LBAR (R4)(R3*1),$1,R5 // 7ca32069 + LBAR (R4),$0,R5 // 7ca02068 + LBAR (R3),R5 // 7ca01868 + LHAR (R4)(R3*1),$1,R5 // 7ca320e9 + LHAR (R4),$0,R5 // 7ca020e8 + LHAR (R3),R5 // 7ca018e8 + LWAR (R4)(R3*1),$1,R5 // 7ca32029 + LWAR (R4),$0,R5 // 7ca02028 + LWAR (R3),R5 // 7ca01828 + LDAR (R4)(R3*1),$1,R5 // 7ca320a9 + LDAR (R4),$0,R5 // 7ca020a8 + LDAR (R3),R5 // 7ca018a8 + + STBCCC R3, (R4)(R5) // 7c65256d + STWCCC R3, (R4)(R5) // 7c65212d + STDCCC R3, (R4)(R5) // 7c6521ad + STHCCC R3, (R4)(R5) + STSW R3, (R4)(R5) + + SYNC // 7c0004ac + ISYNC // 4c00012c + LWSYNC // 7c2004ac + + DARN $1, R5 // 7ca105e6 + + DCBF (R3)(R4) // 7c0418ac + DCBI (R3)(R4) // 7c041bac + DCBST (R3)(R4) // 7c04186c + DCBZ (R3)(R4) // 7c041fec + DCBT (R3)(R4) // 7c041a2c + ICBI (R3)(R4) // 7c041fac + + // float constants + FMOVD $(0.0), F1 // f0210cd0 + FMOVD $(-0.0), F1 // f0210cd0fc200850 + + FMOVD 8(R3), F1 // c8230008 + FMOVD (R3)(R4), F1 // 
7c241cae + FMOVDU 8(R3), F1 // cc230008 + FMOVDU (R3)(R4), F1 // 7c241cee + FMOVS 4(R3), F1 // c0230004 + FMOVS (R3)(R4), F1 // 7c241c2e + FMOVSU 4(R3), F1 // c4230004 + FMOVSU (R3)(R4), F1 // 7c241c6e + + FMOVD F1, 8(R3) // d8230008 + FMOVD F1, (R3)(R4) // 7c241dae + FMOVDU F1, 8(R3) // dc230008 + FMOVDU F1, (R3)(R4) // 7c241dee + FMOVS F1, 4(R3) // d0230004 + FMOVS F1, (R3)(R4) // 7c241d2e + FMOVSU F1, 4(R3) // d4230004 + FMOVSU F1, (R3)(R4) // 7c241d6e + FADD F1, F2 // fc42082a + FADD F1, F2, F3 // fc62082a + FADDCC F1, F2, F3 // fc62082b + FADDS F1, F2 // ec42082a + FADDS F1, F2, F3 // ec62082a + FADDSCC F1, F2, F3 // ec62082b + FSUB F1, F2 // fc420828 + FSUB F1, F2, F3 // fc620828 + FSUBCC F1, F2, F3 // fc620829 + FSUBS F1, F2 // ec420828 + FSUBS F1, F2, F3 // ec620828 + FSUBCC F1, F2, F3 // fc620829 + FMUL F1, F2 // fc420072 + FMUL F1, F2, F3 // fc620072 + FMULCC F1, F2, F3 // fc620073 + FMULS F1, F2 // ec420072 + FMULS F1, F2, F3 // ec620072 + FMULSCC F1, F2, F3 // ec620073 + FDIV F1, F2 // fc420824 + FDIV F1, F2, F3 // fc620824 + FDIVCC F1, F2, F3 // fc620825 + FDIVS F1, F2 // ec420824 + FDIVS F1, F2, F3 // ec620824 + FDIVSCC F1, F2, F3 // ec620825 + FMADD F1, F2, F3, F4 // fc8110fa + FMADDCC F1, F2, F3, F4 // fc8110fb + FMADDS F1, F2, F3, F4 // ec8110fa + FMADDSCC F1, F2, F3, F4 // ec8110fb + FMSUB F1, F2, F3, F4 // fc8110f8 + FMSUBCC F1, F2, F3, F4 // fc8110f9 + FMSUBS F1, F2, F3, F4 // ec8110f8 + FMSUBSCC F1, F2, F3, F4 // ec8110f9 + FNMADD F1, F2, F3, F4 // fc8110fe + FNMADDCC F1, F2, F3, F4 // fc8110ff + FNMADDS F1, F2, F3, F4 // ec8110fe + FNMADDSCC F1, F2, F3, F4 // ec8110ff + FNMSUB F1, F2, F3, F4 // fc8110fc + FNMSUBCC F1, F2, F3, F4 // fc8110fd + FNMSUBS F1, F2, F3, F4 // ec8110fc + FNMSUBSCC F1, F2, F3, F4 // ec8110fd + FSEL F1, F2, F3, F4 // fc8110ee + FSELCC F1, F2, F3, F4 // fc8110ef + FABS F1, F2 // fc400a10 + FABSCC F1, F2 // fc400a11 + FNEG F1, F2 // fc400850 + FABSCC F1, F2 // fc400a11 + FRSP F1, F2 // fc400818 + FRSPCC F1, F2 // fc400819 
+ FCTIW F1, F2 // fc40081c + FCTIWCC F1, F2 // fc40081d + FCTIWZ F1, F2 // fc40081e + FCTIWZCC F1, F2 // fc40081f + FCTID F1, F2 // fc400e5c + FCTIDCC F1, F2 // fc400e5d + FCTIDZ F1, F2 // fc400e5e + FCTIDZCC F1, F2 // fc400e5f + FCFID F1, F2 // fc400e9c + FCFIDCC F1, F2 // fc400e9d + FCFIDU F1, F2 // fc400f9c + FCFIDUCC F1, F2 // fc400f9d + FCFIDS F1, F2 // ec400e9c + FCFIDSCC F1, F2 // ec400e9d + FRES F1, F2 // ec400830 + FRESCC F1, F2 // ec400831 + FRIM F1, F2 // fc400bd0 + FRIMCC F1, F2 // fc400bd1 + FRIP F1, F2 // fc400b90 + FRIPCC F1, F2 // fc400b91 + FRIZ F1, F2 // fc400b50 + FRIZCC F1, F2 // fc400b51 + FRIN F1, F2 // fc400b10 + FRINCC F1, F2 // fc400b11 + FRSQRTE F1, F2 // fc400834 + FRSQRTECC F1, F2 // fc400835 + FSQRT F1, F2 // fc40082c + FSQRTCC F1, F2 // fc40082d + FSQRTS F1, F2 // ec40082c + FSQRTSCC F1, F2 // ec40082d + FCPSGN F1, F2 // fc420810 + FCPSGNCC F1, F2 // fc420811 + FCMPO F1, F2 // fc011040 + FCMPU F1, F2 // fc011000 + LVX (R3)(R4), V1 // 7c2418ce + LVXL (R3)(R4), V1 // 7c241ace + LVSL (R3)(R4), V1 // 7c24180c + LVSR (R3)(R4), V1 // 7c24184c + LVEBX (R3)(R4), V1 // 7c24180e + LVEHX (R3)(R4), V1 // 7c24184e + LVEWX (R3)(R4), V1 // 7c24188e + STVX V1, (R3)(R4) // 7c2419ce + STVXL V1, (R3)(R4) // 7c241bce + STVEBX V1, (R3)(R4) // 7c24190e + STVEHX V1, (R3)(R4) // 7c24194e + STVEWX V1, (R3)(R4) // 7c24198e + + VAND V1, V2, V3 // 10611404 + VANDC V1, V2, V3 // 10611444 + VNAND V1, V2, V3 // 10611584 + VOR V1, V2, V3 // 10611484 + VORC V1, V2, V3 // 10611544 + VXOR V1, V2, V3 // 106114c4 + VNOR V1, V2, V3 // 10611504 + VEQV V1, V2, V3 // 10611684 + VADDUBM V1, V2, V3 // 10611000 + VADDUHM V1, V2, V3 // 10611040 + VADDUWM V1, V2, V3 // 10611080 + VADDUDM V1, V2, V3 // 106110c0 + VADDUQM V1, V2, V3 // 10611100 + VADDCUQ V1, V2, V3 // 10611140 + VADDCUW V1, V2, V3 // 10611180 + VADDUBS V1, V2, V3 // 10611200 + VADDUHS V1, V2, V3 // 10611240 + VADDUWS V1, V2, V3 // 10611280 + VSUBUBM V1, V2, V3 // 10611400 + VSUBUHM V1, V2, V3 // 10611440 + VSUBUWM 
V1, V2, V3 // 10611480 + VSUBUDM V1, V2, V3 // 106114c0 + VSUBUQM V1, V2, V3 // 10611500 + VSUBCUQ V1, V2, V3 // 10611540 + VSUBCUW V1, V2, V3 // 10611580 + VSUBUBS V1, V2, V3 // 10611600 + VSUBUHS V1, V2, V3 // 10611640 + VSUBUWS V1, V2, V3 // 10611680 + VSUBSBS V1, V2, V3 // 10611700 + VSUBSHS V1, V2, V3 // 10611740 + VSUBSWS V1, V2, V3 // 10611780 + VSUBEUQM V1, V2, V3, V4 // 108110fe + VSUBECUQ V1, V2, V3, V4 // 108110ff + VMULESB V1, V2, V3 // 10611308 + VMULOSB V1, V2, V3 // 10611108 + VMULEUB V1, V2, V3 // 10611208 + VMULOUB V1, V2, V3 // 10611008 + VMULESH V1, V2, V3 // 10611348 + VMULOSH V1, V2, V3 // 10611148 + VMULEUH V1, V2, V3 // 10611248 + VMULOUH V1, V2, V3 // 10611048 + VMULESH V1, V2, V3 // 10611348 + VMULOSW V1, V2, V3 // 10611188 + VMULEUW V1, V2, V3 // 10611288 + VMULOUW V1, V2, V3 // 10611088 + VMULUWM V1, V2, V3 // 10611089 + VPMSUMB V1, V2, V3 // 10611408 + VPMSUMH V1, V2, V3 // 10611448 + VPMSUMW V1, V2, V3 // 10611488 + VPMSUMD V1, V2, V3 // 106114c8 + VMSUMUDM V1, V2, V3, V4 // 108110e3 + VRLB V1, V2, V3 // 10611004 + VRLH V1, V2, V3 // 10611044 + VRLW V1, V2, V3 // 10611084 + VRLD V1, V2, V3 // 106110c4 + VSLB V1, V2, V3 // 10611104 + VSLH V1, V2, V3 // 10611144 + VSLW V1, V2, V3 // 10611184 + VSL V1, V2, V3 // 106111c4 + VSLO V1, V2, V3 // 1061140c + VSRB V1, V2, V3 // 10611204 + VSRH V1, V2, V3 // 10611244 + VSRW V1, V2, V3 // 10611284 + VSR V1, V2, V3 // 106112c4 + VSRO V1, V2, V3 // 1061144c + VSLD V1, V2, V3 // 106115c4 + VSRAB V1, V2, V3 // 10611304 + VSRAH V1, V2, V3 // 10611344 + VSRAW V1, V2, V3 // 10611384 + VSRAD V1, V2, V3 // 106113c4 + VSLDOI $3, V1, V2, V3 // 106110ec + VCLZB V1, V2 // 10400f02 + VCLZH V1, V2 // 10400f42 + VCLZW V1, V2 // 10400f82 + VCLZD V1, V2 // 10400fc2 + VPOPCNTB V1, V2 // 10400f03 + VPOPCNTH V1, V2 // 10400f43 + VPOPCNTW V1, V2 // 10400f83 + VPOPCNTD V1, V2 // 10400fc3 + VCMPEQUB V1, V2, V3 // 10611006 + VCMPEQUBCC V1, V2, V3 // 10611406 + VCMPEQUH V1, V2, V3 // 10611046 + VCMPEQUHCC V1, V2, V3 // 
10611446 + VCMPEQUW V1, V2, V3 // 10611086 + VCMPEQUWCC V1, V2, V3 // 10611486 + VCMPEQUD V1, V2, V3 // 106110c7 + VCMPEQUDCC V1, V2, V3 // 106114c7 + VCMPGTUB V1, V2, V3 // 10611206 + VCMPGTUBCC V1, V2, V3 // 10611606 + VCMPGTUH V1, V2, V3 // 10611246 + VCMPGTUHCC V1, V2, V3 // 10611646 + VCMPGTUW V1, V2, V3 // 10611286 + VCMPGTUWCC V1, V2, V3 // 10611686 + VCMPGTUD V1, V2, V3 // 106112c7 + VCMPGTUDCC V1, V2, V3 // 106116c7 + VCMPGTSB V1, V2, V3 // 10611306 + VCMPGTSBCC V1, V2, V3 // 10611706 + VCMPGTSH V1, V2, V3 // 10611346 + VCMPGTSHCC V1, V2, V3 // 10611746 + VCMPGTSW V1, V2, V3 // 10611386 + VCMPGTSWCC V1, V2, V3 // 10611786 + VCMPGTSD V1, V2, V3 // 106113c7 + VCMPGTSDCC V1, V2, V3 // 106117c7 + VCMPNEZB V1, V2, V3 // 10611107 + VCMPNEZBCC V1, V2, V3 // 10611507 + VCMPNEB V1, V2, V3 // 10611007 + VCMPNEBCC V1, V2, V3 // 10611407 + VCMPNEH V1, V2, V3 // 10611047 + VCMPNEHCC V1, V2, V3 // 10611447 + VCMPNEW V1, V2, V3 // 10611087 + VCMPNEWCC V1, V2, V3 // 10611487 + VPERM V1, V2, V3, V4 // 108110eb + VPERMR V1, V2, V3, V4 // 108110fb + VPERMXOR V1, V2, V3, V4 // 108110ed + VBPERMQ V1, V2, V3 // 1061154c + VBPERMD V1, V2, V3 // 106115cc + VSEL V1, V2, V3, V4 // 108110ea + VSPLTB $1, V1, V2 // 10410a0c + VSPLTH $1, V1, V2 // 10410a4c + VSPLTW $1, V1, V2 // 10410a8c + VSPLTISB $1, V1 // 1021030c + VSPLTISW $1, V1 // 1021038c + VSPLTISH $1, V1 // 1021034c + VCIPHER V1, V2, V3 // 10611508 + VCIPHERLAST V1, V2, V3 // 10611509 + VNCIPHER V1, V2, V3 // 10611548 + VNCIPHERLAST V1, V2, V3 // 10611549 + VSBOX V1, V2 // 104105c8 + VSHASIGMAW $1, V1, $15, V2 // 10418e82 + VSHASIGMAD $2, V1, $15, V2 // 104196c2 + + LXVD2X (R3)(R4), VS1 // 7c241e98 + LXVDSX (R3)(R4), VS1 // 7c241a98 + LXVH8X (R3)(R4), VS1 // 7c241e58 + LXVB16X (R3)(R4), VS1 // 7c241ed8 + LXVW4X (R3)(R4), VS1 // 7c241e18 + LXV 16(R3), VS1 // f4230011 + LXVL R3, R4, VS1 // 7c23221a + LXVLL R3, R4, VS1 // 7c23225a + LXVX R3, R4, VS1 // 7c232218 + LXSDX (R3)(R4), VS1 // 7c241c98 + STXVD2X VS1, (R3)(R4) // 
7c241f98 + STXV VS1,16(R3) // f4230015 + STXVL VS1, R3, R4 // 7c23231a + STXVLL VS1, R3, R4 // 7c23235a + STXVX VS1, R3, R4 // 7c232318 + STXVB16X VS1, (R4)(R5) // 7c2527d8 + STXVH8X VS1, (R4)(R5) // 7c252758 + + STXSDX VS1, (R3)(R4) // 7c241d98 + LXSIWAX (R3)(R4), VS1 // 7c241898 + STXSIWX VS1, (R3)(R4) // 7c241918 + MFVSRD VS1, R3 // 7c230066 + MTFPRD R3, F0 // 7c030166 + MFVRD V0, R3 // 7c030067 + MFVSRLD VS63,R4 // 7fe40267 + MFVSRWZ VS33,R4 // 7c2400e7 + MTVSRD R3, VS1 // 7c230166 + MTVRD R3, V13 // 7da30167 + MTVSRWA R4, VS31 // 7fe401a6 + MTVSRWS R4, VS32 // 7c040327 + MTVSRWZ R4, VS63 // 7fe401e7 + XXBRD VS0, VS1 // f037076c + XXBRW VS1, VS2 // f04f0f6c + XXBRH VS2, VS3 // f067176c + XXLAND VS1, VS2, VS3 // f0611410 + XXLANDC VS1, VS2, VS3 // f0611450 + XXLEQV VS0, VS1, VS2 // f0400dd0 + XXLNAND VS0, VS1, VS2 // f0400d90 + XXLNOR VS0, VS1, VS32 // f0000d11 + XXLOR VS1, VS2, VS3 // f0611490 + XXLORC VS1, VS2, VS3 // f0611550 + XXLORQ VS1, VS2, VS3 // f0611490 + XXLXOR VS1, VS2, VS3 // f06114d0 + XXSEL VS1, VS2, VS3, VS4 // f08110f0 + XXMRGHW VS1, VS2, VS3 // f0611090 + XXMRGLW VS1, VS2, VS3 // f0611190 + XXSPLTW VS1, $1, VS2 // f0410a90 + XXPERM VS1, VS2, VS3 // f06110d0 + XXSLDWI VS1, VS2, $1, VS3 // f0611110 + XSCVDPSP VS1, VS2 // f0400c24 + XVCVDPSP VS1, VS2 // f0400e24 + XSCVSXDDP VS1, VS2 // f0400de0 + XVCVDPSXDS VS1, VS2 // f0400f60 + XVCVSXDDP VS1, VS2 // f0400fe0 + XSCVDPSPN VS1,VS32 // f0000c2d + XSCVDPSP VS1,VS32 // f0000c25 + XSCVDPSXDS VS1,VS32 // f0000d61 + XSCVDPSXWS VS1,VS32 // f0000961 + XSCVDPUXDS VS1,VS32 // f0000d21 + XSCVDPUXWS VS1,VS32 // f0000921 + XSCVSPDPN VS1,VS32 // f0000d2d + XSCVSPDP VS1,VS32 // f0000d25 + XSCVSXDDP VS1,VS32 // f0000de1 + XSCVSXDSP VS1,VS32 // f0000ce1 + XSCVUXDDP VS1,VS32 // f0000da1 + XSCVUXDSP VS1,VS32 // f0000ca1 + XVCVDPSP VS1,VS32 // f0000e25 + XVCVDPSXDS VS1,VS32 // f0000f61 + XVCVDPSXWS VS1,VS32 // f0000b61 + XVCVDPUXDS VS1,VS32 // f0000f21 + XVCVDPUXWS VS1,VS32 // f0000b21 + XVCVSPDP VS1,VS32 // f0000f25 
+ XVCVSPSXDS VS1,VS32 // f0000e61 + XVCVSPSXWS VS1,VS32 // f0000a61 + XVCVSPUXDS VS1,VS32 // f0000e21 + XVCVSPUXWS VS1,VS32 // f0000a21 + XVCVSXDDP VS1,VS32 // f0000fe1 + XVCVSXDSP VS1,VS32 // f0000ee1 + XVCVSXWDP VS1,VS32 // f0000be1 + XVCVSXWSP VS1,VS32 // f0000ae1 + XVCVUXDDP VS1,VS32 // f0000fa1 + XVCVUXDSP VS1,VS32 // f0000ea1 + XVCVUXWDP VS1,VS32 // f0000ba1 + XVCVUXWSP VS1,VS32 // f0000aa1 + + MOVD R3, LR // 7c6803a6 + MOVD R3, CTR // 7c6903a6 + MOVD R3, XER // 7c6103a6 + MOVD LR, R3 // 7c6802a6 + MOVD CTR, R3 // 7c6902a6 + MOVD XER, R3 // 7c6102a6 + MOVFL CR3, CR1 // 4c8c0000 -// LNOP freg comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &$2, 0, &nullgen); -// } - NOP F2 - -// LNOP ',' rreg // asm doesn't support the leading comma. -// { -// outcode(int($1), &nullgen, 0, &$3); -// } - NOP R2 - -// LNOP ',' freg // asm doesn't support the leading comma. -// { -// outcode(int($1), &nullgen, 0, &$3); -// } - NOP F2 - -// LNOP imm // SYSCALL $num: load $num to R0 before syscall and restore R0 to 0 afterwards. -// { -// outcode(int($1), &$2, 0, &nullgen); -// } - NOP $4 - -// RET -// -// LRETRN comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &nullgen, 0, &nullgen); -// } - BEQ 2(PC) RET - -// More BR/BL cases, and canonical names JMP, CALL. - - BEQ 2(PC) - BR foo(SB) // JMP foo(SB) - BL foo(SB) // CALL foo(SB) - BEQ 2(PC) - JMP foo(SB) - CALL foo(SB) - RET foo(SB) - -// load-and-reserve -// L*AR (RB)(RA*1),EH,RT produces -// l*arx RT,RA,RB,EH -// -// Extended forms also accepted. Assumes RA=0, EH=0: -// L*AR (RB),RT -// L*AR (RB),EH,RT - LBAR (R4)(R3*1), $1, R5 - LBAR (R4), $0, R5 - LBAR (R3), R5 - LHAR (R4)(R3*1), $1, R5 - LHAR (R4), $0, R5 - LHAR (R3), R5 - LWAR (R4)(R3*1), $1, R5 - LWAR (R4), $0, R5 - LWAR (R3), R5 - LDAR (R4)(R3*1), $1, R5 - LDAR (R4), $0, R5 - LDAR (R3), R5 - -// END -// -// LEND comma // asm doesn't support the trailing comma. 
-// { -// outcode(int($1), &nullgen, 0, &nullgen); -// } - END diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s deleted file mode 100644 index c6d7b59aad..0000000000 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ /dev/null @@ -1,642 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Initial set of opcode combinations based on -// improvements to processing of constant -// operands. - -// Full set will be added at a later date. - -#include "../../../../../runtime/textflag.h" - -TEXT asmtest(SB),DUPOK|NOSPLIT,$0 - // move constants - MOVD $1, R3 // 38600001 - MOVD $-1, R4 // 3880ffff - MOVD $65535, R5 // 6005ffff - MOVD $65536, R6 // 64060001 - MOVD $-32767, R5 // 38a08001 - MOVD $-32768, R6 // 38c08000 - MOVD $1234567, R5 // 6405001260a5d687 - MOVW $1, R3 // 38600001 - MOVW $-1, R4 // 3880ffff - MOVW $65535, R5 // 6005ffff - MOVW $65536, R6 // 64060001 - MOVW $-32767, R5 // 38a08001 - MOVW $-32768, R6 // 38c08000 - MOVW $1234567, R5 // 6405001260a5d687 - MOVD 8(R3), R4 // e8830008 - MOVD (R3)(R4), R5 // 7ca4182a - MOVW 4(R3), R4 // e8830006 - MOVW (R3)(R4), R5 // 7ca41aaa - MOVWZ 4(R3), R4 // 80830004 - MOVWZ (R3)(R4), R5 // 7ca4182e - MOVH 4(R3), R4 // a8830004 - MOVH (R3)(R4), R5 // 7ca41aae - MOVHZ 2(R3), R4 // a0830002 - MOVHZ (R3)(R4), R5 // 7ca41a2e - MOVB 1(R3), R4 // 888300017c840774 - MOVB (R3)(R4), R5 // 7ca418ae7ca50774 - MOVBZ 1(R3), R4 // 88830001 - MOVBZ (R3)(R4), R5 // 7ca418ae - MOVDBR (R3)(R4), R5 // 7ca41c28 - MOVWBR (R3)(R4), R5 // 7ca41c2c - MOVHBR (R3)(R4), R5 // 7ca41e2c - - MOVDU 8(R3), R4 // e8830009 - MOVDU (R3)(R4), R5 // 7ca4186a - MOVWU (R3)(R4), R5 // 7ca41aea - MOVWZU 4(R3), R4 // 84830004 - MOVWZU (R3)(R4), R5 // 7ca4186e - MOVHU 2(R3), R4 // ac830002 - MOVHU (R3)(R4), R5 // 7ca41aee - MOVHZU 2(R3), R4 // a4830002 - MOVHZU (R3)(R4), R5 // 7ca41a6e - 
MOVBU 1(R3), R4 // 8c8300017c840774 - MOVBU (R3)(R4), R5 // 7ca418ee7ca50774 - MOVBZU 1(R3), R4 // 8c830001 - MOVBZU (R3)(R4), R5 // 7ca418ee - - MOVD R4, 8(R3) // f8830008 - MOVD R5, (R3)(R4) // 7ca4192a - MOVW R4, 4(R3) // 90830004 - MOVW R5, (R3)(R4) // 7ca4192e - MOVH R4, 2(R3) // b0830002 - MOVH R5, (R3)(R4) // 7ca41b2e - MOVB R4, 1(R3) // 98830001 - MOVB R5, (R3)(R4) // 7ca419ae - MOVDBR R5, (R3)(R4) // 7ca41d28 - MOVWBR R5, (R3)(R4) // 7ca41d2c - MOVHBR R5, (R3)(R4) // 7ca41f2c - - MOVDU R4, 8(R3) // f8830009 - MOVDU R5, (R3)(R4) // 7ca4196a - MOVWU R4, 4(R3) // 94830004 - MOVWU R5, (R3)(R4) // 7ca4196e - MOVHU R4, 2(R3) // b4830002 - MOVHU R5, (R3)(R4) // 7ca41b6e - MOVBU R4, 1(R3) // 9c830001 - MOVBU R5, (R3)(R4) // 7ca419ee - - ADD $1, R3 // 38630001 - ADD $1, R3, R4 // 38830001 - ADD $-1, R4 // 3884ffff - ADD $-1, R4, R5 // 38a4ffff - ADD $65535, R5 // 601fffff7cbf2a14 - ADD $65535, R5, R6 // 601fffff7cdf2a14 - ADD $65536, R6 // 3cc60001 - ADD $65536, R6, R7 // 3ce60001 - ADD $-32767, R5 // 38a58001 - ADD $-32767, R5, R4 // 38858001 - ADD $-32768, R6 // 38c68000 - ADD $-32768, R6, R5 // 38a68000 - ADD $1234567, R5 // 641f001263ffd6877cbf2a14 - ADD $1234567, R5, R6 // 641f001263ffd6877cdf2a14 - ADDIS $8, R3 // 3c630008 - ADDIS $1000, R3, R4 // 3c8303e8 - - ANDCC $1, R3 // 70630001 - ANDCC $1, R3, R4 // 70640001 - ANDCC $-1, R4 // 3be0ffff7fe42039 - ANDCC $-1, R4, R5 // 3be0ffff7fe52039 - ANDCC $65535, R5 // 70a5ffff - ANDCC $65535, R5, R6 // 70a6ffff - ANDCC $65536, R6 // 74c60001 - ANDCC $65536, R6, R7 // 74c70001 - ANDCC $-32767, R5 // 3be080017fe52839 - ANDCC $-32767, R5, R4 // 3be080017fe42839 - ANDCC $-32768, R6 // 3be080007fe63039 - ANDCC $-32768, R5, R6 // 3be080007fe62839 - ANDCC $1234567, R5 // 641f001263ffd6877fe52839 - ANDCC $1234567, R5, R6 // 641f001263ffd6877fe62839 - ANDISCC $1, R3 // 74630001 - ANDISCC $1000, R3, R4 // 746403e8 - - OR $1, R3 // 60630001 - OR $1, R3, R4 // 60640001 - OR $-1, R4 // 3be0ffff7fe42378 - OR $-1, R4, R5 // 
3be0ffff7fe52378 - OR $65535, R5 // 60a5ffff - OR $65535, R5, R6 // 60a6ffff - OR $65536, R6 // 64c60001 - OR $65536, R6, R7 // 64c70001 - OR $-32767, R5 // 3be080017fe52b78 - OR $-32767, R5, R6 // 3be080017fe62b78 - OR $-32768, R6 // 3be080007fe63378 - OR $-32768, R6, R7 // 3be080007fe73378 - OR $1234567, R5 // 641f001263ffd6877fe52b78 - OR $1234567, R5, R3 // 641f001263ffd6877fe32b78 - - XOR $1, R3 // 68630001 - XOR $1, R3, R4 // 68640001 - XOR $-1, R4 // 3be0ffff7fe42278 - XOR $-1, R4, R5 // 3be0ffff7fe52278 - XOR $65535, R5 // 68a5ffff - XOR $65535, R5, R6 // 68a6ffff - XOR $65536, R6 // 6cc60001 - XOR $65536, R6, R7 // 6cc70001 - XOR $-32767, R5 // 3be080017fe52a78 - XOR $-32767, R5, R6 // 3be080017fe62a78 - XOR $-32768, R6 // 3be080007fe63278 - XOR $-32768, R6, R7 // 3be080007fe73278 - XOR $1234567, R5 // 641f001263ffd6877fe52a78 - XOR $1234567, R5, R3 // 641f001263ffd6877fe32a78 - - // TODO: the order of CR operands don't match - CMP R3, R4 // 7c232000 - CMPU R3, R4 // 7c232040 - CMPW R3, R4 // 7c032000 - CMPWU R3, R4 // 7c032040 - - // TODO: constants for ADDC? 
- ADD R3, R4 // 7c841a14 - ADD R3, R4, R5 // 7ca41a14 - ADDC R3, R4 // 7c841814 - ADDC R3, R4, R5 // 7ca41814 - ADDE R3, R4 // 7c841914 - ADDECC R3, R4 // 7c841915 - ADDEV R3, R4 // 7c841d14 - ADDEVCC R3, R4 // 7c841d15 - ADDV R3, R4 // 7c841e14 - ADDVCC R3, R4 // 7c841e15 - ADDCCC R3, R4, R5 // 7ca41815 - ADDME R3, R4 // 7c8301d4 - ADDMECC R3, R4 // 7c8301d5 - ADDMEV R3, R4 // 7c8305d4 - ADDMEVCC R3, R4 // 7c8305d5 - ADDCV R3, R4 // 7c841c14 - ADDCVCC R3, R4 // 7c841c15 - ADDZE R3, R4 // 7c830194 - ADDZECC R3, R4 // 7c830195 - ADDZEV R3, R4 // 7c830594 - ADDZEVCC R3, R4 // 7c830595 - SUBME R3, R4 // 7c8301d0 - SUBMECC R3, R4 // 7c8301d1 - SUBMEV R3, R4 // 7c8305d0 - SUBZE R3, R4 // 7c830190 - SUBZECC R3, R4 // 7c830191 - SUBZEV R3, R4 // 7c830590 - SUBZEVCC R3, R4 // 7c830591 - - AND R3, R4 // 7c841838 - AND R3, R4, R5 // 7c851838 - ANDN R3, R4, R5 // 7c851878 - ANDCC R3, R4, R5 // 7c851839 - OR R3, R4 // 7c841b78 - OR R3, R4, R5 // 7c851b78 - ORN R3, R4, R5 // 7c851b38 - ORCC R3, R4, R5 // 7c851b79 - XOR R3, R4 // 7c841a78 - XOR R3, R4, R5 // 7c851a78 - XORCC R3, R4, R5 // 7c851a79 - NAND R3, R4, R5 // 7c851bb8 - NANDCC R3, R4, R5 // 7c851bb9 - EQV R3, R4, R5 // 7c851a38 - EQVCC R3, R4, R5 // 7c851a39 - NOR R3, R4, R5 // 7c8518f8 - NORCC R3, R4, R5 // 7c8518f9 - - SUB R3, R4 // 7c832050 - SUB R3, R4, R5 // 7ca32050 - SUBC R3, R4 // 7c832010 - SUBC R3, R4, R5 // 7ca32010 - - MULLW R3, R4 // 7c8419d6 - MULLW R3, R4, R5 // 7ca419d6 - MULLW $10, R3 // 1c63000a - MULLW $10000000, R3 // 641f009863ff96807c7f19d6 - MULLWCC R3, R4, R5 // 7ca419d7 - MULHW R3, R4, R5 // 7ca41896 - - MULHWU R3, R4, R5 // 7ca41816 - MULLD R3, R4 // 7c8419d2 - MULLD R4, R4, R5 // 7ca421d2 - MULLD $20, R4 // 1c840014 - MULLD $200000000, R4 // 641f0beb63ffc2007c9f21d2 - MULLDCC R3, R4, R5 // 7ca419d3 - MULHD R3, R4, R5 // 7ca41892 - MULHDCC R3, R4, R5 // 7ca41893 - - MULLWV R3, R4 // 7c841dd6 - MULLWV R3, R4, R5 // 7ca41dd6 - MULLWVCC R3, R4, R5 // 7ca41dd7 - MULHWUCC R3, R4, R5 // 7ca41817 - 
MULLDV R3, R4, R5 // 7ca41dd2 - MULLDVCC R3, R4, R5 // 7ca41dd3 - - DIVD R3,R4 // 7c841bd2 - DIVD R3, R4, R5 // 7ca41bd2 - DIVDCC R3,R4, R5 // 7ca41bd3 - DIVDU R3, R4, R5 // 7ca41b92 - DIVDV R3, R4, R5 // 7ca41fd2 - DIVDUCC R3, R4, R5 // 7ca41b93 - DIVDVCC R3, R4, R5 // 7ca41fd3 - DIVDUV R3, R4, R5 // 7ca41f92 - DIVDUVCC R3, R4, R5 // 7ca41f93 - DIVDE R3, R4, R5 // 7ca41b52 - DIVDECC R3, R4, R5 // 7ca41b53 - DIVDEU R3, R4, R5 // 7ca41b12 - DIVDEUCC R3, R4, R5 // 7ca41b13 - - REM R3, R4, R5 // 7fe41bd67fff19d67cbf2050 - REMU R3, R4, R5 // 7fe41b967fff19d67bff00287cbf2050 - REMD R3, R4, R5 // 7fe41bd27fff19d27cbf2050 - REMDU R3, R4, R5 // 7fe41b927fff19d27cbf2050 - - MODUD R3, R4, R5 // 7ca41a12 - MODUW R3, R4, R5 // 7ca41a16 - MODSD R3, R4, R5 // 7ca41e12 - MODSW R3, R4, R5 // 7ca41e16 - - SLW $8, R3, R4 // 5464402e - SLW R3, R4, R5 // 7c851830 - SLWCC R3, R4 // 7c841831 - SLD $16, R3, R4 // 786483e4 - SLD R3, R4, R5 // 7c851836 - SLDCC R3, R4 // 7c841837 - - SRW $8, R3, R4 // 5464c23e - SRW R3, R4, R5 // 7c851c30 - SRWCC R3, R4 // 7c841c31 - SRAW $8, R3, R4 // 7c644670 - SRAW R3, R4, R5 // 7c851e30 - SRAWCC R3, R4 // 7c841e31 - SRD $16, R3, R4 // 78648402 - SRD R3, R4, R5 // 7c851c36 - SRDCC R3, R4 // 7c841c37 - SRAD $16, R3, R4 // 7c648674 - SRAD R3, R4, R5 // 7c851e34 - SRDCC R3, R4 // 7c841c37 - ROTLW $16, R3, R4 // 5464803e - ROTLW R3, R4, R5 // 5c85183e - EXTSWSLI $3, R4, R5 // 7c851ef4 - RLWMI $7, R3, $65535, R6 // 50663c3e - RLWMICC $7, R3, $65535, R6 // 50663c3f - RLWNM $3, R4, $7, R6 // 54861f7e - RLWNMCC $3, R4, $7, R6 // 54861f7f - RLDMI $0, R4, $7, R6 // 7886076c - RLDMICC $0, R4, $7, R6 // 7886076d - RLDIMI $0, R4, $7, R6 // 788601cc - RLDIMICC $0, R4, $7, R6 // 788601cd - RLDC $0, R4, $15, R6 // 78860728 - RLDCCC $0, R4, $15, R6 // 78860729 - RLDCL $0, R4, $7, R6 // 78860770 - RLDCLCC $0, R4, $15, R6 // 78860721 - RLDCR $0, R4, $-16, R6 // 788606f2 - RLDCRCC $0, R4, $-16, R6 // 788606f3 - RLDICL $0, R4, $15, R6 // 788603c0 - RLDICLCC $0, R4, $15, R6 
// 788603c1 - RLDICR $0, R4, $15, R6 // 788603c4 - RLDICRCC $0, R4, $15, R6 // 788603c5 - RLDIC $0, R4, $15, R6 // 788603c8 - RLDICCC $0, R4, $15, R6 // 788603c9 - CLRLSLWI $8, R5, $6, R4 // 54a430b2 - CLRLSLDI $24, R4, $4, R3 // 78832508 - - BEQ 0(PC) // 41820000 - BGE 0(PC) // 40800000 - BGT 4(PC) // 41810030 - BLE 0(PC) // 40810000 - BLT 0(PC) // 41800000 - BNE 0(PC) // 40820000 - JMP 8(PC) // 48000020 - - CRAND CR1, CR2, CR3 // 4c620a02 - CRANDN CR1, CR2, CR3 // 4c620902 - CREQV CR1, CR2, CR3 // 4c620a42 - CRNAND CR1, CR2, CR3 // 4c6209c2 - CRNOR CR1, CR2, CR3 // 4c620842 - CROR CR1, CR2, CR3 // 4c620b82 - CRORN CR1, CR2, CR3 // 4c620b42 - CRXOR CR1, CR2, CR3 // 4c620982 - - ISEL $1, R3, R4, R5 // 7ca3205e - ISEL $0, R3, R4, R5 // 7ca3201e - ISEL $2, R3, R4, R5 // 7ca3209e - ISEL $3, R3, R4, R5 // 7ca320de - ISEL $4, R3, R4, R5 // 7ca3211e - POPCNTB R3, R4 // 7c6400f4 - POPCNTW R3, R4 // 7c6402f4 - POPCNTD R3, R4 // 7c6403f4 - - PASTECC R3, R4 // 7c23270d - COPY R3, R4 // 7c23260c - - // load-and-reserve - LBAR (R4)(R3*1),$1,R5 // 7ca32069 - LBAR (R4),$0,R5 // 7ca02068 - LBAR (R3),R5 // 7ca01868 - LHAR (R4)(R3*1),$1,R5 // 7ca320e9 - LHAR (R4),$0,R5 // 7ca020e8 - LHAR (R3),R5 // 7ca018e8 - LWAR (R4)(R3*1),$1,R5 // 7ca32029 - LWAR (R4),$0,R5 // 7ca02028 - LWAR (R3),R5 // 7ca01828 - LDAR (R4)(R3*1),$1,R5 // 7ca320a9 - LDAR (R4),$0,R5 // 7ca020a8 - LDAR (R3),R5 // 7ca018a8 - - STBCCC R3, (R4)(R5) // 7c65256d - STWCCC R3, (R4)(R5) // 7c65212d - STDCCC R3, (R4)(R5) // 7c6521ad - STHCCC R3, (R4)(R5) - - SYNC // 7c0004ac - ISYNC // 4c00012c - LWSYNC // 7c2004ac - - DCBF (R3)(R4) // 7c0418ac - DCBI (R3)(R4) // 7c041bac - DCBST (R3)(R4) // 7c04186c - DCBZ (R3)(R4) // 7c041fec - DCBT (R3)(R4) // 7c041a2c - ICBI (R3)(R4) // 7c041fac - - // float constants - FMOVD $(0.0), F1 // f0210cd0 - FMOVD $(-0.0), F1 // f0210cd0fc200850 - - FMOVD 8(R3), F1 // c8230008 - FMOVD (R3)(R4), F1 // 7c241cae - FMOVDU 8(R3), F1 // cc230008 - FMOVDU (R3)(R4), F1 // 7c241cee - FMOVS 4(R3), F1 // 
c0230004 - FMOVS (R3)(R4), F1 // 7c241c2e - FMOVSU 4(R3), F1 // c4230004 - FMOVSU (R3)(R4), F1 // 7c241c6e - - FMOVD F1, 8(R3) // d8230008 - FMOVD F1, (R3)(R4) // 7c241dae - FMOVDU F1, 8(R3) // dc230008 - FMOVDU F1, (R3)(R4) // 7c241dee - FMOVS F1, 4(R3) // d0230004 - FMOVS F1, (R3)(R4) // 7c241d2e - FMOVSU F1, 4(R3) // d4230004 - FMOVSU F1, (R3)(R4) // 7c241d6e - FADD F1, F2 // fc42082a - FADD F1, F2, F3 // fc62082a - FADDCC F1, F2, F3 // fc62082b - FADDS F1, F2 // ec42082a - FADDS F1, F2, F3 // ec62082a - FADDSCC F1, F2, F3 // ec62082b - FSUB F1, F2 // fc420828 - FSUB F1, F2, F3 // fc620828 - FSUBCC F1, F2, F3 // fc620829 - FSUBS F1, F2 // ec420828 - FSUBS F1, F2, F3 // ec620828 - FSUBCC F1, F2, F3 // fc620829 - FMUL F1, F2 // fc420072 - FMUL F1, F2, F3 // fc620072 - FMULCC F1, F2, F3 // fc620073 - FMULS F1, F2 // ec420072 - FMULS F1, F2, F3 // ec620072 - FMULSCC F1, F2, F3 // ec620073 - FDIV F1, F2 // fc420824 - FDIV F1, F2, F3 // fc620824 - FDIVCC F1, F2, F3 // fc620825 - FDIVS F1, F2 // ec420824 - FDIVS F1, F2, F3 // ec620824 - FDIVSCC F1, F2, F3 // ec620825 - FMADD F1, F2, F3, F4 // fc8110fa - FMADDCC F1, F2, F3, F4 // fc8110fb - FMADDS F1, F2, F3, F4 // ec8110fa - FMADDSCC F1, F2, F3, F4 // ec8110fb - FMSUB F1, F2, F3, F4 // fc8110f8 - FMSUBCC F1, F2, F3, F4 // fc8110f9 - FMSUBS F1, F2, F3, F4 // ec8110f8 - FMSUBSCC F1, F2, F3, F4 // ec8110f9 - FNMADD F1, F2, F3, F4 // fc8110fe - FNMADDCC F1, F2, F3, F4 // fc8110ff - FNMADDS F1, F2, F3, F4 // ec8110fe - FNMADDSCC F1, F2, F3, F4 // ec8110ff - FNMSUB F1, F2, F3, F4 // fc8110fc - FNMSUBCC F1, F2, F3, F4 // fc8110fd - FNMSUBS F1, F2, F3, F4 // ec8110fc - FNMSUBSCC F1, F2, F3, F4 // ec8110fd - FSEL F1, F2, F3, F4 // fc8110ee - FSELCC F1, F2, F3, F4 // fc8110ef - FABS F1, F2 // fc400a10 - FABSCC F1, F2 // fc400a11 - FNEG F1, F2 // fc400850 - FABSCC F1, F2 // fc400a11 - FRSP F1, F2 // fc400818 - FRSPCC F1, F2 // fc400819 - FCTIW F1, F2 // fc40081c - FCTIWCC F1, F2 // fc40081d - FCTIWZ F1, F2 // fc40081e - FCTIWZCC 
F1, F2 // fc40081f - FCTID F1, F2 // fc400e5c - FCTIDCC F1, F2 // fc400e5d - FCTIDZ F1, F2 // fc400e5e - FCTIDZCC F1, F2 // fc400e5f - FCFID F1, F2 // fc400e9c - FCFIDCC F1, F2 // fc400e9d - FCFIDU F1, F2 // fc400f9c - FCFIDUCC F1, F2 // fc400f9d - FCFIDS F1, F2 // ec400e9c - FCFIDSCC F1, F2 // ec400e9d - FRES F1, F2 // ec400830 - FRESCC F1, F2 // ec400831 - FRIM F1, F2 // fc400bd0 - FRIMCC F1, F2 // fc400bd1 - FRIP F1, F2 // fc400b90 - FRIPCC F1, F2 // fc400b91 - FRIZ F1, F2 // fc400b50 - FRIZCC F1, F2 // fc400b51 - FRIN F1, F2 // fc400b10 - FRINCC F1, F2 // fc400b11 - FRSQRTE F1, F2 // fc400834 - FRSQRTECC F1, F2 // fc400835 - FSQRT F1, F2 // fc40082c - FSQRTCC F1, F2 // fc40082d - FSQRTS F1, F2 // ec40082c - FSQRTSCC F1, F2 // ec40082d - FCPSGN F1, F2 // fc420810 - FCPSGNCC F1, F2 // fc420811 - FCMPO F1, F2 // fc011040 - FCMPU F1, F2 // fc011000 - LVX (R3)(R4), V1 // 7c2418ce - LVXL (R3)(R4), V1 // 7c241ace - LVSL (R3)(R4), V1 // 7c24180c - LVSR (R3)(R4), V1 // 7c24184c - LVEBX (R3)(R4), V1 // 7c24180e - LVEHX (R3)(R4), V1 // 7c24184e - LVEWX (R3)(R4), V1 // 7c24188e - STVX V1, (R3)(R4) // 7c2419ce - STVXL V1, (R3)(R4) // 7c241bce - STVEBX V1, (R3)(R4) // 7c24190e - STVEHX V1, (R3)(R4) // 7c24194e - STVEWX V1, (R3)(R4) // 7c24198e - - VAND V1, V2, V3 // 10611404 - VANDC V1, V2, V3 // 10611444 - VNAND V1, V2, V3 // 10611584 - VOR V1, V2, V3 // 10611484 - VORC V1, V2, V3 // 10611544 - VXOR V1, V2, V3 // 106114c4 - VNOR V1, V2, V3 // 10611504 - VEQV V1, V2, V3 // 10611684 - VADDUBM V1, V2, V3 // 10611000 - VADDUHM V1, V2, V3 // 10611040 - VADDUWM V1, V2, V3 // 10611080 - VADDUDM V1, V2, V3 // 106110c0 - VADDUQM V1, V2, V3 // 10611100 - VADDCUQ V1, V2, V3 // 10611140 - VADDCUW V1, V2, V3 // 10611180 - VADDUBS V1, V2, V3 // 10611200 - VADDUHS V1, V2, V3 // 10611240 - VADDUWS V1, V2, V3 // 10611280 - VSUBUBM V1, V2, V3 // 10611400 - VSUBUHM V1, V2, V3 // 10611440 - VSUBUWM V1, V2, V3 // 10611480 - VSUBUDM V1, V2, V3 // 106114c0 - VSUBUQM V1, V2, V3 // 10611500 - 
VSUBCUQ V1, V2, V3 // 10611540 - VSUBCUW V1, V2, V3 // 10611580 - VSUBUBS V1, V2, V3 // 10611600 - VSUBUHS V1, V2, V3 // 10611640 - VSUBUWS V1, V2, V3 // 10611680 - VSUBSBS V1, V2, V3 // 10611700 - VSUBSHS V1, V2, V3 // 10611740 - VSUBSWS V1, V2, V3 // 10611780 - VSUBEUQM V1, V2, V3, V4 // 108110fe - VSUBECUQ V1, V2, V3, V4 // 108110ff - VMULESB V1, V2, V3 // 10611308 - VMULOSB V1, V2, V3 // 10611108 - VMULEUB V1, V2, V3 // 10611208 - VMULOUB V1, V2, V3 // 10611008 - VMULESH V1, V2, V3 // 10611348 - VMULOSH V1, V2, V3 // 10611148 - VMULEUH V1, V2, V3 // 10611248 - VMULOUH V1, V2, V3 // 10611048 - VMULESH V1, V2, V3 // 10611348 - VMULOSW V1, V2, V3 // 10611188 - VMULEUW V1, V2, V3 // 10611288 - VMULOUW V1, V2, V3 // 10611088 - VMULUWM V1, V2, V3 // 10611089 - VPMSUMB V1, V2, V3 // 10611408 - VPMSUMH V1, V2, V3 // 10611448 - VPMSUMW V1, V2, V3 // 10611488 - VPMSUMD V1, V2, V3 // 106114c8 - VMSUMUDM V1, V2, V3, V4 // 108110e3 - VRLB V1, V2, V3 // 10611004 - VRLH V1, V2, V3 // 10611044 - VRLW V1, V2, V3 // 10611084 - VRLD V1, V2, V3 // 106110c4 - VSLB V1, V2, V3 // 10611104 - VSLH V1, V2, V3 // 10611144 - VSLW V1, V2, V3 // 10611184 - VSL V1, V2, V3 // 106111c4 - VSLO V1, V2, V3 // 1061140c - VSRB V1, V2, V3 // 10611204 - VSRH V1, V2, V3 // 10611244 - VSRW V1, V2, V3 // 10611284 - VSR V1, V2, V3 // 106112c4 - VSRO V1, V2, V3 // 1061144c - VSLD V1, V2, V3 // 106115c4 - VSRAB V1, V2, V3 // 10611304 - VSRAH V1, V2, V3 // 10611344 - VSRAW V1, V2, V3 // 10611384 - VSRAD V1, V2, V3 // 106113c4 - VSLDOI $3, V1, V2, V3 // 106110ec - VCLZB V1, V2 // 10400f02 - VCLZH V1, V2 // 10400f42 - VCLZW V1, V2 // 10400f82 - VCLZD V1, V2 // 10400fc2 - VPOPCNTB V1, V2 // 10400f03 - VPOPCNTH V1, V2 // 10400f43 - VPOPCNTW V1, V2 // 10400f83 - VPOPCNTD V1, V2 // 10400fc3 - VCMPEQUB V1, V2, V3 // 10611006 - VCMPEQUBCC V1, V2, V3 // 10611406 - VCMPEQUH V1, V2, V3 // 10611046 - VCMPEQUHCC V1, V2, V3 // 10611446 - VCMPEQUW V1, V2, V3 // 10611086 - VCMPEQUWCC V1, V2, V3 // 10611486 - VCMPEQUD V1, 
V2, V3 // 106110c7 - VCMPEQUDCC V1, V2, V3 // 106114c7 - VCMPGTUB V1, V2, V3 // 10611206 - VCMPGTUBCC V1, V2, V3 // 10611606 - VCMPGTUH V1, V2, V3 // 10611246 - VCMPGTUHCC V1, V2, V3 // 10611646 - VCMPGTUW V1, V2, V3 // 10611286 - VCMPGTUWCC V1, V2, V3 // 10611686 - VCMPGTUD V1, V2, V3 // 106112c7 - VCMPGTUDCC V1, V2, V3 // 106116c7 - VCMPGTSB V1, V2, V3 // 10611306 - VCMPGTSBCC V1, V2, V3 // 10611706 - VCMPGTSH V1, V2, V3 // 10611346 - VCMPGTSHCC V1, V2, V3 // 10611746 - VCMPGTSW V1, V2, V3 // 10611386 - VCMPGTSWCC V1, V2, V3 // 10611786 - VCMPGTSD V1, V2, V3 // 106113c7 - VCMPGTSDCC V1, V2, V3 // 106117c7 - VCMPNEZB V1, V2, V3 // 10611107 - VCMPNEZBCC V1, V2, V3 // 10611507 - VCMPNEB V1, V2, V3 // 10611007 - VCMPNEBCC V1, V2, V3 // 10611407 - VCMPNEH V1, V2, V3 // 10611047 - VCMPNEHCC V1, V2, V3 // 10611447 - VCMPNEW V1, V2, V3 // 10611087 - VCMPNEWCC V1, V2, V3 // 10611487 - VPERM V1, V2, V3, V4 // 108110eb - VPERMR V1, V2, V3, V4 // 108110fb - VPERMXOR V1, V2, V3, V4 // 108110ed - VBPERMQ V1, V2, V3 // 1061154c - VBPERMD V1, V2, V3 // 106115cc - VSEL V1, V2, V3, V4 // 108110ea - VSPLTB $1, V1, V2 // 10410a0c - VSPLTH $1, V1, V2 // 10410a4c - VSPLTW $1, V1, V2 // 10410a8c - VSPLTISB $1, V1 // 1021030c - VSPLTISW $1, V1 // 1021038c - VSPLTISH $1, V1 // 1021034c - VCIPHER V1, V2, V3 // 10611508 - VCIPHERLAST V1, V2, V3 // 10611509 - VNCIPHER V1, V2, V3 // 10611548 - VNCIPHERLAST V1, V2, V3 // 10611549 - VSBOX V1, V2 // 104105c8 - VSHASIGMAW $1, V1, $15, V2 // 10418e82 - VSHASIGMAD $2, V1, $15, V2 // 104196c2 - - LXVD2X (R3)(R4), VS1 // 7c241e98 - LXV 16(R3), VS1 // f4230011 - LXVL R3, R4, VS1 // 7c23221a - LXVLL R3, R4, VS1 // 7c23225a - LXVX R3, R4, VS1 // 7c232218 - LXSDX (R3)(R4), VS1 // 7c241c98 - STXVD2X VS1, (R3)(R4) // 7c241f98 - STXV VS1,16(R3) // f4230015 - STXVL VS1, R3, R4 // 7c23231a - STXVLL VS1, R3, R4 // 7c23235a - STXVX VS1, R3, R4 // 7c232318 - STXSDX VS1, (R3)(R4) // 7c241d98 - LXSIWAX (R3)(R4), VS1 // 7c241898 - STXSIWX VS1, (R3)(R4) // 7c241918 
- MFVSRD VS1, R3 // 7c230066 - MTVSRD R3, VS1 // 7c230166 - XXLAND VS1, VS2, VS3 // f0611410 - XXLOR VS1, VS2, VS3 // f0611490 - XXLORC VS1, VS2, VS3 // f0611550 - XXLXOR VS1, VS2, VS3 // f06114d0 - XXSEL VS1, VS2, VS3, VS4 // f08110f0 - XXMRGHW VS1, VS2, VS3 // f0611090 - XXSPLTW VS1, $1, VS2 // f0410a90 - XXPERM VS1, VS2, VS3 // f06110d0 - XXSLDWI VS1, VS2, $1, VS3 // f0611110 - XSCVDPSP VS1, VS2 // f0400c24 - XVCVDPSP VS1, VS2 // f0400e24 - XSCVSXDDP VS1, VS2 // f0400de0 - XVCVDPSXDS VS1, VS2 // f0400f60 - XVCVSXDDP VS1, VS2 // f0400fe0 - - MOVD R3, LR // 7c6803a6 - MOVD R3, CTR // 7c6903a6 - MOVD R3, XER // 7c6103a6 - MOVD LR, R3 // 7c6802a6 - MOVD CTR, R3 // 7c6902a6 - MOVD XER, R3 // 7c6102a6 - MOVFL CR3, CR1 // 4c8c0000 - - RET diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index dcabb3cd6a..090fefb4d8 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -2159,7 +2159,7 @@ func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 { /* Z23-form, 3-register operands + CY field */ func AOP_Z23I(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 { - return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (c&3)<<7 + return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (c&3)<<9 } /* X-form, 3-register operands + EH field */ -- cgit v1.3 From 5ed81a3d14aa4eda5de87d7fe074b4c913b58511 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Tue, 3 Nov 2020 16:59:25 -0600 Subject: cmd/asm: fix rlwnm reg,reg,const,reg encoding on ppc64 The wrong value for the first reg parameter was selected. Likewise the wrong opcode was selected. This should match rlwnm (rrr type), not rlwinm (irr type). Similarly, fix the optab matching rules so clrlslwi does not match reg,reg,const,reg arguments. This is not a valid operand combination for clrlslwi. 
Fixes #42368 Change-Id: I4eb16d45a760b9fd3f497ef9863f82465351d39f Reviewed-on: https://go-review.googlesource.com/c/go/+/267421 Reviewed-by: Cherry Zhang Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64.s | 2 ++ src/cmd/internal/obj/ppc64/asm9.go | 26 ++++++-------------------- 2 files changed, 8 insertions(+), 20 deletions(-) (limited to 'src/cmd/internal/obj/ppc64') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s index 2b1191c44b..8f6eb14f73 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64.s @@ -282,7 +282,9 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 RLWMI $7, R3, $65535, R6 // 50663c3e RLWMICC $7, R3, $65535, R6 // 50663c3f RLWNM $3, R4, $7, R6 // 54861f7e + RLWNM R3, R4, $7, R6 // 5c861f7e RLWNMCC $3, R4, $7, R6 // 54861f7f + RLWNMCC R3, R4, $7, R6 // 5c861f7f RLDMI $0, R4, $7, R6 // 7886076c RLDMICC $0, R4, $7, R6 // 7886076d RLDIMI $0, R4, $7, R6 // 788601cc diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 090fefb4d8..775d27d8e8 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -174,6 +174,7 @@ var optab = []Optab{ {ASRAD, C_SCON, C_NONE, C_NONE, C_REG, 56, 4, 0}, {ARLWMI, C_SCON, C_REG, C_LCON, C_REG, 62, 4, 0}, {ARLWMI, C_REG, C_REG, C_LCON, C_REG, 63, 4, 0}, + {ACLRLSLWI, C_SCON, C_REG, C_LCON, C_REG, 62, 4, 0}, {ARLDMI, C_SCON, C_REG, C_LCON, C_REG, 30, 4, 0}, {ARLDC, C_SCON, C_REG, C_LCON, C_REG, 29, 4, 0}, {ARLDCL, C_SCON, C_REG, C_LCON, C_REG, 29, 4, 0}, @@ -1911,7 +1912,6 @@ func buildop(ctxt *obj.Link) { opset(ARLWMICC, r0) opset(ARLWNM, r0) opset(ARLWNMCC, r0) - opset(ACLRLSLWI, r0) case ARLDMI: opset(ARLDMICC, r0) @@ -2010,6 +2010,7 @@ func buildop(ctxt *obj.Link) { AADDEX, ACMPEQB, AECIWX, + ACLRLSLWI, obj.ANOP, obj.ATEXT, obj.AUNDEF, @@ -3413,25 +3414,10 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { } case 63: /* rlwmi b,s,$mask,a */ - v 
:= c.regoff(&p.From) - switch p.As { - case ACLRLSLWI: - n := c.regoff(p.GetFrom3()) - if n > v || v >= 32 { - // Message will match operands from the ISA even though in the - // code it uses 'v' - c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p) - } - // This is an extended mnemonic described in the ISA C.8.2 - // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n - // It generates the rlwinm directly here. - o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n)) - default: - var mask [2]uint8 - c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) - o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v)) - o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 - } + var mask [2]uint8 + c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(p.From.Reg)) + o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 case 64: /* mtfsf fr[, $m] {,fpcsr} */ var v int32 -- cgit v1.3 From 63fd764502e08d067293a93d6d1a566951255ce5 Mon Sep 17 00:00:00 2001 From: Derek Parker Date: Wed, 28 Oct 2020 20:54:27 +0000 Subject: cmd/internal/obj: add prologue_end DWARF stmt for ppc64 This patch adds a prologue_end statement to the DWARF information for the ppc64 arch. Prologue end is used by the Delve debugger in order to determine where to set a breakpoint to avoid the stacksplit prologue. 
Updates #36612 Change-Id: Ifb16c1476fe716a0bf493c5486d1d88ebe8d0253 GitHub-Last-Rev: 77a217206d529df8bf8d4ef10a5347b6ae524612 GitHub-Pull-Request: golang/go#42261 Reviewed-on: https://go-review.googlesource.com/c/go/+/266019 Run-TryBot: David Chase TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Reviewed-by: Alessandro Arzilli Trust: Dmitri Shuralyov --- src/cmd/internal/obj/ppc64/obj9.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/cmd/internal/obj/ppc64') diff --git a/src/cmd/internal/obj/ppc64/obj9.go b/src/cmd/internal/obj/ppc64/obj9.go index 3ab19de602..fddf552156 100644 --- a/src/cmd/internal/obj/ppc64/obj9.go +++ b/src/cmd/internal/obj/ppc64/obj9.go @@ -32,6 +32,7 @@ package ppc64 import ( "cmd/internal/obj" "cmd/internal/objabi" + "cmd/internal/src" "cmd/internal/sys" ) @@ -672,6 +673,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // save the link register and update the stack, since that code is // called directly from C/C++ and can't clobber REGTMP (R31). if autosize != 0 && c.cursym.Name != "runtime.racecallbackthunk" { + var prologueEnd *obj.Prog // Save the link register and update the SP. MOVDU is used unless // the frame size is too large. The link register must be saved // even for non-empty leaf functions so that traceback works. 
@@ -685,6 +687,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.To.Type = obj.TYPE_REG q.To.Reg = REGTMP + prologueEnd = q + q = obj.Appendp(q, c.newprog) q.As = AMOVDU q.Pos = p.Pos @@ -720,6 +724,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.To.Offset = int64(-autosize) q.To.Reg = REGSP + prologueEnd = q + q = obj.Appendp(q, c.newprog) q.As = AADD q.Pos = p.Pos @@ -730,8 +736,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.Spadj = +autosize q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) - } + prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd) } else if c.cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // (e.g. gogo) are not identified as leaves but still have -- cgit v1.3 From cb674b5c13d331fd5cef5bae7a2a67e5e0d41f7d Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Fri, 13 Nov 2020 14:44:25 -0600 Subject: cmd/compile,cmd/asm: fix function pointer call perf regression on ppc64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit by inserting a hint when using bclrl. Using this instruction as a subroutine call is not the expected default behavior, and as a result confuses the branch predictor. The default expected behavior is a conditional return from a subroutine. We can change this assumption by encoding a hint that this is not a subroutine return.
The regex benchmarks are a pretty good example of how much this hint can help generic ppc64le code on a power9 machine: name old time/op new time/op delta Find 606ns ± 0% 447ns ± 0% -26.27% FindAllNoMatches 309ns ± 0% 205ns ± 0% -33.72% FindString 609ns ± 0% 451ns ± 0% -26.04% FindSubmatch 734ns ± 0% 594ns ± 0% -19.07% FindStringSubmatch 706ns ± 0% 574ns ± 0% -18.83% Literal 177ns ± 0% 136ns ± 0% -22.89% NotLiteral 4.69µs ± 0% 2.34µs ± 0% -50.14% MatchClass 6.05µs ± 0% 3.26µs ± 0% -46.08% MatchClass_InRange 5.93µs ± 0% 3.15µs ± 0% -46.86% ReplaceAll 3.15µs ± 0% 2.18µs ± 0% -30.77% AnchoredLiteralShortNonMatch 156ns ± 0% 109ns ± 0% -30.61% AnchoredLiteralLongNonMatch 192ns ± 0% 136ns ± 0% -29.34% AnchoredShortMatch 268ns ± 0% 209ns ± 0% -22.00% AnchoredLongMatch 472ns ± 0% 357ns ± 0% -24.30% OnePassShortA 1.16µs ± 0% 0.87µs ± 0% -25.03% NotOnePassShortA 1.34µs ± 0% 1.20µs ± 0% -10.63% OnePassShortB 940ns ± 0% 655ns ± 0% -30.29% NotOnePassShortB 873ns ± 0% 703ns ± 0% -19.52% OnePassLongPrefix 258ns ± 0% 155ns ± 0% -40.13% OnePassLongNotPrefix 943ns ± 0% 529ns ± 0% -43.89% MatchParallelShared 591ns ± 0% 436ns ± 0% -26.31% MatchParallelCopied 596ns ± 0% 435ns ± 0% -27.10% QuoteMetaAll 186ns ± 0% 186ns ± 0% -0.16% QuoteMetaNone 55.9ns ± 0% 55.9ns ± 0% +0.02% Compile/Onepass 9.64µs ± 0% 9.26µs ± 0% -3.97% Compile/Medium 21.7µs ± 0% 20.6µs ± 0% -4.90% Compile/Hard 174µs ± 0% 174µs ± 0% +0.07% Match/Easy0/16 7.35ns ± 0% 7.34ns ± 0% -0.11% Match/Easy0/32 116ns ± 0% 97ns ± 0% -16.27% Match/Easy0/1K 592ns ± 0% 562ns ± 0% -5.04% Match/Easy0/32K 12.6µs ± 0% 12.5µs ± 0% -0.64% Match/Easy0/1M 556µs ± 0% 556µs ± 0% -0.00% Match/Easy0/32M 17.7ms ± 0% 17.7ms ± 0% +0.05% Match/Easy0i/16 7.34ns ± 0% 7.35ns ± 0% +0.10% Match/Easy0i/32 2.82µs ± 0% 1.64µs ± 0% -41.71% Match/Easy0i/1K 83.2µs ± 0% 48.2µs ± 0% -42.06% Match/Easy0i/32K 2.13ms ± 0% 1.80ms ± 0% -15.34% Match/Easy0i/1M 68.1ms ± 0% 57.6ms ± 0% -15.31% Match/Easy0i/32M 2.18s ± 0% 1.80s ± 0% -17.52% Match/Easy1/16 7.36ns ± 0% 
7.34ns ± 0% -0.24% Match/Easy1/32 118ns ± 0% 96ns ± 0% -18.72% Match/Easy1/1K 2.46µs ± 0% 1.58µs ± 0% -35.65% Match/Easy1/32K 80.2µs ± 0% 54.6µs ± 0% -31.92% Match/Easy1/1M 2.75ms ± 0% 1.88ms ± 0% -31.66% Match/Easy1/32M 87.5ms ± 0% 59.8ms ± 0% -31.62% Match/Medium/16 7.34ns ± 0% 7.34ns ± 0% +0.01% Match/Medium/32 2.60µs ± 0% 1.50µs ± 0% -42.61% Match/Medium/1K 78.1µs ± 0% 43.7µs ± 0% -44.06% Match/Medium/32K 2.08ms ± 0% 1.52ms ± 0% -27.11% Match/Medium/1M 66.5ms ± 0% 48.6ms ± 0% -26.96% Match/Medium/32M 2.14s ± 0% 1.60s ± 0% -25.18% Match/Hard/16 7.35ns ± 0% 7.35ns ± 0% +0.03% Match/Hard/32 3.58µs ± 0% 2.44µs ± 0% -31.82% Match/Hard/1K 108µs ± 0% 75µs ± 0% -31.04% Match/Hard/32K 2.79ms ± 0% 2.25ms ± 0% -19.30% Match/Hard/1M 89.4ms ± 0% 72.2ms ± 0% -19.26% Match/Hard/32M 2.91s ± 0% 2.37s ± 0% -18.60% Match/Hard1/16 11.1µs ± 0% 8.3µs ± 0% -25.07% Match/Hard1/32 21.4µs ± 0% 16.1µs ± 0% -24.85% Match/Hard1/1K 658µs ± 0% 498µs ± 0% -24.27% Match/Hard1/32K 12.2ms ± 0% 11.7ms ± 0% -4.60% Match/Hard1/1M 391ms ± 0% 374ms ± 0% -4.40% Match/Hard1/32M 12.6s ± 0% 12.0s ± 0% -4.68% Match_onepass_regex/16 870ns ± 0% 611ns ± 0% -29.79% Match_onepass_regex/32 1.58µs ± 0% 1.08µs ± 0% -31.48% Match_onepass_regex/1K 45.7µs ± 0% 30.3µs ± 0% -33.58% Match_onepass_regex/32K 1.45ms ± 0% 0.97ms ± 0% -33.20% Match_onepass_regex/1M 46.2ms ± 0% 30.9ms ± 0% -33.01% Match_onepass_regex/32M 1.46s ± 0% 0.99s ± 0% -32.02% name old alloc/op new alloc/op delta Find 0.00B 0.00B 0.00% FindAllNoMatches 0.00B 0.00B 0.00% FindString 0.00B 0.00B 0.00% FindSubmatch 48.0B ± 0% 48.0B ± 0% 0.00% FindStringSubmatch 32.0B ± 0% 32.0B ± 0% 0.00% Compile/Onepass 4.02kB ± 0% 4.02kB ± 0% 0.00% Compile/Medium 9.39kB ± 0% 9.39kB ± 0% 0.00% Compile/Hard 84.7kB ± 0% 84.7kB ± 0% 0.00% Match_onepass_regex/16 0.00B 0.00B 0.00% Match_onepass_regex/32 0.00B 0.00B 0.00% Match_onepass_regex/1K 0.00B 0.00B 0.00% Match_onepass_regex/32K 0.00B 0.00B 0.00% Match_onepass_regex/1M 5.00B ± 0% 3.00B ± 0% -40.00% 
Match_onepass_regex/32M 136B ± 0% 68B ± 0% -50.00% name old allocs/op new allocs/op delta Find 0.00 0.00 0.00% FindAllNoMatches 0.00 0.00 0.00% FindString 0.00 0.00 0.00% FindSubmatch 1.00 ± 0% 1.00 ± 0% 0.00% FindStringSubmatch 1.00 ± 0% 1.00 ± 0% 0.00% Compile/Onepass 52.0 ± 0% 52.0 ± 0% 0.00% Compile/Medium 112 ± 0% 112 ± 0% 0.00% Compile/Hard 424 ± 0% 424 ± 0% 0.00% Match_onepass_regex/16 0.00 0.00 0.00% Match_onepass_regex/32 0.00 0.00 0.00% Match_onepass_regex/1K 0.00 0.00 0.00% Match_onepass_regex/32K 0.00 0.00 0.00% Match_onepass_regex/1M 0.00 0.00 0.00% Match_onepass_regex/32M 2.00 ± 0% 1.00 ± 0% -50.00% name old speed new speed delta QuoteMetaAll 75.2MB/s ± 0% 75.3MB/s ± 0% +0.15% QuoteMetaNone 465MB/s ± 0% 465MB/s ± 0% -0.02% Match/Easy0/16 2.18GB/s ± 0% 2.18GB/s ± 0% +0.10% Match/Easy0/32 276MB/s ± 0% 330MB/s ± 0% +19.46% Match/Easy0/1K 1.73GB/s ± 0% 1.82GB/s ± 0% +5.29% Match/Easy0/32K 2.60GB/s ± 0% 2.62GB/s ± 0% +0.64% Match/Easy0/1M 1.89GB/s ± 0% 1.89GB/s ± 0% +0.00% Match/Easy0/32M 1.89GB/s ± 0% 1.89GB/s ± 0% -0.05% Match/Easy0i/16 2.18GB/s ± 0% 2.18GB/s ± 0% -0.10% Match/Easy0i/32 11.4MB/s ± 0% 19.5MB/s ± 0% +71.48% Match/Easy0i/1K 12.3MB/s ± 0% 21.2MB/s ± 0% +72.62% Match/Easy0i/32K 15.4MB/s ± 0% 18.2MB/s ± 0% +18.12% Match/Easy0i/1M 15.4MB/s ± 0% 18.2MB/s ± 0% +18.12% Match/Easy0i/32M 15.4MB/s ± 0% 18.6MB/s ± 0% +21.21% Match/Easy1/16 2.17GB/s ± 0% 2.18GB/s ± 0% +0.24% Match/Easy1/32 271MB/s ± 0% 333MB/s ± 0% +23.07% Match/Easy1/1K 417MB/s ± 0% 648MB/s ± 0% +55.38% Match/Easy1/32K 409MB/s ± 0% 600MB/s ± 0% +46.88% Match/Easy1/1M 381MB/s ± 0% 558MB/s ± 0% +46.33% Match/Easy1/32M 383MB/s ± 0% 561MB/s ± 0% +46.25% Match/Medium/16 2.18GB/s ± 0% 2.18GB/s ± 0% -0.01% Match/Medium/32 12.3MB/s ± 0% 21.4MB/s ± 0% +74.13% Match/Medium/1K 13.1MB/s ± 0% 23.4MB/s ± 0% +78.73% Match/Medium/32K 15.7MB/s ± 0% 21.6MB/s ± 0% +37.23% Match/Medium/1M 15.8MB/s ± 0% 21.6MB/s ± 0% +36.93% Match/Medium/32M 15.7MB/s ± 0% 21.0MB/s ± 0% +33.67% Match/Hard/16 2.18GB/s ± 0% 
2.18GB/s ± 0% -0.03% Match/Hard/32 8.93MB/s ± 0% 13.10MB/s ± 0% +46.70% Match/Hard/1K 9.48MB/s ± 0% 13.74MB/s ± 0% +44.94% Match/Hard/32K 11.7MB/s ± 0% 14.5MB/s ± 0% +23.87% Match/Hard/1M 11.7MB/s ± 0% 14.5MB/s ± 0% +23.87% Match/Hard/32M 11.6MB/s ± 0% 14.2MB/s ± 0% +22.86% Match/Hard1/16 1.44MB/s ± 0% 1.93MB/s ± 0% +34.03% Match/Hard1/32 1.49MB/s ± 0% 1.99MB/s ± 0% +33.56% Match/Hard1/1K 1.56MB/s ± 0% 2.05MB/s ± 0% +31.41% Match/Hard1/32K 2.68MB/s ± 0% 2.80MB/s ± 0% +4.48% Match/Hard1/1M 2.68MB/s ± 0% 2.80MB/s ± 0% +4.48% Match/Hard1/32M 2.66MB/s ± 0% 2.79MB/s ± 0% +4.89% Match_onepass_regex/16 18.4MB/s ± 0% 26.2MB/s ± 0% +42.41% Match_onepass_regex/32 20.2MB/s ± 0% 29.5MB/s ± 0% +45.92% Match_onepass_regex/1K 22.4MB/s ± 0% 33.8MB/s ± 0% +50.54% Match_onepass_regex/32K 22.6MB/s ± 0% 33.9MB/s ± 0% +49.67% Match_onepass_regex/1M 22.7MB/s ± 0% 33.9MB/s ± 0% +49.27% Match_onepass_regex/32M 23.0MB/s ± 0% 33.9MB/s ± 0% +47.14% Fixes #42709 Change-Id: Ice07fec2de4c5b1302febf8c2978ae8c1e4fd3e5 Reviewed-on: https://go-review.googlesource.com/c/go/+/271337 Reviewed-by: Cherry Zhang Trust: Lynn Boger Trust: Carlos Eduardo Seo --- src/cmd/compile/internal/ppc64/ssa.go | 3 +++ src/cmd/internal/obj/ppc64/asm9.go | 11 +++++++++++ 2 files changed, 14 insertions(+) (limited to 'src/cmd/internal/obj/ppc64') diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index 3888aa6527..3e20c44a4c 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -1781,6 +1781,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { pp := s.Call(v) pp.To.Reg = ppc64.REG_LR + // Insert a hint this is not a subroutine return. 
+ pp.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 1}) + if gc.Ctxt.Flag_shared { // When compiling Go into PIC, the function we just // called via pointer might have been implemented in diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 775d27d8e8..41e263b2c0 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -334,6 +334,7 @@ var optab = []Optab{ {ABC, C_SCON, C_REG, C_NONE, C_SBRA, 16, 4, 0}, {ABC, C_SCON, C_REG, C_NONE, C_LBRA, 17, 4, 0}, {ABR, C_NONE, C_NONE, C_NONE, C_LR, 18, 4, 0}, + {ABR, C_NONE, C_NONE, C_SCON, C_LR, 18, 4, 0}, {ABR, C_NONE, C_NONE, C_NONE, C_CTR, 18, 4, 0}, {ABR, C_REG, C_NONE, C_NONE, C_CTR, 18, 4, 0}, {ABR, C_NONE, C_NONE, C_NONE, C_ZOREG, 15, 8, 0}, @@ -2844,6 +2845,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { case 18: /* br/bl (lr/ctr); bc/bcl bo,bi,(lr/ctr) */ var v int32 + var bh uint32 = 0 if p.As == ABC || p.As == ABCL { v = c.regoff(&p.From) & 31 } else { @@ -2865,6 +2867,15 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { v = 0 } + // Insert optional branch hint for bclr[l]/bcctr[l] + if p.From3Type() != obj.TYPE_NONE { + bh = uint32(p.GetFrom3().Offset) + if bh == 2 || bh > 3 { + log.Fatalf("BH must be 0,1,3 for %v", p) + } + o1 |= bh << 11 + } + if p.As == ABL || p.As == ABCL { o1 |= 1 } -- cgit v1.3