From dfdc3880b01d46d1d8125ab9eea0606b2fa5b819 Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Thu, 20 Aug 2020 17:02:18 +0800 Subject: cmd/internal/obj/arm64: enable some SIMD instructions Enable VBSL, VBIT, VCMTST, VUXTL VUXTL2 and FMOVQ SIMD instructions required by the issue #40725. And FMOVQ instrucion is used to move a large constant to a Vn register. Add test cases. Fixes #40725 Change-Id: I1cac1922a0a0165d698a4b73a41f7a5f0a0ad549 Reviewed-on: https://go-review.googlesource.com/c/go/+/249758 Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/testdata/arm64.s | 15 +++++++++++++++ src/cmd/asm/internal/asm/testdata/arm64error.s | 5 +++++ 2 files changed, 20 insertions(+) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index f0c716a2b5..451ca749ba 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -145,6 +145,17 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VZIP2 V10.D2, V13.D2, V3.D2 // a379ca4e VZIP1 V17.S2, V4.S2, V26.S2 // 9a38910e VZIP2 V25.S2, V14.S2, V25.S2 // d979990e + VUXTL V30.B8, V30.H8 // dea7082f + VUXTL V30.H4, V29.S4 // dda7102f + VUXTL V29.S2, V2.D2 // a2a7202f + VUXTL2 V30.H8, V30.S4 // dea7106f + VUXTL2 V29.S4, V2.D2 // a2a7206f + VUXTL2 V30.B16, V2.H8 // c2a7086f + VBIT V21.B16, V25.B16, V4.B16 // 241fb56e + VBSL V23.B16, V3.B16, V7.B16 // 671c776e + VCMTST V2.B8, V29.B8, V2.B8 // a28f220e + VCMTST V2.D2, V23.D2, V3.D2 // e38ee24e + VSUB V2.B8, V30.B8, V30.B8 // de87222e MOVD (R2)(R6.SXTW), R4 // 44c866f8 MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8 MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8 @@ -186,6 +197,10 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 FMOVS $(0.96875), F3 // 03f02d1e FMOVD $(28.0), F4 // 0490671e +// move a large constant to a Vd. + FMOVD $0x8040201008040201, V20 // FMOVD $-9205322385119247871, V20 + FMOVQ $0x8040201008040202, V29 // FMOVQ $-9205322385119247870, V29 + FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc FMOVS (R2)(R6<<2), F4 // 447866bc FMOVD (R2)(R6), F4 // FMOVD (R2)(R6*1), F4 // 446866fc diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 9f377817a9..2a911b4cce 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -340,4 +340,9 @@ TEXT errors(SB),$0 MRS PMSWINC_EL0, R3 // ERROR "system register is not readable" MRS OSLAR_EL1, R3 // ERROR "system register is not readable" VLD3R.P 24(R15), [V15.H4,V16.H4,V17.H4] // ERROR "invalid post-increment offset" + VBIT V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement" + VBSL V1.D2, V12.D2, V3.D2 // ERROR "invalid arrangement" + VUXTL V30.D2, V30.H8 // ERROR "operand mismatch" + VUXTL2 V20.B8, V21.H8 // ERROR "operand mismatch" + VUXTL V3.D2, V4.B8 // ERROR "operand mismatch" RET -- cgit v1.3 From d7ab277eed4d2e5ede4f3361adf42d4ad76ced8f Mon Sep 17 00:00:00 2001 From: Junchen Li Date: Mon, 31 Aug 2020 13:32:33 +0800 Subject: cmd/asm: add more SIMD instructions on arm64 This CL adds USHLL, USHLL2, UZP1, UZP2, and BIF instructions requested by #40725. And since UXTL* are aliases of USHLL*, this CL also merges them into one case. Updates #40725 Change-Id: I404a4fdaf953319f72eea548175bec1097a2a816 Reviewed-on: https://go-review.googlesource.com/c/go/+/253659 Reviewed-by: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot --- src/cmd/asm/internal/asm/testdata/arm64.s | 20 +++++ src/cmd/asm/internal/asm/testdata/arm64error.s | 8 ++ src/cmd/internal/obj/arm64/a.out.go | 9 +- src/cmd/internal/obj/arm64/anames.go | 9 +- src/cmd/internal/obj/arm64/asm7.go | 109 +++++++++++++------------ 5 files changed, 100 insertions(+), 55 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 451ca749ba..e106ff2ae1 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -156,6 +156,26 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VCMTST V2.B8, V29.B8, V2.B8 // a28f220e VCMTST V2.D2, V23.D2, V3.D2 // e38ee24e VSUB V2.B8, V30.B8, V30.B8 // de87222e + VUZP1 V0.B8, V30.B8, V1.B8 // c11b000e + VUZP1 V1.B16, V29.B16, V2.B16 // a21b014e + VUZP1 V2.H4, V28.H4, V3.H4 // 831b420e + VUZP1 V3.H8, V27.H8, V4.H8 // 641b434e + VUZP1 V28.S2, V2.S2, V5.S2 // 45189c0e + VUZP1 V29.S4, V1.S4, V6.S4 // 26189d4e + VUZP1 V30.D2, V0.D2, V7.D2 // 0718de4e + VUZP2 V0.D2, V30.D2, V1.D2 // c15bc04e + VUZP2 V30.D2, V0.D2, V29.D2 // 1d58de4e + VUSHLL $0, V30.B8, V30.H8 // dea7082f + VUSHLL $0, V30.H4, V29.S4 // dda7102f + VUSHLL $0, V29.S2, V2.D2 // a2a7202f + VUSHLL2 $0, V30.B16, V2.H8 // c2a7086f + VUSHLL2 $0, V30.H8, V30.S4 // dea7106f + VUSHLL2 $0, V29.S4, V2.D2 // a2a7206f + VUSHLL $7, V30.B8, V30.H8 // dea70f2f + VUSHLL $15, V30.H4, V29.S4 // dda71f2f + VUSHLL2 $31, V30.S4, V2.D2 // c2a73f6f + VBIF V0.B8, V30.B8, V1.B8 // c11fe02e + VBIF V30.B16, V0.B16, V2.B16 // 021cfe6e MOVD (R2)(R6.SXTW), R4 // 44c866f8 MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8 MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8 diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 2a911b4cce..20b1f3e9f0 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -345,4 +345,12 @@ TEXT errors(SB),$0 VUXTL V30.D2, V30.H8 // ERROR "operand mismatch" VUXTL2 V20.B8, V21.H8 // ERROR "operand mismatch" VUXTL V3.D2, V4.B8 // ERROR "operand mismatch" + VUZP1 V0.B8, V30.B8, V1.B16 // ERROR "operand mismatch" + VUZP2 V0.Q1, V30.Q1, V1.Q1 // ERROR "invalid arrangement" + VUSHLL $0, V30.D2, V30.H8 // ERROR "operand mismatch" + VUSHLL2 $0, V20.B8, V21.H8 // ERROR "operand mismatch" + VUSHLL $8, V30.B8, V30.H8 // ERROR "shift amount out of range" + VUSHLL2 $32, V30.S4, V2.D2 // ERROR "shift amount out of range" + VBIF V0.B8, V1.B8, V2.B16 // ERROR "operand mismatch" + VBIF V0.D2, V1.D2, V2.D2 // ERROR "invalid arrangement" RET diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index ab065e07e5..2839da1437 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -954,6 +954,7 @@ const ( AVADD AVADDP AVAND + AVBIF AVCMEQ AVCNT AVEOR @@ -986,6 +987,12 @@ const ( AVEXT AVRBIT AVUSHR + AVUSHLL + AVUSHLL2 + AVUXTL + AVUXTL2 + AVUZP1 + AVUZP2 AVSHL AVSRI AVBSL @@ -994,8 +1001,6 @@ const ( AVZIP1 AVZIP2 AVCMTST - AVUXTL - AVUXTL2 ALAST AB = obj.AJMP ABL = obj.ACALL diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 8961f04b0c..48c066abfd 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -461,6 +461,7 @@ var Anames = []string{ "VADD", "VADDP", "VAND", + "VBIF", "VCMEQ", "VCNT", "VEOR", @@ -493,6 +494,12 @@ var Anames = []string{ "VEXT", "VRBIT", "VUSHR", + "VUSHLL", + "VUSHLL2", + "VUXTL", + "VUXTL2", + "VUZP1", + "VUZP2", "VSHL", "VSRI", "VBSL", @@ -501,7 +508,5 @@ var Anames = []string{ "VZIP1", "VZIP2", "VCMTST", - "VUXTL", - "VUXTL2", "LAST", } diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 7ce18d0f13..df4bbbbd35 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -480,6 +480,7 @@ var optab = []Optab{ {AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, 100, 4, 0, 0, 0}, {AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, 95, 4, 0, 0, 0}, {AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVUSHLL, C_VCON, C_ARNG, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, /* conditional operations */ @@ -2751,6 +2752,9 @@ func buildop(ctxt *obj.Link) { oprangeset(AVBSL, t) oprangeset(AVBIT, t) oprangeset(AVCMTST, t) + oprangeset(AVUZP1, t) + oprangeset(AVUZP2, t) + oprangeset(AVBIF, t) case AVADD: oprangeset(AVSUB, t) @@ -2801,6 +2805,9 @@ func buildop(ctxt *obj.Link) { case AVUXTL: oprangeset(AVUXTL2, t) + case AVUSHLL: + oprangeset(AVUSHLL2, t) + case AVLD1R: oprangeset(AVLD2, t) oprangeset(AVLD2R, t) @@ -4177,7 +4184,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Add = 0 rel.Type = objabi.R_ARM64_GOTPCREL - case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub Vm., Vn., Vd. */ + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2 Vm., Vn., Vd. */ af := int((p.From.Reg >> 5) & 15) af3 := int((p.Reg >> 5) & 15) at := int((p.To.Reg >> 5) & 15) @@ -4219,7 +4226,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } switch p.As { - case AVORR, AVAND, AVEOR, AVBIT, AVBSL: + case AVORR, AVAND, AVEOR, AVBIT, AVBSL, AVBIF: if af != ARNG_16B && af != ARNG_8B { c.ctxt.Diag("invalid arrangement: %v", p) } @@ -4233,7 +4240,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { size = 0 case AVBSL: size = 1 - case AVORR, AVBIT: + case AVORR, AVBIT, AVBIF: size = 2 case AVFMLA, AVFMLS: if af == ARNG_2D { @@ -5120,56 +5127,44 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool. o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg)) - case 102: // VUXTL{2} Vn., Vd. - af := int((p.From.Reg >> 5) & 15) - at := int((p.To.Reg >> 5) & 15) - var Q, immh uint32 - switch at { - case ARNG_8H: - if af == ARNG_8B { - immh = 1 - Q = 0 - } else if af == ARNG_16B { - immh = 1 - Q = 1 - } else { - c.ctxt.Diag("operand mismatch: %v\n", p) - } - case ARNG_4S: - if af == ARNG_4H { - immh = 2 - Q = 0 - } else if af == ARNG_8H { - immh = 2 - Q = 1 - } else { - c.ctxt.Diag("operand mismatch: %v\n", p) - } - case ARNG_2D: - if af == ARNG_2S { - immh = 4 - Q = 0 - } else if af == ARNG_4S { - immh = 4 - Q = 1 - } else { - c.ctxt.Diag("operand mismatch: %v\n", p) - } + case 102: /* vushll, vushll2, vuxtl, vuxtl2 */ + o1 = c.opirr(p, p.As) + rf := p.Reg + af := uint8((p.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + shift := int(p.From.Offset) + if p.As == AVUXTL || p.As == AVUXTL2 { + rf = p.From.Reg + af = uint8((p.From.Reg >> 5) & 15) + shift = 0 + } + + pack := func(q, x, y uint8) uint32 { + return uint32(q)<<16 | uint32(x)<<8 | uint32(y) + } + + var Q uint8 = uint8(o1>>30) & 1 + var immh, width uint8 + switch pack(Q, af, at) { + case pack(0, ARNG_8B, ARNG_8H): + immh, width = 1, 8 + case pack(1, ARNG_16B, ARNG_8H): + immh, width = 1, 8 + case pack(0, ARNG_4H, ARNG_4S): + immh, width = 2, 16 + case pack(1, ARNG_8H, ARNG_4S): + immh, width = 2, 16 + case pack(0, ARNG_2S, ARNG_2D): + immh, width = 4, 32 + case pack(1, ARNG_4S, ARNG_2D): + immh, width = 4, 32 default: c.ctxt.Diag("operand mismatch: %v\n", p) } - - if p.As == AVUXTL && Q == 1 { - c.ctxt.Diag("operand mismatch: %v\n", p) + if !(0 <= shift && shift <= int(width-1)) { + c.ctxt.Diag("shift amount out of range: %v\n", p) } - if p.As == AVUXTL2 && Q == 0 { - c.ctxt.Diag("operand mismatch: %v\n", p) - } - - o1 = c.oprrr(p, p.As) - rf := int((p.From.Reg) & 31) - rt := int((p.To.Reg) & 31) - o1 |= Q<<30 | immh<<19 | uint32((rf&31)<<5) | uint32(rt&31) + o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31) } out[0] = o1 out[1] = o2 @@ -5802,6 +5797,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVLD2R, AVLD4R: return 0xD<<24 | 3<<21 + case AVBIF: + return 1<<29 | 7<<25 | 7<<21 | 7<<10 + case AVBIT: return 1<<29 | 0x75<<21 | 7<<10 @@ -5811,8 +5809,11 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVCMTST: return 0xE<<24 | 1<<21 | 0x23<<10 - case AVUXTL, AVUXTL2: - return 0x5e<<23 | 0x29<<10 + case AVUZP1: + return 7<<25 | 3<<11 + + case AVUZP2: + return 7<<25 | 1<<14 | 3<<11 } c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) @@ -6011,6 +6012,12 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVSRI: return 0x5E<<23 | 17<<10 + + case AVUSHLL, AVUXTL: + return 1<<29 | 15<<24 | 0x29<<10 + + case AVUSHLL2, AVUXTL2: + return 3<<29 | 15<<24 | 0x29<<10 } c.ctxt.Diag("%v: bad irr %v", p, a) -- cgit v1.3 From a86b6f23f08fd42154bc5bbfa417da6ee5ef48fb Mon Sep 17 00:00:00 2001 From: diaxu01 Date: Thu, 2 Apr 2020 02:39:28 +0000 Subject: cmd/internal/obj/arm64: optimize the instruction of moving long effective stack address Currently, when the offset of "MOVD $offset(Rn), Rd" is a large positive constant or a negative constant, the assembler will load this offset from the constant pool.This patch gets rid of the constant pool by encoding the offset into two ADD instructions if it's a large positive constant or one SUB instruction if negative. For very large negative offset, it is rarely used, here we don't optimize this case. Optimized case 1: MOVD $-0x100000(R7), R0 Before: LDR 0x67670(constant pool), R27; ADD R27.UXTX, R0, R7 After: SUB $0x100000, R7, R0 Optimized case 2: MOVD $0x123468(R7), R0 Before: LDR 0x67670(constant pool), R27; ADD R27.UXTX, R0, R7 After: ADD $0x123000, R7, R27; ADD $0x000468, R27, R0 1. Binary size before/after. binary size change pkg/linux_arm64 +4KB pkg/tool/linux_arm64 no change go no change gofmt no change 2. go1 benckmark. name old time/op new time/op delta pkg:test/bench/go1 goos:linux goarch:arm64 BinaryTree17-64 7335721401.800000ns +-40% 6264542009.800000ns +-14% ~ (p=0.421 n=5+5) Fannkuch11-64 3886551822.600000ns +- 0% 3875870590.200000ns +- 0% ~ (p=0.151 n=5+5) FmtFprintfEmpty-64 82.960000ns +- 1% 83.900000ns +- 2% +1.13% (p=0.048 n=5+5) FmtFprintfString-64 149.200000ns +- 1% 148.000000ns +- 0% -0.80% (p=0.016 n=5+4) FmtFprintfInt-64 177.000000ns +- 0% 178.400000ns +- 2% ~ (p=0.794 n=4+5) FmtFprintfIntInt-64 240.200000ns +- 2% 239.400000ns +- 4% ~ (p=0.302 n=5+5) FmtFprintfPrefixedInt-64 300.400000ns +- 0% 299.200000ns +- 1% ~ (p=0.119 n=5+5) FmtFprintfFloat-64 360.000000ns +- 0% 361.600000ns +- 3% ~ (p=0.349 n=4+5) FmtManyArgs-64 1064.400000ns +- 1% 1061.400000ns +- 0% ~ (p=0.087 n=5+5) GobDecode-64 12080404.400000ns +- 2% 11637601.000000ns +- 1% -3.67% (p=0.008 n=5+5) GobEncode-64 8474973.800000ns +- 2% 7977801.600000ns +- 2% -5.87% (p=0.008 n=5+5) Gzip-64 416501238.400000ns +- 0% 410463405.400000ns +- 0% -1.45% (p=0.008 n=5+5) Gunzip-64 58088415.200000ns +- 0% 58826209.600000ns +- 0% +1.27% (p=0.008 n=5+5) HTTPClientServer-64 128660.200000ns +-23% 117840.800000ns +- 8% ~ (p=0.222 n=5+5) JSONEncode-64 17547746.800000ns +- 4% 17216180.000000ns +- 1% ~ (p=0.222 n=5+5) JSONDecode-64 80879896.000000ns +- 1% 80063737.200000ns +- 0% -1.01% (p=0.008 n=5+5) Mandelbrot200-64 5484901.600000ns +- 0% 5483614.400000ns +- 0% ~ (p=0.310 n=5+5) GoParse-64 6201166.800000ns +- 6% 6150920.600000ns +- 1% ~ (p=0.548 n=5+5) RegexpMatchEasy0_32-64 135.000000ns +- 0% 139.200000ns +- 7% ~ (p=0.643 n=5+5) RegexpMatchEasy0_1K-64 484.600000ns +- 2% 483.800000ns +- 2% ~ (p=0.984 n=5+5) RegexpMatchEasy1_32-64 128.000000ns +- 1% 124.600000ns +- 1% -2.66% (p=0.008 n=5+5) RegexpMatchEasy1_1K-64 769.400000ns +- 2% 761.400000ns +- 1% ~ (p=0.460 n=5+5) RegexpMatchMedium_32-64 12.900000ns +- 0% 12.500000ns +- 0% -3.10% (p=0.008 n=5+5) RegexpMatchMedium_1K-64 57879.200000ns +- 1% 56512.200000ns +- 0% -2.36% (p=0.008 n=5+5) RegexpMatchHard_32-64 3091.600000ns +- 1% 3071.000000ns +- 0% -0.67% (p=0.048 n=5+5) RegexpMatchHard_1K-64 92941.200000ns +- 1% 92794.000000ns +- 0% ~ (p=1.000 n=5+5) Revcomp-64 1695605187.000000ns +-54% 1821697637.400000ns +-47% ~ (p=1.000 n=5+5) Template-64 112839686.800000ns +- 1% 109964069.200000ns +- 3% ~ (p=0.095 n=5+5) TimeParse-64 587.000000ns +- 0% 587.000000ns +- 0% ~ (all equal) TimeFormat-64 586.000000ns +- 1% 584.200000ns +- 1% ~ (p=0.659 n=5+5) [Geo mean] 81804.262218ns 80694.712973ns -1.36% name old speed new speed delta pkg:test/bench/go1 goos:linux goarch:arm64 GobDecode-64 63.6MB/s +- 2% 66.0MB/s +- 1% +3.78% (p=0.008 n=5+5) GobEncode-64 90.6MB/s +- 2% 96.2MB/s +- 2% +6.23% (p=0.008 n=5+5) Gzip-64 46.6MB/s +- 0% 47.3MB/s +- 0% +1.47% (p=0.008 n=5+5) Gunzip-64 334MB/s +- 0% 330MB/s +- 0% -1.25% (p=0.008 n=5+5) JSONEncode-64 111MB/s +- 4% 113MB/s +- 1% ~ (p=0.222 n=5+5) JSONDecode-64 24.0MB/s +- 1% 24.2MB/s +- 0% +1.02% (p=0.008 n=5+5) GoParse-64 9.35MB/s +- 6% 9.42MB/s +- 1% ~ (p=0.571 n=5+5) RegexpMatchEasy0_32-64 237MB/s +- 0% 231MB/s +- 7% ~ (p=0.690 n=5+5) RegexpMatchEasy0_1K-64 2.11GB/s +- 2% 2.12GB/s +- 2% ~ (p=1.000 n=5+5) RegexpMatchEasy1_32-64 250MB/s +- 1% 257MB/s +- 1% +2.63% (p=0.008 n=5+5) RegexpMatchEasy1_1K-64 1.33GB/s +- 2% 1.35GB/s +- 1% ~ (p=0.548 n=5+5) RegexpMatchMedium_32-64 77.6MB/s +- 0% 79.8MB/s +- 0% +2.80% (p=0.008 n=5+5) RegexpMatchMedium_1K-64 17.7MB/s +- 1% 18.1MB/s +- 0% +2.41% (p=0.008 n=5+5) RegexpMatchHard_32-64 10.4MB/s +- 1% 10.4MB/s +- 0% ~ (p=0.056 n=5+5) RegexpMatchHard_1K-64 11.0MB/s +- 1% 11.0MB/s +- 0% ~ (p=0.984 n=5+5) Revcomp-64 188MB/s +-71% 155MB/s +-71% ~ (p=1.000 n=5+5) Template-64 17.2MB/s +- 1% 17.7MB/s +- 3% ~ (p=0.095 n=5+5) [Geo mean] 79.2MB/s 79.3MB/s +0.24% Change-Id: I593ac3e7037afafc3605ad4b0cfb51d5dd88015d Reviewed-on: https://go-review.googlesource.com/c/go/+/232438 Trust: Alberto Donizetti Run-TryBot: Alberto Donizetti TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/testdata/arm64.s | 15 ++++++++-- src/cmd/internal/obj/arm64/a.out.go | 7 +++-- src/cmd/internal/obj/arm64/anames7.go | 1 + src/cmd/internal/obj/arm64/asm7.go | 46 +++++++++++++++++++++---------- 4 files changed, 50 insertions(+), 19 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index e106ff2ae1..acfb16b096 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -340,8 +340,19 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 MOVD $0x1111ffff1111aaaa, R1 // MOVD $1230045644216969898, R1 // a1aa8a922122a2f22122e2f2 MOVD $0, R1 // 010080d2 MOVD $-1, R1 // 01008092 - MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2 - MOVD $0xffffffffffffaaaa, R1 // MOVD $-21846, R1 // a1aa8a92 + MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2 + MOVD $0xffffffffffffaaaa, R1 // MOVD $-21846, R1 // a1aa8a92 + + MOVD $0x1002(RSP), R1 // MOVD $4098(RSP), R1 // fb074091610b0091 + MOVD $0x1708(RSP), RSP // MOVD $5896(RSP), RSP // fb0740917f231c91 + MOVD $0x2001(R7), R1 // MOVD $8193(R7), R1 // fb08409161070091 + MOVD $0xffffff(R7), R1 // MOVD $16777215(R7), R1 // fbfc7f9161ff3f91 + + MOVD $-0x1(R7), R1 // MOVD $-1(R7), R1 // e10400d1 + MOVD $-0x30(R7), R1 // MOVD $-48(R7), R1 // e1c000d1 + MOVD $-0x708(R7), R1 // MOVD $-1800(R7), R1 // e1201cd1 + MOVD $-0x2000(RSP), R1 // MOVD $-8192(RSP), R1 // e10b40d1 + MOVD $-0x10000(RSP), RSP // MOVD $-65536(RSP), RSP // ff4340d1 // // CLS diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 2839da1437..b3c9e9a18e 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -410,9 +410,10 @@ const ( C_FCON // floating-point constant C_VCONADDR // 64-bit memory address - C_AACON // ADDCON offset in auto constant $a(FP) - C_LACON // 32-bit offset in auto constant $a(FP) - C_AECON // ADDCON offset in extern constant $e(SB) + C_AACON // ADDCON offset in auto constant $a(FP) + C_AACON2 // 24-bit offset in auto constant $a(FP) + C_LACON // 32-bit offset in auto constant $a(FP) + C_AECON // ADDCON offset in extern constant $e(SB) // TODO(aram): only one branch class should be enough C_SBRA // for TYPE_BRANCH diff --git a/src/cmd/internal/obj/arm64/anames7.go b/src/cmd/internal/obj/arm64/anames7.go index e1703fc4ab..96c9f788d9 100644 --- a/src/cmd/internal/obj/arm64/anames7.go +++ b/src/cmd/internal/obj/arm64/anames7.go @@ -36,6 +36,7 @@ var cnames7 = []string{ "FCON", "VCONADDR", "AACON", + "AACON2", "LACON", "AECON", "SBRA", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index df4bbbbd35..fc2033d689 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -391,7 +391,11 @@ var optab = []Optab{ {AMOVD, C_VCON, C_NONE, C_NONE, C_REG, 12, 16, 0, NOTUSETMP, 0}, {AMOVK, C_VCON, C_NONE, C_NONE, C_REG, 33, 4, 0, 0, 0}, - {AMOVD, C_AACON, C_NONE, C_NONE, C_REG, 4, 4, REGFROM, 0, 0}, + {AMOVD, C_AACON, C_NONE, C_NONE, C_RSP, 4, 4, REGFROM, 0, 0}, + {AMOVD, C_AACON2, C_NONE, C_NONE, C_RSP, 4, 8, REGFROM, 0, 0}, + + /* load long effective stack address (load int32 offset and add) */ + {AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0}, // Move a large constant to a Vn. {AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, @@ -594,9 +598,6 @@ var optab = []Optab{ {AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, - /* load long effective stack address (load int32 offset and add) */ - {AMOVD, C_LACON, C_NONE, C_NONE, C_REG, 34, 8, REGSP, LFROM, 0}, - /* pre/post-indexed load (unscaled, signed 9-bit offset) */ {AMOVD, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, {AMOVW, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, @@ -1361,6 +1362,10 @@ func isaddcon(v int64) bool { return v <= 0xFFF } +func isaddcon2(v int64) bool { + return 0 <= v && v <= 0xFFFFFF +} + // isbitcon reports whether a constant can be encoded into a logical instruction. // bitcon has a binary form of repetition of a bit sequence of length 2, 4, 8, 16, 32, or 64, // which itself is a rotate (w.r.t. the length of the unit) of a sequence of ones. @@ -1889,10 +1894,14 @@ func (c *ctxt7) aclass(a *obj.Addr) int { default: return C_GOK } - - if isaddcon(c.instoffset) { + cf := c.instoffset + if isaddcon(cf) || isaddcon(-cf) { return C_AACON } + if isaddcon2(cf) { + return C_AACON2 + } + return C_LACON case obj.TYPE_BRANCH: @@ -2046,7 +2055,7 @@ func cmp(a int, b int) bool { return cmp(C_LCON, b) case C_LACON: - if b == C_AACON { + if b == C_AACON || b == C_AACON2 { return true } @@ -3062,11 +3071,10 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 |= (uint32(r&31) << 5) | uint32(rt&31) - case 4: /* mov $addcon, R; mov $recon, R; mov $racon, R */ - o1 = c.opirr(p, p.As) - + case 4: /* mov $addcon, R; mov $recon, R; mov $racon, R; mov $addcon2, R */ rt := int(p.To.Reg) r := int(o.param) + if r == 0 { r = REGZERO } else if r == REGFROM { @@ -3075,13 +3083,23 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { if r == 0 { r = REGSP } + v := int32(c.regoff(&p.From)) - if (v & 0xFFF000) != 0 { - v >>= 12 - o1 |= 1 << 22 /* shift, by 12 */ + var op int32 + if v < 0 { + v = -v + op = int32(c.opirr(p, ASUB)) + } else { + op = int32(c.opirr(p, AADD)) + } + + if int(o.size) == 8 { + o1 = c.oaddi(p, op, v&0xfff000, r, REGTMP) + o2 = c.oaddi(p, op, v&0x000fff, REGTMP, rt) + break } - o1 |= ((uint32(v) & 0xFFF) << 10) | (uint32(r&31) << 5) | uint32(rt&31) + o1 = c.oaddi(p, op, v, r, rt) case 5: /* b s; bl s */ o1 = c.opbra(p, p.As) -- cgit v1.3 From 967465da2975fe4322080703ce5a77ea90752829 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Mon, 31 Aug 2020 09:43:40 -0400 Subject: cmd/compile: use combined shifts to improve array addressing on ppc64x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds rules to find pairs of instructions that can be combined into a single shifts. These instruction sequences are common in array addressing within loops. Improvements can be seen in many crypto packages and the hash packages. These are based on the extended mnemonics found in the ISA sections C.8.1 and C.8.2. Some rules in PPC64.rules were moved because the ordering prevented some matching. The following results were generated on power9. hash/crc32: CRC32/poly=Koopman/size=40/align=0 195ns ± 0% 163ns ± 0% -16.41% CRC32/poly=Koopman/size=40/align=1 200ns ± 0% 163ns ± 0% -18.50% CRC32/poly=Koopman/size=512/align=0 1.98µs ± 0% 1.67µs ± 0% -15.46% CRC32/poly=Koopman/size=512/align=1 1.98µs ± 0% 1.69µs ± 0% -14.80% CRC32/poly=Koopman/size=1kB/align=0 3.90µs ± 0% 3.31µs ± 0% -15.27% CRC32/poly=Koopman/size=1kB/align=1 3.85µs ± 0% 3.31µs ± 0% -14.15% CRC32/poly=Koopman/size=4kB/align=0 15.3µs ± 0% 13.1µs ± 0% -14.22% CRC32/poly=Koopman/size=4kB/align=1 15.4µs ± 0% 13.1µs ± 0% -14.79% CRC32/poly=Koopman/size=32kB/align=0 137µs ± 0% 105µs ± 0% -23.56% CRC32/poly=Koopman/size=32kB/align=1 137µs ± 0% 105µs ± 0% -23.53% crypto/rc4: RC4_128 733ns ± 0% 650ns ± 0% -11.32% (p=1.000 n=1+1) RC4_1K 5.80µs ± 0% 5.17µs ± 0% -10.89% (p=1.000 n=1+1) RC4_8K 45.7µs ± 0% 40.8µs ± 0% -10.73% (p=1.000 n=1+1) crypto/sha1: Hash8Bytes 635ns ± 0% 613ns ± 0% -3.46% (p=1.000 n=1+1) Hash320Bytes 2.30µs ± 0% 2.18µs ± 0% -5.38% (p=1.000 n=1+1) Hash1K 5.88µs ± 0% 5.38µs ± 0% -8.62% (p=1.000 n=1+1) Hash8K 42.0µs ± 0% 37.9µs ± 0% -9.75% (p=1.000 n=1+1) There are other improvements found in golang.org/x/crypto which are all in the range of 5-15%. Change-Id: I193471fbcf674151ffe2edab212799d9b08dfb8c Reviewed-on: https://go-review.googlesource.com/c/go/+/252097 Trust: Lynn Boger Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Carlos Eduardo Seo --- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 4 + src/cmd/compile/internal/ppc64/ssa.go | 36 ++ src/cmd/compile/internal/ssa/gen/PPC64.rules | 63 ++- src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 5 + src/cmd/compile/internal/ssa/opGen.go | 45 ++ src/cmd/compile/internal/ssa/rewrite.go | 38 ++ src/cmd/compile/internal/ssa/rewritePPC64.go | 787 +++++++++++++++++++++++++++ src/cmd/internal/obj/ppc64/a.out.go | 4 + src/cmd/internal/obj/ppc64/anames.go | 4 + src/cmd/internal/obj/ppc64/asm9.go | 74 ++- test/codegen/shift.go | 55 ++ 11 files changed, 1087 insertions(+), 28 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s index 10a05ec402..e26f6f8933 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64enc.s @@ -284,6 +284,10 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 RLDICLCC $0, R4, $15, R6 // 788603c1 RLDICR $0, R4, $15, R6 // 788603c4 RLDICRCC $0, R4, $15, R6 // 788603c5 + RLDIC $0, R4, $15, R6 // 788603c8 + RLDICCC $0, R4, $15, R6 // 788603c9 + CLRLSLWI $16, R5, $8, R4 // 54a4861e + CLRLSLDI $2, R4, $24, R3 // 78831588 BEQ 0(PC) // 41820000 BGE 0(PC) // 40800000 diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index f8d9ac2379..4a83a0bdd7 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -565,6 +565,42 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p = s.Prog(obj.ANOP) gc.Patch(pbover, p) + case ssa.OpPPC64CLRLSLWI: + r := v.Reg() + r1 := v.Args[0].Reg() + shifts := v.AuxInt + p := s.Prog(v.Op.Asm()) + // clrlslwi ra,rs,sh,mb will become rlwinm ra,rs,sh,mb-sh,31-n as described in ISA + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + p.Reg = r1 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + + case ssa.OpPPC64CLRLSLDI: + r := v.Reg() + r1 := v.Args[0].Reg() + shifts := v.AuxInt + p := s.Prog(v.Op.Asm()) + // clrlsldi ra,rs,sh,mb will become rldic ra,rs,sh,mb-sh + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + p.Reg = r1 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + + // Mask has been set as sh + case ssa.OpPPC64RLDICL: + r := v.Reg() + r1 := v.Args[0].Reg() + shifts := v.AuxInt + p := s.Prog(v.Op.Asm()) + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + p.Reg = r1 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS, ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU, ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW, diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index e5fb1e98c2..774d5096de 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -79,6 +79,23 @@ (Abs ...) => (FABS ...) (FMA ...) => (FMADD ...) +// Lowering extension +// Note: we always extend to 64 bits even though some ops don't need that many result bits. +(SignExt8to(16|32|64) ...) => (MOVBreg ...) +(SignExt16to(32|64) ...) => (MOVHreg ...) +(SignExt32to64 ...) => (MOVWreg ...) + +(ZeroExt8to(16|32|64) ...) => (MOVBZreg ...) +(ZeroExt16to(32|64) ...) => (MOVHZreg ...) +(ZeroExt32to64 ...) => (MOVWZreg ...) + +(Trunc(16|32|64)to8 x) && isSigned(t) => (MOVBreg x) +(Trunc(16|32|64)to8 x) => (MOVBZreg x) +(Trunc(32|64)to16 x) && isSigned(t) => (MOVHreg x) +(Trunc(32|64)to16 x) => (MOVHZreg x) +(Trunc64to32 x) && isSigned(t) => (MOVWreg x) +(Trunc64to32 x) => (MOVWZreg x) + // Lowering constants (Const(64|32|16|8) [val]) => (MOVDconst [int64(val)]) (Const(32|64)F ...) => (FMOV(S|D)const ...) @@ -780,6 +797,21 @@ (MOVWreg y:(MOVWZreg x)) => (MOVWreg x) (MOVWZreg y:(MOVWreg x)) => (MOVWZreg x) +// Truncate then logical then truncate: omit first, lesser or equal truncate +(MOVWZreg ((OR|XOR|AND) x (MOVWZreg y))) => (MOVWZreg ((OR|XOR|AND) x y)) +(MOVHZreg ((OR|XOR|AND) x (MOVWZreg y))) => (MOVHZreg ((OR|XOR|AND) x y)) +(MOVHZreg ((OR|XOR|AND) x (MOVHZreg y))) => (MOVHZreg ((OR|XOR|AND) x y)) +(MOVBZreg ((OR|XOR|AND) x (MOVWZreg y))) => (MOVBZreg ((OR|XOR|AND) x y)) +(MOVBZreg ((OR|XOR|AND) x (MOVHZreg y))) => (MOVBZreg ((OR|XOR|AND) x y)) +(MOVBZreg ((OR|XOR|AND) x (MOVBZreg y))) => (MOVBZreg ((OR|XOR|AND) x y)) + +(MOV(B|H|W)Zreg z:(ANDconst [c] (MOVBZload ptr x))) => z +(MOVBZreg z:(AND y (MOVBZload ptr x))) => z +(MOV(H|W)Zreg z:(ANDconst [c] (MOVHZload ptr x))) => z +(MOVHZreg z:(AND y (MOVHZload ptr x))) => z +(MOVWZreg z:(ANDconst [c] (MOVWZload ptr x))) => z +(MOVWZreg z:(AND y (MOVWZload ptr x))) => z + // Arithmetic constant ops (ADD x (MOVDconst [c])) && is32Bit(c) => (ADDconst [c] x) @@ -949,23 +981,6 @@ (AtomicAnd8 ...) => (LoweredAtomicAnd8 ...) (AtomicOr8 ...) => (LoweredAtomicOr8 ...) -// Lowering extension -// Note: we always extend to 64 bits even though some ops don't need that many result bits. -(SignExt8to(16|32|64) ...) => (MOVBreg ...) -(SignExt16to(32|64) ...) => (MOVHreg ...) -(SignExt32to64 ...) => (MOVWreg ...) - -(ZeroExt8to(16|32|64) ...) => (MOVBZreg ...) -(ZeroExt16to(32|64) ...) => (MOVHZreg ...) -(ZeroExt32to64 ...) => (MOVWZreg ...) - -(Trunc(16|32|64)to8 x) && isSigned(t) => (MOVBreg x) -(Trunc(16|32|64)to8 x) => (MOVBZreg x) -(Trunc(32|64)to16 x) && isSigned(t) => (MOVHreg x) -(Trunc(32|64)to16 x) => (MOVHZreg x) -(Trunc64to32 x) && isSigned(t) => (MOVWreg x) -(Trunc64to32 x) => (MOVWZreg x) - (Slicemask x) => (SRADconst (NEG x) [63]) // Note that MOV??reg returns a 64-bit int, x is not necessarily that wide @@ -996,6 +1011,20 @@ (MOVWreg (MOVDconst [c])) => (MOVDconst [int64(int32(c))]) (MOVWZreg (MOVDconst [c])) => (MOVDconst [int64(uint32(c))]) +// Implement clrsldi and clrslwi extended mnemonics as described in +// ISA 3.0 section C.8. AuxInt field contains values needed for +// the instructions, packed together since there is only one available. +(SLDconst [c] z:(MOVBZreg x)) && c < 8 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,56,63,64)] x) +(SLDconst [c] z:(MOVHZreg x)) && c < 16 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,48,63,64)] x) +(SLDconst [c] z:(MOVWZreg x)) && c < 32 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,32,63,64)] x) + +(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) +(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) +(SLWconst [c] z:(MOVBZreg x)) && z.Uses == 1 && c < 8 => (CLRLSLWI [newPPC64ShiftAuxInt(c,24,31,32)] x) +(SLWconst [c] z:(MOVHZreg x)) && z.Uses == 1 && c < 16 => (CLRLSLWI [newPPC64ShiftAuxInt(c,16,31,32)] x) +(SLWconst [c] z:(MOVWZreg x)) && z.Uses == 1 && c < 24 => (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) +(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) // Lose widening ops fed to stores (MOVBstore [off] {sym} ptr (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) => (MOVBstore [off] {sym} ptr x mem) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index 37706b2dd9..ed99c40cd2 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -206,6 +206,11 @@ func init() { {name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"}, // arg0 rotate left by arg1 mod 64 {name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32 + // The following are ops to implement the extended mnemonics for shifts as described in section C.8 of the ISA. + // The constant shift values are packed into the aux int32. + {name: "RLDICL", argLength: 1, reg: gp11, asm: "RLDICL", aux: "Int32"}, // arg0 extract bits identified by shift params" + {name: "CLRLSLWI", argLength: 1, reg: gp11, asm: "CLRLSLWI", aux: "Int32"}, // + {name: "CLRLSLDI", argLength: 1, reg: gp11, asm: "CLRLSLDI", aux: "Int32"}, // {name: "LoweredAdd64Carry", argLength: 3, reg: gp32, resultNotInArgs: true}, // arg0 + arg1 + carry, returns (sum, carry) diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index d95943231a..f00dc3f7f5 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1853,6 +1853,9 @@ const ( OpPPC64SLW OpPPC64ROTL OpPPC64ROTLW + OpPPC64RLDICL + OpPPC64CLRLSLWI + OpPPC64CLRLSLDI OpPPC64LoweredAdd64Carry OpPPC64SRADconst OpPPC64SRAWconst @@ -24672,6 +24675,48 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "RLDICL", + auxType: auxInt32, + argLen: 1, + asm: ppc64.ARLDICL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, + { + name: "CLRLSLWI", + auxType: auxInt32, + argLen: 1, + asm: ppc64.ACLRLSLWI, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, + { + name: "CLRLSLDI", + auxType: auxInt32, + argLen: 1, + asm: ppc64.ACLRLSLDI, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, { name: "LoweredAdd64Carry", argLen: 3, diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 2ab310ad85..d9c3e455a0 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1325,6 +1325,44 @@ func hasSmallRotate(c *Config) bool { } } +func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 { + if sh < 0 || sh >= sz { + panic("PPC64 shift arg sh out of range") + } + if mb < 0 || mb >= sz { + panic("PPC64 shift arg mb out of range") + } + if me < 0 || me >= sz { + panic("PPC64 shift arg me out of range") + } + return int32(sh<<16 | mb<<8 | me) +} + +func GetPPC64Shiftsh(auxint int64) int64 { + return int64(int8(auxint >> 16)) +} + +func GetPPC64Shiftmb(auxint int64) int64 { + return int64(int8(auxint >> 8)) +} + +func GetPPC64Shiftme(auxint int64) int64 { + return int64(int8(auxint)) +} + +// Catch the simple ones first +// TODO: Later catch more cases +func isPPC64ValidShiftMask(v int64) bool { + if ((v + 1) & v) == 0 { + return true + } + return false +} + +func getPPC64ShiftMaskLength(v int64) int64 { + return int64(bits.Len64(uint64(v))) +} + // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format. func armBFAuxInt(lsb, width int64) arm64BitField { if lsb < 0 || lsb > 63 { diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 152cdfdf4d..12b08824b5 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -586,8 +586,12 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64ROTLW(v) case OpPPC64SLD: return rewriteValuePPC64_OpPPC64SLD(v) + case OpPPC64SLDconst: + return rewriteValuePPC64_OpPPC64SLDconst(v) case OpPPC64SLW: return rewriteValuePPC64_OpPPC64SLW(v) + case OpPPC64SLWconst: + return rewriteValuePPC64_OpPPC64SLWconst(v) case OpPPC64SRAD: return rewriteValuePPC64_OpPPC64SRAD(v) case OpPPC64SRAW: @@ -6565,6 +6569,255 @@ func rewriteValuePPC64_OpPPC64MOVBZreg(v *Value) bool { v.AddArg(x) return true } + // match: (MOVBZreg (OR x (MOVWZreg y))) + // result: (MOVBZreg (OR x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (XOR x (MOVWZreg y))) + // result: (MOVBZreg (XOR x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (AND x (MOVWZreg y))) + // result: (MOVBZreg (AND x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (OR x (MOVHZreg y))) + // result: (MOVBZreg (OR x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (XOR x (MOVHZreg y))) + // result: (MOVBZreg (XOR x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (AND x (MOVHZreg y))) + // result: (MOVBZreg (AND x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (OR x (MOVBZreg y))) + // result: (MOVBZreg (OR x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVBZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (XOR x (MOVBZreg y))) + // result: (MOVBZreg (XOR x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVBZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (AND x (MOVBZreg y))) + // result: (MOVBZreg (AND x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVBZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg z:(ANDconst [c] (MOVBZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVBZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVBZreg z:(AND y (MOVBZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_1.Op != OpPPC64MOVBZload { + continue + } + v.copyOf(z) + return true + } + break + } // match: (MOVBZreg x:(MOVBZload _ _)) // result: x for { @@ -8507,6 +8760,197 @@ func rewriteValuePPC64_OpPPC64MOVHZreg(v *Value) bool { v.AddArg(x) return true } + // match: (MOVHZreg (OR x (MOVWZreg y))) + // result: (MOVHZreg (OR x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (XOR x (MOVWZreg y))) + // result: (MOVHZreg (XOR x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (AND x (MOVWZreg y))) + // result: (MOVHZreg (AND x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (OR x (MOVHZreg y))) + // result: (MOVHZreg (OR x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (XOR x (MOVHZreg y))) + // result: (MOVHZreg (XOR x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (AND x (MOVHZreg y))) + // result: (MOVHZreg (AND x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg z:(ANDconst [c] (MOVBZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVBZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVHZreg z:(ANDconst [c] (MOVHZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVHZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVHZreg z:(AND y (MOVHZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_1.Op != OpPPC64MOVHZload { + continue + } + v.copyOf(z) + return true + } + break + } // match: (MOVHZreg x:(MOVBZload _ _)) // result: x for { @@ -9657,6 +10101,139 @@ func rewriteValuePPC64_OpPPC64MOVWZreg(v *Value) bool { v.AddArg(x) return true } + // match: (MOVWZreg (OR x (MOVWZreg y))) + // result: (MOVWZreg (OR x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVWZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVWZreg (XOR x (MOVWZreg y))) + // result: (MOVWZreg (XOR x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVWZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVWZreg (AND x (MOVWZreg y))) + // result: (MOVWZreg (AND x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVWZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVWZreg z:(ANDconst [c] (MOVBZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVBZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVWZreg z:(ANDconst [c] (MOVHZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVHZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVWZreg z:(ANDconst [c] (MOVWZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVWZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVWZreg z:(AND y (MOVWZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_1.Op != OpPPC64MOVWZload { + continue + } + v.copyOf(z) + return true + } + break + } // match: (MOVWZreg x:(MOVBZload _ _)) // result: x for { @@ -12197,6 +12774,111 @@ func rewriteValuePPC64_OpPPC64SLD(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SLDconst [c] z:(MOVBZreg x)) + // cond: c < 8 && z.Uses == 1 + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,56,63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVBZreg { + break + } + x := z.Args[0] + if !(c < 8 && z.Uses == 1) { + break + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 56, 63, 64)) + v.AddArg(x) + return true + } + // match: (SLDconst [c] z:(MOVHZreg x)) + // cond: c < 16 && z.Uses == 1 + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,48,63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVHZreg { + break + } + x := z.Args[0] + if !(c < 16 && z.Uses == 1) { + break + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 48, 63, 64)) + v.AddArg(x) + return true + } + // match: (SLDconst [c] z:(MOVWZreg x)) + // cond: c < 32 && z.Uses == 1 + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,32,63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWZreg { + break + } + x := z.Args[0] + if !(c < 32 && z.Uses == 1) { + break + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 32, 63, 64)) + v.AddArg(x) + return true + } + // match: (SLDconst [c] z:(ANDconst [d] x)) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + d := auxIntToInt64(z.AuxInt) + x := z.Args[0] + if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + break + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 64-getPPC64ShiftMaskLength(d), 63, 64)) + v.AddArg(x) + return true + } + // match: (SLDconst [c] z:(AND (MOVDconst [d]) x)) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_0.Op != OpPPC64MOVDconst { + continue + } + d := auxIntToInt64(z_0.AuxInt) + x := z_1 + if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + continue + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 64-getPPC64ShiftMaskLength(d), 63, 64)) + v.AddArg(x) + return true + } + break + } + return false +} func rewriteValuePPC64_OpPPC64SLW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -12215,6 +12897,111 @@ func rewriteValuePPC64_OpPPC64SLW(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SLWconst [c] z:(MOVBZreg x)) + // cond: z.Uses == 1 && c < 8 + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,24,31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVBZreg { + break + } + x := z.Args[0] + if !(z.Uses == 1 && c < 8) { + break + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 24, 31, 32)) + v.AddArg(x) + return true + } + // match: (SLWconst [c] z:(MOVHZreg x)) + // cond: z.Uses == 1 && c < 16 + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,16,31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVHZreg { + break + } + x := z.Args[0] + if !(z.Uses == 1 && c < 16) { + break + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 16, 31, 32)) + v.AddArg(x) + return true + } + // match: (SLWconst [c] z:(MOVWZreg x)) + // cond: z.Uses == 1 && c < 24 + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWZreg { + break + } + x := z.Args[0] + if !(z.Uses == 1 && c < 24) { + break + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 8, 31, 32)) + v.AddArg(x) + return true + } + // match: (SLWconst [c] z:(ANDconst [d] x)) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + d := auxIntToInt64(z.AuxInt) + x := z.Args[0] + if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + break + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 32-getPPC64ShiftMaskLength(d), 31, 32)) + v.AddArg(x) + return true + } + // match: (SLWconst [c] z:(AND (MOVDconst [d]) x)) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_0.Op != OpPPC64MOVDconst { + continue + } + d := auxIntToInt64(z_0.AuxInt) + x := z_1 + if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + continue + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 32-getPPC64ShiftMaskLength(d), 31, 32)) + v.AddArg(x) + return true + } + break + } + return false +} func rewriteValuePPC64_OpPPC64SRAD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go index 8b32692778..f438803fb5 100644 --- a/src/cmd/internal/obj/ppc64/a.out.go +++ b/src/cmd/internal/obj/ppc64/a.out.go @@ -575,6 +575,7 @@ const ( ARLWMICC ARLWNM ARLWNMCC + ACLRLSLWI ASLW ASLWCC ASRW @@ -716,6 +717,9 @@ const ( ARLDCLCC ARLDICL ARLDICLCC + ARLDIC + ARLDICCC + ACLRLSLDI AROTL AROTLW ASLBIA diff --git a/src/cmd/internal/obj/ppc64/anames.go b/src/cmd/internal/obj/ppc64/anames.go index 287011877c..accd87fe00 100644 --- a/src/cmd/internal/obj/ppc64/anames.go +++ b/src/cmd/internal/obj/ppc64/anames.go @@ -180,6 +180,7 @@ var Anames = []string{ "RLWMICC", "RLWNM", "RLWNMCC", + "CLRLSLWI", "SLW", "SLWCC", "SRW", @@ -312,6 +313,9 @@ var Anames = []string{ "RLDCLCC", "RLDICL", "RLDICLCC", + "RLDIC", + "RLDICCC", + "CLRLSLDI", "ROTL", "ROTLW", "SLBIA", diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 98b453de6c..60dda72507 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -1904,6 +1904,7 @@ func buildop(ctxt *obj.Link) { opset(ARLWMICC, r0) opset(ARLWNM, r0) opset(ARLWNMCC, r0) + opset(ACLRLSLWI, r0) case ARLDMI: opset(ARLDMICC, r0) @@ -1922,6 +1923,9 @@ func buildop(ctxt *obj.Link) { opset(ARLDICLCC, r0) opset(ARLDICR, r0) opset(ARLDICRCC, r0) + opset(ARLDIC, r0) + opset(ARLDICCC, r0) + opset(ACLRLSLDI, r0) case AFMOVD: opset(AFMOVDCC, r0) @@ -2734,13 +2738,31 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { case ARLDICR, ARLDICRCC: me := int(d) sh := c.regoff(&p.From) + if me < 0 || me > 63 || sh > 63 { + c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh) + } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(me)) - case ARLDICL, ARLDICLCC: + case ARLDICL, ARLDICLCC, ARLDIC, ARLDICCC: mb := int(d) sh := c.regoff(&p.From) + if mb < 0 || mb > 63 || sh > 63 { + c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh) + } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(mb)) + case ACLRLSLDI: + // This is an extended mnemonic defined in the ISA section C.8.1 + // clrlsldi ra,rs,n,b --> rldic ra,rs,n,b-n + // It maps onto RLDIC so is directly generated here based on the operands from + // the clrlsldi. + b := int(d) + n := c.regoff(&p.From) + if n > int32(b) || b > 63 { + c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b) + } + o1 = AOP_RLDIC(OP_RLDIC, uint32(p.To.Reg), uint32(r), uint32(n), uint32(b)-uint32(n)) + default: c.ctxt.Diag("unexpected op in rldc case\n%v", p) a = 0 @@ -3354,18 +3376,43 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { case 62: /* rlwmi $sh,s,$mask,a */ v := c.regoff(&p.From) - - var mask [2]uint8 - c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) - o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v)) - o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + switch p.As { + case ACLRLSLWI: + b := c.regoff(p.GetFrom3()) + // This is an extended mnemonic described in the ISA C.8.2 + // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // It maps onto rlwinm which is directly generated here. + if v < 0 || v > 32 || b > 32 { + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + } + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v)) + default: + var mask [2]uint8 + c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) + o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v)) + o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + } case 63: /* rlwmi b,s,$mask,a */ - var mask [2]uint8 - c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) - - o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(p.From.Reg)) - o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + v := c.regoff(&p.From) + switch p.As { + case ACLRLSLWI: + b := c.regoff(p.GetFrom3()) + if v > b || b > 32 { + // Message will match operands from the ISA even though in the + // code it uses 'v' + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + } + // This is an extended mnemonic described in the ISA C.8.2 + // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // It generates the rlwinm directly here. + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v)) + default: + var mask [2]uint8 + c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) + o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v)) + o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + } case 64: /* mtfsf fr[, $m] {,fpcsr} */ var v int32 @@ -4277,6 +4324,11 @@ func (c *ctxt9) oprrr(a obj.As) uint32 { case ARLDICRCC: return OPVCC(30, 0, 0, 1) | 2<<1 // rldicr. + case ARLDIC: + return OPVCC(30, 0, 0, 0) | 4<<1 // rldic + case ARLDICCC: + return OPVCC(30, 0, 0, 1) | 4<<1 // rldic. + case ASYSCALL: return OPVCC(17, 1, 0, 0) diff --git a/test/codegen/shift.go b/test/codegen/shift.go index 5e50ea6bff..32214851b5 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -150,6 +150,61 @@ func lshGuarded64(v int64, s uint) int64 { panic("shift too large") } +func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) { + + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f := tab[byte(v)^b] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[byte(v)&b] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[byte(v)|b] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[uint16(v)&h] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[uint16(v)^h] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[uint16(v)|h] + // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI" + // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI" + f += tab[v&0xff] + // ppc64le:-".*AND",".*CLRLSLWI" + // ppc64:-".*AND",".*CLRLSLWI" + f += 2*uint32(uint16(d)) + // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI" + // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI" + g := 2*uint64(uint32(d)) + return f, g +} + +func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, uint16, uint32, uint64) { + + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + f := (v8 &0xF) << 2 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + f += byte(v16)<<3 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + g := (v16 & 0xFF) << 3 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + h := (v32 & 0xFFFFF) << 2 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + h += uint32(v64)<<4 + // ppc64le:-"AND","CLRLSLDI" + // ppc64:-"AND","CLRLSLDI" + i := (v64 & 0xFFFFFFFF) << 5 + return f, g, h, i +} + func checkWidenAfterShift(v int64, u uint64) (int64, uint64) { // ppc64le:-".*MOVW" -- cgit v1.3 From 75fab04b83a832eb84bec9e1f23d395a342c865c Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Thu, 17 Sep 2020 15:31:07 -0400 Subject: cmd/asm: make asm -S flag consistent with compile -S flag Change things so that the -S command line option for the assembler works the same as -S in the compiler, e.g. you can use -S=2 to get additional detail. Change-Id: I7bdfba39a98e67c7ae4b93019e171b188bb99a2d Reviewed-on: https://go-review.googlesource.com/c/go/+/255717 Trust: Than McIntosh Run-TryBot: Than McIntosh Reviewed-by: Cherry Zhang Reviewed-by: David Chase TryBot-Result: Go Bot --- src/cmd/asm/internal/flags/flags.go | 7 ++++--- src/cmd/asm/main.go | 4 +--- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/flags/flags.go b/src/cmd/asm/internal/flags/flags.go index 1df9df9563..64024cc97d 100644 --- a/src/cmd/asm/internal/flags/flags.go +++ b/src/cmd/asm/internal/flags/flags.go @@ -17,7 +17,6 @@ import ( var ( Debug = flag.Bool("debug", false, "dump instructions as they are parsed") OutputFile = flag.String("o", "", "output file; default foo.o for /a/b/c/foo.s as first argument") - PrintOut = flag.Bool("S", false, "print assembly and machine code") TrimPath = flag.String("trimpath", "", "remove prefix from recorded source file paths") Shared = flag.Bool("shared", false, "generate code that can be linked into a shared library") Dynlink = flag.Bool("dynlink", false, "support references to Go symbols defined in other shared libraries") @@ -28,14 +27,16 @@ var ( ) var ( - D MultiFlag - I MultiFlag + D MultiFlag + I MultiFlag + PrintOut int ) func init() { flag.Var(&D, "D", "predefined symbol with optional simple value -D=identifier=value; can be set multiple times") flag.Var(&I, "I", "include directory; can be set multiple times") objabi.AddVersionFlag() // -V + objabi.Flagcount("S", "print assembly and machine code", &PrintOut) } // MultiFlag allows setting a value multiple times to collect a list, as in -I=dir1 -I=dir2. diff --git a/src/cmd/asm/main.go b/src/cmd/asm/main.go index a6eb44de73..fd079a2ccd 100644 --- a/src/cmd/asm/main.go +++ b/src/cmd/asm/main.go @@ -35,9 +35,7 @@ func main() { flags.Parse() ctxt := obj.Linknew(architecture.LinkArch) - if *flags.PrintOut { - ctxt.Debugasm = 1 - } + ctxt.Debugasm = flags.PrintOut ctxt.Flag_dynlink = *flags.Dynlink ctxt.Flag_shared = *flags.Shared || *flags.Dynlink ctxt.IsAsm = true -- cgit v1.3 From fa04d488bd54b8fdd78cc9bcc6d90de4bf5f8efb Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Wed, 16 Sep 2020 14:05:18 +0800 Subject: cmd/asm: fix the issue of moving 128-bit integers to vector registers on arm64 The CL 249758 added `FMOVQ $vcon, Vd` instruction and assembler used 128-bit simd literal-loading to load `$vcon` from pool into 128-bit vector register `Vd`. Because Go does not have 128-bit integers for now, the assembler will report an error of `immediate out of range` when assembleing `FMOVQ $0x123456789abcdef0123456789abcdef, V0` instruction. This patch lets 128-bit integers take two 64-bit operands, for the high and low parts separately and adds `VMOVQ $hi, $lo, Vd` instruction to move `$hi<<64+$lo' into 128-bit register `Vd`. In addition, this patch renames `FMOVQ/FMOVD/FMOVS` ops to 'VMOVQ/VMOVD/VMOVS' and uses them to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively Update the go doc. Fixes #40725 Change-Id: Ia3c83bb6463f104d2bee960905053a97299e0a3a Reviewed-on: https://go-review.googlesource.com/c/go/+/255900 Trust: fannie zhang Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/arch/arm64.go | 18 +++++++++------ src/cmd/asm/internal/asm/asm.go | 21 ++++++++--------- src/cmd/asm/internal/asm/testdata/arm64.s | 6 +++-- src/cmd/internal/obj/arm64/a.out.go | 4 +++- src/cmd/internal/obj/arm64/anames.go | 4 +++- src/cmd/internal/obj/arm64/asm7.go | 38 ++++++++++++++++--------------- src/cmd/internal/obj/arm64/doc.go | 10 ++++++++ 7 files changed, 60 insertions(+), 41 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/arch/arm64.go b/src/cmd/asm/internal/arch/arm64.go index 3817fcd5c2..e643889aef 100644 --- a/src/cmd/asm/internal/arch/arm64.go +++ b/src/cmd/asm/internal/arch/arm64.go @@ -82,6 +82,17 @@ func IsARM64STLXR(op obj.As) bool { return false } +// IsARM64TBL reports whether the op (as defined by an arm64.A* +// constant) is one of the TBL-like instructions and one of its +// inputs does not fit into prog.Reg, so require special handling. +func IsARM64TBL(op obj.As) bool { + switch op { + case arm64.AVTBL, arm64.AVMOVQ: + return true + } + return false +} + // ARM64Suffix handles the special suffix for the ARM64. // It returns a boolean to indicate success; failure means // cond was unrecognized. @@ -125,13 +136,6 @@ func arm64RegisterNumber(name string, n int16) (int16, bool) { return 0, false } -// IsARM64TBL reports whether the op (as defined by an arm64.A* -// constant) is one of the table lookup instructions that require special -// handling. -func IsARM64TBL(op obj.As) bool { - return op == arm64.AVTBL -} - // ARM64RegisterExtension parses an ARM64 register with extension or arrangement. func ARM64RegisterExtension(a *obj.Addr, ext string, reg, num int16, isAmount, isIndex bool) error { Rnum := (reg & 31) + int16(num<<5) diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index 42e217dc23..7878d74549 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -622,8 +622,9 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { prog.SetFrom3(a[1]) prog.To = a[2] case sys.ARM64: - // ARM64 instructions with one input and two outputs. - if arch.IsARM64STLXR(op) { + switch { + case arch.IsARM64STLXR(op): + // ARM64 instructions with one input and two outputs. prog.From = a[0] prog.To = a[1] if a[2].Type != obj.TYPE_REG { @@ -631,20 +632,16 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { return } prog.RegTo2 = a[2].Reg - break - } - if arch.IsARM64TBL(op) { + case arch.IsARM64TBL(op): + // one of its inputs does not fit into prog.Reg. prog.From = a[0] - if a[1].Type != obj.TYPE_REGLIST { - p.errorf("%s: expected list; found %s", op, obj.Dconv(prog, &a[1])) - } prog.SetFrom3(a[1]) prog.To = a[2] - break + default: + prog.From = a[0] + prog.Reg = p.getRegister(prog, op, &a[1]) + prog.To = a[2] } - prog.From = a[0] - prog.Reg = p.getRegister(prog, op, &a[1]) - prog.To = a[2] case sys.I386: prog.From = a[0] prog.SetFrom3(a[1]) diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index acfb16b096..e277c04b7c 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -218,8 +218,10 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 FMOVD $(28.0), F4 // 0490671e // move a large constant to a Vd. - FMOVD $0x8040201008040201, V20 // FMOVD $-9205322385119247871, V20 - FMOVQ $0x8040201008040202, V29 // FMOVQ $-9205322385119247870, V29 + VMOVS $0x80402010, V11 // VMOVS $2151686160, V11 + VMOVD $0x8040201008040201, V20 // VMOVD $-9205322385119247871, V20 + VMOVQ $0x7040201008040201, $0x8040201008040201, V10 // VMOVQ $8088500183983456769, $-9205322385119247871, V10 + VMOVQ $0x8040201008040202, $0x7040201008040201, V20 // VMOVQ $-9205322385119247870, $8088500183983456769, V20 FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc FMOVS (R2)(R6<<2), F4 // 447866bc diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index b3c9e9a18e..1ca41c15ba 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -875,7 +875,9 @@ const ( AFLDPS AFMOVD AFMOVS - AFMOVQ + AVMOVQ + AVMOVD + AVMOVS AFMULD AFMULS AFNEGD diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 48c066abfd..900cdba817 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -381,7 +381,9 @@ var Anames = []string{ "FLDPS", "FMOVD", "FMOVS", - "FMOVQ", + "VMOVQ", + "VMOVD", + "VMOVS", "FMULD", "FMULS", "FNEGD", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index fc2033d689..ee4a33eef4 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -260,8 +260,9 @@ func MOVCONST(d int64, s int, rt int) uint32 { const ( // Optab.flag LFROM = 1 << 0 // p.From uses constant pool - LTO = 1 << 1 // p.To uses constant pool - NOTUSETMP = 1 << 2 // p expands to multiple instructions, but does NOT use REGTMP + LFROM3 = 1 << 1 // p.From3 uses constant pool + LTO = 1 << 2 // p.To uses constant pool + NOTUSETMP = 1 << 3 // p expands to multiple instructions, but does NOT use REGTMP ) var optab = []Optab{ @@ -397,10 +398,10 @@ var optab = []Optab{ /* load long effective stack address (load int32 offset and add) */ {AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0}, - // Move a large constant to a Vn. - {AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, - {AFMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, - {AFMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + // Move a large constant to a vector register. + {AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, 101, 4, 0, LFROM | LFROM3, 0}, + {AVMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + {AVMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, /* jump operations */ {AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, @@ -950,13 +951,14 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.ctxt.Diag("zero-width instruction\n%v", p) } } - switch o.flag & (LFROM | LTO) { - case LFROM: + if o.flag&LFROM != 0 { c.addpool(p, &p.From) - - case LTO: + } + if o.flag&LFROM3 != 0 { + c.addpool(p, p.GetFrom3()) + } + if o.flag<O != 0 { c.addpool(p, &p.To) - break } if p.As == AB || p.As == obj.ARET || p.As == AERET { /* TODO: other unconditional operations */ @@ -1174,8 +1176,8 @@ func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) { sz := 4 if a.Type == obj.TYPE_CONST { - if lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit)) { - // out of range -0x80000000 ~ 0xffffffff, must store 64-bit + if (lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit))) || p.As == AVMOVQ || p.As == AVMOVD { + // out of range -0x80000000 ~ 0xffffffff or VMOVQ or VMOVD operand, must store 64-bit. t.As = ADWORD sz = 8 } // else store 32-bit @@ -2675,7 +2677,7 @@ func buildop(ctxt *obj.Link) { case AFCSELD: oprangeset(AFCSELS, t) - case AFMOVS, AFMOVD, AFMOVQ: + case AFMOVS, AFMOVD, AVMOVQ, AVMOVD, AVMOVS: break case AFCVTZSD: @@ -5142,7 +5144,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 = q<<30 | 0xe<<24 | len<<13 o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31) - case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool. + case 101: // VMOVQ $vcon1, $vcon2, Vd or VMOVD|VMOVS $vcon, Vd -> FMOVQ/FMOVD/FMOVS pool(PC), Vd: load from constant pool. o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg)) case 102: /* vushll, vushll2, vuxtl, vuxtl2 */ @@ -6672,15 +6674,15 @@ func (c *ctxt7) omovlit(as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32 { } else { fp, w := 0, 0 switch as { - case AFMOVS: + case AFMOVS, AVMOVS: fp = 1 w = 0 /* 32-bit SIMD/FP */ - case AFMOVD: + case AFMOVD, AVMOVD: fp = 1 w = 1 /* 64-bit SIMD/FP */ - case AFMOVQ: + case AVMOVQ: fp = 1 w = 2 /* 128-bit SIMD/FP */ diff --git a/src/cmd/internal/obj/arm64/doc.go b/src/cmd/internal/obj/arm64/doc.go index 7515217544..efd4577f56 100644 --- a/src/cmd/internal/obj/arm64/doc.go +++ b/src/cmd/internal/obj/arm64/doc.go @@ -86,6 +86,16 @@ In the following example, PCALIGN at the entry of the function Add will align it MOVD $1, R1 RET +7. Move large constants to vector registers. + +Go asm uses VMOVQ/VMOVD/VMOVS to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively. +And for a 128-bit interger, it take two 64-bit operands, for the high and low parts separately. + + Examples: + VMOVS $0x11223344, V0 + VMOVD $0x1122334455667788, V1 + VMOVQ $0x1122334455667788, $8877665544332211, V2 // V2=0x11223344556677888877665544332211 + Special Cases. (1) umov is written as VMOV. -- cgit v1.3 From a424f6e45e29960c933a7ccc1cd8fc9bb2766f15 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Wed, 23 Sep 2020 11:06:39 -0400 Subject: cmd/asm,cmd/compile,cmd/internal/obj/ppc64: add extswsli support on power9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for the extswsli instruction which combines extsw followed by a shift. New benchmark demonstrates the improvement: name old time/op new time/op delta ExtShift 1.34µs ± 0% 1.30µs ± 0% -3.15% (p=0.057 n=4+3) Change-Id: I21b410676fdf15d20e0cbbaa75d7c6dcd3bbb7b0 Reviewed-on: https://go-review.googlesource.com/c/go/+/257017 Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Carlos Eduardo Seo Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 1 + src/cmd/compile/internal/gc/bench_test.go | 12 ++++ src/cmd/compile/internal/ppc64/ssa.go | 2 +- src/cmd/compile/internal/ssa/gen/PPC64.rules | 2 + src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 1 + src/cmd/compile/internal/ssa/opGen.go | 15 ++++ src/cmd/compile/internal/ssa/rewritePPC64.go | 36 ++++++++++ src/cmd/internal/obj/ppc64/a.out.go | 2 + src/cmd/internal/obj/ppc64/anames.go | 2 + src/cmd/internal/obj/ppc64/asm9.go | 104 +++++++++++++++++---------- test/codegen/shift.go | 7 +- 11 files changed, 142 insertions(+), 42 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s index e26f6f8933..88a7609ba8 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64enc.s @@ -266,6 +266,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 SRDCC R3, R4 // 7c841c37 ROTLW $16, R3, R4 // 5464803e ROTLW R3, R4, R5 // 5c85183e + EXTSWSLI $3, R4, R5 // 7c851ef4 RLWMI $7, R3, $65535, R6 // 50663c3e RLWMICC $7, R3, $65535, R6 // 50663c3f RLWNM $3, R4, $7, R6 // 54861f7e diff --git a/src/cmd/compile/internal/gc/bench_test.go b/src/cmd/compile/internal/gc/bench_test.go index 09aaf428c3..a2887f2f7b 100644 --- a/src/cmd/compile/internal/gc/bench_test.go +++ b/src/cmd/compile/internal/gc/bench_test.go @@ -20,6 +20,18 @@ func BenchmarkLoadAdd(b *testing.B) { } } +// Added for ppc64 extswsli on power9 +func BenchmarkExtShift(b *testing.B) { + x := make([]int32, 1024) + for i := 0; i < b.N; i++ { + var s int64 + for i := range x { + s ^= int64(x[i]+32) * 8 + } + globl = s + } +} + func BenchmarkModify(b *testing.B) { a := make([]int64, 1024) v := globl diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index 4a83a0bdd7..a5fbdaffba 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -677,7 +677,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.From.Reg = v.Args[0].Reg() case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst, - ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst: + ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst: p := s.Prog(v.Op.Asm()) p.Reg = v.Args[0].Reg() p.From.Type = obj.TYPE_CONST diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index 774d5096de..de30d003e6 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -1025,6 +1025,8 @@ (SLWconst [c] z:(MOVWZreg x)) && z.Uses == 1 && c < 24 => (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) (SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) (SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +// special case for power9 +(SL(W|D)const [c] z:(MOVWreg x)) && c < 32 && objabi.GOPPC64 >= 9 => (EXTSWSLconst [c] x) // Lose widening ops fed to stores (MOVBstore [off] {sym} ptr (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) => (MOVBstore [off] {sym} ptr x mem) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index ed99c40cd2..28317928a8 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -223,6 +223,7 @@ func init() { {name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"}, // arg0 rotate left by auxInt bits {name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits + {name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"}, {name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros {name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit) diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 1fc0f7ea79..1fe00c7026 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1865,6 +1865,7 @@ const ( OpPPC64SLWconst OpPPC64ROTLconst OpPPC64ROTLWconst + OpPPC64EXTSWSLconst OpPPC64CNTLZD OpPPC64CNTLZW OpPPC64CNTTZD @@ -24849,6 +24850,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "EXTSWSLconst", + auxType: auxInt64, + argLen: 1, + asm: ppc64.AEXTSWSLI, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, { name: "CNTLZD", argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 12b08824b5..29ec3992f2 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -12877,6 +12877,24 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { } break } + // match: (SLDconst [c] z:(MOVWreg x)) + // cond: c < 32 && objabi.GOPPC64 >= 9 + // result: (EXTSWSLconst [c] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWreg { + break + } + x := z.Args[0] + if !(c < 32 && objabi.GOPPC64 >= 9) { + break + } + v.reset(OpPPC64EXTSWSLconst) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } return false } func rewriteValuePPC64_OpPPC64SLW(v *Value) bool { @@ -13000,6 +13018,24 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { } break } + // match: (SLWconst [c] z:(MOVWreg x)) + // cond: c < 32 && objabi.GOPPC64 >= 9 + // result: (EXTSWSLconst [c] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWreg { + break + } + x := z.Args[0] + if !(c < 32 && objabi.GOPPC64 >= 9) { + break + } + v.reset(OpPPC64EXTSWSLconst) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } return false } func rewriteValuePPC64_OpPPC64SRAD(v *Value) bool { diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go index f438803fb5..4c97302f83 100644 --- a/src/cmd/internal/obj/ppc64/a.out.go +++ b/src/cmd/internal/obj/ppc64/a.out.go @@ -733,6 +733,8 @@ const ( ASRAD ASRADCC ASRDCC + AEXTSWSLI + AEXTSWSLICC ASTDCCC ATD diff --git a/src/cmd/internal/obj/ppc64/anames.go b/src/cmd/internal/obj/ppc64/anames.go index accd87fe00..fca4b3e355 100644 --- a/src/cmd/internal/obj/ppc64/anames.go +++ b/src/cmd/internal/obj/ppc64/anames.go @@ -329,6 +329,8 @@ var Anames = []string{ "SRAD", "SRADCC", "SRDCC", + "EXTSWSLI", + "EXTSWSLICC", "STDCCC", "TD", "DWORD", diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 60dda72507..9f06bdf8b3 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -160,6 +160,8 @@ var optab = []Optab{ {ASLD, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0}, {ASLD, C_SCON, C_REG, C_NONE, C_REG, 25, 4, 0}, {ASLD, C_SCON, C_NONE, C_NONE, C_REG, 25, 4, 0}, + {AEXTSWSLI, C_SCON, C_NONE, C_NONE, C_REG, 25, 4, 0}, + {AEXTSWSLI, C_SCON, C_REG, C_NONE, C_REG, 25, 4, 0}, {ASLW, C_SCON, C_REG, C_NONE, C_REG, 57, 4, 0}, {ASLW, C_SCON, C_NONE, C_NONE, C_REG, 57, 4, 0}, {ASRAW, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0}, @@ -1877,6 +1879,9 @@ func buildop(ctxt *obj.Link) { case ASRAW: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */ opset(ASRAWCC, r0) + case AEXTSWSLI: + opset(AEXTSWSLICC, r0) + case ASRAD: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */ opset(ASRADCC, r0) @@ -2189,49 +2194,54 @@ func AOP_RLDIC(op uint32, a uint32, s uint32, sh uint32, m uint32) uint32 { return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 | (m&31)<<6 | ((m&32)>>5)<<5 } +func AOP_EXTSWSLI(op uint32, a uint32, s uint32, sh uint32) uint32 { + return op | (a&31)<<21 | (s&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 +} + func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 { return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6 } const ( /* each rhs is OPVCC(_, _, _, _) */ - OP_ADD = 31<<26 | 266<<1 | 0<<10 | 0 - OP_ADDI = 14<<26 | 0<<1 | 0<<10 | 0 - OP_ADDIS = 15<<26 | 0<<1 | 0<<10 | 0 - OP_ANDI = 28<<26 | 0<<1 | 0<<10 | 0 - OP_EXTSB = 31<<26 | 954<<1 | 0<<10 | 0 - OP_EXTSH = 31<<26 | 922<<1 | 0<<10 | 0 - OP_EXTSW = 31<<26 | 986<<1 | 0<<10 | 0 - OP_ISEL = 31<<26 | 15<<1 | 0<<10 | 0 - OP_MCRF = 19<<26 | 0<<1 | 0<<10 | 0 - OP_MCRFS = 63<<26 | 64<<1 | 0<<10 | 0 - OP_MCRXR = 31<<26 | 512<<1 | 0<<10 | 0 - OP_MFCR = 31<<26 | 19<<1 | 0<<10 | 0 - OP_MFFS = 63<<26 | 583<<1 | 0<<10 | 0 - OP_MFMSR = 31<<26 | 83<<1 | 0<<10 | 0 - OP_MFSPR = 31<<26 | 339<<1 | 0<<10 | 0 - OP_MFSR = 31<<26 | 595<<1 | 0<<10 | 0 - OP_MFSRIN = 31<<26 | 659<<1 | 0<<10 | 0 - OP_MTCRF = 31<<26 | 144<<1 | 0<<10 | 0 - OP_MTFSF = 63<<26 | 711<<1 | 0<<10 | 0 - OP_MTFSFI = 63<<26 | 134<<1 | 0<<10 | 0 - OP_MTMSR = 31<<26 | 146<<1 | 0<<10 | 0 - OP_MTMSRD = 31<<26 | 178<<1 | 0<<10 | 0 - OP_MTSPR = 31<<26 | 467<<1 | 0<<10 | 0 - OP_MTSR = 31<<26 | 210<<1 | 0<<10 | 0 - OP_MTSRIN = 31<<26 | 242<<1 | 0<<10 | 0 - OP_MULLW = 31<<26 | 235<<1 | 0<<10 | 0 - OP_MULLD = 31<<26 | 233<<1 | 0<<10 | 0 - OP_OR = 31<<26 | 444<<1 | 0<<10 | 0 - OP_ORI = 24<<26 | 0<<1 | 0<<10 | 0 - OP_ORIS = 25<<26 | 0<<1 | 0<<10 | 0 - OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0 - OP_RLWNM = 23<<26 | 0<<1 | 0<<10 | 0 - OP_SUBF = 31<<26 | 40<<1 | 0<<10 | 0 - OP_RLDIC = 30<<26 | 4<<1 | 0<<10 | 0 - OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0 - OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0 - OP_RLDCL = 30<<26 | 8<<1 | 0<<10 | 0 + OP_ADD = 31<<26 | 266<<1 | 0<<10 | 0 + OP_ADDI = 14<<26 | 0<<1 | 0<<10 | 0 + OP_ADDIS = 15<<26 | 0<<1 | 0<<10 | 0 + OP_ANDI = 28<<26 | 0<<1 | 0<<10 | 0 + OP_EXTSB = 31<<26 | 954<<1 | 0<<10 | 0 + OP_EXTSH = 31<<26 | 922<<1 | 0<<10 | 0 + OP_EXTSW = 31<<26 | 986<<1 | 0<<10 | 0 + OP_ISEL = 31<<26 | 15<<1 | 0<<10 | 0 + OP_MCRF = 19<<26 | 0<<1 | 0<<10 | 0 + OP_MCRFS = 63<<26 | 64<<1 | 0<<10 | 0 + OP_MCRXR = 31<<26 | 512<<1 | 0<<10 | 0 + OP_MFCR = 31<<26 | 19<<1 | 0<<10 | 0 + OP_MFFS = 63<<26 | 583<<1 | 0<<10 | 0 + OP_MFMSR = 31<<26 | 83<<1 | 0<<10 | 0 + OP_MFSPR = 31<<26 | 339<<1 | 0<<10 | 0 + OP_MFSR = 31<<26 | 595<<1 | 0<<10 | 0 + OP_MFSRIN = 31<<26 | 659<<1 | 0<<10 | 0 + OP_MTCRF = 31<<26 | 144<<1 | 0<<10 | 0 + OP_MTFSF = 63<<26 | 711<<1 | 0<<10 | 0 + OP_MTFSFI = 63<<26 | 134<<1 | 0<<10 | 0 + OP_MTMSR = 31<<26 | 146<<1 | 0<<10 | 0 + OP_MTMSRD = 31<<26 | 178<<1 | 0<<10 | 0 + OP_MTSPR = 31<<26 | 467<<1 | 0<<10 | 0 + OP_MTSR = 31<<26 | 210<<1 | 0<<10 | 0 + OP_MTSRIN = 31<<26 | 242<<1 | 0<<10 | 0 + OP_MULLW = 31<<26 | 235<<1 | 0<<10 | 0 + OP_MULLD = 31<<26 | 233<<1 | 0<<10 | 0 + OP_OR = 31<<26 | 444<<1 | 0<<10 | 0 + OP_ORI = 24<<26 | 0<<1 | 0<<10 | 0 + OP_ORIS = 25<<26 | 0<<1 | 0<<10 | 0 + OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0 + OP_RLWNM = 23<<26 | 0<<1 | 0<<10 | 0 + OP_SUBF = 31<<26 | 40<<1 | 0<<10 | 0 + OP_RLDIC = 30<<26 | 4<<1 | 0<<10 | 0 + OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0 + OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0 + OP_RLDCL = 30<<26 | 8<<1 | 0<<10 | 0 + OP_EXTSWSLI = 31<<26 | 445<<2 ) func oclass(a *obj.Addr) int { @@ -2965,14 +2975,21 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { case AROTL: a = int(0) op = OP_RLDICL + case AEXTSWSLI: + a = int(v) default: c.ctxt.Diag("unexpected op in sldi case\n%v", p) a = 0 o1 = 0 } - o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a)) - if p.As == ASLDCC || p.As == ASRDCC { + if p.As == AEXTSWSLI || p.As == AEXTSWSLICC { + o1 = AOP_EXTSWSLI(OP_EXTSWSLI, uint32(r), uint32(p.To.Reg), uint32(v)) + + } else { + o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a)) + } + if p.As == ASLDCC || p.As == ASRDCC || p.As == AEXTSWSLICC { o1 |= 1 // Set the condition code bit } @@ -4350,6 +4367,11 @@ func (c *ctxt9) oprrr(a obj.As) uint32 { case ASRADCC: return OPVCC(31, 794, 0, 1) + case AEXTSWSLI: + return OPVCC(31, 445, 0, 0) + case AEXTSWSLICC: + return OPVCC(31, 445, 0, 1) + case ASRW: return OPVCC(31, 536, 0, 0) case ASRWCC: @@ -5013,6 +5035,10 @@ func (c *ctxt9) opirr(a obj.As) uint32 { return OPVCC(31, (413 << 1), 0, 0) case ASRADCC: return OPVCC(31, (413 << 1), 0, 1) + case AEXTSWSLI: + return OPVCC(31, 445, 0, 0) + case AEXTSWSLICC: + return OPVCC(31, 445, 0, 1) case ASTSW: return OPVCC(31, 725, 0, 0) diff --git a/test/codegen/shift.go b/test/codegen/shift.go index 32214851b5..abc4b091c9 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -182,7 +182,7 @@ func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byt return f, g } -func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, uint16, uint32, uint64) { +func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64) (uint8, uint16, uint32, uint64, int64) { // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" @@ -202,7 +202,10 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, u // ppc64le:-"AND","CLRLSLDI" // ppc64:-"AND","CLRLSLDI" i := (v64 & 0xFFFFFFFF) << 5 - return f, g, h, i + // ppc64le/power9:-"SLD","EXTSWSLI" + // ppc64/power9:-"SLD","EXTSWSLI" + j := int64(x32+32)*8 + return f, g, h, i, j } func checkWidenAfterShift(v int64, u uint64) (int64, uint64) { -- cgit v1.3 From cc2a5cf4b8b0aeaccd3dd439f8d3d68f25eef358 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Mon, 28 Sep 2020 18:20:12 -0400 Subject: cmd/compile,cmd/internal/obj/ppc64: fix some shift rules due to a regression A recent change to improve shifts was generating some invalid cases when the rule was based on an AND. The extended mnemonics CLRLSLDI and CLRLSLWI only allow certain values for the operands and in the mask case those values were not being checked properly. This adds a check to those rules to verify that the 'b' and 'n' values used when an AND was part of the rule have correct values. There was a bug in some diag messages in asm9. The message expected 3 values but only provided 2. Those are corrected here also. The test/codegen/shift.go was updated to add a few more cases to check for the case mentioned here. Some of the comments that mention the order of operands in these extended mnemonics were wrong and those have been corrected. Fixes #41683. Change-Id: If5bb860acaa5051b9e0cd80784b2868b85898c31 Reviewed-on: https://go-review.googlesource.com/c/go/+/258138 Run-TryBot: Lynn Boger Reviewed-by: Paul Murphy Reviewed-by: Carlos Eduardo Seo TryBot-Result: Go Bot Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 4 ++-- src/cmd/compile/internal/ppc64/ssa.go | 12 +++++----- src/cmd/compile/internal/ssa/gen/PPC64.rules | 9 ++++--- src/cmd/compile/internal/ssa/rewrite.go | 4 ++-- src/cmd/compile/internal/ssa/rewritePPC64.go | 34 +++++++-------------------- src/cmd/internal/obj/ppc64/asm9.go | 35 ++++++++++++++-------------- test/codegen/shift.go | 17 ++++++++------ 7 files changed, 50 insertions(+), 65 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s index 88a7609ba8..869f8c2d4f 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64enc.s @@ -287,8 +287,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 RLDICRCC $0, R4, $15, R6 // 788603c5 RLDIC $0, R4, $15, R6 // 788603c8 RLDICCC $0, R4, $15, R6 // 788603c9 - CLRLSLWI $16, R5, $8, R4 // 54a4861e - CLRLSLDI $2, R4, $24, R3 // 78831588 + CLRLSLWI $8, R5, $6, R4 // 54a430b2 + CLRLSLDI $24, R4, $4, R3 // 78832508 BEQ 0(PC) // 41820000 BGE 0(PC) // 40800000 diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index a5fbdaffba..d83b2df379 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -570,9 +570,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { r1 := v.Args[0].Reg() shifts := v.AuxInt p := s.Prog(v.Op.Asm()) - // clrlslwi ra,rs,sh,mb will become rlwinm ra,rs,sh,mb-sh,31-n as described in ISA - p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} - p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}) p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r @@ -582,9 +582,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { r1 := v.Args[0].Reg() shifts := v.AuxInt p := s.Prog(v.Op.Asm()) - // clrlsldi ra,rs,sh,mb will become rldic ra,rs,sh,mb-sh - p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} - p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}) p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index de30d003e6..83ee4c499b 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -1018,13 +1018,12 @@ (SLDconst [c] z:(MOVHZreg x)) && c < 16 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,48,63,64)] x) (SLDconst [c] z:(MOVWZreg x)) && c < 32 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,32,63,64)] x) -(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) -(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) +(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d)) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) +(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(64-getPPC64ShiftMaskLength(d)) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) (SLWconst [c] z:(MOVBZreg x)) && z.Uses == 1 && c < 8 => (CLRLSLWI [newPPC64ShiftAuxInt(c,24,31,32)] x) (SLWconst [c] z:(MOVHZreg x)) && z.Uses == 1 && c < 16 => (CLRLSLWI [newPPC64ShiftAuxInt(c,16,31,32)] x) -(SLWconst [c] z:(MOVWZreg x)) && z.Uses == 1 && c < 24 => (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) -(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) -(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) // special case for power9 (SL(W|D)const [c] z:(MOVWreg x)) && c < 32 && objabi.GOPPC64 >= 9 => (EXTSWSLconst [c] x) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 9f4de83a77..5d8b3ddc4e 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1380,8 +1380,8 @@ func GetPPC64Shiftme(auxint int64) int64 { return int64(int8(auxint)) } -// Catch the simple ones first -// TODO: Later catch more cases +// This verifies that the mask occupies the +// rightmost bits. func isPPC64ValidShiftMask(v int64) bool { if ((v + 1) & v) == 0 { return true diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 29ec3992f2..9822637b05 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -12831,7 +12831,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { return true } // match: (SLDconst [c] z:(ANDconst [d] x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d)) // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) for { c := auxIntToInt64(v.AuxInt) @@ -12841,7 +12841,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { } d := auxIntToInt64(z.AuxInt) x := z.Args[0] - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d))) { break } v.reset(OpPPC64CLRLSLDI) @@ -12850,7 +12850,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { return true } // match: (SLDconst [c] z:(AND (MOVDconst [d]) x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(64-getPPC64ShiftMaskLength(d)) // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) for { c := auxIntToInt64(v.AuxInt) @@ -12867,7 +12867,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { } d := auxIntToInt64(z_0.AuxInt) x := z_1 - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d))) { continue } v.reset(OpPPC64CLRLSLDI) @@ -12953,26 +12953,8 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { v.AddArg(x) return true } - // match: (SLWconst [c] z:(MOVWZreg x)) - // cond: z.Uses == 1 && c < 24 - // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) - for { - c := auxIntToInt64(v.AuxInt) - z := v_0 - if z.Op != OpPPC64MOVWZreg { - break - } - x := z.Args[0] - if !(z.Uses == 1 && c < 24) { - break - } - v.reset(OpPPC64CLRLSLWI) - v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 8, 31, 32)) - v.AddArg(x) - return true - } // match: (SLWconst [c] z:(ANDconst [d] x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) for { c := auxIntToInt64(v.AuxInt) @@ -12982,7 +12964,7 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { } d := auxIntToInt64(z.AuxInt) x := z.Args[0] - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (32-getPPC64ShiftMaskLength(d))) { break } v.reset(OpPPC64CLRLSLWI) @@ -12991,7 +12973,7 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { return true } // match: (SLWconst [c] z:(AND (MOVDconst [d]) x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) for { c := auxIntToInt64(v.AuxInt) @@ -13008,7 +12990,7 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { } d := auxIntToInt64(z_0.AuxInt) x := z_1 - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (32-getPPC64ShiftMaskLength(d))) { continue } v.reset(OpPPC64CLRLSLWI) diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 9f06bdf8b3..928e299f43 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -2749,7 +2749,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { me := int(d) sh := c.regoff(&p.From) if me < 0 || me > 63 || sh > 63 { - c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh) + c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh, p) } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(me)) @@ -2757,19 +2757,19 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { mb := int(d) sh := c.regoff(&p.From) if mb < 0 || mb > 63 || sh > 63 { - c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh) + c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh, p) } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(mb)) case ACLRLSLDI: // This is an extended mnemonic defined in the ISA section C.8.1 - // clrlsldi ra,rs,n,b --> rldic ra,rs,n,b-n + // clrlsldi ra,rs,b,n --> rldic ra,rs,n,b-n // It maps onto RLDIC so is directly generated here based on the operands from // the clrlsldi. - b := int(d) - n := c.regoff(&p.From) - if n > int32(b) || b > 63 { - c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b) + n := int32(d) + b := c.regoff(&p.From) + if n > b || b > 63 { + c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b, p) } o1 = AOP_RLDIC(OP_RLDIC, uint32(p.To.Reg), uint32(r), uint32(n), uint32(b)-uint32(n)) @@ -3395,14 +3395,15 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { v := c.regoff(&p.From) switch p.As { case ACLRLSLWI: - b := c.regoff(p.GetFrom3()) + n := c.regoff(p.GetFrom3()) // This is an extended mnemonic described in the ISA C.8.2 - // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n // It maps onto rlwinm which is directly generated here. - if v < 0 || v > 32 || b > 32 { - c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + if n > v || v >= 32 { + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p) } - o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v)) + + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n)) default: var mask [2]uint8 c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) @@ -3414,16 +3415,16 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { v := c.regoff(&p.From) switch p.As { case ACLRLSLWI: - b := c.regoff(p.GetFrom3()) - if v > b || b > 32 { + n := c.regoff(p.GetFrom3()) + if n > v || v >= 32 { // Message will match operands from the ISA even though in the // code it uses 'v' - c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p) } // This is an extended mnemonic described in the ISA C.8.2 - // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n // It generates the rlwinm directly here. - o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v)) + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n)) default: var mask [2]uint8 c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) diff --git a/test/codegen/shift.go b/test/codegen/shift.go index abc4b091c9..bbfc85ffbb 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -187,8 +187,8 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64 // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" f := (v8 &0xF) << 2 - // ppc64le:-"AND","CLRLSLWI" - // ppc64:-"AND","CLRLSLWI" + // ppc64le:"CLRLSLWI" + // ppc64:"CLRLSLWI" f += byte(v16)<<3 // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" @@ -196,12 +196,15 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64 // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" h := (v32 & 0xFFFFF) << 2 - // ppc64le:-"AND","CLRLSLWI" - // ppc64:-"AND","CLRLSLWI" - h += uint32(v64)<<4 - // ppc64le:-"AND","CLRLSLDI" - // ppc64:-"AND","CLRLSLDI" + // ppc64le:"CLRLSLDI" + // ppc64:"CLRLSLDI" i := (v64 & 0xFFFFFFFF) << 5 + // ppc64le:-"CLRLSLDI" + // ppc64:-"CLRLSLDI" + i += (v64 & 0xFFFFFFF) << 38 + // ppc64le/power9:-"CLRLSLDI" + // ppc64/power9:-"CLRLSLDI" + i += (v64 & 0xFFFF00) << 10 // ppc64le/power9:-"SLD","EXTSWSLI" // ppc64/power9:-"SLD","EXTSWSLI" j := int64(x32+32)*8 -- cgit v1.3 From fe2cfb74ba6352990f5b41260b99e80f78e4a90a Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 1 Oct 2020 14:49:33 -0700 Subject: all: drop 387 support My last 387 CL. So sad ... ... ... ... not! Fixes #40255 Change-Id: I8d4ddb744b234b8adc735db2f7c3c7b6d8bbdfa4 Reviewed-on: https://go-review.googlesource.com/c/go/+/258957 Trust: Keith Randall Run-TryBot: Keith Randall TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/endtoend_test.go | 7 +- src/cmd/compile/internal/gc/float_test.go | 19 -- src/cmd/compile/internal/gc/go.go | 5 - src/cmd/compile/internal/gc/ssa.go | 15 +- src/cmd/compile/internal/ssa/config.go | 6 - src/cmd/compile/internal/ssa/gen/386.rules | 10 +- src/cmd/compile/internal/ssa/gen/386Ops.go | 14 - src/cmd/compile/internal/ssa/opGen.go | 13 - src/cmd/compile/internal/ssa/regalloc.go | 14 - src/cmd/compile/internal/ssa/rewrite386.go | 84 ++---- src/cmd/compile/internal/x86/387.go | 403 ----------------------------- src/cmd/compile/internal/x86/galign.go | 17 +- src/cmd/compile/internal/x86/ssa.go | 2 - src/cmd/dist/build.go | 15 -- src/cmd/dist/buildruntime.go | 2 - src/cmd/dist/cpuid_386.s | 16 -- src/cmd/dist/cpuid_amd64.s | 16 -- src/cmd/dist/cpuid_default.s | 10 - src/cmd/dist/util_gc.go | 12 - src/cmd/dist/util_gccgo.go | 13 - src/cmd/go/alldocs.go | 3 - src/cmd/go/internal/cfg/cfg.go | 3 - src/cmd/go/internal/envcmd/env.go | 5 +- src/cmd/go/internal/help/helpdoc.go | 3 - src/cmd/go/internal/work/exec.go | 4 +- src/cmd/internal/objabi/util.go | 9 +- src/internal/cfg/cfg.go | 1 - src/reflect/all_test.go | 18 -- src/runtime/mkpreempt.go | 33 +-- src/runtime/preempt_386.s | 45 ++-- src/runtime/vlrt.go | 5 +- test/codegen/arithmetic.go | 6 +- test/codegen/floats.go | 19 +- test/codegen/math.go | 2 +- test/codegen/memops.go | 32 +-- test/run.go | 12 +- 36 files changed, 97 insertions(+), 796 deletions(-) delete mode 100644 src/cmd/compile/internal/x86/387.go delete mode 100644 src/cmd/dist/cpuid_386.s delete mode 100644 src/cmd/dist/cpuid_amd64.s delete mode 100644 src/cmd/dist/cpuid_default.s (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go index 0759b7d10f..15202dc5dc 100644 --- a/src/cmd/asm/internal/asm/endtoend_test.go +++ b/src/cmd/asm/internal/asm/endtoend_test.go @@ -353,12 +353,7 @@ func testErrors(t *testing.T, goarch, file string) { } func Test386EndToEnd(t *testing.T) { - defer func(old string) { objabi.GO386 = old }(objabi.GO386) - for _, go386 := range []string{"387", "sse2"} { - t.Logf("GO386=%v", go386) - objabi.GO386 = go386 - testEndToEnd(t, "386", "386") - } + testEndToEnd(t, "386", "386") } func TestARMEndToEnd(t *testing.T) { diff --git a/src/cmd/compile/internal/gc/float_test.go b/src/cmd/compile/internal/gc/float_test.go index 6ae363be22..c619d25705 100644 --- a/src/cmd/compile/internal/gc/float_test.go +++ b/src/cmd/compile/internal/gc/float_test.go @@ -6,17 +6,9 @@ package gc import ( "math" - "os" - "runtime" "testing" ) -// For GO386=387, make sure fucomi* opcodes are not used -// for comparison operations. -// Note that this test will fail only on a Pentium MMX -// processor (with GOARCH=386 GO386=387), as it just runs -// some code and looks for an unimplemented instruction fault. - //go:noinline func compare1(a, b float64) bool { return a < b @@ -137,9 +129,6 @@ func TestFloatCompareFolded(t *testing.T) { } } -// For GO386=387, make sure fucomi* opcodes are not used -// for float->int conversions. - //go:noinline func cvt1(a float64) uint64 { return uint64(a) @@ -370,14 +359,6 @@ func TestFloat32StoreToLoadConstantFold(t *testing.T) { // are not converted to quiet NaN (qNaN) values during compilation. // See issue #27193 for more information. - // TODO: this method for detecting 387 won't work if the compiler has been - // built using GOARCH=386 GO386=387 and either the target is a different - // architecture or the GO386=387 environment variable is not set when the - // test is run. - if runtime.GOARCH == "386" && os.Getenv("GO386") == "387" { - t.Skip("signaling NaNs are not propagated on 387 (issue #27516)") - } - // signaling NaNs { const nan = uint32(0x7f800001) // sNaN diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go index 9079ce2afc..2fbdf71055 100644 --- a/src/cmd/compile/internal/gc/go.go +++ b/src/cmd/compile/internal/gc/go.go @@ -259,7 +259,6 @@ type Arch struct { REGSP int MAXWIDTH int64 - Use387 bool // should 386 backend use 387 FP instructions instead of sse2. SoftFloat bool PadFrame func(int64) int64 @@ -328,10 +327,6 @@ var ( BoundsCheckFunc [ssa.BoundsKindCount]*obj.LSym ExtendCheckFunc [ssa.BoundsKindCount]*obj.LSym - // GO386=387 - ControlWord64trunc, - ControlWord32 *obj.LSym - // Wasm WasmMove, WasmZero, diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 1d50cefe54..32394c4b1a 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -62,9 +62,6 @@ func initssaconfig() { _ = types.NewPtr(types.Errortype) // *error types.NewPtrCacheEnabled = false ssaConfig = ssa.NewConfig(thearch.LinkArch.Name, *types_, Ctxt, Debug['N'] == 0) - if thearch.LinkArch.Name == "386" { - ssaConfig.Set387(thearch.Use387) - } ssaConfig.SoftFloat = thearch.SoftFloat ssaConfig.Race = flag_race ssaCaches = make([]ssa.Cache, nBackendWorkers) @@ -175,10 +172,6 @@ func initssaconfig() { ExtendCheckFunc[ssa.BoundsSlice3CU] = sysvar("panicExtendSlice3CU") } - // GO386=387 runtime definitions - ControlWord64trunc = sysvar("controlWord64trunc") // uint16 - ControlWord32 = sysvar("controlWord32") // uint16 - // Wasm (all asm funcs with special ABIs) WasmMove = sysvar("wasmMove") WasmZero = sysvar("wasmZero") @@ -5946,9 +5939,7 @@ type SSAGenState struct { // bstart remembers where each block starts (indexed by block ID) bstart []*obj.Prog - // 387 port: maps from SSE registers (REG_X?) to 387 registers (REG_F?) - SSEto387 map[int16]int16 - // Some architectures require a 64-bit temporary for FP-related register shuffling. Examples include x86-387, PPC, and Sparc V8. + // Some architectures require a 64-bit temporary for FP-related register shuffling. Examples include PPC and Sparc V8. ScratchFpMem *Node maxarg int64 // largest frame size for arguments to calls made by the function @@ -6115,10 +6106,6 @@ func genssa(f *ssa.Func, pp *Progs) { progToBlock[s.pp.next] = f.Blocks[0] } - if thearch.Use387 { - s.SSEto387 = map[int16]int16{} - } - s.ScratchFpMem = e.scratchFpMem if Ctxt.Flag_locationlists { diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index 88a406deb9..649b5ba820 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -38,7 +38,6 @@ type Config struct { useSSE bool // Use SSE for non-float operations useAvg bool // Use optimizations that need Avg* operations useHmul bool // Use optimizations that need Hmul* operations - use387 bool // GO386=387 SoftFloat bool // Race bool // race detector enabled NeedsFpScratch bool // No direct move between GP and FP register sets @@ -387,9 +386,4 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize bool) *Config return c } -func (c *Config) Set387(b bool) { - c.NeedsFpScratch = b - c.use387 = b -} - func (c *Config) Ctxt() *obj.Link { return c.ctxt } diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules index 4a8244eb27..6a0b87cab4 100644 --- a/src/cmd/compile/internal/ssa/gen/386.rules +++ b/src/cmd/compile/internal/ssa/gen/386.rules @@ -38,10 +38,8 @@ (Xor(32|16|8) ...) => (XORL ...) (Neg(32|16|8) ...) => (NEGL ...) -(Neg32F x) && !config.use387 => (PXOR x (MOVSSconst [float32(math.Copysign(0, -1))])) -(Neg64F x) && !config.use387 => (PXOR x (MOVSDconst [math.Copysign(0, -1)])) -(Neg32F x) && config.use387 => (FCHS x) -(Neg64F x) && config.use387 => (FCHS x) +(Neg32F x) => (PXOR x (MOVSSconst [float32(math.Copysign(0, -1))])) +(Neg64F x) => (PXOR x (MOVSDconst [math.Copysign(0, -1)])) (Com(32|16|8) ...) => (NOTL ...) @@ -670,8 +668,8 @@ // Merge load/store to op ((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem) -((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) => ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem) -((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) => ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem) +((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem) +((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem) (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) (MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) => ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) diff --git a/src/cmd/compile/internal/ssa/gen/386Ops.go b/src/cmd/compile/internal/ssa/gen/386Ops.go index ddabde7d3d..737b99c371 100644 --- a/src/cmd/compile/internal/ssa/gen/386Ops.go +++ b/src/cmd/compile/internal/ssa/gen/386Ops.go @@ -51,17 +51,6 @@ var regNames386 = []string{ "SB", } -// Notes on 387 support. -// - The 387 has a weird stack-register setup for floating-point registers. -// We use these registers when SSE registers are not available (when GO386=387). -// - We use the same register names (X0-X7) but they refer to the 387 -// floating-point registers. That way, most of the SSA backend is unchanged. -// - The instruction generation pass maintains an SSE->387 register mapping. -// This mapping is updated whenever the FP stack is pushed or popped so that -// we can always find a given SSE register even when the TOS pointer has changed. -// - To facilitate the mapping from SSE to 387, we enforce that -// every basic block starts and ends with an empty floating-point stack. - func init() { // Make map from reg names to reg integers. if len(regNames386) > 64 { @@ -552,9 +541,6 @@ func init() { {name: "FlagGT_UGT"}, // signed > and unsigned < {name: "FlagGT_ULT"}, // signed > and unsigned > - // Special op for -x on 387 - {name: "FCHS", argLength: 1, reg: fp11}, - // Special ops for PIC floating-point constants. // MOVSXconst1 loads the address of the constant-pool entry into a register. // MOVSXconst2 loads the constant from that address. diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 9fe943c2e0..d7d2b24a48 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -536,7 +536,6 @@ const ( Op386FlagLT_UGT Op386FlagGT_UGT Op386FlagGT_ULT - Op386FCHS Op386MOVSSconst1 Op386MOVSDconst1 Op386MOVSSconst2 @@ -6060,18 +6059,6 @@ var opcodeTable = [...]opInfo{ argLen: 0, reg: regInfo{}, }, - { - name: "FCHS", - argLen: 1, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 - }, - outputs: []outputInfo{ - {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 - }, - }, - }, { name: "MOVSSconst1", auxType: auxFloat32, diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index 64c6aed3e7..691530ec0b 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -625,9 +625,6 @@ func (s *regAllocState) init(f *Func) { s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch) } } - if s.f.Config.use387 { - s.allocatable &^= 1 << 15 // X7 disallowed (one 387 register is used as scratch space during SSE->387 generation in ../x86/387.go) - } // Linear scan register allocation can be influenced by the order in which blocks appear. // Decouple the register allocation order from the generated block order. @@ -1024,9 +1021,6 @@ func (s *regAllocState) regalloc(f *Func) { if phiRegs[i] != noRegister { continue } - if s.f.Config.use387 && v.Type.IsFloat() { - continue // 387 can't handle floats in registers between blocks - } m := s.compatRegs(v.Type) &^ phiUsed &^ s.used if m != 0 { r := pickReg(m) @@ -1528,11 +1522,6 @@ func (s *regAllocState) regalloc(f *Func) { s.freeUseRecords = u } - // Spill any values that can't live across basic block boundaries. - if s.f.Config.use387 { - s.freeRegs(s.f.Config.fpRegMask) - } - // If we are approaching a merge point and we are the primary // predecessor of it, find live values that we use soon after // the merge point and promote them to registers now. @@ -1562,9 +1551,6 @@ func (s *regAllocState) regalloc(f *Func) { continue } v := s.orig[vid] - if s.f.Config.use387 && v.Type.IsFloat() { - continue // 387 can't handle floats in registers between blocks - } m := s.compatRegs(v.Type) &^ s.used if m&^desired.avoid != 0 { m &^= desired.avoid diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go index fc1e0541b2..0f08160f44 100644 --- a/src/cmd/compile/internal/ssa/rewrite386.go +++ b/src/cmd/compile/internal/ssa/rewrite386.go @@ -1310,10 +1310,8 @@ func rewriteValue386_Op386ADDLmodify(v *Value) bool { func rewriteValue386_Op386ADDSD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config // match: (ADDSD x l:(MOVSDload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) // result: (ADDSDload x [off] {sym} ptr mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { @@ -1326,7 +1324,7 @@ func rewriteValue386_Op386ADDSD(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l)) { + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { continue } v.reset(Op386ADDSDload) @@ -1395,10 +1393,8 @@ func rewriteValue386_Op386ADDSDload(v *Value) bool { func rewriteValue386_Op386ADDSS(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config // match: (ADDSS x l:(MOVSSload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) // result: (ADDSSload x [off] {sym} ptr mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { @@ -1411,7 +1407,7 @@ func rewriteValue386_Op386ADDSS(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l)) { + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { continue } v.reset(Op386ADDSSload) @@ -2640,10 +2636,8 @@ func rewriteValue386_Op386CMPWload(v *Value) bool { func rewriteValue386_Op386DIVSD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config // match: (DIVSD x l:(MOVSDload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) // result: (DIVSDload x [off] {sym} ptr mem) for { x := v_0 @@ -2655,7 +2649,7 @@ func rewriteValue386_Op386DIVSD(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l)) { + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { break } v.reset(Op386DIVSDload) @@ -2722,10 +2716,8 @@ func rewriteValue386_Op386DIVSDload(v *Value) bool { func rewriteValue386_Op386DIVSS(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config // match: (DIVSS x l:(MOVSSload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) // result: (DIVSSload x [off] {sym} ptr mem) for { x := v_0 @@ -2737,7 +2729,7 @@ func rewriteValue386_Op386DIVSS(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l)) { + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { break } v.reset(Op386DIVSSload) @@ -6104,10 +6096,8 @@ func rewriteValue386_Op386MULLload(v *Value) bool { func rewriteValue386_Op386MULSD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config // match: (MULSD x l:(MOVSDload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) // result: (MULSDload x [off] {sym} ptr mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { @@ -6120,7 +6110,7 @@ func rewriteValue386_Op386MULSD(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l)) { + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { continue } v.reset(Op386MULSDload) @@ -6189,10 +6179,8 @@ func rewriteValue386_Op386MULSDload(v *Value) bool { func rewriteValue386_Op386MULSS(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config // match: (MULSS x l:(MOVSSload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) // result: (MULSSload x [off] {sym} ptr mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { @@ -6205,7 +6193,7 @@ func rewriteValue386_Op386MULSS(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l)) { + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { continue } v.reset(Op386MULSSload) @@ -8187,10 +8175,8 @@ func rewriteValue386_Op386SUBLmodify(v *Value) bool { func rewriteValue386_Op386SUBSD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config // match: (SUBSD x l:(MOVSDload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) // result: (SUBSDload x [off] {sym} ptr mem) for { x := v_0 @@ -8202,7 +8188,7 @@ func rewriteValue386_Op386SUBSD(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l)) { + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { break } v.reset(Op386SUBSDload) @@ -8269,10 +8255,8 @@ func rewriteValue386_Op386SUBSDload(v *Value) bool { func rewriteValue386_Op386SUBSS(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config // match: (SUBSS x l:(MOVSSload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) // result: (SUBSSload x [off] {sym} ptr mem) for { x := v_0 @@ -8284,7 +8268,7 @@ func rewriteValue386_Op386SUBSS(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l)) { + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { break } v.reset(Op386SUBSSload) @@ -10043,68 +10027,32 @@ func rewriteValue386_OpMove(v *Value) bool { func rewriteValue386_OpNeg32F(v *Value) bool { v_0 := v.Args[0] b := v.Block - config := b.Func.Config typ := &b.Func.Config.Types // match: (Neg32F x) - // cond: !config.use387 // result: (PXOR x (MOVSSconst [float32(math.Copysign(0, -1))])) for { x := v_0 - if !(!config.use387) { - break - } v.reset(Op386PXOR) v0 := b.NewValue0(v.Pos, Op386MOVSSconst, typ.Float32) v0.AuxInt = float32ToAuxInt(float32(math.Copysign(0, -1))) v.AddArg2(x, v0) return true } - // match: (Neg32F x) - // cond: config.use387 - // result: (FCHS x) - for { - x := v_0 - if !(config.use387) { - break - } - v.reset(Op386FCHS) - v.AddArg(x) - return true - } - return false } func rewriteValue386_OpNeg64F(v *Value) bool { v_0 := v.Args[0] b := v.Block - config := b.Func.Config typ := &b.Func.Config.Types // match: (Neg64F x) - // cond: !config.use387 // result: (PXOR x (MOVSDconst [math.Copysign(0, -1)])) for { x := v_0 - if !(!config.use387) { - break - } v.reset(Op386PXOR) v0 := b.NewValue0(v.Pos, Op386MOVSDconst, typ.Float64) v0.AuxInt = float64ToAuxInt(math.Copysign(0, -1)) v.AddArg2(x, v0) return true } - // match: (Neg64F x) - // cond: config.use387 - // result: (FCHS x) - for { - x := v_0 - if !(config.use387) { - break - } - v.reset(Op386FCHS) - v.AddArg(x) - return true - } - return false } func rewriteValue386_OpNeq16(v *Value) bool { v_1 := v.Args[1] diff --git a/src/cmd/compile/internal/x86/387.go b/src/cmd/compile/internal/x86/387.go deleted file mode 100644 index 594adb2cd5..0000000000 --- a/src/cmd/compile/internal/x86/387.go +++ /dev/null @@ -1,403 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package x86 - -import ( - "cmd/compile/internal/gc" - "cmd/compile/internal/ssa" - "cmd/compile/internal/types" - "cmd/internal/obj" - "cmd/internal/obj/x86" - "math" -) - -// Generates code for v using 387 instructions. -func ssaGenValue387(s *gc.SSAGenState, v *ssa.Value) { - // The SSA compiler pretends that it has an SSE backend. - // If we don't have one of those, we need to translate - // all the SSE ops to equivalent 387 ops. That's what this - // function does. - - switch v.Op { - case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst: - iv := uint64(v.AuxInt) - if iv == 0x0000000000000000 { // +0.0 - s.Prog(x86.AFLDZ) - } else if iv == 0x3ff0000000000000 { // +1.0 - s.Prog(x86.AFLD1) - } else if iv == 0x8000000000000000 { // -0.0 - s.Prog(x86.AFLDZ) - s.Prog(x86.AFCHS) - } else if iv == 0xbff0000000000000 { // -1.0 - s.Prog(x86.AFLD1) - s.Prog(x86.AFCHS) - } else if iv == 0x400921fb54442d18 { // +pi - s.Prog(x86.AFLDPI) - } else if iv == 0xc00921fb54442d18 { // -pi - s.Prog(x86.AFLDPI) - s.Prog(x86.AFCHS) - } else { // others - p := s.Prog(loadPush(v.Type)) - p.From.Type = obj.TYPE_FCONST - p.From.Val = math.Float64frombits(iv) - p.To.Type = obj.TYPE_REG - p.To.Reg = x86.REG_F0 - } - popAndSave(s, v) - - case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2: - p := s.Prog(loadPush(v.Type)) - p.From.Type = obj.TYPE_MEM - p.From.Reg = v.Args[0].Reg() - p.To.Type = obj.TYPE_REG - p.To.Reg = x86.REG_F0 - popAndSave(s, v) - - case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1, ssa.Op386MOVSSloadidx4, ssa.Op386MOVSDloadidx8: - p := s.Prog(loadPush(v.Type)) - p.From.Type = obj.TYPE_MEM - p.From.Reg = v.Args[0].Reg() - gc.AddAux(&p.From, v) - switch v.Op { - case ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1: - p.From.Scale = 1 - p.From.Index = v.Args[1].Reg() - if p.From.Index == x86.REG_SP { - p.From.Reg, p.From.Index = p.From.Index, p.From.Reg - } - case ssa.Op386MOVSSloadidx4: - p.From.Scale = 4 - p.From.Index = v.Args[1].Reg() - case ssa.Op386MOVSDloadidx8: - p.From.Scale = 8 - p.From.Index = v.Args[1].Reg() - } - p.To.Type = obj.TYPE_REG - p.To.Reg = x86.REG_F0 - popAndSave(s, v) - - case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore: - // Push to-be-stored value on top of stack. - push(s, v.Args[1]) - - // Pop and store value. - var op obj.As - switch v.Op { - case ssa.Op386MOVSSstore: - op = x86.AFMOVFP - case ssa.Op386MOVSDstore: - op = x86.AFMOVDP - } - p := s.Prog(op) - p.From.Type = obj.TYPE_REG - p.From.Reg = x86.REG_F0 - p.To.Type = obj.TYPE_MEM - p.To.Reg = v.Args[0].Reg() - gc.AddAux(&p.To, v) - - case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVSDstoreidx8: - push(s, v.Args[2]) - var op obj.As - switch v.Op { - case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSSstoreidx4: - op = x86.AFMOVFP - case ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSDstoreidx8: - op = x86.AFMOVDP - } - p := s.Prog(op) - p.From.Type = obj.TYPE_REG - p.From.Reg = x86.REG_F0 - p.To.Type = obj.TYPE_MEM - p.To.Reg = v.Args[0].Reg() - gc.AddAux(&p.To, v) - switch v.Op { - case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1: - p.To.Scale = 1 - p.To.Index = v.Args[1].Reg() - if p.To.Index == x86.REG_SP { - p.To.Reg, p.To.Index = p.To.Index, p.To.Reg - } - case ssa.Op386MOVSSstoreidx4: - p.To.Scale = 4 - p.To.Index = v.Args[1].Reg() - case ssa.Op386MOVSDstoreidx8: - p.To.Scale = 8 - p.To.Index = v.Args[1].Reg() - } - - case ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD, - ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD: - if v.Reg() != v.Args[0].Reg() { - v.Fatalf("input[0] and output not in same register %s", v.LongString()) - } - - // Push arg1 on top of stack - push(s, v.Args[1]) - - // Set precision if needed. 64 bits is the default. - switch v.Op { - case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS: - // Save AX so we can use it as scratch space. - p := s.Prog(x86.AMOVL) - p.From.Type = obj.TYPE_REG - p.From.Reg = x86.REG_AX - s.AddrScratch(&p.To) - // Install a 32-bit version of the control word. - installControlWord(s, gc.ControlWord32, x86.REG_AX) - // Restore AX. - p = s.Prog(x86.AMOVL) - s.AddrScratch(&p.From) - p.To.Type = obj.TYPE_REG - p.To.Reg = x86.REG_AX - } - - var op obj.As - switch v.Op { - case ssa.Op386ADDSS, ssa.Op386ADDSD: - op = x86.AFADDDP - case ssa.Op386SUBSS, ssa.Op386SUBSD: - op = x86.AFSUBDP - case ssa.Op386MULSS, ssa.Op386MULSD: - op = x86.AFMULDP - case ssa.Op386DIVSS, ssa.Op386DIVSD: - op = x86.AFDIVDP - } - p := s.Prog(op) - p.From.Type = obj.TYPE_REG - p.From.Reg = x86.REG_F0 - p.To.Type = obj.TYPE_REG - p.To.Reg = s.SSEto387[v.Reg()] + 1 - - // Restore precision if needed. - switch v.Op { - case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS: - restoreControlWord(s) - } - - case ssa.Op386UCOMISS, ssa.Op386UCOMISD: - push(s, v.Args[0]) - - // Compare. - p := s.Prog(x86.AFUCOMP) - p.From.Type = obj.TYPE_REG - p.From.Reg = x86.REG_F0 - p.To.Type = obj.TYPE_REG - p.To.Reg = s.SSEto387[v.Args[1].Reg()] + 1 - - // Save AX. - p = s.Prog(x86.AMOVL) - p.From.Type = obj.TYPE_REG - p.From.Reg = x86.REG_AX - s.AddrScratch(&p.To) - - // Move status word into AX. - p = s.Prog(x86.AFSTSW) - p.To.Type = obj.TYPE_REG - p.To.Reg = x86.REG_AX - - // Then move the flags we need to the integer flags. - s.Prog(x86.ASAHF) - - // Restore AX. - p = s.Prog(x86.AMOVL) - s.AddrScratch(&p.From) - p.To.Type = obj.TYPE_REG - p.To.Reg = x86.REG_AX - - case ssa.Op386SQRTSD: - push(s, v.Args[0]) - s.Prog(x86.AFSQRT) - popAndSave(s, v) - - case ssa.Op386FCHS: - push(s, v.Args[0]) - s.Prog(x86.AFCHS) - popAndSave(s, v) - - case ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD: - p := s.Prog(x86.AMOVL) - p.From.Type = obj.TYPE_REG - p.From.Reg = v.Args[0].Reg() - s.AddrScratch(&p.To) - p = s.Prog(x86.AFMOVL) - s.AddrScratch(&p.From) - p.To.Type = obj.TYPE_REG - p.To.Reg = x86.REG_F0 - popAndSave(s, v) - - case ssa.Op386CVTTSD2SL, ssa.Op386CVTTSS2SL: - push(s, v.Args[0]) - - // Load control word which truncates (rounds towards zero). - installControlWord(s, gc.ControlWord64trunc, v.Reg()) - - // Now do the conversion. - p := s.Prog(x86.AFMOVLP) - p.From.Type = obj.TYPE_REG - p.From.Reg = x86.REG_F0 - s.AddrScratch(&p.To) - p = s.Prog(x86.AMOVL) - s.AddrScratch(&p.From) - p.To.Type = obj.TYPE_REG - p.To.Reg = v.Reg() - - // Restore control word. - restoreControlWord(s) - - case ssa.Op386CVTSS2SD: - // float32 -> float64 is a nop - push(s, v.Args[0]) - popAndSave(s, v) - - case ssa.Op386CVTSD2SS: - // Round to nearest float32. - push(s, v.Args[0]) - p := s.Prog(x86.AFMOVFP) - p.From.Type = obj.TYPE_REG - p.From.Reg = x86.REG_F0 - s.AddrScratch(&p.To) - p = s.Prog(x86.AFMOVF) - s.AddrScratch(&p.From) - p.To.Type = obj.TYPE_REG - p.To.Reg = x86.REG_F0 - popAndSave(s, v) - - case ssa.OpLoadReg: - if !v.Type.IsFloat() { - ssaGenValue(s, v) - return - } - // Load+push the value we need. - p := s.Prog(loadPush(v.Type)) - gc.AddrAuto(&p.From, v.Args[0]) - p.To.Type = obj.TYPE_REG - p.To.Reg = x86.REG_F0 - // Move the value to its assigned register. - popAndSave(s, v) - - case ssa.OpStoreReg: - if !v.Type.IsFloat() { - ssaGenValue(s, v) - return - } - push(s, v.Args[0]) - var op obj.As - switch v.Type.Size() { - case 4: - op = x86.AFMOVFP - case 8: - op = x86.AFMOVDP - } - p := s.Prog(op) - p.From.Type = obj.TYPE_REG - p.From.Reg = x86.REG_F0 - gc.AddrAuto(&p.To, v) - - case ssa.OpCopy: - if !v.Type.IsFloat() { - ssaGenValue(s, v) - return - } - push(s, v.Args[0]) - popAndSave(s, v) - - case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter: - flush387(s) // Calls must empty the FP stack. - fallthrough // then issue the call as normal - default: - ssaGenValue(s, v) - } -} - -// push pushes v onto the floating-point stack. v must be in a register. -func push(s *gc.SSAGenState, v *ssa.Value) { - p := s.Prog(x86.AFMOVD) - p.From.Type = obj.TYPE_REG - p.From.Reg = s.SSEto387[v.Reg()] - p.To.Type = obj.TYPE_REG - p.To.Reg = x86.REG_F0 -} - -// popAndSave pops a value off of the floating-point stack and stores -// it in the register assigned to v. -func popAndSave(s *gc.SSAGenState, v *ssa.Value) { - r := v.Reg() - if _, ok := s.SSEto387[r]; ok { - // Pop value, write to correct register. - p := s.Prog(x86.AFMOVDP) - p.From.Type = obj.TYPE_REG - p.From.Reg = x86.REG_F0 - p.To.Type = obj.TYPE_REG - p.To.Reg = s.SSEto387[v.Reg()] + 1 - } else { - // Don't actually pop value. This 387 register is now the - // new home for the not-yet-assigned-a-home SSE register. - // Increase the register mapping of all other registers by one. - for rSSE, r387 := range s.SSEto387 { - s.SSEto387[rSSE] = r387 + 1 - } - s.SSEto387[r] = x86.REG_F0 - } -} - -// loadPush returns the opcode for load+push of the given type. -func loadPush(t *types.Type) obj.As { - if t.Size() == 4 { - return x86.AFMOVF - } - return x86.AFMOVD -} - -// flush387 removes all entries from the 387 floating-point stack. -func flush387(s *gc.SSAGenState) { - for k := range s.SSEto387 { - p := s.Prog(x86.AFMOVDP) - p.From.Type = obj.TYPE_REG - p.From.Reg = x86.REG_F0 - p.To.Type = obj.TYPE_REG - p.To.Reg = x86.REG_F0 - delete(s.SSEto387, k) - } -} - -func ssaGenBlock387(s *gc.SSAGenState, b, next *ssa.Block) { - // Empty the 387's FP stack before the block ends. - flush387(s) - - ssaGenBlock(s, b, next) -} - -// installControlWord saves the current floating-point control -// word and installs a new one loaded from cw. -// scratchReg must be an unused register. -// This call must be paired with restoreControlWord. -// Bytes 4-5 of the scratch space (s.AddrScratch) are used between -// this call and restoreControlWord. -func installControlWord(s *gc.SSAGenState, cw *obj.LSym, scratchReg int16) { - // Save current control word. - p := s.Prog(x86.AFSTCW) - s.AddrScratch(&p.To) - p.To.Offset += 4 - - // Materialize address of new control word. - // Note: this must be a seperate instruction to handle PIE correctly. - // See issue 41503. - p = s.Prog(x86.ALEAL) - p.From.Type = obj.TYPE_MEM - p.From.Name = obj.NAME_EXTERN - p.From.Sym = cw - p.To.Type = obj.TYPE_REG - p.To.Reg = scratchReg - - // Load replacement control word. - p = s.Prog(x86.AFLDCW) - p.From.Type = obj.TYPE_MEM - p.From.Reg = scratchReg -} -func restoreControlWord(s *gc.SSAGenState) { - p := s.Prog(x86.AFLDCW) - s.AddrScratch(&p.From) - p.From.Offset += 4 -} diff --git a/src/cmd/compile/internal/x86/galign.go b/src/cmd/compile/internal/x86/galign.go index 56c6989d93..2d20b6a6d0 100644 --- a/src/cmd/compile/internal/x86/galign.go +++ b/src/cmd/compile/internal/x86/galign.go @@ -7,26 +7,13 @@ package x86 import ( "cmd/compile/internal/gc" "cmd/internal/obj/x86" - "cmd/internal/objabi" - "fmt" - "os" ) func Init(arch *gc.Arch) { arch.LinkArch = &x86.Link386 arch.REGSP = x86.REGSP - switch v := objabi.GO386; v { - case "387": - arch.Use387 = true - arch.SSAGenValue = ssaGenValue387 - arch.SSAGenBlock = ssaGenBlock387 - case "sse2": - arch.SSAGenValue = ssaGenValue - arch.SSAGenBlock = ssaGenBlock - default: - fmt.Fprintf(os.Stderr, "unsupported setting GO386=%s\n", v) - gc.Exit(1) - } + arch.SSAGenValue = ssaGenValue + arch.SSAGenBlock = ssaGenBlock arch.MAXWIDTH = (1 << 32) - 1 arch.ZeroRange = zerorange diff --git a/src/cmd/compile/internal/x86/ssa.go b/src/cmd/compile/internal/x86/ssa.go index c21ac32297..74a4570770 100644 --- a/src/cmd/compile/internal/x86/ssa.go +++ b/src/cmd/compile/internal/x86/ssa.go @@ -852,8 +852,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers gc.Warnl(v.Pos, "generated nil check") } - case ssa.Op386FCHS: - v.Fatalf("FCHS in non-387 mode") case ssa.OpClobber: p := s.Prog(x86.AMOVL) p.From.Type = obj.TYPE_CONST diff --git a/src/cmd/dist/build.go b/src/cmd/dist/build.go index 3ac742fa55..5d62c1e8fa 100644 --- a/src/cmd/dist/build.go +++ b/src/cmd/dist/build.go @@ -30,7 +30,6 @@ var ( gohostos string goos string goarm string - go386 string gomips string gomips64 string goppc64 string @@ -142,16 +141,6 @@ func xinit() { } goarm = b - b = os.Getenv("GO386") - if b == "" { - if cansse2() { - b = "sse2" - } else { - b = "387" - } - } - go386 = b - b = os.Getenv("GOMIPS") if b == "" { b = "hardfloat" @@ -223,7 +212,6 @@ func xinit() { defaultldso = os.Getenv("GO_LDSO") // For tools being invoked but also for os.ExpandEnv. - os.Setenv("GO386", go386) os.Setenv("GOARCH", goarch) os.Setenv("GOARM", goarm) os.Setenv("GOHOSTARCH", gohostarch) @@ -1165,9 +1153,6 @@ func cmdenv() { if goarch == "arm" { xprintf(format, "GOARM", goarm) } - if goarch == "386" { - xprintf(format, "GO386", go386) - } if goarch == "mips" || goarch == "mipsle" { xprintf(format, "GOMIPS", gomips) } diff --git a/src/cmd/dist/buildruntime.go b/src/cmd/dist/buildruntime.go index 2744951597..67d1d72db4 100644 --- a/src/cmd/dist/buildruntime.go +++ b/src/cmd/dist/buildruntime.go @@ -41,7 +41,6 @@ func mkzversion(dir, file string) { // package objabi // // const defaultGOROOT = -// const defaultGO386 = // const defaultGOARM = // const defaultGOMIPS = // const defaultGOMIPS64 = @@ -70,7 +69,6 @@ func mkzbootstrap(file string) { fmt.Fprintln(&buf) fmt.Fprintf(&buf, "import \"runtime\"\n") fmt.Fprintln(&buf) - fmt.Fprintf(&buf, "const defaultGO386 = `%s`\n", go386) fmt.Fprintf(&buf, "const defaultGOARM = `%s`\n", goarm) fmt.Fprintf(&buf, "const defaultGOMIPS = `%s`\n", gomips) fmt.Fprintf(&buf, "const defaultGOMIPS64 = `%s`\n", gomips64) diff --git a/src/cmd/dist/cpuid_386.s b/src/cmd/dist/cpuid_386.s deleted file mode 100644 index 65fbb2dcb7..0000000000 --- a/src/cmd/dist/cpuid_386.s +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !gccgo - -TEXT ·cpuid(SB),$0-8 - MOVL ax+4(FP), AX - CPUID - MOVL info+0(FP), DI - MOVL AX, 0(DI) - MOVL BX, 4(DI) - MOVL CX, 8(DI) - MOVL DX, 12(DI) - RET - diff --git a/src/cmd/dist/cpuid_amd64.s b/src/cmd/dist/cpuid_amd64.s deleted file mode 100644 index ea0b9d4dc9..0000000000 --- a/src/cmd/dist/cpuid_amd64.s +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !gccgo - -TEXT ·cpuid(SB),$0-12 - MOVL ax+8(FP), AX - CPUID - MOVQ info+0(FP), DI - MOVL AX, 0(DI) - MOVL BX, 4(DI) - MOVL CX, 8(DI) - MOVL DX, 12(DI) - RET - diff --git a/src/cmd/dist/cpuid_default.s b/src/cmd/dist/cpuid_default.s deleted file mode 100644 index 6412a507a9..0000000000 --- a/src/cmd/dist/cpuid_default.s +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !386,!amd64,!gccgo - -#include "textflag.h" - -TEXT ·cpuid(SB),NOSPLIT,$0-0 - RET diff --git a/src/cmd/dist/util_gc.go b/src/cmd/dist/util_gc.go index 698beef704..17a0e6fbb5 100644 --- a/src/cmd/dist/util_gc.go +++ b/src/cmd/dist/util_gc.go @@ -6,18 +6,6 @@ package main -func cpuid(info *[4]uint32, ax uint32) - -func cansse2() bool { - if gohostarch != "386" && gohostarch != "amd64" { - return false - } - - var info [4]uint32 - cpuid(&info, 1) - return info[3]&(1<<26) != 0 // SSE2 -} - // useVFPv1 tries to execute one VFPv1 instruction on ARM. // It will crash the current process if VFPv1 is missing. func useVFPv1() diff --git a/src/cmd/dist/util_gccgo.go b/src/cmd/dist/util_gccgo.go index f9f01dc048..dc897236fb 100644 --- a/src/cmd/dist/util_gccgo.go +++ b/src/cmd/dist/util_gccgo.go @@ -6,19 +6,6 @@ package main -/* -int supports_sse2() { -#if defined(__i386__) || defined(__x86_64__) - return __builtin_cpu_supports("sse2"); -#else - return 0; -#endif -} -*/ -import "C" - -func cansse2() bool { return C.supports_sse2() != 0 } - func useVFPv1() {} func useVFPv3() {} diff --git a/src/cmd/go/alldocs.go b/src/cmd/go/alldocs.go index 4bc87008ff..500682ed02 100644 --- a/src/cmd/go/alldocs.go +++ b/src/cmd/go/alldocs.go @@ -1853,9 +1853,6 @@ // GOARM // For GOARCH=arm, the ARM architecture for which to compile. // Valid values are 5, 6, 7. -// GO386 -// For GOARCH=386, the floating point instruction set. -// Valid values are 387, sse2. // GOMIPS // For GOARCH=mips{,le}, whether to use floating point instructions. // Valid values are hardfloat (default), softfloat. diff --git a/src/cmd/go/internal/cfg/cfg.go b/src/cmd/go/internal/cfg/cfg.go index 9bf1db73ef..ebbaf04115 100644 --- a/src/cmd/go/internal/cfg/cfg.go +++ b/src/cmd/go/internal/cfg/cfg.go @@ -244,7 +244,6 @@ var ( // Used in envcmd.MkEnv and build ID computations. GOARM = envOr("GOARM", fmt.Sprint(objabi.GOARM)) - GO386 = envOr("GO386", objabi.GO386) GOMIPS = envOr("GOMIPS", objabi.GOMIPS) GOMIPS64 = envOr("GOMIPS64", objabi.GOMIPS64) GOPPC64 = envOr("GOPPC64", fmt.Sprintf("%s%d", "power", objabi.GOPPC64)) @@ -268,8 +267,6 @@ func GetArchEnv() (key, val string) { switch Goarch { case "arm": return "GOARM", GOARM - case "386": - return "GO386", GO386 case "mips", "mipsle": return "GOMIPS", GOMIPS case "mips64", "mips64le": diff --git a/src/cmd/go/internal/envcmd/env.go b/src/cmd/go/internal/envcmd/env.go index 7bd75f7305..ee0bb0d0b2 100644 --- a/src/cmd/go/internal/envcmd/env.go +++ b/src/cmd/go/internal/envcmd/env.go @@ -497,7 +497,10 @@ func lineToKey(line string) string { } // sortKeyValues sorts a sequence of lines by key. -// It differs from sort.Strings in that GO386= sorts after GO=. +// It differs from sort.Strings in that keys which are GOx where x is an ASCII +// character smaller than = sort after GO=. +// (There are no such keys currently. It used to matter for GO386 which was +// removed in Go 1.16.) func sortKeyValues(lines []string) { sort.Slice(lines, func(i, j int) bool { return lineToKey(lines[i]) < lineToKey(lines[j]) diff --git a/src/cmd/go/internal/help/helpdoc.go b/src/cmd/go/internal/help/helpdoc.go index 0ae5fd7ca9..befa10a0e4 100644 --- a/src/cmd/go/internal/help/helpdoc.go +++ b/src/cmd/go/internal/help/helpdoc.go @@ -581,9 +581,6 @@ Architecture-specific environment variables: GOARM For GOARCH=arm, the ARM architecture for which to compile. Valid values are 5, 6, 7. - GO386 - For GOARCH=386, the floating point instruction set. - Valid values are 387, sse2. GOMIPS For GOARCH=mips{,le}, whether to use floating point instructions. Valid values are hardfloat (default), softfloat. diff --git a/src/cmd/go/internal/work/exec.go b/src/cmd/go/internal/work/exec.go index 51fc2b588d..e68b322c7d 100644 --- a/src/cmd/go/internal/work/exec.go +++ b/src/cmd/go/internal/work/exec.go @@ -271,7 +271,7 @@ func (b *Builder) buildActionID(a *Action) cache.ActionID { fmt.Fprintf(h, "asm %q %q %q\n", b.toolID("asm"), forcedAsmflags, p.Internal.Asmflags) } - // GO386, GOARM, GOMIPS, etc. + // GOARM, GOMIPS, etc. key, val := cfg.GetArchEnv() fmt.Fprintf(h, "%s=%s\n", key, val) @@ -1175,7 +1175,7 @@ func (b *Builder) printLinkerConfig(h io.Writer, p *load.Package) { fmt.Fprintf(h, "linkflags %q\n", p.Internal.Ldflags) } - // GO386, GOARM, GOMIPS, etc. + // GOARM, GOMIPS, etc. key, val := cfg.GetArchEnv() fmt.Fprintf(h, "%s=%s\n", key, val) diff --git a/src/cmd/internal/objabi/util.go b/src/cmd/internal/objabi/util.go index b81b73a022..cedb2d0a26 100644 --- a/src/cmd/internal/objabi/util.go +++ b/src/cmd/internal/objabi/util.go @@ -24,7 +24,6 @@ var ( GOROOT = envOr("GOROOT", defaultGOROOT) GOARCH = envOr("GOARCH", defaultGOARCH) GOOS = envOr("GOOS", defaultGOOS) - GO386 = envOr("GO386", defaultGO386) GOAMD64 = goamd64() GOARM = goarm() GOMIPS = gomips() @@ -136,6 +135,14 @@ func init() { if GOARCH != "amd64" { Regabi_enabled = 0 } + + if v := os.Getenv("GO386"); v != "" && v != "sse2" { + msg := fmt.Sprintf("unsupported setting GO386=%s", v) + if v == "387" { + msg += ". 387 support was dropped in Go 1.16. Consider using gccgo instead." + } + log.Fatal(msg) + } } // Note: must agree with runtime.framepointer_enabled. diff --git a/src/internal/cfg/cfg.go b/src/internal/cfg/cfg.go index bdbe9df3e7..023429e441 100644 --- a/src/internal/cfg/cfg.go +++ b/src/internal/cfg/cfg.go @@ -32,7 +32,6 @@ const KnownEnv = ` FC GCCGO GO111MODULE - GO386 GOARCH GOARM GOBIN diff --git a/src/reflect/all_test.go b/src/reflect/all_test.go index ec87ec0c8a..0684eab973 100644 --- a/src/reflect/all_test.go +++ b/src/reflect/all_test.go @@ -4265,24 +4265,6 @@ var gFloat32 float32 func TestConvertNaNs(t *testing.T) { const snan uint32 = 0x7f800001 - - // Test to see if a store followed by a load of a signaling NaN - // maintains the signaling bit. The only platform known to fail - // this test is 386,GO386=387. The real test below will always fail - // if the platform can't even store+load a float without mucking - // with the bits. - gFloat32 = math.Float32frombits(snan) - runtime.Gosched() // make sure we don't optimize the store/load away - r := math.Float32bits(gFloat32) - if r != snan { - // This should only happen on 386,GO386=387. We have no way to - // test for 387, so we just make sure we're at least on 386. - if runtime.GOARCH != "386" { - t.Errorf("store/load of sNaN not faithful") - } - t.Skip("skipping test, float store+load not faithful") - } - type myFloat32 float32 x := V(myFloat32(math.Float32frombits(snan))) y := x.Convert(TypeOf(float32(0))) diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index c2e14cdcd6..c5bfb0f207 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -190,40 +190,25 @@ func (l *layout) restore() { func gen386() { p("PUSHFL") - // Save general purpose registers. + // Assign stack offsets. var l = layout{sp: "SP"} for _, reg := range regNames386 { - if reg == "SP" || strings.HasPrefix(reg, "X") { + if reg == "SP" { continue } - l.add("MOVL", reg, 4) - } - - // Save the 387 state. - l.addSpecial( - "FSAVE %d(SP)\nFLDCW runtime·controlWord64(SB)", - "FRSTOR %d(SP)", - 108) - - // Save SSE state only if supported. - lSSE := layout{stack: l.stack, sp: "SP"} - for i := 0; i < 8; i++ { - lSSE.add("MOVUPS", fmt.Sprintf("X%d", i), 16) + if strings.HasPrefix(reg, "X") { + l.add("MOVUPS", reg, 16) + } else { + l.add("MOVL", reg, 4) + } } - p("ADJSP $%d", lSSE.stack) + p("ADJSP $%d", l.stack) p("NOP SP") l.save() - p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse") - lSSE.save() - label("nosse:") p("CALL ·asyncPreempt2(SB)") - p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse2") - lSSE.restore() - label("nosse2:") l.restore() - p("ADJSP $%d", -lSSE.stack) - + p("ADJSP $%d", -l.stack) p("POPFL") p("RET") } diff --git a/src/runtime/preempt_386.s b/src/runtime/preempt_386.s index a00ac8f385..5c9b8ea224 100644 --- a/src/runtime/preempt_386.s +++ b/src/runtime/preempt_386.s @@ -5,7 +5,7 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 PUSHFL - ADJSP $264 + ADJSP $156 NOP SP MOVL AX, 0(SP) MOVL CX, 4(SP) @@ -14,32 +14,23 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVL BP, 16(SP) MOVL SI, 20(SP) MOVL DI, 24(SP) - FSAVE 28(SP) - FLDCW runtime·controlWord64(SB) - CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1 - JNE nosse - MOVUPS X0, 136(SP) - MOVUPS X1, 152(SP) - MOVUPS X2, 168(SP) - MOVUPS X3, 184(SP) - MOVUPS X4, 200(SP) - MOVUPS X5, 216(SP) - MOVUPS X6, 232(SP) - MOVUPS X7, 248(SP) -nosse: + MOVUPS X0, 28(SP) + MOVUPS X1, 44(SP) + MOVUPS X2, 60(SP) + MOVUPS X3, 76(SP) + MOVUPS X4, 92(SP) + MOVUPS X5, 108(SP) + MOVUPS X6, 124(SP) + MOVUPS X7, 140(SP) CALL ·asyncPreempt2(SB) - CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1 - JNE nosse2 - MOVUPS 248(SP), X7 - MOVUPS 232(SP), X6 - MOVUPS 216(SP), X5 - MOVUPS 200(SP), X4 - MOVUPS 184(SP), X3 - MOVUPS 168(SP), X2 - MOVUPS 152(SP), X1 - MOVUPS 136(SP), X0 -nosse2: - FRSTOR 28(SP) + MOVUPS 140(SP), X7 + MOVUPS 124(SP), X6 + MOVUPS 108(SP), X5 + MOVUPS 92(SP), X4 + MOVUPS 76(SP), X3 + MOVUPS 60(SP), X2 + MOVUPS 44(SP), X1 + MOVUPS 28(SP), X0 MOVL 24(SP), DI MOVL 20(SP), SI MOVL 16(SP), BP @@ -47,6 +38,6 @@ nosse2: MOVL 8(SP), DX MOVL 4(SP), CX MOVL 0(SP), AX - ADJSP $-264 + ADJSP $-156 POPFL RET diff --git a/src/runtime/vlrt.go b/src/runtime/vlrt.go index 38e0b32801..996c0611fd 100644 --- a/src/runtime/vlrt.go +++ b/src/runtime/vlrt.go @@ -263,7 +263,7 @@ func slowdodiv(n, d uint64) (q, r uint64) { return q, n } -// Floating point control word values for GOARCH=386 GO386=387. +// Floating point control word values. // Bits 0-5 are bits to disable floating-point exceptions. // Bits 8-9 are the precision control: // 0 = single precision a.k.a. float32 @@ -273,6 +273,5 @@ func slowdodiv(n, d uint64) (q, r uint64) { // 3 = round toward zero var ( controlWord64 uint16 = 0x3f + 2<<8 + 0<<10 - controlWord32 = 0x3f + 0<<8 + 0<<10 - controlWord64trunc = 0x3f + 2<<8 + 3<<10 + controlWord64trunc uint16 = 0x3f + 2<<8 + 3<<10 ) diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index 0bdb66a376..30f39a8da1 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -125,7 +125,7 @@ func Mul_n120(n int) int { func MulMemSrc(a []uint32, b []float32) { // 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+` a[0] *= a[1] - // 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+` + // 386:`MULSS\s4\([A-Z]+\),\sX[0-9]+` // amd64:`MULSS\s4\([A-Z]+\),\sX[0-9]+` b[0] *= b[1] } @@ -167,7 +167,7 @@ func MergeMuls5(a, n int) int { // -------------- // func DivMemSrc(a []float64) { - // 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+` + // 386:`DIVSD\s8\([A-Z]+\),\sX[0-9]+` // amd64:`DIVSD\s8\([A-Z]+\),\sX[0-9]+` a[0] /= a[1] } @@ -211,7 +211,7 @@ func ConstDivs(n1 uint, n2 int) (uint, int) { func FloatDivs(a []float32) float32 { // amd64:`DIVSS\s8\([A-Z]+\),\sX[0-9]+` - // 386/sse2:`DIVSS\s8\([A-Z]+\),\sX[0-9]+` + // 386:`DIVSS\s8\([A-Z]+\),\sX[0-9]+` return a[1] / a[2] } diff --git a/test/codegen/floats.go b/test/codegen/floats.go index 3fae1a327c..d115800a67 100644 --- a/test/codegen/floats.go +++ b/test/codegen/floats.go @@ -6,8 +6,6 @@ package codegen -import "math" - // This file contains codegen tests related to arithmetic // simplifications and optimizations on float types. // For codegen tests on integer types, see arithmetic.go. @@ -17,8 +15,7 @@ import "math" // --------------------- // func Mul2(f float64) float64 { - // 386/sse2:"ADDSD",-"MULSD" - // 386/387:"FADDDP",-"FMULDP" + // 386:"ADDSD",-"MULSD" // amd64:"ADDSD",-"MULSD" // arm/7:"ADDD",-"MULD" // arm64:"FADDD",-"FMULD" @@ -28,8 +25,7 @@ func Mul2(f float64) float64 { } func DivPow2(f1, f2, f3 float64) (float64, float64, float64) { - // 386/sse2:"MULSD",-"DIVSD" - // 386/387:"FMULDP",-"FDIVDP" + // 386:"MULSD",-"DIVSD" // amd64:"MULSD",-"DIVSD" // arm/7:"MULD",-"DIVD" // arm64:"FMULD",-"FDIVD" @@ -37,8 +33,7 @@ func DivPow2(f1, f2, f3 float64) (float64, float64, float64) { // ppc64le:"FMUL",-"FDIV" x := f1 / 16.0 - // 386/sse2:"MULSD",-"DIVSD" - // 386/387:"FMULDP",-"FDIVDP" + // 386:"MULSD",-"DIVSD" // amd64:"MULSD",-"DIVSD" // arm/7:"MULD",-"DIVD" // arm64:"FMULD",-"FDIVD" @@ -46,8 +41,7 @@ func DivPow2(f1, f2, f3 float64) (float64, float64, float64) { // ppc64le:"FMUL",-"FDIVD" y := f2 / 0.125 - // 386/sse2:"ADDSD",-"DIVSD",-"MULSD" - // 386/387:"FADDDP",-"FDIVDP",-"FMULDP" + // 386:"ADDSD",-"DIVSD",-"MULSD" // amd64:"ADDSD",-"DIVSD",-"MULSD" // arm/7:"ADDD",-"MULD",-"DIVD" // arm64:"FADDD",-"FMULD",-"FDIVD" @@ -58,11 +52,6 @@ func DivPow2(f1, f2, f3 float64) (float64, float64, float64) { return x, y, z } -func getPi() float64 { - // 386/387:"FLDPI" - return math.Pi -} - func indexLoad(b0 []float32, b1 float32, idx int) float32 { // arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+\),\sF[0-9]+` return b0[idx] * b1 diff --git a/test/codegen/math.go b/test/codegen/math.go index 1ebfda0405..fe678eea23 100644 --- a/test/codegen/math.go +++ b/test/codegen/math.go @@ -46,7 +46,7 @@ func approx(x float64) { func sqrt(x float64) float64 { // amd64:"SQRTSD" - // 386/387:"FSQRT" 386/sse2:"SQRTSD" + // 386:"SQRTSD" // arm64:"FSQRTD" // arm/7:"SQRTD" // mips/hardfloat:"SQRTD" mips/softfloat:-"SQRTD" diff --git a/test/codegen/memops.go b/test/codegen/memops.go index a234283146..4b003ad861 100644 --- a/test/codegen/memops.go +++ b/test/codegen/memops.go @@ -175,33 +175,33 @@ func idxInt64(x, y []int64, i int) { func idxFloat32(x, y []float32, i int) { var t float32 - // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` - // 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + // 386: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` t = x[i+1] - // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` - // 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + // 386: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` y[i+1] = t - // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` - // 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` + // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` + // 386: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` t = x[16*i+1] - // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` - // 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` + // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` + // 386: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` y[16*i+1] = t } func idxFloat64(x, y []float64, i int) { var t float64 - // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` - // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + // 386: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` t = x[i+1] - // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` - // 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + // 386: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` y[i+1] = t - // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+` - // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+` + // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+` + // 386: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+` t = x[16*i+1] - // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` - // 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` + // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` + // 386: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` y[16*i+1] = t } diff --git a/test/run.go b/test/run.go index 95b94b7277..77710fd89a 100644 --- a/test/run.go +++ b/test/run.go @@ -1489,7 +1489,7 @@ var ( // value[0] is the variant-changing environment variable, and values[1:] // are the supported variants. archVariants = map[string][]string{ - "386": {"GO386", "387", "sse2"}, + "386": {}, "amd64": {}, "arm": {"GOARM", "5", "6", "7"}, "arm64": {}, @@ -1511,12 +1511,12 @@ type wantedAsmOpcode struct { found bool // true if the opcode check matched at least one in the output } -// A build environment triplet separated by slashes (eg: linux/386/sse2). +// A build environment triplet separated by slashes (eg: linux/arm/7). // The third field can be empty if the arch does not support variants (eg: "plan9/amd64/") type buildEnv string // Environ returns the environment it represents in cmd.Environ() "key=val" format -// For instance, "linux/386/sse2".Environ() returns {"GOOS=linux", "GOARCH=386", "GO386=sse2"} +// For instance, "linux/arm/7".Environ() returns {"GOOS=linux", "GOARCH=arm", "GOARM=7"} func (b buildEnv) Environ() []string { fields := strings.Split(string(b), "/") if len(fields) != 3 { @@ -1571,11 +1571,11 @@ func (t *test) wantedAsmOpcodes(fn string) asmChecks { var arch, subarch, os string switch { - case archspec[2] != "": // 3 components: "linux/386/sse2" + case archspec[2] != "": // 3 components: "linux/arm/7" os, arch, subarch = archspec[0], archspec[1][1:], archspec[2][1:] - case archspec[1] != "": // 2 components: "386/sse2" + case archspec[1] != "": // 2 components: "arm/7" os, arch, subarch = "linux", archspec[0], archspec[1][1:] - default: // 1 component: "386" + default: // 1 component: "arm" os, arch, subarch = "linux", archspec[0], "" if arch == "wasm" { os = "js" -- cgit v1.3 From bdab5df40f474c7768a945ef4fcf5aab634f7af5 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Fri, 2 Oct 2020 17:51:13 -0400 Subject: cmd/compile,cmd/internal/obj/ppc64: use mulli where possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support to allow the use of mulli when one of the multiply operands is a constant that fits in 16 bits. This especially helps in the case where this instruction appears in a loop since the load of the constant is not being moved out of the loop. Some improvements seen in compress/flate on power9: Decode/Digits/Huffman/1e4 259µs ± 0% 261µs ± 0% +0.57% (p=1.000 n=1+1) Decode/Digits/Huffman/1e5 2.43ms ± 0% 2.45ms ± 0% +0.79% (p=1.000 n=1+1) Decode/Digits/Huffman/1e6 23.9ms ± 0% 24.2ms ± 0% +0.86% (p=1.000 n=1+1) Decode/Digits/Speed/1e4 278µs ± 0% 279µs ± 0% +0.34% (p=1.000 n=1+1) Decode/Digits/Speed/1e5 2.80ms ± 0% 2.81ms ± 0% +0.29% (p=1.000 n=1+1) Decode/Digits/Speed/1e6 28.0ms ± 0% 28.1ms ± 0% +0.28% (p=1.000 n=1+1) Decode/Digits/Default/1e4 278µs ± 0% 278µs ± 0% +0.28% (p=1.000 n=1+1) Decode/Digits/Default/1e5 2.68ms ± 0% 2.69ms ± 0% +0.19% (p=1.000 n=1+1) Decode/Digits/Default/1e6 26.6ms ± 0% 26.6ms ± 0% +0.21% (p=1.000 n=1+1) Decode/Digits/Compression/1e4 278µs ± 0% 278µs ± 0% +0.00% (p=1.000 n=1+1) Decode/Digits/Compression/1e5 2.68ms ± 0% 2.69ms ± 0% +0.21% (p=1.000 n=1+1) Decode/Digits/Compression/1e6 26.6ms ± 0% 26.6ms ± 0% +0.07% (p=1.000 n=1+1) Decode/Newton/Huffman/1e4 322µs ± 0% 312µs ± 0% -2.84% (p=1.000 n=1+1) Decode/Newton/Huffman/1e5 3.11ms ± 0% 2.91ms ± 0% -6.41% (p=1.000 n=1+1) Decode/Newton/Huffman/1e6 31.4ms ± 0% 29.3ms ± 0% -6.85% (p=1.000 n=1+1) Decode/Newton/Speed/1e4 282µs ± 0% 269µs ± 0% -4.69% (p=1.000 n=1+1) Decode/Newton/Speed/1e5 2.29ms ± 0% 2.20ms ± 0% -4.13% (p=1.000 n=1+1) Decode/Newton/Speed/1e6 22.7ms ± 0% 21.3ms ± 0% -6.06% (p=1.000 n=1+1) Decode/Newton/Default/1e4 254µs ± 0% 237µs ± 0% -6.60% (p=1.000 n=1+1) Decode/Newton/Default/1e5 1.86ms ± 0% 1.75ms ± 0% -5.99% (p=1.000 n=1+1) Decode/Newton/Default/1e6 18.1ms ± 0% 17.4ms ± 0% -4.10% (p=1.000 n=1+1) Decode/Newton/Compression/1e4 254µs ± 0% 244µs ± 0% -3.91% (p=1.000 n=1+1) Decode/Newton/Compression/1e5 1.85ms ± 0% 1.79ms ± 0% -3.10% (p=1.000 n=1+1) Decode/Newton/Compression/1e6 18.0ms ± 0% 17.3ms ± 0% -3.88% (p=1.000 n=1+1) Change-Id: I840320fab1c4bf64c76b001c2651ab79f23df4eb Reviewed-on: https://go-review.googlesource.com/c/go/+/259444 Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Paul Murphy Reviewed-by: Carlos Eduardo Seo Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 4 +++ src/cmd/compile/internal/gc/bench_test.go | 12 +++++++ src/cmd/compile/internal/ppc64/ssa.go | 3 +- src/cmd/compile/internal/ssa/gen/PPC64.rules | 2 ++ src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 2 ++ src/cmd/compile/internal/ssa/opGen.go | 30 ++++++++++++++++ src/cmd/compile/internal/ssa/rewritePPC64.go | 54 ++++++++++++++++++++++++++++ src/cmd/internal/obj/ppc64/asm9.go | 9 ++--- 8 files changed, 111 insertions(+), 5 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s index 869f8c2d4f..c6d7b59aad 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64enc.s @@ -204,12 +204,16 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 MULLW R3, R4 // 7c8419d6 MULLW R3, R4, R5 // 7ca419d6 + MULLW $10, R3 // 1c63000a + MULLW $10000000, R3 // 641f009863ff96807c7f19d6 MULLWCC R3, R4, R5 // 7ca419d7 MULHW R3, R4, R5 // 7ca41896 MULHWU R3, R4, R5 // 7ca41816 MULLD R3, R4 // 7c8419d2 MULLD R4, R4, R5 // 7ca421d2 + MULLD $20, R4 // 1c840014 + MULLD $200000000, R4 // 641f0beb63ffc2007c9f21d2 MULLDCC R3, R4, R5 // 7ca419d3 MULHD R3, R4, R5 // 7ca41892 MULHDCC R3, R4, R5 // 7ca41893 diff --git a/src/cmd/compile/internal/gc/bench_test.go b/src/cmd/compile/internal/gc/bench_test.go index a2887f2f7b..8c4288128f 100644 --- a/src/cmd/compile/internal/gc/bench_test.go +++ b/src/cmd/compile/internal/gc/bench_test.go @@ -7,6 +7,7 @@ package gc import "testing" var globl int64 +var globl32 int32 func BenchmarkLoadAdd(b *testing.B) { x := make([]int64, 1024) @@ -42,6 +43,17 @@ func BenchmarkModify(b *testing.B) { } } +func BenchmarkMullImm(b *testing.B) { + x := make([]int32, 1024) + for i := 0; i < b.N; i++ { + var s int32 + for i := range x { + s += x[i] * 100 + } + globl32 = s + } +} + func BenchmarkConstModify(b *testing.B) { a := make([]int64, 1024) for i := 0; i < b.N; i++ { diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index d83b2df379..1ece4d999f 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -677,7 +677,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.From.Reg = v.Args[0].Reg() case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst, - ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst: + ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, + ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst: p := s.Prog(v.Op.Asm()) p.Reg = v.Args[0].Reg() p.From.Type = obj.TYPE_CONST diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index 83ee4c499b..a05cfee654 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -821,6 +821,8 @@ (ADDconst [c] (MOVDaddr [d] {sym} x)) && is32Bit(c+int64(d)) => (MOVDaddr [int32(c+int64(d))] {sym} x) +(MULL(W|D) x (MOVDconst [c])) && is16Bit(c) => (MULL(W|D)const [int32(c)] x) + // Subtract from (with carry, but ignored) constant. // Note, these clobber the carry bit. (SUB (MOVDconst [c]) x) && is32Bit(c) => (SUBFCconst [c] x) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index 28317928a8..5885660597 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -181,6 +181,8 @@ func init() { {name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit) {name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit) + {name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit) + {name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit) {name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"}, // (arg0*arg1)+arg2 (signed 64-bit) {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true}, // (arg0 * arg1) >> 64, signed diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index d7d2b24a48..051550fb17 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1832,6 +1832,8 @@ const ( OpPPC64FSUBS OpPPC64MULLD OpPPC64MULLW + OpPPC64MULLDconst + OpPPC64MULLWconst OpPPC64MADDLD OpPPC64MULHD OpPPC64MULHW @@ -24377,6 +24379,34 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "MULLDconst", + auxType: auxInt32, + argLen: 1, + asm: ppc64.AMULLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, + { + name: "MULLWconst", + auxType: auxInt32, + argLen: 1, + asm: ppc64.AMULLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, { name: "MADDLD", argLen: 3, diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 9822637b05..1b8a5a78ca 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -568,6 +568,10 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64MOVWstorezero(v) case OpPPC64MTVSRD: return rewriteValuePPC64_OpPPC64MTVSRD(v) + case OpPPC64MULLD: + return rewriteValuePPC64_OpPPC64MULLD(v) + case OpPPC64MULLW: + return rewriteValuePPC64_OpPPC64MULLW(v) case OpPPC64NEG: return rewriteValuePPC64_OpPPC64NEG(v) case OpPPC64NOR: @@ -11003,6 +11007,56 @@ func rewriteValuePPC64_OpPPC64MTVSRD(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64MULLD(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MULLD x (MOVDconst [c])) + // cond: is16Bit(c) + // result: (MULLDconst [int32(c)] x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpPPC64MOVDconst { + continue + } + c := auxIntToInt64(v_1.AuxInt) + if !(is16Bit(c)) { + continue + } + v.reset(OpPPC64MULLDconst) + v.AuxInt = int32ToAuxInt(int32(c)) + v.AddArg(x) + return true + } + break + } + return false +} +func rewriteValuePPC64_OpPPC64MULLW(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MULLW x (MOVDconst [c])) + // cond: is16Bit(c) + // result: (MULLWconst [int32(c)] x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpPPC64MOVDconst { + continue + } + c := auxIntToInt64(v_1.AuxInt) + if !(is16Bit(c)) { + continue + } + v.reset(OpPPC64MULLWconst) + v.AuxInt = int32ToAuxInt(int32(c)) + v.AddArg(x) + return true + } + break + } + return false +} func rewriteValuePPC64_OpPPC64NEG(v *Value) bool { v_0 := v.Args[0] // match: (NEG (ADDconst [c] x)) diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 928e299f43..c2e8e9e9d0 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -1279,6 +1279,9 @@ func buildop(ctxt *obj.Link) { case AREMD: opset(AREMDU, r0) + case AMULLW: + opset(AMULLD, r0) + case ADIVW: /* op Rb[,Ra],Rd */ opset(AMULHW, r0) @@ -1312,7 +1315,6 @@ func buildop(ctxt *obj.Link) { opset(AMULHDCC, r0) opset(AMULHDU, r0) opset(AMULHDUCC, r0) - opset(AMULLD, r0) opset(AMULLDCC, r0) opset(AMULLDVCC, r0) opset(AMULLDV, r0) @@ -1996,7 +1998,6 @@ func buildop(ctxt *obj.Link) { AMOVB, /* macro: move byte with sign extension */ AMOVBU, /* macro: move byte with sign extension & update */ AMOVFL, - AMULLW, /* op $s[,r2],r3; op r1[,r2],r3; no cc/v */ ASUBC, /* op r1,$s,r3; op r1[,r2],r3 */ ASTSW, @@ -4990,8 +4991,8 @@ func (c *ctxt9) opirr(a obj.As) uint32 { case ADARN: return OPVCC(31, 755, 0, 0) /* darn - v3.00 */ - case AMULLW: - return OPVCC(7, 0, 0, 0) + case AMULLW, AMULLD: + return OPVCC(7, 0, 0, 0) /* mulli works with MULLW or MULLD */ case AOR: return OPVCC(24, 0, 0, 0) -- cgit v1.3 From 3036b76df0ae748856e3e0008b67241cc580e263 Mon Sep 17 00:00:00 2001 From: Meng Zhuo Date: Sat, 13 Jun 2020 00:06:49 +0800 Subject: cmd/asm: Add SHA3 hardware instructions for ARM64 Armv8.2-SHA introduced four SHA3-related instructions EOR3 .16B, .16B, .16B, .16B RAX1 .2D, .2D, .2D XAR .2D, .2D, .2D, # BCAX .16B, .16B, .16B, .16B We convert them into Go asm style as: VEOR3 .B16, .B16, .B16, .B16 VRAX1 .D2, .D2, .D2 VXAR $imm6, .D2, .D2, .D2 VBCAX .B16, .B16, .B16, .B16 Armv8 Reference Manual: * EOR3 (Three-way Exclusive OR) on C7.2.42 * RAX1 (Rotate and Exclusive OR) on C7.2.217 * XAR (Exclusive OR and Rotate) on C7.2.401 * BCAX (Bit Clear and Exclusive OR) on C7.2.12 Change-Id: I9a5d1b5ad508ed8fd5289d535906c54d9a63ca5a Reviewed-on: https://go-review.googlesource.com/c/go/+/180757 Run-TryBot: Meng Zhuo TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Trust: Emmanuel Odeke --- src/cmd/asm/internal/asm/testdata/arm64.s | 4 ++ src/cmd/internal/obj/arm64/a.out.go | 4 ++ src/cmd/internal/obj/arm64/anames.go | 4 ++ src/cmd/internal/obj/arm64/asm7.go | 73 ++++++++++++++++++++++++++++++- 4 files changed, 83 insertions(+), 2 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index e277c04b7c..7f495b90bb 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -81,6 +81,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 SHA512H2 V4.D2, V3, V2 // 628464ce SHA512SU0 V9.D2, V8.D2 // 2881c0ce SHA512SU1 V7.D2, V6.D2, V5.D2 // c58867ce + VRAX1 V26.D2, V29.D2, V30.D2 // be8f7ace + VXAR $63, V27.D2, V21.D2, V26.D2 // bafe9bce VADDV V0.S4, V0 // 00b8b14e VMOVI $82, V0.B16 // 40e6024f VUADDLV V6.B16, V6 // c638306e @@ -139,6 +141,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VTBL V14.B16, [V3.B16, V4.B16, V5.B16], V17.B16 // 71400e4e VTBL V13.B16, [V29.B16, V30.B16, V31.B16, V0.B16], V28.B16 // bc630d4e VTBL V3.B8, [V27.B16], V8.B8 // 6803030e + VEOR3 V2.B16, V7.B16, V12.B16, V25.B16 // 990907ce + VBCAX V1.B16, V2.B16, V26.B16, V31.B16 // 5f0722ce VZIP1 V16.H8, V3.H8, V19.H8 // 7338504e VZIP2 V22.D2, V25.D2, V21.D2 // 357bd64e VZIP1 V6.D2, V9.D2, V11.D2 // 2b39c64e diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 1ca41c15ba..33319e48df 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -958,9 +958,11 @@ const ( AVADDP AVAND AVBIF + AVBCAX AVCMEQ AVCNT AVEOR + AVEOR3 AVMOV AVLD1 AVLD2 @@ -989,6 +991,7 @@ const ( AVPMULL2 AVEXT AVRBIT + AVRAX1 AVUSHR AVUSHLL AVUSHLL2 @@ -1001,6 +1004,7 @@ const ( AVBSL AVBIT AVTBL + AVXAR AVZIP1 AVZIP2 AVCMTST diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 900cdba817..e5534e26b9 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -464,9 +464,11 @@ var Anames = []string{ "VADDP", "VAND", "VBIF", + "VBCAX", "VCMEQ", "VCNT", "VEOR", + "VEOR3", "VMOV", "VLD1", "VLD2", @@ -495,6 +497,7 @@ var Anames = []string{ "VPMULL2", "VEXT", "VRBIT", + "VRAX1", "VUSHR", "VUSHLL", "VUSHLL2", @@ -507,6 +510,7 @@ var Anames = []string{ "VBSL", "VBIT", "VTBL", + "VXAR", "VZIP1", "VZIP2", "VCMTST", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 7c35fce106..c46066313e 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -843,6 +843,8 @@ var optab = []Optab{ {ASHA256H, C_ARNG, C_VREG, C_NONE, C_VREG, 1, 4, 0, 0, 0}, {AVREV32, C_ARNG, C_NONE, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, {AVPMULL, C_ARNG, C_ARNG, C_NONE, C_ARNG, 93, 4, 0, 0, 0}, + {AVEOR3, C_ARNG, C_ARNG, C_ARNG, C_ARNG, 103, 4, 0, 0, 0}, + {AVXAR, C_VCON, C_ARNG, C_ARNG, C_ARNG, 104, 4, 0, 0, 0}, {obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0}, {obj.APCDATA, C_VCON, C_NONE, C_NONE, C_VCON, 0, 0, 0, 0, 0}, @@ -2769,6 +2771,7 @@ func buildop(ctxt *obj.Link) { case AVADD: oprangeset(AVSUB, t) + oprangeset(AVRAX1, t) case AAESD: oprangeset(AAESE, t) @@ -2827,6 +2830,9 @@ func buildop(ctxt *obj.Link) { oprangeset(AVLD4, t) oprangeset(AVLD4R, t) + case AVEOR3: + oprangeset(AVBCAX, t) + case ASHA1H, AVCNT, AVMOV, @@ -2839,7 +2845,8 @@ func buildop(ctxt *obj.Link) { AVDUP, AVMOVI, APRFM, - AVEXT: + AVEXT, + AVXAR: break case obj.ANOP, @@ -4205,7 +4212,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Add = 0 rel.Type = objabi.R_ARM64_GOTPCREL - case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2 Vm., Vn., Vd. */ + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2/vrax1 Vm., Vn., Vd. */ af := int((p.From.Reg >> 5) & 15) af3 := int((p.Reg >> 5) & 15) at := int((p.To.Reg >> 5) & 15) @@ -4269,6 +4276,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } else { size = 0 } + case AVRAX1: + if af != ARNG_2D { + c.ctxt.Diag("invalid arrangement: %v", p) + } + size = 0 + Q = 0 } o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) @@ -5186,6 +5199,51 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { c.ctxt.Diag("shift amount out of range: %v\n", p) } o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31) + case 103: /* VEOR3/VBCAX Va.B16, Vm.B16, Vn.B16, Vd.B16 */ + ta := (p.From.Reg >> 5) & 15 + tm := (p.Reg >> 5) & 15 + td := (p.To.Reg >> 5) & 15 + tn := ((p.GetFrom3().Reg) >> 5) & 15 + + if ta != tm || ta != tn || ta != td || ta != ARNG_16B { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + o1 = c.oprrr(p, p.As) + ra := int(p.From.Reg) + rm := int(p.Reg) + rn := int(p.GetFrom3().Reg) + rd := int(p.To.Reg) + o1 |= uint32(rm&31)<<16 | uint32(ra&31)<<10 | uint32(rn&31)<<5 | uint32(rd)&31 + + case 104: /* vxar $imm4, Vm., Vn., Vd. */ + af := ((p.GetFrom3().Reg) >> 5) & 15 + at := (p.To.Reg >> 5) & 15 + a := (p.Reg >> 5) & 15 + index := int(p.From.Offset) + + if af != a || af != at { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + if af != ARNG_2D { + c.ctxt.Diag("invalid arrangement, should be D2: %v", p) + break + } + + if index < 0 || index > 63 { + c.ctxt.Diag("illegal offset: %v", p) + } + + o1 = c.opirr(p, p.As) + rf := (p.GetFrom3().Reg) & 31 + rt := (p.To.Reg) & 31 + r := (p.Reg) & 31 + + o1 |= (uint32(r&31) << 16) | (uint32(index&63) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + } out[0] = o1 out[1] = o2 @@ -5761,6 +5819,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVAND: return 7<<25 | 1<<21 | 7<<10 + case AVBCAX: + return 0xCE<<24 | 1<<21 + case AVCMEQ: return 1<<29 | 0x71<<21 | 0x23<<10 @@ -5776,12 +5837,18 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVEOR: return 1<<29 | 0x71<<21 | 7<<10 + case AVEOR3: + return 0xCE << 24 + case AVORR: return 7<<25 | 5<<21 | 7<<10 case AVREV16: return 3<<26 | 2<<24 | 1<<21 | 3<<11 + case AVRAX1: + return 0xCE<<24 | 3<<21 | 1<<15 | 3<<10 + case AVREV32: return 11<<26 | 2<<24 | 1<<21 | 1<<11 @@ -6039,6 +6106,8 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVUSHLL2, AVUXTL2: return 3<<29 | 15<<24 | 0x29<<10 + case AVXAR: + return 0xCE<<24 | 1<<23 } c.ctxt.Diag("%v: bad irr %v", p, a) -- cgit v1.3 From 85f829deb8adfb7c0d73acabfdb458de969f6763 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 29 Jun 2020 17:08:49 -0400 Subject: cmd/asm: reject misplaced go:build comments We are converting from using error-prone ad-hoc syntax // +build lines to less error-prone, standard boolean syntax //go:build lines. The timeline is: Go 1.16: prepare for transition - Builds still use // +build for file selection. - Source files may not contain //go:build without // +build. - Builds fail when a source file contains //go:build lines without // +build lines. <<< Go 1.17: start transition - Builds prefer //go:build for file selection, falling back to // +build for files containing only // +build. - Source files may contain //go:build without // +build (but they won't build with Go 1.16). - Gofmt moves //go:build and // +build lines to proper file locations. - Gofmt introduces //go:build lines into files with only // +build lines. - Go vet rejects files with mismatched //go:build and // +build lines. Go 1.18: complete transition - Go fix removes // +build lines, leaving behind equivalent // +build lines. This CL provides part of the <<< marked line above in the Go 1.16 step: rejecting files containing //go:build but not // +build. Reject any //go:build comments found after actual assembler code (include #include etc directives), because the go command itself doesn't read that far. For #41184. Change-Id: Ib460bfd380cce4239993980dd208afd07deff3f1 Reviewed-on: https://go-review.googlesource.com/c/go/+/240602 Trust: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Go Bot Reviewed-by: Ian Lance Taylor --- src/cmd/asm/internal/asm/endtoend_test.go | 11 +++++--- src/cmd/asm/internal/asm/parse.go | 31 ++++++++++++++++++++--- src/cmd/asm/internal/asm/testdata/buildtagerror.s | 8 ++++++ src/cmd/asm/internal/lex/input.go | 3 +++ src/cmd/asm/internal/lex/lex.go | 12 +++++---- src/cmd/asm/internal/lex/lex_test.go | 3 +++ src/cmd/asm/internal/lex/tokenizer.go | 11 +++++--- 7 files changed, 64 insertions(+), 15 deletions(-) create mode 100644 src/cmd/asm/internal/asm/testdata/buildtagerror.s (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go index 15202dc5dc..b21e3156ae 100644 --- a/src/cmd/asm/internal/asm/endtoend_test.go +++ b/src/cmd/asm/internal/asm/endtoend_test.go @@ -257,11 +257,11 @@ func isHexes(s string) bool { return true } -// It would be nice if the error messages began with +// It would be nice if the error messages always began with // the standard file:line: prefix, // but that's not where we are today. // It might be at the beginning but it might be in the middle of the printed instruction. -var fileLineRE = regexp.MustCompile(`(?:^|\()(testdata[/\\][0-9a-z]+\.s:[0-9]+)(?:$|\))`) +var fileLineRE = regexp.MustCompile(`(?:^|\()(testdata[/\\][0-9a-z]+\.s:[0-9]+)(?:$|\)|:)`) // Same as in test/run.go var ( @@ -281,6 +281,7 @@ func testErrors(t *testing.T, goarch, file string) { defer ctxt.Bso.Flush() failed := false var errBuf bytes.Buffer + parser.errorWriter = &errBuf ctxt.DiagFunc = func(format string, args ...interface{}) { failed = true s := fmt.Sprintf(format, args...) @@ -292,7 +293,7 @@ func testErrors(t *testing.T, goarch, file string) { pList.Firstpc, ok = parser.Parse() obj.Flushplist(ctxt, pList, nil, "") if ok && !failed { - t.Errorf("asm: %s had no errors", goarch) + t.Errorf("asm: %s had no errors", file) } errors := map[string]string{} @@ -368,6 +369,10 @@ func TestARMEndToEnd(t *testing.T) { } } +func TestGoBuildErrors(t *testing.T) { + testErrors(t, "amd64", "buildtagerror") +} + func TestARMErrors(t *testing.T) { testErrors(t, "arm", "armerror") } diff --git a/src/cmd/asm/internal/asm/parse.go b/src/cmd/asm/internal/asm/parse.go index 17d40ee415..d9dbd92cb0 100644 --- a/src/cmd/asm/internal/asm/parse.go +++ b/src/cmd/asm/internal/asm/parse.go @@ -29,6 +29,7 @@ type Parser struct { lineNum int // Line number in source file. errorLine int // Line number of last error. errorCount int // Number of errors. + sawCode bool // saw code in this file (as opposed to comments and blank lines) pc int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA. input []lex.Token inputPos int @@ -132,6 +133,30 @@ func (p *Parser) ParseSymABIs(w io.Writer) bool { return p.errorCount == 0 } +// nextToken returns the next non-build-comment token from the lexer. +// It reports misplaced //go:build comments but otherwise discards them. +func (p *Parser) nextToken() lex.ScanToken { + for { + tok := p.lex.Next() + if tok == lex.BuildComment { + if p.sawCode { + p.errorf("misplaced //go:build comment") + } + continue + } + if tok != '\n' { + p.sawCode = true + } + if tok == '#' { + // A leftover wisp of a #include/#define/etc, + // to let us know that p.sawCode should be true now. + // Otherwise ignored. + continue + } + return tok + } +} + // line consumes a single assembly line from p.lex of the form // // {label:} WORD[.cond] [ arg {, arg} ] (';' | '\n') @@ -146,7 +171,7 @@ next: // Skip newlines. var tok lex.ScanToken for { - tok = p.lex.Next() + tok = p.nextToken() // We save the line number here so error messages from this instruction // are labeled with this line. Otherwise we complain after we've absorbed // the terminating newline and the line numbers are off by one in errors. @@ -179,11 +204,11 @@ next: items = make([]lex.Token, 0, 3) } for { - tok = p.lex.Next() + tok = p.nextToken() if len(operands) == 0 && len(items) == 0 { if p.arch.InFamily(sys.ARM, sys.ARM64, sys.AMD64, sys.I386) && tok == '.' { // Suffixes: ARM conditionals or x86 modifiers. - tok = p.lex.Next() + tok = p.nextToken() str := p.lex.Text() if tok != scanner.Ident { p.errorf("instruction suffix expected identifier, found %s", str) diff --git a/src/cmd/asm/internal/asm/testdata/buildtagerror.s b/src/cmd/asm/internal/asm/testdata/buildtagerror.s new file mode 100644 index 0000000000..5a2d65b978 --- /dev/null +++ b/src/cmd/asm/internal/asm/testdata/buildtagerror.s @@ -0,0 +1,8 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#define X 1 + +//go:build x // ERROR "misplaced //go:build comment" + diff --git a/src/cmd/asm/internal/lex/input.go b/src/cmd/asm/internal/lex/input.go index a43953b515..da4ebe6d6e 100644 --- a/src/cmd/asm/internal/lex/input.go +++ b/src/cmd/asm/internal/lex/input.go @@ -109,6 +109,9 @@ func (in *Input) Next() ScanToken { in.Error("'#' must be first item on line") } in.beginningOfLine = in.hash() + in.text = "#" + return '#' + case scanner.Ident: // Is it a macro name? name := in.Stack.Text() diff --git a/src/cmd/asm/internal/lex/lex.go b/src/cmd/asm/internal/lex/lex.go index f1f7da7911..7cd41a55a9 100644 --- a/src/cmd/asm/internal/lex/lex.go +++ b/src/cmd/asm/internal/lex/lex.go @@ -22,11 +22,13 @@ type ScanToken rune const ( // Asm defines some two-character lexemes. We make up // a rune/ScanToken value for them - ugly but simple. - LSH ScanToken = -1000 - iota // << Left shift. - RSH // >> Logical right shift. - ARR // -> Used on ARM for shift type 3, arithmetic right shift. - ROT // @> Used on ARM for shift type 4, rotate right. - macroName // name of macro that should not be expanded + LSH ScanToken = -1000 - iota // << Left shift. + RSH // >> Logical right shift. + ARR // -> Used on ARM for shift type 3, arithmetic right shift. + ROT // @> Used on ARM for shift type 4, rotate right. + Include // included file started here + BuildComment // //go:build or +build comment + macroName // name of macro that should not be expanded ) // IsRegisterShift reports whether the token is one of the ARM register shift operators. diff --git a/src/cmd/asm/internal/lex/lex_test.go b/src/cmd/asm/internal/lex/lex_test.go index f606ffe07b..51679d2fbc 100644 --- a/src/cmd/asm/internal/lex/lex_test.go +++ b/src/cmd/asm/internal/lex/lex_test.go @@ -281,6 +281,9 @@ func drain(input *Input) string { if tok == scanner.EOF { return buf.String() } + if tok == '#' { + continue + } if buf.Len() > 0 { buf.WriteByte('.') } diff --git a/src/cmd/asm/internal/lex/tokenizer.go b/src/cmd/asm/internal/lex/tokenizer.go index aef9ea8636..861a2d421d 100644 --- a/src/cmd/asm/internal/lex/tokenizer.go +++ b/src/cmd/asm/internal/lex/tokenizer.go @@ -107,10 +107,13 @@ func (t *Tokenizer) Next() ScanToken { if t.tok != scanner.Comment { break } - length := strings.Count(s.TokenText(), "\n") - t.line += length - // TODO: If we ever have //go: comments in assembly, will need to keep them here. - // For now, just discard all comments. + text := s.TokenText() + t.line += strings.Count(text, "\n") + // TODO: Use constraint.IsGoBuild once it exists. + if strings.HasPrefix(text, "//go:build") { + t.tok = BuildComment + break + } } switch t.tok { case '\n': -- cgit v1.3 From 912262b806a432a29302e0cee45e4f42ef7038a2 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Sun, 19 Jul 2020 00:30:12 -0400 Subject: cmd/internal/obj: move LSym.Func into LSym.Extra This creates space for a different kind of extension field in LSym without making the struct any larger. (There are many LSym, so we care about keeping the struct small.) Change-Id: Ib16edb9e15f54c2a7351c8b875e19684058711e5 Reviewed-on: https://go-review.googlesource.com/c/go/+/243943 Trust: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/asm.go | 2 +- src/cmd/compile/internal/gc/dwinl.go | 4 +- src/cmd/compile/internal/gc/gsubr.go | 2 +- src/cmd/compile/internal/gc/obj.go | 9 ++-- src/cmd/compile/internal/gc/pgen.go | 18 ++++---- src/cmd/compile/internal/gc/plive.go | 9 ++-- src/cmd/compile/internal/gc/scope.go | 4 +- src/cmd/compile/internal/gc/ssa.go | 10 ++-- src/cmd/internal/obj/arm/asm5.go | 8 ++-- src/cmd/internal/obj/arm/obj5.go | 34 +++++++------- src/cmd/internal/obj/arm64/asm7.go | 12 ++--- src/cmd/internal/obj/arm64/obj7.go | 36 +++++++-------- src/cmd/internal/obj/dwarf.go | 31 +++++++------ src/cmd/internal/obj/ld.go | 2 +- src/cmd/internal/obj/link.go | 23 ++++++++- src/cmd/internal/obj/mips/asm0.go | 10 ++-- src/cmd/internal/obj/mips/obj0.go | 42 ++++++++--------- src/cmd/internal/obj/objfile.go | 90 ++++++++++++++++++------------------ src/cmd/internal/obj/pass.go | 6 +-- src/cmd/internal/obj/pcln.go | 28 +++++------ src/cmd/internal/obj/plist.go | 14 +++--- src/cmd/internal/obj/ppc64/asm9.go | 10 ++-- src/cmd/internal/obj/ppc64/obj9.go | 28 +++++------ src/cmd/internal/obj/riscv/obj.go | 38 +++++++-------- src/cmd/internal/obj/s390x/asmz.go | 6 +-- src/cmd/internal/obj/s390x/objz.go | 28 +++++------ src/cmd/internal/obj/sym.go | 6 ++- src/cmd/internal/obj/wasm/wasmobj.go | 34 +++++++------- src/cmd/internal/obj/x86/asm6.go | 8 ++-- src/cmd/internal/obj/x86/obj6.go | 26 +++++------ 30 files changed, 304 insertions(+), 274 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index 7878d74549..b9efa454ed 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -181,7 +181,7 @@ func (p *Parser) asmText(operands [][]lex.Token) { // Argsize set below. }, } - nameAddr.Sym.Func.Text = prog + nameAddr.Sym.Func().Text = prog prog.To.Val = int32(argSize) p.append(prog, "", true) } diff --git a/src/cmd/compile/internal/gc/dwinl.go b/src/cmd/compile/internal/gc/dwinl.go index 27e2cbcd98..5120fa1166 100644 --- a/src/cmd/compile/internal/gc/dwinl.go +++ b/src/cmd/compile/internal/gc/dwinl.go @@ -34,7 +34,7 @@ func assembleInlines(fnsym *obj.LSym, dwVars []*dwarf.Var) dwarf.InlCalls { // Walk progs to build up the InlCalls data structure var prevpos src.XPos - for p := fnsym.Func.Text; p != nil; p = p.Link { + for p := fnsym.Func().Text; p != nil; p = p.Link { if p.Pos == prevpos { continue } @@ -150,7 +150,7 @@ func assembleInlines(fnsym *obj.LSym, dwVars []*dwarf.Var) dwarf.InlCalls { start := int64(-1) curii := -1 var prevp *obj.Prog - for p := fnsym.Func.Text; p != nil; prevp, p = p, p.Link { + for p := fnsym.Func().Text; p != nil; prevp, p = p, p.Link { if prevp != nil && p.Pos == prevp.Pos { continue } diff --git a/src/cmd/compile/internal/gc/gsubr.go b/src/cmd/compile/internal/gc/gsubr.go index 480d411f49..14c217ff3b 100644 --- a/src/cmd/compile/internal/gc/gsubr.go +++ b/src/cmd/compile/internal/gc/gsubr.go @@ -199,7 +199,7 @@ func (pp *Progs) settext(fn *Node) { ptxt := pp.Prog(obj.ATEXT) pp.Text = ptxt - fn.Func.lsym.Func.Text = ptxt + fn.Func.lsym.Func().Text = ptxt ptxt.From.Type = obj.TYPE_MEM ptxt.From.Name = obj.NAME_EXTERN ptxt.From.Sym = fn.Func.lsym diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go index d7f4a94041..f6557e2d15 100644 --- a/src/cmd/compile/internal/gc/obj.go +++ b/src/cmd/compile/internal/gc/obj.go @@ -305,20 +305,21 @@ func dumpglobls() { // global symbols can't be declared during parallel compilation. func addGCLocals() { for _, s := range Ctxt.Text { - if s.Func == nil { + fn := s.Func() + if fn == nil { continue } - for _, gcsym := range []*obj.LSym{s.Func.GCArgs, s.Func.GCLocals, s.Func.GCRegs} { + for _, gcsym := range []*obj.LSym{fn.GCArgs, fn.GCLocals, fn.GCRegs} { if gcsym != nil && !gcsym.OnList() { ggloblsym(gcsym, int32(len(gcsym.P)), obj.RODATA|obj.DUPOK) } } - if x := s.Func.StackObjects; x != nil { + if x := fn.StackObjects; x != nil { attr := int16(obj.RODATA) ggloblsym(x, int32(len(x.P)), attr) x.Set(obj.AttrStatic, true) } - if x := s.Func.OpenCodedDeferInfo; x != nil { + if x := fn.OpenCodedDeferInfo; x != nil { ggloblsym(x, int32(len(x.P)), obj.RODATA|obj.DUPOK) } } diff --git a/src/cmd/compile/internal/gc/pgen.go b/src/cmd/compile/internal/gc/pgen.go index 52b1ed351d..353f4b08c9 100644 --- a/src/cmd/compile/internal/gc/pgen.go +++ b/src/cmd/compile/internal/gc/pgen.go @@ -266,8 +266,8 @@ func compile(fn *Node) { dtypesym(n.Type) // Also make sure we allocate a linker symbol // for the stack object data, for the same reason. - if fn.Func.lsym.Func.StackObjects == nil { - fn.Func.lsym.Func.StackObjects = Ctxt.Lookup(fn.Func.lsym.Name + ".stkobj") + if fn.Func.lsym.Func().StackObjects == nil { + fn.Func.lsym.Func().StackObjects = Ctxt.Lookup(fn.Func.lsym.Name + ".stkobj") } } } @@ -415,7 +415,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S case PAUTO: if !n.Name.Used() { // Text == nil -> generating abstract function - if fnsym.Func.Text != nil { + if fnsym.Func().Text != nil { Fatalf("debuginfo unused node (AllocFrame should truncate fn.Func.Dcl)") } continue @@ -425,7 +425,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S continue } apdecls = append(apdecls, n) - fnsym.Func.RecordAutoType(ngotype(n).Linksym()) + fnsym.Func().RecordAutoType(ngotype(n).Linksym()) } decls, dwarfVars := createDwarfVars(fnsym, fn.Func, apdecls) @@ -435,7 +435,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S // the function symbol to insure that the type included in DWARF // processing during linking. typesyms := []*obj.LSym{} - for t, _ := range fnsym.Func.Autot { + for t, _ := range fnsym.Func().Autot { typesyms = append(typesyms, t) } sort.Sort(obj.BySymName(typesyms)) @@ -444,7 +444,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S r.Sym = sym r.Type = objabi.R_USETYPE } - fnsym.Func.Autot = nil + fnsym.Func().Autot = nil var varScopes []ScopeID for _, decl := range decls { @@ -522,7 +522,7 @@ func createSimpleVar(fnsym *obj.LSym, n *Node) *dwarf.Var { } typename := dwarf.InfoPrefix + typesymname(n.Type) - delete(fnsym.Func.Autot, ngotype(n).Linksym()) + delete(fnsym.Func().Autot, ngotype(n).Linksym()) inlIndex := 0 if genDwarfInline > 1 { if n.Name.InlFormal() || n.Name.InlLocal() { @@ -667,7 +667,7 @@ func createDwarfVars(fnsym *obj.LSym, fn *Func, apDecls []*Node) ([]*Node, []*dw ChildIndex: -1, }) // Record go type of to insure that it gets emitted by the linker. - fnsym.Func.RecordAutoType(ngotype(n).Linksym()) + fnsym.Func().RecordAutoType(ngotype(n).Linksym()) } return decls, vars @@ -731,7 +731,7 @@ func createComplexVar(fnsym *obj.LSym, fn *Func, varID ssa.VarID) *dwarf.Var { } gotype := ngotype(n).Linksym() - delete(fnsym.Func.Autot, gotype) + delete(fnsym.Func().Autot, gotype) typename := dwarf.InfoPrefix + gotype.Name[len("type."):] inlIndex := 0 if genDwarfInline > 1 { diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go index a9ea37701e..b471accb65 100644 --- a/src/cmd/compile/internal/gc/plive.go +++ b/src/cmd/compile/internal/gc/plive.go @@ -1552,26 +1552,27 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap { // Emit the live pointer map data structures ls := e.curfn.Func.lsym - ls.Func.GCArgs, ls.Func.GCLocals, ls.Func.GCRegs = lv.emit() + fninfo := ls.Func() + fninfo.GCArgs, fninfo.GCLocals, fninfo.GCRegs = lv.emit() p := pp.Prog(obj.AFUNCDATA) Addrconst(&p.From, objabi.FUNCDATA_ArgsPointerMaps) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN - p.To.Sym = ls.Func.GCArgs + p.To.Sym = fninfo.GCArgs p = pp.Prog(obj.AFUNCDATA) Addrconst(&p.From, objabi.FUNCDATA_LocalsPointerMaps) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN - p.To.Sym = ls.Func.GCLocals + p.To.Sym = fninfo.GCLocals if !go115ReduceLiveness { p = pp.Prog(obj.AFUNCDATA) Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN - p.To.Sym = ls.Func.GCRegs + p.To.Sym = fninfo.GCRegs } return lv.livenessMap diff --git a/src/cmd/compile/internal/gc/scope.go b/src/cmd/compile/internal/gc/scope.go index d7239d5693..e66b859e10 100644 --- a/src/cmd/compile/internal/gc/scope.go +++ b/src/cmd/compile/internal/gc/scope.go @@ -62,9 +62,9 @@ func scopePCs(fnsym *obj.LSym, marks []Mark, dwarfScopes []dwarf.Scope) { if len(marks) == 0 { return } - p0 := fnsym.Func.Text + p0 := fnsym.Func().Text scope := findScope(marks, p0.Pos) - for p := fnsym.Func.Text; p != nil; p = p.Link { + for p := p0; p != nil; p = p.Link { if p.Pos == p0.Pos { continue } diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 3d5fa4cd0a..d8f627c213 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -240,7 +240,7 @@ func dvarint(x *obj.LSym, off int, v int64) int { // - Offset of where argument should be placed in the args frame when making call func (s *state) emitOpenDeferInfo() { x := Ctxt.Lookup(s.curfn.Func.lsym.Name + ".opendefer") - s.curfn.Func.lsym.Func.OpenCodedDeferInfo = x + s.curfn.Func.lsym.Func().OpenCodedDeferInfo = x off := 0 // Compute maxargsize (max size of arguments for all defers) @@ -6108,7 +6108,7 @@ func emitStackObjects(e *ssafn, pp *Progs) { // Populate the stack object data. // Format must match runtime/stack.go:stackObjectRecord. - x := e.curfn.Func.lsym.Func.StackObjects + x := e.curfn.Func.lsym.Func().StackObjects off := 0 off = duintptr(x, off, uint64(len(vars))) for _, v := range vars { @@ -6145,7 +6145,7 @@ func genssa(f *ssa.Func, pp *Progs) { s.livenessMap = liveness(e, f, pp) emitStackObjects(e, pp) - openDeferInfo := e.curfn.Func.lsym.Func.OpenCodedDeferInfo + openDeferInfo := e.curfn.Func.lsym.Func().OpenCodedDeferInfo if openDeferInfo != nil { // This function uses open-coded defers -- write out the funcdata // info that we computed at the end of genssa. @@ -6350,7 +6350,7 @@ func genssa(f *ssa.Func, pp *Progs) { // some of the inline marks. // Use this instruction instead. p.Pos = p.Pos.WithIsStmt() // promote position to a statement - pp.curfn.Func.lsym.Func.AddInlMark(p, inlMarks[m]) + pp.curfn.Func.lsym.Func().AddInlMark(p, inlMarks[m]) // Make the inline mark a real nop, so it doesn't generate any code. m.As = obj.ANOP m.Pos = src.NoXPos @@ -6362,7 +6362,7 @@ func genssa(f *ssa.Func, pp *Progs) { // Any unmatched inline marks now need to be added to the inlining tree (and will generate a nop instruction). for _, p := range inlMarkList { if p.As != obj.ANOP { - pp.curfn.Func.lsym.Func.AddInlMark(p, inlMarks[p]) + pp.curfn.Func.lsym.Func().AddInlMark(p, inlMarks[p]) } } } diff --git a/src/cmd/internal/obj/arm/asm5.go b/src/cmd/internal/obj/arm/asm5.go index 269a4223d5..ebb98b4859 100644 --- a/src/cmd/internal/obj/arm/asm5.go +++ b/src/cmd/internal/obj/arm/asm5.go @@ -390,7 +390,7 @@ func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var p *obj.Prog var op *obj.Prog - p = cursym.Func.Text + p = cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -482,8 +482,8 @@ func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { bflag = 0 pc = 0 times++ - c.cursym.Func.Text.Pc = 0 // force re-layout the code. - for p = c.cursym.Func.Text; p != nil; p = p.Link { + c.cursym.Func().Text.Pc = 0 // force re-layout the code. + for p = c.cursym.Func().Text; p != nil; p = p.Link { o = c.oplook(p) if int64(pc) > p.Pc { p.Pc = int64(pc) @@ -558,7 +558,7 @@ func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { * perhaps we'd be able to parallelize the span loop above. */ - p = c.cursym.Func.Text + p = c.cursym.Func().Text c.autosize = p.To.Offset + 4 c.cursym.Grow(c.cursym.Size) diff --git a/src/cmd/internal/obj/arm/obj5.go b/src/cmd/internal/obj/arm/obj5.go index 4d9187b530..f2bfb9679f 100644 --- a/src/cmd/internal/obj/arm/obj5.go +++ b/src/cmd/internal/obj/arm/obj5.go @@ -249,13 +249,13 @@ const ( func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize := int32(0) - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxt5{ctxt: ctxt, cursym: cursym, newprog: newprog} - p := c.cursym.Func.Text + p := c.cursym.Func().Text autoffset := int32(p.To.Offset) if autoffset == -4 { // Historical way to mark NOFRAME. @@ -271,30 +271,30 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - cursym.Func.Locals = autoffset - cursym.Func.Args = p.To.Val.(int32) + cursym.Func().Locals = autoffset + cursym.Func().Args = p.To.Val.(int32) /* * find leaf subroutines */ - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ATEXT: p.Mark |= LEAF case ADIV, ADIVU, AMOD, AMODU: - cursym.Func.Text.Mark &^= LEAF + cursym.Func().Text.Mark &^= LEAF case ABL, ABX, obj.ADUFFZERO, obj.ADUFFCOPY: - cursym.Func.Text.Mark &^= LEAF + cursym.Func().Text.Mark &^= LEAF } } var q2 *obj.Prog - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: @@ -311,20 +311,20 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize += 4 } - if autosize == 0 && cursym.Func.Text.Mark&LEAF == 0 { + if autosize == 0 && cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // are not identified as leaves but still have no frame. if ctxt.Debugvlog { ctxt.Logf("save suppressed in: %s\n", cursym.Name) } - cursym.Func.Text.Mark |= LEAF + cursym.Func().Text.Mark |= LEAF } // FP offsets need an updated p.To.Offset. p.To.Offset = int64(autosize) - 4 - if cursym.Func.Text.Mark&LEAF != 0 { + if cursym.Func().Text.Mark&LEAF != 0 { cursym.Set(obj.AttrLeaf, true) if p.From.Sym.NoFrame() { break @@ -347,7 +347,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Reg = REGSP p.Spadj = autosize - if cursym.Func.Text.From.Sym.Wrapper() { + if cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVW g_panic(g), R1 @@ -460,7 +460,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case obj.ARET: nocache(p) - if cursym.Func.Text.Mark&LEAF != 0 { + if cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 { p.As = AB p.From = obj.Addr{} @@ -508,7 +508,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } case ADIV, ADIVU, AMOD, AMODU: - if cursym.Func.Text.From.Sym.NoSplit() { + if cursym.Func().Text.From.Sym.NoSplit() { ctxt.Diag("cannot divide in NOSPLIT function") } const debugdivmod = false @@ -720,7 +720,7 @@ func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { end := c.ctxt.EndUnsafePoint(bls, c.newprog, -1) var last *obj.Prog - for last = c.cursym.Func.Text; last.Link != nil; last = last.Link { + for last = c.cursym.Func().Text; last.Link != nil; last = last.Link { } // Now we are at the end of the function, but logically @@ -751,7 +751,7 @@ func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { switch { case c.cursym.CFunc(): morestack = "runtime.morestackc" - case !c.cursym.Func.Text.From.Sym.NeedCtxt(): + case !c.cursym.Func().Text.From.Sym.NeedCtxt(): morestack = "runtime.morestack_noctxt" } call.To.Sym = c.ctxt.Lookup(morestack) @@ -762,7 +762,7 @@ func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { b := obj.Appendp(pcdata, c.newprog) b.As = obj.AJMP b.To.Type = obj.TYPE_BRANCH - b.To.SetTarget(c.cursym.Func.Text.Link) + b.To.SetTarget(c.cursym.Func().Text.Link) b.Spadj = +framesize return end diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index c46066313e..6b9fe27c05 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -913,7 +913,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Retpoline = false // don't keep printing } - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -943,8 +943,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { alignedValue := p.From.Offset m = pcAlignPadLength(pc, alignedValue, ctxt) // Update the current text symbol alignment value. - if int32(alignedValue) > cursym.Func.Align { - cursym.Func.Align = int32(alignedValue) + if int32(alignedValue) > cursym.Func().Align { + cursym.Func().Align = int32(alignedValue) } break case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: @@ -983,7 +983,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for bflag != 0 { bflag = 0 pc = 0 - for p = c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { if p.As == ADWORD && (pc&7) != 0 { pc += 4 } @@ -1047,7 +1047,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { psz := int32(0) var i int var out [6]uint32 - for p := c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { c.pc = p.Pc o = c.oplook(p) @@ -1088,7 +1088,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // We use REGTMP as a scratch register during call injection, // so instruction sequences that use REGTMP are unsafe to // preempt asynchronously. - obj.MarkUnsafePoints(c.ctxt, c.cursym.Func.Text, c.newprog, c.isUnsafePoint, c.isRestartable) + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable) } // isUnsafePoint returns whether p is an unsafe point. diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go index f1bc2583cb..0baf51973a 100644 --- a/src/cmd/internal/obj/arm64/obj7.go +++ b/src/cmd/internal/obj/arm64/obj7.go @@ -166,7 +166,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { end := c.ctxt.EndUnsafePoint(bls, c.newprog, -1) var last *obj.Prog - for last = c.cursym.Func.Text; last.Link != nil; last = last.Link { + for last = c.cursym.Func().Text; last.Link != nil; last = last.Link { } // Now we are at the end of the function, but logically @@ -209,7 +209,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { switch { case c.cursym.CFunc(): morestack = "runtime.morestackc" - case !c.cursym.Func.Text.From.Sym.NeedCtxt(): + case !c.cursym.Func().Text.From.Sym.NeedCtxt(): morestack = "runtime.morestack_noctxt" } call.To.Sym = c.ctxt.Lookup(morestack) @@ -220,7 +220,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { jmp := obj.Appendp(pcdata, c.newprog) jmp.As = AB jmp.To.Type = obj.TYPE_BRANCH - jmp.To.SetTarget(c.cursym.Func.Text.Link) + jmp.To.SetTarget(c.cursym.Func().Text.Link) jmp.Spadj = +framesize return end @@ -441,13 +441,13 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) { } func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym} - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -8 { // Historical way to mark NOFRAME. @@ -463,13 +463,13 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines */ - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ATEXT: p.Mark |= LEAF @@ -477,18 +477,18 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case ABL, obj.ADUFFZERO, obj.ADUFFCOPY: - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF } } var q *obj.Prog var q1 *obj.Prog var retjmp *obj.LSym - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: - c.cursym.Func.Text = p + c.cursym.Func().Text = p c.autosize = int32(textstksiz) if p.Mark&LEAF != 0 && c.autosize == 0 { @@ -514,7 +514,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8) } c.autosize += extrasize - c.cursym.Func.Locals += extrasize + c.cursym.Func().Locals += extrasize // low 32 bits for autosize // high 32 bits for extrasize @@ -524,14 +524,14 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Offset = 0 } - if c.autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 { + if c.autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 { if c.ctxt.Debugvlog { - c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func.Text.From.Sym.Name) + c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func().Text.From.Sym.Name) } - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } - if cursym.Func.Text.Mark&LEAF != 0 { + if cursym.Func().Text.Mark&LEAF != 0 { cursym.Set(obj.AttrLeaf, true) if p.From.Sym.NoFrame() { break @@ -641,7 +641,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q1.To.Reg = REGFP } - if c.cursym.Func.Text.From.Sym.Wrapper() { + if c.cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOV g_panic(g), R1 @@ -755,7 +755,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { retjmp = p.To.Sym p.To = obj.Addr{} - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if c.autosize != 0 { p.As = AADD p.From.Type = obj.TYPE_CONST diff --git a/src/cmd/internal/obj/dwarf.go b/src/cmd/internal/obj/dwarf.go index 9abb31b558..328fb03b24 100644 --- a/src/cmd/internal/obj/dwarf.go +++ b/src/cmd/internal/obj/dwarf.go @@ -46,12 +46,12 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) { // we expect at the start of a new sequence. stmt := true line := int64(1) - pc := s.Func.Text.Pc + pc := s.Func().Text.Pc var lastpc int64 // last PC written to line table, not last PC in func name := "" prologue, wrotePrologue := false, false // Walk the progs, generating the DWARF table. - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { prologue = prologue || (p.Pos.Xlogue() == src.PosPrologueEnd) // If we're not at a real instruction, keep looping! if p.Pos.Line() == 0 || (p.Link != nil && p.Link.Pc == p.Pc) { @@ -103,7 +103,7 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) { // text address before the end sequence op. If this isn't done, // GDB will assign a line number of zero the last row in the line // table, which we don't want. - lastlen := uint64(s.Size - (lastpc - s.Func.Text.Pc)) + lastlen := uint64(s.Size - (lastpc - s.Func().Text.Pc)) putpclcdelta(ctxt, dctxt, lines, lastlen, 0) dctxt.AddUint8(lines, 0) // start extended opcode dwarf.Uleb128put(dctxt, lines, 1) @@ -301,26 +301,27 @@ func (ctxt *Link) dwarfSym(s *LSym) (dwarfInfoSym, dwarfLocSym, dwarfRangesSym, if s.Type != objabi.STEXT { ctxt.Diag("dwarfSym of non-TEXT %v", s) } - if s.Func.dwarfInfoSym == nil { - s.Func.dwarfInfoSym = &LSym{ + fn := s.Func() + if fn.dwarfInfoSym == nil { + fn.dwarfInfoSym = &LSym{ Type: objabi.SDWARFFCN, } if ctxt.Flag_locationlists { - s.Func.dwarfLocSym = &LSym{ + fn.dwarfLocSym = &LSym{ Type: objabi.SDWARFLOC, } } - s.Func.dwarfRangesSym = &LSym{ + fn.dwarfRangesSym = &LSym{ Type: objabi.SDWARFRANGE, } - s.Func.dwarfDebugLinesSym = &LSym{ + fn.dwarfDebugLinesSym = &LSym{ Type: objabi.SDWARFLINES, } if s.WasInlined() { - s.Func.dwarfAbsFnSym = ctxt.DwFixups.AbsFuncDwarfSym(s) + fn.dwarfAbsFnSym = ctxt.DwFixups.AbsFuncDwarfSym(s) } } - return s.Func.dwarfInfoSym, s.Func.dwarfLocSym, s.Func.dwarfRangesSym, s.Func.dwarfAbsFnSym, s.Func.dwarfDebugLinesSym + return fn.dwarfInfoSym, fn.dwarfLocSym, fn.dwarfRangesSym, fn.dwarfAbsFnSym, fn.dwarfDebugLinesSym } func (s *LSym) Length(dwarfContext interface{}) int64 { @@ -331,7 +332,7 @@ func (s *LSym) Length(dwarfContext interface{}) int64 { // first instruction (prog) of the specified function. This will // presumably be the file in which the function is defined. func (ctxt *Link) fileSymbol(fn *LSym) *LSym { - p := fn.Func.Text + p := fn.Func().Text if p != nil { f, _ := linkgetlineFromPos(ctxt, p.Pos) fsym := ctxt.Lookup(f) @@ -405,8 +406,8 @@ func (ctxt *Link) DwarfAbstractFunc(curfn interface{}, s *LSym, myimportpath str if absfn.Size != 0 { ctxt.Diag("internal error: DwarfAbstractFunc double process %v", s) } - if s.Func == nil { - s.Func = new(FuncInfo) + if s.Func() == nil { + s.NewFuncInfo() } scopes, _ := ctxt.DebugInfo(s, absfn, curfn) dwctxt := dwCtxt{ctxt} @@ -527,8 +528,8 @@ func (ft *DwarfFixupTable) SetPrecursorFunc(s *LSym, fn interface{}) { // wrapper generation as opposed to the main inlining phase) it's // possible that we didn't cache the abstract function sym for the // text symbol -- do so now if needed. See issue 38068. - if s.Func != nil && s.Func.dwarfAbsFnSym == nil { - s.Func.dwarfAbsFnSym = absfn + if fn := s.Func(); fn != nil && fn.dwarfAbsFnSym == nil { + fn.dwarfAbsFnSym = absfn } ft.precursor[s] = fnState{precursor: fn, absfn: absfn} diff --git a/src/cmd/internal/obj/ld.go b/src/cmd/internal/obj/ld.go index 4ba52c7785..5d6c000dc6 100644 --- a/src/cmd/internal/obj/ld.go +++ b/src/cmd/internal/obj/ld.go @@ -59,7 +59,7 @@ func mkfwd(sym *LSym) { } i := 0 - for p := sym.Func.Text; p != nil && p.Link != nil; p = p.Link { + for p := sym.Func().Text; p != nil && p.Link != nil; p = p.Link { i-- if i < 0 { i = LOG - 1 diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index f14b691802..ae85dbbe4e 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -38,6 +38,7 @@ import ( "cmd/internal/src" "cmd/internal/sys" "fmt" + "log" "sync" ) @@ -400,7 +401,7 @@ type LSym struct { P []byte R []Reloc - Func *FuncInfo + Extra *interface{} // *FuncInfo if present Pkg string PkgIdx int32 @@ -433,6 +434,26 @@ type FuncInfo struct { FuncInfoSym *LSym } +// NewFuncInfo allocates and returns a FuncInfo for LSym. +func (s *LSym) NewFuncInfo() *FuncInfo { + if s.Extra != nil { + log.Fatalf("invalid use of LSym - NewFuncInfo with Extra of type %T", *s.Extra) + } + f := new(FuncInfo) + s.Extra = new(interface{}) + *s.Extra = f + return f +} + +// Func returns the *FuncInfo associated with s, or else nil. +func (s *LSym) Func() *FuncInfo { + if s.Extra == nil { + return nil + } + f, _ := (*s.Extra).(*FuncInfo) + return f +} + type InlMark struct { // When unwinding from an instruction in an inlined body, mark // where we should unwind to. diff --git a/src/cmd/internal/obj/mips/asm0.go b/src/cmd/internal/obj/mips/asm0.go index 6107974745..fd29f9fa21 100644 --- a/src/cmd/internal/obj/mips/asm0.go +++ b/src/cmd/internal/obj/mips/asm0.go @@ -410,7 +410,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Retpoline = false // don't keep printing } - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -455,7 +455,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for bflag != 0 { bflag = 0 pc = 0 - for p = c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { p.Pc = pc o = c.oplook(p) @@ -512,7 +512,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { bp := c.cursym.P var i int32 var out [4]uint32 - for p := c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { c.pc = p.Pc o = c.oplook(p) if int(o.size) > 4*len(out) { @@ -529,7 +529,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // We use REGTMP as a scratch register during call injection, // so instruction sequences that use REGTMP are unsafe to // preempt asynchronously. - obj.MarkUnsafePoints(c.ctxt, c.cursym.Func.Text, c.newprog, c.isUnsafePoint, c.isRestartable) + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable) } // isUnsafePoint returns whether p is an unsafe point. @@ -1302,7 +1302,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 = OP_JMP(c.opirr(p.As), uint32(v)) if p.To.Sym == nil { - p.To.Sym = c.cursym.Func.Text.From.Sym + p.To.Sym = c.cursym.Func().Text.From.Sym p.To.Offset = p.To.Target().Pc } rel := obj.Addrel(c.cursym) diff --git a/src/cmd/internal/obj/mips/obj0.go b/src/cmd/internal/obj/mips/obj0.go index f19facc00c..135a8df3aa 100644 --- a/src/cmd/internal/obj/mips/obj0.go +++ b/src/cmd/internal/obj/mips/obj0.go @@ -133,11 +133,11 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // a switch for enabling/disabling instruction scheduling nosched := true - if c.cursym.Func.Text == nil || c.cursym.Func.Text.Link == nil { + if c.cursym.Func().Text == nil || c.cursym.Func().Text.Link == nil { return } - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -ctxt.FixedFrameSize() { // Historical way to mark NOFRAME. @@ -153,8 +153,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines @@ -162,7 +162,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { * expand BECOME pseudo */ - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { /* too hard, just leave alone */ case obj.ATEXT: @@ -203,7 +203,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { AJAL, obj.ADUFFZERO, obj.ADUFFCOPY: - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF fallthrough case AJMP, @@ -267,7 +267,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize := int32(0) var p1 *obj.Prog var p2 *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: @@ -288,19 +288,19 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize += 4 } - if autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 { - if c.cursym.Func.Text.From.Sym.NoSplit() { + if autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 { + if c.cursym.Func().Text.From.Sym.NoSplit() { if ctxt.Debugvlog { ctxt.Logf("save suppressed in: %s\n", c.cursym.Name) } - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } } p.To.Offset = int64(autosize) - ctxt.FixedFrameSize() - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { c.cursym.Set(obj.AttrLeaf, true) if p.From.Sym.NoFrame() { break @@ -344,7 +344,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) } - if c.cursym.Func.Text.From.Sym.Wrapper() && c.cursym.Func.Text.Mark&LEAF == 0 { + if c.cursym.Func().Text.From.Sym.Wrapper() && c.cursym.Func().Text.Mark&LEAF == 0 { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOV g_panic(g), R1 @@ -438,7 +438,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Name = obj.NAME_NONE // clear fields as we may modify p to other instruction p.To.Sym = nil - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 { p.As = AJMP p.From = obj.Addr{} @@ -540,7 +540,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if c.ctxt.Arch.Family == sys.MIPS { // rewrite MOVD into two MOVF in 32-bit mode to avoid unaligned memory access - for p = c.cursym.Func.Text; p != nil; p = p1 { + for p = c.cursym.Func().Text; p != nil; p = p1 { p1 = p.Link if p.As != AMOVD { @@ -580,7 +580,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if nosched { // if we don't do instruction scheduling, simply add // NOP after each branch instruction. - for p = c.cursym.Func.Text; p != nil; p = p.Link { + for p = c.cursym.Func().Text; p != nil; p = p.Link { if p.Mark&BRANCH != 0 { c.addnop(p) } @@ -589,10 +589,10 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // instruction scheduling - q = nil // p - 1 - q1 = c.cursym.Func.Text // top of block - o := 0 // count of instructions - for p = c.cursym.Func.Text; p != nil; p = p1 { + q = nil // p - 1 + q1 = c.cursym.Func().Text // top of block + o := 0 // count of instructions + for p = c.cursym.Func().Text; p != nil; p = p1 { p1 = p.Link o++ if p.Mark&NOSCHED != 0 { @@ -791,7 +791,7 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { p.To.Type = obj.TYPE_BRANCH if c.cursym.CFunc() { p.To.Sym = c.ctxt.Lookup("runtime.morestackc") - } else if !c.cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt") } else { p.To.Sym = c.ctxt.Lookup("runtime.morestack") @@ -805,7 +805,7 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { p.As = AJMP p.To.Type = obj.TYPE_BRANCH - p.To.SetTarget(c.cursym.Func.Text.Link) + p.To.SetTarget(c.cursym.Func().Text.Link) p.Mark |= BRANCH // placeholder for q1's jump target diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index fa60c9ad6d..a08de891d3 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -189,8 +189,8 @@ func WriteObjFile(ctxt *Link, b *bio.Writer) { // object file, and the Pcln variables haven't been filled in. As such, we // need to check that Pcsp exists, and assume the other pcln variables exist // as well. Tests like test/fixedbugs/issue22200.go demonstrate this issue. - if s.Func != nil && s.Func.Pcln.Pcsp != nil { - pc := &s.Func.Pcln + if fn := s.Func(); fn != nil && fn.Pcln.Pcsp != nil { + pc := &fn.Pcln w.Bytes(pc.Pcsp.P) w.Bytes(pc.Pcfile.P) w.Bytes(pc.Pcline.P) @@ -303,8 +303,8 @@ func (w *writer) Sym(s *LSym) { name = filepath.ToSlash(name) } var align uint32 - if s.Func != nil { - align = uint32(s.Func.Align) + if fn := s.Func(); fn != nil { + align = uint32(fn.Align) } if s.ContentAddressable() { // We generally assume data symbols are natually aligned, @@ -470,38 +470,38 @@ func (w *writer) Aux(s *LSym) { if s.Gotype != nil { w.aux1(goobj.AuxGotype, s.Gotype) } - if s.Func != nil { - w.aux1(goobj.AuxFuncInfo, s.Func.FuncInfoSym) + if fn := s.Func(); fn != nil { + w.aux1(goobj.AuxFuncInfo, fn.FuncInfoSym) - for _, d := range s.Func.Pcln.Funcdata { + for _, d := range fn.Pcln.Funcdata { w.aux1(goobj.AuxFuncdata, d) } - if s.Func.dwarfInfoSym != nil && s.Func.dwarfInfoSym.Size != 0 { - w.aux1(goobj.AuxDwarfInfo, s.Func.dwarfInfoSym) + if fn.dwarfInfoSym != nil && fn.dwarfInfoSym.Size != 0 { + w.aux1(goobj.AuxDwarfInfo, fn.dwarfInfoSym) } - if s.Func.dwarfLocSym != nil && s.Func.dwarfLocSym.Size != 0 { - w.aux1(goobj.AuxDwarfLoc, s.Func.dwarfLocSym) + if fn.dwarfLocSym != nil && fn.dwarfLocSym.Size != 0 { + w.aux1(goobj.AuxDwarfLoc, fn.dwarfLocSym) } - if s.Func.dwarfRangesSym != nil && s.Func.dwarfRangesSym.Size != 0 { - w.aux1(goobj.AuxDwarfRanges, s.Func.dwarfRangesSym) + if fn.dwarfRangesSym != nil && fn.dwarfRangesSym.Size != 0 { + w.aux1(goobj.AuxDwarfRanges, fn.dwarfRangesSym) } - if s.Func.dwarfDebugLinesSym != nil && s.Func.dwarfDebugLinesSym.Size != 0 { - w.aux1(goobj.AuxDwarfLines, s.Func.dwarfDebugLinesSym) + if fn.dwarfDebugLinesSym != nil && fn.dwarfDebugLinesSym.Size != 0 { + w.aux1(goobj.AuxDwarfLines, fn.dwarfDebugLinesSym) } - if s.Func.Pcln.Pcsp != nil && s.Func.Pcln.Pcsp.Size != 0 { - w.aux1(goobj.AuxPcsp, s.Func.Pcln.Pcsp) + if fn.Pcln.Pcsp != nil && fn.Pcln.Pcsp.Size != 0 { + w.aux1(goobj.AuxPcsp, fn.Pcln.Pcsp) } - if s.Func.Pcln.Pcfile != nil && s.Func.Pcln.Pcfile.Size != 0 { - w.aux1(goobj.AuxPcfile, s.Func.Pcln.Pcfile) + if fn.Pcln.Pcfile != nil && fn.Pcln.Pcfile.Size != 0 { + w.aux1(goobj.AuxPcfile, fn.Pcln.Pcfile) } - if s.Func.Pcln.Pcline != nil && s.Func.Pcln.Pcline.Size != 0 { - w.aux1(goobj.AuxPcline, s.Func.Pcln.Pcline) + if fn.Pcln.Pcline != nil && fn.Pcln.Pcline.Size != 0 { + w.aux1(goobj.AuxPcline, fn.Pcln.Pcline) } - if s.Func.Pcln.Pcinline != nil && s.Func.Pcln.Pcinline.Size != 0 { - w.aux1(goobj.AuxPcinline, s.Func.Pcln.Pcinline) + if fn.Pcln.Pcinline != nil && fn.Pcln.Pcinline.Size != 0 { + w.aux1(goobj.AuxPcinline, fn.Pcln.Pcinline) } - for _, pcSym := range s.Func.Pcln.Pcdata { + for _, pcSym := range fn.Pcln.Pcdata { w.aux1(goobj.AuxPcdata, pcSym) } @@ -571,34 +571,34 @@ func nAuxSym(s *LSym) int { if s.Gotype != nil { n++ } - if s.Func != nil { + if fn := s.Func(); fn != nil { // FuncInfo is an aux symbol, each Funcdata is an aux symbol - n += 1 + len(s.Func.Pcln.Funcdata) - if s.Func.dwarfInfoSym != nil && s.Func.dwarfInfoSym.Size != 0 { + n += 1 + len(fn.Pcln.Funcdata) + if fn.dwarfInfoSym != nil && fn.dwarfInfoSym.Size != 0 { n++ } - if s.Func.dwarfLocSym != nil && s.Func.dwarfLocSym.Size != 0 { + if fn.dwarfLocSym != nil && fn.dwarfLocSym.Size != 0 { n++ } - if s.Func.dwarfRangesSym != nil && s.Func.dwarfRangesSym.Size != 0 { + if fn.dwarfRangesSym != nil && fn.dwarfRangesSym.Size != 0 { n++ } - if s.Func.dwarfDebugLinesSym != nil && s.Func.dwarfDebugLinesSym.Size != 0 { + if fn.dwarfDebugLinesSym != nil && fn.dwarfDebugLinesSym.Size != 0 { n++ } - if s.Func.Pcln.Pcsp != nil && s.Func.Pcln.Pcsp.Size != 0 { + if fn.Pcln.Pcsp != nil && fn.Pcln.Pcsp.Size != 0 { n++ } - if s.Func.Pcln.Pcfile != nil && s.Func.Pcln.Pcfile.Size != 0 { + if fn.Pcln.Pcfile != nil && fn.Pcln.Pcfile.Size != 0 { n++ } - if s.Func.Pcln.Pcline != nil && s.Func.Pcln.Pcline.Size != 0 { + if fn.Pcln.Pcline != nil && fn.Pcln.Pcline.Size != 0 { n++ } - if s.Func.Pcln.Pcinline != nil && s.Func.Pcln.Pcinline.Size != 0 { + if fn.Pcln.Pcinline != nil && fn.Pcln.Pcinline.Size != 0 { n++ } - n += len(s.Func.Pcln.Pcdata) + n += len(fn.Pcln.Pcdata) } return n } @@ -620,15 +620,16 @@ func genFuncInfoSyms(ctxt *Link) { var b bytes.Buffer symidx := int32(len(ctxt.defs)) for _, s := range ctxt.Text { - if s.Func == nil { + fn := s.Func() + if fn == nil { continue } o := goobj.FuncInfo{ - Args: uint32(s.Func.Args), - Locals: uint32(s.Func.Locals), - FuncID: objabi.FuncID(s.Func.FuncID), + Args: uint32(fn.Args), + Locals: uint32(fn.Locals), + FuncID: objabi.FuncID(fn.FuncID), } - pc := &s.Func.Pcln + pc := &fn.Pcln o.Pcsp = makeSymRef(preparePcSym(pc.Pcsp)) o.Pcfile = makeSymRef(preparePcSym(pc.Pcfile)) o.Pcline = makeSymRef(preparePcSym(pc.Pcline)) @@ -670,10 +671,10 @@ func genFuncInfoSyms(ctxt *Link) { isym.Set(AttrIndexed, true) symidx++ infosyms = append(infosyms, isym) - s.Func.FuncInfoSym = isym + fn.FuncInfoSym = isym b.Reset() - dwsyms := []*LSym{s.Func.dwarfRangesSym, s.Func.dwarfLocSym, s.Func.dwarfDebugLinesSym, s.Func.dwarfInfoSym} + dwsyms := []*LSym{fn.dwarfRangesSym, fn.dwarfLocSym, fn.dwarfDebugLinesSym, fn.dwarfInfoSym} for _, s := range dwsyms { if s == nil || s.Size == 0 { continue @@ -744,14 +745,15 @@ func (ctxt *Link) writeSymDebugNamed(s *LSym, name string) { } fmt.Fprintf(ctxt.Bso, "size=%d", s.Size) if s.Type == objabi.STEXT { - fmt.Fprintf(ctxt.Bso, " args=%#x locals=%#x funcid=%#x", uint64(s.Func.Args), uint64(s.Func.Locals), uint64(s.Func.FuncID)) + fn := s.Func() + fmt.Fprintf(ctxt.Bso, " args=%#x locals=%#x funcid=%#x", uint64(fn.Args), uint64(fn.Locals), uint64(fn.FuncID)) if s.Leaf() { fmt.Fprintf(ctxt.Bso, " leaf") } } fmt.Fprintf(ctxt.Bso, "\n") if s.Type == objabi.STEXT { - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { fmt.Fprintf(ctxt.Bso, "\t%#04x ", uint(int(p.Pc))) if ctxt.Debugasm > 1 { io.WriteString(ctxt.Bso, p.String()) diff --git a/src/cmd/internal/obj/pass.go b/src/cmd/internal/obj/pass.go index 09d520b4e9..01657dd4f6 100644 --- a/src/cmd/internal/obj/pass.go +++ b/src/cmd/internal/obj/pass.go @@ -118,7 +118,7 @@ func checkaddr(ctxt *Link, p *Prog, a *Addr) { } func linkpatch(ctxt *Link, sym *LSym, newprog ProgAlloc) { - for p := sym.Func.Text; p != nil; p = p.Link { + for p := sym.Func().Text; p != nil; p = p.Link { checkaddr(ctxt, p, &p.From) if p.GetFrom3() != nil { checkaddr(ctxt, p, p.GetFrom3()) @@ -138,7 +138,7 @@ func linkpatch(ctxt *Link, sym *LSym, newprog ProgAlloc) { if p.To.Sym != nil { continue } - q := sym.Func.Text + q := sym.Func().Text for q != nil && p.To.Offset != q.Pc { if q.Forwd != nil && p.To.Offset >= q.Forwd.Pc { q = q.Forwd @@ -164,7 +164,7 @@ func linkpatch(ctxt *Link, sym *LSym, newprog ProgAlloc) { } // Collapse series of jumps to jumps. - for p := sym.Func.Text; p != nil; p = p.Link { + for p := sym.Func().Text; p != nil; p = p.Link { if p.To.Target() == nil { continue } diff --git a/src/cmd/internal/obj/pcln.go b/src/cmd/internal/obj/pcln.go index ce0d3714c0..67c4f9a62b 100644 --- a/src/cmd/internal/obj/pcln.go +++ b/src/cmd/internal/obj/pcln.go @@ -35,20 +35,21 @@ func funcpctab(ctxt *Link, func_ *LSym, desc string, valfunc func(*Link, *LSym, val := int32(-1) oldval := val - if func_.Func.Text == nil { + fn := func_.Func() + if fn.Text == nil { // Return the emtpy symbol we've built so far. return sym } - pc := func_.Func.Text.Pc + pc := fn.Text.Pc if dbg { - ctxt.Logf("%6x %6d %v\n", uint64(pc), val, func_.Func.Text) + ctxt.Logf("%6x %6d %v\n", uint64(pc), val, fn.Text) } buf := make([]byte, binary.MaxVarintLen32) started := false - for p := func_.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { // Update val. If it's not changing, keep going. val = valfunc(ctxt, func_, val, p, 0, arg) @@ -107,7 +108,7 @@ func funcpctab(ctxt *Link, func_ *LSym, desc string, valfunc func(*Link, *LSym, if started { if dbg { - ctxt.Logf("%6x done\n", uint64(func_.Func.Text.Pc+func_.Size)) + ctxt.Logf("%6x done\n", uint64(fn.Text.Pc+func_.Size)) } v := (func_.Size - pc) / int64(ctxt.Arch.MinLC) if v < 0 { @@ -257,12 +258,12 @@ func pctopcdata(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg i } func linkpcln(ctxt *Link, cursym *LSym) { - pcln := &cursym.Func.Pcln + pcln := &cursym.Func().Pcln pcln.UsedFiles = make(map[goobj.CUFileIndex]struct{}) npcdata := 0 nfuncdata := 0 - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { // Find the highest ID of any used PCDATA table. This ignores PCDATA table // that consist entirely of "-1", since that's the assumed default value. // From.Offset is table ID @@ -288,11 +289,12 @@ func linkpcln(ctxt *Link, cursym *LSym) { // Check that all the Progs used as inline markers are still reachable. // See issue #40473. - inlMarkProgs := make(map[*Prog]struct{}, len(cursym.Func.InlMarks)) - for _, inlMark := range cursym.Func.InlMarks { + fn := cursym.Func() + inlMarkProgs := make(map[*Prog]struct{}, len(fn.InlMarks)) + for _, inlMark := range fn.InlMarks { inlMarkProgs[inlMark.p] = struct{}{} } - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { if _, ok := inlMarkProgs[p]; ok { delete(inlMarkProgs, p) } @@ -303,7 +305,7 @@ func linkpcln(ctxt *Link, cursym *LSym) { pcinlineState := new(pcinlineState) pcln.Pcinline = funcpctab(ctxt, cursym, "pctoinline", pcinlineState.pctoinline, nil) - for _, inlMark := range cursym.Func.InlMarks { + for _, inlMark := range fn.InlMarks { pcinlineState.setParentPC(ctxt, int(inlMark.id), int32(inlMark.p.Pc)) } pcln.InlTree = pcinlineState.localTree @@ -316,7 +318,7 @@ func linkpcln(ctxt *Link, cursym *LSym) { // tabulate which pc and func data we have. havepc := make([]uint32, (npcdata+31)/32) havefunc := make([]uint32, (nfuncdata+31)/32) - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { if p.As == AFUNCDATA { if (havefunc[p.From.Offset/32]>>uint64(p.From.Offset%32))&1 != 0 { ctxt.Diag("multiple definitions for FUNCDATA $%d", p.From.Offset) @@ -344,7 +346,7 @@ func linkpcln(ctxt *Link, cursym *LSym) { // funcdata if nfuncdata > 0 { - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { if p.As != AFUNCDATA { continue } diff --git a/src/cmd/internal/obj/plist.go b/src/cmd/internal/obj/plist.go index 6e33f29959..eb54c67f6a 100644 --- a/src/cmd/internal/obj/plist.go +++ b/src/cmd/internal/obj/plist.go @@ -81,7 +81,7 @@ func Flushplist(ctxt *Link, plist *Plist, newprog ProgAlloc, myimportpath string continue } found := false - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { if p.As == AFUNCDATA && p.From.Type == TYPE_CONST && p.From.Offset == objabi.FUNCDATA_ArgsPointerMaps { found = true break @@ -89,7 +89,7 @@ func Flushplist(ctxt *Link, plist *Plist, newprog ProgAlloc, myimportpath string } if !found { - p := Appendp(s.Func.Text, newprog) + p := Appendp(s.Func().Text, newprog) p.As = AFUNCDATA p.From.Type = TYPE_CONST p.From.Offset = objabi.FUNCDATA_ArgsPointerMaps @@ -120,15 +120,15 @@ func (ctxt *Link) InitTextSym(s *LSym, flag int) { // func _() { } return } - if s.Func != nil { + if s.Func() != nil { ctxt.Diag("InitTextSym double init for %s", s.Name) } - s.Func = new(FuncInfo) + s.NewFuncInfo() if s.OnList() { ctxt.Diag("symbol %s listed multiple times", s.Name) } name := strings.Replace(s.Name, "\"\"", ctxt.Pkgpath, -1) - s.Func.FuncID = objabi.GetFuncID(name, flag&WRAPPER != 0) + s.Func().FuncID = objabi.GetFuncID(name, flag&WRAPPER != 0) s.Set(AttrOnList, true) s.Set(AttrDuplicateOK, flag&DUPOK != 0) s.Set(AttrNoSplit, flag&NOSPLIT != 0) @@ -185,7 +185,7 @@ func (ctxt *Link) EmitEntryLiveness(s *LSym, p *Prog, newprog ProgAlloc) *Prog { // Similar to EmitEntryLiveness, but just emit stack map. func (ctxt *Link) EmitEntryStackMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { pcdata := Appendp(p, newprog) - pcdata.Pos = s.Func.Text.Pos + pcdata.Pos = s.Func().Text.Pos pcdata.As = APCDATA pcdata.From.Type = TYPE_CONST pcdata.From.Offset = objabi.PCDATA_StackMapIndex @@ -198,7 +198,7 @@ func (ctxt *Link) EmitEntryStackMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { // Similar to EmitEntryLiveness, but just emit register map. func (ctxt *Link) EmitEntryRegMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { pcdata := Appendp(p, newprog) - pcdata.Pos = s.Func.Text.Pos + pcdata.Pos = s.Func().Text.Pos pcdata.As = APCDATA pcdata.From.Type = TYPE_CONST pcdata.From.Offset = objabi.PCDATA_RegMapIndex diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index c2e8e9e9d0..dcabb3cd6a 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -663,8 +663,8 @@ func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { // the function alignment is not changed which might // result in 16 byte alignment but that is still fine. // TODO: alignment on AIX - if ctxt.Headtype != objabi.Haix && cursym.Func.Align < 32 { - cursym.Func.Align = 32 + if ctxt.Headtype != objabi.Haix && cursym.Func().Align < 32 { + cursym.Func().Align = 32 } default: ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", a) @@ -673,7 +673,7 @@ func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { } func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -722,7 +722,7 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for bflag != 0 { bflag = 0 pc = 0 - for p = c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { p.Pc = pc o = c.oplook(p) @@ -784,7 +784,7 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { bp := c.cursym.P var i int32 var out [6]uint32 - for p := c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { c.pc = p.Pc o = c.oplook(p) if int(o.size) > 4*len(out) { diff --git a/src/cmd/internal/obj/ppc64/obj9.go b/src/cmd/internal/obj/ppc64/obj9.go index c012762a18..3ab19de602 100644 --- a/src/cmd/internal/obj/ppc64/obj9.go +++ b/src/cmd/internal/obj/ppc64/obj9.go @@ -402,13 +402,13 @@ func (c *ctxt9) rewriteToUseGot(p *obj.Prog) { func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // TODO(minux): add morestack short-cuts with small fixed frame-size. - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxt9{ctxt: ctxt, cursym: cursym, newprog: newprog} - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -8 { // Compatibility hack. @@ -424,8 +424,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines @@ -435,7 +435,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var q *obj.Prog var q1 *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { /* too hard, just leave alone */ case obj.ATEXT: @@ -541,7 +541,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ABCL, obj.ADUFFZERO, obj.ADUFFCOPY: - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF fallthrough case ABC, @@ -598,7 +598,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize := int32(0) var p1 *obj.Prog var p2 *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: @@ -664,7 +664,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { rel.Type = objabi.R_ADDRPOWER_PCREL } - if !c.cursym.Func.Text.From.Sym.NoSplit() { + if !c.cursym.Func().Text.From.Sym.NoSplit() { q = c.stacksplit(q, autosize) // emit split check } @@ -732,14 +732,14 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) } - } else if c.cursym.Func.Text.Mark&LEAF == 0 { + } else if c.cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // (e.g. gogo) are not identified as leaves but still have // no frame. - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { c.cursym.Set(obj.AttrLeaf, true) break } @@ -755,7 +755,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.To.Offset = 24 } - if c.cursym.Func.Text.From.Sym.Wrapper() { + if c.cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVD g_panic(g), R3 @@ -853,7 +853,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { retTarget := p.To.Sym - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 || c.cursym.Name == "runtime.racecallbackthunk" { p.As = ABR p.From = obj.Addr{} @@ -1161,7 +1161,7 @@ func (c *ctxt9) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { var morestacksym *obj.LSym if c.cursym.CFunc() { morestacksym = c.ctxt.Lookup("runtime.morestackc") - } else if !c.cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { morestacksym = c.ctxt.Lookup("runtime.morestack_noctxt") } else { morestacksym = c.ctxt.Lookup("runtime.morestack") diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 841b30d85c..045c2250b5 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -427,7 +427,7 @@ func InvertBranch(as obj.As) obj.As { // instruction. Must be called after progedit. func containsCall(sym *obj.LSym) bool { // CALLs are CALL or JAL(R) with link register LR. - for p := sym.Func.Text; p != nil; p = p.Link { + for p := sym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ACALL: return true @@ -499,12 +499,12 @@ func stackOffset(a *obj.Addr, stacksize int64) { // concrete, real RISC-V instructions or directive pseudo-ops like TEXT, // PCDATA, and FUNCDATA. func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } // Generate the prologue. - text := cursym.Func.Text + text := cursym.Func().Text if text.As != obj.ATEXT { ctxt.Diag("preprocess: found symbol that does not start with TEXT directive") return @@ -538,12 +538,12 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { stacksize += ctxt.FixedFrameSize() } - cursym.Func.Args = text.To.Val.(int32) - cursym.Func.Locals = int32(stacksize) + cursym.Func().Args = text.To.Val.(int32) + cursym.Func().Locals = int32(stacksize) prologue := text - if !cursym.Func.Text.From.Sym.NoSplit() { + if !cursym.Func().Text.From.Sym.NoSplit() { prologue = stacksplit(ctxt, prologue, cursym, newprog, stacksize) // emit split check } @@ -567,7 +567,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { prologue = ctxt.EndUnsafePoint(prologue, newprog, -1) } - if cursym.Func.Text.From.Sym.Wrapper() { + if cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOV g_panic(g), X11 @@ -647,13 +647,13 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // Update stack-based offsets. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { stackOffset(&p.From, stacksize) stackOffset(&p.To, stacksize) } // Additional instruction rewriting. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.AGETCALLERPC: if cursym.Leaf() { @@ -733,7 +733,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // Rewrite MOV pseudo-instructions. This cannot be done in // progedit, as SP offsets need to be applied before we split // up some of the Addrs. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: rewriteMOV(ctxt, newprog, p) @@ -741,7 +741,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // Split immediates larger than 12-bits. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { // $imm, REG, TO case AADDI, AANDI, AORI, AXORI: @@ -858,9 +858,9 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // a fixed point will be reached). No attempt to handle functions > 2GiB. for { rescan := false - setPCs(cursym.Func.Text, 0) + setPCs(cursym.Func().Text, 0) - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ: if p.To.Type != obj.TYPE_BRANCH { @@ -917,7 +917,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // Now that there are no long branches, resolve branch and jump targets. // At this point, instruction rewriting which changes the number of // instructions will break everything--don't do it! - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ, AJAL: switch p.To.Type { @@ -940,7 +940,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // Validate all instructions - this provides nice error messages. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { for _, ins := range instructionsForProg(p) { ins.validate(ctxt) } @@ -1068,7 +1068,7 @@ func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgA p.To.Type = obj.TYPE_BRANCH if cursym.CFunc() { p.To.Sym = ctxt.Lookup("runtime.morestackc") - } else if !cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !cursym.Func().Text.From.Sym.NeedCtxt() { p.To.Sym = ctxt.Lookup("runtime.morestack_noctxt") } else { p.To.Sym = ctxt.Lookup("runtime.morestack") @@ -1083,7 +1083,7 @@ func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgA p.As = AJAL p.To = obj.Addr{Type: obj.TYPE_BRANCH} p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} - p.To.SetTarget(cursym.Func.Text.Link) + p.To.SetTarget(cursym.Func().Text.Link) // placeholder for to_done's jump target p = obj.Appendp(p, newprog) @@ -1926,7 +1926,7 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } var symcode []uint32 - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case AJALR: if p.To.Sym != nil { @@ -1981,7 +1981,7 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Arch.ByteOrder.PutUint32(p, symcode[i]) } - obj.MarkUnsafePoints(ctxt, cursym.Func.Text, newprog, isUnsafePoint, nil) + obj.MarkUnsafePoints(ctxt, cursym.Func().Text, newprog, isUnsafePoint, nil) } func isUnsafePoint(p *obj.Prog) bool { diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go index cb3a2c3196..da14dd3c41 100644 --- a/src/cmd/internal/obj/s390x/asmz.go +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -447,7 +447,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Retpoline = false // don't keep printing } - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -473,7 +473,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.cursym.R[nrelocs0+i] = obj.Reloc{} } c.cursym.R = c.cursym.R[:nrelocs0] // preserve marker relocations generated by the compiler - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { pc := int64(len(buffer)) if pc != p.Pc { changed = true @@ -504,7 +504,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // We use REGTMP as a scratch register during call injection, // so instruction sequences that use REGTMP are unsafe to // preempt asynchronously. - obj.MarkUnsafePoints(c.ctxt, c.cursym.Func.Text, c.newprog, c.isUnsafePoint, nil) + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, nil) } // Return whether p is an unsafe point. diff --git a/src/cmd/internal/obj/s390x/objz.go b/src/cmd/internal/obj/s390x/objz.go index 625bb0f7b4..3af5425b36 100644 --- a/src/cmd/internal/obj/s390x/objz.go +++ b/src/cmd/internal/obj/s390x/objz.go @@ -205,13 +205,13 @@ func (c *ctxtz) rewriteToUseGot(p *obj.Prog) { func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // TODO(minux): add morestack short-cuts with small fixed frame-size. - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxtz{ctxt: ctxt, cursym: cursym, newprog: newprog} - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -8 { // Compatibility hack. @@ -227,8 +227,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines @@ -237,7 +237,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { */ var q *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ATEXT: q = p @@ -245,7 +245,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case ABL, ABCL: q = p - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF fallthrough case ABC, @@ -294,7 +294,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var pPre *obj.Prog var pPreempt *obj.Prog wasSplit := false - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { pLast = p switch p.As { case obj.ATEXT: @@ -356,19 +356,19 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.Spadj = autosize q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) - } else if c.cursym.Func.Text.Mark&LEAF == 0 { + } else if c.cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // (e.g. gogo) are not identified as leaves but still have // no frame. - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { c.cursym.Set(obj.AttrLeaf, true) break } - if c.cursym.Func.Text.From.Sym.Wrapper() { + if c.cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVD g_panic(g), R3 @@ -461,7 +461,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case obj.ARET: retTarget := p.To.Sym - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 { p.As = ABR p.From = obj.Addr{} @@ -696,7 +696,7 @@ func (c *ctxtz) stacksplitPost(p *obj.Prog, pPre *obj.Prog, pPreempt *obj.Prog, p.To.Type = obj.TYPE_BRANCH if c.cursym.CFunc() { p.To.Sym = c.ctxt.Lookup("runtime.morestackc") - } else if !c.cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt") } else { p.To.Sym = c.ctxt.Lookup("runtime.morestack") @@ -709,7 +709,7 @@ func (c *ctxtz) stacksplitPost(p *obj.Prog, pPre *obj.Prog, pPreempt *obj.Prog, p.As = ABR p.To.Type = obj.TYPE_BRANCH - p.To.SetTarget(c.cursym.Func.Text.Link) + p.To.SetTarget(c.cursym.Func().Text.Link) return p } diff --git a/src/cmd/internal/obj/sym.go b/src/cmd/internal/obj/sym.go index e5d7b2cbfd..0182773f8e 100644 --- a/src/cmd/internal/obj/sym.go +++ b/src/cmd/internal/obj/sym.go @@ -358,7 +358,8 @@ func (ctxt *Link) traverseSyms(flag traverseFlag, fn func(*LSym)) { } func (ctxt *Link) traverseFuncAux(flag traverseFlag, fsym *LSym, fn func(parent *LSym, aux *LSym)) { - pc := &fsym.Func.Pcln + fninfo := fsym.Func() + pc := &fninfo.Pcln if flag&traverseAux == 0 { // NB: should it become necessary to walk aux sym reloc references // without walking the aux syms themselves, this can be changed. @@ -389,7 +390,8 @@ func (ctxt *Link) traverseFuncAux(flag traverseFlag, fsym *LSym, fn func(parent fn(fsym, filesym) } } - dwsyms := []*LSym{fsym.Func.dwarfRangesSym, fsym.Func.dwarfLocSym, fsym.Func.dwarfDebugLinesSym, fsym.Func.dwarfInfoSym} + + dwsyms := []*LSym{fninfo.dwarfRangesSym, fninfo.dwarfLocSym, fninfo.dwarfDebugLinesSym, fninfo.dwarfInfoSym} for _, dws := range dwsyms { if dws == nil || dws.Size == 0 { continue diff --git a/src/cmd/internal/obj/wasm/wasmobj.go b/src/cmd/internal/obj/wasm/wasmobj.go index a9e093a8ad..f7f66a1255 100644 --- a/src/cmd/internal/obj/wasm/wasmobj.go +++ b/src/cmd/internal/obj/wasm/wasmobj.go @@ -182,14 +182,14 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { return p } - framesize := s.Func.Text.To.Offset + framesize := s.Func().Text.To.Offset if framesize < 0 { panic("bad framesize") } - s.Func.Args = s.Func.Text.To.Val.(int32) - s.Func.Locals = int32(framesize) + s.Func().Args = s.Func().Text.To.Val.(int32) + s.Func().Locals = int32(framesize) - if s.Func.Text.From.Sym.Wrapper() { + if s.Func().Text.From.Sym.Wrapper() { // if g._panic != nil && g._panic.argp == FP { // g._panic.argp = bottom-of-frame // } @@ -222,7 +222,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { Offset: 0, // panic.argp } - p := s.Func.Text + p := s.Func().Text p = appendp(p, AMOVD, gpanic, regAddr(REG_R0)) p = appendp(p, AGet, regAddr(REG_R0)) @@ -245,7 +245,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } if framesize > 0 { - p := s.Func.Text + p := s.Func().Text p = appendp(p, AGet, regAddr(REG_SP)) p = appendp(p, AI32Const, constAddr(framesize)) p = appendp(p, AI32Sub) @@ -260,8 +260,8 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { pc := int64(0) // pc is only incremented when necessary, this avoids bloat of the BrTable instruction var tableIdxs []uint64 tablePC := int64(0) - base := ctxt.PosTable.Pos(s.Func.Text.Pos).Base() - for p := s.Func.Text; p != nil; p = p.Link { + base := ctxt.PosTable.Pos(s.Func().Text.Pos).Base() + for p := s.Func().Text; p != nil; p = p.Link { prevBase := base base = ctxt.PosTable.Pos(p.Pos).Base() switch p.As { @@ -313,8 +313,8 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { tableIdxs = append(tableIdxs, uint64(numResumePoints)) s.Size = pc + 1 - if !s.Func.Text.From.Sym.NoSplit() { - p := s.Func.Text + if !s.Func().Text.From.Sym.NoSplit() { + p := s.Func().Text if framesize <= objabi.StackSmall { // small stack: SP <= stackguard @@ -352,7 +352,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { p = appendp(p, AIf) p = appendp(p, obj.ACALL, constAddr(0)) - if s.Func.Text.From.Sym.NeedCtxt() { + if s.Func().Text.From.Sym.NeedCtxt() { p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestack} } else { p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestackNoCtxt} @@ -365,7 +365,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { var entryPointLoopBranches []*obj.Prog var unwindExitBranches []*obj.Prog currentDepth := 0 - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.As { case ABlock, ALoop, AIf: currentDepth++ @@ -562,7 +562,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } } - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.From.Name { case obj.NAME_AUTO: p.From.Offset += int64(framesize) @@ -712,7 +712,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } { - p := s.Func.Text + p := s.Func().Text if len(unwindExitBranches) > 0 { p = appendp(p, ABlock) // unwindExit, used to return 1 when unwinding the stack for _, b := range unwindExitBranches { @@ -749,7 +749,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { currentDepth = 0 blockDepths := make(map[*obj.Prog]int) - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.As { case ABlock, ALoop, AIf: currentDepth++ @@ -850,7 +850,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { hasLocalSP = true var regUsed [MAXREG - MINREG]bool - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { if p.From.Reg != 0 { regUsed[p.From.Reg-MINREG] = true } @@ -896,7 +896,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { updateLocalSP(w) } - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.As { case AGet: if p.From.Type != obj.TYPE_REG { diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index 4940c79eaa..c412f4945d 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -2050,7 +2050,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { ctxt.Diag("x86 tables not initialized, call x86.instinit first") } - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil { p.To.SetTarget(p) } @@ -2085,7 +2085,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } var count int64 // rough count of number of instructions - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { count++ p.Back = branchShort // use short branches first time through if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) { @@ -2113,7 +2113,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { c = 0 var pPrev *obj.Prog nops = nops[:0] - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { c0 := c c = pjc.padJump(ctxt, s, p, c) @@ -2227,7 +2227,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { // the first instruction.) return p.From.Index == REG_TLS } - obj.MarkUnsafePoints(ctxt, s.Func.Text, newprog, useTLS, nil) + obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil) } } diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index 18a6afcd77..e11fa13f65 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -563,11 +563,11 @@ func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { } func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } - p := cursym.Func.Text + p := cursym.Func().Text autoffset := int32(p.To.Offset) if autoffset < 0 { autoffset = 0 @@ -602,12 +602,12 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } textarg := int64(p.To.Val.(int32)) - cursym.Func.Args = int32(textarg) - cursym.Func.Locals = int32(p.To.Offset) + cursym.Func().Args = int32(textarg) + cursym.Func().Locals = int32(p.To.Offset) // TODO(rsc): Remove. - if ctxt.Arch.Family == sys.I386 && cursym.Func.Locals < 0 { - cursym.Func.Locals = 0 + if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 { + cursym.Func().Locals = 0 } // TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'. @@ -642,7 +642,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p = load_g_cx(ctxt, p, newprog) // load g into CX } - if !cursym.Func.Text.From.Sym.NoSplit() { + if !cursym.Func().Text.From.Sym.NoSplit() { p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg)) // emit split check } @@ -690,7 +690,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Reg = REG_BP } - if cursym.Func.Text.From.Sym.Wrapper() { + if cursym.Func().Text.From.Sym.Wrapper() { // if g._panic != nil && g._panic.argp == FP { // g._panic.argp = bottom-of-frame // } @@ -808,7 +808,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } var deltasp int32 - for p = cursym.Func.Text; p != nil; p = p.Link { + for p = cursym.Func().Text; p != nil; p = p.Link { pcsize := ctxt.Arch.RegSize switch p.From.Name { case obj.NAME_AUTO: @@ -1103,7 +1103,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA end := ctxt.EndUnsafePoint(jls, newprog, -1) var last *obj.Prog - for last = cursym.Func.Text; last.Link != nil; last = last.Link { + for last = cursym.Func().Text; last.Link != nil; last = last.Link { } // Now we are at the end of the function, but logically @@ -1117,7 +1117,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA pcdata = ctxt.StartUnsafePoint(pcdata, newprog) call := obj.Appendp(pcdata, newprog) - call.Pos = cursym.Func.Text.Pos + call.Pos = cursym.Func().Text.Pos call.As = obj.ACALL call.To.Type = obj.TYPE_BRANCH call.To.Name = obj.NAME_EXTERN @@ -1125,7 +1125,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA switch { case cursym.CFunc(): morestack = "runtime.morestackc" - case !cursym.Func.Text.From.Sym.NeedCtxt(): + case !cursym.Func().Text.From.Sym.NeedCtxt(): morestack = "runtime.morestack_noctxt" } call.To.Sym = ctxt.Lookup(morestack) @@ -1144,7 +1144,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA jmp := obj.Appendp(pcdata, newprog) jmp.As = obj.AJMP jmp.To.Type = obj.TYPE_BRANCH - jmp.To.SetTarget(cursym.Func.Text.Link) + jmp.To.SetTarget(cursym.Func().Text.Link) jmp.Spadj = +framesize jls.To.SetTarget(call) -- cgit v1.3 From e981936855383883edb5fcc85a196c485b15f0f9 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Tue, 6 Oct 2020 17:08:31 -0400 Subject: cmd/internal/obj/ppc64,cmd/asm/internal/asm/testdata: fix up ppc64 testcases When a fix was made at the end of the last release related to NOPs, it was discovered that the ppc64.s testcase was out of date and contained comments that weren't being processed. Essentially the instructions in that test were being assembled but there was no verification that the encodings weres correct. The ppc64enc.s file was mostly complete and included the valid encodings for verification. This change moves ppc64enc.s to ppc64.s and adds the instructions that were missing. This also adds a minor fix to asm9.go on the assembly of the addex that was discovered during this testing. Change-Id: Iaada1563b137849ad195fa88f32ecc9ab3e1e95f Reviewed-on: https://go-review.googlesource.com/c/go/+/260217 Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Keith Randall Trust: Lynn Boger --- src/cmd/asm/internal/asm/endtoend_test.go | 4 - src/cmd/asm/internal/asm/testdata/ppc64.s | 2006 +++++++++----------------- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 642 --------- src/cmd/internal/obj/ppc64/asm9.go | 2 +- 4 files changed, 707 insertions(+), 1947 deletions(-) delete mode 100644 src/cmd/asm/internal/asm/testdata/ppc64enc.s (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go index b21e3156ae..decf5391db 100644 --- a/src/cmd/asm/internal/asm/endtoend_test.go +++ b/src/cmd/asm/internal/asm/endtoend_test.go @@ -442,10 +442,6 @@ func TestPPC64EndToEnd(t *testing.T) { testEndToEnd(t, "ppc64", "ppc64") } -func TestPPC64Encoder(t *testing.T) { - testEndToEnd(t, "ppc64", "ppc64enc") -} - func TestRISCVEncoder(t *testing.T) { testEndToEnd(t, "riscv64", "riscvenc") } diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s index ba64d84a35..2b1191c44b 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64.s @@ -2,1311 +2,717 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// This input was created by taking the instruction productions in -// the old assembler's (9a's) grammar and hand-writing complete -// instructions for each rule, to guarantee we cover the same space. +// This contains the majority of valid opcode combinations +// available in cmd/internal/obj/ppc64/asm9.go with +// their valid instruction encodings. #include "../../../../../runtime/textflag.h" -TEXT foo(SB),DUPOK|NOSPLIT,$0 +TEXT asmtest(SB),DUPOK|NOSPLIT,$0 + // move constants + MOVD $1, R3 // 38600001 + MOVD $-1, R4 // 3880ffff + MOVD $65535, R5 // 6005ffff + MOVD $65536, R6 // 64060001 + MOVD $-32767, R5 // 38a08001 + MOVD $-32768, R6 // 38c08000 + MOVD $1234567, R5 // 6405001260a5d687 + MOVW $1, R3 // 38600001 + MOVW $-1, R4 // 3880ffff + MOVW $65535, R5 // 6005ffff + MOVW $65536, R6 // 64060001 + MOVW $-32767, R5 // 38a08001 + MOVW $-32768, R6 // 38c08000 + MOVW $1234567, R5 // 6405001260a5d687 + MOVD 8(R3), R4 // e8830008 + MOVD (R3)(R4), R5 // 7ca4182a + MOVW 4(R3), R4 // e8830006 + MOVW (R3)(R4), R5 // 7ca41aaa + MOVWZ 4(R3), R4 // 80830004 + MOVWZ (R3)(R4), R5 // 7ca4182e + MOVH 4(R3), R4 // a8830004 + MOVH (R3)(R4), R5 // 7ca41aae + MOVHZ 2(R3), R4 // a0830002 + MOVHZ (R3)(R4), R5 // 7ca41a2e + MOVB 1(R3), R4 // 888300017c840774 + MOVB (R3)(R4), R5 // 7ca418ae7ca50774 + MOVBZ 1(R3), R4 // 88830001 + MOVBZ (R3)(R4), R5 // 7ca418ae + MOVDBR (R3)(R4), R5 // 7ca41c28 + MOVWBR (R3)(R4), R5 // 7ca41c2c + MOVHBR (R3)(R4), R5 // 7ca41e2c + + MOVDU 8(R3), R4 // e8830009 + MOVDU (R3)(R4), R5 // 7ca4186a + MOVWU (R3)(R4), R5 // 7ca41aea + MOVWZU 4(R3), R4 // 84830004 + MOVWZU (R3)(R4), R5 // 7ca4186e + MOVHU 2(R3), R4 // ac830002 + MOVHU (R3)(R4), R5 // 7ca41aee + MOVHZU 2(R3), R4 // a4830002 + MOVHZU (R3)(R4), R5 // 7ca41a6e + MOVBU 1(R3), R4 // 8c8300017c840774 + MOVBU (R3)(R4), R5 // 7ca418ee7ca50774 + MOVBZU 1(R3), R4 // 8c830001 + MOVBZU (R3)(R4), R5 // 7ca418ee + + MOVD R4, 8(R3) // f8830008 + MOVD R5, (R3)(R4) // 7ca4192a + MOVW R4, 4(R3) // 90830004 + MOVW R5, (R3)(R4) // 7ca4192e + MOVH R4, 2(R3) // b0830002 + MOVH R5, (R3)(R4) // 7ca41b2e + MOVB R4, 1(R3) // 98830001 + MOVB R5, (R3)(R4) // 7ca419ae + MOVDBR R5, (R3)(R4) // 7ca41d28 + MOVWBR R5, (R3)(R4) // 7ca41d2c + MOVHBR R5, (R3)(R4) // 7ca41f2c + + MOVDU R4, 8(R3) // f8830009 + MOVDU R5, (R3)(R4) // 7ca4196a + MOVWU R4, 4(R3) // 94830004 + MOVWU R5, (R3)(R4) // 7ca4196e + MOVHU R4, 2(R3) // b4830002 + MOVHU R5, (R3)(R4) // 7ca41b6e + MOVBU R4, 1(R3) // 9c830001 + MOVBU R5, (R3)(R4) // 7ca419ee + + ADD $1, R3 // 38630001 + ADD $1, R3, R4 // 38830001 + ADD $-1, R4 // 3884ffff + ADD $-1, R4, R5 // 38a4ffff + ADD $65535, R5 // 601fffff7cbf2a14 + ADD $65535, R5, R6 // 601fffff7cdf2a14 + ADD $65536, R6 // 3cc60001 + ADD $65536, R6, R7 // 3ce60001 + ADD $-32767, R5 // 38a58001 + ADD $-32767, R5, R4 // 38858001 + ADD $-32768, R6 // 38c68000 + ADD $-32768, R6, R5 // 38a68000 + ADD $1234567, R5 // 641f001263ffd6877cbf2a14 + ADD $1234567, R5, R6 // 641f001263ffd6877cdf2a14 + ADDEX R3, R5, $3, R6 // 7cc32f54 + ADDIS $8, R3 // 3c630008 + ADDIS $1000, R3, R4 // 3c8303e8 + + ANDCC $1, R3 // 70630001 + ANDCC $1, R3, R4 // 70640001 + ANDCC $-1, R4 // 3be0ffff7fe42039 + ANDCC $-1, R4, R5 // 3be0ffff7fe52039 + ANDCC $65535, R5 // 70a5ffff + ANDCC $65535, R5, R6 // 70a6ffff + ANDCC $65536, R6 // 74c60001 + ANDCC $65536, R6, R7 // 74c70001 + ANDCC $-32767, R5 // 3be080017fe52839 + ANDCC $-32767, R5, R4 // 3be080017fe42839 + ANDCC $-32768, R6 // 3be080007fe63039 + ANDCC $-32768, R5, R6 // 3be080007fe62839 + ANDCC $1234567, R5 // 641f001263ffd6877fe52839 + ANDCC $1234567, R5, R6 // 641f001263ffd6877fe62839 + ANDISCC $1, R3 // 74630001 + ANDISCC $1000, R3, R4 // 746403e8 + + OR $1, R3 // 60630001 + OR $1, R3, R4 // 60640001 + OR $-1, R4 // 3be0ffff7fe42378 + OR $-1, R4, R5 // 3be0ffff7fe52378 + OR $65535, R5 // 60a5ffff + OR $65535, R5, R6 // 60a6ffff + OR $65536, R6 // 64c60001 + OR $65536, R6, R7 // 64c70001 + OR $-32767, R5 // 3be080017fe52b78 + OR $-32767, R5, R6 // 3be080017fe62b78 + OR $-32768, R6 // 3be080007fe63378 + OR $-32768, R6, R7 // 3be080007fe73378 + OR $1234567, R5 // 641f001263ffd6877fe52b78 + OR $1234567, R5, R3 // 641f001263ffd6877fe32b78 + ORIS $255, R3, R4 + + XOR $1, R3 // 68630001 + XOR $1, R3, R4 // 68640001 + XOR $-1, R4 // 3be0ffff7fe42278 + XOR $-1, R4, R5 // 3be0ffff7fe52278 + XOR $65535, R5 // 68a5ffff + XOR $65535, R5, R6 // 68a6ffff + XOR $65536, R6 // 6cc60001 + XOR $65536, R6, R7 // 6cc70001 + XOR $-32767, R5 // 3be080017fe52a78 + XOR $-32767, R5, R6 // 3be080017fe62a78 + XOR $-32768, R6 // 3be080007fe63278 + XOR $-32768, R6, R7 // 3be080007fe73278 + XOR $1234567, R5 // 641f001263ffd6877fe52a78 + XOR $1234567, R5, R3 // 641f001263ffd6877fe32a78 + XORIS $15, R3, R4 + + // TODO: the order of CR operands don't match + CMP R3, R4 // 7c232000 + CMPU R3, R4 // 7c232040 + CMPW R3, R4 // 7c032000 + CMPWU R3, R4 // 7c032040 + CMPB R3,R4,R4 // 7c6423f8 + CMPEQB R3,R4,CR6 // 7f0321c0 + + // TODO: constants for ADDC? + ADD R3, R4 // 7c841a14 + ADD R3, R4, R5 // 7ca41a14 + ADDC R3, R4 // 7c841814 + ADDC R3, R4, R5 // 7ca41814 + ADDE R3, R4 // 7c841914 + ADDECC R3, R4 // 7c841915 + ADDEV R3, R4 // 7c841d14 + ADDEVCC R3, R4 // 7c841d15 + ADDV R3, R4 // 7c841e14 + ADDVCC R3, R4 // 7c841e15 + ADDCCC R3, R4, R5 // 7ca41815 + ADDME R3, R4 // 7c8301d4 + ADDMECC R3, R4 // 7c8301d5 + ADDMEV R3, R4 // 7c8305d4 + ADDMEVCC R3, R4 // 7c8305d5 + ADDCV R3, R4 // 7c841c14 + ADDCVCC R3, R4 // 7c841c15 + ADDZE R3, R4 // 7c830194 + ADDZECC R3, R4 // 7c830195 + ADDZEV R3, R4 // 7c830594 + ADDZEVCC R3, R4 // 7c830595 + SUBME R3, R4 // 7c8301d0 + SUBMECC R3, R4 // 7c8301d1 + SUBMEV R3, R4 // 7c8305d0 + SUBZE R3, R4 // 7c830190 + SUBZECC R3, R4 // 7c830191 + SUBZEV R3, R4 // 7c830590 + SUBZEVCC R3, R4 // 7c830591 + + AND R3, R4 // 7c841838 + AND R3, R4, R5 // 7c851838 + ANDN R3, R4, R5 // 7c851878 + ANDCC R3, R4, R5 // 7c851839 + OR R3, R4 // 7c841b78 + OR R3, R4, R5 // 7c851b78 + ORN R3, R4, R5 // 7c851b38 + ORCC R3, R4, R5 // 7c851b79 + XOR R3, R4 // 7c841a78 + XOR R3, R4, R5 // 7c851a78 + XORCC R3, R4, R5 // 7c851a79 + NAND R3, R4, R5 // 7c851bb8 + NANDCC R3, R4, R5 // 7c851bb9 + EQV R3, R4, R5 // 7c851a38 + EQVCC R3, R4, R5 // 7c851a39 + NOR R3, R4, R5 // 7c8518f8 + NORCC R3, R4, R5 // 7c8518f9 + + SUB R3, R4 // 7c832050 + SUB R3, R4, R5 // 7ca32050 + SUBC R3, R4 // 7c832010 + SUBC R3, R4, R5 // 7ca32010 + + MULLW R3, R4 // 7c8419d6 + MULLW R3, R4, R5 // 7ca419d6 + MULLW $10, R3 // 1c63000a + MULLW $10000000, R3 // 641f009863ff96807c7f19d6 + + MULLWCC R3, R4, R5 // 7ca419d7 + MULHW R3, R4, R5 // 7ca41896 + + MULHWU R3, R4, R5 // 7ca41816 + MULLD R3, R4 // 7c8419d2 + MULLD R4, R4, R5 // 7ca421d2 + MULLD $20, R4 // 1c840014 + MULLD $200000000, R4 // 641f0beb63ffc2007c9f21d2 + + MULLDCC R3, R4, R5 // 7ca419d3 + MULHD R3, R4, R5 // 7ca41892 + MULHDCC R3, R4, R5 // 7ca41893 + + MULLWV R3, R4 // 7c841dd6 + MULLWV R3, R4, R5 // 7ca41dd6 + MULLWVCC R3, R4, R5 // 7ca41dd7 + MULHWUCC R3, R4, R5 // 7ca41817 + MULLDV R3, R4, R5 // 7ca41dd2 + MULLDVCC R3, R4, R5 // 7ca41dd3 + + DIVD R3,R4 // 7c841bd2 + DIVD R3, R4, R5 // 7ca41bd2 + DIVDCC R3,R4, R5 // 7ca41bd3 + DIVDU R3, R4, R5 // 7ca41b92 + DIVDV R3, R4, R5 // 7ca41fd2 + DIVDUCC R3, R4, R5 // 7ca41b93 + DIVDVCC R3, R4, R5 // 7ca41fd3 + DIVDUV R3, R4, R5 // 7ca41f92 + DIVDUVCC R3, R4, R5 // 7ca41f93 + DIVDE R3, R4, R5 // 7ca41b52 + DIVDECC R3, R4, R5 // 7ca41b53 + DIVDEU R3, R4, R5 // 7ca41b12 + DIVDEUCC R3, R4, R5 // 7ca41b13 + + REM R3, R4, R5 // 7fe41bd67fff19d67cbf2050 + REMU R3, R4, R5 // 7fe41b967fff19d67bff00287cbf2050 + REMD R3, R4, R5 // 7fe41bd27fff19d27cbf2050 + REMDU R3, R4, R5 // 7fe41b927fff19d27cbf2050 + + MADDHD R3,R4,R5,R6 // 10c32170 + MADDHDU R3,R4,R5,R6 // 10c32171 + + MODUD R3, R4, R5 // 7ca41a12 + MODUW R3, R4, R5 // 7ca41a16 + MODSD R3, R4, R5 // 7ca41e12 + MODSW R3, R4, R5 // 7ca41e16 + + SLW $8, R3, R4 // 5464402e + SLW R3, R4, R5 // 7c851830 + SLWCC R3, R4 // 7c841831 + SLD $16, R3, R4 // 786483e4 + SLD R3, R4, R5 // 7c851836 + SLDCC R3, R4 // 7c841837 + + SRW $8, R3, R4 // 5464c23e + SRW R3, R4, R5 // 7c851c30 + SRWCC R3, R4 // 7c841c31 + SRAW $8, R3, R4 // 7c644670 + SRAW R3, R4, R5 // 7c851e30 + SRAWCC R3, R4 // 7c841e31 + SRD $16, R3, R4 // 78648402 + SRD R3, R4, R5 // 7c851c36 + SRDCC R3, R4 // 7c841c37 + SRAD $16, R3, R4 // 7c648674 + SRAD R3, R4, R5 // 7c851e34 + SRDCC R3, R4 // 7c841c37 + ROTLW $16, R3, R4 // 5464803e + ROTLW R3, R4, R5 // 5c85183e + EXTSWSLI $3, R4, R5 // 7c851ef4 + RLWMI $7, R3, $65535, R6 // 50663c3e + RLWMICC $7, R3, $65535, R6 // 50663c3f + RLWNM $3, R4, $7, R6 // 54861f7e + RLWNMCC $3, R4, $7, R6 // 54861f7f + RLDMI $0, R4, $7, R6 // 7886076c + RLDMICC $0, R4, $7, R6 // 7886076d + RLDIMI $0, R4, $7, R6 // 788601cc + RLDIMICC $0, R4, $7, R6 // 788601cd + RLDC $0, R4, $15, R6 // 78860728 + RLDCCC $0, R4, $15, R6 // 78860729 + RLDCL $0, R4, $7, R6 // 78860770 + RLDCLCC $0, R4, $15, R6 // 78860721 + RLDCR $0, R4, $-16, R6 // 788606f2 + RLDCRCC $0, R4, $-16, R6 // 788606f3 + RLDICL $0, R4, $15, R6 // 788603c0 + RLDICLCC $0, R4, $15, R6 // 788603c1 + RLDICR $0, R4, $15, R6 // 788603c4 + RLDICRCC $0, R4, $15, R6 // 788603c5 + RLDIC $0, R4, $15, R6 // 788603c8 + RLDICCC $0, R4, $15, R6 // 788603c9 + CLRLSLWI $16, R5, $8, R4 // 54a4422e + CLRLSLDI $24, R4, $2, R3 // 78831588 + + BEQ 0(PC) // 41820000 + BEQ CR1,0(PC) // 41860000 + BGE 0(PC) // 40800000 + BGE CR2,0(PC) // 40880000 + BGT 4(PC) // 41810010 + BGT CR3,4(PC) // 418d0010 + BLE 0(PC) // 40810000 + BLE CR4,0(PC) // 40910000 + BLT 0(PC) // 41800000 + BLT CR5,0(PC) // 41940000 + BNE 0(PC) // 40820000 + BLT CR6,0(PC) // 41980000 + JMP 8(PC) // 48000010 -//inst: -// -// load ints and bytes -// -// LMOVW rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, R2 - -// LMOVW addr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW foo<>+4(SB), R2 - MOVW 16(R1), R2 - -// LMOVW regaddr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW (R1), R2 - MOVW (R1+R2), R3 // MOVW (R1)(R2*1), R3 - -// LMOVB rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, R2 - -// LMOVB addr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVB foo<>+3(SB), R2 - MOVB 16(R1), R2 - -// LMOVB regaddr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVB (R1), R2 - MOVB (R1+R2), R3 // MOVB (R1)(R2*1), R3 - -// -// load floats -// -// LFMOV addr ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD foo<>+4(SB), F2 - FMOVD 16(R1), F2 - -// LFMOV regaddr ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD (R1), F2 - -// LFMOV fimm ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD $0.1, F2 // FMOVD $(0.10000000000000001), F2 - -// LFMOV freg ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F1, F2 - -// LFMOV freg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F2, foo<>+4(SB) - FMOVD F2, 16(R1) - -// LFMOV freg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F2, (R1) - -// -// store ints and bytes -// -// LMOVW rreg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, foo<>+3(SB) - MOVW R1, 16(R2) - -// LMOVW rreg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, (R1) - MOVW R1, (R2+R3) // MOVW R1, (R2)(R3*1) - -// LMOVB rreg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVB R1, foo<>+3(SB) - MOVB R1, 16(R2) - -// LMOVB rreg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVB R1, (R1) - MOVB R1, (R2+R3) // MOVB R1, (R2)(R3*1) -// -// store floats -// -// LMOVW freg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F1, foo<>+4(SB) - FMOVD F1, 16(R2) - -// LMOVW freg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F1, (R1) - -// -// floating point status -// -// LMOVW fpscr ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVFL FPSCR, F1 - -// LMOVW freg ',' fpscr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVFL F1, FPSCR - -// LMOVW freg ',' imm ',' fpscr -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } - MOVFL F1, $4, FPSCR - -// LMOVW fpscr ',' creg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVFL FPSCR, CR0 - -// LMTFSB imm ',' con -// { -// outcode(int($1), &$2, int($4), &nullgen); -// } -//TODO 9a doesn't work MTFSB0 $4, 4 - -// -// field moves (mtcrf) -// -// LMOVW rreg ',' imm ',' lcr -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } -// TODO 9a doesn't work MOVFL R1,$4,CR - -// LMOVW rreg ',' creg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, CR1 - -// LMOVW rreg ',' lcr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, CR - -// -// integer operations -// logical instructions -// shift instructions -// unary instructions -// -// LADDW rreg ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - ADD R1, R2, R3 - -// LADDW imm ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - ADD $1, R2, R3 - -// LADDW rreg ',' imm ',' rreg -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } -//TODO 9a trouble ADD R1, $2, R3 maybe swap rreg and imm - -// LADDW rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - ADD R1, R2 - -// LADDW imm ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - ADD $4, R1 - -// LLOGW rreg ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - ADDE R1, R2, R3 - -// LLOGW rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - ADDE R1, R2 - -// LSHW rreg ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - SLW R1, R2, R3 - -// LSHW rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - SLW R1, R2 - -// LSHW imm ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - SLW $4, R1, R2 - -// LSHW imm ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - SLW $4, R1 - -// LABS rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - SLW $4, R1 - -// LABS rreg -// { -// outcode(int($1), &$2, 0, &$2); -// } - SUBME R1 // SUBME R1, R1 - -// -// multiply-accumulate -// -// LMA rreg ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } -//TODO this instruction is undefined in lex.go LMA R1, R2, R3 NOT SUPPORTED (called MAC) - -// -// move immediate: macro for cau+or, addi, addis, and other combinations -// -// LMOVW imm ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW $1, R1 - -// LMOVW ximm ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW $1, R1 - MOVW $foo(SB), R1 - -// condition register operations -// -// LCROP cbit ',' cbit -// { -// outcode(int($1), &$2, int($4.Reg), &$4); -// } -//TODO 9a trouble CREQV 1, 2 delete? liblink encodes like a divide (maybe wrong too) - -// LCROP cbit ',' con ',' cbit -// { -// outcode(int($1), &$2, int($4), &$6); -// } -//TODO 9a trouble CREQV 1, 2, 3 - -// -// condition register moves -// move from machine state register -// -// LMOVW creg ',' creg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVFL CR0, CR1 - -// LMOVW psr ',' creg // TODO: should psr should be fpscr -// { -// outcode(int($1), &$2, 0, &$4); -// } -//TODO 9a trouble MOVW FPSCR, CR1 - -// LMOVW lcr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW CR, R1 - -// LMOVW psr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW SPR(0), R1 - MOVW SPR(7), R1 - -// LMOVW xlreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW LR, R1 - MOVW CTR, R1 - -// LMOVW rreg ',' xlreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, LR - MOVW R1, CTR - -// LMOVW creg ',' psr // TODO doesn't exist -// { -// outcode(int($1), &$2, 0, &$4); -// } -//TODO 9a trouble MOVW CR1, SPR(7) - -// LMOVW rreg ',' psr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, SPR(7) - -// -// branch, branch conditional -// branch conditional register -// branch conditional to count register -// -// LBRA rel -// { -// outcode(int($1), &nullgen, 0, &$2); -// } - BEQ CR1, 2(PC) -label0: - BR 1(PC) // JMP 1(PC) - BEQ CR1, 2(PC) - BR label0+0 // JMP 62 - -// LBRA addr -// { -// outcode(int($1), &nullgen, 0, &$2); -// } - BEQ CR1, 2(PC) - BR LR // JMP LR - BEQ CR1, 2(PC) -// BR 0(R1) // TODO should work - BEQ CR1, 2(PC) - BR foo+0(SB) // JMP foo(SB) - -// LBRA '(' xlreg ')' -// { -// outcode(int($1), &nullgen, 0, &$3); -// } - BEQ CR1, 2(PC) - BR (CTR) // JMP CTR - -// LBRA ',' rel // asm doesn't support the leading comma -// { -// outcode(int($1), &nullgen, 0, &$3); -// } -// LBRA ',' addr // asm doesn't support the leading comma -// { -// outcode(int($1), &nullgen, 0, &$3); -// } -// LBRA ',' '(' xlreg ')' // asm doesn't support the leading comma -// { -// outcode(int($1), &nullgen, 0, &$4); -// } -// LBRA creg ',' rel -// { -// outcode(int($1), &$2, 0, &$4); -// } -label1: - BEQ CR1, 1(PC) - BEQ CR1, label1 // BEQ CR1, 72 - -// LBRA creg ',' addr // TODO DOES NOT WORK in 9a -// { -// outcode(int($1), &$2, 0, &$4); -// } - -// LBRA creg ',' '(' xlreg ')' // TODO DOES NOT WORK in 9a -// { -// outcode(int($1), &$2, 0, &$5); -// } - -// LBRA con ',' rel // TODO DOES NOT WORK in 9a -// { -// outcode(int($1), &nullgen, int($2), &$4); -// } - -// LBRA con ',' addr // TODO DOES NOT WORK in 9a -// { -// outcode(int($1), &nullgen, int($2), &$4); -// } - -// LBRA con ',' '(' xlreg ')' -// { -// outcode(int($1), &nullgen, int($2), &$5); -// } -// BC 4, (CTR) // TODO - should work - -// LBRA con ',' con ',' rel -// { -// var g obj.Addr -// g = nullgen; -// g.Type = obj.TYPE_CONST; -// g.Offset = $2; -// outcode(int($1), &g, int(REG_R0+$4), &$6); -// } -// BC 3, 4, label1 // TODO - should work - -// LBRA con ',' con ',' addr // TODO mystery -// { -// var g obj.Addr -// g = nullgen; -// g.Type = obj.TYPE_CONST; -// g.Offset = $2; -// outcode(int($1), &g, int(REG_R0+$4), &$6); -// } -//TODO 9a trouble BC 3, 3, 4(R1) - -// LBRA con ',' con ',' '(' xlreg ')' -// { -// var g obj.Addr -// g = nullgen; -// g.Type = obj.TYPE_CONST; -// g.Offset = $2; -// outcode(int($1), &g, int(REG_R0+$4), &$7); -// } - BC 3, 3, (LR) // BC $3, R3, LR - -// -// conditional trap // TODO NOT DEFINED -// TODO these instructions are not in lex.go -// -// LTRAP rreg ',' sreg -// { -// outcode(int($1), &$2, int($4), &nullgen); -// } -// LTRAP imm ',' sreg -// { -// outcode(int($1), &$2, int($4), &nullgen); -// } -// LTRAP rreg comma -// { -// outcode(int($1), &$2, 0, &nullgen); -// } -// LTRAP comma -// { -// outcode(int($1), &nullgen, 0, &nullgen); -// } - -// -// floating point operate -// -// LFCONV freg ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FABS F1, F2 - -// LFADD freg ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FADD F1, F2 - -// LFADD freg ',' freg ',' freg -// { -// outcode(int($1), &$2, int($4.Reg), &$6); -// } - FADD F1, F2, F3 - -// LFMA freg ',' freg ',' freg ',' freg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - FMADD F1, F2, F3, F4 - -// LFCMP freg ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FCMPU F1, F2 - -// LFCMP freg ',' freg ',' creg -// { -// outcode(int($1), &$2, int($6.Reg), &$4); -// } -// FCMPU F1, F2, CR0 - -// FTDIV FRA, FRB, BF produces -// ftdiv BF, FRA, FRB - FTDIV F1,F2,$7 - -// FTSQRT FRB, BF produces -// ftsqrt BF, FRB - FTSQRT F2,$7 - -// FCFID -// FCFIDS - - FCFID F2,F3 - FCFIDCC F3,F3 - FCFIDS F2,F3 - FCFIDSCC F2,F3 - -// -// CMP -// -// LCMP rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - CMP R1, R2 - -// LCMP rreg ',' imm -// { -// outcode(int($1), &$2, 0, &$4); -// } - CMP R1, $4 - -// LCMP rreg ',' rreg ',' creg -// { -// outcode(int($1), &$2, int($6.Reg), &$4); -// } - CMP R1, R2, CR0 // CMP R1, CR0, R2 - -// LCMP rreg ',' imm ',' creg -// { -// outcode(int($1), &$2, int($6.Reg), &$4); -// } - CMP R1, $4, CR0 // CMP R1, CR0, $4 - -// CMPB RS,RB,RA produces -// cmpb RA,RS,RB - CMPB R2,R2,R1 - -// CMPEQB RA,RB,BF produces -// cmpeqb BF,RA,RB - CMPEQB R1, R2, CR0 - -// -// rotate extended mnemonics map onto other shift instructions -// - - ROTL $12,R2,R3 - ROTL R2,R3,R4 - ROTLW $9,R2,R3 - ROTLW R2,R3,R4 - -// -// rotate and mask -// -// LRLWM imm ',' rreg ',' imm ',' rreg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - RLDC $4, R1, $16, R2 - -// LRLWM imm ',' rreg ',' mask ',' rreg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - RLDC $26, R1, 4, 5, R2 // RLDC $26, R1, $201326592, R2 - -// LRLWM rreg ',' rreg ',' imm ',' rreg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - RLDCL R1, R2, $7, R3 - -// LRLWM rreg ',' rreg ',' mask ',' rreg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - RLWMI R1, R2, 4, 5, R3 // RLWMI R1, R2, $201326592, R3 - - -// opcodes added with constant shift counts, not masks - - RLDICR $3, R2, $24, R4 - - RLDICL $1, R2, $61, R6 - - RLDIMI $7, R2, $52, R7 - -// opcodes for right and left shifts, const and reg shift counts - - SLD $4, R3, R4 - SLD R2, R3, R4 - SLW $4, R3, R4 - SLW R2, R3, R4 - SRD $8, R3, R4 - SRD R2, R3, R4 - SRW $8, R3, R4 - SRW R2, R3, R4 - -// -// load/store multiple -// -// LMOVMW addr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } -// MOVMW foo+0(SB), R2 // TODO TLS broke this! - MOVMW 4(R1), R2 - -// LMOVMW rreg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } -// MOVMW R1, foo+0(SB) // TODO TLS broke this! - MOVMW R1, 4(R2) - -// -// various indexed load/store -// indexed unary (eg, cache clear) -// -// LXLD regaddr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - LSW (R1), R2 - LSW (R1+R2), R3 // LSW (R1)(R2*1), R3 - -// LXLD regaddr ',' imm ',' rreg -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } - LSW (R1), $1, R2 - LSW (R1+R2), $1, R3 // LSW (R1)(R2*1), $1, R3 - -// LXST rreg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - STSW R1, (R2) - STSW R1, (R2+R3) // STSW R1, (R2)(R3*1) - -// LXST rreg ',' imm ',' regaddr -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } - STSW R1, $1, (R2) - STSW R1, $1, (R2+R3) // STSW R1, $1, (R2)(R3*1) - -// LXMV regaddr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVHBR (R1), R2 - MOVHBR (R1+R2), R3 // MOVHBR (R1)(R2*1), R3 - -// LXMV rreg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVHBR R1, (R2) - MOVHBR R1, (R2+R3) // MOVHBR R1, (R2)(R3*1) - -// LXOP regaddr -// { -// outcode(int($1), &$2, 0, &nullgen); -// } - DCBF (R1) - DCBF (R1+R2) // DCBF (R1)(R2*1) - DCBF (R1), $1 - DCBF (R1)(R2*1), $1 - DCBT (R1), $1 - DCBT (R1)(R2*1), $1 - -// LDMX (RB)(RA*1),RT produces -// ldmx RT,RA,RB - LDMX (R2)(R1*1), R3 - -// Population count, X-form -// RS,RA produces -// RA,RS - POPCNTD R1,R2 - POPCNTW R1,R2 - POPCNTB R1,R2 - -// Copysign - FCPSGN F1,F2,F3 - -// Random number generator, X-form -// DARN L,RT produces -// darn RT,L - DARN $1, R1 - -// Copy/Paste facility -// RB,RA produces -// RA,RB - COPY R2,R1 - PASTECC R2,R1 - -// Modulo signed/unsigned double/word X-form -// RA,RB,RT produces -// RT,RA,RB - MODUD R3,R4,R5 - MODUW R3,R4,R5 - MODSD R3,R4,R5 - MODSW R3,R4,R5 - -// VMX instructions - -// Described as: -// , -// produces -// - -// Vector load, VX-form -// (RB)(RA*1),VRT produces -// VRT,RA,RB - LVEBX (R1)(R2*1), V0 - LVEHX (R3)(R4*1), V1 - LVEWX (R5)(R6*1), V2 - LVX (R7)(R8*1), V3 - LVXL (R9)(R10*1), V4 - LVSL (R11)(R12*1), V5 - LVSR (R14)(R15*1), V6 - -// Vector store, VX-form -// VRT,(RB)(RA*1) produces -// VRT,RA,RB - STVEBX V31, (R1)(R2*1) - STVEHX V30, (R2)(R3*1) - STVEWX V29, (R4)(R5*1) - STVX V28, (R6)(R7*1) - STVXL V27, (R9)(R9*1) - -// Vector AND, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VAND V10, V9, V8 - VANDC V15, V14, V13 - VNAND V19, V18, V17 - -// Vector OR, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VOR V26, V25, V24 - VORC V23, V22, V21 - VNOR V20, V19, V18 - VXOR V17, V16, V15 - VEQV V14, V13, V12 - -// Vector ADD, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VADDUBM V3, V2, V1 - VADDUHM V3, V2, V1 - VADDUWM V3, V2, V1 - VADDUDM V3, V2, V1 - VADDUQM V3, V2, V1 - VADDCUQ V3, V2, V1 - VADDCUW V3, V2, V1 - VADDUBS V3, V2, V1 - VADDUHS V3, V2, V1 - VADDUWS V3, V2, V1 - VADDSBS V3, V2, V1 - VADDSHS V3, V2, V1 - VADDSWS V3, V2, V1 - -// Vector ADD extended, VA-form -// VRA,VRB,VRC,VRT produces -// VRT,VRA,VRB,VRC - VADDEUQM V4, V3, V2, V1 - VADDECUQ V4, V3, V2, V1 - -// Vector multiply, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VMULESB V2, V3, V1 - VMULOSB V2, V3, V1 - VMULEUB V2, V3, V1 - VMULOUB V2, V3, V1 - VMULESH V2, V3, V1 - VMULOSH V2, V3, V1 - VMULEUH V2, V3, V1 - VMULOUH V2, V3, V1 - VMULESW V2, V3, V1 - VMULOSW V2, V3, V1 - VMULEUW V2, V3, V1 - VMULOUW V2, V3, V1 - VMULUWM V2, V3, V1 - -// Vector polynomial multiply-sum, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VPMSUMB V2, V3, V1 - VPMSUMH V2, V3, V1 - VPMSUMW V2, V3, V1 - VPMSUMD V2, V3, V1 - -// Vector multiply-sum, VA-form -// VRA, VRB, VRC, VRT produces -// VRT, VRA, VRB, VRC - VMSUMUDM V4, V3, V2, V1 - -// Vector SUB, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VSUBUBM V3, V2, V1 - VSUBUHM V3, V2, V1 - VSUBUWM V3, V2, V1 - VSUBUDM V3, V2, V1 - VSUBUQM V3, V2, V1 - VSUBCUQ V3, V2, V1 - VSUBCUW V3, V2, V1 - VSUBUBS V3, V2, V1 - VSUBUHS V3, V2, V1 - VSUBUWS V3, V2, V1 - VSUBSBS V3, V2, V1 - VSUBSHS V3, V2, V1 - VSUBSWS V3, V2, V1 - -// Vector SUB extended, VA-form -// VRA,VRB,VRC,VRT produces -// VRT,VRA,VRB,VRC - VSUBEUQM V4, V3, V2, V1 - VSUBECUQ V4, V3, V2, V1 - -// Vector rotate, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VRLB V2, V1, V0 - VRLH V2, V1, V0 - VRLW V2, V1, V0 - VRLD V2, V1, V0 - -// Vector shift, VX-form -// VRA,VRB,VRT -// VRT,VRA,VRB - VSLB V2, V1, V0 - VSLH V2, V1, V0 - VSLW V2, V1, V0 - VSL V2, V1, V0 - VSLO V2, V1, V0 - VSRB V2, V1, V0 - VSRH V2, V1, V0 - VSRW V2, V1, V0 - VSR V2, V1, V0 - VSRO V2, V1, V0 - VSLD V2, V1, V0 - VSRD V2, V1, V0 - VSRAB V2, V1, V0 - VSRAH V2, V1, V0 - VSRAW V2, V1, V0 - VSRAD V2, V1, V0 - -// Vector shift by octect immediate, VA-form with SHB 4-bit field -// SHB,VRA,VRB,VRT produces -// VRT,VRA,VRB,SHB - VSLDOI $4, V2, V1, V0 - -// Vector merge odd and even word -// VRA,VRB,VRT produces -// VRT,VRA,VRB - - VMRGOW V4,V5,V6 - VMRGEW V4,V5,V6 - -// Vector count, VX-form -// VRB,VRT produces -// VRT,VRB - VCLZB V4, V5 - VCLZH V4, V5 - VCLZW V4, V5 - VCLZD V4, V5 - VPOPCNTB V4, V5 - VPOPCNTH V4, V5 - VPOPCNTW V4, V5 - VPOPCNTD V4, V5 - -// Vector compare, VC-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB -// * Note: 'CC' suffix denotes Rc=1 -// i.e. vcmpequb. v3,v1,v2 equals VCMPEQUBCC V1,V2,V3 - VCMPEQUB V3, V2, V1 - VCMPEQUBCC V3, V2, V1 - VCMPEQUH V3, V2, V1 - VCMPEQUHCC V3, V2, V1 - VCMPEQUW V3, V2, V1 - VCMPEQUWCC V3, V2, V1 - VCMPEQUD V3, V2, V1 - VCMPEQUDCC V3, V2, V1 - VCMPGTUB V3, V2, V1 - VCMPGTUBCC V3, V2, V1 - VCMPGTUH V3, V2, V1 - VCMPGTUHCC V3, V2, V1 - VCMPGTUW V3, V2, V1 - VCMPGTUWCC V3, V2, V1 - VCMPGTUD V3, V2, V1 - VCMPGTUDCC V3, V2, V1 - VCMPGTSB V3, V2, V1 - VCMPGTSBCC V3, V2, V1 - VCMPGTSH V3, V2, V1 - VCMPGTSHCC V3, V2, V1 - VCMPGTSW V3, V2, V1 - VCMPGTSWCC V3, V2, V1 - VCMPGTSD V3, V2, V1 - VCMPGTSDCC V3, V2, V1 - VCMPNEZB V3, V2, V1 - VCMPNEZBCC V3, V2, V1 - VCMPNEB V3, V2, V1 - VCMPNEBCC V3, V2, V1 - VCMPNEH V3, V2, V1 - VCMPNEHCC V3, V2, V1 - VCMPNEW V3, V2, V1 - VCMPNEWCC V3, V2, V1 - -// Vector permute, VA-form -// VRA,VRB,VRC,VRT produces -// VRT,VRA,VRB,VRC - VPERM V3, V2, V1, V0 - VPERMXOR V3, V2, V1, V0 - VPERMR V3, V2, V1, V0 - -// Vector bit permute, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VBPERMQ V3,V1,V2 - VBPERMD V3,V1,V2 - -// Vector select, VA-form -// VRA,VRB,VRC,VRT produces -// VRT,VRA,VRB,VRC - VSEL V3, V2, V1, V0 - -// Vector splat, VX-form with 4-bit UIM field -// UIM,VRB,VRT produces -// VRT,VRB,UIM - VSPLTB $15, V1, V0 - VSPLTH $7, V1, V0 - VSPLTW $3, V1, V0 - -// Vector splat immediate signed, VX-form with 5-bit SIM field -// SIM,VRT produces -// VRT,SIM - VSPLTISB $31, V4 - VSPLTISH $31, V4 - VSPLTISW $31, V4 - -// Vector AES cipher, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VCIPHER V3, V2, V1 - VCIPHERLAST V3, V2, V1 - VNCIPHER V3, V2, V1 - VNCIPHERLAST V3, V2, V1 - -// Vector AES subbytes, VX-form -// VRA,VRT produces -// VRT,VRA - VSBOX V2, V1 - -// Vector SHA, VX-form with ST bit field and 4-bit SIX field -// SIX,VRA,ST,VRT produces -// VRT,VRA,ST,SIX - VSHASIGMAW $15, V1, $1, V0 - VSHASIGMAD $15, V1, $1, V0 - -// VSX instructions -// Described as: -// , -// produces -// - -// VSX load, XX1-form -// (RB)(RA*1),XT produces -// XT,RA,RB - LXVD2X (R1)(R2*1), VS0 - LXVW4X (R1)(R2*1), VS0 - LXVH8X (R1)(R2*1), VS0 - LXVB16X (R1)(R2*1), VS0 - LXVDSX (R1)(R2*1), VS0 - LXSDX (R1)(R2*1), VS0 - LXSIWAX (R1)(R2*1), VS0 - LXSIWZX (R1)(R2*1), VS0 - -// VSX load with length X-form (also left-justified) - LXVL R3,R4, VS0 - LXVLL R3,R4, VS0 - LXVX R3,R4, VS0 -// VSX load, DQ-form -// DQ(RA), XS produces -// XS, DQ(RA) - LXV 32752(R1), VS0 - -// VSX store, XX1-form -// XS,(RB)(RA*1) produces -// XS,RA,RB - STXVD2X VS63, (R1)(R2*1) - STXVW4X VS63, (R1)(R2*1) - STXVH8X VS63, (R1)(R2*1) - STXVB16X VS63, (R1)(R2*1) - STXSDX VS63, (R1)(R2*1) - STXSIWX VS63, (R1)(R2*1) - -// VSX store, DQ-form -// DQ(RA), XS produces -// XS, DQ(RA) - STXV VS63, -32752(R1) - -// VSX store with length, X-form (also left-justified) - STXVL VS0, R3,R4 - STXVLL VS0, R3,R4 - STXVX VS0, R3,R4 - -// VSX move from VSR, XX1-form -// XS,RA produces -// RA,XS -// Extended mnemonics accept VMX and FP registers as sources - MFVSRD VS0, R1 - MFVSRWZ VS33, R1 - MFVSRLD VS63, R1 - MFVRD V0, R1 - MFFPRD F0, R1 - -// VSX move to VSR, XX1-form -// RA,XT produces -// XT,RA -// Extended mnemonics accept VMX and FP registers as targets - MTVSRD R1, VS0 - MTVSRWA R1, VS31 - MTVSRWZ R1, VS63 - MTVSRDD R1, R2, VS0 - MTVSRWS R1, VS32 - MTVRD R1, V13 - MTFPRD R1, F24 - -// VSX AND, XX3-form -// XA,XB,XT produces -// XT,XA,XB - XXLAND VS0,VS1,VS32 - XXLANDC VS0,VS1,VS32 - XXLEQV VS0,VS1,VS32 - XXLNAND VS0,VS1,VS32 - -// VSX OR, XX3-form -// XA,XB,XT produces -// XT,XA,XB - XXLORC VS0,VS1,VS32 - XXLNOR VS0,VS1,VS32 - XXLORQ VS0,VS1,VS32 - XXLXOR VS0,VS1,VS32 - XXLOR VS0,VS1,VS32 - -// VSX select, XX4-form -// XA,XB,XC,XT produces -// XT,XA,XB,XC - XXSEL VS0,VS1,VS3,VS32 - -// VSX merge, XX3-form -// XA,XB,XT produces -// XT,XA,XB - XXMRGHW VS0,VS1,VS32 - XXMRGLW VS0,VS1,VS32 - -// VSX splat, XX2-form -// XB,UIM,XT produces -// XT,XB,UIM - XXSPLTW VS0,$3,VS32 - XXSPLTIB $26,VS0 - -// VSX permute, XX3-form -// XA,XB,XT produces -// XT,XA,XB - XXPERM VS0,VS1,VS32 - -// VSX permute, XX3-form -// XA,XB,DM,XT produces -// XT,XA,XB,DM - XXPERMDI VS0,VS1,$3,VS32 - -// VSX shift, XX3-form -// XA,XB,SHW,XT produces -// XT,XA,XB,SHW - XXSLDWI VS0,VS1,$3,VS32 - -// VSX byte-reverse XX2-form -// XB,XT produces -// XT,XB - XXBRQ VS0,VS1 - XXBRD VS0,VS1 - XXBRW VS0,VS1 - XXBRH VS0,VS1 - -// VSX scalar FP-FP conversion, XX2-form -// XB,XT produces -// XT,XB - XSCVDPSP VS0,VS32 - XSCVSPDP VS0,VS32 - XSCVDPSPN VS0,VS32 - XSCVSPDPN VS0,VS32 - -// VSX vector FP-FP conversion, XX2-form -// XB,XT produces -// XT,XB - XVCVDPSP VS0,VS32 - XVCVSPDP VS0,VS32 - -// VSX scalar FP-integer conversion, XX2-form -// XB,XT produces -// XT,XB - XSCVDPSXDS VS0,VS32 - XSCVDPSXWS VS0,VS32 - XSCVDPUXDS VS0,VS32 - XSCVDPUXWS VS0,VS32 - -// VSX scalar integer-FP conversion, XX2-form -// XB,XT produces -// XT,XB - XSCVSXDDP VS0,VS32 - XSCVUXDDP VS0,VS32 - XSCVSXDSP VS0,VS32 - XSCVUXDSP VS0,VS32 - -// VSX vector FP-integer conversion, XX2-form -// XB,XT produces -// XT,XB - XVCVDPSXDS VS0,VS32 - XVCVDPSXWS VS0,VS32 - XVCVDPUXDS VS0,VS32 - XVCVDPUXWS VS0,VS32 - XVCVSPSXDS VS0,VS32 - XVCVSPSXWS VS0,VS32 - XVCVSPUXDS VS0,VS32 - XVCVSPUXWS VS0,VS32 - -// VSX scalar integer-FP conversion, XX2-form -// XB,XT produces -// XT,XB - XVCVSXDDP VS0,VS32 - XVCVSXWDP VS0,VS32 - XVCVUXDDP VS0,VS32 - XVCVUXWDP VS0,VS32 - XVCVSXDSP VS0,VS32 - XVCVSXWSP VS0,VS32 - XVCVUXDSP VS0,VS32 - XVCVUXWSP VS0,VS32 - -// Multiply-Add High Doubleword -// RA,RB,RC,RT produces -// RT,RA,RB,RC - MADDHD R1,R2,R3,R4 - MADDHDU R1,R2,R3,R4 - -// Add Extended using alternate carry bit -// ADDEX RA,RB,CY,RT produces -// addex RT, RA, RB, CY - ADDEX R1, R2, $0, R3 - -// Immediate-shifted operations -// ADDIS SI, RA, RT produces -// addis RT, RA, SI - ADDIS $8, R3, R4 - ADDIS $-1, R3, R4 - -// ANDISCC UI, RS, RA produces -// andis. RA, RS, UI - ANDISCC $7, R4, R5 - -// ORIS UI, RS, RA produces -// oris RA, RS, UI - ORIS $4, R2, R3 - -// XORIS UI, RS, RA produces -// xoris RA, RS, UI - XORIS $1, R1, R2 - -// -// NOP -// -// LNOP comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &nullgen, 0, &nullgen); -// } NOP - -// LNOP rreg comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &$2, 0, &nullgen); -// } NOP R2 + NOP F2 + NOP $4 + + CRAND CR1, CR2, CR3 // 4c620a02 + CRANDN CR1, CR2, CR3 // 4c620902 + CREQV CR1, CR2, CR3 // 4c620a42 + CRNAND CR1, CR2, CR3 // 4c6209c2 + CRNOR CR1, CR2, CR3 // 4c620842 + CROR CR1, CR2, CR3 // 4c620b82 + CRORN CR1, CR2, CR3 // 4c620b42 + CRXOR CR1, CR2, CR3 // 4c620982 + + ISEL $1, R3, R4, R5 // 7ca3205e + ISEL $0, R3, R4, R5 // 7ca3201e + ISEL $2, R3, R4, R5 // 7ca3209e + ISEL $3, R3, R4, R5 // 7ca320de + ISEL $4, R3, R4, R5 // 7ca3211e + POPCNTB R3, R4 // 7c6400f4 + POPCNTW R3, R4 // 7c6402f4 + POPCNTD R3, R4 // 7c6403f4 + + PASTECC R3, R4 // 7c23270d + COPY R3, R4 // 7c23260c + + // load-and-reserve + LBAR (R4)(R3*1),$1,R5 // 7ca32069 + LBAR (R4),$0,R5 // 7ca02068 + LBAR (R3),R5 // 7ca01868 + LHAR (R4)(R3*1),$1,R5 // 7ca320e9 + LHAR (R4),$0,R5 // 7ca020e8 + LHAR (R3),R5 // 7ca018e8 + LWAR (R4)(R3*1),$1,R5 // 7ca32029 + LWAR (R4),$0,R5 // 7ca02028 + LWAR (R3),R5 // 7ca01828 + LDAR (R4)(R3*1),$1,R5 // 7ca320a9 + LDAR (R4),$0,R5 // 7ca020a8 + LDAR (R3),R5 // 7ca018a8 + + STBCCC R3, (R4)(R5) // 7c65256d + STWCCC R3, (R4)(R5) // 7c65212d + STDCCC R3, (R4)(R5) // 7c6521ad + STHCCC R3, (R4)(R5) + STSW R3, (R4)(R5) + + SYNC // 7c0004ac + ISYNC // 4c00012c + LWSYNC // 7c2004ac + + DARN $1, R5 // 7ca105e6 + + DCBF (R3)(R4) // 7c0418ac + DCBI (R3)(R4) // 7c041bac + DCBST (R3)(R4) // 7c04186c + DCBZ (R3)(R4) // 7c041fec + DCBT (R3)(R4) // 7c041a2c + ICBI (R3)(R4) // 7c041fac + + // float constants + FMOVD $(0.0), F1 // f0210cd0 + FMOVD $(-0.0), F1 // f0210cd0fc200850 + + FMOVD 8(R3), F1 // c8230008 + FMOVD (R3)(R4), F1 // 7c241cae + FMOVDU 8(R3), F1 // cc230008 + FMOVDU (R3)(R4), F1 // 7c241cee + FMOVS 4(R3), F1 // c0230004 + FMOVS (R3)(R4), F1 // 7c241c2e + FMOVSU 4(R3), F1 // c4230004 + FMOVSU (R3)(R4), F1 // 7c241c6e + + FMOVD F1, 8(R3) // d8230008 + FMOVD F1, (R3)(R4) // 7c241dae + FMOVDU F1, 8(R3) // dc230008 + FMOVDU F1, (R3)(R4) // 7c241dee + FMOVS F1, 4(R3) // d0230004 + FMOVS F1, (R3)(R4) // 7c241d2e + FMOVSU F1, 4(R3) // d4230004 + FMOVSU F1, (R3)(R4) // 7c241d6e + FADD F1, F2 // fc42082a + FADD F1, F2, F3 // fc62082a + FADDCC F1, F2, F3 // fc62082b + FADDS F1, F2 // ec42082a + FADDS F1, F2, F3 // ec62082a + FADDSCC F1, F2, F3 // ec62082b + FSUB F1, F2 // fc420828 + FSUB F1, F2, F3 // fc620828 + FSUBCC F1, F2, F3 // fc620829 + FSUBS F1, F2 // ec420828 + FSUBS F1, F2, F3 // ec620828 + FSUBCC F1, F2, F3 // fc620829 + FMUL F1, F2 // fc420072 + FMUL F1, F2, F3 // fc620072 + FMULCC F1, F2, F3 // fc620073 + FMULS F1, F2 // ec420072 + FMULS F1, F2, F3 // ec620072 + FMULSCC F1, F2, F3 // ec620073 + FDIV F1, F2 // fc420824 + FDIV F1, F2, F3 // fc620824 + FDIVCC F1, F2, F3 // fc620825 + FDIVS F1, F2 // ec420824 + FDIVS F1, F2, F3 // ec620824 + FDIVSCC F1, F2, F3 // ec620825 + FMADD F1, F2, F3, F4 // fc8110fa + FMADDCC F1, F2, F3, F4 // fc8110fb + FMADDS F1, F2, F3, F4 // ec8110fa + FMADDSCC F1, F2, F3, F4 // ec8110fb + FMSUB F1, F2, F3, F4 // fc8110f8 + FMSUBCC F1, F2, F3, F4 // fc8110f9 + FMSUBS F1, F2, F3, F4 // ec8110f8 + FMSUBSCC F1, F2, F3, F4 // ec8110f9 + FNMADD F1, F2, F3, F4 // fc8110fe + FNMADDCC F1, F2, F3, F4 // fc8110ff + FNMADDS F1, F2, F3, F4 // ec8110fe + FNMADDSCC F1, F2, F3, F4 // ec8110ff + FNMSUB F1, F2, F3, F4 // fc8110fc + FNMSUBCC F1, F2, F3, F4 // fc8110fd + FNMSUBS F1, F2, F3, F4 // ec8110fc + FNMSUBSCC F1, F2, F3, F4 // ec8110fd + FSEL F1, F2, F3, F4 // fc8110ee + FSELCC F1, F2, F3, F4 // fc8110ef + FABS F1, F2 // fc400a10 + FABSCC F1, F2 // fc400a11 + FNEG F1, F2 // fc400850 + FABSCC F1, F2 // fc400a11 + FRSP F1, F2 // fc400818 + FRSPCC F1, F2 // fc400819 + FCTIW F1, F2 // fc40081c + FCTIWCC F1, F2 // fc40081d + FCTIWZ F1, F2 // fc40081e + FCTIWZCC F1, F2 // fc40081f + FCTID F1, F2 // fc400e5c + FCTIDCC F1, F2 // fc400e5d + FCTIDZ F1, F2 // fc400e5e + FCTIDZCC F1, F2 // fc400e5f + FCFID F1, F2 // fc400e9c + FCFIDCC F1, F2 // fc400e9d + FCFIDU F1, F2 // fc400f9c + FCFIDUCC F1, F2 // fc400f9d + FCFIDS F1, F2 // ec400e9c + FCFIDSCC F1, F2 // ec400e9d + FRES F1, F2 // ec400830 + FRESCC F1, F2 // ec400831 + FRIM F1, F2 // fc400bd0 + FRIMCC F1, F2 // fc400bd1 + FRIP F1, F2 // fc400b90 + FRIPCC F1, F2 // fc400b91 + FRIZ F1, F2 // fc400b50 + FRIZCC F1, F2 // fc400b51 + FRIN F1, F2 // fc400b10 + FRINCC F1, F2 // fc400b11 + FRSQRTE F1, F2 // fc400834 + FRSQRTECC F1, F2 // fc400835 + FSQRT F1, F2 // fc40082c + FSQRTCC F1, F2 // fc40082d + FSQRTS F1, F2 // ec40082c + FSQRTSCC F1, F2 // ec40082d + FCPSGN F1, F2 // fc420810 + FCPSGNCC F1, F2 // fc420811 + FCMPO F1, F2 // fc011040 + FCMPU F1, F2 // fc011000 + LVX (R3)(R4), V1 // 7c2418ce + LVXL (R3)(R4), V1 // 7c241ace + LVSL (R3)(R4), V1 // 7c24180c + LVSR (R3)(R4), V1 // 7c24184c + LVEBX (R3)(R4), V1 // 7c24180e + LVEHX (R3)(R4), V1 // 7c24184e + LVEWX (R3)(R4), V1 // 7c24188e + STVX V1, (R3)(R4) // 7c2419ce + STVXL V1, (R3)(R4) // 7c241bce + STVEBX V1, (R3)(R4) // 7c24190e + STVEHX V1, (R3)(R4) // 7c24194e + STVEWX V1, (R3)(R4) // 7c24198e + + VAND V1, V2, V3 // 10611404 + VANDC V1, V2, V3 // 10611444 + VNAND V1, V2, V3 // 10611584 + VOR V1, V2, V3 // 10611484 + VORC V1, V2, V3 // 10611544 + VXOR V1, V2, V3 // 106114c4 + VNOR V1, V2, V3 // 10611504 + VEQV V1, V2, V3 // 10611684 + VADDUBM V1, V2, V3 // 10611000 + VADDUHM V1, V2, V3 // 10611040 + VADDUWM V1, V2, V3 // 10611080 + VADDUDM V1, V2, V3 // 106110c0 + VADDUQM V1, V2, V3 // 10611100 + VADDCUQ V1, V2, V3 // 10611140 + VADDCUW V1, V2, V3 // 10611180 + VADDUBS V1, V2, V3 // 10611200 + VADDUHS V1, V2, V3 // 10611240 + VADDUWS V1, V2, V3 // 10611280 + VSUBUBM V1, V2, V3 // 10611400 + VSUBUHM V1, V2, V3 // 10611440 + VSUBUWM V1, V2, V3 // 10611480 + VSUBUDM V1, V2, V3 // 106114c0 + VSUBUQM V1, V2, V3 // 10611500 + VSUBCUQ V1, V2, V3 // 10611540 + VSUBCUW V1, V2, V3 // 10611580 + VSUBUBS V1, V2, V3 // 10611600 + VSUBUHS V1, V2, V3 // 10611640 + VSUBUWS V1, V2, V3 // 10611680 + VSUBSBS V1, V2, V3 // 10611700 + VSUBSHS V1, V2, V3 // 10611740 + VSUBSWS V1, V2, V3 // 10611780 + VSUBEUQM V1, V2, V3, V4 // 108110fe + VSUBECUQ V1, V2, V3, V4 // 108110ff + VMULESB V1, V2, V3 // 10611308 + VMULOSB V1, V2, V3 // 10611108 + VMULEUB V1, V2, V3 // 10611208 + VMULOUB V1, V2, V3 // 10611008 + VMULESH V1, V2, V3 // 10611348 + VMULOSH V1, V2, V3 // 10611148 + VMULEUH V1, V2, V3 // 10611248 + VMULOUH V1, V2, V3 // 10611048 + VMULESH V1, V2, V3 // 10611348 + VMULOSW V1, V2, V3 // 10611188 + VMULEUW V1, V2, V3 // 10611288 + VMULOUW V1, V2, V3 // 10611088 + VMULUWM V1, V2, V3 // 10611089 + VPMSUMB V1, V2, V3 // 10611408 + VPMSUMH V1, V2, V3 // 10611448 + VPMSUMW V1, V2, V3 // 10611488 + VPMSUMD V1, V2, V3 // 106114c8 + VMSUMUDM V1, V2, V3, V4 // 108110e3 + VRLB V1, V2, V3 // 10611004 + VRLH V1, V2, V3 // 10611044 + VRLW V1, V2, V3 // 10611084 + VRLD V1, V2, V3 // 106110c4 + VSLB V1, V2, V3 // 10611104 + VSLH V1, V2, V3 // 10611144 + VSLW V1, V2, V3 // 10611184 + VSL V1, V2, V3 // 106111c4 + VSLO V1, V2, V3 // 1061140c + VSRB V1, V2, V3 // 10611204 + VSRH V1, V2, V3 // 10611244 + VSRW V1, V2, V3 // 10611284 + VSR V1, V2, V3 // 106112c4 + VSRO V1, V2, V3 // 1061144c + VSLD V1, V2, V3 // 106115c4 + VSRAB V1, V2, V3 // 10611304 + VSRAH V1, V2, V3 // 10611344 + VSRAW V1, V2, V3 // 10611384 + VSRAD V1, V2, V3 // 106113c4 + VSLDOI $3, V1, V2, V3 // 106110ec + VCLZB V1, V2 // 10400f02 + VCLZH V1, V2 // 10400f42 + VCLZW V1, V2 // 10400f82 + VCLZD V1, V2 // 10400fc2 + VPOPCNTB V1, V2 // 10400f03 + VPOPCNTH V1, V2 // 10400f43 + VPOPCNTW V1, V2 // 10400f83 + VPOPCNTD V1, V2 // 10400fc3 + VCMPEQUB V1, V2, V3 // 10611006 + VCMPEQUBCC V1, V2, V3 // 10611406 + VCMPEQUH V1, V2, V3 // 10611046 + VCMPEQUHCC V1, V2, V3 // 10611446 + VCMPEQUW V1, V2, V3 // 10611086 + VCMPEQUWCC V1, V2, V3 // 10611486 + VCMPEQUD V1, V2, V3 // 106110c7 + VCMPEQUDCC V1, V2, V3 // 106114c7 + VCMPGTUB V1, V2, V3 // 10611206 + VCMPGTUBCC V1, V2, V3 // 10611606 + VCMPGTUH V1, V2, V3 // 10611246 + VCMPGTUHCC V1, V2, V3 // 10611646 + VCMPGTUW V1, V2, V3 // 10611286 + VCMPGTUWCC V1, V2, V3 // 10611686 + VCMPGTUD V1, V2, V3 // 106112c7 + VCMPGTUDCC V1, V2, V3 // 106116c7 + VCMPGTSB V1, V2, V3 // 10611306 + VCMPGTSBCC V1, V2, V3 // 10611706 + VCMPGTSH V1, V2, V3 // 10611346 + VCMPGTSHCC V1, V2, V3 // 10611746 + VCMPGTSW V1, V2, V3 // 10611386 + VCMPGTSWCC V1, V2, V3 // 10611786 + VCMPGTSD V1, V2, V3 // 106113c7 + VCMPGTSDCC V1, V2, V3 // 106117c7 + VCMPNEZB V1, V2, V3 // 10611107 + VCMPNEZBCC V1, V2, V3 // 10611507 + VCMPNEB V1, V2, V3 // 10611007 + VCMPNEBCC V1, V2, V3 // 10611407 + VCMPNEH V1, V2, V3 // 10611047 + VCMPNEHCC V1, V2, V3 // 10611447 + VCMPNEW V1, V2, V3 // 10611087 + VCMPNEWCC V1, V2, V3 // 10611487 + VPERM V1, V2, V3, V4 // 108110eb + VPERMR V1, V2, V3, V4 // 108110fb + VPERMXOR V1, V2, V3, V4 // 108110ed + VBPERMQ V1, V2, V3 // 1061154c + VBPERMD V1, V2, V3 // 106115cc + VSEL V1, V2, V3, V4 // 108110ea + VSPLTB $1, V1, V2 // 10410a0c + VSPLTH $1, V1, V2 // 10410a4c + VSPLTW $1, V1, V2 // 10410a8c + VSPLTISB $1, V1 // 1021030c + VSPLTISW $1, V1 // 1021038c + VSPLTISH $1, V1 // 1021034c + VCIPHER V1, V2, V3 // 10611508 + VCIPHERLAST V1, V2, V3 // 10611509 + VNCIPHER V1, V2, V3 // 10611548 + VNCIPHERLAST V1, V2, V3 // 10611549 + VSBOX V1, V2 // 104105c8 + VSHASIGMAW $1, V1, $15, V2 // 10418e82 + VSHASIGMAD $2, V1, $15, V2 // 104196c2 + + LXVD2X (R3)(R4), VS1 // 7c241e98 + LXVDSX (R3)(R4), VS1 // 7c241a98 + LXVH8X (R3)(R4), VS1 // 7c241e58 + LXVB16X (R3)(R4), VS1 // 7c241ed8 + LXVW4X (R3)(R4), VS1 // 7c241e18 + LXV 16(R3), VS1 // f4230011 + LXVL R3, R4, VS1 // 7c23221a + LXVLL R3, R4, VS1 // 7c23225a + LXVX R3, R4, VS1 // 7c232218 + LXSDX (R3)(R4), VS1 // 7c241c98 + STXVD2X VS1, (R3)(R4) // 7c241f98 + STXV VS1,16(R3) // f4230015 + STXVL VS1, R3, R4 // 7c23231a + STXVLL VS1, R3, R4 // 7c23235a + STXVX VS1, R3, R4 // 7c232318 + STXVB16X VS1, (R4)(R5) // 7c2527d8 + STXVH8X VS1, (R4)(R5) // 7c252758 + + STXSDX VS1, (R3)(R4) // 7c241d98 + LXSIWAX (R3)(R4), VS1 // 7c241898 + STXSIWX VS1, (R3)(R4) // 7c241918 + MFVSRD VS1, R3 // 7c230066 + MTFPRD R3, F0 // 7c030166 + MFVRD V0, R3 // 7c030067 + MFVSRLD VS63,R4 // 7fe40267 + MFVSRWZ VS33,R4 // 7c2400e7 + MTVSRD R3, VS1 // 7c230166 + MTVRD R3, V13 // 7da30167 + MTVSRWA R4, VS31 // 7fe401a6 + MTVSRWS R4, VS32 // 7c040327 + MTVSRWZ R4, VS63 // 7fe401e7 + XXBRD VS0, VS1 // f037076c + XXBRW VS1, VS2 // f04f0f6c + XXBRH VS2, VS3 // f067176c + XXLAND VS1, VS2, VS3 // f0611410 + XXLANDC VS1, VS2, VS3 // f0611450 + XXLEQV VS0, VS1, VS2 // f0400dd0 + XXLNAND VS0, VS1, VS2 // f0400d90 + XXLNOR VS0, VS1, VS32 // f0000d11 + XXLOR VS1, VS2, VS3 // f0611490 + XXLORC VS1, VS2, VS3 // f0611550 + XXLORQ VS1, VS2, VS3 // f0611490 + XXLXOR VS1, VS2, VS3 // f06114d0 + XXSEL VS1, VS2, VS3, VS4 // f08110f0 + XXMRGHW VS1, VS2, VS3 // f0611090 + XXMRGLW VS1, VS2, VS3 // f0611190 + XXSPLTW VS1, $1, VS2 // f0410a90 + XXPERM VS1, VS2, VS3 // f06110d0 + XXSLDWI VS1, VS2, $1, VS3 // f0611110 + XSCVDPSP VS1, VS2 // f0400c24 + XVCVDPSP VS1, VS2 // f0400e24 + XSCVSXDDP VS1, VS2 // f0400de0 + XVCVDPSXDS VS1, VS2 // f0400f60 + XVCVSXDDP VS1, VS2 // f0400fe0 + XSCVDPSPN VS1,VS32 // f0000c2d + XSCVDPSP VS1,VS32 // f0000c25 + XSCVDPSXDS VS1,VS32 // f0000d61 + XSCVDPSXWS VS1,VS32 // f0000961 + XSCVDPUXDS VS1,VS32 // f0000d21 + XSCVDPUXWS VS1,VS32 // f0000921 + XSCVSPDPN VS1,VS32 // f0000d2d + XSCVSPDP VS1,VS32 // f0000d25 + XSCVSXDDP VS1,VS32 // f0000de1 + XSCVSXDSP VS1,VS32 // f0000ce1 + XSCVUXDDP VS1,VS32 // f0000da1 + XSCVUXDSP VS1,VS32 // f0000ca1 + XVCVDPSP VS1,VS32 // f0000e25 + XVCVDPSXDS VS1,VS32 // f0000f61 + XVCVDPSXWS VS1,VS32 // f0000b61 + XVCVDPUXDS VS1,VS32 // f0000f21 + XVCVDPUXWS VS1,VS32 // f0000b21 + XVCVSPDP VS1,VS32 // f0000f25 + XVCVSPSXDS VS1,VS32 // f0000e61 + XVCVSPSXWS VS1,VS32 // f0000a61 + XVCVSPUXDS VS1,VS32 // f0000e21 + XVCVSPUXWS VS1,VS32 // f0000a21 + XVCVSXDDP VS1,VS32 // f0000fe1 + XVCVSXDSP VS1,VS32 // f0000ee1 + XVCVSXWDP VS1,VS32 // f0000be1 + XVCVSXWSP VS1,VS32 // f0000ae1 + XVCVUXDDP VS1,VS32 // f0000fa1 + XVCVUXDSP VS1,VS32 // f0000ea1 + XVCVUXWDP VS1,VS32 // f0000ba1 + XVCVUXWSP VS1,VS32 // f0000aa1 + + MOVD R3, LR // 7c6803a6 + MOVD R3, CTR // 7c6903a6 + MOVD R3, XER // 7c6103a6 + MOVD LR, R3 // 7c6802a6 + MOVD CTR, R3 // 7c6902a6 + MOVD XER, R3 // 7c6102a6 + MOVFL CR3, CR1 // 4c8c0000 -// LNOP freg comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &$2, 0, &nullgen); -// } - NOP F2 - -// LNOP ',' rreg // asm doesn't support the leading comma. -// { -// outcode(int($1), &nullgen, 0, &$3); -// } - NOP R2 - -// LNOP ',' freg // asm doesn't support the leading comma. -// { -// outcode(int($1), &nullgen, 0, &$3); -// } - NOP F2 - -// LNOP imm // SYSCALL $num: load $num to R0 before syscall and restore R0 to 0 afterwards. -// { -// outcode(int($1), &$2, 0, &nullgen); -// } - NOP $4 - -// RET -// -// LRETRN comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &nullgen, 0, &nullgen); -// } - BEQ 2(PC) RET - -// More BR/BL cases, and canonical names JMP, CALL. - - BEQ 2(PC) - BR foo(SB) // JMP foo(SB) - BL foo(SB) // CALL foo(SB) - BEQ 2(PC) - JMP foo(SB) - CALL foo(SB) - RET foo(SB) - -// load-and-reserve -// L*AR (RB)(RA*1),EH,RT produces -// l*arx RT,RA,RB,EH -// -// Extended forms also accepted. Assumes RA=0, EH=0: -// L*AR (RB),RT -// L*AR (RB),EH,RT - LBAR (R4)(R3*1), $1, R5 - LBAR (R4), $0, R5 - LBAR (R3), R5 - LHAR (R4)(R3*1), $1, R5 - LHAR (R4), $0, R5 - LHAR (R3), R5 - LWAR (R4)(R3*1), $1, R5 - LWAR (R4), $0, R5 - LWAR (R3), R5 - LDAR (R4)(R3*1), $1, R5 - LDAR (R4), $0, R5 - LDAR (R3), R5 - -// END -// -// LEND comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &nullgen, 0, &nullgen); -// } - END diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s deleted file mode 100644 index c6d7b59aad..0000000000 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ /dev/null @@ -1,642 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Initial set of opcode combinations based on -// improvements to processing of constant -// operands. - -// Full set will be added at a later date. - -#include "../../../../../runtime/textflag.h" - -TEXT asmtest(SB),DUPOK|NOSPLIT,$0 - // move constants - MOVD $1, R3 // 38600001 - MOVD $-1, R4 // 3880ffff - MOVD $65535, R5 // 6005ffff - MOVD $65536, R6 // 64060001 - MOVD $-32767, R5 // 38a08001 - MOVD $-32768, R6 // 38c08000 - MOVD $1234567, R5 // 6405001260a5d687 - MOVW $1, R3 // 38600001 - MOVW $-1, R4 // 3880ffff - MOVW $65535, R5 // 6005ffff - MOVW $65536, R6 // 64060001 - MOVW $-32767, R5 // 38a08001 - MOVW $-32768, R6 // 38c08000 - MOVW $1234567, R5 // 6405001260a5d687 - MOVD 8(R3), R4 // e8830008 - MOVD (R3)(R4), R5 // 7ca4182a - MOVW 4(R3), R4 // e8830006 - MOVW (R3)(R4), R5 // 7ca41aaa - MOVWZ 4(R3), R4 // 80830004 - MOVWZ (R3)(R4), R5 // 7ca4182e - MOVH 4(R3), R4 // a8830004 - MOVH (R3)(R4), R5 // 7ca41aae - MOVHZ 2(R3), R4 // a0830002 - MOVHZ (R3)(R4), R5 // 7ca41a2e - MOVB 1(R3), R4 // 888300017c840774 - MOVB (R3)(R4), R5 // 7ca418ae7ca50774 - MOVBZ 1(R3), R4 // 88830001 - MOVBZ (R3)(R4), R5 // 7ca418ae - MOVDBR (R3)(R4), R5 // 7ca41c28 - MOVWBR (R3)(R4), R5 // 7ca41c2c - MOVHBR (R3)(R4), R5 // 7ca41e2c - - MOVDU 8(R3), R4 // e8830009 - MOVDU (R3)(R4), R5 // 7ca4186a - MOVWU (R3)(R4), R5 // 7ca41aea - MOVWZU 4(R3), R4 // 84830004 - MOVWZU (R3)(R4), R5 // 7ca4186e - MOVHU 2(R3), R4 // ac830002 - MOVHU (R3)(R4), R5 // 7ca41aee - MOVHZU 2(R3), R4 // a4830002 - MOVHZU (R3)(R4), R5 // 7ca41a6e - MOVBU 1(R3), R4 // 8c8300017c840774 - MOVBU (R3)(R4), R5 // 7ca418ee7ca50774 - MOVBZU 1(R3), R4 // 8c830001 - MOVBZU (R3)(R4), R5 // 7ca418ee - - MOVD R4, 8(R3) // f8830008 - MOVD R5, (R3)(R4) // 7ca4192a - MOVW R4, 4(R3) // 90830004 - MOVW R5, (R3)(R4) // 7ca4192e - MOVH R4, 2(R3) // b0830002 - MOVH R5, (R3)(R4) // 7ca41b2e - MOVB R4, 1(R3) // 98830001 - MOVB R5, (R3)(R4) // 7ca419ae - MOVDBR R5, (R3)(R4) // 7ca41d28 - MOVWBR R5, (R3)(R4) // 7ca41d2c - MOVHBR R5, (R3)(R4) // 7ca41f2c - - MOVDU R4, 8(R3) // f8830009 - MOVDU R5, (R3)(R4) // 7ca4196a - MOVWU R4, 4(R3) // 94830004 - MOVWU R5, (R3)(R4) // 7ca4196e - MOVHU R4, 2(R3) // b4830002 - MOVHU R5, (R3)(R4) // 7ca41b6e - MOVBU R4, 1(R3) // 9c830001 - MOVBU R5, (R3)(R4) // 7ca419ee - - ADD $1, R3 // 38630001 - ADD $1, R3, R4 // 38830001 - ADD $-1, R4 // 3884ffff - ADD $-1, R4, R5 // 38a4ffff - ADD $65535, R5 // 601fffff7cbf2a14 - ADD $65535, R5, R6 // 601fffff7cdf2a14 - ADD $65536, R6 // 3cc60001 - ADD $65536, R6, R7 // 3ce60001 - ADD $-32767, R5 // 38a58001 - ADD $-32767, R5, R4 // 38858001 - ADD $-32768, R6 // 38c68000 - ADD $-32768, R6, R5 // 38a68000 - ADD $1234567, R5 // 641f001263ffd6877cbf2a14 - ADD $1234567, R5, R6 // 641f001263ffd6877cdf2a14 - ADDIS $8, R3 // 3c630008 - ADDIS $1000, R3, R4 // 3c8303e8 - - ANDCC $1, R3 // 70630001 - ANDCC $1, R3, R4 // 70640001 - ANDCC $-1, R4 // 3be0ffff7fe42039 - ANDCC $-1, R4, R5 // 3be0ffff7fe52039 - ANDCC $65535, R5 // 70a5ffff - ANDCC $65535, R5, R6 // 70a6ffff - ANDCC $65536, R6 // 74c60001 - ANDCC $65536, R6, R7 // 74c70001 - ANDCC $-32767, R5 // 3be080017fe52839 - ANDCC $-32767, R5, R4 // 3be080017fe42839 - ANDCC $-32768, R6 // 3be080007fe63039 - ANDCC $-32768, R5, R6 // 3be080007fe62839 - ANDCC $1234567, R5 // 641f001263ffd6877fe52839 - ANDCC $1234567, R5, R6 // 641f001263ffd6877fe62839 - ANDISCC $1, R3 // 74630001 - ANDISCC $1000, R3, R4 // 746403e8 - - OR $1, R3 // 60630001 - OR $1, R3, R4 // 60640001 - OR $-1, R4 // 3be0ffff7fe42378 - OR $-1, R4, R5 // 3be0ffff7fe52378 - OR $65535, R5 // 60a5ffff - OR $65535, R5, R6 // 60a6ffff - OR $65536, R6 // 64c60001 - OR $65536, R6, R7 // 64c70001 - OR $-32767, R5 // 3be080017fe52b78 - OR $-32767, R5, R6 // 3be080017fe62b78 - OR $-32768, R6 // 3be080007fe63378 - OR $-32768, R6, R7 // 3be080007fe73378 - OR $1234567, R5 // 641f001263ffd6877fe52b78 - OR $1234567, R5, R3 // 641f001263ffd6877fe32b78 - - XOR $1, R3 // 68630001 - XOR $1, R3, R4 // 68640001 - XOR $-1, R4 // 3be0ffff7fe42278 - XOR $-1, R4, R5 // 3be0ffff7fe52278 - XOR $65535, R5 // 68a5ffff - XOR $65535, R5, R6 // 68a6ffff - XOR $65536, R6 // 6cc60001 - XOR $65536, R6, R7 // 6cc70001 - XOR $-32767, R5 // 3be080017fe52a78 - XOR $-32767, R5, R6 // 3be080017fe62a78 - XOR $-32768, R6 // 3be080007fe63278 - XOR $-32768, R6, R7 // 3be080007fe73278 - XOR $1234567, R5 // 641f001263ffd6877fe52a78 - XOR $1234567, R5, R3 // 641f001263ffd6877fe32a78 - - // TODO: the order of CR operands don't match - CMP R3, R4 // 7c232000 - CMPU R3, R4 // 7c232040 - CMPW R3, R4 // 7c032000 - CMPWU R3, R4 // 7c032040 - - // TODO: constants for ADDC? - ADD R3, R4 // 7c841a14 - ADD R3, R4, R5 // 7ca41a14 - ADDC R3, R4 // 7c841814 - ADDC R3, R4, R5 // 7ca41814 - ADDE R3, R4 // 7c841914 - ADDECC R3, R4 // 7c841915 - ADDEV R3, R4 // 7c841d14 - ADDEVCC R3, R4 // 7c841d15 - ADDV R3, R4 // 7c841e14 - ADDVCC R3, R4 // 7c841e15 - ADDCCC R3, R4, R5 // 7ca41815 - ADDME R3, R4 // 7c8301d4 - ADDMECC R3, R4 // 7c8301d5 - ADDMEV R3, R4 // 7c8305d4 - ADDMEVCC R3, R4 // 7c8305d5 - ADDCV R3, R4 // 7c841c14 - ADDCVCC R3, R4 // 7c841c15 - ADDZE R3, R4 // 7c830194 - ADDZECC R3, R4 // 7c830195 - ADDZEV R3, R4 // 7c830594 - ADDZEVCC R3, R4 // 7c830595 - SUBME R3, R4 // 7c8301d0 - SUBMECC R3, R4 // 7c8301d1 - SUBMEV R3, R4 // 7c8305d0 - SUBZE R3, R4 // 7c830190 - SUBZECC R3, R4 // 7c830191 - SUBZEV R3, R4 // 7c830590 - SUBZEVCC R3, R4 // 7c830591 - - AND R3, R4 // 7c841838 - AND R3, R4, R5 // 7c851838 - ANDN R3, R4, R5 // 7c851878 - ANDCC R3, R4, R5 // 7c851839 - OR R3, R4 // 7c841b78 - OR R3, R4, R5 // 7c851b78 - ORN R3, R4, R5 // 7c851b38 - ORCC R3, R4, R5 // 7c851b79 - XOR R3, R4 // 7c841a78 - XOR R3, R4, R5 // 7c851a78 - XORCC R3, R4, R5 // 7c851a79 - NAND R3, R4, R5 // 7c851bb8 - NANDCC R3, R4, R5 // 7c851bb9 - EQV R3, R4, R5 // 7c851a38 - EQVCC R3, R4, R5 // 7c851a39 - NOR R3, R4, R5 // 7c8518f8 - NORCC R3, R4, R5 // 7c8518f9 - - SUB R3, R4 // 7c832050 - SUB R3, R4, R5 // 7ca32050 - SUBC R3, R4 // 7c832010 - SUBC R3, R4, R5 // 7ca32010 - - MULLW R3, R4 // 7c8419d6 - MULLW R3, R4, R5 // 7ca419d6 - MULLW $10, R3 // 1c63000a - MULLW $10000000, R3 // 641f009863ff96807c7f19d6 - MULLWCC R3, R4, R5 // 7ca419d7 - MULHW R3, R4, R5 // 7ca41896 - - MULHWU R3, R4, R5 // 7ca41816 - MULLD R3, R4 // 7c8419d2 - MULLD R4, R4, R5 // 7ca421d2 - MULLD $20, R4 // 1c840014 - MULLD $200000000, R4 // 641f0beb63ffc2007c9f21d2 - MULLDCC R3, R4, R5 // 7ca419d3 - MULHD R3, R4, R5 // 7ca41892 - MULHDCC R3, R4, R5 // 7ca41893 - - MULLWV R3, R4 // 7c841dd6 - MULLWV R3, R4, R5 // 7ca41dd6 - MULLWVCC R3, R4, R5 // 7ca41dd7 - MULHWUCC R3, R4, R5 // 7ca41817 - MULLDV R3, R4, R5 // 7ca41dd2 - MULLDVCC R3, R4, R5 // 7ca41dd3 - - DIVD R3,R4 // 7c841bd2 - DIVD R3, R4, R5 // 7ca41bd2 - DIVDCC R3,R4, R5 // 7ca41bd3 - DIVDU R3, R4, R5 // 7ca41b92 - DIVDV R3, R4, R5 // 7ca41fd2 - DIVDUCC R3, R4, R5 // 7ca41b93 - DIVDVCC R3, R4, R5 // 7ca41fd3 - DIVDUV R3, R4, R5 // 7ca41f92 - DIVDUVCC R3, R4, R5 // 7ca41f93 - DIVDE R3, R4, R5 // 7ca41b52 - DIVDECC R3, R4, R5 // 7ca41b53 - DIVDEU R3, R4, R5 // 7ca41b12 - DIVDEUCC R3, R4, R5 // 7ca41b13 - - REM R3, R4, R5 // 7fe41bd67fff19d67cbf2050 - REMU R3, R4, R5 // 7fe41b967fff19d67bff00287cbf2050 - REMD R3, R4, R5 // 7fe41bd27fff19d27cbf2050 - REMDU R3, R4, R5 // 7fe41b927fff19d27cbf2050 - - MODUD R3, R4, R5 // 7ca41a12 - MODUW R3, R4, R5 // 7ca41a16 - MODSD R3, R4, R5 // 7ca41e12 - MODSW R3, R4, R5 // 7ca41e16 - - SLW $8, R3, R4 // 5464402e - SLW R3, R4, R5 // 7c851830 - SLWCC R3, R4 // 7c841831 - SLD $16, R3, R4 // 786483e4 - SLD R3, R4, R5 // 7c851836 - SLDCC R3, R4 // 7c841837 - - SRW $8, R3, R4 // 5464c23e - SRW R3, R4, R5 // 7c851c30 - SRWCC R3, R4 // 7c841c31 - SRAW $8, R3, R4 // 7c644670 - SRAW R3, R4, R5 // 7c851e30 - SRAWCC R3, R4 // 7c841e31 - SRD $16, R3, R4 // 78648402 - SRD R3, R4, R5 // 7c851c36 - SRDCC R3, R4 // 7c841c37 - SRAD $16, R3, R4 // 7c648674 - SRAD R3, R4, R5 // 7c851e34 - SRDCC R3, R4 // 7c841c37 - ROTLW $16, R3, R4 // 5464803e - ROTLW R3, R4, R5 // 5c85183e - EXTSWSLI $3, R4, R5 // 7c851ef4 - RLWMI $7, R3, $65535, R6 // 50663c3e - RLWMICC $7, R3, $65535, R6 // 50663c3f - RLWNM $3, R4, $7, R6 // 54861f7e - RLWNMCC $3, R4, $7, R6 // 54861f7f - RLDMI $0, R4, $7, R6 // 7886076c - RLDMICC $0, R4, $7, R6 // 7886076d - RLDIMI $0, R4, $7, R6 // 788601cc - RLDIMICC $0, R4, $7, R6 // 788601cd - RLDC $0, R4, $15, R6 // 78860728 - RLDCCC $0, R4, $15, R6 // 78860729 - RLDCL $0, R4, $7, R6 // 78860770 - RLDCLCC $0, R4, $15, R6 // 78860721 - RLDCR $0, R4, $-16, R6 // 788606f2 - RLDCRCC $0, R4, $-16, R6 // 788606f3 - RLDICL $0, R4, $15, R6 // 788603c0 - RLDICLCC $0, R4, $15, R6 // 788603c1 - RLDICR $0, R4, $15, R6 // 788603c4 - RLDICRCC $0, R4, $15, R6 // 788603c5 - RLDIC $0, R4, $15, R6 // 788603c8 - RLDICCC $0, R4, $15, R6 // 788603c9 - CLRLSLWI $8, R5, $6, R4 // 54a430b2 - CLRLSLDI $24, R4, $4, R3 // 78832508 - - BEQ 0(PC) // 41820000 - BGE 0(PC) // 40800000 - BGT 4(PC) // 41810030 - BLE 0(PC) // 40810000 - BLT 0(PC) // 41800000 - BNE 0(PC) // 40820000 - JMP 8(PC) // 48000020 - - CRAND CR1, CR2, CR3 // 4c620a02 - CRANDN CR1, CR2, CR3 // 4c620902 - CREQV CR1, CR2, CR3 // 4c620a42 - CRNAND CR1, CR2, CR3 // 4c6209c2 - CRNOR CR1, CR2, CR3 // 4c620842 - CROR CR1, CR2, CR3 // 4c620b82 - CRORN CR1, CR2, CR3 // 4c620b42 - CRXOR CR1, CR2, CR3 // 4c620982 - - ISEL $1, R3, R4, R5 // 7ca3205e - ISEL $0, R3, R4, R5 // 7ca3201e - ISEL $2, R3, R4, R5 // 7ca3209e - ISEL $3, R3, R4, R5 // 7ca320de - ISEL $4, R3, R4, R5 // 7ca3211e - POPCNTB R3, R4 // 7c6400f4 - POPCNTW R3, R4 // 7c6402f4 - POPCNTD R3, R4 // 7c6403f4 - - PASTECC R3, R4 // 7c23270d - COPY R3, R4 // 7c23260c - - // load-and-reserve - LBAR (R4)(R3*1),$1,R5 // 7ca32069 - LBAR (R4),$0,R5 // 7ca02068 - LBAR (R3),R5 // 7ca01868 - LHAR (R4)(R3*1),$1,R5 // 7ca320e9 - LHAR (R4),$0,R5 // 7ca020e8 - LHAR (R3),R5 // 7ca018e8 - LWAR (R4)(R3*1),$1,R5 // 7ca32029 - LWAR (R4),$0,R5 // 7ca02028 - LWAR (R3),R5 // 7ca01828 - LDAR (R4)(R3*1),$1,R5 // 7ca320a9 - LDAR (R4),$0,R5 // 7ca020a8 - LDAR (R3),R5 // 7ca018a8 - - STBCCC R3, (R4)(R5) // 7c65256d - STWCCC R3, (R4)(R5) // 7c65212d - STDCCC R3, (R4)(R5) // 7c6521ad - STHCCC R3, (R4)(R5) - - SYNC // 7c0004ac - ISYNC // 4c00012c - LWSYNC // 7c2004ac - - DCBF (R3)(R4) // 7c0418ac - DCBI (R3)(R4) // 7c041bac - DCBST (R3)(R4) // 7c04186c - DCBZ (R3)(R4) // 7c041fec - DCBT (R3)(R4) // 7c041a2c - ICBI (R3)(R4) // 7c041fac - - // float constants - FMOVD $(0.0), F1 // f0210cd0 - FMOVD $(-0.0), F1 // f0210cd0fc200850 - - FMOVD 8(R3), F1 // c8230008 - FMOVD (R3)(R4), F1 // 7c241cae - FMOVDU 8(R3), F1 // cc230008 - FMOVDU (R3)(R4), F1 // 7c241cee - FMOVS 4(R3), F1 // c0230004 - FMOVS (R3)(R4), F1 // 7c241c2e - FMOVSU 4(R3), F1 // c4230004 - FMOVSU (R3)(R4), F1 // 7c241c6e - - FMOVD F1, 8(R3) // d8230008 - FMOVD F1, (R3)(R4) // 7c241dae - FMOVDU F1, 8(R3) // dc230008 - FMOVDU F1, (R3)(R4) // 7c241dee - FMOVS F1, 4(R3) // d0230004 - FMOVS F1, (R3)(R4) // 7c241d2e - FMOVSU F1, 4(R3) // d4230004 - FMOVSU F1, (R3)(R4) // 7c241d6e - FADD F1, F2 // fc42082a - FADD F1, F2, F3 // fc62082a - FADDCC F1, F2, F3 // fc62082b - FADDS F1, F2 // ec42082a - FADDS F1, F2, F3 // ec62082a - FADDSCC F1, F2, F3 // ec62082b - FSUB F1, F2 // fc420828 - FSUB F1, F2, F3 // fc620828 - FSUBCC F1, F2, F3 // fc620829 - FSUBS F1, F2 // ec420828 - FSUBS F1, F2, F3 // ec620828 - FSUBCC F1, F2, F3 // fc620829 - FMUL F1, F2 // fc420072 - FMUL F1, F2, F3 // fc620072 - FMULCC F1, F2, F3 // fc620073 - FMULS F1, F2 // ec420072 - FMULS F1, F2, F3 // ec620072 - FMULSCC F1, F2, F3 // ec620073 - FDIV F1, F2 // fc420824 - FDIV F1, F2, F3 // fc620824 - FDIVCC F1, F2, F3 // fc620825 - FDIVS F1, F2 // ec420824 - FDIVS F1, F2, F3 // ec620824 - FDIVSCC F1, F2, F3 // ec620825 - FMADD F1, F2, F3, F4 // fc8110fa - FMADDCC F1, F2, F3, F4 // fc8110fb - FMADDS F1, F2, F3, F4 // ec8110fa - FMADDSCC F1, F2, F3, F4 // ec8110fb - FMSUB F1, F2, F3, F4 // fc8110f8 - FMSUBCC F1, F2, F3, F4 // fc8110f9 - FMSUBS F1, F2, F3, F4 // ec8110f8 - FMSUBSCC F1, F2, F3, F4 // ec8110f9 - FNMADD F1, F2, F3, F4 // fc8110fe - FNMADDCC F1, F2, F3, F4 // fc8110ff - FNMADDS F1, F2, F3, F4 // ec8110fe - FNMADDSCC F1, F2, F3, F4 // ec8110ff - FNMSUB F1, F2, F3, F4 // fc8110fc - FNMSUBCC F1, F2, F3, F4 // fc8110fd - FNMSUBS F1, F2, F3, F4 // ec8110fc - FNMSUBSCC F1, F2, F3, F4 // ec8110fd - FSEL F1, F2, F3, F4 // fc8110ee - FSELCC F1, F2, F3, F4 // fc8110ef - FABS F1, F2 // fc400a10 - FABSCC F1, F2 // fc400a11 - FNEG F1, F2 // fc400850 - FABSCC F1, F2 // fc400a11 - FRSP F1, F2 // fc400818 - FRSPCC F1, F2 // fc400819 - FCTIW F1, F2 // fc40081c - FCTIWCC F1, F2 // fc40081d - FCTIWZ F1, F2 // fc40081e - FCTIWZCC F1, F2 // fc40081f - FCTID F1, F2 // fc400e5c - FCTIDCC F1, F2 // fc400e5d - FCTIDZ F1, F2 // fc400e5e - FCTIDZCC F1, F2 // fc400e5f - FCFID F1, F2 // fc400e9c - FCFIDCC F1, F2 // fc400e9d - FCFIDU F1, F2 // fc400f9c - FCFIDUCC F1, F2 // fc400f9d - FCFIDS F1, F2 // ec400e9c - FCFIDSCC F1, F2 // ec400e9d - FRES F1, F2 // ec400830 - FRESCC F1, F2 // ec400831 - FRIM F1, F2 // fc400bd0 - FRIMCC F1, F2 // fc400bd1 - FRIP F1, F2 // fc400b90 - FRIPCC F1, F2 // fc400b91 - FRIZ F1, F2 // fc400b50 - FRIZCC F1, F2 // fc400b51 - FRIN F1, F2 // fc400b10 - FRINCC F1, F2 // fc400b11 - FRSQRTE F1, F2 // fc400834 - FRSQRTECC F1, F2 // fc400835 - FSQRT F1, F2 // fc40082c - FSQRTCC F1, F2 // fc40082d - FSQRTS F1, F2 // ec40082c - FSQRTSCC F1, F2 // ec40082d - FCPSGN F1, F2 // fc420810 - FCPSGNCC F1, F2 // fc420811 - FCMPO F1, F2 // fc011040 - FCMPU F1, F2 // fc011000 - LVX (R3)(R4), V1 // 7c2418ce - LVXL (R3)(R4), V1 // 7c241ace - LVSL (R3)(R4), V1 // 7c24180c - LVSR (R3)(R4), V1 // 7c24184c - LVEBX (R3)(R4), V1 // 7c24180e - LVEHX (R3)(R4), V1 // 7c24184e - LVEWX (R3)(R4), V1 // 7c24188e - STVX V1, (R3)(R4) // 7c2419ce - STVXL V1, (R3)(R4) // 7c241bce - STVEBX V1, (R3)(R4) // 7c24190e - STVEHX V1, (R3)(R4) // 7c24194e - STVEWX V1, (R3)(R4) // 7c24198e - - VAND V1, V2, V3 // 10611404 - VANDC V1, V2, V3 // 10611444 - VNAND V1, V2, V3 // 10611584 - VOR V1, V2, V3 // 10611484 - VORC V1, V2, V3 // 10611544 - VXOR V1, V2, V3 // 106114c4 - VNOR V1, V2, V3 // 10611504 - VEQV V1, V2, V3 // 10611684 - VADDUBM V1, V2, V3 // 10611000 - VADDUHM V1, V2, V3 // 10611040 - VADDUWM V1, V2, V3 // 10611080 - VADDUDM V1, V2, V3 // 106110c0 - VADDUQM V1, V2, V3 // 10611100 - VADDCUQ V1, V2, V3 // 10611140 - VADDCUW V1, V2, V3 // 10611180 - VADDUBS V1, V2, V3 // 10611200 - VADDUHS V1, V2, V3 // 10611240 - VADDUWS V1, V2, V3 // 10611280 - VSUBUBM V1, V2, V3 // 10611400 - VSUBUHM V1, V2, V3 // 10611440 - VSUBUWM V1, V2, V3 // 10611480 - VSUBUDM V1, V2, V3 // 106114c0 - VSUBUQM V1, V2, V3 // 10611500 - VSUBCUQ V1, V2, V3 // 10611540 - VSUBCUW V1, V2, V3 // 10611580 - VSUBUBS V1, V2, V3 // 10611600 - VSUBUHS V1, V2, V3 // 10611640 - VSUBUWS V1, V2, V3 // 10611680 - VSUBSBS V1, V2, V3 // 10611700 - VSUBSHS V1, V2, V3 // 10611740 - VSUBSWS V1, V2, V3 // 10611780 - VSUBEUQM V1, V2, V3, V4 // 108110fe - VSUBECUQ V1, V2, V3, V4 // 108110ff - VMULESB V1, V2, V3 // 10611308 - VMULOSB V1, V2, V3 // 10611108 - VMULEUB V1, V2, V3 // 10611208 - VMULOUB V1, V2, V3 // 10611008 - VMULESH V1, V2, V3 // 10611348 - VMULOSH V1, V2, V3 // 10611148 - VMULEUH V1, V2, V3 // 10611248 - VMULOUH V1, V2, V3 // 10611048 - VMULESH V1, V2, V3 // 10611348 - VMULOSW V1, V2, V3 // 10611188 - VMULEUW V1, V2, V3 // 10611288 - VMULOUW V1, V2, V3 // 10611088 - VMULUWM V1, V2, V3 // 10611089 - VPMSUMB V1, V2, V3 // 10611408 - VPMSUMH V1, V2, V3 // 10611448 - VPMSUMW V1, V2, V3 // 10611488 - VPMSUMD V1, V2, V3 // 106114c8 - VMSUMUDM V1, V2, V3, V4 // 108110e3 - VRLB V1, V2, V3 // 10611004 - VRLH V1, V2, V3 // 10611044 - VRLW V1, V2, V3 // 10611084 - VRLD V1, V2, V3 // 106110c4 - VSLB V1, V2, V3 // 10611104 - VSLH V1, V2, V3 // 10611144 - VSLW V1, V2, V3 // 10611184 - VSL V1, V2, V3 // 106111c4 - VSLO V1, V2, V3 // 1061140c - VSRB V1, V2, V3 // 10611204 - VSRH V1, V2, V3 // 10611244 - VSRW V1, V2, V3 // 10611284 - VSR V1, V2, V3 // 106112c4 - VSRO V1, V2, V3 // 1061144c - VSLD V1, V2, V3 // 106115c4 - VSRAB V1, V2, V3 // 10611304 - VSRAH V1, V2, V3 // 10611344 - VSRAW V1, V2, V3 // 10611384 - VSRAD V1, V2, V3 // 106113c4 - VSLDOI $3, V1, V2, V3 // 106110ec - VCLZB V1, V2 // 10400f02 - VCLZH V1, V2 // 10400f42 - VCLZW V1, V2 // 10400f82 - VCLZD V1, V2 // 10400fc2 - VPOPCNTB V1, V2 // 10400f03 - VPOPCNTH V1, V2 // 10400f43 - VPOPCNTW V1, V2 // 10400f83 - VPOPCNTD V1, V2 // 10400fc3 - VCMPEQUB V1, V2, V3 // 10611006 - VCMPEQUBCC V1, V2, V3 // 10611406 - VCMPEQUH V1, V2, V3 // 10611046 - VCMPEQUHCC V1, V2, V3 // 10611446 - VCMPEQUW V1, V2, V3 // 10611086 - VCMPEQUWCC V1, V2, V3 // 10611486 - VCMPEQUD V1, V2, V3 // 106110c7 - VCMPEQUDCC V1, V2, V3 // 106114c7 - VCMPGTUB V1, V2, V3 // 10611206 - VCMPGTUBCC V1, V2, V3 // 10611606 - VCMPGTUH V1, V2, V3 // 10611246 - VCMPGTUHCC V1, V2, V3 // 10611646 - VCMPGTUW V1, V2, V3 // 10611286 - VCMPGTUWCC V1, V2, V3 // 10611686 - VCMPGTUD V1, V2, V3 // 106112c7 - VCMPGTUDCC V1, V2, V3 // 106116c7 - VCMPGTSB V1, V2, V3 // 10611306 - VCMPGTSBCC V1, V2, V3 // 10611706 - VCMPGTSH V1, V2, V3 // 10611346 - VCMPGTSHCC V1, V2, V3 // 10611746 - VCMPGTSW V1, V2, V3 // 10611386 - VCMPGTSWCC V1, V2, V3 // 10611786 - VCMPGTSD V1, V2, V3 // 106113c7 - VCMPGTSDCC V1, V2, V3 // 106117c7 - VCMPNEZB V1, V2, V3 // 10611107 - VCMPNEZBCC V1, V2, V3 // 10611507 - VCMPNEB V1, V2, V3 // 10611007 - VCMPNEBCC V1, V2, V3 // 10611407 - VCMPNEH V1, V2, V3 // 10611047 - VCMPNEHCC V1, V2, V3 // 10611447 - VCMPNEW V1, V2, V3 // 10611087 - VCMPNEWCC V1, V2, V3 // 10611487 - VPERM V1, V2, V3, V4 // 108110eb - VPERMR V1, V2, V3, V4 // 108110fb - VPERMXOR V1, V2, V3, V4 // 108110ed - VBPERMQ V1, V2, V3 // 1061154c - VBPERMD V1, V2, V3 // 106115cc - VSEL V1, V2, V3, V4 // 108110ea - VSPLTB $1, V1, V2 // 10410a0c - VSPLTH $1, V1, V2 // 10410a4c - VSPLTW $1, V1, V2 // 10410a8c - VSPLTISB $1, V1 // 1021030c - VSPLTISW $1, V1 // 1021038c - VSPLTISH $1, V1 // 1021034c - VCIPHER V1, V2, V3 // 10611508 - VCIPHERLAST V1, V2, V3 // 10611509 - VNCIPHER V1, V2, V3 // 10611548 - VNCIPHERLAST V1, V2, V3 // 10611549 - VSBOX V1, V2 // 104105c8 - VSHASIGMAW $1, V1, $15, V2 // 10418e82 - VSHASIGMAD $2, V1, $15, V2 // 104196c2 - - LXVD2X (R3)(R4), VS1 // 7c241e98 - LXV 16(R3), VS1 // f4230011 - LXVL R3, R4, VS1 // 7c23221a - LXVLL R3, R4, VS1 // 7c23225a - LXVX R3, R4, VS1 // 7c232218 - LXSDX (R3)(R4), VS1 // 7c241c98 - STXVD2X VS1, (R3)(R4) // 7c241f98 - STXV VS1,16(R3) // f4230015 - STXVL VS1, R3, R4 // 7c23231a - STXVLL VS1, R3, R4 // 7c23235a - STXVX VS1, R3, R4 // 7c232318 - STXSDX VS1, (R3)(R4) // 7c241d98 - LXSIWAX (R3)(R4), VS1 // 7c241898 - STXSIWX VS1, (R3)(R4) // 7c241918 - MFVSRD VS1, R3 // 7c230066 - MTVSRD R3, VS1 // 7c230166 - XXLAND VS1, VS2, VS3 // f0611410 - XXLOR VS1, VS2, VS3 // f0611490 - XXLORC VS1, VS2, VS3 // f0611550 - XXLXOR VS1, VS2, VS3 // f06114d0 - XXSEL VS1, VS2, VS3, VS4 // f08110f0 - XXMRGHW VS1, VS2, VS3 // f0611090 - XXSPLTW VS1, $1, VS2 // f0410a90 - XXPERM VS1, VS2, VS3 // f06110d0 - XXSLDWI VS1, VS2, $1, VS3 // f0611110 - XSCVDPSP VS1, VS2 // f0400c24 - XVCVDPSP VS1, VS2 // f0400e24 - XSCVSXDDP VS1, VS2 // f0400de0 - XVCVDPSXDS VS1, VS2 // f0400f60 - XVCVSXDDP VS1, VS2 // f0400fe0 - - MOVD R3, LR // 7c6803a6 - MOVD R3, CTR // 7c6903a6 - MOVD R3, XER // 7c6103a6 - MOVD LR, R3 // 7c6802a6 - MOVD CTR, R3 // 7c6902a6 - MOVD XER, R3 // 7c6102a6 - MOVFL CR3, CR1 // 4c8c0000 - - RET diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index dcabb3cd6a..090fefb4d8 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -2159,7 +2159,7 @@ func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 { /* Z23-form, 3-register operands + CY field */ func AOP_Z23I(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 { - return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (c&3)<<7 + return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (c&3)<<9 } /* X-form, 3-register operands + EH field */ -- cgit v1.3 From dd58239dd20f7002b3e219a477514b91dd0cc5fc Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Wed, 7 Oct 2020 12:31:05 -0400 Subject: cmd/asm: allow def/ref of func ABI when compiling runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function symbols defined and referenced by assembly source currently always default to ABI0; this patch adds preliminary support for accepting an explicit ABI selector clause for func defs/refs. This functionality is currently only enabled when compiling runtime-related packages (runtime, syscall, reflect). Examples: TEXT ·DefinedAbi0Symbol(SB),NOSPLIT,$0 RET TEXT ·DefinedAbi1Symbol(SB),NOSPLIT,$0 CALL ·AbiZerolSym(SB) ... JMP ·AbiInternalSym(SB) RET Also included is a small change to the code in the compiler that reads the symabis file emitted by the assembler. New behavior is currently gated under GOEXPERIMENT=regabi. Updates #27539, #40724. Change-Id: Ia22221fe26df0fa002191cfb13bdfaaa38d7df38 Reviewed-on: https://go-review.googlesource.com/c/go/+/260477 Run-TryBot: Than McIntosh TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Trust: Than McIntosh --- src/cmd/asm/internal/asm/endtoend_test.go | 4 +- src/cmd/asm/internal/asm/expr_test.go | 4 +- src/cmd/asm/internal/asm/line_test.go | 2 +- src/cmd/asm/internal/asm/operand_test.go | 24 ++++- src/cmd/asm/internal/asm/parse.go | 148 ++++++++++++++++++++---------- src/cmd/asm/internal/asm/pseudo_test.go | 1 + src/cmd/asm/main.go | 3 +- src/cmd/compile/internal/gc/main.go | 11 ++- src/cmd/internal/obj/link.go | 14 +++ src/cmd/internal/obj/util.go | 39 +++++++- src/cmd/internal/objabi/path.go | 22 +++++ 11 files changed, 207 insertions(+), 65 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go index decf5391db..989b7a5405 100644 --- a/src/cmd/asm/internal/asm/endtoend_test.go +++ b/src/cmd/asm/internal/asm/endtoend_test.go @@ -31,7 +31,7 @@ func testEndToEnd(t *testing.T, goarch, file string) { architecture, ctxt := setArch(goarch) architecture.Init(ctxt) lexer := lex.NewLexer(input) - parser := NewParser(ctxt, architecture, lexer) + parser := NewParser(ctxt, architecture, lexer, false) pList := new(obj.Plist) var ok bool testOut = new(bytes.Buffer) // The assembler writes test output to this buffer. @@ -273,7 +273,7 @@ func testErrors(t *testing.T, goarch, file string) { input := filepath.Join("testdata", file+".s") architecture, ctxt := setArch(goarch) lexer := lex.NewLexer(input) - parser := NewParser(ctxt, architecture, lexer) + parser := NewParser(ctxt, architecture, lexer, false) pList := new(obj.Plist) var ok bool testOut = new(bytes.Buffer) // The assembler writes test output to this buffer. diff --git a/src/cmd/asm/internal/asm/expr_test.go b/src/cmd/asm/internal/asm/expr_test.go index 1251594349..e9c92df1f3 100644 --- a/src/cmd/asm/internal/asm/expr_test.go +++ b/src/cmd/asm/internal/asm/expr_test.go @@ -57,7 +57,7 @@ var exprTests = []exprTest{ } func TestExpr(t *testing.T) { - p := NewParser(nil, nil, nil) // Expression evaluation uses none of these fields of the parser. + p := NewParser(nil, nil, nil, false) // Expression evaluation uses none of these fields of the parser. for i, test := range exprTests { p.start(lex.Tokenize(test.input)) result := int64(p.expr()) @@ -113,7 +113,7 @@ func TestBadExpr(t *testing.T) { } func runBadTest(i int, test badExprTest, t *testing.T) (err error) { - p := NewParser(nil, nil, nil) // Expression evaluation uses none of these fields of the parser. + p := NewParser(nil, nil, nil, false) // Expression evaluation uses none of these fields of the parser. p.start(lex.Tokenize(test.input)) return tryParse(t, func() { p.expr() diff --git a/src/cmd/asm/internal/asm/line_test.go b/src/cmd/asm/internal/asm/line_test.go index 01b058bd95..da857ced3a 100644 --- a/src/cmd/asm/internal/asm/line_test.go +++ b/src/cmd/asm/internal/asm/line_test.go @@ -39,7 +39,7 @@ func testBadInstParser(t *testing.T, goarch string, tests []badInstTest) { for i, test := range tests { arch, ctxt := setArch(goarch) tokenizer := lex.NewTokenizer("", strings.NewReader(test.input+"\n"), nil) - parser := NewParser(ctxt, arch, tokenizer) + parser := NewParser(ctxt, arch, tokenizer, false) err := tryParse(t, func() { parser.Parse() diff --git a/src/cmd/asm/internal/asm/operand_test.go b/src/cmd/asm/internal/asm/operand_test.go index f187d0b166..2e83e176b2 100644 --- a/src/cmd/asm/internal/asm/operand_test.go +++ b/src/cmd/asm/internal/asm/operand_test.go @@ -28,7 +28,7 @@ func setArch(goarch string) (*arch.Arch, *obj.Link) { func newParser(goarch string) *Parser { architecture, ctxt := setArch(goarch) - return NewParser(ctxt, architecture, nil) + return NewParser(ctxt, architecture, nil, false) } // tryParse executes parse func in panicOnError=true context. @@ -75,7 +75,12 @@ func testOperandParser(t *testing.T, parser *Parser, tests []operandTest) { parser.start(lex.Tokenize(test.input)) addr := obj.Addr{} parser.operand(&addr) - result := obj.Dconv(&emptyProg, &addr) + var result string + if parser.compilingRuntime { + result = obj.DconvWithABIDetail(&emptyProg, &addr) + } else { + result = obj.Dconv(&emptyProg, &addr) + } if result != test.output { t.Errorf("fail at %s: got %s; expected %s\n", test.input, result, test.output) } @@ -86,6 +91,9 @@ func TestAMD64OperandParser(t *testing.T) { parser := newParser("amd64") testOperandParser(t, parser, amd64OperandTests) testBadOperandParser(t, parser, amd64BadOperandTests) + parser.compilingRuntime = true + testOperandParser(t, parser, amd64RuntimeOperandTests) + testBadOperandParser(t, parser, amd64BadOperandRuntimeTests) } func Test386OperandParser(t *testing.T) { @@ -141,7 +149,7 @@ func TestFuncAddress(t *testing.T) { parser := newParser(sub.arch) for _, test := range sub.tests { parser.start(lex.Tokenize(test.input)) - name, ok := parser.funcAddress() + name, _, ok := parser.funcAddress() isFuncSym := strings.HasSuffix(test.input, "(SB)") && // Ignore static symbols. @@ -298,6 +306,11 @@ var amd64OperandTests = []operandTest{ {"[):[o-FP", ""}, // Issue 12469 - asm hung parsing the o-FP range on non ARM platforms. } +var amd64RuntimeOperandTests = []operandTest{ + {"$bar(SB)", "$bar(SB)"}, + {"$foo(SB)", "$foo(SB)"}, +} + var amd64BadOperandTests = []badOperandTest{ {"[", "register list: expected ']', found EOF"}, {"[4", "register list: bad low register in `[4`"}, @@ -311,6 +324,11 @@ var amd64BadOperandTests = []badOperandTest{ {"[X0-X1-X2]", "register list: expected ']' after `[X0-X1`, found '-'"}, {"[X0,X3]", "register list: expected '-' after `[X0`, found ','"}, {"[X0,X1,X2,X3]", "register list: expected '-' after `[X0`, found ','"}, + {"$foo", "ABI selector only permitted when compiling runtime, reference was to \"foo\""}, +} + +var amd64BadOperandRuntimeTests = []badOperandTest{ + {"$foo", "malformed ABI selector \"bletch\" in reference to \"foo\""}, } var x86OperandTests = []operandTest{ diff --git a/src/cmd/asm/internal/asm/parse.go b/src/cmd/asm/internal/asm/parse.go index d9dbd92cb0..154cf9c7a7 100644 --- a/src/cmd/asm/internal/asm/parse.go +++ b/src/cmd/asm/internal/asm/parse.go @@ -25,25 +25,26 @@ import ( ) type Parser struct { - lex lex.TokenReader - lineNum int // Line number in source file. - errorLine int // Line number of last error. - errorCount int // Number of errors. - sawCode bool // saw code in this file (as opposed to comments and blank lines) - pc int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA. - input []lex.Token - inputPos int - pendingLabels []string // Labels to attach to next instruction. - labels map[string]*obj.Prog - toPatch []Patch - addr []obj.Addr - arch *arch.Arch - ctxt *obj.Link - firstProg *obj.Prog - lastProg *obj.Prog - dataAddr map[string]int64 // Most recent address for DATA for this symbol. - isJump bool // Instruction being assembled is a jump. - errorWriter io.Writer + lex lex.TokenReader + lineNum int // Line number in source file. + errorLine int // Line number of last error. + errorCount int // Number of errors. + sawCode bool // saw code in this file (as opposed to comments and blank lines) + pc int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA. + input []lex.Token + inputPos int + pendingLabels []string // Labels to attach to next instruction. + labels map[string]*obj.Prog + toPatch []Patch + addr []obj.Addr + arch *arch.Arch + ctxt *obj.Link + firstProg *obj.Prog + lastProg *obj.Prog + dataAddr map[string]int64 // Most recent address for DATA for this symbol. + isJump bool // Instruction being assembled is a jump. + compilingRuntime bool + errorWriter io.Writer } type Patch struct { @@ -51,14 +52,15 @@ type Patch struct { label string } -func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser { +func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader, compilingRuntime bool) *Parser { return &Parser{ - ctxt: ctxt, - arch: ar, - lex: lexer, - labels: make(map[string]*obj.Prog), - dataAddr: make(map[string]int64), - errorWriter: os.Stderr, + ctxt: ctxt, + arch: ar, + lex: lexer, + labels: make(map[string]*obj.Prog), + dataAddr: make(map[string]int64), + errorWriter: os.Stderr, + compilingRuntime: compilingRuntime, } } @@ -310,8 +312,8 @@ func (p *Parser) symDefRef(w io.Writer, word string, operands [][]lex.Token) { // Defines text symbol in operands[0]. if len(operands) > 0 { p.start(operands[0]) - if name, ok := p.funcAddress(); ok { - fmt.Fprintf(w, "def %s ABI0\n", name) + if name, abi, ok := p.funcAddress(); ok { + fmt.Fprintf(w, "def %s %s\n", name, abi) } } return @@ -329,8 +331,8 @@ func (p *Parser) symDefRef(w io.Writer, word string, operands [][]lex.Token) { // Search for symbol references. for _, op := range operands { p.start(op) - if name, ok := p.funcAddress(); ok { - fmt.Fprintf(w, "ref %s ABI0\n", name) + if name, abi, ok := p.funcAddress(); ok { + fmt.Fprintf(w, "ref %s %s\n", name, abi) } } } @@ -765,20 +767,19 @@ func (p *Parser) symbolReference(a *obj.Addr, name string, prefix rune) { case '*': a.Type = obj.TYPE_INDIR } - // Weirdness with statics: Might now have "<>". - isStatic := false - if p.peek() == '<' { - isStatic = true - p.next() - p.get('>') - } + + // Parse optional <> (indicates a static symbol) or + // (selecting text symbol with specific ABI). + doIssueError := true + isStatic, abi := p.symRefAttrs(name, doIssueError) + if p.peek() == '+' || p.peek() == '-' { a.Offset = int64(p.expr()) } if isStatic { a.Sym = p.ctxt.LookupStatic(name) } else { - a.Sym = p.ctxt.Lookup(name) + a.Sym = p.ctxt.LookupABI(name, abi) } if p.peek() == scanner.EOF { if prefix == 0 && p.isJump { @@ -823,12 +824,60 @@ func (p *Parser) setPseudoRegister(addr *obj.Addr, reg string, isStatic bool, pr } } +// symRefAttrs parses an optional function symbol attribute clause for +// the function symbol 'name', logging an error for a malformed +// attribute clause if 'issueError' is true. The return value is a +// (boolean, ABI) pair indicating that the named symbol is either +// static or a particular ABI specification. +// +// The expected form of the attribute clause is: +// +// empty, yielding (false, obj.ABI0) +// "<>", yielding (true, obj.ABI0) +// "" yielding (false, obj.ABI0) +// "" yielding (false, obj.ABIInternal) +// +// Anything else beginning with "<" logs an error if issueError is +// true, otherwise returns (false, obj.ABI0). +// +func (p *Parser) symRefAttrs(name string, issueError bool) (bool, obj.ABI) { + abi := obj.ABI0 + isStatic := false + if p.peek() != '<' { + return isStatic, abi + } + p.next() + tok := p.peek() + if tok == '>' { + isStatic = true + } else if tok == scanner.Ident { + abistr := p.get(scanner.Ident).String() + if !p.compilingRuntime { + if issueError { + p.errorf("ABI selector only permitted when compiling runtime, reference was to %q", name) + } + } else { + theabi, valid := obj.ParseABI(abistr) + if !valid { + if issueError { + p.errorf("malformed ABI selector %q in reference to %q", + abistr, name) + } + } else { + abi = theabi + } + } + } + p.get('>') + return isStatic, abi +} + // funcAddress parses an external function address. This is a // constrained form of the operand syntax that's always SB-based, // non-static, and has at most a simple integer offset: // -// [$|*]sym[+Int](SB) -func (p *Parser) funcAddress() (string, bool) { +// [$|*]sym[][+Int](SB) +func (p *Parser) funcAddress() (string, obj.ABI, bool) { switch p.peek() { case '$', '*': // Skip prefix. @@ -838,25 +887,32 @@ func (p *Parser) funcAddress() (string, bool) { tok := p.next() name := tok.String() if tok.ScanToken != scanner.Ident || p.atStartOfRegister(name) { - return "", false + return "", obj.ABI0, false + } + // Parse optional <> (indicates a static symbol) or + // (selecting text symbol with specific ABI). + noErrMsg := false + isStatic, abi := p.symRefAttrs(name, noErrMsg) + if isStatic { + return "", obj.ABI0, false // This function rejects static symbols. } tok = p.next() if tok.ScanToken == '+' { if p.next().ScanToken != scanner.Int { - return "", false + return "", obj.ABI0, false } tok = p.next() } if tok.ScanToken != '(' { - return "", false + return "", obj.ABI0, false } if reg := p.next(); reg.ScanToken != scanner.Ident || reg.String() != "SB" { - return "", false + return "", obj.ABI0, false } if p.next().ScanToken != ')' || p.peek() != scanner.EOF { - return "", false + return "", obj.ABI0, false } - return name, true + return name, abi, true } // registerIndirect parses the general form of a register indirection. diff --git a/src/cmd/asm/internal/asm/pseudo_test.go b/src/cmd/asm/internal/asm/pseudo_test.go index 100bef91cf..622ee25ce7 100644 --- a/src/cmd/asm/internal/asm/pseudo_test.go +++ b/src/cmd/asm/internal/asm/pseudo_test.go @@ -37,6 +37,7 @@ func TestErroneous(t *testing.T) { {"TEXT", "$0É:0, 0, $1", "expected end of operand, found É"}, // Issue #12467. {"TEXT", "$:0:(SB, 0, $1", "expected '(', found 0"}, // Issue 12468. {"TEXT", "@B(SB),0,$0", "expected '(', found B"}, // Issue 23580. + {"TEXT", "foo(SB),0", "ABI selector only permitted when compiling runtime, reference was to \"foo\""}, {"FUNCDATA", "", "expect two operands for FUNCDATA"}, {"FUNCDATA", "(SB ", "expect two operands for FUNCDATA"}, {"DATA", "", "expect two operands for DATA"}, diff --git a/src/cmd/asm/main.go b/src/cmd/asm/main.go index fd079a2ccd..01c963ac72 100644 --- a/src/cmd/asm/main.go +++ b/src/cmd/asm/main.go @@ -52,6 +52,7 @@ func main() { case "all", "ret": ctxt.Retpoline = true } + compilingRuntime := objabi.IsRuntimePackagePath(*flags.Importpath) ctxt.Bso = bufio.NewWriter(os.Stdout) defer ctxt.Bso.Flush() @@ -74,7 +75,7 @@ func main() { var failedFile string for _, f := range flag.Args() { lexer := lex.NewLexer(f) - parser := asm.NewParser(ctxt, architecture, lexer) + parser := asm.NewParser(ctxt, architecture, lexer, compilingRuntime) ctxt.DiagFunc = func(format string, args ...interface{}) { diag = true log.Printf(format, args...) diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index e4e4ce72fd..2fffe625cd 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -968,9 +968,10 @@ func readSymABIs(file, myimportpath string) { if len(parts) != 3 { log.Fatalf(`%s:%d: invalid symabi: syntax is "%s sym abi"`, file, lineNum, parts[0]) } - sym, abi := parts[1], parts[2] - if abi != "ABI0" { // Only supported external ABI right now - log.Fatalf(`%s:%d: invalid symabi: unknown abi "%s"`, file, lineNum, abi) + sym, abistr := parts[1], parts[2] + abi, valid := obj.ParseABI(abistr) + if !valid { + log.Fatalf(`%s:%d: invalid symabi: unknown abi "%s"`, file, lineNum, abistr) } // If the symbol is already prefixed with @@ -983,9 +984,9 @@ func readSymABIs(file, myimportpath string) { // Record for later. if parts[0] == "def" { - symabiDefs[sym] = obj.ABI0 + symabiDefs[sym] = abi } else { - symabiRefs[sym] = obj.ABI0 + symabiRefs[sym] = abi } default: log.Fatalf(`%s:%d: invalid symabi type "%s"`, file, lineNum, parts[0]) diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index ae85dbbe4e..ad4708138f 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -502,6 +502,20 @@ const ( ABICount ) +// ParseABI converts from a string representation in 'abistr' to the +// corresponding ABI value. Second return value is TRUE if the +// abi string is recognized, FALSE otherwise. +func ParseABI(abistr string) (ABI, bool) { + switch abistr { + default: + return ABI0, false + case "ABI0": + return ABI0, true + case "ABIInternal": + return ABIInternal, true + } +} + // Attribute is a set of symbol attributes. type Attribute uint32 diff --git a/src/cmd/internal/obj/util.go b/src/cmd/internal/obj/util.go index a30ccf0564..21e28807a6 100644 --- a/src/cmd/internal/obj/util.go +++ b/src/cmd/internal/obj/util.go @@ -210,13 +210,30 @@ func (ctxt *Link) CanReuseProgs() bool { return ctxt.Debugasm == 0 } +// Dconv accepts an argument 'a' within a prog 'p' and returns a string +// with a formatted version of the argument. func Dconv(p *Prog, a *Addr) string { buf := new(bytes.Buffer) - WriteDconv(buf, p, a) + writeDconv(buf, p, a, false) return buf.String() } +// DconvDconvWithABIDetail accepts an argument 'a' within a prog 'p' +// and returns a string with a formatted version of the argument, in +// which text symbols are rendered with explicit ABI selectors. +func DconvWithABIDetail(p *Prog, a *Addr) string { + buf := new(bytes.Buffer) + writeDconv(buf, p, a, true) + return buf.String() +} + +// WriteDconv accepts an argument 'a' within a prog 'p' +// and writes a formatted version of the arg to the writer. func WriteDconv(w io.Writer, p *Prog, a *Addr) { + writeDconv(w, p, a, false) +} + +func writeDconv(w io.Writer, p *Prog, a *Addr, abiDetail bool) { switch a.Type { default: fmt.Fprintf(w, "type=%d", a.Type) @@ -250,7 +267,7 @@ func WriteDconv(w io.Writer, p *Prog, a *Addr) { case TYPE_BRANCH: if a.Sym != nil { - fmt.Fprintf(w, "%s(SB)", a.Sym.Name) + fmt.Fprintf(w, "%s%s(SB)", a.Sym.Name, abiDecorate(a, abiDetail)) } else if a.Target() != nil { fmt.Fprint(w, a.Target().Pc) } else { @@ -259,7 +276,7 @@ func WriteDconv(w io.Writer, p *Prog, a *Addr) { case TYPE_INDIR: io.WriteString(w, "*") - a.WriteNameTo(w) + a.writeNameTo(w, abiDetail) case TYPE_MEM: a.WriteNameTo(w) @@ -299,7 +316,7 @@ func WriteDconv(w io.Writer, p *Prog, a *Addr) { case TYPE_ADDR: io.WriteString(w, "$") - a.WriteNameTo(w) + a.writeNameTo(w, abiDetail) case TYPE_SHIFT: v := int(a.Offset) @@ -335,6 +352,11 @@ func WriteDconv(w io.Writer, p *Prog, a *Addr) { } func (a *Addr) WriteNameTo(w io.Writer) { + a.writeNameTo(w, false) +} + +func (a *Addr) writeNameTo(w io.Writer, abiDetail bool) { + switch a.Name { default: fmt.Fprintf(w, "name=%d", a.Name) @@ -356,7 +378,7 @@ func (a *Addr) WriteNameTo(w io.Writer) { reg = Rconv(int(a.Reg)) } if a.Sym != nil { - fmt.Fprintf(w, "%s%s(%s)", a.Sym.Name, offConv(a.Offset), reg) + fmt.Fprintf(w, "%s%s%s(%s)", a.Sym.Name, abiDecorate(a, abiDetail), offConv(a.Offset), reg) } else { fmt.Fprintf(w, "%s(%s)", offConv(a.Offset), reg) } @@ -596,3 +618,10 @@ func Bool2int(b bool) int { } return i } + +func abiDecorate(a *Addr, abiDetail bool) string { + if !abiDetail || a.Sym == nil { + return "" + } + return fmt.Sprintf("<%s>", a.Sym.ABI()) +} diff --git a/src/cmd/internal/objabi/path.go b/src/cmd/internal/objabi/path.go index 2a42179a36..fd1c9981c6 100644 --- a/src/cmd/internal/objabi/path.go +++ b/src/cmd/internal/objabi/path.go @@ -39,3 +39,25 @@ func PathToPrefix(s string) string { return string(p) } + +// IsRuntimePackagePath examines 'pkgpath' and returns TRUE if it +// belongs to the collection of "runtime-related" packages, including +// "runtime" itself, "reflect", "syscall", and the +// "runtime/internal/*" packages. The compiler and/or assembler in +// some cases need to be aware of when they are building such a +// package, for example to enable features such as ABI selectors in +// assembly sources. +func IsRuntimePackagePath(pkgpath string) bool { + rval := false + switch pkgpath { + case "runtime": + rval = true + case "reflect": + rval = true + case "syscall": + rval = true + default: + rval = strings.HasPrefix(pkgpath, "runtime/internal") + } + return rval +} -- cgit v1.3 From 4d1cecdee8360ef12a817c124d7a04c9d29741c3 Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Wed, 14 Oct 2020 08:06:54 -0400 Subject: cmd/dist,cmd/go: broaden use of asm macro GOEXPERIMENT_REGABI This extends a change made in https://golang.org/cl/252258 to the go command (to define an asm macro when GOEXPERIMENT=regabi is in effect); we need this same macro during the bootstrap build in order to build the runtime correctly. In addition, expand the set of packages where the macro is applied to {runtime, reflect, syscall, runtime/internal/*}, and move the logic for deciding when something is a "runtime package" out of the assembler and into cmd/{go,dist}, introducing a new assembler command line flag instead. Updates #27539, #40724. Change-Id: Ifcc7f029f56873584de1e543c55b0d3e54ad6c49 Reviewed-on: https://go-review.googlesource.com/c/go/+/262317 Trust: Than McIntosh Run-TryBot: Than McIntosh TryBot-Result: Go Bot Reviewed-by: Austin Clements Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/flags/flags.go | 19 ++++++++++--------- src/cmd/asm/main.go | 4 ++-- src/cmd/dist/build.go | 35 +++++++++++++++++++++++++++++++++++ src/cmd/go/internal/work/gc.go | 22 ++++++++++++++-------- 4 files changed, 61 insertions(+), 19 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/flags/flags.go b/src/cmd/asm/internal/flags/flags.go index 64024cc97d..0765439959 100644 --- a/src/cmd/asm/internal/flags/flags.go +++ b/src/cmd/asm/internal/flags/flags.go @@ -15,15 +15,16 @@ import ( ) var ( - Debug = flag.Bool("debug", false, "dump instructions as they are parsed") - OutputFile = flag.String("o", "", "output file; default foo.o for /a/b/c/foo.s as first argument") - TrimPath = flag.String("trimpath", "", "remove prefix from recorded source file paths") - Shared = flag.Bool("shared", false, "generate code that can be linked into a shared library") - Dynlink = flag.Bool("dynlink", false, "support references to Go symbols defined in other shared libraries") - AllErrors = flag.Bool("e", false, "no limit on number of errors reported") - SymABIs = flag.Bool("gensymabis", false, "write symbol ABI information to output file, don't assemble") - Importpath = flag.String("p", "", "set expected package import to path") - Spectre = flag.String("spectre", "", "enable spectre mitigations in `list` (all, ret)") + Debug = flag.Bool("debug", false, "dump instructions as they are parsed") + OutputFile = flag.String("o", "", "output file; default foo.o for /a/b/c/foo.s as first argument") + TrimPath = flag.String("trimpath", "", "remove prefix from recorded source file paths") + Shared = flag.Bool("shared", false, "generate code that can be linked into a shared library") + Dynlink = flag.Bool("dynlink", false, "support references to Go symbols defined in other shared libraries") + AllErrors = flag.Bool("e", false, "no limit on number of errors reported") + SymABIs = flag.Bool("gensymabis", false, "write symbol ABI information to output file, don't assemble") + Importpath = flag.String("p", "", "set expected package import to path") + Spectre = flag.String("spectre", "", "enable spectre mitigations in `list` (all, ret)") + CompilingRuntime = flag.Bool("compilingRuntime", false, "source to be compiled is part of the Go runtime") ) var ( diff --git a/src/cmd/asm/main.go b/src/cmd/asm/main.go index 01c963ac72..149925d23f 100644 --- a/src/cmd/asm/main.go +++ b/src/cmd/asm/main.go @@ -52,7 +52,6 @@ func main() { case "all", "ret": ctxt.Retpoline = true } - compilingRuntime := objabi.IsRuntimePackagePath(*flags.Importpath) ctxt.Bso = bufio.NewWriter(os.Stdout) defer ctxt.Bso.Flush() @@ -75,7 +74,8 @@ func main() { var failedFile string for _, f := range flag.Args() { lexer := lex.NewLexer(f) - parser := asm.NewParser(ctxt, architecture, lexer, compilingRuntime) + parser := asm.NewParser(ctxt, architecture, lexer, + *flags.CompilingRuntime) ctxt.DiagFunc = func(format string, args ...interface{}) { diag = true log.Printf(format, args...) diff --git a/src/cmd/dist/build.go b/src/cmd/dist/build.go index 398ed6bce1..11da38ebdf 100644 --- a/src/cmd/dist/build.go +++ b/src/cmd/dist/build.go @@ -832,6 +832,21 @@ func runInstall(pkg string, ch chan struct{}) { asmArgs = append(asmArgs, "-D", "GOMIPS64_"+gomips64) } goasmh := pathf("%s/go_asm.h", workdir) + if IsRuntimePackagePath(pkg) { + asmArgs = append(asmArgs, "-compilingRuntime") + if os.Getenv("GOEXPERIMENT") == "regabi" { + // In order to make it easier to port runtime assembly + // to the register ABI, we introduce a macro + // indicating the experiment is enabled. + // + // Note: a similar change also appears in + // cmd/go/internal/work/gc.go. + // + // TODO(austin): Remove this once we commit to the + // register ABI (#40724). + asmArgs = append(asmArgs, "-D=GOEXPERIMENT_REGABI=1") + } + } // Collect symabis from assembly code. var symabis string @@ -1733,3 +1748,23 @@ func cmdlist() { fatalf("write failed: %v", err) } } + +// IsRuntimePackagePath examines 'pkgpath' and returns TRUE if it +// belongs to the collection of "runtime-related" packages, including +// "runtime" itself, "reflect", "syscall", and the +// "runtime/internal/*" packages. See also the function of the same +// name in cmd/internal/objabi/path.go. +func IsRuntimePackagePath(pkgpath string) bool { + rval := false + switch pkgpath { + case "runtime": + rval = true + case "reflect": + rval = true + case "syscall": + rval = true + default: + rval = strings.HasPrefix(pkgpath, "runtime/internal") + } + return rval +} diff --git a/src/cmd/go/internal/work/gc.go b/src/cmd/go/internal/work/gc.go index 56ad1872be..2df4a52ba5 100644 --- a/src/cmd/go/internal/work/gc.go +++ b/src/cmd/go/internal/work/gc.go @@ -292,14 +292,20 @@ func asmArgs(a *Action, p *load.Package) []interface{} { } } } - if p.ImportPath == "runtime" && objabi.Regabi_enabled != 0 { - // In order to make it easier to port runtime assembly - // to the register ABI, we introduce a macro - // indicating the experiment is enabled. - // - // TODO(austin): Remove this once we commit to the - // register ABI (#40724). - args = append(args, "-D=GOEXPERIMENT_REGABI=1") + if objabi.IsRuntimePackagePath(pkgpath) { + args = append(args, "-compilingRuntime") + if objabi.Regabi_enabled != 0 { + // In order to make it easier to port runtime assembly + // to the register ABI, we introduce a macro + // indicating the experiment is enabled. + // + // Note: a similar change also appears in + // cmd/dist/build.go. + // + // TODO(austin): Remove this once we commit to the + // register ABI (#40724). + args = append(args, "-D=GOEXPERIMENT_REGABI=1") + } } if cfg.Goarch == "mips" || cfg.Goarch == "mipsle" { -- cgit v1.3 From d595712540f00d980b1276ed25495ee7e05c1bfa Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Tue, 20 Oct 2020 09:06:42 -0400 Subject: cmd/asm: rename "compiling runtime" flag Rename the assembler "-compilingRuntime" flag to "-compiling-runtime", to be more consistent with the flag style of other Go commands. Change-Id: I8cc5cbf0b9b34d1dd4e9fa499d3fec8c1ef10b6e Reviewed-on: https://go-review.googlesource.com/c/go/+/263857 Trust: Than McIntosh Run-TryBot: Than McIntosh TryBot-Result: Go Bot Reviewed-by: Austin Clements Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/flags/flags.go | 2 +- src/cmd/dist/build.go | 2 +- src/cmd/go/internal/work/gc.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/flags/flags.go b/src/cmd/asm/internal/flags/flags.go index 0765439959..426e0156aa 100644 --- a/src/cmd/asm/internal/flags/flags.go +++ b/src/cmd/asm/internal/flags/flags.go @@ -24,7 +24,7 @@ var ( SymABIs = flag.Bool("gensymabis", false, "write symbol ABI information to output file, don't assemble") Importpath = flag.String("p", "", "set expected package import to path") Spectre = flag.String("spectre", "", "enable spectre mitigations in `list` (all, ret)") - CompilingRuntime = flag.Bool("compilingRuntime", false, "source to be compiled is part of the Go runtime") + CompilingRuntime = flag.Bool("compiling-runtime", false, "source to be compiled is part of the Go runtime") ) var ( diff --git a/src/cmd/dist/build.go b/src/cmd/dist/build.go index 11da38ebdf..d902addb0c 100644 --- a/src/cmd/dist/build.go +++ b/src/cmd/dist/build.go @@ -833,7 +833,7 @@ func runInstall(pkg string, ch chan struct{}) { } goasmh := pathf("%s/go_asm.h", workdir) if IsRuntimePackagePath(pkg) { - asmArgs = append(asmArgs, "-compilingRuntime") + asmArgs = append(asmArgs, "-compiling-runtime") if os.Getenv("GOEXPERIMENT") == "regabi" { // In order to make it easier to port runtime assembly // to the register ABI, we introduce a macro diff --git a/src/cmd/go/internal/work/gc.go b/src/cmd/go/internal/work/gc.go index 2df4a52ba5..e93031431c 100644 --- a/src/cmd/go/internal/work/gc.go +++ b/src/cmd/go/internal/work/gc.go @@ -293,7 +293,7 @@ func asmArgs(a *Action, p *load.Package) []interface{} { } } if objabi.IsRuntimePackagePath(pkgpath) { - args = append(args, "-compilingRuntime") + args = append(args, "-compiling-runtime") if objabi.Regabi_enabled != 0 { // In order to make it easier to port runtime assembly // to the register ABI, we introduce a macro -- cgit v1.3 From 4a6782562828779025d1423c9a04bc47ca9c3688 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Sat, 24 Oct 2020 03:53:53 +1100 Subject: cmd/internal/obj/riscv: support additional register to register moves Add support for signed and unsigned register to register moves of various sizes. This makes it easier to handle zero and sign extension and will allow for further changes that improve the compiler optimisations for riscv64. While here, change the existing register to register moves from obj.Prog rewriting to instruction generation. Change-Id: Id21911019b76922367a134da13c3449a84a1fb08 Reviewed-on: https://go-review.googlesource.com/c/go/+/264657 Trust: Joel Sing Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/testdata/riscvenc.s | 29 ++++++++------ src/cmd/internal/obj/riscv/obj.go | 56 ++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 26 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/riscvenc.s b/src/cmd/asm/internal/asm/testdata/riscvenc.s index 8d301f2dd5..e30a576473 100644 --- a/src/cmd/asm/internal/asm/testdata/riscvenc.s +++ b/src/cmd/asm/internal/asm/testdata/riscvenc.s @@ -297,6 +297,13 @@ start: MOVW X5, (X6) // 23205300 MOVW X5, 4(X6) // 23225300 + MOVB X5, X6 // 1393820313538343 + MOVH X5, X6 // 1393020313530343 + MOVW X5, X6 // 1b830200 + MOVBU X5, X6 // 13f3f20f + MOVHU X5, X6 // 1393020313530303 + MOVWU X5, X6 // 1393020213530302 + MOVF 4(X5), F0 // 07a04200 MOVF F0, 4(X5) // 27a20200 MOVF F0, F1 // d3000020 @@ -318,7 +325,7 @@ start: // These jumps can get printed as jumps to 2 because they go to the // second instruction in the function (the first instruction is an // invisible stack pointer adjustment). - JMP start // JMP 2 // 6ff01fc5 + JMP start // JMP 2 // 6ff09fc2 JMP (X5) // 67800200 JMP 4(X5) // 67804200 @@ -331,16 +338,16 @@ start: JMP asmtest(SB) // 970f0000 // Branch pseudo-instructions - BEQZ X5, start // BEQZ X5, 2 // e38a02c2 - BGEZ X5, start // BGEZ X5, 2 // e3d802c2 - BGT X5, X6, start // BGT X5, X6, 2 // e3c662c2 - BGTU X5, X6, start // BGTU X5, X6, 2 // e3e462c2 - BGTZ X5, start // BGTZ X5, 2 // e34250c2 - BLE X5, X6, start // BLE X5, X6, 2 // e3d062c2 - BLEU X5, X6, start // BLEU X5, X6, 2 // e3fe62c0 - BLEZ X5, start // BLEZ X5, 2 // e35c50c0 - BLTZ X5, start // BLTZ X5, 2 // e3ca02c0 - BNEZ X5, start // BNEZ X5, 2 // e39802c0 + BEQZ X5, start // BEQZ X5, 2 // e38602c0 + BGEZ X5, start // BGEZ X5, 2 // e3d402c0 + BGT X5, X6, start // BGT X5, X6, 2 // e3c262c0 + BGTU X5, X6, start // BGTU X5, X6, 2 // e3e062c0 + BGTZ X5, start // BGTZ X5, 2 // e34e50be + BLE X5, X6, start // BLE X5, X6, 2 // e3dc62be + BLEU X5, X6, start // BLEU X5, X6, 2 // e3fa62be + BLEZ X5, start // BLEZ X5, 2 // e35850be + BLTZ X5, start // BLTZ X5, 2 // e3c602be + BNEZ X5, start // BNEZ X5, 2 // e39402be // Set pseudo-instructions SEQZ X15, X15 // 93b71700 diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 045c2250b5..7bd3984e51 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -252,19 +252,7 @@ func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) { switch p.To.Type { case obj.TYPE_REG: switch p.As { - case AMOV: // MOV Ra, Rb -> ADDI $0, Ra, Rb - p.As = AADDI - p.Reg = p.From.Reg - p.From = obj.Addr{Type: obj.TYPE_CONST} - - case AMOVF: // MOVF Ra, Rb -> FSGNJS Ra, Ra, Rb - p.As = AFSGNJS - p.Reg = p.From.Reg - - case AMOVD: // MOVD Ra, Rb -> FSGNJD Ra, Ra, Rb - p.As = AFSGNJD - p.Reg = p.From.Reg - + case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: default: ctxt.Diag("unsupported register-register move at %v", p) } @@ -1805,6 +1793,44 @@ func instructionsForProg(p *obj.Prog) []*instruction { } ins.imm = p.To.Offset + case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: + // Handle register to register moves. + if p.From.Type != obj.TYPE_REG || p.To.Type != obj.TYPE_REG { + break + } + switch p.As { + case AMOV: // MOV Ra, Rb -> ADDI $0, Ra, Rb + ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, uint32(p.From.Reg), obj.REG_NONE, 0 + case AMOVW: // MOVW Ra, Rb -> ADDIW $0, Ra, Rb + ins.as, ins.rs1, ins.rs2, ins.imm = AADDIW, uint32(p.From.Reg), obj.REG_NONE, 0 + case AMOVBU: // MOVBU Ra, Rb -> ANDI $255, Ra, Rb + ins.as, ins.rs1, ins.rs2, ins.imm = AANDI, uint32(p.From.Reg), obj.REG_NONE, 255 + case AMOVF: // MOVF Ra, Rb -> FSGNJS Ra, Ra, Rb + ins.as, ins.rs1 = AFSGNJS, uint32(p.From.Reg) + case AMOVD: // MOVD Ra, Rb -> FSGNJD Ra, Ra, Rb + ins.as, ins.rs1 = AFSGNJD, uint32(p.From.Reg) + case AMOVB, AMOVH: + // Use SLLI/SRAI to extend. + ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE + if p.As == AMOVB { + ins.imm = 56 + } else if p.As == AMOVH { + ins.imm = 48 + } + ins2 := &instruction{as: ASRAI, rd: ins.rd, rs1: ins.rd, imm: ins.imm} + inss = append(inss, ins2) + case AMOVHU, AMOVWU: + // Use SLLI/SRLI to extend. + ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE + if p.As == AMOVHU { + ins.imm = 48 + } else if p.As == AMOVWU { + ins.imm = 32 + } + ins2 := &instruction{as: ASRLI, rd: ins.rd, rs1: ins.rd, imm: ins.imm} + inss = append(inss, ins2) + } + case ALW, ALWU, ALH, ALHU, ALB, ALBU, ALD, AFLW, AFLD: if p.From.Type != obj.TYPE_MEM { p.Ctxt.Diag("%v requires memory for source", p) @@ -1859,13 +1885,13 @@ func instructionsForProg(p *obj.Prog) []*instruction { } else { ins.as = AFEQD } - ins = &instruction{ + ins2 := &instruction{ as: AXORI, // [bit] xor 1 = not [bit] rd: ins.rd, rs1: ins.rd, imm: 1, } - inss = append(inss, ins) + inss = append(inss, ins2) case AFSQRTS, AFSQRTD: // These instructions expect a zero (i.e. float register 0) -- cgit v1.3 From 3089ef6bd7ecc7af4b23eb68e3d7879d340aa673 Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Mon, 28 Sep 2020 18:58:51 +0800 Subject: cmd/asm: add several arm64 SIMD instructions This patch enables VSLI, VUADDW(2), VUSRA and FMOVQ SIMD instructions required by the issue #40725. And the GNU syntax of 'FMOVQ' is 128-bit ldr/str(immediate, simd&fp). Add test cases. Fixes #40725 Change-Id: Ide968ef4a9385ce4cd8f69bce854289014d30456 Reviewed-on: https://go-review.googlesource.com/c/go/+/258397 Trust: fannie zhang Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/testdata/arm64.s | 11 + src/cmd/asm/internal/asm/testdata/arm64error.s | 16 +- src/cmd/internal/obj/arm64/a.out.go | 59 +++-- src/cmd/internal/obj/arm64/anames.go | 5 + src/cmd/internal/obj/arm64/anames7.go | 10 + src/cmd/internal/obj/arm64/asm7.go | 300 ++++++++++++++++++------- 6 files changed, 290 insertions(+), 111 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 7f495b90bb..b6c22e0d6f 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -454,6 +454,17 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 FMOVD F20, (R2) // 540000fd FMOVD.P F20, 8(R1) // 348400fc FMOVD.W 8(R1), F20 // 348c40fc + FMOVQ.P F13, 11(R10) // 4db5803c + FMOVQ.W F15, 11(R20) // 8fbe803c + FMOVQ.P 11(R10), F13 // 4db5c03c + FMOVQ.W 11(R20), F15 // 8fbec03c + FMOVQ F10, 65520(R10) // 4afdbf3d + FMOVQ F11, 64(RSP) // eb13803d + FMOVQ F11, 8(R20) // 8b82803c + FMOVQ F11, 4(R20) // 8b42803c + FMOVQ 32(R5), F2 // a208c03d + FMOVQ 65520(R10), F10 // 4afdff3d + FMOVQ 64(RSP), F11 // eb13c03d PRFM (R2), PLDL1KEEP // 400080f9 PRFM 16(R2), PLDL1KEEP // 400880f9 PRFM 48(R6), PSTL2STRM // d31880f9 diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 20b1f3e9f0..c3a617066a 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -87,13 +87,13 @@ TEXT errors(SB),$0 VLD1.P 32(R1), [V8.S4, V9.S4, V10.S4] // ERROR "invalid post-increment offset" VLD1.P 48(R1), [V7.S4, V8.S4, V9.S4, V10.S4] // ERROR "invalid post-increment offset" VPMULL V1.D1, V2.H4, V3.Q1 // ERROR "invalid arrangement" - VPMULL V1.H4, V2.H4, V3.Q1 // ERROR "invalid arrangement" - VPMULL V1.D2, V2.D2, V3.Q1 // ERROR "invalid arrangement" - VPMULL V1.B16, V2.B16, V3.H8 // ERROR "invalid arrangement" + VPMULL V1.H4, V2.H4, V3.Q1 // ERROR "operand mismatch" + VPMULL V1.D2, V2.D2, V3.Q1 // ERROR "operand mismatch" + VPMULL V1.B16, V2.B16, V3.H8 // ERROR "operand mismatch" VPMULL2 V1.D2, V2.H4, V3.Q1 // ERROR "invalid arrangement" - VPMULL2 V1.H4, V2.H4, V3.Q1 // ERROR "invalid arrangement" - VPMULL2 V1.D1, V2.D1, V3.Q1 // ERROR "invalid arrangement" - VPMULL2 V1.B8, V2.B8, V3.H8 // ERROR "invalid arrangement" + VPMULL2 V1.H4, V2.H4, V3.Q1 // ERROR "operand mismatch" + VPMULL2 V1.D1, V2.D1, V3.Q1 // ERROR "operand mismatch" + VPMULL2 V1.B8, V2.B8, V3.H8 // ERROR "operand mismatch" VEXT $8, V1.B16, V2.B8, V2.B16 // ERROR "invalid arrangement" VEXT $8, V1.H8, V2.H8, V2.H8 // ERROR "invalid arrangement" VRBIT V1.B16, V2.B8 // ERROR "invalid arrangement" @@ -353,4 +353,8 @@ TEXT errors(SB),$0 VUSHLL2 $32, V30.S4, V2.D2 // ERROR "shift amount out of range" VBIF V0.B8, V1.B8, V2.B16 // ERROR "operand mismatch" VBIF V0.D2, V1.D2, V2.D2 // ERROR "invalid arrangement" + VUADDW V9.B8, V12.H8, V14.B8 // ERROR "invalid arrangement" + VUADDW2 V9.B8, V12.S4, V14.S4 // ERROR "operand mismatch" + VSLI $64, V7.D2, V8.D2 // ERROR "shift out of range" + VUSRA $0, V7.D2, V8.D2 // ERROR "shift out of range" RET diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 33319e48df..98504353e2 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -419,27 +419,33 @@ const ( C_SBRA // for TYPE_BRANCH C_LBRA - C_ZAUTO // 0(RSP) - C_NSAUTO_8 // -256 <= x < 0, 0 mod 8 - C_NSAUTO_4 // -256 <= x < 0, 0 mod 4 - C_NSAUTO // -256 <= x < 0 - C_NPAUTO // -512 <= x < 0, 0 mod 8 - C_NAUTO4K // -4095 <= x < 0 - C_PSAUTO_8 // 0 to 255, 0 mod 8 - C_PSAUTO_4 // 0 to 255, 0 mod 4 - C_PSAUTO // 0 to 255 - C_PPAUTO // 0 to 504, 0 mod 8 - C_UAUTO4K_8 // 0 to 4095, 0 mod 8 - C_UAUTO4K_4 // 0 to 4095, 0 mod 4 - C_UAUTO4K_2 // 0 to 4095, 0 mod 2 - C_UAUTO4K // 0 to 4095 - C_UAUTO8K_8 // 0 to 8190, 0 mod 8 - C_UAUTO8K_4 // 0 to 8190, 0 mod 4 - C_UAUTO8K // 0 to 8190, 0 mod 2 - C_UAUTO16K_8 // 0 to 16380, 0 mod 8 - C_UAUTO16K // 0 to 16380, 0 mod 4 - C_UAUTO32K // 0 to 32760, 0 mod 8 - C_LAUTO // any other 32-bit constant + C_ZAUTO // 0(RSP) + C_NSAUTO_8 // -256 <= x < 0, 0 mod 8 + C_NSAUTO_4 // -256 <= x < 0, 0 mod 4 + C_NSAUTO // -256 <= x < 0 + C_NPAUTO // -512 <= x < 0, 0 mod 8 + C_NAUTO4K // -4095 <= x < 0 + C_PSAUTO_8 // 0 to 255, 0 mod 8 + C_PSAUTO_4 // 0 to 255, 0 mod 4 + C_PSAUTO // 0 to 255 + C_PPAUTO_16 // 0 to 504, 0 mod 16 + C_PPAUTO // 0 to 504, 0 mod 8 + C_UAUTO4K_16 // 0 to 4095, 0 mod 16 + C_UAUTO4K_8 // 0 to 4095, 0 mod 8 + C_UAUTO4K_4 // 0 to 4095, 0 mod 4 + C_UAUTO4K_2 // 0 to 4095, 0 mod 2 + C_UAUTO4K // 0 to 4095 + C_UAUTO8K_16 // 0 to 8190, 0 mod 16 + C_UAUTO8K_8 // 0 to 8190, 0 mod 8 + C_UAUTO8K_4 // 0 to 8190, 0 mod 4 + C_UAUTO8K // 0 to 8190, 0 mod 2 + C_PSAUTO + C_UAUTO16K_16 // 0 to 16380, 0 mod 16 + C_UAUTO16K_8 // 0 to 16380, 0 mod 8 + C_UAUTO16K // 0 to 16380, 0 mod 4 + C_PSAUTO + C_UAUTO32K_16 // 0 to 32760, 0 mod 16 + C_PSAUTO + C_UAUTO32K // 0 to 32760, 0 mod 8 + C_PSAUTO + C_UAUTO64K // 0 to 65520, 0 mod 16 + C_PSAUTO + C_LAUTO // any other 32-bit constant C_SEXT1 // 0 to 4095, direct C_SEXT2 // 0 to 8190 @@ -457,17 +463,23 @@ const ( C_PSOREG_8 C_PSOREG_4 C_PSOREG + C_PPOREG_16 C_PPOREG + C_UOREG4K_16 C_UOREG4K_8 C_UOREG4K_4 C_UOREG4K_2 C_UOREG4K + C_UOREG8K_16 C_UOREG8K_8 C_UOREG8K_4 C_UOREG8K + C_UOREG16K_16 C_UOREG16K_8 C_UOREG16K + C_UOREG32K_16 C_UOREG32K + C_UOREG64K C_LOREG C_ADDR // TODO(aram): explain difference from C_VCONADDR @@ -873,6 +885,7 @@ const ( AFDIVS AFLDPD AFLDPS + AFMOVQ AFMOVD AFMOVS AVMOVQ @@ -1001,6 +1014,7 @@ const ( AVUZP2 AVSHL AVSRI + AVSLI AVBSL AVBIT AVTBL @@ -1008,6 +1022,9 @@ const ( AVZIP1 AVZIP2 AVCMTST + AVUADDW2 + AVUADDW + AVUSRA ALAST AB = obj.AJMP ABL = obj.ACALL diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index e5534e26b9..126eefd032 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -379,6 +379,7 @@ var Anames = []string{ "FDIVS", "FLDPD", "FLDPS", + "FMOVQ", "FMOVD", "FMOVS", "VMOVQ", @@ -507,6 +508,7 @@ var Anames = []string{ "VUZP2", "VSHL", "VSRI", + "VSLI", "VBSL", "VBIT", "VTBL", @@ -514,5 +516,8 @@ var Anames = []string{ "VZIP1", "VZIP2", "VCMTST", + "VUADDW2", + "VUADDW", + "VUSRA", "LAST", } diff --git a/src/cmd/internal/obj/arm64/anames7.go b/src/cmd/internal/obj/arm64/anames7.go index 96c9f788d9..f7e99517ce 100644 --- a/src/cmd/internal/obj/arm64/anames7.go +++ b/src/cmd/internal/obj/arm64/anames7.go @@ -51,16 +51,21 @@ var cnames7 = []string{ "PSAUTO_4", "PSAUTO", "PPAUTO", + "UAUTO4K_16", "UAUTO4K_8", "UAUTO4K_4", "UAUTO4K_2", "UAUTO4K", + "UAUTO8K_16", "UAUTO8K_8", "UAUTO8K_4", "UAUTO8K", + "UAUTO16K_16", "UAUTO16K_8", "UAUTO16K", + "UAUTO32K_8", "UAUTO32K", + "UAUTO64K", "LAUTO", "SEXT1", "SEXT2", @@ -78,16 +83,21 @@ var cnames7 = []string{ "PSOREG_4", "PSOREG", "PPOREG", + "UOREG4K_16", "UOREG4K_8", "UOREG4K_4", "UOREG4K_2", "UOREG4K", + "UOREG8K_16", "UOREG8K_8", "UOREG8K_4", "UOREG8K", + "UOREG16K_16", "UOREG16K_8", "UOREG16K", + "UOREG32K_16", "UOREG32K", + "UOREG64K", "LOREG", "ADDR", "GOTADDR", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 92c5729d25..8cbf5e719f 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -483,6 +483,7 @@ var optab = []Optab{ {AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, {AVUSHLL, C_VCON, C_ARNG, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, + {AVUADDW, C_ARNG, C_ARNG, C_NONE, C_ARNG, 105, 4, 0, 0, 0}, /* conditional operations */ {ACSEL, C_COND, C_REG, C_REG, C_REG, 18, 4, 0, 0, 0}, @@ -509,6 +510,8 @@ var optab = []Optab{ {AFMOVS, C_FREG, C_NONE, C_NONE, C_UOREG16K, 20, 4, 0, 0, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_UAUTO32K, 20, 4, REGSP, 0, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_UOREG32K, 20, 4, 0, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_UAUTO64K, 20, 4, REGSP, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_UOREG64K, 20, 4, 0, 0, 0}, /* unscaled 9-bit signed displacement store */ {AMOVB, C_REG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, @@ -526,6 +529,8 @@ var optab = []Optab{ {AFMOVS, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, /* scaled 12-bit unsigned displacement load */ {AMOVB, C_UAUTO4K, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, @@ -543,6 +548,8 @@ var optab = []Optab{ {AFMOVS, C_UOREG16K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, {AFMOVD, C_UAUTO32K, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, {AFMOVD, C_UOREG32K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVQ, C_UAUTO64K, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVQ, C_UOREG64K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, /* unscaled 9-bit signed displacement load */ {AMOVB, C_NSAUTO, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, @@ -560,6 +567,8 @@ var optab = []Optab{ {AFMOVS, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, {AFMOVD, C_NSAUTO, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, {AFMOVD, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVQ, C_NSAUTO, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVQ, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, /* long displacement store */ {AMOVB, C_REG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, @@ -577,6 +586,8 @@ var optab = []Optab{ {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, /* long displacement load */ {AMOVB, C_LAUTO, C_NONE, C_NONE, C_REG, 31, 8, REGSP, LFROM, 0}, @@ -594,6 +605,8 @@ var optab = []Optab{ {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, {AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, + {AFMOVQ, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, /* pre/post-indexed load (unscaled, signed 9-bit offset) */ {AMOVD, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, @@ -603,6 +616,7 @@ var optab = []Optab{ {AMOVBU, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, {AMOVD, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, {AMOVW, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, @@ -611,6 +625,7 @@ var optab = []Optab{ {AMOVBU, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, /* pre/post-indexed store (unscaled, signed 9-bit offset) */ {AMOVD, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, @@ -620,6 +635,7 @@ var optab = []Optab{ {AMOVBU, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AMOVD, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AMOVW, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, @@ -628,6 +644,7 @@ var optab = []Optab{ {AMOVBU, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, /* load with shifted or extended register offset */ {AMOVD, C_ROFF, C_NONE, C_NONE, C_REG, 98, 4, 0, 0, 0}, @@ -1207,37 +1224,49 @@ func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) { C_PSAUTO, C_PSAUTO_8, C_PSAUTO_4, + C_PPAUTO_16, C_PPAUTO, + C_UAUTO4K_16, C_UAUTO4K_8, C_UAUTO4K_4, C_UAUTO4K_2, C_UAUTO4K, + C_UAUTO8K_16, C_UAUTO8K_8, C_UAUTO8K_4, C_UAUTO8K, + C_UAUTO16K_16, C_UAUTO16K_8, C_UAUTO16K, + C_UAUTO32K_16, C_UAUTO32K, + C_UAUTO64K, C_NSAUTO_8, C_NSAUTO_4, C_NSAUTO, C_NPAUTO, C_NAUTO4K, C_LAUTO, - C_PPOREG, C_PSOREG, - C_PSOREG_4, C_PSOREG_8, + C_PSOREG_4, + C_PPOREG_16, + C_PPOREG, + C_UOREG4K_16, C_UOREG4K_8, C_UOREG4K_4, C_UOREG4K_2, C_UOREG4K, + C_UOREG8K_16, C_UOREG8K_8, C_UOREG8K_4, C_UOREG8K, + C_UOREG16K_16, C_UOREG16K_8, C_UOREG16K, + C_UOREG32K_16, C_UOREG32K, + C_UOREG64K, C_NSOREG_8, C_NSOREG_4, C_NSOREG, @@ -1532,10 +1561,18 @@ func autoclass(l int64) int { } return C_PSAUTO } - if l <= 504 && l&7 == 0 { - return C_PPAUTO + if l <= 504 { + if l&15 == 0 { + return C_PPAUTO_16 + } + if l&7 == 0 { + return C_PPAUTO + } } if l <= 4095 { + if l&15 == 0 { + return C_UAUTO4K_16 + } if l&7 == 0 { return C_UAUTO4K_8 } @@ -1548,6 +1585,9 @@ func autoclass(l int64) int { return C_UAUTO4K } if l <= 8190 { + if l&15 == 0 { + return C_UAUTO8K_16 + } if l&7 == 0 { return C_UAUTO8K_8 } @@ -1559,6 +1599,9 @@ func autoclass(l int64) int { } } if l <= 16380 { + if l&15 == 0 { + return C_UAUTO16K_16 + } if l&7 == 0 { return C_UAUTO16K_8 } @@ -1566,8 +1609,16 @@ func autoclass(l int64) int { return C_UAUTO16K } } - if l <= 32760 && (l&7) == 0 { - return C_UAUTO32K + if l <= 32760 { + if l&15 == 0 { + return C_UAUTO32K_16 + } + if l&7 == 0 { + return C_UAUTO32K + } + } + if l <= 65520 && (l&15) == 0 { + return C_UAUTO64K } return C_LAUTO } @@ -1595,6 +1646,8 @@ func (c *ctxt7) offsetshift(p *obj.Prog, v int64, cls int) int64 { s = 2 case C_UAUTO32K, C_UOREG32K: s = 3 + case C_UAUTO64K, C_UOREG64K: + s = 4 default: c.ctxt.Diag("bad class: %v\n%v", DRconv(cls), p) } @@ -2126,46 +2179,66 @@ func cmp(a int, b int) bool { case C_PPAUTO: switch b { - case C_ZAUTO, C_PSAUTO_8: + case C_ZAUTO, C_PSAUTO_8, C_PPAUTO_16: return true } case C_UAUTO4K: switch b { case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, - C_PPAUTO, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8: + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16: return true } case C_UAUTO8K: switch b { - case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PPAUTO, - C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO8K_4, C_UAUTO8K_8: + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16: return true } case C_UAUTO16K: switch b { - case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PPAUTO, - C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO16K_8: + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K_8, C_UAUTO16K_16: return true } case C_UAUTO32K: switch b { case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, - C_PPAUTO, C_UAUTO4K_8, C_UAUTO8K_8, C_UAUTO16K_8: + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K_8, C_UAUTO16K_16, + C_UAUTO32K_16: + return true + } + + case C_UAUTO64K: + switch b { + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, + C_PPAUTO_16, C_UAUTO4K_16, C_UAUTO8K_16, C_UAUTO16K_16, + C_UAUTO32K_16: return true } case C_LAUTO: switch b { - case C_ZAUTO, C_NSAUTO, C_NSAUTO_4, C_NSAUTO_8, C_NPAUTO, - C_NAUTO4K, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PPAUTO, - C_UAUTO4K, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, - C_UAUTO8K, C_UAUTO8K_4, C_UAUTO8K_8, - C_UAUTO16K, C_UAUTO16K_8, - C_UAUTO32K: + case C_ZAUTO, C_NSAUTO, C_NSAUTO_4, C_NSAUTO_8, C_NPAUTO, C_NAUTO4K, + C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K, C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K, C_UAUTO16K_8, C_UAUTO16K_16, + C_UAUTO32K, C_UAUTO32K_16, + C_UAUTO64K: return true } @@ -2192,6 +2265,11 @@ func cmp(a int, b int) bool { return true } + case C_PSOREG_8: + if b == C_ZOREG { + return true + } + case C_PSOREG_4: switch b { case C_ZOREG, C_PSOREG_8: @@ -2206,48 +2284,66 @@ func cmp(a int, b int) bool { case C_PPOREG: switch b { - case C_ZOREG, C_PSOREG_8: + case C_ZOREG, C_PSOREG_8, C_PPOREG_16: return true } case C_UOREG4K: switch b { - case C_ZOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG, - C_PPOREG, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8: + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16: return true } case C_UOREG8K: switch b { - case C_ZOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG, - C_PPOREG, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, - C_UOREG8K_4, C_UOREG8K_8: + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16: return true } case C_UOREG16K: switch b { - case C_ZOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG, - C_PPOREG, C_UOREG4K_4, C_UOREG4K_8, C_UOREG8K_4, - C_UOREG8K_8, C_UOREG16K_8: + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K_8, C_UOREG16K_16: return true } case C_UOREG32K: switch b { - case C_ZOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG, - C_PPOREG, C_UOREG4K_8, C_UOREG8K_8, C_UOREG16K_8: + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K_8, C_UOREG16K_16, + C_UOREG32K_16: + return true + } + + case C_UOREG64K: + switch b { + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG_16, C_UOREG4K_16, C_UOREG8K_16, C_UOREG16K_16, + C_UOREG32K_16: return true } case C_LOREG: switch b { - case C_ZOREG, C_NSOREG, C_NSOREG_4, C_NSOREG_8, C_NPOREG, - C_NOREG4K, C_PSOREG_4, C_PSOREG_8, C_PSOREG, C_PPOREG, - C_UOREG4K, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, - C_UOREG8K, C_UOREG8K_4, C_UOREG8K_8, - C_UOREG16K, C_UOREG16K_8, - C_UOREG32K: + case C_ZOREG, C_NSOREG, C_NSOREG_4, C_NSOREG_8, C_NPOREG, C_NOREG4K, + C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K, C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K, C_UOREG16K_8, C_UOREG16K_16, + C_UOREG32K, C_UOREG32K_16, + C_UOREG64K: return true } @@ -2675,7 +2771,8 @@ func buildop(ctxt *obj.Link) { case AFCSELD: oprangeset(AFCSELS, t) - case AFMOVS, AFMOVD, AVMOVQ, AVMOVD, AVMOVS: + case AFMOVQ, AFMOVD, AFMOVS, + AVMOVQ, AVMOVD, AVMOVS: break case AFCVTZSD: @@ -2802,6 +2899,8 @@ func buildop(ctxt *obj.Link) { case AVUSHR: oprangeset(AVSHL, t) oprangeset(AVSRI, t) + oprangeset(AVSLI, t) + oprangeset(AVUSRA, t) case AVREV32: oprangeset(AVCNT, t) @@ -2829,6 +2928,9 @@ func buildop(ctxt *obj.Link) { case AVEOR3: oprangeset(AVBCAX, t) + case AVUADDW: + oprangeset(AVUADDW2, t) + case ASHA1H, AVCNT, AVMOV, @@ -4805,47 +4907,31 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 |= (uint32(imm5&0x1f) << 16) | (uint32(imm4&0xf) << 11) | (uint32(rf&31) << 5) | uint32(rt&31) - case 93: /* vpmull{2} Vm., Vn., Vd */ - af := int((p.From.Reg >> 5) & 15) - at := int((p.To.Reg >> 5) & 15) - a := int((p.Reg >> 5) & 15) + case 93: /* vpmull{2} Vm., Vn., Vd. */ + af := uint8((p.From.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + a := uint8((p.Reg >> 5) & 15) + if af != a { + c.ctxt.Diag("invalid arrangement: %v", p) + } var Q, size uint32 - if p.As == AVPMULL { - Q = 0 - } else { + if p.As == AVPMULL2 { Q = 1 } - - var fArng int - switch at { - case ARNG_8H: - if Q == 0 { - fArng = ARNG_8B - } else { - fArng = ARNG_16B - } + switch pack(Q, at, af) { + case pack(0, ARNG_8H, ARNG_8B), pack(1, ARNG_8H, ARNG_16B): size = 0 - case ARNG_1Q: - if Q == 0 { - fArng = ARNG_1D - } else { - fArng = ARNG_2D - } + case pack(0, ARNG_1Q, ARNG_1D), pack(1, ARNG_1Q, ARNG_2D): size = 3 default: - c.ctxt.Diag("invalid arrangement on Vd.: %v", p) - } - - if af != a || af != fArng { - c.ctxt.Diag("invalid arrangement: %v", p) + c.ctxt.Diag("operand mismatch: %v\n", p) } o1 = c.oprrr(p, p.As) rf := int((p.From.Reg) & 31) rt := int((p.To.Reg) & 31) r := int((p.Reg) & 31) - o1 |= ((Q & 1) << 30) | ((size & 3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) case 94: /* vext $imm4, Vm., Vn., Vd. */ @@ -4883,7 +4969,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 |= ((Q & 1) << 30) | (uint32(r&31) << 16) | (uint32(index&15) << 11) | (uint32(rf&31) << 5) | uint32(rt&31) - case 95: /* vushr $shift, Vn., Vd. */ + case 95: /* vushr/vshl/vsri/vsli/vusra $shift, Vn., Vd. */ at := int((p.To.Reg >> 5) & 15) af := int((p.Reg >> 5) & 15) shift := int(p.From.Offset) @@ -4920,14 +5006,13 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } imm := 0 - switch p.As { - case AVUSHR, AVSRI: + case AVUSHR, AVSRI, AVUSRA: imm = esize*2 - shift if imm < esize || imm > imax { c.ctxt.Diag("shift out of range: %v", p) } - case AVSHL: + case AVSHL, AVSLI: imm = esize + shift if imm > imax { c.ctxt.Diag("shift out of range: %v", p) @@ -4940,7 +5025,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rt := int((p.To.Reg) & 31) rf := int((p.Reg) & 31) - o1 |= ((Q & 1) << 30) | (uint32(imm&127) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + o1 |= ((Q & 1) << 30) | (uint32(imm&0x7f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) case 96: /* vst1 Vt1.[index], offset(Rn) */ af := int((p.From.Reg >> 5) & 15) @@ -5169,11 +5254,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { shift = 0 } - pack := func(q, x, y uint8) uint32 { - return uint32(q)<<16 | uint32(x)<<8 | uint32(y) - } - - var Q uint8 = uint8(o1>>30) & 1 + Q := (o1 >> 30) & 1 var immh, width uint8 switch pack(Q, af, at) { case pack(0, ARNG_8B, ARNG_8H): @@ -5195,6 +5276,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { c.ctxt.Diag("shift amount out of range: %v\n", p) } o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31) + case 103: /* VEOR3/VBCAX Va.B16, Vm.B16, Vn.B16, Vd.B16 */ ta := (p.From.Reg >> 5) & 15 tm := (p.Reg >> 5) & 15 @@ -5240,6 +5322,35 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 |= (uint32(r&31) << 16) | (uint32(index&63) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + case 105: /* vuaddw{2} Vm., Vn., Vd. */ + af := uint8((p.From.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + a := uint8((p.Reg >> 5) & 15) + if at != a { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + var Q, size uint32 + if p.As == AVUADDW2 { + Q = 1 + } + switch pack(Q, at, af) { + case pack(0, ARNG_8H, ARNG_8B), pack(1, ARNG_8H, ARNG_16B): + size = 0 + case pack(0, ARNG_4S, ARNG_4H), pack(1, ARNG_4S, ARNG_8H): + size = 1 + case pack(0, ARNG_2D, ARNG_2S), pack(1, ARNG_2D, ARNG_4S): + size = 2 + default: + c.ctxt.Diag("operand mismatch: %v\n", p) + } + + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + o1 |= ((Q & 1) << 30) | ((size & 3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) } out[0] = o1 out[1] = o2 @@ -5898,6 +6009,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVUZP2: return 7<<25 | 1<<14 | 3<<11 + + case AVUADDW, AVUADDW2: + return 0x17<<25 | 1<<21 | 1<<12 } c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) @@ -6097,6 +6211,9 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVSRI: return 0x5E<<23 | 17<<10 + case AVSLI: + return 0x5E<<23 | 21<<10 + case AVUSHLL, AVUXTL: return 1<<29 | 15<<24 | 0x29<<10 @@ -6106,6 +6223,9 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVXAR: return 0xCE<<24 | 1<<23 + case AVUSRA: + return 1<<29 | 15<<24 | 5<<10 + case APRFM: return 0xf9<<24 | 2<<22 } @@ -6535,7 +6655,14 @@ func (c *ctxt7) olsr9s(p *obj.Prog, o int32, v int32, b int, r int) uint32 { // pre/post-indexed store. // and the 12-bit and 9-bit are distinguished in olsr12u and oslr9s. func (c *ctxt7) opstr(p *obj.Prog, a obj.As) uint32 { - return LD2STR(c.opldr(p, a)) + enc := c.opldr(p, a) + switch p.As { + case AFMOVQ: + enc = enc &^ (1 << 22) + default: + enc = LD2STR(enc) + } + return enc } // load(immediate) @@ -6571,6 +6698,9 @@ func (c *ctxt7) opldr(p *obj.Prog, a obj.As) uint32 { case AFMOVD: return LDSTR(3, 1, 1) + + case AFMOVQ: + return LDSTR(0, 1, 3) } c.ctxt.Diag("bad opldr %v\n%v", a, p) @@ -7063,10 +7193,13 @@ func (c *ctxt7) maskOpvldvst(p *obj.Prog, o1 uint32) uint32 { */ func movesize(a obj.As) int { switch a { - case AMOVD: + case AFMOVQ: + return 4 + + case AMOVD, AFMOVD: return 3 - case AMOVW, AMOVWU: + case AMOVW, AMOVWU, AFMOVS: return 2 case AMOVH, AMOVHU: @@ -7075,12 +7208,6 @@ func movesize(a obj.As) int { case AMOVB, AMOVBU: return 0 - case AFMOVS: - return 2 - - case AFMOVD: - return 3 - default: return -1 } @@ -7145,3 +7272,8 @@ func (c *ctxt7) encRegShiftOrExt(a *obj.Addr, r int16) uint32 { return 0 } + +// pack returns the encoding of the "Q" field and two arrangement specifiers. +func pack(q uint32, arngA, arngB uint8) uint32 { + return uint32(q)<<16 | uint32(arngA)<<8 | uint32(arngB) +} -- cgit v1.3 From 53efbdb12e056c5fd4e967f07c7e660377f7aba9 Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Fri, 16 Oct 2020 19:06:18 +0800 Subject: cmd/asm: sort test cases in the arm64.s file This patch sorts the test cases in the arm64.s file by instruction category and deletes comments related to the old parser. Change-Id: I9bbf56281e247a4fd8d5e670e8ad67c923aef1ee Reviewed-on: https://go-review.googlesource.com/c/go/+/263458 Trust: fannie zhang Trust: Emmanuel Odeke Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/testdata/arm64.s | 702 ++++++++++++------------------ 1 file changed, 272 insertions(+), 430 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index b6c22e0d6f..7943990e16 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -10,14 +10,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 -// -// ADD -// -// LTYPE1 imsr ',' spreg ',' reg -// { -// outcode($1, &$2, $4, &$6); -// } -// imsr comes from the old 7a, we only support immediates and registers + +// arithmetic operations ADDW $1, R2, R3 ADDW R1, R2, R3 ADDW R1, ZR, R3 @@ -25,18 +19,29 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 ADD R1, R2, R3 ADD R1, ZR, R3 ADD $1, R2, R3 - ADD $0x000aaa, R2, R3 // ADD $2730, R2, R3 // 43a82a91 - ADD $0x000aaa, R2 // ADD $2730, R2 // 42a82a91 - ADD $0xaaa000, R2, R3 // ADD $11182080, R2, R3 // 43a86a91 - ADD $0xaaa000, R2 // ADD $11182080, R2 // 42a86a91 - ADD $0xaaaaaa, R2, R3 // ADD $11184810, R2, R3 // 43a82a9163a86a91 - ADD $0xaaaaaa, R2 // ADD $11184810, R2 // 42a82a9142a86a91 - SUB $0x000aaa, R2, R3 // SUB $2730, R2, R3 // 43a82ad1 - SUB $0x000aaa, R2 // SUB $2730, R2 // 42a82ad1 - SUB $0xaaa000, R2, R3 // SUB $11182080, R2, R3 // 43a86ad1 - SUB $0xaaa000, R2 // SUB $11182080, R2 // 42a86ad1 - SUB $0xaaaaaa, R2, R3 // SUB $11184810, R2, R3 // 43a82ad163a86ad1 - SUB $0xaaaaaa, R2 // SUB $11184810, R2 // 42a82ad142a86ad1 + ADDW $1, R2 + ADDW R1, R2 + ADD $1, R2 + ADD R1, R2 + ADD R1>>11, R2 + ADD R1<<22, R2 + ADD R1->33, R2 + ADD $0x000aaa, R2, R3 // ADD $2730, R2, R3 // 43a82a91 + ADD $0x000aaa, R2 // ADD $2730, R2 // 42a82a91 + ADD $0xaaa000, R2, R3 // ADD $11182080, R2, R3 // 43a86a91 + ADD $0xaaa000, R2 // ADD $11182080, R2 // 42a86a91 + ADD $0xaaaaaa, R2, R3 // ADD $11184810, R2, R3 // 43a82a9163a86a91 + ADD $0xaaaaaa, R2 // ADD $11184810, R2 // 42a82a9142a86a91 + SUB $0x000aaa, R2, R3 // SUB $2730, R2, R3 // 43a82ad1 + SUB $0x000aaa, R2 // SUB $2730, R2 // 42a82ad1 + SUB $0xaaa000, R2, R3 // SUB $11182080, R2, R3 // 43a86ad1 + SUB $0xaaa000, R2 // SUB $11182080, R2 // 42a86ad1 + SUB $0xaaaaaa, R2, R3 // SUB $11184810, R2, R3 // 43a82ad163a86ad1 + SUB $0xaaaaaa, R2 // SUB $11184810, R2 // 42a82ad142a86ad1 + ADDW $0x60060, R2 // ADDW $393312, R2 // 4280011142804111 + ADD $0x186a0, R2, R5 // ADD $100000, R2, R5 // 45801a91a5604091 + SUB $0xe7791f700, R3, R1 // SUB $62135596800, R3, R1 // 1be09ed23bf2aef2db01c0f261001bcb + ADD $0x3fffffffc000, R5 // ADD $70368744161280, R5 // fb7f72b2a5001b8b ADD R1>>11, R2, R3 ADD R1<<22, R2, R3 ADD R1->33, R2, R3 @@ -59,6 +64,30 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 CMN R1.SXTX<<2, R10 // 5fe921ab CMPW R2.UXTH<<3, R11 // 7f2d226b CMNW R1.SXTB, R9 // 3f81212b + CMPW $0x60060, R2 // CMPW $393312, R2 // 1b0c8052db00a0725f001b6b + CMPW $40960, R0 // 1f284071 + CMPW $27745, R2 // 3b8c8d525f001b6b + CMNW $0x3fffffc0, R2 // CMNW $1073741760, R2 // fb5f1a325f001b2b + CMPW $0xffff0, R1 // CMPW $1048560, R1 // fb3f1c323f001b6b + CMP $0xffffffffffa0, R3 // CMP $281474976710560, R3 // fb0b80921b00e0f27f001beb + CMP $0xf4240, R1 // CMP $1000000, R1 // 1b4888d2fb01a0f23f001beb + CMP $3343198598084851058, R3 // 5bae8ed2db8daef23badcdf2bbcce5f27f001beb + CMP $3, R2 + CMP R1, R2 + CMP R1->11, R2 + CMP R1>>22, R2 + CMP R1<<33, R2 + CMP R22.SXTX, RSP // ffe336eb + CMP $0x22220000, RSP // CMP $572653568, RSP // 5b44a4d2ff633beb + CMPW $0x22220000, RSP // CMPW $572653568, RSP // 5b44a452ff633b6b + CCMN MI, ZR, R1, $4 // e44341ba + // MADD Rn,Rm,Ra,Rd + MADD R1, R2, R3, R4 // 6408019b + // CLS + CLSW R1, R2 + CLS R1, R2 + +// fp/simd instructions. VADDP V1.B16, V2.B16, V3.B16 // 43bc214e VADDP V1.S4, V2.S4, V3.S4 // 43bca14e VADDP V1.D2, V2.D2, V3.D2 // 43bce14e @@ -67,22 +96,6 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VORR V5.B16, V4.B16, V3.B16 // 831ca54e VADD V16.S4, V5.S4, V9.S4 // a984b04e VEOR V0.B16, V1.B16, V0.B16 // 201c206e - SHA256H V9.S4, V3, V2 // 6240095e - SHA256H2 V9.S4, V4, V3 // 8350095e - SHA256SU0 V8.S4, V7.S4 // 0729285e - SHA256SU1 V6.S4, V5.S4, V7.S4 // a760065e - SHA1SU0 V11.S4, V8.S4, V6.S4 // 06310b5e - SHA1SU1 V5.S4, V1.S4 // a118285e - SHA1C V1.S4, V2, V3 // 4300015e - SHA1H V5, V4 // a408285e - SHA1M V8.S4, V7, V6 // e620085e - SHA1P V11.S4, V10, V9 // 49110b5e - SHA512H V2.D2, V1, V0 // 208062ce - SHA512H2 V4.D2, V3, V2 // 628464ce - SHA512SU0 V9.D2, V8.D2 // 2881c0ce - SHA512SU1 V7.D2, V6.D2, V5.D2 // c58867ce - VRAX1 V26.D2, V29.D2, V30.D2 // be8f7ace - VXAR $63, V27.D2, V21.D2, V26.D2 // bafe9bce VADDV V0.S4, V0 // 00b8b14e VMOVI $82, V0.B16 // 40e6024f VUADDLV V6.B16, V6 // c638306e @@ -96,10 +109,6 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VFMLS V1.D2, V12.D2, V1.D2 // 81cde14e VFMLS V1.S2, V12.S2, V1.S2 // 81cda10e VFMLS V1.S4, V12.S4, V1.S4 // 81cda14e - VPMULL V2.D1, V1.D1, V3.Q1 // 23e0e20e - VPMULL2 V2.D2, V1.D2, V4.Q1 // 24e0e24e - VPMULL V2.B8, V1.B8, V3.H8 // 23e0220e - VPMULL2 V2.B16, V1.B16, V4.H8 // 24e0224e VEXT $4, V2.B8, V1.B8, V3.B8 // 2320022e VEXT $8, V2.B16, V1.B16, V3.B16 // 2340026e VRBIT V24.B16, V24.B16 // 185b606e @@ -125,6 +134,14 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VSRI $8, V1.H8, V2.H8 // 2244186f VSRI $2, V1.B8, V2.B8 // 22440e2f VSRI $2, V1.B16, V2.B16 // 22440e6f + VSLI $7, V2.B16, V3.B16 // 43540f6f + VSLI $15, V3.H4, V4.H4 // 64541f2f + VSLI $31, V5.S4, V6.S4 // a6543f6f + VSLI $63, V7.D2, V8.D2 // e8547f6f + VUSRA $8, V2.B16, V3.B16 // 4314086f + VUSRA $16, V3.H4, V4.H4 // 6414102f + VUSRA $32, V5.S4, V6.S4 // a614206f + VUSRA $64, V7.D2, V8.D2 // e814406f VTBL V22.B16, [V28.B16, V29.B16], V11.B16 // 8b23164e VTBL V18.B8, [V17.B16, V18.B16, V19.B16], V22.B8 // 3642120e VTBL V31.B8, [V14.B16, V15.B16, V16.B16, V17.B16], V15.B8 // cf611f0e @@ -141,8 +158,6 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VTBL V14.B16, [V3.B16, V4.B16, V5.B16], V17.B16 // 71400e4e VTBL V13.B16, [V29.B16, V30.B16, V31.B16, V0.B16], V28.B16 // bc630d4e VTBL V3.B8, [V27.B16], V8.B8 // 6803030e - VEOR3 V2.B16, V7.B16, V12.B16, V25.B16 // 990907ce - VBCAX V1.B16, V2.B16, V26.B16, V31.B16 // 5f0722ce VZIP1 V16.H8, V3.H8, V19.H8 // 7338504e VZIP2 V22.D2, V25.D2, V21.D2 // 357bd64e VZIP1 V6.D2, V9.D2, V11.D2 // 2b39c64e @@ -180,114 +195,95 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VUSHLL2 $31, V30.S4, V2.D2 // c2a73f6f VBIF V0.B8, V30.B8, V1.B8 // c11fe02e VBIF V30.B16, V0.B16, V2.B16 // 021cfe6e - MOVD (R2)(R6.SXTW), R4 // 44c866f8 - MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8 - MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8 - MOVWU (R19)(R20<<2), R20 // 747a74b8 - MOVD (R2)(R6<<3), R4 // 447866f8 - MOVD (R3)(R7.SXTX<<3), R8 // 68f867f8 - MOVWU (R5)(R4.UXTW), R10 // aa4864b8 - MOVBU (R3)(R9.UXTW), R8 // 68486938 - MOVBU (R5)(R8), R10 // MOVBU (R5)(R8*1), R10 // aa686838 - MOVHU (R2)(R7.SXTW<<1), R11 // 4bd86778 - MOVHU (R1)(R2<<1), R5 // 25786278 - MOVB (R9)(R3.UXTW), R6 // 2649a338 - MOVB (R10)(R6), R15 // MOVB (R10)(R6*1), R15 // 4f69a638 - MOVH (R5)(R7.SXTX<<1), R19 // b3f8a778 - MOVH (R8)(R4<<1), R10 // 0a79a478 - MOVW (R9)(R8.SXTW<<2), R19 // 33d9a8b8 - MOVW (R1)(R4.SXTX), R11 // 2be8a4b8 - MOVW (R1)(R4.SXTX), ZR // 3fe8a4b8 - MOVW (R2)(R5), R12 // MOVW (R2)(R5*1), R12 // 4c68a5b8 - MOVD R5, (R2)(R6<<3) // 457826f8 - MOVD R9, (R6)(R7.SXTX<<3) // c9f827f8 - MOVD ZR, (R6)(R7.SXTX<<3) // dff827f8 - MOVW R8, (R2)(R3.UXTW<<2) // 485823b8 - MOVW R7, (R3)(R4.SXTW) // 67c824b8 - MOVB R4, (R2)(R6.SXTX) // 44e82638 - MOVB R8, (R3)(R9.UXTW) // 68482938 - MOVB R10, (R5)(R8) // MOVB R10, (R5)(R8*1) // aa682838 - MOVH R11, (R2)(R7.SXTW<<1) // 4bd82778 - MOVH R5, (R1)(R2<<1) // 25782278 - MOVH R7, (R2)(R5.SXTX<<1) // 47f82578 - MOVH R8, (R3)(R6.UXTW) // 68482678 - MOVB (R29)(R30<<0), R14 // ae7bbe38 - MOVB (R29)(R30), R14 // MOVB (R29)(R30*1), R14 // ae6bbe38 - MOVB R4, (R2)(R6.SXTX) // 44e82638 FMOVS $(4.0), F0 // 0010221e FMOVD $(4.0), F0 // 0010621e FMOVS $(0.265625), F1 // 01302a1e FMOVD $(0.1796875), F2 // 02f0681e FMOVS $(0.96875), F3 // 03f02d1e FMOVD $(28.0), F4 // 0490671e + VUADDW V9.B8, V12.H8, V14.H8 // 8e11292e + VUADDW V13.H4, V10.S4, V11.S4 // 4b116d2e + VUADDW V21.S2, V24.D2, V29.D2 // 1d13b52e + VUADDW2 V9.B16, V12.H8, V14.H8 // 8e11296e + VUADDW2 V13.H8, V20.S4, V30.S4 // 9e126d6e + VUADDW2 V21.S4, V24.D2, V29.D2 // 1d13b56e + FCCMPS LT, F1, F2, $1 // 41b4211e + FMADDS F1, F3, F2, F4 // 440c011f + FMADDD F4, F5, F4, F4 // 8414441f + FMSUBS F13, F21, F13, F19 // b3d50d1f + FMSUBD F11, F7, F15, F31 // ff9d4b1f + FNMADDS F1, F3, F2, F4 // 440c211f + FNMADDD F1, F3, F2, F4 // 440c611f + FNMSUBS F1, F3, F2, F4 // 448c211f + FNMSUBD F1, F3, F2, F4 // 448c611f + FADDS F2, F3, F4 // 6428221e + FADDD F1, F2 // 4228611e + VDUP V19.S[0], V17.S4 // 7106044e -// move a large constant to a Vd. - VMOVS $0x80402010, V11 // VMOVS $2151686160, V11 - VMOVD $0x8040201008040201, V20 // VMOVD $-9205322385119247871, V20 - VMOVQ $0x7040201008040201, $0x8040201008040201, V10 // VMOVQ $8088500183983456769, $-9205322385119247871, V10 - VMOVQ $0x8040201008040202, $0x7040201008040201, V20 // VMOVQ $-9205322385119247870, $8088500183983456769, V20 - FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc - FMOVS (R2)(R6<<2), F4 // 447866bc - FMOVD (R2)(R6), F4 // FMOVD (R2)(R6*1), F4 // 446866fc - FMOVD (R2)(R6<<3), F4 // 447866fc - FMOVS F4, (R2)(R6) // FMOVS F4, (R2)(R6*1) // 446826bc - FMOVS F4, (R2)(R6<<2) // 447826bc - FMOVD F4, (R2)(R6) // FMOVD F4, (R2)(R6*1) // 446826fc - FMOVD F4, (R2)(R6<<3) // 447826fc +// special + PRFM (R2), PLDL1KEEP // 400080f9 + PRFM 16(R2), PLDL1KEEP // 400880f9 + PRFM 48(R6), PSTL2STRM // d31880f9 + PRFM 8(R12), PLIL3STRM // 8d0580f9 + PRFM (R8), $25 // 190180f9 + PRFM 8(R9), $30 // 3e0580f9 + NOOP // 1f2003d5 + HINT $0 // 1f2003d5 + DMB $1 + SVC - CMPW $40960, R0 // 1f284071 - CMPW $27745, R2 // 3b8c8d525f001b6b - CMNW $0x3fffffc0, R2 // CMNW $1073741760, R2 // fb5f1a325f001b2b - CMPW $0xffff0, R1 // CMPW $1048560, R1 // fb3f1c323f001b6b - CMP $0xffffffffffa0, R3 // CMP $281474976710560, R3 // fb0b80921b00e0f27f001beb - CMP $0xf4240, R1 // CMP $1000000, R1 // 1b4888d2fb01a0f23f001beb - ADD $0x186a0, R2, R5 // ADD $100000, R2, R5 // 45801a91a5604091 - SUB $0xe7791f700, R3, R1 // SUB $62135596800, R3, R1 // 1be09ed23bf2aef2db01c0f261001bcb - CMP $3343198598084851058, R3 // 5bae8ed2db8daef23badcdf2bbcce5f27f001beb - ADD $0x3fffffffc000, R5 // ADD $70368744161280, R5 // fb7f72b2a5001b8b -// LTYPE1 imsr ',' spreg ',' -// { -// outcode($1, &$2, $4, &nullgen); -// } -// LTYPE1 imsr ',' reg -// { -// outcode($1, &$2, NREG, &$4); -// } - ADDW $1, R2 - ADDW R1, R2 - ADD $1, R2 - ADD R1, R2 - ADD R1>>11, R2 - ADD R1<<22, R2 - ADD R1->33, R2 - AND R1@>33, R2 +// encryption + SHA256H V9.S4, V3, V2 // 6240095e + SHA256H2 V9.S4, V4, V3 // 8350095e + SHA256SU0 V8.S4, V7.S4 // 0729285e + SHA256SU1 V6.S4, V5.S4, V7.S4 // a760065e + SHA1SU0 V11.S4, V8.S4, V6.S4 // 06310b5e + SHA1SU1 V5.S4, V1.S4 // a118285e + SHA1C V1.S4, V2, V3 // 4300015e + SHA1H V5, V4 // a408285e + SHA1M V8.S4, V7, V6 // e620085e + SHA1P V11.S4, V10, V9 // 49110b5e + SHA512H V2.D2, V1, V0 // 208062ce + SHA512H2 V4.D2, V3, V2 // 628464ce + SHA512SU0 V9.D2, V8.D2 // 2881c0ce + SHA512SU1 V7.D2, V6.D2, V5.D2 // c58867ce + VRAX1 V26.D2, V29.D2, V30.D2 // be8f7ace + VXAR $63, V27.D2, V21.D2, V26.D2 // bafe9bce + VPMULL V2.D1, V1.D1, V3.Q1 // 23e0e20e + VPMULL2 V2.D2, V1.D2, V4.Q1 // 24e0e24e + VPMULL V2.B8, V1.B8, V3.H8 // 23e0220e + VPMULL2 V2.B16, V1.B16, V4.H8 // 24e0224e + VEOR3 V2.B16, V7.B16, V12.B16, V25.B16 // 990907ce + VBCAX V1.B16, V2.B16, V26.B16, V31.B16 // 5f0722ce + VREV32 V5.B16, V5.B16 // a508206e + VREV64 V2.S2, V3.S2 // 4308a00e + VREV64 V2.S4, V3.S4 // 4308a04e // logical ops +// // make sure constants get encoded into an instruction when it could - AND $(1<<63), R1 // AND $-9223372036854775808, R1 // 21004192 - AND $(1<<63-1), R1 // AND $9223372036854775807, R1 // 21f84092 - ORR $(1<<63), R1 // ORR $-9223372036854775808, R1 // 210041b2 - ORR $(1<<63-1), R1 // ORR $9223372036854775807, R1 // 21f840b2 - EOR $(1<<63), R1 // EOR $-9223372036854775808, R1 // 210041d2 - EOR $(1<<63-1), R1 // EOR $9223372036854775807, R1 // 21f840d2 - - ANDW $0x3ff00000, R2 // ANDW $1072693248, R2 // 42240c12 - BICW $0x3ff00000, R2 // BICW $1072693248, R2 // 42540212 - ORRW $0x3ff00000, R2 // ORRW $1072693248, R2 // 42240c32 - ORNW $0x3ff00000, R2 // ORNW $1072693248, R2 // 42540232 - EORW $0x3ff00000, R2 // EORW $1072693248, R2 // 42240c52 - EONW $0x3ff00000, R2 // EONW $1072693248, R2 // 42540252 - - AND $0x22220000, R3, R4 // AND $572653568, R3, R4 // 5b44a4d264001b8a - ORR $0x22220000, R3, R4 // ORR $572653568, R3, R4 // 5b44a4d264001baa - EOR $0x22220000, R3, R4 // EOR $572653568, R3, R4 // 5b44a4d264001bca - BIC $0x22220000, R3, R4 // BIC $572653568, R3, R4 // 5b44a4d264003b8a - ORN $0x22220000, R3, R4 // ORN $572653568, R3, R4 // 5b44a4d264003baa - EON $0x22220000, R3, R4 // EON $572653568, R3, R4 // 5b44a4d264003bca - ANDS $0x22220000, R3, R4 // ANDS $572653568, R3, R4 // 5b44a4d264001bea - BICS $0x22220000, R3, R4 // BICS $572653568, R3, R4 // 5b44a4d264003bea - + AND R1@>33, R2 + AND $(1<<63), R1 // AND $-9223372036854775808, R1 // 21004192 + AND $(1<<63-1), R1 // AND $9223372036854775807, R1 // 21f84092 + ORR $(1<<63), R1 // ORR $-9223372036854775808, R1 // 210041b2 + ORR $(1<<63-1), R1 // ORR $9223372036854775807, R1 // 21f840b2 + EOR $(1<<63), R1 // EOR $-9223372036854775808, R1 // 210041d2 + EOR $(1<<63-1), R1 // EOR $9223372036854775807, R1 // 21f840d2 + ANDW $0x3ff00000, R2 // ANDW $1072693248, R2 // 42240c12 + BICW $0x3ff00000, R2 // BICW $1072693248, R2 // 42540212 + ORRW $0x3ff00000, R2 // ORRW $1072693248, R2 // 42240c32 + ORNW $0x3ff00000, R2 // ORNW $1072693248, R2 // 42540232 + EORW $0x3ff00000, R2 // EORW $1072693248, R2 // 42240c52 + EONW $0x3ff00000, R2 // EONW $1072693248, R2 // 42540252 + AND $0x22220000, R3, R4 // AND $572653568, R3, R4 // 5b44a4d264001b8a + ORR $0x22220000, R3, R4 // ORR $572653568, R3, R4 // 5b44a4d264001baa + EOR $0x22220000, R3, R4 // EOR $572653568, R3, R4 // 5b44a4d264001bca + BIC $0x22220000, R3, R4 // BIC $572653568, R3, R4 // 5b44a4d264003b8a + ORN $0x22220000, R3, R4 // ORN $572653568, R3, R4 // 5b44a4d264003baa + EON $0x22220000, R3, R4 // EON $572653568, R3, R4 // 5b44a4d264003bca + ANDS $0x22220000, R3, R4 // ANDS $572653568, R3, R4 // 5b44a4d264001bea + BICS $0x22220000, R3, R4 // BICS $572653568, R3, R4 // 5b44a4d264003bea EOR $0xe03fffffffffffff, R20, R22 // EOR $-2287828610704211969, R20, R22 // 96e243d2 TSTW $0x600000006, R1 // TSTW $25769803782, R1 // 3f041f72 TST $0x4900000049, R0 // TST $313532612681, R0 // 3b0980d23b09c0f21f001bea @@ -316,19 +312,22 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 EONW $0x6006000060060, R5 // EONW $1689262177517664, R5 // 1b0c8052db00a072a5003b4a ORNW $0x6006000060060, R5 // ORNW $1689262177517664, R5 // 1b0c8052db00a072a5003b2a BICSW $0x6006000060060, R5 // BICSW $1689262177517664, R5 // 1b0c8052db00a072a5003b6a - ADDW $0x60060, R2 // ADDW $393312, R2 // 4280011142804111 - CMPW $0x60060, R2 // CMPW $393312, R2 // 1b0c8052db00a0725f001b6b - // TODO: this could have better encoding - ANDW $-1, R10 // 1b0080124a011b0a - - AND $8, R0, RSP // 1f007d92 - ORR $8, R0, RSP // 1f007db2 - EOR $8, R0, RSP // 1f007dd2 - BIC $8, R0, RSP // 1ff87c92 - ORN $8, R0, RSP // 1ff87cb2 - EON $8, R0, RSP // 1ff87cd2 + ANDW $-1, R10 // 1b0080124a011b0a + AND $8, R0, RSP // 1f007d92 + ORR $8, R0, RSP // 1f007db2 + EOR $8, R0, RSP // 1f007dd2 + BIC $8, R0, RSP // 1ff87c92 + ORN $8, R0, RSP // 1ff87cb2 + EON $8, R0, RSP // 1ff87cd2 + TST $15, R2 // 5f0c40f2 + TST R1, R2 // 5f0001ea + TST R1->11, R2 // 5f2c81ea + TST R1>>22, R2 // 5f5841ea + TST R1<<33, R2 // 5f8401ea + TST $0x22220000, R3 // TST $572653568, R3 // 5b44a4d27f001bea +// move an immediate to a Rn. MOVD $0x3fffffffc000, R0 // MOVD $70368744161280, R0 // e07f72b2 MOVW $1000000, R4 // 04488852e401a072 MOVW $0xaaaa0000, R1 // MOVW $2863267840, R1 // 4155b552 @@ -348,46 +347,37 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 MOVD $-1, R1 // 01008092 MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2 MOVD $0xffffffffffffaaaa, R1 // MOVD $-21846, R1 // a1aa8a92 + MOVW $1, ZR + MOVW $1, R1 + MOVD $1, ZR + MOVD $1, R1 + MOVK $1, R1 + +// move a large constant to a Vd. + VMOVS $0x80402010, V11 // VMOVS $2151686160, V11 + VMOVD $0x8040201008040201, V20 // VMOVD $-9205322385119247871, V20 + VMOVQ $0x7040201008040201, $0x8040201008040201, V10 // VMOVQ $8088500183983456769, $-9205322385119247871, V10 + VMOVQ $0x8040201008040202, $0x7040201008040201, V20 // VMOVQ $-9205322385119247870, $8088500183983456769, V20 +// mov(to/from sp) MOVD $0x1002(RSP), R1 // MOVD $4098(RSP), R1 // fb074091610b0091 MOVD $0x1708(RSP), RSP // MOVD $5896(RSP), RSP // fb0740917f231c91 MOVD $0x2001(R7), R1 // MOVD $8193(R7), R1 // fb08409161070091 MOVD $0xffffff(R7), R1 // MOVD $16777215(R7), R1 // fbfc7f9161ff3f91 - MOVD $-0x1(R7), R1 // MOVD $-1(R7), R1 // e10400d1 MOVD $-0x30(R7), R1 // MOVD $-48(R7), R1 // e1c000d1 MOVD $-0x708(R7), R1 // MOVD $-1800(R7), R1 // e1201cd1 MOVD $-0x2000(RSP), R1 // MOVD $-8192(RSP), R1 // e10b40d1 MOVD $-0x10000(RSP), RSP // MOVD $-65536(RSP), RSP // ff4340d1 - -// -// CLS -// -// LTYPE2 imsr ',' reg -// { -// outcode($1, &$2, NREG, &$4); -// } - CLSW R1, R2 - CLS R1, R2 - -// -// MOV -// -// LTYPE3 addr ',' addr -// { -// outcode($1, &$2, NREG, &$4); -// } MOVW R1, R2 MOVW ZR, R1 MOVW R1, ZR - MOVW $1, ZR - MOVW $1, R1 - MOVW ZR, (R1) MOVD R1, R2 MOVD ZR, R1 - MOVD $1, ZR - MOVD $1, R1 - MOVD ZR, (R1) + +// store and load +// +// LD1/ST1 VLD1 (R8), [V1.B16, V2.B16] // 01a1404c VLD1.P (R3), [V31.H8, V0.H8] // 7fa4df4c VLD1.P (R8)(R20), [V21.B16, V22.B16] // VLD1.P (R8)(R20*1), [V21.B16,V22.B16] // 15a1d44c @@ -445,45 +435,21 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VST4 [V22.D2, V23.D2, V24.D2, V25.D2], (R3) // 760c004c VST4.P [V14.D2, V15.D2, V16.D2, V17.D2], 64(R15) // ee0d9f4c VST4.P [V24.B8, V25.B8, V26.B8, V27.B8], (R3)(R23) // VST4.P [V24.B8, V25.B8, V26.B8, V27.B8], (R3)(R23*1) // 7800970c - FMOVS F20, (R0) // 140000bd + +// pre/post-indexed FMOVS.P F20, 4(R0) // 144400bc FMOVS.W F20, 4(R0) // 144c00bc - FMOVS (R0), F20 // 140040bd - FMOVS.P 8(R0), F20 // 148440bc - FMOVS.W 8(R0), F20 // 148c40bc - FMOVD F20, (R2) // 540000fd FMOVD.P F20, 8(R1) // 348400fc - FMOVD.W 8(R1), F20 // 348c40fc FMOVQ.P F13, 11(R10) // 4db5803c FMOVQ.W F15, 11(R20) // 8fbe803c + + FMOVS.P 8(R0), F20 // 148440bc + FMOVS.W 8(R0), F20 // 148c40bc + FMOVD.W 8(R1), F20 // 348c40fc FMOVQ.P 11(R10), F13 // 4db5c03c FMOVQ.W 11(R20), F15 // 8fbec03c - FMOVQ F10, 65520(R10) // 4afdbf3d - FMOVQ F11, 64(RSP) // eb13803d - FMOVQ F11, 8(R20) // 8b82803c - FMOVQ F11, 4(R20) // 8b42803c - FMOVQ 32(R5), F2 // a208c03d - FMOVQ 65520(R10), F10 // 4afdff3d - FMOVQ 64(RSP), F11 // eb13c03d - PRFM (R2), PLDL1KEEP // 400080f9 - PRFM 16(R2), PLDL1KEEP // 400880f9 - PRFM 48(R6), PSTL2STRM // d31880f9 - PRFM 8(R12), PLIL3STRM // 8d0580f9 - PRFM (R8), $25 // 190180f9 - PRFM 8(R9), $30 // 3e0580f9 - // small offset fits into instructions - MOVB 1(R1), R2 // 22048039 - MOVH 1(R1), R2 // 22108078 - MOVH 2(R1), R2 // 22048079 - MOVW 1(R1), R2 // 221080b8 - MOVW 4(R1), R2 // 220480b9 - MOVD 1(R1), R2 // 221040f8 - MOVD 8(R1), R2 // 220440f9 - FMOVS 1(R1), F2 // 221040bc - FMOVS 4(R1), F2 // 220440bd - FMOVD 1(R1), F2 // 221040fc - FMOVD 8(R1), F2 // 220440fd +// small offset fits into instructions MOVB R1, 1(R2) // 41040039 MOVH R1, 1(R2) // 41100078 MOVH R1, 2(R2) // 41040079 @@ -491,18 +457,37 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 MOVW R1, 4(R2) // 410400b9 MOVD R1, 1(R2) // 411000f8 MOVD R1, 8(R2) // 410400f9 + MOVD ZR, (R1) + MOVW ZR, (R1) FMOVS F1, 1(R2) // 411000bc FMOVS F1, 4(R2) // 410400bd + FMOVS F20, (R0) // 140000bd FMOVD F1, 1(R2) // 411000fc FMOVD F1, 8(R2) // 410400fd + FMOVD F20, (R2) // 540000fd + FMOVQ F0, 32(R5)// a008803d + FMOVQ F10, 65520(R10) // 4afdbf3d + FMOVQ F11, 64(RSP) // eb13803d + FMOVQ F11, 8(R20) // 8b82803c + FMOVQ F11, 4(R20) // 8b42803c - // large aligned offset, use two instructions - MOVB 0x1001(R1), R2 // MOVB 4097(R1), R2 // 3b04409162078039 - MOVH 0x2002(R1), R2 // MOVH 8194(R1), R2 // 3b08409162078079 - MOVW 0x4004(R1), R2 // MOVW 16388(R1), R2 // 3b104091620780b9 - MOVD 0x8008(R1), R2 // MOVD 32776(R1), R2 // 3b204091620740f9 - FMOVS 0x4004(R1), F2 // FMOVS 16388(R1), F2 // 3b104091620740bd - FMOVD 0x8008(R1), F2 // FMOVD 32776(R1), F2 // 3b204091620740fd + MOVB 1(R1), R2 // 22048039 + MOVH 1(R1), R2 // 22108078 + MOVH 2(R1), R2 // 22048079 + MOVW 1(R1), R2 // 221080b8 + MOVW 4(R1), R2 // 220480b9 + MOVD 1(R1), R2 // 221040f8 + MOVD 8(R1), R2 // 220440f9 + FMOVS (R0), F20 // 140040bd + FMOVS 1(R1), F2 // 221040bc + FMOVS 4(R1), F2 // 220440bd + FMOVD 1(R1), F2 // 221040fc + FMOVD 8(R1), F2 // 220440fd + FMOVQ 32(R5), F2 // a208c03d + FMOVQ 65520(R10), F10 // 4afdff3d + FMOVQ 64(RSP), F11 // eb13c03d + +// large aligned offset, use two instructions(add+ldr/store). MOVB R1, 0x1001(R2) // MOVB R1, 4097(R2) // 5b04409161070039 MOVH R1, 0x2002(R2) // MOVH R1, 8194(R2) // 5b08409161070079 MOVW R1, 0x4004(R2) // MOVW R1, 16388(R2) // 5b104091610700b9 @@ -510,15 +495,16 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 FMOVS F1, 0x4004(R2) // FMOVS F1, 16388(R2) // 5b104091610700bd FMOVD F1, 0x8008(R2) // FMOVD F1, 32776(R2) // 5b204091610700fd - // very large or unaligned offset uses constant pool - // the encoding cannot be checked as the address of the constant pool is unknown. - // here we only test that they can be assembled. - MOVB 0x44332211(R1), R2 // MOVB 1144201745(R1), R2 - MOVH 0x44332211(R1), R2 // MOVH 1144201745(R1), R2 - MOVW 0x44332211(R1), R2 // MOVW 1144201745(R1), R2 - MOVD 0x44332211(R1), R2 // MOVD 1144201745(R1), R2 - FMOVS 0x44332211(R1), F2 // FMOVS 1144201745(R1), F2 - FMOVD 0x44332211(R1), F2 // FMOVD 1144201745(R1), F2 + MOVB 0x1001(R1), R2 // MOVB 4097(R1), R2 // 3b04409162078039 + MOVH 0x2002(R1), R2 // MOVH 8194(R1), R2 // 3b08409162078079 + MOVW 0x4004(R1), R2 // MOVW 16388(R1), R2 // 3b104091620780b9 + MOVD 0x8008(R1), R2 // MOVD 32776(R1), R2 // 3b204091620740f9 + FMOVS 0x4004(R1), F2 // FMOVS 16388(R1), F2 // 3b104091620740bd + FMOVD 0x8008(R1), F2 // FMOVD 32776(R1), F2 // 3b204091620740fd + +// very large or unaligned offset uses constant pool. +// the encoding cannot be checked as the address of the constant pool is unknown. +// here we only test that they can be assembled. MOVB R1, 0x44332211(R2) // MOVB R1, 1144201745(R2) MOVH R1, 0x44332211(R2) // MOVH R1, 1144201745(R2) MOVW R1, 0x44332211(R2) // MOVW R1, 1144201745(R2) @@ -526,14 +512,59 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 FMOVS F1, 0x44332211(R2) // FMOVS F1, 1144201745(R2) FMOVD F1, 0x44332211(R2) // FMOVD F1, 1144201745(R2) -// -// MOVK -// -// LMOVK imm ',' reg -// { -// outcode($1, &$2, NREG, &$4); -// } - MOVK $1, R1 + MOVB 0x44332211(R1), R2 // MOVB 1144201745(R1), R2 + MOVH 0x44332211(R1), R2 // MOVH 1144201745(R1), R2 + MOVW 0x44332211(R1), R2 // MOVW 1144201745(R1), R2 + MOVD 0x44332211(R1), R2 // MOVD 1144201745(R1), R2 + FMOVS 0x44332211(R1), F2 // FMOVS 1144201745(R1), F2 + FMOVD 0x44332211(R1), F2 // FMOVD 1144201745(R1), F2 + +// shifted or extended register offset. + MOVD (R2)(R6.SXTW), R4 // 44c866f8 + MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8 + MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8 + MOVWU (R19)(R20<<2), R20 // 747a74b8 + MOVD (R2)(R6<<3), R4 // 447866f8 + MOVD (R3)(R7.SXTX<<3), R8 // 68f867f8 + MOVWU (R5)(R4.UXTW), R10 // aa4864b8 + MOVBU (R3)(R9.UXTW), R8 // 68486938 + MOVBU (R5)(R8), R10 // MOVBU (R5)(R8*1), R10 // aa686838 + MOVHU (R2)(R7.SXTW<<1), R11 // 4bd86778 + MOVHU (R1)(R2<<1), R5 // 25786278 + MOVB (R9)(R3.UXTW), R6 // 2649a338 + MOVB (R10)(R6), R15 // MOVB (R10)(R6*1), R15 // 4f69a638 + MOVB (R29)(R30<<0), R14 // ae7bbe38 + MOVB (R29)(R30), R14 // MOVB (R29)(R30*1), R14 // ae6bbe38 + MOVH (R5)(R7.SXTX<<1), R19 // b3f8a778 + MOVH (R8)(R4<<1), R10 // 0a79a478 + MOVW (R9)(R8.SXTW<<2), R19 // 33d9a8b8 + MOVW (R1)(R4.SXTX), R11 // 2be8a4b8 + MOVW (R1)(R4.SXTX), ZR // 3fe8a4b8 + MOVW (R2)(R5), R12 // MOVW (R2)(R5*1), R12 // 4c68a5b8 + FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc + FMOVS (R2)(R6<<2), F4 // 447866bc + FMOVD (R2)(R6), F4 // FMOVD (R2)(R6*1), F4 // 446866fc + FMOVD (R2)(R6<<3), F4 // 447866fc + + MOVD R5, (R2)(R6<<3) // 457826f8 + MOVD R9, (R6)(R7.SXTX<<3) // c9f827f8 + MOVD ZR, (R6)(R7.SXTX<<3) // dff827f8 + MOVW R8, (R2)(R3.UXTW<<2) // 485823b8 + MOVW R7, (R3)(R4.SXTW) // 67c824b8 + MOVB R4, (R2)(R6.SXTX) // 44e82638 + MOVB R8, (R3)(R9.UXTW) // 68482938 + MOVB R10, (R5)(R8) // MOVB R10, (R5)(R8*1) // aa682838 + MOVH R11, (R2)(R7.SXTW<<1) // 4bd82778 + MOVH R5, (R1)(R2<<1) // 25782278 + MOVH R7, (R2)(R5.SXTX<<1) // 47f82578 + MOVH R8, (R3)(R6.UXTW) // 68482678 + MOVB R4, (R2)(R6.SXTX) // 44e82638 + FMOVS F4, (R2)(R6) // FMOVS F4, (R2)(R6*1) // 446826bc + FMOVS F4, (R2)(R6<<2) // 447826bc + FMOVD F4, (R2)(R6) // FMOVD F4, (R2)(R6*1) // 446826fc + FMOVD F4, (R2)(R6<<3) // 447826fc + +// vmov VMOV V8.S[1], R1 // 013d0c0e VMOV V0.D[0], R11 // 0b3c084e VMOV V0.D[1], R11 // 0b3c184e @@ -548,205 +579,28 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VMOV V9.H[0], V12.H[1] // 2c05066e VMOV V8.B[0], V12.B[1] // 0c05036e VMOV V8.B[7], V4.B[8] // 043d116e - VREV32 V5.B16, V5.B16 // a508206e - VREV64 V2.S2, V3.S2 // 4308a00e - VREV64 V2.S4, V3.S4 // 4308a04e - VDUP V19.S[0], V17.S4 // 7106044e -// -// B/BL -// -// LTYPE4 comma rel -// { -// outcode($1, &nullgen, NREG, &$3); -// } - BL 1(PC) // CALL 1(PC) - -// LTYPE4 comma nireg -// { -// outcode($1, &nullgen, NREG, &$3); -// } - BL (R2) // CALL (R2) - BL foo(SB) // CALL foo(SB) - BL bar<>(SB) // CALL bar<>(SB) -// -// BEQ -// -// LTYPE5 comma rel -// { -// outcode($1, &nullgen, NREG, &$3); -// } - BEQ 1(PC) -// -// SVC -// -// LTYPE6 -// { -// outcode($1, &nullgen, NREG, &nullgen); -// } - SVC -// -// CMP -// -// LTYPE7 imsr ',' spreg comma -// { -// outcode($1, &$2, $4, &nullgen); -// } - CMP $3, R2 - CMP R1, R2 - CMP R1->11, R2 - CMP R1>>22, R2 - CMP R1<<33, R2 - CMP R22.SXTX, RSP // ffe336eb - - CMP $0x22220000, RSP // CMP $572653568, RSP // 5b44a4d2ff633beb - CMPW $0x22220000, RSP // CMPW $572653568, RSP // 5b44a452ff633b6b - -// TST - TST $15, R2 // 5f0c40f2 - TST R1, R2 // 5f0001ea - TST R1->11, R2 // 5f2c81ea - TST R1>>22, R2 // 5f5841ea - TST R1<<33, R2 // 5f8401ea - TST $0x22220000, R3 // TST $572653568, R3 // 5b44a4d27f001bea - -// // CBZ -// -// LTYPE8 reg ',' rel -// { -// outcode($1, &$2, NREG, &$4); -// } again: CBZ R1, again // CBZ R1 -// -// CSET -// -// LTYPER cond ',' reg -// { -// outcode($1, &$2, NREG, &$4); -// } - CSET GT, R1 // e1d79f9a - CSETW HI, R2 // e2979f1a -// -// CSEL/CSINC/CSNEG/CSINV -// -// LTYPES cond ',' reg ',' reg ',' reg -// { -// outgcode($1, &$2, $6.reg, &$4, &$8); -// } +// conditional operations + CSET GT, R1 // e1d79f9a + CSETW HI, R2 // e2979f1a CSEL LT, R1, R2, ZR // 3fb0829a CSELW LT, R2, R3, R4 // 44b0831a CSINC GT, R1, ZR, R3 // 23c49f9a CSNEG MI, R1, R2, R3 // 234482da CSINV CS, R1, R2, R3 // CSINV HS, R1, R2, R3 // 232082da CSINVW MI, R2, ZR, R2 // 42409f5a - -// LTYPES cond ',' reg ',' reg -// { -// outcode($1, &$2, $4.reg, &$6); -// } CINC EQ, R4, R9 // 8914849a CINCW PL, R2, ZR // 5f44821a CINV PL, R11, R22 // 76418bda CINVW LS, R7, R13 // ed80875a CNEG LS, R13, R7 // a7858dda CNEGW EQ, R8, R13 // 0d15885a -// -// CCMN -// -// LTYPEU cond ',' imsr ',' reg ',' imm comma -// { -// outgcode($1, &$2, $6.reg, &$4, &$8); -// } - CCMN MI, ZR, R1, $4 // e44341ba - -// -// FADDD -// -// LTYPEK frcon ',' freg -// { -// outcode($1, &$2, NREG, &$4); -// } -// FADDD $0.5, F1 // FADDD $(0.5), F1 - FADDD F1, F2 - -// LTYPEK frcon ',' freg ',' freg -// { -// outcode($1, &$2, $4.reg, &$6); -// } -// FADDD $0.7, F1, F2 // FADDD $(0.69999999999999996), F1, F2 - FADDD F1, F2, F3 - -// -// FCMP -// -// LTYPEL frcon ',' freg comma -// { -// outcode($1, &$2, $4.reg, &nullgen); -// } -// FCMP $0.2, F1 -// FCMP F1, F2 - -// -// FCCMP -// -// LTYPEF cond ',' freg ',' freg ',' imm comma -// { -// outgcode($1, &$2, $6.reg, &$4, &$8); -// } - FCCMPS LT, F1, F2, $1 // 41b4211e - -// -// FMULA -// -// LTYPE9 freg ',' freg ',' freg ',' freg comma -// { -// outgcode($1, &$2, $4.reg, &$6, &$8); -// } -// FMULA F1, F2, F3, F4 -// -// FCSEL -// -// LFCSEL cond ',' freg ',' freg ',' freg -// { -// outgcode($1, &$2, $6.reg, &$4, &$8); -// } -// -// MADD Rn,Rm,Ra,Rd -// -// LTYPEM reg ',' reg ',' sreg ',' reg -// { -// outgcode($1, &$2, $6, &$4, &$8); -// } -// MADD R1, R2, R3, R4 - - FMADDS F1, F3, F2, F4 // 440c011f - FMADDD F4, F5, F4, F4 // 8414441f - FMSUBS F13, F21, F13, F19 // b3d50d1f - FMSUBD F11, F7, F15, F31 // ff9d4b1f - FNMADDS F1, F3, F2, F4 // 440c211f - FNMADDD F1, F3, F2, F4 // 440c611f - FNMSUBS F1, F3, F2, F4 // 448c211f - FNMSUBD F1, F3, F2, F4 // 448c611f - -// DMB, HINT -// -// LDMB imm -// { -// outcode($1, &$2, NREG, &nullgen); -// } - DMB $1 - -// -// STXR -// -// LSTXR reg ',' addr ',' reg -// { -// outcode($1, &$2, &$4, &$6); -// } +// atomic ops LDARB (R25), R2 // 22ffdf08 LDARH (R5), R7 // a7fcdf48 LDAXPW (R10), (R20, R16) // 54c17f88 @@ -923,21 +777,18 @@ again: LDORLH R5, (RSP), R7 // e7336578 LDORLB R5, (R6), R7 // c7306538 LDORLB R5, (RSP), R7 // e7336538 + // RET -// -// LTYPEA comma -// { -// outcode($1, &nullgen, NREG, &nullgen); -// } - BEQ 2(PC) RET RET foo(SB) -// More B/BL cases, and canonical names JMP, CALL. - - BEQ 2(PC) - B foo(SB) // JMP foo(SB) - BL foo(SB) // CALL foo(SB) +// B/BL/B.cond cases, and canonical names JMP, CALL. + BL 1(PC) // CALL 1(PC) + BL (R2) // CALL (R2) + BL foo(SB) // CALL foo(SB) + BL bar<>(SB) // CALL bar<>(SB) + B foo(SB) // JMP foo(SB) + BEQ 1(PC) BEQ 2(PC) TBZ $1, R1, 2(PC) TBNZ $2, R2, 2(PC) @@ -1112,8 +963,6 @@ again: FSTPS (F3, F4), 1024(RSP) // fb0310916313002d FSTPS (F3, F4), x(SB) FSTPS (F3, F4), x+8(SB) - NOOP // 1f2003d5 - HINT $0 // 1f2003d5 // System Register MSR $1, SPSel // bf4100d5 @@ -1675,11 +1524,4 @@ again: MSR R13, ZCR_EL1 // 0d1218d5 MRS ZCR_EL1, R23 // 171238d5 MSR R17, ZCR_EL1 // 111218d5 - -// END -// -// LTYPEE comma -// { -// outcode($1, &nullgen, NREG, &nullgen); -// } END -- cgit v1.3 From 15131caeaa83e57fd8bdf87dde2801443f5602db Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Fri, 8 May 2020 10:51:29 +0800 Subject: cmd/internal/obj/arm64: add CASx/CASPx instructions This patch adds support for CASx and CASPx atomic instructions. go syntax gnu syntax CASD Rs, (Rn|RSP), Rt => cas Xs, Xt, (Xn|SP) CASALW Rs, (Rn|RSP), Rt => casal Ws, Wt, (Xn|SP) CASPD (Rs, Rs+1), (Rn|RSP), (Rt, Rt+1) => casp Xs, Xs+1, Xt, Xt+1, (Xn|SP) CASPW (Rs, Rs+1), (Rn|RSP), (Rt, Rt+1) => casp Ws, Ws+1, Wt, Wt+1, (Xn|SP) This patch changes the type of prog.RestArgs from "[]Addr" to "[]struct{Addr, Pos}", Pos is a enum, indicating the position of the operand. This patch also adds test cases. Change-Id: Ib971cfda7890b7aa895d17bab22dea326c7fcaa4 Reviewed-on: https://go-review.googlesource.com/c/go/+/233277 Trust: fannie zhang Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/arch/arm64.go | 13 ++- src/cmd/asm/internal/asm/asm.go | 18 +++- src/cmd/asm/internal/asm/testdata/arm64.s | 18 ++++ src/cmd/asm/internal/asm/testdata/arm64error.s | 4 + src/cmd/compile/internal/s390x/ssa.go | 12 +-- src/cmd/internal/obj/arm64/a.out.go | 14 +++ src/cmd/internal/obj/arm64/anames.go | 14 +++ src/cmd/internal/obj/arm64/asm7.go | 132 +++++++++++++++++++------ src/cmd/internal/obj/link.go | 84 +++++++++++----- src/cmd/internal/obj/riscv/obj.go | 8 +- src/cmd/internal/obj/s390x/asmz.go | 2 +- src/cmd/internal/obj/util.go | 15 ++- src/cmd/internal/obj/x86/asm6.go | 6 +- 13 files changed, 266 insertions(+), 74 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/arch/arm64.go b/src/cmd/asm/internal/arch/arm64.go index e643889aef..e557630ca6 100644 --- a/src/cmd/asm/internal/arch/arm64.go +++ b/src/cmd/asm/internal/arch/arm64.go @@ -75,7 +75,7 @@ func IsARM64STLXR(op obj.As) bool { arm64.ASTXP, arm64.ASTXPW, arm64.ASTLXP, arm64.ASTLXPW: return true } - // atomic instructions + // LDADDx/SWPx/CASx atomic instructions if arm64.IsAtomicInstruction(op) { return true } @@ -93,6 +93,17 @@ func IsARM64TBL(op obj.As) bool { return false } +// IsARM64CASP reports whether the op (as defined by an arm64.A* +// constant) is one of the CASP-like instructions, and its 2nd +// destination is a register pair that require special handling. +func IsARM64CASP(op obj.As) bool { + switch op { + case arm64.ACASPD, arm64.ACASPW: + return true + } + return false +} + // ARM64Suffix handles the special suffix for the ARM64. // It returns a boolean to indicate success; failure means // cond was unrecognized. diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index b9efa454ed..c4032759bb 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -637,6 +637,18 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { prog.From = a[0] prog.SetFrom3(a[1]) prog.To = a[2] + case arch.IsARM64CASP(op): + prog.From = a[0] + prog.To = a[1] + // both 1st operand and 3rd operand are (Rs, Rs+1) register pair. + // And the register pair must be contiguous. + if (a[0].Type != obj.TYPE_REGREG) || (a[2].Type != obj.TYPE_REGREG) { + p.errorf("invalid addressing modes for 1st or 3rd operand to %s instruction, must be register pair", op) + return + } + // For ARM64 CASP-like instructions, its 2nd destination operand is register pair(Rt, Rt+1) that can + // not fit into prog.RegTo2, so save it to the prog.RestArgs. + prog.SetTo2(a[2]) default: prog.From = a[0] prog.Reg = p.getRegister(prog, op, &a[1]) @@ -725,7 +737,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { } if p.arch.Family == sys.AMD64 { prog.From = a[0] - prog.RestArgs = []obj.Addr{a[1], a[2]} + prog.SetRestArgs([]obj.Addr{a[1], a[2]}) prog.To = a[3] break } @@ -808,13 +820,13 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { } if p.arch.Family == sys.AMD64 { prog.From = a[0] - prog.RestArgs = []obj.Addr{a[1], a[2], a[3]} + prog.SetRestArgs([]obj.Addr{a[1], a[2], a[3]}) prog.To = a[4] break } if p.arch.Family == sys.S390X { prog.From = a[0] - prog.RestArgs = []obj.Addr{a[1], a[2], a[3]} + prog.SetRestArgs([]obj.Addr{a[1], a[2], a[3]}) prog.To = a[4] break } diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 7943990e16..5547cf634c 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -777,6 +777,24 @@ again: LDORLH R5, (RSP), R7 // e7336578 LDORLB R5, (R6), R7 // c7306538 LDORLB R5, (RSP), R7 // e7336538 + CASD R1, (R2), ZR // 5f7ca1c8 + CASW R1, (RSP), ZR // ff7fa188 + CASB ZR, (R5), R3 // a37cbf08 + CASH R3, (RSP), ZR // ff7fa348 + CASW R5, (R7), R6 // e67ca588 + CASLD ZR, (RSP), R8 // e8ffbfc8 + CASLW R9, (R10), ZR // 5ffda988 + CASAD R7, (R11), R15 // 6f7de7c8 + CASAW R10, (RSP), R19 // f37fea88 + CASALD R5, (R6), R7 // c7fce5c8 + CASALD R5, (RSP), R7 // e7ffe5c8 + CASALW R5, (R6), R7 // c7fce588 + CASALW R5, (RSP), R7 // e7ffe588 + CASALH ZR, (R5), R8 // a8fcff48 + CASALB R8, (R9), ZR // 3ffde808 + CASPD (R30, ZR), (RSP), (R8, R9) // e87f3e48 + CASPW (R6, R7), (R8), (R4, R5) // 047d2608 + CASPD (R2, R3), (R2), (R8, R9) // 487c2248 // RET RET diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index c3a617066a..99e4d62d25 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -357,4 +357,8 @@ TEXT errors(SB),$0 VUADDW2 V9.B8, V12.S4, V14.S4 // ERROR "operand mismatch" VSLI $64, V7.D2, V8.D2 // ERROR "shift out of range" VUSRA $0, V7.D2, V8.D2 // ERROR "shift out of range" + CASPD (R3, R4), (R2), (R8, R9) // ERROR "source register pair must start from even register" + CASPD (R2, R3), (R2), (R9, R10) // ERROR "destination register pair must start from even register" + CASPD (R2, R4), (R2), (R8, R9) // ERROR "source register pair must be contiguous" + CASPD (R2, R3), (R2), (R8, R10) // ERROR "destination register pair must be contiguous" RET diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go index e23b31f385..84b9f491e4 100644 --- a/src/cmd/compile/internal/s390x/ssa.go +++ b/src/cmd/compile/internal/s390x/ssa.go @@ -182,11 +182,11 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { i := v.Aux.(s390x.RotateParams) p := s.Prog(v.Op.Asm()) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(i.Start)} - p.RestArgs = []obj.Addr{ + p.SetRestArgs([]obj.Addr{ {Type: obj.TYPE_CONST, Offset: int64(i.End)}, {Type: obj.TYPE_CONST, Offset: int64(i.Amount)}, {Type: obj.TYPE_REG, Reg: r2}, - } + }) p.To = obj.Addr{Type: obj.TYPE_REG, Reg: r1} case ssa.OpS390XADD, ssa.OpS390XADDW, ssa.OpS390XSUB, ssa.OpS390XSUBW, @@ -913,7 +913,7 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { p.From.Type = obj.TYPE_CONST p.From.Offset = int64(s390x.NotEqual & s390x.NotUnordered) // unordered is not possible p.Reg = s390x.REG_R3 - p.RestArgs = []obj.Addr{{Type: obj.TYPE_CONST, Offset: 0}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0}) if b.Succs[0].Block() != next { s.Br(s390x.ABR, b.Succs[0].Block()) } @@ -956,17 +956,17 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { p.From.Type = obj.TYPE_CONST p.From.Offset = int64(mask & s390x.NotUnordered) // unordered is not possible p.Reg = b.Controls[0].Reg() - p.RestArgs = []obj.Addr{{Type: obj.TYPE_REG, Reg: b.Controls[1].Reg()}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: b.Controls[1].Reg()}) case ssa.BlockS390XCGIJ, ssa.BlockS390XCIJ: p.From.Type = obj.TYPE_CONST p.From.Offset = int64(mask & s390x.NotUnordered) // unordered is not possible p.Reg = b.Controls[0].Reg() - p.RestArgs = []obj.Addr{{Type: obj.TYPE_CONST, Offset: int64(int8(b.AuxInt))}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(int8(b.AuxInt))}) case ssa.BlockS390XCLGIJ, ssa.BlockS390XCLIJ: p.From.Type = obj.TYPE_CONST p.From.Offset = int64(mask & s390x.NotUnordered) // unordered is not possible p.Reg = b.Controls[0].Reg() - p.RestArgs = []obj.Addr{{Type: obj.TYPE_CONST, Offset: int64(uint8(b.AuxInt))}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(uint8(b.AuxInt))}) default: b.Fatalf("branch not implemented: %s", b.LongString()) } diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 98504353e2..5844b71ca7 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -843,6 +843,20 @@ const ( ASWPLW ASWPLH ASWPLB + ACASD + ACASW + ACASH + ACASB + ACASAD + ACASAW + ACASLD + ACASLW + ACASALD + ACASALW + ACASALH + ACASALB + ACASPD + ACASPW ABEQ ABNE ABCS diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 126eefd032..fb216f9a94 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -337,6 +337,20 @@ var Anames = []string{ "SWPLW", "SWPLH", "SWPLB", + "CASD", + "CASW", + "CASH", + "CASB", + "CASAD", + "CASAW", + "CASLD", + "CASLW", + "CASALD", + "CASALW", + "CASALH", + "CASALB", + "CASPD", + "CASPW", "BEQ", "BNE", "BCS", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 8cbf5e719f..4fc62d5c7f 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -80,12 +80,17 @@ type Optab struct { } func IsAtomicInstruction(as obj.As) bool { - _, ok := atomicInstructions[as] - return ok + if _, ok := atomicLDADD[as]; ok { + return true + } + if _, ok := atomicSWP[as]; ok { + return true + } + return false } // known field values of an instruction. -var atomicInstructions = map[obj.As]uint32{ +var atomicLDADD = map[obj.As]uint32{ ALDADDAD: 3<<30 | 0x1c5<<21 | 0x00<<10, ALDADDAW: 2<<30 | 0x1c5<<21 | 0x00<<10, ALDADDAH: 1<<30 | 0x1c5<<21 | 0x00<<10, @@ -150,22 +155,41 @@ var atomicInstructions = map[obj.As]uint32{ ALDORLW: 2<<30 | 0x1c3<<21 | 0x0c<<10, ALDORLH: 1<<30 | 0x1c3<<21 | 0x0c<<10, ALDORLB: 0<<30 | 0x1c3<<21 | 0x0c<<10, - ASWPAD: 3<<30 | 0x1c5<<21 | 0x20<<10, - ASWPAW: 2<<30 | 0x1c5<<21 | 0x20<<10, - ASWPAH: 1<<30 | 0x1c5<<21 | 0x20<<10, - ASWPAB: 0<<30 | 0x1c5<<21 | 0x20<<10, - ASWPALD: 3<<30 | 0x1c7<<21 | 0x20<<10, - ASWPALW: 2<<30 | 0x1c7<<21 | 0x20<<10, - ASWPALH: 1<<30 | 0x1c7<<21 | 0x20<<10, - ASWPALB: 0<<30 | 0x1c7<<21 | 0x20<<10, - ASWPD: 3<<30 | 0x1c1<<21 | 0x20<<10, - ASWPW: 2<<30 | 0x1c1<<21 | 0x20<<10, - ASWPH: 1<<30 | 0x1c1<<21 | 0x20<<10, - ASWPB: 0<<30 | 0x1c1<<21 | 0x20<<10, - ASWPLD: 3<<30 | 0x1c3<<21 | 0x20<<10, - ASWPLW: 2<<30 | 0x1c3<<21 | 0x20<<10, - ASWPLH: 1<<30 | 0x1c3<<21 | 0x20<<10, - ASWPLB: 0<<30 | 0x1c3<<21 | 0x20<<10, +} + +var atomicSWP = map[obj.As]uint32{ + ASWPAD: 3<<30 | 0x1c5<<21 | 0x20<<10, + ASWPAW: 2<<30 | 0x1c5<<21 | 0x20<<10, + ASWPAH: 1<<30 | 0x1c5<<21 | 0x20<<10, + ASWPAB: 0<<30 | 0x1c5<<21 | 0x20<<10, + ASWPALD: 3<<30 | 0x1c7<<21 | 0x20<<10, + ASWPALW: 2<<30 | 0x1c7<<21 | 0x20<<10, + ASWPALH: 1<<30 | 0x1c7<<21 | 0x20<<10, + ASWPALB: 0<<30 | 0x1c7<<21 | 0x20<<10, + ASWPD: 3<<30 | 0x1c1<<21 | 0x20<<10, + ASWPW: 2<<30 | 0x1c1<<21 | 0x20<<10, + ASWPH: 1<<30 | 0x1c1<<21 | 0x20<<10, + ASWPB: 0<<30 | 0x1c1<<21 | 0x20<<10, + ASWPLD: 3<<30 | 0x1c3<<21 | 0x20<<10, + ASWPLW: 2<<30 | 0x1c3<<21 | 0x20<<10, + ASWPLH: 1<<30 | 0x1c3<<21 | 0x20<<10, + ASWPLB: 0<<30 | 0x1c3<<21 | 0x20<<10, + ACASD: 3<<30 | 0x45<<21 | 0x1f<<10, + ACASW: 2<<30 | 0x45<<21 | 0x1f<<10, + ACASH: 1<<30 | 0x45<<21 | 0x1f<<10, + ACASB: 0<<30 | 0x45<<21 | 0x1f<<10, + ACASAD: 3<<30 | 0x47<<21 | 0x1f<<10, + ACASAW: 2<<30 | 0x47<<21 | 0x1f<<10, + ACASLD: 3<<30 | 0x45<<21 | 0x3f<<10, + ACASLW: 2<<30 | 0x45<<21 | 0x3f<<10, + ACASALD: 3<<30 | 0x47<<21 | 0x3f<<10, + ACASALW: 2<<30 | 0x47<<21 | 0x3f<<10, + ACASALH: 1<<30 | 0x47<<21 | 0x3f<<10, + ACASALB: 0<<30 | 0x47<<21 | 0x3f<<10, +} +var atomicCASP = map[obj.As]uint32{ + ACASPD: 1<<30 | 0x41<<21 | 0x1f<<10, + ACASPW: 0<<30 | 0x41<<21 | 0x1f<<10, } var oprange [ALAST & obj.AMask][]Optab @@ -794,8 +818,10 @@ var optab = []Optab{ {ASTPW, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, C_XPOST}, {ASTPW, C_PAIR, C_NONE, C_NONE, C_ADDR, 87, 12, 0, 0, 0}, - {ASWPD, C_REG, C_NONE, C_NONE, C_ZOREG, 47, 4, 0, 0, 0}, // RegTo2=C_REG - {ASWPD, C_REG, C_NONE, C_NONE, C_ZAUTO, 47, 4, REGSP, 0, 0}, // RegTo2=C_REG + {ASWPD, C_REG, C_NONE, C_NONE, C_ZOREG, 47, 4, 0, 0, 0}, // RegTo2=C_REG + {ASWPD, C_REG, C_NONE, C_NONE, C_ZAUTO, 47, 4, REGSP, 0, 0}, // RegTo2=C_REG + {ACASPD, C_PAIR, C_NONE, C_NONE, C_ZOREG, 106, 4, 0, 0, 0}, // RegTo2=C_REGREG + {ACASPD, C_PAIR, C_NONE, C_NONE, C_ZAUTO, 106, 4, REGSP, 0, 0}, // RegTo2=C_REGREG {ALDAR, C_ZOREG, C_NONE, C_NONE, C_REG, 58, 4, 0, 0, 0}, {ALDXR, C_ZOREG, C_NONE, C_NONE, C_REG, 58, 4, 0, 0, 0}, {ALDAXR, C_ZOREG, C_NONE, C_NONE, C_REG, 58, 4, 0, 0, 0}, @@ -2011,7 +2037,7 @@ func (c *ctxt7) oplook(p *obj.Prog) *Optab { a1-- a3 := C_NONE + 1 - if p.GetFrom3() != nil { + if p.GetFrom3() != nil && p.RestArgs[0].Pos == 0 { a3 = int(p.GetFrom3().Class) if a3 == 0 { a3 = c.aclass(p.GetFrom3()) + 1 @@ -2496,10 +2522,18 @@ func buildop(ctxt *obj.Link) { oprangeset(AMOVZW, t) case ASWPD: - for i := range atomicInstructions { + for i := range atomicLDADD { + oprangeset(i, t) + } + for i := range atomicSWP { + if i == ASWPD { + continue + } oprangeset(i, t) } + case ACASPD: + oprangeset(ACASPW, t) case ABEQ: oprangeset(ABNE, t) oprangeset(ABCS, t) @@ -3994,17 +4028,27 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 |= uint32(p.From.Reg&31) << 5 o1 |= uint32(p.To.Reg & 31) - case 47: /* SWPx/LDADDx/LDANDx/LDEORx/LDORx Rs, (Rb), Rt */ + case 47: // SWPx/LDADDx/LDANDx/LDEORx/LDORx/CASx Rs, (Rb), Rt rs := p.From.Reg rt := p.RegTo2 rb := p.To.Reg - fields := atomicInstructions[p.As] - // rt can't be sp. rt can't be r31 when field A is 0, A bit is the 23rd bit. - if rt == REG_RSP || (rt == REGZERO && (fields&(1<<23) == 0)) { + // rt can't be sp. + if rt == REG_RSP { c.ctxt.Diag("illegal destination register: %v\n", p) } - o1 |= fields | uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31) + if enc, ok := atomicLDADD[p.As]; ok { + // for LDADDx-like instructions, rt can't be r31 when field.enc A is 0, A bit is the 23rd bit. + if (rt == REGZERO) && (enc&(1<<23) == 0) { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 |= enc + } else if enc, ok := atomicSWP[p.As]; ok { + o1 |= enc + } else { + c.ctxt.Diag("invalid atomic instructions: %v\n", p) + } + o1 |= uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31) case 48: /* ADD $C_ADDCON2, Rm, Rd */ // NOTE: this case does not use REGTMP. If it ever does, @@ -5351,6 +5395,38 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rt := int((p.To.Reg) & 31) r := int((p.Reg) & 31) o1 |= ((Q & 1) << 30) | ((size & 3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 106: // CASPx (Rs, Rs+1), (Rb), (Rt, Rt+1) + rs := p.From.Reg + rt := p.GetTo2().Reg + rb := p.To.Reg + rs1 := int16(p.From.Offset) + rt1 := int16(p.GetTo2().Offset) + + enc, ok := atomicCASP[p.As] + if !ok { + c.ctxt.Diag("invalid CASP-like atomic instructions: %v\n", p) + } + // for CASPx-like instructions, Rs<0> != 1 && Rt<0> != 1 + switch { + case rs&1 != 0: + c.ctxt.Diag("source register pair must start from even register: %v\n", p) + break + case rt&1 != 0: + c.ctxt.Diag("destination register pair must start from even register: %v\n", p) + break + case rs != rs1-1: + c.ctxt.Diag("source register pair must be contiguous: %v\n", p) + break + case rt != rt1-1: + c.ctxt.Diag("destination register pair must be contiguous: %v\n", p) + break + } + // rt can't be sp. + if rt == REG_RSP { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 |= enc | uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31) } out[0] = o1 out[1] = o2 diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index 2037beca72..b578b6a09a 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -283,28 +283,41 @@ func (a *Addr) SetTarget(t *Prog) { // The other fields not yet mentioned are for use by the back ends and should // be left zeroed by creators of Prog lists. type Prog struct { - Ctxt *Link // linker context - Link *Prog // next Prog in linked list - From Addr // first source operand - RestArgs []Addr // can pack any operands that not fit into {Prog.From, Prog.To} - To Addr // destination operand (second is RegTo2 below) - Pool *Prog // constant pool entry, for arm,arm64 back ends - Forwd *Prog // for x86 back end - Rel *Prog // for x86, arm back ends - Pc int64 // for back ends or assembler: virtual or actual program counter, depending on phase - Pos src.XPos // source position of this instruction - Spadj int32 // effect of instruction on stack pointer (increment or decrement amount) - As As // assembler opcode - Reg int16 // 2nd source operand - RegTo2 int16 // 2nd destination operand - Mark uint16 // bitmask of arch-specific items - Optab uint16 // arch-specific opcode index - Scond uint8 // bits that describe instruction suffixes (e.g. ARM conditions) - Back uint8 // for x86 back end: backwards branch state - Ft uint8 // for x86 back end: type index of Prog.From - Tt uint8 // for x86 back end: type index of Prog.To - Isize uint8 // for x86 back end: size of the instruction in bytes -} + Ctxt *Link // linker context + Link *Prog // next Prog in linked list + From Addr // first source operand + RestArgs []AddrPos // can pack any operands that not fit into {Prog.From, Prog.To} + To Addr // destination operand (second is RegTo2 below) + Pool *Prog // constant pool entry, for arm,arm64 back ends + Forwd *Prog // for x86 back end + Rel *Prog // for x86, arm back ends + Pc int64 // for back ends or assembler: virtual or actual program counter, depending on phase + Pos src.XPos // source position of this instruction + Spadj int32 // effect of instruction on stack pointer (increment or decrement amount) + As As // assembler opcode + Reg int16 // 2nd source operand + RegTo2 int16 // 2nd destination operand + Mark uint16 // bitmask of arch-specific items + Optab uint16 // arch-specific opcode index + Scond uint8 // bits that describe instruction suffixes (e.g. ARM conditions) + Back uint8 // for x86 back end: backwards branch state + Ft uint8 // for x86 back end: type index of Prog.From + Tt uint8 // for x86 back end: type index of Prog.To + Isize uint8 // for x86 back end: size of the instruction in bytes +} + +// Pos indicates whether the oprand is the source or the destination. +type AddrPos struct { + Addr + Pos OperandPos +} + +type OperandPos int8 + +const ( + Source OperandPos = iota + Destination +) // From3Type returns p.GetFrom3().Type, or TYPE_NONE when // p.GetFrom3() returns nil. @@ -330,15 +343,36 @@ func (p *Prog) GetFrom3() *Addr { if p.RestArgs == nil { return nil } - return &p.RestArgs[0] + return &p.RestArgs[0].Addr } -// SetFrom3 assigns []Addr{a} to p.RestArgs. +// SetFrom3 assigns []Args{{a, 0}} to p.RestArgs. // In pair with Prog.GetFrom3 it can help in emulation of Prog.From3. // // Deprecated: for the same reasons as Prog.GetFrom3. func (p *Prog) SetFrom3(a Addr) { - p.RestArgs = []Addr{a} + p.RestArgs = []AddrPos{{a, Source}} +} + +// SetTo2 assings []Args{{a, 1}} to p.RestArgs when the second destination +// operand does not fit into prog.RegTo2. +func (p *Prog) SetTo2(a Addr) { + p.RestArgs = []AddrPos{{a, Destination}} +} + +// GetTo2 returns the second destination operand. +func (p *Prog) GetTo2() *Addr { + if p.RestArgs == nil { + return nil + } + return &p.RestArgs[0].Addr +} + +// SetRestArgs assigns more than one source operands to p.RestArgs. +func (p *Prog) SetRestArgs(args []Addr) { + for i := range args { + p.RestArgs = append(p.RestArgs, AddrPos{args[i], Source}) + } } // An As denotes an assembler opcode. diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 5301e44002..0cffa54fa6 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -48,7 +48,7 @@ func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *ob p.As = AAUIPC p.Mark |= NEED_PCREL_ITYPE_RELOC - p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: to.Offset, Sym: to.Sym}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: to.Offset, Sym: to.Sym}) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} p.Reg = 0 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} @@ -234,7 +234,7 @@ func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) { p.As = AAUIPC p.Mark |= NEED_PCREL_ITYPE_RELOC - p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} p.Reg = 0 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: to.Reg} @@ -275,7 +275,7 @@ func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) { p.As = AAUIPC p.Mark |= NEED_PCREL_STYPE_RELOC - p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: p.To.Offset, Sym: p.To.Sym}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: p.To.Offset, Sym: p.To.Sym}) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} p.Reg = 0 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} @@ -340,7 +340,7 @@ func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) { p.As = AAUIPC p.Mark |= NEED_PCREL_ITYPE_RELOC - p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} p.Reg = 0 p.To = to diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go index da14dd3c41..f0f9d5cefc 100644 --- a/src/cmd/internal/obj/s390x/asmz.go +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -718,7 +718,7 @@ func (c *ctxtz) oplook(p *obj.Prog) *Optab { p.From.Class = int8(c.aclass(&p.From) + 1) p.To.Class = int8(c.aclass(&p.To) + 1) for i := range p.RestArgs { - p.RestArgs[i].Class = int8(c.aclass(&p.RestArgs[i]) + 1) + p.RestArgs[i].Addr.Class = int8(c.aclass(&p.RestArgs[i].Addr) + 1) } // Mirrors the argument list in Optab. diff --git a/src/cmd/internal/obj/util.go b/src/cmd/internal/obj/util.go index 21e28807a6..b9bacb7a22 100644 --- a/src/cmd/internal/obj/util.go +++ b/src/cmd/internal/obj/util.go @@ -175,9 +175,11 @@ func (p *Prog) WriteInstructionString(w io.Writer) { sep = ", " } for i := range p.RestArgs { - io.WriteString(w, sep) - WriteDconv(w, p, &p.RestArgs[i]) - sep = ", " + if p.RestArgs[i].Pos == Source { + io.WriteString(w, sep) + WriteDconv(w, p, &p.RestArgs[i].Addr) + sep = ", " + } } if p.As == ATEXT { @@ -198,6 +200,13 @@ func (p *Prog) WriteInstructionString(w io.Writer) { if p.RegTo2 != REG_NONE { fmt.Fprintf(w, "%s%v", sep, Rconv(int(p.RegTo2))) } + for i := range p.RestArgs { + if p.RestArgs[i].Pos == Destination { + io.WriteString(w, sep) + WriteDconv(w, p, &p.RestArgs[i].Addr) + sep = ", " + } + } } func (ctxt *Link) NewProg() *Prog { diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index c412f4945d..94aed44871 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -4270,7 +4270,7 @@ func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { args = append(args, ft) } for i := range p.RestArgs { - args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax) + args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax) } if tt != Ynone*Ymax { args = append(args, tt) @@ -5438,10 +5438,10 @@ func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { // unpackOps4 extracts 4 operands from p. func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) { - return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To + return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To } // unpackOps5 extracts 5 operands from p. func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) { - return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.RestArgs[2], &p.To + return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To } -- cgit v1.3 From 8b517983048205932305905bc01a29bd146cb8d6 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Tue, 27 Oct 2020 23:03:11 +1100 Subject: cmd/asm: remove X27 and S11 register names on riscv64 The X27 register (known as S11 via its ABI name) is the g register on riscv64. Prevent assembly from referring to it by either of these names. Change-Id: Iba389eb8e44e097c0142c5b3d92e72bcae8a244a Reviewed-on: https://go-review.googlesource.com/c/go/+/265519 Trust: Joel Sing Reviewed-by: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot --- src/cmd/asm/internal/arch/arch.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/arch/arch.go b/src/cmd/asm/internal/arch/arch.go index 2e5d0ff991..a62e55191e 100644 --- a/src/cmd/asm/internal/arch/arch.go +++ b/src/cmd/asm/internal/arch/arch.go @@ -535,6 +535,9 @@ func archRISCV64() *Arch { // Standard register names. for i := riscv.REG_X0; i <= riscv.REG_X31; i++ { + if i == riscv.REG_G { + continue + } name := fmt.Sprintf("X%d", i-riscv.REG_X0) register[name] = int16(i) } @@ -571,7 +574,7 @@ func archRISCV64() *Arch { register["S8"] = riscv.REG_S8 register["S9"] = riscv.REG_S9 register["S10"] = riscv.REG_S10 - register["S11"] = riscv.REG_S11 + // Skip S11 as it is the g register. register["T3"] = riscv.REG_T3 register["T4"] = riscv.REG_T4 register["T5"] = riscv.REG_T5 -- cgit v1.3 From fdba080220f0642b2b3926cbc062c775f6224e5d Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Fri, 30 Oct 2020 18:36:41 -0400 Subject: cmd: remove Go115AMD64 Always do aligned jumps now. Change-Id: If68a16fe93c9173c83323a9063465c9bd166eeb8 Reviewed-on: https://go-review.googlesource.com/c/go/+/266857 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: David Chase --- src/cmd/asm/internal/asm/endtoend_test.go | 7 +------ src/cmd/internal/obj/x86/asm6.go | 11 +++-------- src/cmd/internal/objabi/util.go | 9 +-------- src/cmd/link/internal/amd64/l.go | 2 +- src/cmd/link/internal/amd64/obj.go | 7 +------ 5 files changed, 7 insertions(+), 29 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go index 989b7a5405..7472507caf 100644 --- a/src/cmd/asm/internal/asm/endtoend_test.go +++ b/src/cmd/asm/internal/asm/endtoend_test.go @@ -390,12 +390,7 @@ func TestARM64Errors(t *testing.T) { } func TestAMD64EndToEnd(t *testing.T) { - defer func(old string) { objabi.GOAMD64 = old }(objabi.GOAMD64) - for _, goamd64 := range []string{"normaljumps", "alignedjumps"} { - t.Logf("GOAMD64=%s", goamd64) - objabi.GOAMD64 = goamd64 - testEndToEnd(t, "amd64", "amd64") - } + testEndToEnd(t, "amd64", "amd64") } func Test386Encoder(t *testing.T) { diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index 94aed44871..a6b85ac4a0 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -1851,9 +1851,9 @@ func spadjop(ctxt *obj.Link, l, q obj.As) obj.As { return q } -// If the environment variable GOAMD64=alignedjumps the assembler will ensure that -// no standalone or macro-fused jump will straddle or end on a 32 byte boundary -// by inserting NOPs before the jumps +// isJump returns whether p is a jump instruction. +// It is used to ensure that no standalone or macro-fused jump will straddle +// or end on a 32 byte boundary by inserting NOPs before the jumps. func isJump(p *obj.Prog) bool { return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO @@ -1987,11 +1987,6 @@ func makePjcCtx(ctxt *obj.Link) padJumpsCtx { return padJumpsCtx(0) } - if objabi.GOAMD64 != "alignedjumps" { - return padJumpsCtx(0) - - } - return padJumpsCtx(32) } diff --git a/src/cmd/internal/objabi/util.go b/src/cmd/internal/objabi/util.go index b81b73a022..9479ab2cd9 100644 --- a/src/cmd/internal/objabi/util.go +++ b/src/cmd/internal/objabi/util.go @@ -25,7 +25,6 @@ var ( GOARCH = envOr("GOARCH", defaultGOARCH) GOOS = envOr("GOOS", defaultGOOS) GO386 = envOr("GO386", defaultGO386) - GOAMD64 = goamd64() GOARM = goarm() GOMIPS = gomips() GOMIPS64 = gomips64() @@ -37,15 +36,9 @@ var ( const ( ElfRelocOffset = 256 - MachoRelocOffset = 2048 // reserve enough space for ELF relocations - Go115AMD64 = "alignedjumps" // Should be "alignedjumps" or "normaljumps"; this replaces environment variable introduced in CL 219357. + MachoRelocOffset = 2048 // reserve enough space for ELF relocations ) -// TODO(1.16): assuming no issues in 1.15 release, remove this and related constant. -func goamd64() string { - return Go115AMD64 -} - func goarm() int { switch v := envOr("GOARM", defaultGOARM); v { case "5": diff --git a/src/cmd/link/internal/amd64/l.go b/src/cmd/link/internal/amd64/l.go index a9afb3a39f..c9ea90a9c8 100644 --- a/src/cmd/link/internal/amd64/l.go +++ b/src/cmd/link/internal/amd64/l.go @@ -33,7 +33,7 @@ package amd64 const ( maxAlign = 32 // max data alignment minAlign = 1 // min data alignment - funcAlign = 16 + funcAlign = 32 ) /* Used by ../internal/ld/dwarf.go */ diff --git a/src/cmd/link/internal/amd64/obj.go b/src/cmd/link/internal/amd64/obj.go index 777f99dbe2..d09c90ea28 100644 --- a/src/cmd/link/internal/amd64/obj.go +++ b/src/cmd/link/internal/amd64/obj.go @@ -39,13 +39,8 @@ import ( func Init() (*sys.Arch, ld.Arch) { arch := sys.ArchAMD64 - fa := funcAlign - if objabi.GOAMD64 == "alignedjumps" { - fa = 32 - } - theArch := ld.Arch{ - Funcalign: fa, + Funcalign: funcAlign, Maxalign: maxAlign, Minalign: minAlign, Dwarfregsp: dwarfRegSP, -- cgit v1.3 From 5f0fca1475e042a6e85f81ff6db676ec18805041 Mon Sep 17 00:00:00 2001 From: Jonathan Swinney Date: Mon, 2 Nov 2020 16:36:10 +0000 Subject: cmd/asm: rename arm64 instructions LDANDx to LDCLRx The LDANDx instructions were misleading because they correspond to the mnemonic LDCLRx as defined in the Arm Architecture Reference Manual for Armv8. This changes the assembler to use the same mnemonic as the GNU assembler and the manual. The instruction has the form: LDCLRx Rs, (Rb), Rt: *Rb -> Rt, Rs AND NOT(*Rb) -> *Rb Change-Id: I94ae003e99e817209bba1afe960e612bf3a0b410 Reviewed-on: https://go-review.googlesource.com/c/go/+/267138 Reviewed-by: Cherry Zhang Reviewed-by: fannie zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Trust: fannie zhang --- src/cmd/asm/internal/asm/testdata/arm64.s | 64 +++++++++++++------------- src/cmd/asm/internal/asm/testdata/arm64error.s | 48 +++++++++---------- src/cmd/internal/obj/arm64/a.out.go | 32 ++++++------- src/cmd/internal/obj/arm64/anames.go | 32 ++++++------- src/cmd/internal/obj/arm64/asm7.go | 34 +++++++------- 5 files changed, 105 insertions(+), 105 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 5547cf634c..91e3a0ca0a 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -681,38 +681,38 @@ again: LDADDLH R5, (RSP), R7 // e7036578 LDADDLB R5, (R6), R7 // c7006538 LDADDLB R5, (RSP), R7 // e7036538 - LDANDAD R5, (R6), R7 // c710a5f8 - LDANDAD R5, (RSP), R7 // e713a5f8 - LDANDAW R5, (R6), R7 // c710a5b8 - LDANDAW R5, (RSP), R7 // e713a5b8 - LDANDAH R5, (R6), R7 // c710a578 - LDANDAH R5, (RSP), R7 // e713a578 - LDANDAB R5, (R6), R7 // c710a538 - LDANDAB R5, (RSP), R7 // e713a538 - LDANDALD R5, (R6), R7 // c710e5f8 - LDANDALD R5, (RSP), R7 // e713e5f8 - LDANDALW R5, (R6), R7 // c710e5b8 - LDANDALW R5, (RSP), R7 // e713e5b8 - LDANDALH R5, (R6), R7 // c710e578 - LDANDALH R5, (RSP), R7 // e713e578 - LDANDALB R5, (R6), R7 // c710e538 - LDANDALB R5, (RSP), R7 // e713e538 - LDANDD R5, (R6), R7 // c71025f8 - LDANDD R5, (RSP), R7 // e71325f8 - LDANDW R5, (R6), R7 // c71025b8 - LDANDW R5, (RSP), R7 // e71325b8 - LDANDH R5, (R6), R7 // c7102578 - LDANDH R5, (RSP), R7 // e7132578 - LDANDB R5, (R6), R7 // c7102538 - LDANDB R5, (RSP), R7 // e7132538 - LDANDLD R5, (R6), R7 // c71065f8 - LDANDLD R5, (RSP), R7 // e71365f8 - LDANDLW R5, (R6), R7 // c71065b8 - LDANDLW R5, (RSP), R7 // e71365b8 - LDANDLH R5, (R6), R7 // c7106578 - LDANDLH R5, (RSP), R7 // e7136578 - LDANDLB R5, (R6), R7 // c7106538 - LDANDLB R5, (RSP), R7 // e7136538 + LDCLRAD R5, (R6), R7 // c710a5f8 + LDCLRAD R5, (RSP), R7 // e713a5f8 + LDCLRAW R5, (R6), R7 // c710a5b8 + LDCLRAW R5, (RSP), R7 // e713a5b8 + LDCLRAH R5, (R6), R7 // c710a578 + LDCLRAH R5, (RSP), R7 // e713a578 + LDCLRAB R5, (R6), R7 // c710a538 + LDCLRAB R5, (RSP), R7 // e713a538 + LDCLRALD R5, (R6), R7 // c710e5f8 + LDCLRALD R5, (RSP), R7 // e713e5f8 + LDCLRALW R5, (R6), R7 // c710e5b8 + LDCLRALW R5, (RSP), R7 // e713e5b8 + LDCLRALH R5, (R6), R7 // c710e578 + LDCLRALH R5, (RSP), R7 // e713e578 + LDCLRALB R5, (R6), R7 // c710e538 + LDCLRALB R5, (RSP), R7 // e713e538 + LDCLRD R5, (R6), R7 // c71025f8 + LDCLRD R5, (RSP), R7 // e71325f8 + LDCLRW R5, (R6), R7 // c71025b8 + LDCLRW R5, (RSP), R7 // e71325b8 + LDCLRH R5, (R6), R7 // c7102578 + LDCLRH R5, (RSP), R7 // e7132578 + LDCLRB R5, (R6), R7 // c7102538 + LDCLRB R5, (RSP), R7 // e7132538 + LDCLRLD R5, (R6), R7 // c71065f8 + LDCLRLD R5, (RSP), R7 // e71365f8 + LDCLRLW R5, (R6), R7 // c71065b8 + LDCLRLW R5, (RSP), R7 // e71365b8 + LDCLRLH R5, (R6), R7 // c7106578 + LDCLRLH R5, (RSP), R7 // e7136578 + LDCLRLB R5, (R6), R7 // c7106538 + LDCLRLB R5, (RSP), R7 // e7136538 LDEORAD R5, (R6), R7 // c720a5f8 LDEORAD R5, (RSP), R7 // e723a5f8 LDEORAW R5, (R6), R7 // c720a5b8 diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 99e4d62d25..e579f20836 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -123,14 +123,14 @@ TEXT errors(SB),$0 LDADDLW R5, (R6), ZR // ERROR "illegal destination register" LDADDLH R5, (R6), ZR // ERROR "illegal destination register" LDADDLB R5, (R6), ZR // ERROR "illegal destination register" - LDANDD R5, (R6), ZR // ERROR "illegal destination register" - LDANDW R5, (R6), ZR // ERROR "illegal destination register" - LDANDH R5, (R6), ZR // ERROR "illegal destination register" - LDANDB R5, (R6), ZR // ERROR "illegal destination register" - LDANDLD R5, (R6), ZR // ERROR "illegal destination register" - LDANDLW R5, (R6), ZR // ERROR "illegal destination register" - LDANDLH R5, (R6), ZR // ERROR "illegal destination register" - LDANDLB R5, (R6), ZR // ERROR "illegal destination register" + LDCLRD R5, (R6), ZR // ERROR "illegal destination register" + LDCLRW R5, (R6), ZR // ERROR "illegal destination register" + LDCLRH R5, (R6), ZR // ERROR "illegal destination register" + LDCLRB R5, (R6), ZR // ERROR "illegal destination register" + LDCLRLD R5, (R6), ZR // ERROR "illegal destination register" + LDCLRLW R5, (R6), ZR // ERROR "illegal destination register" + LDCLRLH R5, (R6), ZR // ERROR "illegal destination register" + LDCLRLB R5, (R6), ZR // ERROR "illegal destination register" LDEORD R5, (R6), ZR // ERROR "illegal destination register" LDEORW R5, (R6), ZR // ERROR "illegal destination register" LDEORH R5, (R6), ZR // ERROR "illegal destination register" @@ -163,22 +163,22 @@ TEXT errors(SB),$0 LDADDLW R5, (R6), RSP // ERROR "illegal destination register" LDADDLH R5, (R6), RSP // ERROR "illegal destination register" LDADDLB R5, (R6), RSP // ERROR "illegal destination register" - LDANDAD R5, (R6), RSP // ERROR "illegal destination register" - LDANDAW R5, (R6), RSP // ERROR "illegal destination register" - LDANDAH R5, (R6), RSP // ERROR "illegal destination register" - LDANDAB R5, (R6), RSP // ERROR "illegal destination register" - LDANDALD R5, (R6), RSP // ERROR "illegal destination register" - LDANDALW R5, (R6), RSP // ERROR "illegal destination register" - LDANDALH R5, (R6), RSP // ERROR "illegal destination register" - LDANDALB R5, (R6), RSP // ERROR "illegal destination register" - LDANDD R5, (R6), RSP // ERROR "illegal destination register" - LDANDW R5, (R6), RSP // ERROR "illegal destination register" - LDANDH R5, (R6), RSP // ERROR "illegal destination register" - LDANDB R5, (R6), RSP // ERROR "illegal destination register" - LDANDLD R5, (R6), RSP // ERROR "illegal destination register" - LDANDLW R5, (R6), RSP // ERROR "illegal destination register" - LDANDLH R5, (R6), RSP // ERROR "illegal destination register" - LDANDLB R5, (R6), RSP // ERROR "illegal destination register" + LDCLRAD R5, (R6), RSP // ERROR "illegal destination register" + LDCLRAW R5, (R6), RSP // ERROR "illegal destination register" + LDCLRAH R5, (R6), RSP // ERROR "illegal destination register" + LDCLRAB R5, (R6), RSP // ERROR "illegal destination register" + LDCLRALD R5, (R6), RSP // ERROR "illegal destination register" + LDCLRALW R5, (R6), RSP // ERROR "illegal destination register" + LDCLRALH R5, (R6), RSP // ERROR "illegal destination register" + LDCLRALB R5, (R6), RSP // ERROR "illegal destination register" + LDCLRD R5, (R6), RSP // ERROR "illegal destination register" + LDCLRW R5, (R6), RSP // ERROR "illegal destination register" + LDCLRH R5, (R6), RSP // ERROR "illegal destination register" + LDCLRB R5, (R6), RSP // ERROR "illegal destination register" + LDCLRLD R5, (R6), RSP // ERROR "illegal destination register" + LDCLRLW R5, (R6), RSP // ERROR "illegal destination register" + LDCLRLH R5, (R6), RSP // ERROR "illegal destination register" + LDCLRLB R5, (R6), RSP // ERROR "illegal destination register" LDEORAD R5, (R6), RSP // ERROR "illegal destination register" LDEORAW R5, (R6), RSP // ERROR "illegal destination register" LDEORAH R5, (R6), RSP // ERROR "illegal destination register" diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 5844b71ca7..1d1bea505c 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -617,22 +617,6 @@ const ( ALDADDLD ALDADDLH ALDADDLW - ALDANDAB - ALDANDAD - ALDANDAH - ALDANDAW - ALDANDALB - ALDANDALD - ALDANDALH - ALDANDALW - ALDANDB - ALDANDD - ALDANDH - ALDANDW - ALDANDLB - ALDANDLD - ALDANDLH - ALDANDLW ALDAR ALDARB ALDARH @@ -643,6 +627,22 @@ const ( ALDAXRB ALDAXRH ALDAXRW + ALDCLRAB + ALDCLRAD + ALDCLRAH + ALDCLRAW + ALDCLRALB + ALDCLRALD + ALDCLRALH + ALDCLRALW + ALDCLRB + ALDCLRD + ALDCLRH + ALDCLRW + ALDCLRLB + ALDCLRLD + ALDCLRLH + ALDCLRLW ALDEORAB ALDEORAD ALDEORAH diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index fb216f9a94..a98f8c7ed5 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -111,22 +111,6 @@ var Anames = []string{ "LDADDLD", "LDADDLH", "LDADDLW", - "LDANDAB", - "LDANDAD", - "LDANDAH", - "LDANDAW", - "LDANDALB", - "LDANDALD", - "LDANDALH", - "LDANDALW", - "LDANDB", - "LDANDD", - "LDANDH", - "LDANDW", - "LDANDLB", - "LDANDLD", - "LDANDLH", - "LDANDLW", "LDAR", "LDARB", "LDARH", @@ -137,6 +121,22 @@ var Anames = []string{ "LDAXRB", "LDAXRH", "LDAXRW", + "LDCLRAB", + "LDCLRAD", + "LDCLRAH", + "LDCLRAW", + "LDCLRALB", + "LDCLRALD", + "LDCLRALH", + "LDCLRALW", + "LDCLRB", + "LDCLRD", + "LDCLRH", + "LDCLRW", + "LDCLRLB", + "LDCLRLD", + "LDCLRLH", + "LDCLRLW", "LDEORAB", "LDEORAD", "LDEORAH", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 4fc62d5c7f..1a359f1921 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -107,22 +107,22 @@ var atomicLDADD = map[obj.As]uint32{ ALDADDLW: 2<<30 | 0x1c3<<21 | 0x00<<10, ALDADDLH: 1<<30 | 0x1c3<<21 | 0x00<<10, ALDADDLB: 0<<30 | 0x1c3<<21 | 0x00<<10, - ALDANDAD: 3<<30 | 0x1c5<<21 | 0x04<<10, - ALDANDAW: 2<<30 | 0x1c5<<21 | 0x04<<10, - ALDANDAH: 1<<30 | 0x1c5<<21 | 0x04<<10, - ALDANDAB: 0<<30 | 0x1c5<<21 | 0x04<<10, - ALDANDALD: 3<<30 | 0x1c7<<21 | 0x04<<10, - ALDANDALW: 2<<30 | 0x1c7<<21 | 0x04<<10, - ALDANDALH: 1<<30 | 0x1c7<<21 | 0x04<<10, - ALDANDALB: 0<<30 | 0x1c7<<21 | 0x04<<10, - ALDANDD: 3<<30 | 0x1c1<<21 | 0x04<<10, - ALDANDW: 2<<30 | 0x1c1<<21 | 0x04<<10, - ALDANDH: 1<<30 | 0x1c1<<21 | 0x04<<10, - ALDANDB: 0<<30 | 0x1c1<<21 | 0x04<<10, - ALDANDLD: 3<<30 | 0x1c3<<21 | 0x04<<10, - ALDANDLW: 2<<30 | 0x1c3<<21 | 0x04<<10, - ALDANDLH: 1<<30 | 0x1c3<<21 | 0x04<<10, - ALDANDLB: 0<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRAD: 3<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRAW: 2<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRAH: 1<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRAB: 0<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRALD: 3<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRALW: 2<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRALH: 1<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRALB: 0<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRD: 3<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRW: 2<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRH: 1<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRB: 0<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRLD: 3<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRLW: 2<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRLH: 1<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRLB: 0<<30 | 0x1c3<<21 | 0x04<<10, ALDEORAD: 3<<30 | 0x1c5<<21 | 0x08<<10, ALDEORAW: 2<<30 | 0x1c5<<21 | 0x08<<10, ALDEORAH: 1<<30 | 0x1c5<<21 | 0x08<<10, @@ -4028,7 +4028,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 |= uint32(p.From.Reg&31) << 5 o1 |= uint32(p.To.Reg & 31) - case 47: // SWPx/LDADDx/LDANDx/LDEORx/LDORx/CASx Rs, (Rb), Rt + case 47: // SWPx/LDADDx/LDCLRx/LDEORx/LDORx/CASx Rs, (Rb), Rt rs := p.From.Reg rt := p.RegTo2 rb := p.To.Reg -- cgit v1.3 From 5ed81a3d14aa4eda5de87d7fe074b4c913b58511 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Tue, 3 Nov 2020 16:59:25 -0600 Subject: cmd/asm: fix rlwnm reg,reg,const,reg encoding on ppc64 The wrong value for the first reg parameter was selected. Likewise the wrong opcode was selected. This should match rlwnm (rrr type), not rlwinm (irr type). Similarly, fix the optab matching rules so clrlslwi does not match reg,reg,const,reg arguments. This is not a valid operand combination for clrlslwi. Fixes #42368 Change-Id: I4eb16d45a760b9fd3f497ef9863f82465351d39f Reviewed-on: https://go-review.googlesource.com/c/go/+/267421 Reviewed-by: Cherry Zhang Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64.s | 2 ++ src/cmd/internal/obj/ppc64/asm9.go | 26 ++++++-------------------- 2 files changed, 8 insertions(+), 20 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s index 2b1191c44b..8f6eb14f73 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64.s @@ -282,7 +282,9 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 RLWMI $7, R3, $65535, R6 // 50663c3e RLWMICC $7, R3, $65535, R6 // 50663c3f RLWNM $3, R4, $7, R6 // 54861f7e + RLWNM R3, R4, $7, R6 // 5c861f7e RLWNMCC $3, R4, $7, R6 // 54861f7f + RLWNMCC R3, R4, $7, R6 // 5c861f7f RLDMI $0, R4, $7, R6 // 7886076c RLDMICC $0, R4, $7, R6 // 7886076d RLDIMI $0, R4, $7, R6 // 788601cc diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 090fefb4d8..775d27d8e8 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -174,6 +174,7 @@ var optab = []Optab{ {ASRAD, C_SCON, C_NONE, C_NONE, C_REG, 56, 4, 0}, {ARLWMI, C_SCON, C_REG, C_LCON, C_REG, 62, 4, 0}, {ARLWMI, C_REG, C_REG, C_LCON, C_REG, 63, 4, 0}, + {ACLRLSLWI, C_SCON, C_REG, C_LCON, C_REG, 62, 4, 0}, {ARLDMI, C_SCON, C_REG, C_LCON, C_REG, 30, 4, 0}, {ARLDC, C_SCON, C_REG, C_LCON, C_REG, 29, 4, 0}, {ARLDCL, C_SCON, C_REG, C_LCON, C_REG, 29, 4, 0}, @@ -1911,7 +1912,6 @@ func buildop(ctxt *obj.Link) { opset(ARLWMICC, r0) opset(ARLWNM, r0) opset(ARLWNMCC, r0) - opset(ACLRLSLWI, r0) case ARLDMI: opset(ARLDMICC, r0) @@ -2010,6 +2010,7 @@ func buildop(ctxt *obj.Link) { AADDEX, ACMPEQB, AECIWX, + ACLRLSLWI, obj.ANOP, obj.ATEXT, obj.AUNDEF, @@ -3413,25 +3414,10 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { } case 63: /* rlwmi b,s,$mask,a */ - v := c.regoff(&p.From) - switch p.As { - case ACLRLSLWI: - n := c.regoff(p.GetFrom3()) - if n > v || v >= 32 { - // Message will match operands from the ISA even though in the - // code it uses 'v' - c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p) - } - // This is an extended mnemonic described in the ISA C.8.2 - // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n - // It generates the rlwinm directly here. - o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n)) - default: - var mask [2]uint8 - c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) - o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v)) - o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 - } + var mask [2]uint8 + c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(p.From.Reg)) + o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 case 64: /* mtfsf fr[, $m] {,fpcsr} */ var v int32 -- cgit v1.3 From 189931296f6b56090d9d7f49b7936b817189d87d Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Tue, 10 Nov 2020 02:31:28 -0800 Subject: cmd/internal/obj/s390x: fix SYNC instruction encoding SYNC is supposed to correspond to 'fast-BCR-serialization' which is encoded as 'bcr 14,0'. In CL 197178 I accidentally modified the encoding to 'bcr 7,0' which is a no-op. This CL reverses that change. Fixes #42479. Change-Id: I9918d93d720f5e12acc3014cde20d2d32cc87ee5 Reviewed-on: https://go-review.googlesource.com/c/go/+/268797 Run-TryBot: Michael Munday Trust: Michael Munday TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/testdata/s390x.s | 2 ++ src/cmd/internal/obj/s390x/asmz.go | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/s390x.s b/src/cmd/asm/internal/asm/testdata/s390x.s index 03b84cfa62..7c5d26be33 100644 --- a/src/cmd/asm/internal/asm/testdata/s390x.s +++ b/src/cmd/asm/internal/asm/testdata/s390x.s @@ -412,6 +412,8 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16- UNDEF // 00000000 NOPH // 0700 + SYNC // 07e0 + // vector add and sub instructions VAB V3, V4, V4 // e743400000f3 VAH V3, V4, V4 // e743400010f3 diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go index f0f9d5cefc..06921085c9 100644 --- a/src/cmd/internal/obj/s390x/asmz.go +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -3700,7 +3700,7 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) { } case 80: // sync - zRR(op_BCR, uint32(NotEqual), 0, asm) + zRR(op_BCR, 14, 0, asm) // fast-BCR-serialization case 81: // float to fixed and fixed to float moves (no conversion) switch p.As { -- cgit v1.3 From 0433845ad18a355413033bb3495ba3195f4c69ec Mon Sep 17 00:00:00 2001 From: Quey-Liang Kao Date: Sat, 21 Nov 2020 22:48:55 +0800 Subject: cmd/asm, cmd/internal/obj/riscv: fix branch pseudo-instructions Pseudo branch instructions BGT, BGTU, BLE, and BLEU implemented In CL 226397 were translated inconsistently compared to other ones due to the inversion of registers. For instance, while "BLT a, b" generates "jump if a < b", "BLE a, b" generates "jump if b <= a." This CL fixes the translation in the assembler and the tests. Change-Id: Ia757be73e848734ca5b3a790e081f7c4f98c30f2 Reviewed-on: https://go-review.googlesource.com/c/go/+/271911 Reviewed-by: Cherry Zhang Reviewed-by: Joel Sing Run-TryBot: Joel Sing --- src/cmd/asm/internal/asm/testdata/riscvenc.s | 8 ++-- src/cmd/internal/obj/riscv/obj.go | 8 ++-- .../obj/riscv/testdata/testbranch/branch_test.go | 49 ++++++++++------------ 3 files changed, 31 insertions(+), 34 deletions(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/asm/testdata/riscvenc.s b/src/cmd/asm/internal/asm/testdata/riscvenc.s index e30a576473..9a49d96ca0 100644 --- a/src/cmd/asm/internal/asm/testdata/riscvenc.s +++ b/src/cmd/asm/internal/asm/testdata/riscvenc.s @@ -340,11 +340,11 @@ start: // Branch pseudo-instructions BEQZ X5, start // BEQZ X5, 2 // e38602c0 BGEZ X5, start // BGEZ X5, 2 // e3d402c0 - BGT X5, X6, start // BGT X5, X6, 2 // e3c262c0 - BGTU X5, X6, start // BGTU X5, X6, 2 // e3e062c0 + BGT X5, X6, start // BGT X5, X6, 2 // e34253c0 + BGTU X5, X6, start // BGTU X5, X6, 2 // e36053c0 BGTZ X5, start // BGTZ X5, 2 // e34e50be - BLE X5, X6, start // BLE X5, X6, 2 // e3dc62be - BLEU X5, X6, start // BLEU X5, X6, 2 // e3fa62be + BLE X5, X6, start // BLE X5, X6, 2 // e35c53be + BLEU X5, X6, start // BLEU X5, X6, 2 // e37a53be BLEZ X5, start // BLEZ X5, 2 // e35850be BLTZ X5, start // BLTZ X5, 2 // e3c602be BNEZ X5, start // BNEZ X5, 2 // e39402be diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 0cffa54fa6..9257a6453a 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -1777,15 +1777,15 @@ func instructionsForProg(p *obj.Prog) []*instruction { case ABGEZ: ins.as, ins.rs1, ins.rs2 = ABGE, REG_ZERO, uint32(p.From.Reg) case ABGT: - ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.Reg), uint32(p.From.Reg) + ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), uint32(p.Reg) case ABGTU: - ins.as, ins.rs1, ins.rs2 = ABLTU, uint32(p.Reg), uint32(p.From.Reg) + ins.as, ins.rs1, ins.rs2 = ABLTU, uint32(p.From.Reg), uint32(p.Reg) case ABGTZ: ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), REG_ZERO case ABLE: - ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.Reg), uint32(p.From.Reg) + ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), uint32(p.Reg) case ABLEU: - ins.as, ins.rs1, ins.rs2 = ABGEU, uint32(p.Reg), uint32(p.From.Reg) + ins.as, ins.rs1, ins.rs2 = ABGEU, uint32(p.From.Reg), uint32(p.Reg) case ABLEZ: ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), REG_ZERO case ABLTZ: diff --git a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go index 5412577a05..279aeb2c32 100644 --- a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go +++ b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go @@ -43,33 +43,34 @@ func TestBranchCondition(t *testing.T) { {"BGEU", 0, -1, testBGEU, false}, {"BGEU", -1, 0, testBGEU, true}, {"BGEU", 1, 0, testBGEU, true}, - {"BGT", 0, 1, testBGT, true}, + {"BGT", 0, 1, testBGT, false}, {"BGT", 0, 0, testBGT, false}, - {"BGT", 0, -1, testBGT, false}, - {"BGT", -1, 0, testBGT, true}, - {"BGT", 1, 0, testBGT, false}, - {"BGTU", 0, 1, testBGTU, true}, - {"BGTU", 0, -1, testBGTU, true}, - {"BGTU", -1, 0, testBGTU, false}, - {"BGTU", 1, 0, testBGTU, false}, - {"BLE", 0, 1, testBLE, false}, - {"BLE", 0, -1, testBLE, true}, + {"BGT", 0, -1, testBGT, true}, + {"BGT", -1, 0, testBGT, false}, + {"BGT", 1, 0, testBGT, true}, + {"BGTU", 0, 1, testBGTU, false}, + {"BGTU", 0, 0, testBGTU, false}, + {"BGTU", 0, -1, testBGTU, false}, + {"BGTU", -1, 0, testBGTU, true}, + {"BGTU", 1, 0, testBGTU, true}, + {"BLE", 0, 1, testBLE, true}, {"BLE", 0, 0, testBLE, true}, - {"BLE", -1, 0, testBLE, false}, - {"BLE", 1, 0, testBLE, true}, - {"BLEU", 0, 1, testBLEU, false}, - {"BLEU", 0, -1, testBLEU, false}, + {"BLE", 0, -1, testBLE, false}, + {"BLE", -1, 0, testBLE, true}, + {"BLE", 1, 0, testBLE, false}, + {"BLEU", 0, 1, testBLEU, true}, {"BLEU", 0, 0, testBLEU, true}, - {"BLEU", -1, 0, testBLEU, true}, - {"BLEU", 1, 0, testBLEU, true}, + {"BLEU", 0, -1, testBLEU, true}, + {"BLEU", -1, 0, testBLEU, false}, + {"BLEU", 1, 0, testBLEU, false}, {"BLT", 0, 1, testBLT, true}, - {"BLT", 0, -1, testBLT, false}, {"BLT", 0, 0, testBLT, false}, + {"BLT", 0, -1, testBLT, false}, {"BLT", -1, 0, testBLT, true}, {"BLT", 1, 0, testBLT, false}, {"BLTU", 0, 1, testBLTU, true}, - {"BLTU", 0, -1, testBLTU, true}, {"BLTU", 0, 0, testBLTU, false}, + {"BLTU", 0, -1, testBLTU, true}, {"BLTU", -1, 0, testBLTU, false}, {"BLTU", 1, 0, testBLTU, false}, } @@ -82,17 +83,13 @@ func TestBranchCondition(t *testing.T) { case "BGEU": fn = func(a, b int64) bool { return uint64(a) >= uint64(b) } case "BGT": - // TODO: Currently reversed. - fn = func(a, b int64) bool { return b > a } + fn = func(a, b int64) bool { return a > b } case "BGTU": - // TODO: Currently reversed. - fn = func(a, b int64) bool { return uint64(b) > uint64(a) } + fn = func(a, b int64) bool { return uint64(a) > uint64(b) } case "BLE": - // TODO: Currently reversed. - fn = func(a, b int64) bool { return b <= a } + fn = func(a, b int64) bool { return a <= b } case "BLEU": - // TODO: Currently reversed. - fn = func(a, b int64) bool { return uint64(b) <= uint64(a) } + fn = func(a, b int64) bool { return uint64(a) <= uint64(b) } case "BLT": fn = func(a, b int64) bool { return a < b } case "BLTU": -- cgit v1.3 From 56b783ad94f9a55163d494d5b34c783a9603d478 Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Thu, 10 Dec 2020 08:44:44 -0500 Subject: cmd/go, cmd/asm: pass -linkshared to assembler for shared linkage builds When the -linkshared build mode is in effect, the Go command passes the "-linkshared" command line option to the compiler so as to insure special handling for things like builtin functions (which may appear in a shared library and not the main executable). This patch extends this behavior to the assembler, since the assembler may also wind up referencing builtins when emitting a stack-split prolog. Fixes #43107. Change-Id: I56eaded79789b083f3c3d800fb140353dee33ba9 Reviewed-on: https://go-review.googlesource.com/c/go/+/276932 Trust: Than McIntosh Run-TryBot: Than McIntosh TryBot-Result: Go Bot Reviewed-by: Jay Conrod Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/flags/flags.go | 1 + src/cmd/asm/main.go | 1 + src/cmd/go/internal/work/init.go | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'src/cmd/asm/internal') diff --git a/src/cmd/asm/internal/flags/flags.go b/src/cmd/asm/internal/flags/flags.go index 426e0156aa..1335860315 100644 --- a/src/cmd/asm/internal/flags/flags.go +++ b/src/cmd/asm/internal/flags/flags.go @@ -20,6 +20,7 @@ var ( TrimPath = flag.String("trimpath", "", "remove prefix from recorded source file paths") Shared = flag.Bool("shared", false, "generate code that can be linked into a shared library") Dynlink = flag.Bool("dynlink", false, "support references to Go symbols defined in other shared libraries") + Linkshared = flag.Bool("linkshared", false, "generate code that will be linked against Go shared libraries") AllErrors = flag.Bool("e", false, "no limit on number of errors reported") SymABIs = flag.Bool("gensymabis", false, "write symbol ABI information to output file, don't assemble") Importpath = flag.String("p", "", "set expected package import to path") diff --git a/src/cmd/asm/main.go b/src/cmd/asm/main.go index 149925d23f..31636e3045 100644 --- a/src/cmd/asm/main.go +++ b/src/cmd/asm/main.go @@ -37,6 +37,7 @@ func main() { ctxt := obj.Linknew(architecture.LinkArch) ctxt.Debugasm = flags.PrintOut ctxt.Flag_dynlink = *flags.Dynlink + ctxt.Flag_linkshared = *flags.Linkshared ctxt.Flag_shared = *flags.Shared || *flags.Dynlink ctxt.IsAsm = true ctxt.Pkgpath = *flags.Importpath diff --git a/src/cmd/go/internal/work/init.go b/src/cmd/go/internal/work/init.go index 102def4838..ba7c7c2fbb 100644 --- a/src/cmd/go/internal/work/init.go +++ b/src/cmd/go/internal/work/init.go @@ -241,7 +241,8 @@ func buildModeInit() { if gccgo { codegenArg = "-fPIC" } else { - forcedAsmflags = append(forcedAsmflags, "-D=GOBUILDMODE_shared=1") + forcedAsmflags = append(forcedAsmflags, "-D=GOBUILDMODE_shared=1", + "-linkshared") codegenArg = "-dynlink" forcedGcflags = append(forcedGcflags, "-linkshared") // TODO(mwhudson): remove -w when that gets fixed in linker. -- cgit v1.3