From dfdc3880b01d46d1d8125ab9eea0606b2fa5b819 Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Thu, 20 Aug 2020 17:02:18 +0800 Subject: cmd/internal/obj/arm64: enable some SIMD instructions Enable VBSL, VBIT, VCMTST, VUXTL VUXTL2 and FMOVQ SIMD instructions required by the issue #40725. And FMOVQ instrucion is used to move a large constant to a Vn register. Add test cases. Fixes #40725 Change-Id: I1cac1922a0a0165d698a4b73a41f7a5f0a0ad549 Reviewed-on: https://go-review.googlesource.com/c/go/+/249758 Reviewed-by: Cherry Zhang --- src/cmd/internal/obj/arm64/a.out.go | 6 ++ src/cmd/internal/obj/arm64/anames.go | 6 ++ src/cmd/internal/obj/arm64/asm7.go | 121 +++++++++++++++++++++++++++++++---- 3 files changed, 119 insertions(+), 14 deletions(-) (limited to 'src/cmd/internal/obj/arm64') diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 03e0278a33..ab065e07e5 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -874,6 +874,7 @@ const ( AFLDPS AFMOVD AFMOVS + AFMOVQ AFMULD AFMULS AFNEGD @@ -987,9 +988,14 @@ const ( AVUSHR AVSHL AVSRI + AVBSL + AVBIT AVTBL AVZIP1 AVZIP2 + AVCMTST + AVUXTL + AVUXTL2 ALAST AB = obj.AJMP ABL = obj.ACALL diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 65ecd007ea..8961f04b0c 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -381,6 +381,7 @@ var Anames = []string{ "FLDPS", "FMOVD", "FMOVS", + "FMOVQ", "FMULD", "FMULS", "FNEGD", @@ -494,8 +495,13 @@ var Anames = []string{ "VUSHR", "VSHL", "VSRI", + "VBSL", + "VBIT", "VTBL", "VZIP1", "VZIP2", + "VCMTST", + "VUXTL", + "VUXTL2", "LAST", } diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 0b90e31392..7ce18d0f13 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -393,6 +393,11 @@ var optab = []Optab{ {AMOVK, C_VCON, C_NONE, C_NONE, C_REG, 33, 4, 0, 0, 0}, {AMOVD, C_AACON, C_NONE, C_NONE, C_REG, 4, 4, REGFROM, 0, 0}, + // Move a large constant to a Vn. + {AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + {AFMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + {AFMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + /* jump operations */ {AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, {ABL, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, @@ -403,12 +408,14 @@ var optab = []Optab{ {obj.ARET, C_NONE, C_NONE, C_NONE, C_REG, 6, 4, 0, 0, 0}, {obj.ARET, C_NONE, C_NONE, C_NONE, C_ZOREG, 6, 4, 0, 0, 0}, {ABEQ, C_NONE, C_NONE, C_NONE, C_SBRA, 7, 4, 0, 0, 0}, - {AADRP, C_SBRA, C_NONE, C_NONE, C_REG, 60, 4, 0, 0, 0}, - {AADR, C_SBRA, C_NONE, C_NONE, C_REG, 61, 4, 0, 0, 0}, {ACBZ, C_REG, C_NONE, C_NONE, C_SBRA, 39, 4, 0, 0, 0}, {ATBZ, C_VCON, C_REG, C_NONE, C_SBRA, 40, 4, 0, 0, 0}, {AERET, C_NONE, C_NONE, C_NONE, C_NONE, 41, 4, 0, 0, 0}, + // get a PC-relative address + {AADRP, C_SBRA, C_NONE, C_NONE, C_REG, 60, 4, 0, 0, 0}, + {AADR, C_SBRA, C_NONE, C_NONE, C_REG, 61, 4, 0, 0, 0}, + {ACLREX, C_NONE, C_NONE, C_NONE, C_VCON, 38, 4, 0, 0, 0}, {ACLREX, C_NONE, C_NONE, C_NONE, C_NONE, 38, 4, 0, 0, 0}, {ABFM, C_VCON, C_REG, C_VCON, C_REG, 42, 4, 0, 0, 0}, @@ -473,6 +480,7 @@ var optab = []Optab{ {AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, 100, 4, 0, 0, 0}, {AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, 95, 4, 0, 0, 0}, {AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, /* conditional operations */ {ACSEL, C_COND, C_REG, C_REG, C_REG, 18, 4, 0, 0, 0}, @@ -2657,7 +2665,7 @@ func buildop(ctxt *obj.Link) { case AFCSELD: oprangeset(AFCSELS, t) - case AFMOVS, AFMOVD: + case AFMOVS, AFMOVD, AFMOVQ: break case AFCVTZSD: @@ -2740,6 +2748,9 @@ func buildop(ctxt *obj.Link) { oprangeset(AVCMEQ, t) oprangeset(AVORR, t) oprangeset(AVEOR, t) + oprangeset(AVBSL, t) + oprangeset(AVBIT, t) + oprangeset(AVCMTST, t) case AVADD: oprangeset(AVSUB, t) @@ -2787,6 +2798,9 @@ func buildop(ctxt *obj.Link) { case AVZIP1: oprangeset(AVZIP2, t) + case AVUXTL: + oprangeset(AVUXTL2, t) + case AVLD1R: oprangeset(AVLD2, t) oprangeset(AVLD2R, t) @@ -4163,7 +4177,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Add = 0 rel.Type = objabi.R_ARM64_GOTPCREL - case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls Vm., Vn., Vd. */ + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub Vm., Vn., Vd. */ af := int((p.From.Reg >> 5) & 15) af3 := int((p.Reg >> 5) & 15) at := int((p.To.Reg >> 5) & 15) @@ -4204,17 +4218,24 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { c.ctxt.Diag("invalid arrangement: %v", p) } - if (p.As == AVORR || p.As == AVAND || p.As == AVEOR) && - (af != ARNG_16B && af != ARNG_8B) { - c.ctxt.Diag("invalid arrangement: %v", p) - } else if (p.As == AVFMLA || p.As == AVFMLS) && - (af != ARNG_2D && af != ARNG_2S && af != ARNG_4S) { - c.ctxt.Diag("invalid arrangement: %v", p) - } else if p.As == AVORR { - size = 2 - } else if p.As == AVAND || p.As == AVEOR { + switch p.As { + case AVORR, AVAND, AVEOR, AVBIT, AVBSL: + if af != ARNG_16B && af != ARNG_8B { + c.ctxt.Diag("invalid arrangement: %v", p) + } + case AVFMLA, AVFMLS: + if af != ARNG_2D && af != ARNG_2S && af != ARNG_4S { + c.ctxt.Diag("invalid arrangement: %v", p) + } + } + switch p.As { + case AVAND, AVEOR: size = 0 - } else if p.As == AVFMLA || p.As == AVFMLS { + case AVBSL: + size = 1 + case AVORR, AVBIT: + size = 2 + case AVFMLA, AVFMLS: if af == ARNG_2D { size = 1 } else { @@ -5096,6 +5117,59 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 = q<<30 | 0xe<<24 | len<<13 o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31) + case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool. + o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg)) + + case 102: // VUXTL{2} Vn., Vd. + af := int((p.From.Reg >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + var Q, immh uint32 + switch at { + case ARNG_8H: + if af == ARNG_8B { + immh = 1 + Q = 0 + } else if af == ARNG_16B { + immh = 1 + Q = 1 + } else { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + case ARNG_4S: + if af == ARNG_4H { + immh = 2 + Q = 0 + } else if af == ARNG_8H { + immh = 2 + Q = 1 + } else { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + case ARNG_2D: + if af == ARNG_2S { + immh = 4 + Q = 0 + } else if af == ARNG_4S { + immh = 4 + Q = 1 + } else { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + default: + c.ctxt.Diag("operand mismatch: %v\n", p) + } + + if p.As == AVUXTL && Q == 1 { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + if p.As == AVUXTL2 && Q == 0 { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + o1 |= Q<<30 | immh<<19 | uint32((rf&31)<<5) | uint32(rt&31) } out[0] = o1 out[1] = o2 @@ -5662,6 +5736,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVADD: return 7<<25 | 1<<21 | 1<<15 | 1<<10 + case AVSUB: + return 0x17<<25 | 1<<21 | 1<<15 | 1<<10 + case AVADDP: return 7<<25 | 1<<21 | 1<<15 | 15<<10 @@ -5724,6 +5801,18 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVLD2R, AVLD4R: return 0xD<<24 | 3<<21 + + case AVBIT: + return 1<<29 | 0x75<<21 | 7<<10 + + case AVBSL: + return 1<<29 | 0x73<<21 | 7<<10 + + case AVCMTST: + return 0xE<<24 | 1<<21 | 0x23<<10 + + case AVUXTL, AVUXTL2: + return 0x5e<<23 | 0x29<<10 } c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) @@ -6566,6 +6655,10 @@ func (c *ctxt7) omovlit(as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32 { fp = 1 w = 1 /* 64-bit SIMD/FP */ + case AFMOVQ: + fp = 1 + w = 2 /* 128-bit SIMD/FP */ + case AMOVD: if p.Pool.As == ADWORD { w = 1 /* 64-bit */ -- cgit v1.3 From d7ab277eed4d2e5ede4f3361adf42d4ad76ced8f Mon Sep 17 00:00:00 2001 From: Junchen Li Date: Mon, 31 Aug 2020 13:32:33 +0800 Subject: cmd/asm: add more SIMD instructions on arm64 This CL adds USHLL, USHLL2, UZP1, UZP2, and BIF instructions requested by #40725. And since UXTL* are aliases of USHLL*, this CL also merges them into one case. Updates #40725 Change-Id: I404a4fdaf953319f72eea548175bec1097a2a816 Reviewed-on: https://go-review.googlesource.com/c/go/+/253659 Reviewed-by: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot --- src/cmd/asm/internal/asm/testdata/arm64.s | 20 +++++ src/cmd/asm/internal/asm/testdata/arm64error.s | 8 ++ src/cmd/internal/obj/arm64/a.out.go | 9 +- src/cmd/internal/obj/arm64/anames.go | 9 +- src/cmd/internal/obj/arm64/asm7.go | 109 +++++++++++++------------ 5 files changed, 100 insertions(+), 55 deletions(-) (limited to 'src/cmd/internal/obj/arm64') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 451ca749ba..e106ff2ae1 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -156,6 +156,26 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VCMTST V2.B8, V29.B8, V2.B8 // a28f220e VCMTST V2.D2, V23.D2, V3.D2 // e38ee24e VSUB V2.B8, V30.B8, V30.B8 // de87222e + VUZP1 V0.B8, V30.B8, V1.B8 // c11b000e + VUZP1 V1.B16, V29.B16, V2.B16 // a21b014e + VUZP1 V2.H4, V28.H4, V3.H4 // 831b420e + VUZP1 V3.H8, V27.H8, V4.H8 // 641b434e + VUZP1 V28.S2, V2.S2, V5.S2 // 45189c0e + VUZP1 V29.S4, V1.S4, V6.S4 // 26189d4e + VUZP1 V30.D2, V0.D2, V7.D2 // 0718de4e + VUZP2 V0.D2, V30.D2, V1.D2 // c15bc04e + VUZP2 V30.D2, V0.D2, V29.D2 // 1d58de4e + VUSHLL $0, V30.B8, V30.H8 // dea7082f + VUSHLL $0, V30.H4, V29.S4 // dda7102f + VUSHLL $0, V29.S2, V2.D2 // a2a7202f + VUSHLL2 $0, V30.B16, V2.H8 // c2a7086f + VUSHLL2 $0, V30.H8, V30.S4 // dea7106f + VUSHLL2 $0, V29.S4, V2.D2 // a2a7206f + VUSHLL $7, V30.B8, V30.H8 // dea70f2f + VUSHLL $15, V30.H4, V29.S4 // dda71f2f + VUSHLL2 $31, V30.S4, V2.D2 // c2a73f6f + VBIF V0.B8, V30.B8, V1.B8 // c11fe02e + VBIF V30.B16, V0.B16, V2.B16 // 021cfe6e MOVD (R2)(R6.SXTW), R4 // 44c866f8 MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8 MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8 diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 2a911b4cce..20b1f3e9f0 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -345,4 +345,12 @@ TEXT errors(SB),$0 VUXTL V30.D2, V30.H8 // ERROR "operand mismatch" VUXTL2 V20.B8, V21.H8 // ERROR "operand mismatch" VUXTL V3.D2, V4.B8 // ERROR "operand mismatch" + VUZP1 V0.B8, V30.B8, V1.B16 // ERROR "operand mismatch" + VUZP2 V0.Q1, V30.Q1, V1.Q1 // ERROR "invalid arrangement" + VUSHLL $0, V30.D2, V30.H8 // ERROR "operand mismatch" + VUSHLL2 $0, V20.B8, V21.H8 // ERROR "operand mismatch" + VUSHLL $8, V30.B8, V30.H8 // ERROR "shift amount out of range" + VUSHLL2 $32, V30.S4, V2.D2 // ERROR "shift amount out of range" + VBIF V0.B8, V1.B8, V2.B16 // ERROR "operand mismatch" + VBIF V0.D2, V1.D2, V2.D2 // ERROR "invalid arrangement" RET diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index ab065e07e5..2839da1437 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -954,6 +954,7 @@ const ( AVADD AVADDP AVAND + AVBIF AVCMEQ AVCNT AVEOR @@ -986,6 +987,12 @@ const ( AVEXT AVRBIT AVUSHR + AVUSHLL + AVUSHLL2 + AVUXTL + AVUXTL2 + AVUZP1 + AVUZP2 AVSHL AVSRI AVBSL @@ -994,8 +1001,6 @@ const ( AVZIP1 AVZIP2 AVCMTST - AVUXTL - AVUXTL2 ALAST AB = obj.AJMP ABL = obj.ACALL diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 8961f04b0c..48c066abfd 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -461,6 +461,7 @@ var Anames = []string{ "VADD", "VADDP", "VAND", + "VBIF", "VCMEQ", "VCNT", "VEOR", @@ -493,6 +494,12 @@ var Anames = []string{ "VEXT", "VRBIT", "VUSHR", + "VUSHLL", + "VUSHLL2", + "VUXTL", + "VUXTL2", + "VUZP1", + "VUZP2", "VSHL", "VSRI", "VBSL", @@ -501,7 +508,5 @@ var Anames = []string{ "VZIP1", "VZIP2", "VCMTST", - "VUXTL", - "VUXTL2", "LAST", } diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 7ce18d0f13..df4bbbbd35 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -480,6 +480,7 @@ var optab = []Optab{ {AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, 100, 4, 0, 0, 0}, {AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, 95, 4, 0, 0, 0}, {AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVUSHLL, C_VCON, C_ARNG, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, /* conditional operations */ @@ -2751,6 +2752,9 @@ func buildop(ctxt *obj.Link) { oprangeset(AVBSL, t) oprangeset(AVBIT, t) oprangeset(AVCMTST, t) + oprangeset(AVUZP1, t) + oprangeset(AVUZP2, t) + oprangeset(AVBIF, t) case AVADD: oprangeset(AVSUB, t) @@ -2801,6 +2805,9 @@ func buildop(ctxt *obj.Link) { case AVUXTL: oprangeset(AVUXTL2, t) + case AVUSHLL: + oprangeset(AVUSHLL2, t) + case AVLD1R: oprangeset(AVLD2, t) oprangeset(AVLD2R, t) @@ -4177,7 +4184,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Add = 0 rel.Type = objabi.R_ARM64_GOTPCREL - case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub Vm., Vn., Vd. */ + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2 Vm., Vn., Vd. */ af := int((p.From.Reg >> 5) & 15) af3 := int((p.Reg >> 5) & 15) at := int((p.To.Reg >> 5) & 15) @@ -4219,7 +4226,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } switch p.As { - case AVORR, AVAND, AVEOR, AVBIT, AVBSL: + case AVORR, AVAND, AVEOR, AVBIT, AVBSL, AVBIF: if af != ARNG_16B && af != ARNG_8B { c.ctxt.Diag("invalid arrangement: %v", p) } @@ -4233,7 +4240,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { size = 0 case AVBSL: size = 1 - case AVORR, AVBIT: + case AVORR, AVBIT, AVBIF: size = 2 case AVFMLA, AVFMLS: if af == ARNG_2D { @@ -5120,56 +5127,44 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool. o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg)) - case 102: // VUXTL{2} Vn., Vd. - af := int((p.From.Reg >> 5) & 15) - at := int((p.To.Reg >> 5) & 15) - var Q, immh uint32 - switch at { - case ARNG_8H: - if af == ARNG_8B { - immh = 1 - Q = 0 - } else if af == ARNG_16B { - immh = 1 - Q = 1 - } else { - c.ctxt.Diag("operand mismatch: %v\n", p) - } - case ARNG_4S: - if af == ARNG_4H { - immh = 2 - Q = 0 - } else if af == ARNG_8H { - immh = 2 - Q = 1 - } else { - c.ctxt.Diag("operand mismatch: %v\n", p) - } - case ARNG_2D: - if af == ARNG_2S { - immh = 4 - Q = 0 - } else if af == ARNG_4S { - immh = 4 - Q = 1 - } else { - c.ctxt.Diag("operand mismatch: %v\n", p) - } + case 102: /* vushll, vushll2, vuxtl, vuxtl2 */ + o1 = c.opirr(p, p.As) + rf := p.Reg + af := uint8((p.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + shift := int(p.From.Offset) + if p.As == AVUXTL || p.As == AVUXTL2 { + rf = p.From.Reg + af = uint8((p.From.Reg >> 5) & 15) + shift = 0 + } + + pack := func(q, x, y uint8) uint32 { + return uint32(q)<<16 | uint32(x)<<8 | uint32(y) + } + + var Q uint8 = uint8(o1>>30) & 1 + var immh, width uint8 + switch pack(Q, af, at) { + case pack(0, ARNG_8B, ARNG_8H): + immh, width = 1, 8 + case pack(1, ARNG_16B, ARNG_8H): + immh, width = 1, 8 + case pack(0, ARNG_4H, ARNG_4S): + immh, width = 2, 16 + case pack(1, ARNG_8H, ARNG_4S): + immh, width = 2, 16 + case pack(0, ARNG_2S, ARNG_2D): + immh, width = 4, 32 + case pack(1, ARNG_4S, ARNG_2D): + immh, width = 4, 32 default: c.ctxt.Diag("operand mismatch: %v\n", p) } - - if p.As == AVUXTL && Q == 1 { - c.ctxt.Diag("operand mismatch: %v\n", p) + if !(0 <= shift && shift <= int(width-1)) { + c.ctxt.Diag("shift amount out of range: %v\n", p) } - if p.As == AVUXTL2 && Q == 0 { - c.ctxt.Diag("operand mismatch: %v\n", p) - } - - o1 = c.oprrr(p, p.As) - rf := int((p.From.Reg) & 31) - rt := int((p.To.Reg) & 31) - o1 |= Q<<30 | immh<<19 | uint32((rf&31)<<5) | uint32(rt&31) + o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31) } out[0] = o1 out[1] = o2 @@ -5802,6 +5797,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVLD2R, AVLD4R: return 0xD<<24 | 3<<21 + case AVBIF: + return 1<<29 | 7<<25 | 7<<21 | 7<<10 + case AVBIT: return 1<<29 | 0x75<<21 | 7<<10 @@ -5811,8 +5809,11 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVCMTST: return 0xE<<24 | 1<<21 | 0x23<<10 - case AVUXTL, AVUXTL2: - return 0x5e<<23 | 0x29<<10 + case AVUZP1: + return 7<<25 | 3<<11 + + case AVUZP2: + return 7<<25 | 1<<14 | 3<<11 } c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) @@ -6011,6 +6012,12 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVSRI: return 0x5E<<23 | 17<<10 + + case AVUSHLL, AVUXTL: + return 1<<29 | 15<<24 | 0x29<<10 + + case AVUSHLL2, AVUXTL2: + return 3<<29 | 15<<24 | 0x29<<10 } c.ctxt.Diag("%v: bad irr %v", p, a) -- cgit v1.3 From a86b6f23f08fd42154bc5bbfa417da6ee5ef48fb Mon Sep 17 00:00:00 2001 From: diaxu01 Date: Thu, 2 Apr 2020 02:39:28 +0000 Subject: cmd/internal/obj/arm64: optimize the instruction of moving long effective stack address Currently, when the offset of "MOVD $offset(Rn), Rd" is a large positive constant or a negative constant, the assembler will load this offset from the constant pool.This patch gets rid of the constant pool by encoding the offset into two ADD instructions if it's a large positive constant or one SUB instruction if negative. For very large negative offset, it is rarely used, here we don't optimize this case. Optimized case 1: MOVD $-0x100000(R7), R0 Before: LDR 0x67670(constant pool), R27; ADD R27.UXTX, R0, R7 After: SUB $0x100000, R7, R0 Optimized case 2: MOVD $0x123468(R7), R0 Before: LDR 0x67670(constant pool), R27; ADD R27.UXTX, R0, R7 After: ADD $0x123000, R7, R27; ADD $0x000468, R27, R0 1. Binary size before/after. binary size change pkg/linux_arm64 +4KB pkg/tool/linux_arm64 no change go no change gofmt no change 2. go1 benckmark. name old time/op new time/op delta pkg:test/bench/go1 goos:linux goarch:arm64 BinaryTree17-64 7335721401.800000ns +-40% 6264542009.800000ns +-14% ~ (p=0.421 n=5+5) Fannkuch11-64 3886551822.600000ns +- 0% 3875870590.200000ns +- 0% ~ (p=0.151 n=5+5) FmtFprintfEmpty-64 82.960000ns +- 1% 83.900000ns +- 2% +1.13% (p=0.048 n=5+5) FmtFprintfString-64 149.200000ns +- 1% 148.000000ns +- 0% -0.80% (p=0.016 n=5+4) FmtFprintfInt-64 177.000000ns +- 0% 178.400000ns +- 2% ~ (p=0.794 n=4+5) FmtFprintfIntInt-64 240.200000ns +- 2% 239.400000ns +- 4% ~ (p=0.302 n=5+5) FmtFprintfPrefixedInt-64 300.400000ns +- 0% 299.200000ns +- 1% ~ (p=0.119 n=5+5) FmtFprintfFloat-64 360.000000ns +- 0% 361.600000ns +- 3% ~ (p=0.349 n=4+5) FmtManyArgs-64 1064.400000ns +- 1% 1061.400000ns +- 0% ~ (p=0.087 n=5+5) GobDecode-64 12080404.400000ns +- 2% 11637601.000000ns +- 1% -3.67% (p=0.008 n=5+5) GobEncode-64 8474973.800000ns +- 2% 7977801.600000ns +- 2% -5.87% (p=0.008 n=5+5) Gzip-64 416501238.400000ns +- 0% 410463405.400000ns +- 0% -1.45% (p=0.008 n=5+5) Gunzip-64 58088415.200000ns +- 0% 58826209.600000ns +- 0% +1.27% (p=0.008 n=5+5) HTTPClientServer-64 128660.200000ns +-23% 117840.800000ns +- 8% ~ (p=0.222 n=5+5) JSONEncode-64 17547746.800000ns +- 4% 17216180.000000ns +- 1% ~ (p=0.222 n=5+5) JSONDecode-64 80879896.000000ns +- 1% 80063737.200000ns +- 0% -1.01% (p=0.008 n=5+5) Mandelbrot200-64 5484901.600000ns +- 0% 5483614.400000ns +- 0% ~ (p=0.310 n=5+5) GoParse-64 6201166.800000ns +- 6% 6150920.600000ns +- 1% ~ (p=0.548 n=5+5) RegexpMatchEasy0_32-64 135.000000ns +- 0% 139.200000ns +- 7% ~ (p=0.643 n=5+5) RegexpMatchEasy0_1K-64 484.600000ns +- 2% 483.800000ns +- 2% ~ (p=0.984 n=5+5) RegexpMatchEasy1_32-64 128.000000ns +- 1% 124.600000ns +- 1% -2.66% (p=0.008 n=5+5) RegexpMatchEasy1_1K-64 769.400000ns +- 2% 761.400000ns +- 1% ~ (p=0.460 n=5+5) RegexpMatchMedium_32-64 12.900000ns +- 0% 12.500000ns +- 0% -3.10% (p=0.008 n=5+5) RegexpMatchMedium_1K-64 57879.200000ns +- 1% 56512.200000ns +- 0% -2.36% (p=0.008 n=5+5) RegexpMatchHard_32-64 3091.600000ns +- 1% 3071.000000ns +- 0% -0.67% (p=0.048 n=5+5) RegexpMatchHard_1K-64 92941.200000ns +- 1% 92794.000000ns +- 0% ~ (p=1.000 n=5+5) Revcomp-64 1695605187.000000ns +-54% 1821697637.400000ns +-47% ~ (p=1.000 n=5+5) Template-64 112839686.800000ns +- 1% 109964069.200000ns +- 3% ~ (p=0.095 n=5+5) TimeParse-64 587.000000ns +- 0% 587.000000ns +- 0% ~ (all equal) TimeFormat-64 586.000000ns +- 1% 584.200000ns +- 1% ~ (p=0.659 n=5+5) [Geo mean] 81804.262218ns 80694.712973ns -1.36% name old speed new speed delta pkg:test/bench/go1 goos:linux goarch:arm64 GobDecode-64 63.6MB/s +- 2% 66.0MB/s +- 1% +3.78% (p=0.008 n=5+5) GobEncode-64 90.6MB/s +- 2% 96.2MB/s +- 2% +6.23% (p=0.008 n=5+5) Gzip-64 46.6MB/s +- 0% 47.3MB/s +- 0% +1.47% (p=0.008 n=5+5) Gunzip-64 334MB/s +- 0% 330MB/s +- 0% -1.25% (p=0.008 n=5+5) JSONEncode-64 111MB/s +- 4% 113MB/s +- 1% ~ (p=0.222 n=5+5) JSONDecode-64 24.0MB/s +- 1% 24.2MB/s +- 0% +1.02% (p=0.008 n=5+5) GoParse-64 9.35MB/s +- 6% 9.42MB/s +- 1% ~ (p=0.571 n=5+5) RegexpMatchEasy0_32-64 237MB/s +- 0% 231MB/s +- 7% ~ (p=0.690 n=5+5) RegexpMatchEasy0_1K-64 2.11GB/s +- 2% 2.12GB/s +- 2% ~ (p=1.000 n=5+5) RegexpMatchEasy1_32-64 250MB/s +- 1% 257MB/s +- 1% +2.63% (p=0.008 n=5+5) RegexpMatchEasy1_1K-64 1.33GB/s +- 2% 1.35GB/s +- 1% ~ (p=0.548 n=5+5) RegexpMatchMedium_32-64 77.6MB/s +- 0% 79.8MB/s +- 0% +2.80% (p=0.008 n=5+5) RegexpMatchMedium_1K-64 17.7MB/s +- 1% 18.1MB/s +- 0% +2.41% (p=0.008 n=5+5) RegexpMatchHard_32-64 10.4MB/s +- 1% 10.4MB/s +- 0% ~ (p=0.056 n=5+5) RegexpMatchHard_1K-64 11.0MB/s +- 1% 11.0MB/s +- 0% ~ (p=0.984 n=5+5) Revcomp-64 188MB/s +-71% 155MB/s +-71% ~ (p=1.000 n=5+5) Template-64 17.2MB/s +- 1% 17.7MB/s +- 3% ~ (p=0.095 n=5+5) [Geo mean] 79.2MB/s 79.3MB/s +0.24% Change-Id: I593ac3e7037afafc3605ad4b0cfb51d5dd88015d Reviewed-on: https://go-review.googlesource.com/c/go/+/232438 Trust: Alberto Donizetti Run-TryBot: Alberto Donizetti TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/testdata/arm64.s | 15 ++++++++-- src/cmd/internal/obj/arm64/a.out.go | 7 +++-- src/cmd/internal/obj/arm64/anames7.go | 1 + src/cmd/internal/obj/arm64/asm7.go | 46 +++++++++++++++++++++---------- 4 files changed, 50 insertions(+), 19 deletions(-) (limited to 'src/cmd/internal/obj/arm64') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index e106ff2ae1..acfb16b096 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -340,8 +340,19 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 MOVD $0x1111ffff1111aaaa, R1 // MOVD $1230045644216969898, R1 // a1aa8a922122a2f22122e2f2 MOVD $0, R1 // 010080d2 MOVD $-1, R1 // 01008092 - MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2 - MOVD $0xffffffffffffaaaa, R1 // MOVD $-21846, R1 // a1aa8a92 + MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2 + MOVD $0xffffffffffffaaaa, R1 // MOVD $-21846, R1 // a1aa8a92 + + MOVD $0x1002(RSP), R1 // MOVD $4098(RSP), R1 // fb074091610b0091 + MOVD $0x1708(RSP), RSP // MOVD $5896(RSP), RSP // fb0740917f231c91 + MOVD $0x2001(R7), R1 // MOVD $8193(R7), R1 // fb08409161070091 + MOVD $0xffffff(R7), R1 // MOVD $16777215(R7), R1 // fbfc7f9161ff3f91 + + MOVD $-0x1(R7), R1 // MOVD $-1(R7), R1 // e10400d1 + MOVD $-0x30(R7), R1 // MOVD $-48(R7), R1 // e1c000d1 + MOVD $-0x708(R7), R1 // MOVD $-1800(R7), R1 // e1201cd1 + MOVD $-0x2000(RSP), R1 // MOVD $-8192(RSP), R1 // e10b40d1 + MOVD $-0x10000(RSP), RSP // MOVD $-65536(RSP), RSP // ff4340d1 // // CLS diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 2839da1437..b3c9e9a18e 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -410,9 +410,10 @@ const ( C_FCON // floating-point constant C_VCONADDR // 64-bit memory address - C_AACON // ADDCON offset in auto constant $a(FP) - C_LACON // 32-bit offset in auto constant $a(FP) - C_AECON // ADDCON offset in extern constant $e(SB) + C_AACON // ADDCON offset in auto constant $a(FP) + C_AACON2 // 24-bit offset in auto constant $a(FP) + C_LACON // 32-bit offset in auto constant $a(FP) + C_AECON // ADDCON offset in extern constant $e(SB) // TODO(aram): only one branch class should be enough C_SBRA // for TYPE_BRANCH diff --git a/src/cmd/internal/obj/arm64/anames7.go b/src/cmd/internal/obj/arm64/anames7.go index e1703fc4ab..96c9f788d9 100644 --- a/src/cmd/internal/obj/arm64/anames7.go +++ b/src/cmd/internal/obj/arm64/anames7.go @@ -36,6 +36,7 @@ var cnames7 = []string{ "FCON", "VCONADDR", "AACON", + "AACON2", "LACON", "AECON", "SBRA", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index df4bbbbd35..fc2033d689 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -391,7 +391,11 @@ var optab = []Optab{ {AMOVD, C_VCON, C_NONE, C_NONE, C_REG, 12, 16, 0, NOTUSETMP, 0}, {AMOVK, C_VCON, C_NONE, C_NONE, C_REG, 33, 4, 0, 0, 0}, - {AMOVD, C_AACON, C_NONE, C_NONE, C_REG, 4, 4, REGFROM, 0, 0}, + {AMOVD, C_AACON, C_NONE, C_NONE, C_RSP, 4, 4, REGFROM, 0, 0}, + {AMOVD, C_AACON2, C_NONE, C_NONE, C_RSP, 4, 8, REGFROM, 0, 0}, + + /* load long effective stack address (load int32 offset and add) */ + {AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0}, // Move a large constant to a Vn. {AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, @@ -594,9 +598,6 @@ var optab = []Optab{ {AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, - /* load long effective stack address (load int32 offset and add) */ - {AMOVD, C_LACON, C_NONE, C_NONE, C_REG, 34, 8, REGSP, LFROM, 0}, - /* pre/post-indexed load (unscaled, signed 9-bit offset) */ {AMOVD, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, {AMOVW, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, @@ -1361,6 +1362,10 @@ func isaddcon(v int64) bool { return v <= 0xFFF } +func isaddcon2(v int64) bool { + return 0 <= v && v <= 0xFFFFFF +} + // isbitcon reports whether a constant can be encoded into a logical instruction. // bitcon has a binary form of repetition of a bit sequence of length 2, 4, 8, 16, 32, or 64, // which itself is a rotate (w.r.t. the length of the unit) of a sequence of ones. @@ -1889,10 +1894,14 @@ func (c *ctxt7) aclass(a *obj.Addr) int { default: return C_GOK } - - if isaddcon(c.instoffset) { + cf := c.instoffset + if isaddcon(cf) || isaddcon(-cf) { return C_AACON } + if isaddcon2(cf) { + return C_AACON2 + } + return C_LACON case obj.TYPE_BRANCH: @@ -2046,7 +2055,7 @@ func cmp(a int, b int) bool { return cmp(C_LCON, b) case C_LACON: - if b == C_AACON { + if b == C_AACON || b == C_AACON2 { return true } @@ -3062,11 +3071,10 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 |= (uint32(r&31) << 5) | uint32(rt&31) - case 4: /* mov $addcon, R; mov $recon, R; mov $racon, R */ - o1 = c.opirr(p, p.As) - + case 4: /* mov $addcon, R; mov $recon, R; mov $racon, R; mov $addcon2, R */ rt := int(p.To.Reg) r := int(o.param) + if r == 0 { r = REGZERO } else if r == REGFROM { @@ -3075,13 +3083,23 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { if r == 0 { r = REGSP } + v := int32(c.regoff(&p.From)) - if (v & 0xFFF000) != 0 { - v >>= 12 - o1 |= 1 << 22 /* shift, by 12 */ + var op int32 + if v < 0 { + v = -v + op = int32(c.opirr(p, ASUB)) + } else { + op = int32(c.opirr(p, AADD)) + } + + if int(o.size) == 8 { + o1 = c.oaddi(p, op, v&0xfff000, r, REGTMP) + o2 = c.oaddi(p, op, v&0x000fff, REGTMP, rt) + break } - o1 |= ((uint32(v) & 0xFFF) << 10) | (uint32(r&31) << 5) | uint32(rt&31) + o1 = c.oaddi(p, op, v, r, rt) case 5: /* b s; bl s */ o1 = c.opbra(p, p.As) -- cgit v1.3 From fa04d488bd54b8fdd78cc9bcc6d90de4bf5f8efb Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Wed, 16 Sep 2020 14:05:18 +0800 Subject: cmd/asm: fix the issue of moving 128-bit integers to vector registers on arm64 The CL 249758 added `FMOVQ $vcon, Vd` instruction and assembler used 128-bit simd literal-loading to load `$vcon` from pool into 128-bit vector register `Vd`. Because Go does not have 128-bit integers for now, the assembler will report an error of `immediate out of range` when assembleing `FMOVQ $0x123456789abcdef0123456789abcdef, V0` instruction. This patch lets 128-bit integers take two 64-bit operands, for the high and low parts separately and adds `VMOVQ $hi, $lo, Vd` instruction to move `$hi<<64+$lo' into 128-bit register `Vd`. In addition, this patch renames `FMOVQ/FMOVD/FMOVS` ops to 'VMOVQ/VMOVD/VMOVS' and uses them to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively Update the go doc. Fixes #40725 Change-Id: Ia3c83bb6463f104d2bee960905053a97299e0a3a Reviewed-on: https://go-review.googlesource.com/c/go/+/255900 Trust: fannie zhang Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/arch/arm64.go | 18 +++++++++------ src/cmd/asm/internal/asm/asm.go | 21 ++++++++--------- src/cmd/asm/internal/asm/testdata/arm64.s | 6 +++-- src/cmd/internal/obj/arm64/a.out.go | 4 +++- src/cmd/internal/obj/arm64/anames.go | 4 +++- src/cmd/internal/obj/arm64/asm7.go | 38 ++++++++++++++++--------------- src/cmd/internal/obj/arm64/doc.go | 10 ++++++++ 7 files changed, 60 insertions(+), 41 deletions(-) (limited to 'src/cmd/internal/obj/arm64') diff --git a/src/cmd/asm/internal/arch/arm64.go b/src/cmd/asm/internal/arch/arm64.go index 3817fcd5c2..e643889aef 100644 --- a/src/cmd/asm/internal/arch/arm64.go +++ b/src/cmd/asm/internal/arch/arm64.go @@ -82,6 +82,17 @@ func IsARM64STLXR(op obj.As) bool { return false } +// IsARM64TBL reports whether the op (as defined by an arm64.A* +// constant) is one of the TBL-like instructions and one of its +// inputs does not fit into prog.Reg, so require special handling. +func IsARM64TBL(op obj.As) bool { + switch op { + case arm64.AVTBL, arm64.AVMOVQ: + return true + } + return false +} + // ARM64Suffix handles the special suffix for the ARM64. // It returns a boolean to indicate success; failure means // cond was unrecognized. @@ -125,13 +136,6 @@ func arm64RegisterNumber(name string, n int16) (int16, bool) { return 0, false } -// IsARM64TBL reports whether the op (as defined by an arm64.A* -// constant) is one of the table lookup instructions that require special -// handling. -func IsARM64TBL(op obj.As) bool { - return op == arm64.AVTBL -} - // ARM64RegisterExtension parses an ARM64 register with extension or arrangement. func ARM64RegisterExtension(a *obj.Addr, ext string, reg, num int16, isAmount, isIndex bool) error { Rnum := (reg & 31) + int16(num<<5) diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index 42e217dc23..7878d74549 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -622,8 +622,9 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { prog.SetFrom3(a[1]) prog.To = a[2] case sys.ARM64: - // ARM64 instructions with one input and two outputs. - if arch.IsARM64STLXR(op) { + switch { + case arch.IsARM64STLXR(op): + // ARM64 instructions with one input and two outputs. prog.From = a[0] prog.To = a[1] if a[2].Type != obj.TYPE_REG { @@ -631,20 +632,16 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { return } prog.RegTo2 = a[2].Reg - break - } - if arch.IsARM64TBL(op) { + case arch.IsARM64TBL(op): + // one of its inputs does not fit into prog.Reg. prog.From = a[0] - if a[1].Type != obj.TYPE_REGLIST { - p.errorf("%s: expected list; found %s", op, obj.Dconv(prog, &a[1])) - } prog.SetFrom3(a[1]) prog.To = a[2] - break + default: + prog.From = a[0] + prog.Reg = p.getRegister(prog, op, &a[1]) + prog.To = a[2] } - prog.From = a[0] - prog.Reg = p.getRegister(prog, op, &a[1]) - prog.To = a[2] case sys.I386: prog.From = a[0] prog.SetFrom3(a[1]) diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index acfb16b096..e277c04b7c 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -218,8 +218,10 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 FMOVD $(28.0), F4 // 0490671e // move a large constant to a Vd. - FMOVD $0x8040201008040201, V20 // FMOVD $-9205322385119247871, V20 - FMOVQ $0x8040201008040202, V29 // FMOVQ $-9205322385119247870, V29 + VMOVS $0x80402010, V11 // VMOVS $2151686160, V11 + VMOVD $0x8040201008040201, V20 // VMOVD $-9205322385119247871, V20 + VMOVQ $0x7040201008040201, $0x8040201008040201, V10 // VMOVQ $8088500183983456769, $-9205322385119247871, V10 + VMOVQ $0x8040201008040202, $0x7040201008040201, V20 // VMOVQ $-9205322385119247870, $8088500183983456769, V20 FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc FMOVS (R2)(R6<<2), F4 // 447866bc diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index b3c9e9a18e..1ca41c15ba 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -875,7 +875,9 @@ const ( AFLDPS AFMOVD AFMOVS - AFMOVQ + AVMOVQ + AVMOVD + AVMOVS AFMULD AFMULS AFNEGD diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 48c066abfd..900cdba817 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -381,7 +381,9 @@ var Anames = []string{ "FLDPS", "FMOVD", "FMOVS", - "FMOVQ", + "VMOVQ", + "VMOVD", + "VMOVS", "FMULD", "FMULS", "FNEGD", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index fc2033d689..ee4a33eef4 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -260,8 +260,9 @@ func MOVCONST(d int64, s int, rt int) uint32 { const ( // Optab.flag LFROM = 1 << 0 // p.From uses constant pool - LTO = 1 << 1 // p.To uses constant pool - NOTUSETMP = 1 << 2 // p expands to multiple instructions, but does NOT use REGTMP + LFROM3 = 1 << 1 // p.From3 uses constant pool + LTO = 1 << 2 // p.To uses constant pool + NOTUSETMP = 1 << 3 // p expands to multiple instructions, but does NOT use REGTMP ) var optab = []Optab{ @@ -397,10 +398,10 @@ var optab = []Optab{ /* load long effective stack address (load int32 offset and add) */ {AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0}, - // Move a large constant to a Vn. - {AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, - {AFMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, - {AFMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + // Move a large constant to a vector register. + {AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, 101, 4, 0, LFROM | LFROM3, 0}, + {AVMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + {AVMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, /* jump operations */ {AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, @@ -950,13 +951,14 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.ctxt.Diag("zero-width instruction\n%v", p) } } - switch o.flag & (LFROM | LTO) { - case LFROM: + if o.flag&LFROM != 0 { c.addpool(p, &p.From) - - case LTO: + } + if o.flag&LFROM3 != 0 { + c.addpool(p, p.GetFrom3()) + } + if o.flag<O != 0 { c.addpool(p, &p.To) - break } if p.As == AB || p.As == obj.ARET || p.As == AERET { /* TODO: other unconditional operations */ @@ -1174,8 +1176,8 @@ func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) { sz := 4 if a.Type == obj.TYPE_CONST { - if lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit)) { - // out of range -0x80000000 ~ 0xffffffff, must store 64-bit + if (lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit))) || p.As == AVMOVQ || p.As == AVMOVD { + // out of range -0x80000000 ~ 0xffffffff or VMOVQ or VMOVD operand, must store 64-bit. t.As = ADWORD sz = 8 } // else store 32-bit @@ -2675,7 +2677,7 @@ func buildop(ctxt *obj.Link) { case AFCSELD: oprangeset(AFCSELS, t) - case AFMOVS, AFMOVD, AFMOVQ: + case AFMOVS, AFMOVD, AVMOVQ, AVMOVD, AVMOVS: break case AFCVTZSD: @@ -5142,7 +5144,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 = q<<30 | 0xe<<24 | len<<13 o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31) - case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool. + case 101: // VMOVQ $vcon1, $vcon2, Vd or VMOVD|VMOVS $vcon, Vd -> FMOVQ/FMOVD/FMOVS pool(PC), Vd: load from constant pool. o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg)) case 102: /* vushll, vushll2, vuxtl, vuxtl2 */ @@ -6672,15 +6674,15 @@ func (c *ctxt7) omovlit(as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32 { } else { fp, w := 0, 0 switch as { - case AFMOVS: + case AFMOVS, AVMOVS: fp = 1 w = 0 /* 32-bit SIMD/FP */ - case AFMOVD: + case AFMOVD, AVMOVD: fp = 1 w = 1 /* 64-bit SIMD/FP */ - case AFMOVQ: + case AVMOVQ: fp = 1 w = 2 /* 128-bit SIMD/FP */ diff --git a/src/cmd/internal/obj/arm64/doc.go b/src/cmd/internal/obj/arm64/doc.go index 7515217544..efd4577f56 100644 --- a/src/cmd/internal/obj/arm64/doc.go +++ b/src/cmd/internal/obj/arm64/doc.go @@ -86,6 +86,16 @@ In the following example, PCALIGN at the entry of the function Add will align it MOVD $1, R1 RET +7. Move large constants to vector registers. + +Go asm uses VMOVQ/VMOVD/VMOVS to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively. +And for a 128-bit interger, it take two 64-bit operands, for the high and low parts separately. + + Examples: + VMOVS $0x11223344, V0 + VMOVD $0x1122334455667788, V1 + VMOVQ $0x1122334455667788, $8877665544332211, V2 // V2=0x11223344556677888877665544332211 + Special Cases. (1) umov is written as VMOV. -- cgit v1.3 From 28e549dec3954b36d0c83442be913d8709d7e5ae Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Sat, 12 Sep 2020 12:33:24 -0400 Subject: runtime: use sigaltstack on macOS/ARM64 Currently we don't use sigaltstack on darwin/arm64, as is not supported on iOS. However, it is supported on macOS. Use it. (iOS remains unchanged.) Change-Id: Icc154c5e2edf2dbdc8ca68741ad9157fc15a72ee Reviewed-on: https://go-review.googlesource.com/c/go/+/256917 Trust: Cherry Zhang Reviewed-by: Ian Lance Taylor --- misc/cgo/test/sigaltstack.go | 2 +- src/cmd/internal/obj/arm64/obj7.go | 2 +- src/runtime/mkpreempt.go | 7 ++----- src/runtime/os_darwin.go | 8 ++++---- src/runtime/preempt_arm64.s | 3 --- src/runtime/stack.go | 2 +- src/runtime/sys_darwin_arm64.s | 22 ++++++++++++++++++---- 7 files changed, 27 insertions(+), 19 deletions(-) (limited to 'src/cmd/internal/obj/arm64') diff --git a/misc/cgo/test/sigaltstack.go b/misc/cgo/test/sigaltstack.go index 27b753a147..034cc4b371 100644 --- a/misc/cgo/test/sigaltstack.go +++ b/misc/cgo/test/sigaltstack.go @@ -62,7 +62,7 @@ import ( func testSigaltstack(t *testing.T) { switch { - case runtime.GOOS == "solaris", runtime.GOOS == "illumos", (runtime.GOOS == "darwin" || runtime.GOOS == "ios") && runtime.GOARCH == "arm64": + case runtime.GOOS == "solaris", runtime.GOOS == "illumos", runtime.GOOS == "ios" && runtime.GOARCH == "arm64": t.Skipf("switching signal stack not implemented on %s/%s", runtime.GOOS, runtime.GOARCH) } diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go index 56da854f16..f1bc2583cb 100644 --- a/src/cmd/internal/obj/arm64/obj7.go +++ b/src/cmd/internal/obj/arm64/obj7.go @@ -589,7 +589,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q1.To.Reg = REGSP q1.Spadj = c.autosize - if c.ctxt.Headtype == objabi.Hdarwin { + if objabi.GOOS == "ios" { // iOS does not support SA_ONSTACK. We will run the signal handler // on the G stack. If we write below SP, it may be clobbered by // the signal handler. So we save LR after decrementing SP. diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index c5bfb0f207..40683bb9d9 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -340,12 +340,9 @@ func genARM64() { p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux) p("SUB $8, RSP, R29") // set up new frame pointer p("#endif") - // On darwin, save the LR again after decrementing SP. We run the - // signal handler on the G stack (as it doesn't support SA_ONSTACK), + // On iOS, save the LR again after decrementing SP. We run the + // signal handler on the G stack (as it doesn't support sigaltstack), // so any writes below SP may be clobbered. - p("#ifdef GOOS_darwin") - p("MOVD R30, (RSP)") - p("#endif") p("#ifdef GOOS_ios") p("MOVD R30, (RSP)") p("#endif") diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go index 01c40b4813..394bd6fb0f 100644 --- a/src/runtime/os_darwin.go +++ b/src/runtime/os_darwin.go @@ -289,9 +289,9 @@ func mpreinit(mp *m) { // Called to initialize a new m (including the bootstrap m). // Called on the new thread, cannot allocate memory. func minit() { - // The alternate signal stack is buggy on arm64. + // iOS does not support alternate signal stack. // The signal handler handles it directly. - if GOARCH != "arm64" { + if !(GOOS == "ios" && GOARCH == "arm64") { minitSignalStack() } minitSignalMask() @@ -301,9 +301,9 @@ func minit() { // Called from dropm to undo the effect of an minit. //go:nosplit func unminit() { - // The alternate signal stack is buggy on arm64. + // iOS does not support alternate signal stack. // See minit. - if GOARCH != "arm64" { + if !(GOOS == "ios" && GOARCH == "arm64") { unminitSignals() } } diff --git a/src/runtime/preempt_arm64.s b/src/runtime/preempt_arm64.s index d0e77659c3..36ee13282c 100644 --- a/src/runtime/preempt_arm64.s +++ b/src/runtime/preempt_arm64.s @@ -10,9 +10,6 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVD R29, -8(RSP) SUB $8, RSP, R29 #endif - #ifdef GOOS_darwin - MOVD R30, (RSP) - #endif #ifdef GOOS_ios MOVD R30, (RSP) #endif diff --git a/src/runtime/stack.go b/src/runtime/stack.go index 3802cd049e..2afc2635aa 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -66,7 +66,7 @@ const ( // to each stack below the usual guard area for OS-specific // purposes like signal handling. Used on Windows, Plan 9, // and iOS because they do not use a separate stack. - _StackSystem = sys.GoosWindows*512*sys.PtrSize + sys.GoosPlan9*512 + (sys.GoosDarwin+sys.GoosIos)*sys.GoarchArm64*1024 + _StackSystem = sys.GoosWindows*512*sys.PtrSize + sys.GoosPlan9*512 + sys.GoosIos*sys.GoarchArm64*1024 // The minimum size of stack used by Go code _StackMin = 2048 diff --git a/src/runtime/sys_darwin_arm64.s b/src/runtime/sys_darwin_arm64.s index 585d4f2c64..427cb17781 100644 --- a/src/runtime/sys_darwin_arm64.s +++ b/src/runtime/sys_darwin_arm64.s @@ -202,6 +202,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$192 BEQ 2(PC) BL runtime·load_g(SB) +#ifdef GOOS_ios MOVD RSP, R6 CMP $0, g BEQ nog @@ -226,16 +227,21 @@ nog: // Switch to gsignal stack. MOVD R6, RSP - // Call sigtrampgo. + // Save arguments. MOVW R0, (8*1)(RSP) MOVD R1, (8*2)(RSP) MOVD R2, (8*3)(RSP) +#endif + + // Call sigtrampgo. MOVD $runtime·sigtrampgo(SB), R11 BL (R11) +#ifdef GOOS_ios // Switch to old stack. MOVD (8*4)(RSP), R5 MOVD R5, RSP +#endif // Restore callee-save registers. MOVD (8*4)(RSP), R19 @@ -329,12 +335,20 @@ TEXT runtime·fcntl_trampoline(SB),NOSPLIT,$0 ADD $16, RSP RET -// sigaltstack on iOS is not supported and will always -// run the signal handler on the main stack, so our sigtramp has -// to do the stack switch ourselves. TEXT runtime·sigaltstack_trampoline(SB),NOSPLIT,$0 +#ifdef GOOS_ios + // sigaltstack on iOS is not supported and will always + // run the signal handler on the main stack, so our sigtramp has + // to do the stack switch ourselves. MOVW $43, R0 BL libc_exit(SB) +#else + MOVD 8(R0), R1 // arg 2 old + MOVD 0(R0), R0 // arg 1 new + CALL libc_sigaltstack(SB) + CBZ R0, 2(PC) + BL notok<>(SB) +#endif RET // Thread related functions -- cgit v1.3 From 3f7b4d12075277f28427e6b57708258225841ecd Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Wed, 7 Oct 2020 14:39:47 -0400 Subject: cmd/internal/obj/arm64: only emit R_CALLIND relocations on calls Don't emit it for jumps. In particular, not for the return instruction, which is JMP (LR). Reduce some binary size and linker resources. Change-Id: Idb3242b86c5a137597fb8accb8aadfe0244c14cf Reviewed-on: https://go-review.googlesource.com/c/go/+/260341 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: Than McIntosh --- src/cmd/internal/obj/arm64/asm7.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src/cmd/internal/obj/arm64') diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index ee4a33eef4..7c35fce106 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -3120,12 +3120,13 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { case 6: /* b ,O(R); bl ,O(R) */ o1 = c.opbrr(p, p.As) - o1 |= uint32(p.To.Reg&31) << 5 - rel := obj.Addrel(c.cursym) - rel.Off = int32(c.pc) - rel.Siz = 0 - rel.Type = objabi.R_CALLIND + if p.As == obj.ACALL { + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 0 + rel.Type = objabi.R_CALLIND + } case 7: /* beq s */ o1 = c.opbra(p, p.As) -- cgit v1.3 From 3036b76df0ae748856e3e0008b67241cc580e263 Mon Sep 17 00:00:00 2001 From: Meng Zhuo Date: Sat, 13 Jun 2020 00:06:49 +0800 Subject: cmd/asm: Add SHA3 hardware instructions for ARM64 Armv8.2-SHA introduced four SHA3-related instructions EOR3 .16B, .16B, .16B, .16B RAX1 .2D, .2D, .2D XAR .2D, .2D, .2D, # BCAX .16B, .16B, .16B, .16B We convert them into Go asm style as: VEOR3 .B16, .B16, .B16, .B16 VRAX1 .D2, .D2, .D2 VXAR $imm6, .D2, .D2, .D2 VBCAX .B16, .B16, .B16, .B16 Armv8 Reference Manual: * EOR3 (Three-way Exclusive OR) on C7.2.42 * RAX1 (Rotate and Exclusive OR) on C7.2.217 * XAR (Exclusive OR and Rotate) on C7.2.401 * BCAX (Bit Clear and Exclusive OR) on C7.2.12 Change-Id: I9a5d1b5ad508ed8fd5289d535906c54d9a63ca5a Reviewed-on: https://go-review.googlesource.com/c/go/+/180757 Run-TryBot: Meng Zhuo TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Trust: Emmanuel Odeke --- src/cmd/asm/internal/asm/testdata/arm64.s | 4 ++ src/cmd/internal/obj/arm64/a.out.go | 4 ++ src/cmd/internal/obj/arm64/anames.go | 4 ++ src/cmd/internal/obj/arm64/asm7.go | 73 ++++++++++++++++++++++++++++++- 4 files changed, 83 insertions(+), 2 deletions(-) (limited to 'src/cmd/internal/obj/arm64') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index e277c04b7c..7f495b90bb 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -81,6 +81,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 SHA512H2 V4.D2, V3, V2 // 628464ce SHA512SU0 V9.D2, V8.D2 // 2881c0ce SHA512SU1 V7.D2, V6.D2, V5.D2 // c58867ce + VRAX1 V26.D2, V29.D2, V30.D2 // be8f7ace + VXAR $63, V27.D2, V21.D2, V26.D2 // bafe9bce VADDV V0.S4, V0 // 00b8b14e VMOVI $82, V0.B16 // 40e6024f VUADDLV V6.B16, V6 // c638306e @@ -139,6 +141,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VTBL V14.B16, [V3.B16, V4.B16, V5.B16], V17.B16 // 71400e4e VTBL V13.B16, [V29.B16, V30.B16, V31.B16, V0.B16], V28.B16 // bc630d4e VTBL V3.B8, [V27.B16], V8.B8 // 6803030e + VEOR3 V2.B16, V7.B16, V12.B16, V25.B16 // 990907ce + VBCAX V1.B16, V2.B16, V26.B16, V31.B16 // 5f0722ce VZIP1 V16.H8, V3.H8, V19.H8 // 7338504e VZIP2 V22.D2, V25.D2, V21.D2 // 357bd64e VZIP1 V6.D2, V9.D2, V11.D2 // 2b39c64e diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 1ca41c15ba..33319e48df 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -958,9 +958,11 @@ const ( AVADDP AVAND AVBIF + AVBCAX AVCMEQ AVCNT AVEOR + AVEOR3 AVMOV AVLD1 AVLD2 @@ -989,6 +991,7 @@ const ( AVPMULL2 AVEXT AVRBIT + AVRAX1 AVUSHR AVUSHLL AVUSHLL2 @@ -1001,6 +1004,7 @@ const ( AVBSL AVBIT AVTBL + AVXAR AVZIP1 AVZIP2 AVCMTST diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 900cdba817..e5534e26b9 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -464,9 +464,11 @@ var Anames = []string{ "VADDP", "VAND", "VBIF", + "VBCAX", "VCMEQ", "VCNT", "VEOR", + "VEOR3", "VMOV", "VLD1", "VLD2", @@ -495,6 +497,7 @@ var Anames = []string{ "VPMULL2", "VEXT", "VRBIT", + "VRAX1", "VUSHR", "VUSHLL", "VUSHLL2", @@ -507,6 +510,7 @@ var Anames = []string{ "VBSL", "VBIT", "VTBL", + "VXAR", "VZIP1", "VZIP2", "VCMTST", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 7c35fce106..c46066313e 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -843,6 +843,8 @@ var optab = []Optab{ {ASHA256H, C_ARNG, C_VREG, C_NONE, C_VREG, 1, 4, 0, 0, 0}, {AVREV32, C_ARNG, C_NONE, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, {AVPMULL, C_ARNG, C_ARNG, C_NONE, C_ARNG, 93, 4, 0, 0, 0}, + {AVEOR3, C_ARNG, C_ARNG, C_ARNG, C_ARNG, 103, 4, 0, 0, 0}, + {AVXAR, C_VCON, C_ARNG, C_ARNG, C_ARNG, 104, 4, 0, 0, 0}, {obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0}, {obj.APCDATA, C_VCON, C_NONE, C_NONE, C_VCON, 0, 0, 0, 0, 0}, @@ -2769,6 +2771,7 @@ func buildop(ctxt *obj.Link) { case AVADD: oprangeset(AVSUB, t) + oprangeset(AVRAX1, t) case AAESD: oprangeset(AAESE, t) @@ -2827,6 +2830,9 @@ func buildop(ctxt *obj.Link) { oprangeset(AVLD4, t) oprangeset(AVLD4R, t) + case AVEOR3: + oprangeset(AVBCAX, t) + case ASHA1H, AVCNT, AVMOV, @@ -2839,7 +2845,8 @@ func buildop(ctxt *obj.Link) { AVDUP, AVMOVI, APRFM, - AVEXT: + AVEXT, + AVXAR: break case obj.ANOP, @@ -4205,7 +4212,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Add = 0 rel.Type = objabi.R_ARM64_GOTPCREL - case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2 Vm., Vn., Vd. */ + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2/vrax1 Vm., Vn., Vd. */ af := int((p.From.Reg >> 5) & 15) af3 := int((p.Reg >> 5) & 15) at := int((p.To.Reg >> 5) & 15) @@ -4269,6 +4276,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } else { size = 0 } + case AVRAX1: + if af != ARNG_2D { + c.ctxt.Diag("invalid arrangement: %v", p) + } + size = 0 + Q = 0 } o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) @@ -5186,6 +5199,51 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { c.ctxt.Diag("shift amount out of range: %v\n", p) } o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31) + case 103: /* VEOR3/VBCAX Va.B16, Vm.B16, Vn.B16, Vd.B16 */ + ta := (p.From.Reg >> 5) & 15 + tm := (p.Reg >> 5) & 15 + td := (p.To.Reg >> 5) & 15 + tn := ((p.GetFrom3().Reg) >> 5) & 15 + + if ta != tm || ta != tn || ta != td || ta != ARNG_16B { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + o1 = c.oprrr(p, p.As) + ra := int(p.From.Reg) + rm := int(p.Reg) + rn := int(p.GetFrom3().Reg) + rd := int(p.To.Reg) + o1 |= uint32(rm&31)<<16 | uint32(ra&31)<<10 | uint32(rn&31)<<5 | uint32(rd)&31 + + case 104: /* vxar $imm4, Vm., Vn., Vd. */ + af := ((p.GetFrom3().Reg) >> 5) & 15 + at := (p.To.Reg >> 5) & 15 + a := (p.Reg >> 5) & 15 + index := int(p.From.Offset) + + if af != a || af != at { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + if af != ARNG_2D { + c.ctxt.Diag("invalid arrangement, should be D2: %v", p) + break + } + + if index < 0 || index > 63 { + c.ctxt.Diag("illegal offset: %v", p) + } + + o1 = c.opirr(p, p.As) + rf := (p.GetFrom3().Reg) & 31 + rt := (p.To.Reg) & 31 + r := (p.Reg) & 31 + + o1 |= (uint32(r&31) << 16) | (uint32(index&63) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + } out[0] = o1 out[1] = o2 @@ -5761,6 +5819,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVAND: return 7<<25 | 1<<21 | 7<<10 + case AVBCAX: + return 0xCE<<24 | 1<<21 + case AVCMEQ: return 1<<29 | 0x71<<21 | 0x23<<10 @@ -5776,12 +5837,18 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVEOR: return 1<<29 | 0x71<<21 | 7<<10 + case AVEOR3: + return 0xCE << 24 + case AVORR: return 7<<25 | 5<<21 | 7<<10 case AVREV16: return 3<<26 | 2<<24 | 1<<21 | 3<<11 + case AVRAX1: + return 0xCE<<24 | 3<<21 | 1<<15 | 3<<10 + case AVREV32: return 11<<26 | 2<<24 | 1<<21 | 1<<11 @@ -6039,6 +6106,8 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVUSHLL2, AVUXTL2: return 3<<29 | 15<<24 | 0x29<<10 + case AVXAR: + return 0xCE<<24 | 1<<23 } c.ctxt.Diag("%v: bad irr %v", p, a) -- cgit v1.3 From 912262b806a432a29302e0cee45e4f42ef7038a2 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Sun, 19 Jul 2020 00:30:12 -0400 Subject: cmd/internal/obj: move LSym.Func into LSym.Extra This creates space for a different kind of extension field in LSym without making the struct any larger. (There are many LSym, so we care about keeping the struct small.) Change-Id: Ib16edb9e15f54c2a7351c8b875e19684058711e5 Reviewed-on: https://go-review.googlesource.com/c/go/+/243943 Trust: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/asm.go | 2 +- src/cmd/compile/internal/gc/dwinl.go | 4 +- src/cmd/compile/internal/gc/gsubr.go | 2 +- src/cmd/compile/internal/gc/obj.go | 9 ++-- src/cmd/compile/internal/gc/pgen.go | 18 ++++---- src/cmd/compile/internal/gc/plive.go | 9 ++-- src/cmd/compile/internal/gc/scope.go | 4 +- src/cmd/compile/internal/gc/ssa.go | 10 ++-- src/cmd/internal/obj/arm/asm5.go | 8 ++-- src/cmd/internal/obj/arm/obj5.go | 34 +++++++------- src/cmd/internal/obj/arm64/asm7.go | 12 ++--- src/cmd/internal/obj/arm64/obj7.go | 36 +++++++-------- src/cmd/internal/obj/dwarf.go | 31 +++++++------ src/cmd/internal/obj/ld.go | 2 +- src/cmd/internal/obj/link.go | 23 ++++++++- src/cmd/internal/obj/mips/asm0.go | 10 ++-- src/cmd/internal/obj/mips/obj0.go | 42 ++++++++--------- src/cmd/internal/obj/objfile.go | 90 ++++++++++++++++++------------------ src/cmd/internal/obj/pass.go | 6 +-- src/cmd/internal/obj/pcln.go | 28 +++++------ src/cmd/internal/obj/plist.go | 14 +++--- src/cmd/internal/obj/ppc64/asm9.go | 10 ++-- src/cmd/internal/obj/ppc64/obj9.go | 28 +++++------ src/cmd/internal/obj/riscv/obj.go | 38 +++++++-------- src/cmd/internal/obj/s390x/asmz.go | 6 +-- src/cmd/internal/obj/s390x/objz.go | 28 +++++------ src/cmd/internal/obj/sym.go | 6 ++- src/cmd/internal/obj/wasm/wasmobj.go | 34 +++++++------- src/cmd/internal/obj/x86/asm6.go | 8 ++-- src/cmd/internal/obj/x86/obj6.go | 26 +++++------ 30 files changed, 304 insertions(+), 274 deletions(-) (limited to 'src/cmd/internal/obj/arm64') diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index 7878d74549..b9efa454ed 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -181,7 +181,7 @@ func (p *Parser) asmText(operands [][]lex.Token) { // Argsize set below. }, } - nameAddr.Sym.Func.Text = prog + nameAddr.Sym.Func().Text = prog prog.To.Val = int32(argSize) p.append(prog, "", true) } diff --git a/src/cmd/compile/internal/gc/dwinl.go b/src/cmd/compile/internal/gc/dwinl.go index 27e2cbcd98..5120fa1166 100644 --- a/src/cmd/compile/internal/gc/dwinl.go +++ b/src/cmd/compile/internal/gc/dwinl.go @@ -34,7 +34,7 @@ func assembleInlines(fnsym *obj.LSym, dwVars []*dwarf.Var) dwarf.InlCalls { // Walk progs to build up the InlCalls data structure var prevpos src.XPos - for p := fnsym.Func.Text; p != nil; p = p.Link { + for p := fnsym.Func().Text; p != nil; p = p.Link { if p.Pos == prevpos { continue } @@ -150,7 +150,7 @@ func assembleInlines(fnsym *obj.LSym, dwVars []*dwarf.Var) dwarf.InlCalls { start := int64(-1) curii := -1 var prevp *obj.Prog - for p := fnsym.Func.Text; p != nil; prevp, p = p, p.Link { + for p := fnsym.Func().Text; p != nil; prevp, p = p, p.Link { if prevp != nil && p.Pos == prevp.Pos { continue } diff --git a/src/cmd/compile/internal/gc/gsubr.go b/src/cmd/compile/internal/gc/gsubr.go index 480d411f49..14c217ff3b 100644 --- a/src/cmd/compile/internal/gc/gsubr.go +++ b/src/cmd/compile/internal/gc/gsubr.go @@ -199,7 +199,7 @@ func (pp *Progs) settext(fn *Node) { ptxt := pp.Prog(obj.ATEXT) pp.Text = ptxt - fn.Func.lsym.Func.Text = ptxt + fn.Func.lsym.Func().Text = ptxt ptxt.From.Type = obj.TYPE_MEM ptxt.From.Name = obj.NAME_EXTERN ptxt.From.Sym = fn.Func.lsym diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go index d7f4a94041..f6557e2d15 100644 --- a/src/cmd/compile/internal/gc/obj.go +++ b/src/cmd/compile/internal/gc/obj.go @@ -305,20 +305,21 @@ func dumpglobls() { // global symbols can't be declared during parallel compilation. func addGCLocals() { for _, s := range Ctxt.Text { - if s.Func == nil { + fn := s.Func() + if fn == nil { continue } - for _, gcsym := range []*obj.LSym{s.Func.GCArgs, s.Func.GCLocals, s.Func.GCRegs} { + for _, gcsym := range []*obj.LSym{fn.GCArgs, fn.GCLocals, fn.GCRegs} { if gcsym != nil && !gcsym.OnList() { ggloblsym(gcsym, int32(len(gcsym.P)), obj.RODATA|obj.DUPOK) } } - if x := s.Func.StackObjects; x != nil { + if x := fn.StackObjects; x != nil { attr := int16(obj.RODATA) ggloblsym(x, int32(len(x.P)), attr) x.Set(obj.AttrStatic, true) } - if x := s.Func.OpenCodedDeferInfo; x != nil { + if x := fn.OpenCodedDeferInfo; x != nil { ggloblsym(x, int32(len(x.P)), obj.RODATA|obj.DUPOK) } } diff --git a/src/cmd/compile/internal/gc/pgen.go b/src/cmd/compile/internal/gc/pgen.go index 52b1ed351d..353f4b08c9 100644 --- a/src/cmd/compile/internal/gc/pgen.go +++ b/src/cmd/compile/internal/gc/pgen.go @@ -266,8 +266,8 @@ func compile(fn *Node) { dtypesym(n.Type) // Also make sure we allocate a linker symbol // for the stack object data, for the same reason. - if fn.Func.lsym.Func.StackObjects == nil { - fn.Func.lsym.Func.StackObjects = Ctxt.Lookup(fn.Func.lsym.Name + ".stkobj") + if fn.Func.lsym.Func().StackObjects == nil { + fn.Func.lsym.Func().StackObjects = Ctxt.Lookup(fn.Func.lsym.Name + ".stkobj") } } } @@ -415,7 +415,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S case PAUTO: if !n.Name.Used() { // Text == nil -> generating abstract function - if fnsym.Func.Text != nil { + if fnsym.Func().Text != nil { Fatalf("debuginfo unused node (AllocFrame should truncate fn.Func.Dcl)") } continue @@ -425,7 +425,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S continue } apdecls = append(apdecls, n) - fnsym.Func.RecordAutoType(ngotype(n).Linksym()) + fnsym.Func().RecordAutoType(ngotype(n).Linksym()) } decls, dwarfVars := createDwarfVars(fnsym, fn.Func, apdecls) @@ -435,7 +435,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S // the function symbol to insure that the type included in DWARF // processing during linking. typesyms := []*obj.LSym{} - for t, _ := range fnsym.Func.Autot { + for t, _ := range fnsym.Func().Autot { typesyms = append(typesyms, t) } sort.Sort(obj.BySymName(typesyms)) @@ -444,7 +444,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S r.Sym = sym r.Type = objabi.R_USETYPE } - fnsym.Func.Autot = nil + fnsym.Func().Autot = nil var varScopes []ScopeID for _, decl := range decls { @@ -522,7 +522,7 @@ func createSimpleVar(fnsym *obj.LSym, n *Node) *dwarf.Var { } typename := dwarf.InfoPrefix + typesymname(n.Type) - delete(fnsym.Func.Autot, ngotype(n).Linksym()) + delete(fnsym.Func().Autot, ngotype(n).Linksym()) inlIndex := 0 if genDwarfInline > 1 { if n.Name.InlFormal() || n.Name.InlLocal() { @@ -667,7 +667,7 @@ func createDwarfVars(fnsym *obj.LSym, fn *Func, apDecls []*Node) ([]*Node, []*dw ChildIndex: -1, }) // Record go type of to insure that it gets emitted by the linker. - fnsym.Func.RecordAutoType(ngotype(n).Linksym()) + fnsym.Func().RecordAutoType(ngotype(n).Linksym()) } return decls, vars @@ -731,7 +731,7 @@ func createComplexVar(fnsym *obj.LSym, fn *Func, varID ssa.VarID) *dwarf.Var { } gotype := ngotype(n).Linksym() - delete(fnsym.Func.Autot, gotype) + delete(fnsym.Func().Autot, gotype) typename := dwarf.InfoPrefix + gotype.Name[len("type."):] inlIndex := 0 if genDwarfInline > 1 { diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go index a9ea37701e..b471accb65 100644 --- a/src/cmd/compile/internal/gc/plive.go +++ b/src/cmd/compile/internal/gc/plive.go @@ -1552,26 +1552,27 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap { // Emit the live pointer map data structures ls := e.curfn.Func.lsym - ls.Func.GCArgs, ls.Func.GCLocals, ls.Func.GCRegs = lv.emit() + fninfo := ls.Func() + fninfo.GCArgs, fninfo.GCLocals, fninfo.GCRegs = lv.emit() p := pp.Prog(obj.AFUNCDATA) Addrconst(&p.From, objabi.FUNCDATA_ArgsPointerMaps) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN - p.To.Sym = ls.Func.GCArgs + p.To.Sym = fninfo.GCArgs p = pp.Prog(obj.AFUNCDATA) Addrconst(&p.From, objabi.FUNCDATA_LocalsPointerMaps) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN - p.To.Sym = ls.Func.GCLocals + p.To.Sym = fninfo.GCLocals if !go115ReduceLiveness { p = pp.Prog(obj.AFUNCDATA) Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN - p.To.Sym = ls.Func.GCRegs + p.To.Sym = fninfo.GCRegs } return lv.livenessMap diff --git a/src/cmd/compile/internal/gc/scope.go b/src/cmd/compile/internal/gc/scope.go index d7239d5693..e66b859e10 100644 --- a/src/cmd/compile/internal/gc/scope.go +++ b/src/cmd/compile/internal/gc/scope.go @@ -62,9 +62,9 @@ func scopePCs(fnsym *obj.LSym, marks []Mark, dwarfScopes []dwarf.Scope) { if len(marks) == 0 { return } - p0 := fnsym.Func.Text + p0 := fnsym.Func().Text scope := findScope(marks, p0.Pos) - for p := fnsym.Func.Text; p != nil; p = p.Link { + for p := p0; p != nil; p = p.Link { if p.Pos == p0.Pos { continue } diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 3d5fa4cd0a..d8f627c213 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -240,7 +240,7 @@ func dvarint(x *obj.LSym, off int, v int64) int { // - Offset of where argument should be placed in the args frame when making call func (s *state) emitOpenDeferInfo() { x := Ctxt.Lookup(s.curfn.Func.lsym.Name + ".opendefer") - s.curfn.Func.lsym.Func.OpenCodedDeferInfo = x + s.curfn.Func.lsym.Func().OpenCodedDeferInfo = x off := 0 // Compute maxargsize (max size of arguments for all defers) @@ -6108,7 +6108,7 @@ func emitStackObjects(e *ssafn, pp *Progs) { // Populate the stack object data. // Format must match runtime/stack.go:stackObjectRecord. - x := e.curfn.Func.lsym.Func.StackObjects + x := e.curfn.Func.lsym.Func().StackObjects off := 0 off = duintptr(x, off, uint64(len(vars))) for _, v := range vars { @@ -6145,7 +6145,7 @@ func genssa(f *ssa.Func, pp *Progs) { s.livenessMap = liveness(e, f, pp) emitStackObjects(e, pp) - openDeferInfo := e.curfn.Func.lsym.Func.OpenCodedDeferInfo + openDeferInfo := e.curfn.Func.lsym.Func().OpenCodedDeferInfo if openDeferInfo != nil { // This function uses open-coded defers -- write out the funcdata // info that we computed at the end of genssa. @@ -6350,7 +6350,7 @@ func genssa(f *ssa.Func, pp *Progs) { // some of the inline marks. // Use this instruction instead. p.Pos = p.Pos.WithIsStmt() // promote position to a statement - pp.curfn.Func.lsym.Func.AddInlMark(p, inlMarks[m]) + pp.curfn.Func.lsym.Func().AddInlMark(p, inlMarks[m]) // Make the inline mark a real nop, so it doesn't generate any code. m.As = obj.ANOP m.Pos = src.NoXPos @@ -6362,7 +6362,7 @@ func genssa(f *ssa.Func, pp *Progs) { // Any unmatched inline marks now need to be added to the inlining tree (and will generate a nop instruction). for _, p := range inlMarkList { if p.As != obj.ANOP { - pp.curfn.Func.lsym.Func.AddInlMark(p, inlMarks[p]) + pp.curfn.Func.lsym.Func().AddInlMark(p, inlMarks[p]) } } } diff --git a/src/cmd/internal/obj/arm/asm5.go b/src/cmd/internal/obj/arm/asm5.go index 269a4223d5..ebb98b4859 100644 --- a/src/cmd/internal/obj/arm/asm5.go +++ b/src/cmd/internal/obj/arm/asm5.go @@ -390,7 +390,7 @@ func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var p *obj.Prog var op *obj.Prog - p = cursym.Func.Text + p = cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -482,8 +482,8 @@ func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { bflag = 0 pc = 0 times++ - c.cursym.Func.Text.Pc = 0 // force re-layout the code. - for p = c.cursym.Func.Text; p != nil; p = p.Link { + c.cursym.Func().Text.Pc = 0 // force re-layout the code. + for p = c.cursym.Func().Text; p != nil; p = p.Link { o = c.oplook(p) if int64(pc) > p.Pc { p.Pc = int64(pc) @@ -558,7 +558,7 @@ func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { * perhaps we'd be able to parallelize the span loop above. */ - p = c.cursym.Func.Text + p = c.cursym.Func().Text c.autosize = p.To.Offset + 4 c.cursym.Grow(c.cursym.Size) diff --git a/src/cmd/internal/obj/arm/obj5.go b/src/cmd/internal/obj/arm/obj5.go index 4d9187b530..f2bfb9679f 100644 --- a/src/cmd/internal/obj/arm/obj5.go +++ b/src/cmd/internal/obj/arm/obj5.go @@ -249,13 +249,13 @@ const ( func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize := int32(0) - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxt5{ctxt: ctxt, cursym: cursym, newprog: newprog} - p := c.cursym.Func.Text + p := c.cursym.Func().Text autoffset := int32(p.To.Offset) if autoffset == -4 { // Historical way to mark NOFRAME. @@ -271,30 +271,30 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - cursym.Func.Locals = autoffset - cursym.Func.Args = p.To.Val.(int32) + cursym.Func().Locals = autoffset + cursym.Func().Args = p.To.Val.(int32) /* * find leaf subroutines */ - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ATEXT: p.Mark |= LEAF case ADIV, ADIVU, AMOD, AMODU: - cursym.Func.Text.Mark &^= LEAF + cursym.Func().Text.Mark &^= LEAF case ABL, ABX, obj.ADUFFZERO, obj.ADUFFCOPY: - cursym.Func.Text.Mark &^= LEAF + cursym.Func().Text.Mark &^= LEAF } } var q2 *obj.Prog - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: @@ -311,20 +311,20 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize += 4 } - if autosize == 0 && cursym.Func.Text.Mark&LEAF == 0 { + if autosize == 0 && cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // are not identified as leaves but still have no frame. if ctxt.Debugvlog { ctxt.Logf("save suppressed in: %s\n", cursym.Name) } - cursym.Func.Text.Mark |= LEAF + cursym.Func().Text.Mark |= LEAF } // FP offsets need an updated p.To.Offset. p.To.Offset = int64(autosize) - 4 - if cursym.Func.Text.Mark&LEAF != 0 { + if cursym.Func().Text.Mark&LEAF != 0 { cursym.Set(obj.AttrLeaf, true) if p.From.Sym.NoFrame() { break @@ -347,7 +347,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Reg = REGSP p.Spadj = autosize - if cursym.Func.Text.From.Sym.Wrapper() { + if cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVW g_panic(g), R1 @@ -460,7 +460,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case obj.ARET: nocache(p) - if cursym.Func.Text.Mark&LEAF != 0 { + if cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 { p.As = AB p.From = obj.Addr{} @@ -508,7 +508,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } case ADIV, ADIVU, AMOD, AMODU: - if cursym.Func.Text.From.Sym.NoSplit() { + if cursym.Func().Text.From.Sym.NoSplit() { ctxt.Diag("cannot divide in NOSPLIT function") } const debugdivmod = false @@ -720,7 +720,7 @@ func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { end := c.ctxt.EndUnsafePoint(bls, c.newprog, -1) var last *obj.Prog - for last = c.cursym.Func.Text; last.Link != nil; last = last.Link { + for last = c.cursym.Func().Text; last.Link != nil; last = last.Link { } // Now we are at the end of the function, but logically @@ -751,7 +751,7 @@ func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { switch { case c.cursym.CFunc(): morestack = "runtime.morestackc" - case !c.cursym.Func.Text.From.Sym.NeedCtxt(): + case !c.cursym.Func().Text.From.Sym.NeedCtxt(): morestack = "runtime.morestack_noctxt" } call.To.Sym = c.ctxt.Lookup(morestack) @@ -762,7 +762,7 @@ func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { b := obj.Appendp(pcdata, c.newprog) b.As = obj.AJMP b.To.Type = obj.TYPE_BRANCH - b.To.SetTarget(c.cursym.Func.Text.Link) + b.To.SetTarget(c.cursym.Func().Text.Link) b.Spadj = +framesize return end diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index c46066313e..6b9fe27c05 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -913,7 +913,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Retpoline = false // don't keep printing } - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -943,8 +943,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { alignedValue := p.From.Offset m = pcAlignPadLength(pc, alignedValue, ctxt) // Update the current text symbol alignment value. - if int32(alignedValue) > cursym.Func.Align { - cursym.Func.Align = int32(alignedValue) + if int32(alignedValue) > cursym.Func().Align { + cursym.Func().Align = int32(alignedValue) } break case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: @@ -983,7 +983,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for bflag != 0 { bflag = 0 pc = 0 - for p = c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { if p.As == ADWORD && (pc&7) != 0 { pc += 4 } @@ -1047,7 +1047,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { psz := int32(0) var i int var out [6]uint32 - for p := c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { c.pc = p.Pc o = c.oplook(p) @@ -1088,7 +1088,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // We use REGTMP as a scratch register during call injection, // so instruction sequences that use REGTMP are unsafe to // preempt asynchronously. - obj.MarkUnsafePoints(c.ctxt, c.cursym.Func.Text, c.newprog, c.isUnsafePoint, c.isRestartable) + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable) } // isUnsafePoint returns whether p is an unsafe point. diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go index f1bc2583cb..0baf51973a 100644 --- a/src/cmd/internal/obj/arm64/obj7.go +++ b/src/cmd/internal/obj/arm64/obj7.go @@ -166,7 +166,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { end := c.ctxt.EndUnsafePoint(bls, c.newprog, -1) var last *obj.Prog - for last = c.cursym.Func.Text; last.Link != nil; last = last.Link { + for last = c.cursym.Func().Text; last.Link != nil; last = last.Link { } // Now we are at the end of the function, but logically @@ -209,7 +209,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { switch { case c.cursym.CFunc(): morestack = "runtime.morestackc" - case !c.cursym.Func.Text.From.Sym.NeedCtxt(): + case !c.cursym.Func().Text.From.Sym.NeedCtxt(): morestack = "runtime.morestack_noctxt" } call.To.Sym = c.ctxt.Lookup(morestack) @@ -220,7 +220,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { jmp := obj.Appendp(pcdata, c.newprog) jmp.As = AB jmp.To.Type = obj.TYPE_BRANCH - jmp.To.SetTarget(c.cursym.Func.Text.Link) + jmp.To.SetTarget(c.cursym.Func().Text.Link) jmp.Spadj = +framesize return end @@ -441,13 +441,13 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) { } func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym} - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -8 { // Historical way to mark NOFRAME. @@ -463,13 +463,13 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines */ - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ATEXT: p.Mark |= LEAF @@ -477,18 +477,18 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case ABL, obj.ADUFFZERO, obj.ADUFFCOPY: - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF } } var q *obj.Prog var q1 *obj.Prog var retjmp *obj.LSym - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: - c.cursym.Func.Text = p + c.cursym.Func().Text = p c.autosize = int32(textstksiz) if p.Mark&LEAF != 0 && c.autosize == 0 { @@ -514,7 +514,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8) } c.autosize += extrasize - c.cursym.Func.Locals += extrasize + c.cursym.Func().Locals += extrasize // low 32 bits for autosize // high 32 bits for extrasize @@ -524,14 +524,14 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Offset = 0 } - if c.autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 { + if c.autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 { if c.ctxt.Debugvlog { - c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func.Text.From.Sym.Name) + c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func().Text.From.Sym.Name) } - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } - if cursym.Func.Text.Mark&LEAF != 0 { + if cursym.Func().Text.Mark&LEAF != 0 { cursym.Set(obj.AttrLeaf, true) if p.From.Sym.NoFrame() { break @@ -641,7 +641,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q1.To.Reg = REGFP } - if c.cursym.Func.Text.From.Sym.Wrapper() { + if c.cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOV g_panic(g), R1 @@ -755,7 +755,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { retjmp = p.To.Sym p.To = obj.Addr{} - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if c.autosize != 0 { p.As = AADD p.From.Type = obj.TYPE_CONST diff --git a/src/cmd/internal/obj/dwarf.go b/src/cmd/internal/obj/dwarf.go index 9abb31b558..328fb03b24 100644 --- a/src/cmd/internal/obj/dwarf.go +++ b/src/cmd/internal/obj/dwarf.go @@ -46,12 +46,12 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) { // we expect at the start of a new sequence. stmt := true line := int64(1) - pc := s.Func.Text.Pc + pc := s.Func().Text.Pc var lastpc int64 // last PC written to line table, not last PC in func name := "" prologue, wrotePrologue := false, false // Walk the progs, generating the DWARF table. - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { prologue = prologue || (p.Pos.Xlogue() == src.PosPrologueEnd) // If we're not at a real instruction, keep looping! if p.Pos.Line() == 0 || (p.Link != nil && p.Link.Pc == p.Pc) { @@ -103,7 +103,7 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) { // text address before the end sequence op. If this isn't done, // GDB will assign a line number of zero the last row in the line // table, which we don't want. - lastlen := uint64(s.Size - (lastpc - s.Func.Text.Pc)) + lastlen := uint64(s.Size - (lastpc - s.Func().Text.Pc)) putpclcdelta(ctxt, dctxt, lines, lastlen, 0) dctxt.AddUint8(lines, 0) // start extended opcode dwarf.Uleb128put(dctxt, lines, 1) @@ -301,26 +301,27 @@ func (ctxt *Link) dwarfSym(s *LSym) (dwarfInfoSym, dwarfLocSym, dwarfRangesSym, if s.Type != objabi.STEXT { ctxt.Diag("dwarfSym of non-TEXT %v", s) } - if s.Func.dwarfInfoSym == nil { - s.Func.dwarfInfoSym = &LSym{ + fn := s.Func() + if fn.dwarfInfoSym == nil { + fn.dwarfInfoSym = &LSym{ Type: objabi.SDWARFFCN, } if ctxt.Flag_locationlists { - s.Func.dwarfLocSym = &LSym{ + fn.dwarfLocSym = &LSym{ Type: objabi.SDWARFLOC, } } - s.Func.dwarfRangesSym = &LSym{ + fn.dwarfRangesSym = &LSym{ Type: objabi.SDWARFRANGE, } - s.Func.dwarfDebugLinesSym = &LSym{ + fn.dwarfDebugLinesSym = &LSym{ Type: objabi.SDWARFLINES, } if s.WasInlined() { - s.Func.dwarfAbsFnSym = ctxt.DwFixups.AbsFuncDwarfSym(s) + fn.dwarfAbsFnSym = ctxt.DwFixups.AbsFuncDwarfSym(s) } } - return s.Func.dwarfInfoSym, s.Func.dwarfLocSym, s.Func.dwarfRangesSym, s.Func.dwarfAbsFnSym, s.Func.dwarfDebugLinesSym + return fn.dwarfInfoSym, fn.dwarfLocSym, fn.dwarfRangesSym, fn.dwarfAbsFnSym, fn.dwarfDebugLinesSym } func (s *LSym) Length(dwarfContext interface{}) int64 { @@ -331,7 +332,7 @@ func (s *LSym) Length(dwarfContext interface{}) int64 { // first instruction (prog) of the specified function. This will // presumably be the file in which the function is defined. func (ctxt *Link) fileSymbol(fn *LSym) *LSym { - p := fn.Func.Text + p := fn.Func().Text if p != nil { f, _ := linkgetlineFromPos(ctxt, p.Pos) fsym := ctxt.Lookup(f) @@ -405,8 +406,8 @@ func (ctxt *Link) DwarfAbstractFunc(curfn interface{}, s *LSym, myimportpath str if absfn.Size != 0 { ctxt.Diag("internal error: DwarfAbstractFunc double process %v", s) } - if s.Func == nil { - s.Func = new(FuncInfo) + if s.Func() == nil { + s.NewFuncInfo() } scopes, _ := ctxt.DebugInfo(s, absfn, curfn) dwctxt := dwCtxt{ctxt} @@ -527,8 +528,8 @@ func (ft *DwarfFixupTable) SetPrecursorFunc(s *LSym, fn interface{}) { // wrapper generation as opposed to the main inlining phase) it's // possible that we didn't cache the abstract function sym for the // text symbol -- do so now if needed. See issue 38068. - if s.Func != nil && s.Func.dwarfAbsFnSym == nil { - s.Func.dwarfAbsFnSym = absfn + if fn := s.Func(); fn != nil && fn.dwarfAbsFnSym == nil { + fn.dwarfAbsFnSym = absfn } ft.precursor[s] = fnState{precursor: fn, absfn: absfn} diff --git a/src/cmd/internal/obj/ld.go b/src/cmd/internal/obj/ld.go index 4ba52c7785..5d6c000dc6 100644 --- a/src/cmd/internal/obj/ld.go +++ b/src/cmd/internal/obj/ld.go @@ -59,7 +59,7 @@ func mkfwd(sym *LSym) { } i := 0 - for p := sym.Func.Text; p != nil && p.Link != nil; p = p.Link { + for p := sym.Func().Text; p != nil && p.Link != nil; p = p.Link { i-- if i < 0 { i = LOG - 1 diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index f14b691802..ae85dbbe4e 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -38,6 +38,7 @@ import ( "cmd/internal/src" "cmd/internal/sys" "fmt" + "log" "sync" ) @@ -400,7 +401,7 @@ type LSym struct { P []byte R []Reloc - Func *FuncInfo + Extra *interface{} // *FuncInfo if present Pkg string PkgIdx int32 @@ -433,6 +434,26 @@ type FuncInfo struct { FuncInfoSym *LSym } +// NewFuncInfo allocates and returns a FuncInfo for LSym. +func (s *LSym) NewFuncInfo() *FuncInfo { + if s.Extra != nil { + log.Fatalf("invalid use of LSym - NewFuncInfo with Extra of type %T", *s.Extra) + } + f := new(FuncInfo) + s.Extra = new(interface{}) + *s.Extra = f + return f +} + +// Func returns the *FuncInfo associated with s, or else nil. +func (s *LSym) Func() *FuncInfo { + if s.Extra == nil { + return nil + } + f, _ := (*s.Extra).(*FuncInfo) + return f +} + type InlMark struct { // When unwinding from an instruction in an inlined body, mark // where we should unwind to. diff --git a/src/cmd/internal/obj/mips/asm0.go b/src/cmd/internal/obj/mips/asm0.go index 6107974745..fd29f9fa21 100644 --- a/src/cmd/internal/obj/mips/asm0.go +++ b/src/cmd/internal/obj/mips/asm0.go @@ -410,7 +410,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Retpoline = false // don't keep printing } - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -455,7 +455,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for bflag != 0 { bflag = 0 pc = 0 - for p = c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { p.Pc = pc o = c.oplook(p) @@ -512,7 +512,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { bp := c.cursym.P var i int32 var out [4]uint32 - for p := c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { c.pc = p.Pc o = c.oplook(p) if int(o.size) > 4*len(out) { @@ -529,7 +529,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // We use REGTMP as a scratch register during call injection, // so instruction sequences that use REGTMP are unsafe to // preempt asynchronously. - obj.MarkUnsafePoints(c.ctxt, c.cursym.Func.Text, c.newprog, c.isUnsafePoint, c.isRestartable) + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable) } // isUnsafePoint returns whether p is an unsafe point. @@ -1302,7 +1302,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 = OP_JMP(c.opirr(p.As), uint32(v)) if p.To.Sym == nil { - p.To.Sym = c.cursym.Func.Text.From.Sym + p.To.Sym = c.cursym.Func().Text.From.Sym p.To.Offset = p.To.Target().Pc } rel := obj.Addrel(c.cursym) diff --git a/src/cmd/internal/obj/mips/obj0.go b/src/cmd/internal/obj/mips/obj0.go index f19facc00c..135a8df3aa 100644 --- a/src/cmd/internal/obj/mips/obj0.go +++ b/src/cmd/internal/obj/mips/obj0.go @@ -133,11 +133,11 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // a switch for enabling/disabling instruction scheduling nosched := true - if c.cursym.Func.Text == nil || c.cursym.Func.Text.Link == nil { + if c.cursym.Func().Text == nil || c.cursym.Func().Text.Link == nil { return } - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -ctxt.FixedFrameSize() { // Historical way to mark NOFRAME. @@ -153,8 +153,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines @@ -162,7 +162,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { * expand BECOME pseudo */ - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { /* too hard, just leave alone */ case obj.ATEXT: @@ -203,7 +203,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { AJAL, obj.ADUFFZERO, obj.ADUFFCOPY: - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF fallthrough case AJMP, @@ -267,7 +267,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize := int32(0) var p1 *obj.Prog var p2 *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: @@ -288,19 +288,19 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize += 4 } - if autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 { - if c.cursym.Func.Text.From.Sym.NoSplit() { + if autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 { + if c.cursym.Func().Text.From.Sym.NoSplit() { if ctxt.Debugvlog { ctxt.Logf("save suppressed in: %s\n", c.cursym.Name) } - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } } p.To.Offset = int64(autosize) - ctxt.FixedFrameSize() - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { c.cursym.Set(obj.AttrLeaf, true) if p.From.Sym.NoFrame() { break @@ -344,7 +344,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) } - if c.cursym.Func.Text.From.Sym.Wrapper() && c.cursym.Func.Text.Mark&LEAF == 0 { + if c.cursym.Func().Text.From.Sym.Wrapper() && c.cursym.Func().Text.Mark&LEAF == 0 { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOV g_panic(g), R1 @@ -438,7 +438,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Name = obj.NAME_NONE // clear fields as we may modify p to other instruction p.To.Sym = nil - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 { p.As = AJMP p.From = obj.Addr{} @@ -540,7 +540,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if c.ctxt.Arch.Family == sys.MIPS { // rewrite MOVD into two MOVF in 32-bit mode to avoid unaligned memory access - for p = c.cursym.Func.Text; p != nil; p = p1 { + for p = c.cursym.Func().Text; p != nil; p = p1 { p1 = p.Link if p.As != AMOVD { @@ -580,7 +580,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if nosched { // if we don't do instruction scheduling, simply add // NOP after each branch instruction. - for p = c.cursym.Func.Text; p != nil; p = p.Link { + for p = c.cursym.Func().Text; p != nil; p = p.Link { if p.Mark&BRANCH != 0 { c.addnop(p) } @@ -589,10 +589,10 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // instruction scheduling - q = nil // p - 1 - q1 = c.cursym.Func.Text // top of block - o := 0 // count of instructions - for p = c.cursym.Func.Text; p != nil; p = p1 { + q = nil // p - 1 + q1 = c.cursym.Func().Text // top of block + o := 0 // count of instructions + for p = c.cursym.Func().Text; p != nil; p = p1 { p1 = p.Link o++ if p.Mark&NOSCHED != 0 { @@ -791,7 +791,7 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { p.To.Type = obj.TYPE_BRANCH if c.cursym.CFunc() { p.To.Sym = c.ctxt.Lookup("runtime.morestackc") - } else if !c.cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt") } else { p.To.Sym = c.ctxt.Lookup("runtime.morestack") @@ -805,7 +805,7 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { p.As = AJMP p.To.Type = obj.TYPE_BRANCH - p.To.SetTarget(c.cursym.Func.Text.Link) + p.To.SetTarget(c.cursym.Func().Text.Link) p.Mark |= BRANCH // placeholder for q1's jump target diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index fa60c9ad6d..a08de891d3 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -189,8 +189,8 @@ func WriteObjFile(ctxt *Link, b *bio.Writer) { // object file, and the Pcln variables haven't been filled in. As such, we // need to check that Pcsp exists, and assume the other pcln variables exist // as well. Tests like test/fixedbugs/issue22200.go demonstrate this issue. - if s.Func != nil && s.Func.Pcln.Pcsp != nil { - pc := &s.Func.Pcln + if fn := s.Func(); fn != nil && fn.Pcln.Pcsp != nil { + pc := &fn.Pcln w.Bytes(pc.Pcsp.P) w.Bytes(pc.Pcfile.P) w.Bytes(pc.Pcline.P) @@ -303,8 +303,8 @@ func (w *writer) Sym(s *LSym) { name = filepath.ToSlash(name) } var align uint32 - if s.Func != nil { - align = uint32(s.Func.Align) + if fn := s.Func(); fn != nil { + align = uint32(fn.Align) } if s.ContentAddressable() { // We generally assume data symbols are natually aligned, @@ -470,38 +470,38 @@ func (w *writer) Aux(s *LSym) { if s.Gotype != nil { w.aux1(goobj.AuxGotype, s.Gotype) } - if s.Func != nil { - w.aux1(goobj.AuxFuncInfo, s.Func.FuncInfoSym) + if fn := s.Func(); fn != nil { + w.aux1(goobj.AuxFuncInfo, fn.FuncInfoSym) - for _, d := range s.Func.Pcln.Funcdata { + for _, d := range fn.Pcln.Funcdata { w.aux1(goobj.AuxFuncdata, d) } - if s.Func.dwarfInfoSym != nil && s.Func.dwarfInfoSym.Size != 0 { - w.aux1(goobj.AuxDwarfInfo, s.Func.dwarfInfoSym) + if fn.dwarfInfoSym != nil && fn.dwarfInfoSym.Size != 0 { + w.aux1(goobj.AuxDwarfInfo, fn.dwarfInfoSym) } - if s.Func.dwarfLocSym != nil && s.Func.dwarfLocSym.Size != 0 { - w.aux1(goobj.AuxDwarfLoc, s.Func.dwarfLocSym) + if fn.dwarfLocSym != nil && fn.dwarfLocSym.Size != 0 { + w.aux1(goobj.AuxDwarfLoc, fn.dwarfLocSym) } - if s.Func.dwarfRangesSym != nil && s.Func.dwarfRangesSym.Size != 0 { - w.aux1(goobj.AuxDwarfRanges, s.Func.dwarfRangesSym) + if fn.dwarfRangesSym != nil && fn.dwarfRangesSym.Size != 0 { + w.aux1(goobj.AuxDwarfRanges, fn.dwarfRangesSym) } - if s.Func.dwarfDebugLinesSym != nil && s.Func.dwarfDebugLinesSym.Size != 0 { - w.aux1(goobj.AuxDwarfLines, s.Func.dwarfDebugLinesSym) + if fn.dwarfDebugLinesSym != nil && fn.dwarfDebugLinesSym.Size != 0 { + w.aux1(goobj.AuxDwarfLines, fn.dwarfDebugLinesSym) } - if s.Func.Pcln.Pcsp != nil && s.Func.Pcln.Pcsp.Size != 0 { - w.aux1(goobj.AuxPcsp, s.Func.Pcln.Pcsp) + if fn.Pcln.Pcsp != nil && fn.Pcln.Pcsp.Size != 0 { + w.aux1(goobj.AuxPcsp, fn.Pcln.Pcsp) } - if s.Func.Pcln.Pcfile != nil && s.Func.Pcln.Pcfile.Size != 0 { - w.aux1(goobj.AuxPcfile, s.Func.Pcln.Pcfile) + if fn.Pcln.Pcfile != nil && fn.Pcln.Pcfile.Size != 0 { + w.aux1(goobj.AuxPcfile, fn.Pcln.Pcfile) } - if s.Func.Pcln.Pcline != nil && s.Func.Pcln.Pcline.Size != 0 { - w.aux1(goobj.AuxPcline, s.Func.Pcln.Pcline) + if fn.Pcln.Pcline != nil && fn.Pcln.Pcline.Size != 0 { + w.aux1(goobj.AuxPcline, fn.Pcln.Pcline) } - if s.Func.Pcln.Pcinline != nil && s.Func.Pcln.Pcinline.Size != 0 { - w.aux1(goobj.AuxPcinline, s.Func.Pcln.Pcinline) + if fn.Pcln.Pcinline != nil && fn.Pcln.Pcinline.Size != 0 { + w.aux1(goobj.AuxPcinline, fn.Pcln.Pcinline) } - for _, pcSym := range s.Func.Pcln.Pcdata { + for _, pcSym := range fn.Pcln.Pcdata { w.aux1(goobj.AuxPcdata, pcSym) } @@ -571,34 +571,34 @@ func nAuxSym(s *LSym) int { if s.Gotype != nil { n++ } - if s.Func != nil { + if fn := s.Func(); fn != nil { // FuncInfo is an aux symbol, each Funcdata is an aux symbol - n += 1 + len(s.Func.Pcln.Funcdata) - if s.Func.dwarfInfoSym != nil && s.Func.dwarfInfoSym.Size != 0 { + n += 1 + len(fn.Pcln.Funcdata) + if fn.dwarfInfoSym != nil && fn.dwarfInfoSym.Size != 0 { n++ } - if s.Func.dwarfLocSym != nil && s.Func.dwarfLocSym.Size != 0 { + if fn.dwarfLocSym != nil && fn.dwarfLocSym.Size != 0 { n++ } - if s.Func.dwarfRangesSym != nil && s.Func.dwarfRangesSym.Size != 0 { + if fn.dwarfRangesSym != nil && fn.dwarfRangesSym.Size != 0 { n++ } - if s.Func.dwarfDebugLinesSym != nil && s.Func.dwarfDebugLinesSym.Size != 0 { + if fn.dwarfDebugLinesSym != nil && fn.dwarfDebugLinesSym.Size != 0 { n++ } - if s.Func.Pcln.Pcsp != nil && s.Func.Pcln.Pcsp.Size != 0 { + if fn.Pcln.Pcsp != nil && fn.Pcln.Pcsp.Size != 0 { n++ } - if s.Func.Pcln.Pcfile != nil && s.Func.Pcln.Pcfile.Size != 0 { + if fn.Pcln.Pcfile != nil && fn.Pcln.Pcfile.Size != 0 { n++ } - if s.Func.Pcln.Pcline != nil && s.Func.Pcln.Pcline.Size != 0 { + if fn.Pcln.Pcline != nil && fn.Pcln.Pcline.Size != 0 { n++ } - if s.Func.Pcln.Pcinline != nil && s.Func.Pcln.Pcinline.Size != 0 { + if fn.Pcln.Pcinline != nil && fn.Pcln.Pcinline.Size != 0 { n++ } - n += len(s.Func.Pcln.Pcdata) + n += len(fn.Pcln.Pcdata) } return n } @@ -620,15 +620,16 @@ func genFuncInfoSyms(ctxt *Link) { var b bytes.Buffer symidx := int32(len(ctxt.defs)) for _, s := range ctxt.Text { - if s.Func == nil { + fn := s.Func() + if fn == nil { continue } o := goobj.FuncInfo{ - Args: uint32(s.Func.Args), - Locals: uint32(s.Func.Locals), - FuncID: objabi.FuncID(s.Func.FuncID), + Args: uint32(fn.Args), + Locals: uint32(fn.Locals), + FuncID: objabi.FuncID(fn.FuncID), } - pc := &s.Func.Pcln + pc := &fn.Pcln o.Pcsp = makeSymRef(preparePcSym(pc.Pcsp)) o.Pcfile = makeSymRef(preparePcSym(pc.Pcfile)) o.Pcline = makeSymRef(preparePcSym(pc.Pcline)) @@ -670,10 +671,10 @@ func genFuncInfoSyms(ctxt *Link) { isym.Set(AttrIndexed, true) symidx++ infosyms = append(infosyms, isym) - s.Func.FuncInfoSym = isym + fn.FuncInfoSym = isym b.Reset() - dwsyms := []*LSym{s.Func.dwarfRangesSym, s.Func.dwarfLocSym, s.Func.dwarfDebugLinesSym, s.Func.dwarfInfoSym} + dwsyms := []*LSym{fn.dwarfRangesSym, fn.dwarfLocSym, fn.dwarfDebugLinesSym, fn.dwarfInfoSym} for _, s := range dwsyms { if s == nil || s.Size == 0 { continue @@ -744,14 +745,15 @@ func (ctxt *Link) writeSymDebugNamed(s *LSym, name string) { } fmt.Fprintf(ctxt.Bso, "size=%d", s.Size) if s.Type == objabi.STEXT { - fmt.Fprintf(ctxt.Bso, " args=%#x locals=%#x funcid=%#x", uint64(s.Func.Args), uint64(s.Func.Locals), uint64(s.Func.FuncID)) + fn := s.Func() + fmt.Fprintf(ctxt.Bso, " args=%#x locals=%#x funcid=%#x", uint64(fn.Args), uint64(fn.Locals), uint64(fn.FuncID)) if s.Leaf() { fmt.Fprintf(ctxt.Bso, " leaf") } } fmt.Fprintf(ctxt.Bso, "\n") if s.Type == objabi.STEXT { - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { fmt.Fprintf(ctxt.Bso, "\t%#04x ", uint(int(p.Pc))) if ctxt.Debugasm > 1 { io.WriteString(ctxt.Bso, p.String()) diff --git a/src/cmd/internal/obj/pass.go b/src/cmd/internal/obj/pass.go index 09d520b4e9..01657dd4f6 100644 --- a/src/cmd/internal/obj/pass.go +++ b/src/cmd/internal/obj/pass.go @@ -118,7 +118,7 @@ func checkaddr(ctxt *Link, p *Prog, a *Addr) { } func linkpatch(ctxt *Link, sym *LSym, newprog ProgAlloc) { - for p := sym.Func.Text; p != nil; p = p.Link { + for p := sym.Func().Text; p != nil; p = p.Link { checkaddr(ctxt, p, &p.From) if p.GetFrom3() != nil { checkaddr(ctxt, p, p.GetFrom3()) @@ -138,7 +138,7 @@ func linkpatch(ctxt *Link, sym *LSym, newprog ProgAlloc) { if p.To.Sym != nil { continue } - q := sym.Func.Text + q := sym.Func().Text for q != nil && p.To.Offset != q.Pc { if q.Forwd != nil && p.To.Offset >= q.Forwd.Pc { q = q.Forwd @@ -164,7 +164,7 @@ func linkpatch(ctxt *Link, sym *LSym, newprog ProgAlloc) { } // Collapse series of jumps to jumps. - for p := sym.Func.Text; p != nil; p = p.Link { + for p := sym.Func().Text; p != nil; p = p.Link { if p.To.Target() == nil { continue } diff --git a/src/cmd/internal/obj/pcln.go b/src/cmd/internal/obj/pcln.go index ce0d3714c0..67c4f9a62b 100644 --- a/src/cmd/internal/obj/pcln.go +++ b/src/cmd/internal/obj/pcln.go @@ -35,20 +35,21 @@ func funcpctab(ctxt *Link, func_ *LSym, desc string, valfunc func(*Link, *LSym, val := int32(-1) oldval := val - if func_.Func.Text == nil { + fn := func_.Func() + if fn.Text == nil { // Return the emtpy symbol we've built so far. return sym } - pc := func_.Func.Text.Pc + pc := fn.Text.Pc if dbg { - ctxt.Logf("%6x %6d %v\n", uint64(pc), val, func_.Func.Text) + ctxt.Logf("%6x %6d %v\n", uint64(pc), val, fn.Text) } buf := make([]byte, binary.MaxVarintLen32) started := false - for p := func_.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { // Update val. If it's not changing, keep going. val = valfunc(ctxt, func_, val, p, 0, arg) @@ -107,7 +108,7 @@ func funcpctab(ctxt *Link, func_ *LSym, desc string, valfunc func(*Link, *LSym, if started { if dbg { - ctxt.Logf("%6x done\n", uint64(func_.Func.Text.Pc+func_.Size)) + ctxt.Logf("%6x done\n", uint64(fn.Text.Pc+func_.Size)) } v := (func_.Size - pc) / int64(ctxt.Arch.MinLC) if v < 0 { @@ -257,12 +258,12 @@ func pctopcdata(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg i } func linkpcln(ctxt *Link, cursym *LSym) { - pcln := &cursym.Func.Pcln + pcln := &cursym.Func().Pcln pcln.UsedFiles = make(map[goobj.CUFileIndex]struct{}) npcdata := 0 nfuncdata := 0 - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { // Find the highest ID of any used PCDATA table. This ignores PCDATA table // that consist entirely of "-1", since that's the assumed default value. // From.Offset is table ID @@ -288,11 +289,12 @@ func linkpcln(ctxt *Link, cursym *LSym) { // Check that all the Progs used as inline markers are still reachable. // See issue #40473. - inlMarkProgs := make(map[*Prog]struct{}, len(cursym.Func.InlMarks)) - for _, inlMark := range cursym.Func.InlMarks { + fn := cursym.Func() + inlMarkProgs := make(map[*Prog]struct{}, len(fn.InlMarks)) + for _, inlMark := range fn.InlMarks { inlMarkProgs[inlMark.p] = struct{}{} } - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { if _, ok := inlMarkProgs[p]; ok { delete(inlMarkProgs, p) } @@ -303,7 +305,7 @@ func linkpcln(ctxt *Link, cursym *LSym) { pcinlineState := new(pcinlineState) pcln.Pcinline = funcpctab(ctxt, cursym, "pctoinline", pcinlineState.pctoinline, nil) - for _, inlMark := range cursym.Func.InlMarks { + for _, inlMark := range fn.InlMarks { pcinlineState.setParentPC(ctxt, int(inlMark.id), int32(inlMark.p.Pc)) } pcln.InlTree = pcinlineState.localTree @@ -316,7 +318,7 @@ func linkpcln(ctxt *Link, cursym *LSym) { // tabulate which pc and func data we have. havepc := make([]uint32, (npcdata+31)/32) havefunc := make([]uint32, (nfuncdata+31)/32) - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { if p.As == AFUNCDATA { if (havefunc[p.From.Offset/32]>>uint64(p.From.Offset%32))&1 != 0 { ctxt.Diag("multiple definitions for FUNCDATA $%d", p.From.Offset) @@ -344,7 +346,7 @@ func linkpcln(ctxt *Link, cursym *LSym) { // funcdata if nfuncdata > 0 { - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { if p.As != AFUNCDATA { continue } diff --git a/src/cmd/internal/obj/plist.go b/src/cmd/internal/obj/plist.go index 6e33f29959..eb54c67f6a 100644 --- a/src/cmd/internal/obj/plist.go +++ b/src/cmd/internal/obj/plist.go @@ -81,7 +81,7 @@ func Flushplist(ctxt *Link, plist *Plist, newprog ProgAlloc, myimportpath string continue } found := false - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { if p.As == AFUNCDATA && p.From.Type == TYPE_CONST && p.From.Offset == objabi.FUNCDATA_ArgsPointerMaps { found = true break @@ -89,7 +89,7 @@ func Flushplist(ctxt *Link, plist *Plist, newprog ProgAlloc, myimportpath string } if !found { - p := Appendp(s.Func.Text, newprog) + p := Appendp(s.Func().Text, newprog) p.As = AFUNCDATA p.From.Type = TYPE_CONST p.From.Offset = objabi.FUNCDATA_ArgsPointerMaps @@ -120,15 +120,15 @@ func (ctxt *Link) InitTextSym(s *LSym, flag int) { // func _() { } return } - if s.Func != nil { + if s.Func() != nil { ctxt.Diag("InitTextSym double init for %s", s.Name) } - s.Func = new(FuncInfo) + s.NewFuncInfo() if s.OnList() { ctxt.Diag("symbol %s listed multiple times", s.Name) } name := strings.Replace(s.Name, "\"\"", ctxt.Pkgpath, -1) - s.Func.FuncID = objabi.GetFuncID(name, flag&WRAPPER != 0) + s.Func().FuncID = objabi.GetFuncID(name, flag&WRAPPER != 0) s.Set(AttrOnList, true) s.Set(AttrDuplicateOK, flag&DUPOK != 0) s.Set(AttrNoSplit, flag&NOSPLIT != 0) @@ -185,7 +185,7 @@ func (ctxt *Link) EmitEntryLiveness(s *LSym, p *Prog, newprog ProgAlloc) *Prog { // Similar to EmitEntryLiveness, but just emit stack map. func (ctxt *Link) EmitEntryStackMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { pcdata := Appendp(p, newprog) - pcdata.Pos = s.Func.Text.Pos + pcdata.Pos = s.Func().Text.Pos pcdata.As = APCDATA pcdata.From.Type = TYPE_CONST pcdata.From.Offset = objabi.PCDATA_StackMapIndex @@ -198,7 +198,7 @@ func (ctxt *Link) EmitEntryStackMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { // Similar to EmitEntryLiveness, but just emit register map. func (ctxt *Link) EmitEntryRegMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { pcdata := Appendp(p, newprog) - pcdata.Pos = s.Func.Text.Pos + pcdata.Pos = s.Func().Text.Pos pcdata.As = APCDATA pcdata.From.Type = TYPE_CONST pcdata.From.Offset = objabi.PCDATA_RegMapIndex diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index c2e8e9e9d0..dcabb3cd6a 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -663,8 +663,8 @@ func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { // the function alignment is not changed which might // result in 16 byte alignment but that is still fine. // TODO: alignment on AIX - if ctxt.Headtype != objabi.Haix && cursym.Func.Align < 32 { - cursym.Func.Align = 32 + if ctxt.Headtype != objabi.Haix && cursym.Func().Align < 32 { + cursym.Func().Align = 32 } default: ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", a) @@ -673,7 +673,7 @@ func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { } func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -722,7 +722,7 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for bflag != 0 { bflag = 0 pc = 0 - for p = c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { p.Pc = pc o = c.oplook(p) @@ -784,7 +784,7 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { bp := c.cursym.P var i int32 var out [6]uint32 - for p := c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { c.pc = p.Pc o = c.oplook(p) if int(o.size) > 4*len(out) { diff --git a/src/cmd/internal/obj/ppc64/obj9.go b/src/cmd/internal/obj/ppc64/obj9.go index c012762a18..3ab19de602 100644 --- a/src/cmd/internal/obj/ppc64/obj9.go +++ b/src/cmd/internal/obj/ppc64/obj9.go @@ -402,13 +402,13 @@ func (c *ctxt9) rewriteToUseGot(p *obj.Prog) { func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // TODO(minux): add morestack short-cuts with small fixed frame-size. - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxt9{ctxt: ctxt, cursym: cursym, newprog: newprog} - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -8 { // Compatibility hack. @@ -424,8 +424,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines @@ -435,7 +435,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var q *obj.Prog var q1 *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { /* too hard, just leave alone */ case obj.ATEXT: @@ -541,7 +541,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ABCL, obj.ADUFFZERO, obj.ADUFFCOPY: - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF fallthrough case ABC, @@ -598,7 +598,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize := int32(0) var p1 *obj.Prog var p2 *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: @@ -664,7 +664,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { rel.Type = objabi.R_ADDRPOWER_PCREL } - if !c.cursym.Func.Text.From.Sym.NoSplit() { + if !c.cursym.Func().Text.From.Sym.NoSplit() { q = c.stacksplit(q, autosize) // emit split check } @@ -732,14 +732,14 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) } - } else if c.cursym.Func.Text.Mark&LEAF == 0 { + } else if c.cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // (e.g. gogo) are not identified as leaves but still have // no frame. - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { c.cursym.Set(obj.AttrLeaf, true) break } @@ -755,7 +755,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.To.Offset = 24 } - if c.cursym.Func.Text.From.Sym.Wrapper() { + if c.cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVD g_panic(g), R3 @@ -853,7 +853,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { retTarget := p.To.Sym - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 || c.cursym.Name == "runtime.racecallbackthunk" { p.As = ABR p.From = obj.Addr{} @@ -1161,7 +1161,7 @@ func (c *ctxt9) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { var morestacksym *obj.LSym if c.cursym.CFunc() { morestacksym = c.ctxt.Lookup("runtime.morestackc") - } else if !c.cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { morestacksym = c.ctxt.Lookup("runtime.morestack_noctxt") } else { morestacksym = c.ctxt.Lookup("runtime.morestack") diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 841b30d85c..045c2250b5 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -427,7 +427,7 @@ func InvertBranch(as obj.As) obj.As { // instruction. Must be called after progedit. func containsCall(sym *obj.LSym) bool { // CALLs are CALL or JAL(R) with link register LR. - for p := sym.Func.Text; p != nil; p = p.Link { + for p := sym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ACALL: return true @@ -499,12 +499,12 @@ func stackOffset(a *obj.Addr, stacksize int64) { // concrete, real RISC-V instructions or directive pseudo-ops like TEXT, // PCDATA, and FUNCDATA. func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } // Generate the prologue. - text := cursym.Func.Text + text := cursym.Func().Text if text.As != obj.ATEXT { ctxt.Diag("preprocess: found symbol that does not start with TEXT directive") return @@ -538,12 +538,12 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { stacksize += ctxt.FixedFrameSize() } - cursym.Func.Args = text.To.Val.(int32) - cursym.Func.Locals = int32(stacksize) + cursym.Func().Args = text.To.Val.(int32) + cursym.Func().Locals = int32(stacksize) prologue := text - if !cursym.Func.Text.From.Sym.NoSplit() { + if !cursym.Func().Text.From.Sym.NoSplit() { prologue = stacksplit(ctxt, prologue, cursym, newprog, stacksize) // emit split check } @@ -567,7 +567,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { prologue = ctxt.EndUnsafePoint(prologue, newprog, -1) } - if cursym.Func.Text.From.Sym.Wrapper() { + if cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOV g_panic(g), X11 @@ -647,13 +647,13 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // Update stack-based offsets. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { stackOffset(&p.From, stacksize) stackOffset(&p.To, stacksize) } // Additional instruction rewriting. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.AGETCALLERPC: if cursym.Leaf() { @@ -733,7 +733,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // Rewrite MOV pseudo-instructions. This cannot be done in // progedit, as SP offsets need to be applied before we split // up some of the Addrs. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: rewriteMOV(ctxt, newprog, p) @@ -741,7 +741,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // Split immediates larger than 12-bits. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { // $imm, REG, TO case AADDI, AANDI, AORI, AXORI: @@ -858,9 +858,9 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // a fixed point will be reached). No attempt to handle functions > 2GiB. for { rescan := false - setPCs(cursym.Func.Text, 0) + setPCs(cursym.Func().Text, 0) - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ: if p.To.Type != obj.TYPE_BRANCH { @@ -917,7 +917,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // Now that there are no long branches, resolve branch and jump targets. // At this point, instruction rewriting which changes the number of // instructions will break everything--don't do it! - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ, AJAL: switch p.To.Type { @@ -940,7 +940,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // Validate all instructions - this provides nice error messages. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { for _, ins := range instructionsForProg(p) { ins.validate(ctxt) } @@ -1068,7 +1068,7 @@ func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgA p.To.Type = obj.TYPE_BRANCH if cursym.CFunc() { p.To.Sym = ctxt.Lookup("runtime.morestackc") - } else if !cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !cursym.Func().Text.From.Sym.NeedCtxt() { p.To.Sym = ctxt.Lookup("runtime.morestack_noctxt") } else { p.To.Sym = ctxt.Lookup("runtime.morestack") @@ -1083,7 +1083,7 @@ func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgA p.As = AJAL p.To = obj.Addr{Type: obj.TYPE_BRANCH} p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} - p.To.SetTarget(cursym.Func.Text.Link) + p.To.SetTarget(cursym.Func().Text.Link) // placeholder for to_done's jump target p = obj.Appendp(p, newprog) @@ -1926,7 +1926,7 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } var symcode []uint32 - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case AJALR: if p.To.Sym != nil { @@ -1981,7 +1981,7 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Arch.ByteOrder.PutUint32(p, symcode[i]) } - obj.MarkUnsafePoints(ctxt, cursym.Func.Text, newprog, isUnsafePoint, nil) + obj.MarkUnsafePoints(ctxt, cursym.Func().Text, newprog, isUnsafePoint, nil) } func isUnsafePoint(p *obj.Prog) bool { diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go index cb3a2c3196..da14dd3c41 100644 --- a/src/cmd/internal/obj/s390x/asmz.go +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -447,7 +447,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Retpoline = false // don't keep printing } - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -473,7 +473,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.cursym.R[nrelocs0+i] = obj.Reloc{} } c.cursym.R = c.cursym.R[:nrelocs0] // preserve marker relocations generated by the compiler - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { pc := int64(len(buffer)) if pc != p.Pc { changed = true @@ -504,7 +504,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // We use REGTMP as a scratch register during call injection, // so instruction sequences that use REGTMP are unsafe to // preempt asynchronously. - obj.MarkUnsafePoints(c.ctxt, c.cursym.Func.Text, c.newprog, c.isUnsafePoint, nil) + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, nil) } // Return whether p is an unsafe point. diff --git a/src/cmd/internal/obj/s390x/objz.go b/src/cmd/internal/obj/s390x/objz.go index 625bb0f7b4..3af5425b36 100644 --- a/src/cmd/internal/obj/s390x/objz.go +++ b/src/cmd/internal/obj/s390x/objz.go @@ -205,13 +205,13 @@ func (c *ctxtz) rewriteToUseGot(p *obj.Prog) { func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // TODO(minux): add morestack short-cuts with small fixed frame-size. - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxtz{ctxt: ctxt, cursym: cursym, newprog: newprog} - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -8 { // Compatibility hack. @@ -227,8 +227,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines @@ -237,7 +237,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { */ var q *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ATEXT: q = p @@ -245,7 +245,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case ABL, ABCL: q = p - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF fallthrough case ABC, @@ -294,7 +294,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var pPre *obj.Prog var pPreempt *obj.Prog wasSplit := false - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { pLast = p switch p.As { case obj.ATEXT: @@ -356,19 +356,19 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.Spadj = autosize q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) - } else if c.cursym.Func.Text.Mark&LEAF == 0 { + } else if c.cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // (e.g. gogo) are not identified as leaves but still have // no frame. - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { c.cursym.Set(obj.AttrLeaf, true) break } - if c.cursym.Func.Text.From.Sym.Wrapper() { + if c.cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVD g_panic(g), R3 @@ -461,7 +461,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case obj.ARET: retTarget := p.To.Sym - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 { p.As = ABR p.From = obj.Addr{} @@ -696,7 +696,7 @@ func (c *ctxtz) stacksplitPost(p *obj.Prog, pPre *obj.Prog, pPreempt *obj.Prog, p.To.Type = obj.TYPE_BRANCH if c.cursym.CFunc() { p.To.Sym = c.ctxt.Lookup("runtime.morestackc") - } else if !c.cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt") } else { p.To.Sym = c.ctxt.Lookup("runtime.morestack") @@ -709,7 +709,7 @@ func (c *ctxtz) stacksplitPost(p *obj.Prog, pPre *obj.Prog, pPreempt *obj.Prog, p.As = ABR p.To.Type = obj.TYPE_BRANCH - p.To.SetTarget(c.cursym.Func.Text.Link) + p.To.SetTarget(c.cursym.Func().Text.Link) return p } diff --git a/src/cmd/internal/obj/sym.go b/src/cmd/internal/obj/sym.go index e5d7b2cbfd..0182773f8e 100644 --- a/src/cmd/internal/obj/sym.go +++ b/src/cmd/internal/obj/sym.go @@ -358,7 +358,8 @@ func (ctxt *Link) traverseSyms(flag traverseFlag, fn func(*LSym)) { } func (ctxt *Link) traverseFuncAux(flag traverseFlag, fsym *LSym, fn func(parent *LSym, aux *LSym)) { - pc := &fsym.Func.Pcln + fninfo := fsym.Func() + pc := &fninfo.Pcln if flag&traverseAux == 0 { // NB: should it become necessary to walk aux sym reloc references // without walking the aux syms themselves, this can be changed. @@ -389,7 +390,8 @@ func (ctxt *Link) traverseFuncAux(flag traverseFlag, fsym *LSym, fn func(parent fn(fsym, filesym) } } - dwsyms := []*LSym{fsym.Func.dwarfRangesSym, fsym.Func.dwarfLocSym, fsym.Func.dwarfDebugLinesSym, fsym.Func.dwarfInfoSym} + + dwsyms := []*LSym{fninfo.dwarfRangesSym, fninfo.dwarfLocSym, fninfo.dwarfDebugLinesSym, fninfo.dwarfInfoSym} for _, dws := range dwsyms { if dws == nil || dws.Size == 0 { continue diff --git a/src/cmd/internal/obj/wasm/wasmobj.go b/src/cmd/internal/obj/wasm/wasmobj.go index a9e093a8ad..f7f66a1255 100644 --- a/src/cmd/internal/obj/wasm/wasmobj.go +++ b/src/cmd/internal/obj/wasm/wasmobj.go @@ -182,14 +182,14 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { return p } - framesize := s.Func.Text.To.Offset + framesize := s.Func().Text.To.Offset if framesize < 0 { panic("bad framesize") } - s.Func.Args = s.Func.Text.To.Val.(int32) - s.Func.Locals = int32(framesize) + s.Func().Args = s.Func().Text.To.Val.(int32) + s.Func().Locals = int32(framesize) - if s.Func.Text.From.Sym.Wrapper() { + if s.Func().Text.From.Sym.Wrapper() { // if g._panic != nil && g._panic.argp == FP { // g._panic.argp = bottom-of-frame // } @@ -222,7 +222,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { Offset: 0, // panic.argp } - p := s.Func.Text + p := s.Func().Text p = appendp(p, AMOVD, gpanic, regAddr(REG_R0)) p = appendp(p, AGet, regAddr(REG_R0)) @@ -245,7 +245,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } if framesize > 0 { - p := s.Func.Text + p := s.Func().Text p = appendp(p, AGet, regAddr(REG_SP)) p = appendp(p, AI32Const, constAddr(framesize)) p = appendp(p, AI32Sub) @@ -260,8 +260,8 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { pc := int64(0) // pc is only incremented when necessary, this avoids bloat of the BrTable instruction var tableIdxs []uint64 tablePC := int64(0) - base := ctxt.PosTable.Pos(s.Func.Text.Pos).Base() - for p := s.Func.Text; p != nil; p = p.Link { + base := ctxt.PosTable.Pos(s.Func().Text.Pos).Base() + for p := s.Func().Text; p != nil; p = p.Link { prevBase := base base = ctxt.PosTable.Pos(p.Pos).Base() switch p.As { @@ -313,8 +313,8 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { tableIdxs = append(tableIdxs, uint64(numResumePoints)) s.Size = pc + 1 - if !s.Func.Text.From.Sym.NoSplit() { - p := s.Func.Text + if !s.Func().Text.From.Sym.NoSplit() { + p := s.Func().Text if framesize <= objabi.StackSmall { // small stack: SP <= stackguard @@ -352,7 +352,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { p = appendp(p, AIf) p = appendp(p, obj.ACALL, constAddr(0)) - if s.Func.Text.From.Sym.NeedCtxt() { + if s.Func().Text.From.Sym.NeedCtxt() { p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestack} } else { p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestackNoCtxt} @@ -365,7 +365,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { var entryPointLoopBranches []*obj.Prog var unwindExitBranches []*obj.Prog currentDepth := 0 - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.As { case ABlock, ALoop, AIf: currentDepth++ @@ -562,7 +562,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } } - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.From.Name { case obj.NAME_AUTO: p.From.Offset += int64(framesize) @@ -712,7 +712,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } { - p := s.Func.Text + p := s.Func().Text if len(unwindExitBranches) > 0 { p = appendp(p, ABlock) // unwindExit, used to return 1 when unwinding the stack for _, b := range unwindExitBranches { @@ -749,7 +749,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { currentDepth = 0 blockDepths := make(map[*obj.Prog]int) - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.As { case ABlock, ALoop, AIf: currentDepth++ @@ -850,7 +850,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { hasLocalSP = true var regUsed [MAXREG - MINREG]bool - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { if p.From.Reg != 0 { regUsed[p.From.Reg-MINREG] = true } @@ -896,7 +896,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { updateLocalSP(w) } - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.As { case AGet: if p.From.Type != obj.TYPE_REG { diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index 4940c79eaa..c412f4945d 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -2050,7 +2050,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { ctxt.Diag("x86 tables not initialized, call x86.instinit first") } - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil { p.To.SetTarget(p) } @@ -2085,7 +2085,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } var count int64 // rough count of number of instructions - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { count++ p.Back = branchShort // use short branches first time through if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) { @@ -2113,7 +2113,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { c = 0 var pPrev *obj.Prog nops = nops[:0] - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { c0 := c c = pjc.padJump(ctxt, s, p, c) @@ -2227,7 +2227,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { // the first instruction.) return p.From.Index == REG_TLS } - obj.MarkUnsafePoints(ctxt, s.Func.Text, newprog, useTLS, nil) + obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil) } } diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index 18a6afcd77..e11fa13f65 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -563,11 +563,11 @@ func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { } func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } - p := cursym.Func.Text + p := cursym.Func().Text autoffset := int32(p.To.Offset) if autoffset < 0 { autoffset = 0 @@ -602,12 +602,12 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } textarg := int64(p.To.Val.(int32)) - cursym.Func.Args = int32(textarg) - cursym.Func.Locals = int32(p.To.Offset) + cursym.Func().Args = int32(textarg) + cursym.Func().Locals = int32(p.To.Offset) // TODO(rsc): Remove. - if ctxt.Arch.Family == sys.I386 && cursym.Func.Locals < 0 { - cursym.Func.Locals = 0 + if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 { + cursym.Func().Locals = 0 } // TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'. @@ -642,7 +642,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p = load_g_cx(ctxt, p, newprog) // load g into CX } - if !cursym.Func.Text.From.Sym.NoSplit() { + if !cursym.Func().Text.From.Sym.NoSplit() { p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg)) // emit split check } @@ -690,7 +690,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Reg = REG_BP } - if cursym.Func.Text.From.Sym.Wrapper() { + if cursym.Func().Text.From.Sym.Wrapper() { // if g._panic != nil && g._panic.argp == FP { // g._panic.argp = bottom-of-frame // } @@ -808,7 +808,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } var deltasp int32 - for p = cursym.Func.Text; p != nil; p = p.Link { + for p = cursym.Func().Text; p != nil; p = p.Link { pcsize := ctxt.Arch.RegSize switch p.From.Name { case obj.NAME_AUTO: @@ -1103,7 +1103,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA end := ctxt.EndUnsafePoint(jls, newprog, -1) var last *obj.Prog - for last = cursym.Func.Text; last.Link != nil; last = last.Link { + for last = cursym.Func().Text; last.Link != nil; last = last.Link { } // Now we are at the end of the function, but logically @@ -1117,7 +1117,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA pcdata = ctxt.StartUnsafePoint(pcdata, newprog) call := obj.Appendp(pcdata, newprog) - call.Pos = cursym.Func.Text.Pos + call.Pos = cursym.Func().Text.Pos call.As = obj.ACALL call.To.Type = obj.TYPE_BRANCH call.To.Name = obj.NAME_EXTERN @@ -1125,7 +1125,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA switch { case cursym.CFunc(): morestack = "runtime.morestackc" - case !cursym.Func.Text.From.Sym.NeedCtxt(): + case !cursym.Func().Text.From.Sym.NeedCtxt(): morestack = "runtime.morestack_noctxt" } call.To.Sym = ctxt.Lookup(morestack) @@ -1144,7 +1144,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA jmp := obj.Appendp(pcdata, newprog) jmp.As = obj.AJMP jmp.To.Type = obj.TYPE_BRANCH - jmp.To.SetTarget(cursym.Func.Text.Link) + jmp.To.SetTarget(cursym.Func().Text.Link) jmp.Spadj = +framesize jls.To.SetTarget(call) -- cgit v1.3 From 308ec220a2e55e0948505881b051cbefa05cf696 Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Fri, 16 Oct 2020 18:35:30 +0800 Subject: cmd/asm: refactor some encoding functions for load/store with immediate offset on arm64 Some of the current functions for encoding load/store with immediate offset instructions, like opstr12(), opstr9(), opldr12(), opldr9() and opldrpp(), etc., they have the same code, so this patch refactors them and merges them into two functions opstr() and opldr(). Change-Id: I60367f8b720b77c7ebe6d66905a950dcf7701836 Reviewed-on: https://go-review.googlesource.com/c/go/+/263479 Run-TryBot: fannie zhang TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Trust: fannie zhang --- src/cmd/internal/obj/arm64/asm7.go | 153 +++++++++++-------------------------- 1 file changed, 44 insertions(+), 109 deletions(-) (limited to 'src/cmd/internal/obj/arm64') diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 6b9fe27c05..92c5729d25 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -205,12 +205,8 @@ func SYSHINT(x uint32) uint32 { return SYSOP(0, 0, 3, 2, 0, x, 0x1F) } -func LDSTR12U(sz uint32, v uint32, opc uint32) uint32 { - return sz<<30 | 7<<27 | v<<26 | 1<<24 | opc<<22 -} - -func LDSTR9S(sz uint32, v uint32, opc uint32) uint32 { - return sz<<30 | 7<<27 | v<<26 | 0<<24 | opc<<22 +func LDSTR(sz uint32, v uint32, opc uint32) uint32 { + return sz<<30 | 7<<27 | v<<26 | opc<<22 } func LD2STR(o uint32) uint32 { @@ -3392,10 +3388,10 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { r = int(o.param) } if v < 0 || v%sz != 0 { /* unscaled 9-bit signed */ - o1 = c.olsr9s(p, int32(c.opstr9(p, p.As)), v, r, int(p.From.Reg)) + o1 = c.olsr9s(p, int32(c.opstr(p, p.As)), v, r, int(p.From.Reg)) } else { v = int32(c.offsetshift(p, int64(v), int(o.a4))) - o1 = c.olsr12u(p, int32(c.opstr12(p, p.As)), v, r, int(p.From.Reg)) + o1 = c.olsr12u(p, int32(c.opstr(p, p.As)), v, r, int(p.From.Reg)) } case 21: /* movT O(R),R -> ldrT */ @@ -3407,11 +3403,11 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { r = int(o.param) } if v < 0 || v%sz != 0 { /* unscaled 9-bit signed */ - o1 = c.olsr9s(p, int32(c.opldr9(p, p.As)), v, r, int(p.To.Reg)) + o1 = c.olsr9s(p, int32(c.opldr(p, p.As)), v, r, int(p.To.Reg)) } else { v = int32(c.offsetshift(p, int64(v), int(o.a1))) //print("offset=%lld v=%ld a1=%d\n", instoffset, v, o->a1); - o1 = c.olsr12u(p, int32(c.opldr12(p, p.As)), v, r, int(p.To.Reg)) + o1 = c.olsr12u(p, int32(c.opldr(p, p.As)), v, r, int(p.To.Reg)) } case 22: /* movT (R)O!,R; movT O(R)!, R -> ldrT */ @@ -3424,7 +3420,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { if v < -256 || v > 255 { c.ctxt.Diag("offset out of range [-255,254]: %v", p) } - o1 = c.opldrpp(p, p.As) + o1 = c.opldr(p, p.As) if o.scond == C_XPOST { o1 |= 1 << 10 } else { @@ -3442,7 +3438,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { if v < -256 || v > 255 { c.ctxt.Diag("offset out of range [-255,254]: %v", p) } - o1 = LD2STR(c.opldrpp(p, p.As)) + o1 = c.opstr(p, p.As) if o.scond == C_XPOST { o1 |= 1 << 10 } else { @@ -3594,7 +3590,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 = c.oaddi(p, int32(c.opirr(p, AADD)), hi, r, REGTMP) - o2 = c.olsr12u(p, int32(c.opstr12(p, p.As)), ((v-hi)>>uint(s))&0xFFF, REGTMP, int(p.From.Reg)) + o2 = c.olsr12u(p, int32(c.opstr(p, p.As)), ((v-hi)>>uint(s))&0xFFF, REGTMP, int(p.From.Reg)) break storeusepool: @@ -3638,7 +3634,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 = c.oaddi(p, int32(c.opirr(p, AADD)), hi, r, REGTMP) - o2 = c.olsr12u(p, int32(c.opldr12(p, p.As)), ((v-hi)>>uint(s))&0xFFF, REGTMP, int(p.To.Reg)) + o2 = c.olsr12u(p, int32(c.opldr(p, p.As)), ((v-hi)>>uint(s))&0xFFF, REGTMP, int(p.To.Reg)) break loadusepool: @@ -4127,7 +4123,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Sym = p.To.Sym rel.Add = p.To.Offset rel.Type = objabi.R_ADDRARM64 - o3 = c.olsr12u(p, int32(c.opstr12(p, p.As)), 0, REGTMP, int(p.From.Reg)) + o3 = c.olsr12u(p, int32(c.opstr(p, p.As)), 0, REGTMP, int(p.From.Reg)) case 65: /* movT addr,R -> adrp + add + movT (REGTMP), R */ o1 = ADR(1, 0, REGTMP) @@ -4138,7 +4134,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Sym = p.From.Sym rel.Add = p.From.Offset rel.Type = objabi.R_ADDRARM64 - o3 = c.olsr12u(p, int32(c.opldr12(p, p.As)), 0, REGTMP, int(p.To.Reg)) + o3 = c.olsr12u(p, int32(c.opldr(p, p.As)), 0, REGTMP, int(p.To.Reg)) case 66: /* ldp O(R)!, (r1, r2); ldp (R)O!, (r1, r2) */ v := int32(c.regoff(&p.From)) @@ -4191,7 +4187,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { case 70: /* IE model movd $tlsvar, reg -> adrp REGTMP, 0; ldr reg, [REGTMP, #0] + relocs */ o1 = ADR(1, 0, REGTMP) - o2 = c.olsr12u(p, int32(c.opldr12(p, AMOVD)), 0, REGTMP, int(p.To.Reg)) + o2 = c.olsr12u(p, int32(c.opldr(p, AMOVD)), 0, REGTMP, int(p.To.Reg)) rel := obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 8 @@ -4204,7 +4200,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { case 71: /* movd sym@GOT, reg -> adrp REGTMP, #0; ldr reg, [REGTMP, #0] + relocs */ o1 = ADR(1, 0, REGTMP) - o2 = c.olsr12u(p, int32(c.opldr12(p, AMOVD)), 0, REGTMP, int(p.To.Reg)) + o2 = c.olsr12u(p, int32(c.opldr(p, AMOVD)), 0, REGTMP, int(p.To.Reg)) rel := obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 8 @@ -4765,7 +4761,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } } - o1 = c.opldrpp(p, p.As) + o1 = c.opirr(p, p.As) o1 |= (uint32(r&31) << 5) | (uint32((imm>>3)&0xfff) << 10) | (uint32(v & 31)) case 92: /* vmov Vn.[index], Vd.[index] */ @@ -6106,8 +6102,12 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVUSHLL2, AVUXTL2: return 3<<29 | 15<<24 | 0x29<<10 + case AVXAR: return 0xCE<<24 | 1<<23 + + case APRFM: + return 0xf9<<24 | 2<<22 } c.ctxt.Diag("%v: bad irr %v", p, a) @@ -6502,7 +6502,7 @@ func (c *ctxt7) opstore(p *obj.Prog, a obj.As) uint32 { } /* - * load/store register (unsigned immediate) C3.3.13 + * load/store register (scaled 12-bit unsigned immediate) C3.3.13 * these produce 64-bit values (when there's an option) */ func (c *ctxt7) olsr12u(p *obj.Prog, o int32, v int32, b int, r int) uint32 { @@ -6512,49 +6512,12 @@ func (c *ctxt7) olsr12u(p *obj.Prog, o int32, v int32, b int, r int) uint32 { o |= (v & 0xFFF) << 10 o |= int32(b&31) << 5 o |= int32(r & 31) + o |= 1 << 24 return uint32(o) } -func (c *ctxt7) opldr12(p *obj.Prog, a obj.As) uint32 { - switch a { - case AMOVD: - return LDSTR12U(3, 0, 1) /* imm12<<10 | Rn<<5 | Rt */ - - case AMOVW: - return LDSTR12U(2, 0, 2) - - case AMOVWU: - return LDSTR12U(2, 0, 1) - - case AMOVH: - return LDSTR12U(1, 0, 2) - - case AMOVHU: - return LDSTR12U(1, 0, 1) - - case AMOVB: - return LDSTR12U(0, 0, 2) - - case AMOVBU: - return LDSTR12U(0, 0, 1) - - case AFMOVS: - return LDSTR12U(2, 1, 1) - - case AFMOVD: - return LDSTR12U(3, 1, 1) - } - - c.ctxt.Diag("bad opldr12 %v\n%v", a, p) - return 0 -} - -func (c *ctxt7) opstr12(p *obj.Prog, a obj.As) uint32 { - return LD2STR(c.opldr12(p, a)) -} - /* - * load/store register (unscaled immediate) C3.3.12 + * load/store register (unscaled 9-bit signed immediate) C3.3.12 */ func (c *ctxt7) olsr9s(p *obj.Prog, o int32, v int32, b int, r int) uint32 { if v < -256 || v > 255 { @@ -6566,76 +6529,48 @@ func (c *ctxt7) olsr9s(p *obj.Prog, o int32, v int32, b int, r int) uint32 { return uint32(o) } -func (c *ctxt7) opldr9(p *obj.Prog, a obj.As) uint32 { - switch a { - case AMOVD: - return LDSTR9S(3, 0, 1) /* simm9<<12 | Rn<<5 | Rt */ - - case AMOVW: - return LDSTR9S(2, 0, 2) - - case AMOVWU: - return LDSTR9S(2, 0, 1) - - case AMOVH: - return LDSTR9S(1, 0, 2) - - case AMOVHU: - return LDSTR9S(1, 0, 1) - - case AMOVB: - return LDSTR9S(0, 0, 2) - - case AMOVBU: - return LDSTR9S(0, 0, 1) - - case AFMOVS: - return LDSTR9S(2, 1, 1) - - case AFMOVD: - return LDSTR9S(3, 1, 1) - } - - c.ctxt.Diag("bad opldr9 %v\n%v", a, p) - return 0 -} - -func (c *ctxt7) opstr9(p *obj.Prog, a obj.As) uint32 { - return LD2STR(c.opldr9(p, a)) +// store(immediate) +// scaled 12-bit unsigned immediate offset. +// unscaled 9-bit signed immediate offset. +// pre/post-indexed store. +// and the 12-bit and 9-bit are distinguished in olsr12u and oslr9s. +func (c *ctxt7) opstr(p *obj.Prog, a obj.As) uint32 { + return LD2STR(c.opldr(p, a)) } -func (c *ctxt7) opldrpp(p *obj.Prog, a obj.As) uint32 { +// load(immediate) +// scaled 12-bit unsigned immediate offset. +// unscaled 9-bit signed immediate offset. +// pre/post-indexed load. +// and the 12-bit and 9-bit are distinguished in olsr12u and oslr9s. +func (c *ctxt7) opldr(p *obj.Prog, a obj.As) uint32 { switch a { case AMOVD: - return 3<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22 /* simm9<<12 | Rn<<5 | Rt */ + return LDSTR(3, 0, 1) /* simm9<<12 | Rn<<5 | Rt */ case AMOVW: - return 2<<30 | 7<<27 | 0<<26 | 0<<24 | 2<<22 + return LDSTR(2, 0, 2) case AMOVWU: - return 2<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22 + return LDSTR(2, 0, 1) case AMOVH: - return 1<<30 | 7<<27 | 0<<26 | 0<<24 | 2<<22 + return LDSTR(1, 0, 2) case AMOVHU: - return 1<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22 + return LDSTR(1, 0, 1) case AMOVB: - return 0<<30 | 7<<27 | 0<<26 | 0<<24 | 2<<22 + return LDSTR(0, 0, 2) case AMOVBU: - return 0<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22 + return LDSTR(0, 0, 1) case AFMOVS: - return 2<<30 | 7<<27 | 1<<26 | 0<<24 | 1<<22 + return LDSTR(2, 1, 1) case AFMOVD: - return 3<<30 | 7<<27 | 1<<26 | 0<<24 | 1<<22 - - case APRFM: - return 0xf9<<24 | 2<<22 - + return LDSTR(3, 1, 1) } c.ctxt.Diag("bad opldr %v\n%v", a, p) -- cgit v1.3 From 3089ef6bd7ecc7af4b23eb68e3d7879d340aa673 Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Mon, 28 Sep 2020 18:58:51 +0800 Subject: cmd/asm: add several arm64 SIMD instructions This patch enables VSLI, VUADDW(2), VUSRA and FMOVQ SIMD instructions required by the issue #40725. And the GNU syntax of 'FMOVQ' is 128-bit ldr/str(immediate, simd&fp). Add test cases. Fixes #40725 Change-Id: Ide968ef4a9385ce4cd8f69bce854289014d30456 Reviewed-on: https://go-review.googlesource.com/c/go/+/258397 Trust: fannie zhang Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/testdata/arm64.s | 11 + src/cmd/asm/internal/asm/testdata/arm64error.s | 16 +- src/cmd/internal/obj/arm64/a.out.go | 59 +++-- src/cmd/internal/obj/arm64/anames.go | 5 + src/cmd/internal/obj/arm64/anames7.go | 10 + src/cmd/internal/obj/arm64/asm7.go | 300 ++++++++++++++++++------- 6 files changed, 290 insertions(+), 111 deletions(-) (limited to 'src/cmd/internal/obj/arm64') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 7f495b90bb..b6c22e0d6f 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -454,6 +454,17 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 FMOVD F20, (R2) // 540000fd FMOVD.P F20, 8(R1) // 348400fc FMOVD.W 8(R1), F20 // 348c40fc + FMOVQ.P F13, 11(R10) // 4db5803c + FMOVQ.W F15, 11(R20) // 8fbe803c + FMOVQ.P 11(R10), F13 // 4db5c03c + FMOVQ.W 11(R20), F15 // 8fbec03c + FMOVQ F10, 65520(R10) // 4afdbf3d + FMOVQ F11, 64(RSP) // eb13803d + FMOVQ F11, 8(R20) // 8b82803c + FMOVQ F11, 4(R20) // 8b42803c + FMOVQ 32(R5), F2 // a208c03d + FMOVQ 65520(R10), F10 // 4afdff3d + FMOVQ 64(RSP), F11 // eb13c03d PRFM (R2), PLDL1KEEP // 400080f9 PRFM 16(R2), PLDL1KEEP // 400880f9 PRFM 48(R6), PSTL2STRM // d31880f9 diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 20b1f3e9f0..c3a617066a 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -87,13 +87,13 @@ TEXT errors(SB),$0 VLD1.P 32(R1), [V8.S4, V9.S4, V10.S4] // ERROR "invalid post-increment offset" VLD1.P 48(R1), [V7.S4, V8.S4, V9.S4, V10.S4] // ERROR "invalid post-increment offset" VPMULL V1.D1, V2.H4, V3.Q1 // ERROR "invalid arrangement" - VPMULL V1.H4, V2.H4, V3.Q1 // ERROR "invalid arrangement" - VPMULL V1.D2, V2.D2, V3.Q1 // ERROR "invalid arrangement" - VPMULL V1.B16, V2.B16, V3.H8 // ERROR "invalid arrangement" + VPMULL V1.H4, V2.H4, V3.Q1 // ERROR "operand mismatch" + VPMULL V1.D2, V2.D2, V3.Q1 // ERROR "operand mismatch" + VPMULL V1.B16, V2.B16, V3.H8 // ERROR "operand mismatch" VPMULL2 V1.D2, V2.H4, V3.Q1 // ERROR "invalid arrangement" - VPMULL2 V1.H4, V2.H4, V3.Q1 // ERROR "invalid arrangement" - VPMULL2 V1.D1, V2.D1, V3.Q1 // ERROR "invalid arrangement" - VPMULL2 V1.B8, V2.B8, V3.H8 // ERROR "invalid arrangement" + VPMULL2 V1.H4, V2.H4, V3.Q1 // ERROR "operand mismatch" + VPMULL2 V1.D1, V2.D1, V3.Q1 // ERROR "operand mismatch" + VPMULL2 V1.B8, V2.B8, V3.H8 // ERROR "operand mismatch" VEXT $8, V1.B16, V2.B8, V2.B16 // ERROR "invalid arrangement" VEXT $8, V1.H8, V2.H8, V2.H8 // ERROR "invalid arrangement" VRBIT V1.B16, V2.B8 // ERROR "invalid arrangement" @@ -353,4 +353,8 @@ TEXT errors(SB),$0 VUSHLL2 $32, V30.S4, V2.D2 // ERROR "shift amount out of range" VBIF V0.B8, V1.B8, V2.B16 // ERROR "operand mismatch" VBIF V0.D2, V1.D2, V2.D2 // ERROR "invalid arrangement" + VUADDW V9.B8, V12.H8, V14.B8 // ERROR "invalid arrangement" + VUADDW2 V9.B8, V12.S4, V14.S4 // ERROR "operand mismatch" + VSLI $64, V7.D2, V8.D2 // ERROR "shift out of range" + VUSRA $0, V7.D2, V8.D2 // ERROR "shift out of range" RET diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 33319e48df..98504353e2 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -419,27 +419,33 @@ const ( C_SBRA // for TYPE_BRANCH C_LBRA - C_ZAUTO // 0(RSP) - C_NSAUTO_8 // -256 <= x < 0, 0 mod 8 - C_NSAUTO_4 // -256 <= x < 0, 0 mod 4 - C_NSAUTO // -256 <= x < 0 - C_NPAUTO // -512 <= x < 0, 0 mod 8 - C_NAUTO4K // -4095 <= x < 0 - C_PSAUTO_8 // 0 to 255, 0 mod 8 - C_PSAUTO_4 // 0 to 255, 0 mod 4 - C_PSAUTO // 0 to 255 - C_PPAUTO // 0 to 504, 0 mod 8 - C_UAUTO4K_8 // 0 to 4095, 0 mod 8 - C_UAUTO4K_4 // 0 to 4095, 0 mod 4 - C_UAUTO4K_2 // 0 to 4095, 0 mod 2 - C_UAUTO4K // 0 to 4095 - C_UAUTO8K_8 // 0 to 8190, 0 mod 8 - C_UAUTO8K_4 // 0 to 8190, 0 mod 4 - C_UAUTO8K // 0 to 8190, 0 mod 2 - C_UAUTO16K_8 // 0 to 16380, 0 mod 8 - C_UAUTO16K // 0 to 16380, 0 mod 4 - C_UAUTO32K // 0 to 32760, 0 mod 8 - C_LAUTO // any other 32-bit constant + C_ZAUTO // 0(RSP) + C_NSAUTO_8 // -256 <= x < 0, 0 mod 8 + C_NSAUTO_4 // -256 <= x < 0, 0 mod 4 + C_NSAUTO // -256 <= x < 0 + C_NPAUTO // -512 <= x < 0, 0 mod 8 + C_NAUTO4K // -4095 <= x < 0 + C_PSAUTO_8 // 0 to 255, 0 mod 8 + C_PSAUTO_4 // 0 to 255, 0 mod 4 + C_PSAUTO // 0 to 255 + C_PPAUTO_16 // 0 to 504, 0 mod 16 + C_PPAUTO // 0 to 504, 0 mod 8 + C_UAUTO4K_16 // 0 to 4095, 0 mod 16 + C_UAUTO4K_8 // 0 to 4095, 0 mod 8 + C_UAUTO4K_4 // 0 to 4095, 0 mod 4 + C_UAUTO4K_2 // 0 to 4095, 0 mod 2 + C_UAUTO4K // 0 to 4095 + C_UAUTO8K_16 // 0 to 8190, 0 mod 16 + C_UAUTO8K_8 // 0 to 8190, 0 mod 8 + C_UAUTO8K_4 // 0 to 8190, 0 mod 4 + C_UAUTO8K // 0 to 8190, 0 mod 2 + C_PSAUTO + C_UAUTO16K_16 // 0 to 16380, 0 mod 16 + C_UAUTO16K_8 // 0 to 16380, 0 mod 8 + C_UAUTO16K // 0 to 16380, 0 mod 4 + C_PSAUTO + C_UAUTO32K_16 // 0 to 32760, 0 mod 16 + C_PSAUTO + C_UAUTO32K // 0 to 32760, 0 mod 8 + C_PSAUTO + C_UAUTO64K // 0 to 65520, 0 mod 16 + C_PSAUTO + C_LAUTO // any other 32-bit constant C_SEXT1 // 0 to 4095, direct C_SEXT2 // 0 to 8190 @@ -457,17 +463,23 @@ const ( C_PSOREG_8 C_PSOREG_4 C_PSOREG + C_PPOREG_16 C_PPOREG + C_UOREG4K_16 C_UOREG4K_8 C_UOREG4K_4 C_UOREG4K_2 C_UOREG4K + C_UOREG8K_16 C_UOREG8K_8 C_UOREG8K_4 C_UOREG8K + C_UOREG16K_16 C_UOREG16K_8 C_UOREG16K + C_UOREG32K_16 C_UOREG32K + C_UOREG64K C_LOREG C_ADDR // TODO(aram): explain difference from C_VCONADDR @@ -873,6 +885,7 @@ const ( AFDIVS AFLDPD AFLDPS + AFMOVQ AFMOVD AFMOVS AVMOVQ @@ -1001,6 +1014,7 @@ const ( AVUZP2 AVSHL AVSRI + AVSLI AVBSL AVBIT AVTBL @@ -1008,6 +1022,9 @@ const ( AVZIP1 AVZIP2 AVCMTST + AVUADDW2 + AVUADDW + AVUSRA ALAST AB = obj.AJMP ABL = obj.ACALL diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index e5534e26b9..126eefd032 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -379,6 +379,7 @@ var Anames = []string{ "FDIVS", "FLDPD", "FLDPS", + "FMOVQ", "FMOVD", "FMOVS", "VMOVQ", @@ -507,6 +508,7 @@ var Anames = []string{ "VUZP2", "VSHL", "VSRI", + "VSLI", "VBSL", "VBIT", "VTBL", @@ -514,5 +516,8 @@ var Anames = []string{ "VZIP1", "VZIP2", "VCMTST", + "VUADDW2", + "VUADDW", + "VUSRA", "LAST", } diff --git a/src/cmd/internal/obj/arm64/anames7.go b/src/cmd/internal/obj/arm64/anames7.go index 96c9f788d9..f7e99517ce 100644 --- a/src/cmd/internal/obj/arm64/anames7.go +++ b/src/cmd/internal/obj/arm64/anames7.go @@ -51,16 +51,21 @@ var cnames7 = []string{ "PSAUTO_4", "PSAUTO", "PPAUTO", + "UAUTO4K_16", "UAUTO4K_8", "UAUTO4K_4", "UAUTO4K_2", "UAUTO4K", + "UAUTO8K_16", "UAUTO8K_8", "UAUTO8K_4", "UAUTO8K", + "UAUTO16K_16", "UAUTO16K_8", "UAUTO16K", + "UAUTO32K_8", "UAUTO32K", + "UAUTO64K", "LAUTO", "SEXT1", "SEXT2", @@ -78,16 +83,21 @@ var cnames7 = []string{ "PSOREG_4", "PSOREG", "PPOREG", + "UOREG4K_16", "UOREG4K_8", "UOREG4K_4", "UOREG4K_2", "UOREG4K", + "UOREG8K_16", "UOREG8K_8", "UOREG8K_4", "UOREG8K", + "UOREG16K_16", "UOREG16K_8", "UOREG16K", + "UOREG32K_16", "UOREG32K", + "UOREG64K", "LOREG", "ADDR", "GOTADDR", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 92c5729d25..8cbf5e719f 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -483,6 +483,7 @@ var optab = []Optab{ {AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, {AVUSHLL, C_VCON, C_ARNG, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, + {AVUADDW, C_ARNG, C_ARNG, C_NONE, C_ARNG, 105, 4, 0, 0, 0}, /* conditional operations */ {ACSEL, C_COND, C_REG, C_REG, C_REG, 18, 4, 0, 0, 0}, @@ -509,6 +510,8 @@ var optab = []Optab{ {AFMOVS, C_FREG, C_NONE, C_NONE, C_UOREG16K, 20, 4, 0, 0, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_UAUTO32K, 20, 4, REGSP, 0, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_UOREG32K, 20, 4, 0, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_UAUTO64K, 20, 4, REGSP, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_UOREG64K, 20, 4, 0, 0, 0}, /* unscaled 9-bit signed displacement store */ {AMOVB, C_REG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, @@ -526,6 +529,8 @@ var optab = []Optab{ {AFMOVS, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, /* scaled 12-bit unsigned displacement load */ {AMOVB, C_UAUTO4K, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, @@ -543,6 +548,8 @@ var optab = []Optab{ {AFMOVS, C_UOREG16K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, {AFMOVD, C_UAUTO32K, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, {AFMOVD, C_UOREG32K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVQ, C_UAUTO64K, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVQ, C_UOREG64K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, /* unscaled 9-bit signed displacement load */ {AMOVB, C_NSAUTO, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, @@ -560,6 +567,8 @@ var optab = []Optab{ {AFMOVS, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, {AFMOVD, C_NSAUTO, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, {AFMOVD, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVQ, C_NSAUTO, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVQ, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, /* long displacement store */ {AMOVB, C_REG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, @@ -577,6 +586,8 @@ var optab = []Optab{ {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, /* long displacement load */ {AMOVB, C_LAUTO, C_NONE, C_NONE, C_REG, 31, 8, REGSP, LFROM, 0}, @@ -594,6 +605,8 @@ var optab = []Optab{ {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, {AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, + {AFMOVQ, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, /* pre/post-indexed load (unscaled, signed 9-bit offset) */ {AMOVD, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, @@ -603,6 +616,7 @@ var optab = []Optab{ {AMOVBU, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, {AMOVD, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, {AMOVW, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, @@ -611,6 +625,7 @@ var optab = []Optab{ {AMOVBU, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, /* pre/post-indexed store (unscaled, signed 9-bit offset) */ {AMOVD, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, @@ -620,6 +635,7 @@ var optab = []Optab{ {AMOVBU, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AMOVD, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AMOVW, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, @@ -628,6 +644,7 @@ var optab = []Optab{ {AMOVBU, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, /* load with shifted or extended register offset */ {AMOVD, C_ROFF, C_NONE, C_NONE, C_REG, 98, 4, 0, 0, 0}, @@ -1207,37 +1224,49 @@ func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) { C_PSAUTO, C_PSAUTO_8, C_PSAUTO_4, + C_PPAUTO_16, C_PPAUTO, + C_UAUTO4K_16, C_UAUTO4K_8, C_UAUTO4K_4, C_UAUTO4K_2, C_UAUTO4K, + C_UAUTO8K_16, C_UAUTO8K_8, C_UAUTO8K_4, C_UAUTO8K, + C_UAUTO16K_16, C_UAUTO16K_8, C_UAUTO16K, + C_UAUTO32K_16, C_UAUTO32K, + C_UAUTO64K, C_NSAUTO_8, C_NSAUTO_4, C_NSAUTO, C_NPAUTO, C_NAUTO4K, C_LAUTO, - C_PPOREG, C_PSOREG, - C_PSOREG_4, C_PSOREG_8, + C_PSOREG_4, + C_PPOREG_16, + C_PPOREG, + C_UOREG4K_16, C_UOREG4K_8, C_UOREG4K_4, C_UOREG4K_2, C_UOREG4K, + C_UOREG8K_16, C_UOREG8K_8, C_UOREG8K_4, C_UOREG8K, + C_UOREG16K_16, C_UOREG16K_8, C_UOREG16K, + C_UOREG32K_16, C_UOREG32K, + C_UOREG64K, C_NSOREG_8, C_NSOREG_4, C_NSOREG, @@ -1532,10 +1561,18 @@ func autoclass(l int64) int { } return C_PSAUTO } - if l <= 504 && l&7 == 0 { - return C_PPAUTO + if l <= 504 { + if l&15 == 0 { + return C_PPAUTO_16 + } + if l&7 == 0 { + return C_PPAUTO + } } if l <= 4095 { + if l&15 == 0 { + return C_UAUTO4K_16 + } if l&7 == 0 { return C_UAUTO4K_8 } @@ -1548,6 +1585,9 @@ func autoclass(l int64) int { return C_UAUTO4K } if l <= 8190 { + if l&15 == 0 { + return C_UAUTO8K_16 + } if l&7 == 0 { return C_UAUTO8K_8 } @@ -1559,6 +1599,9 @@ func autoclass(l int64) int { } } if l <= 16380 { + if l&15 == 0 { + return C_UAUTO16K_16 + } if l&7 == 0 { return C_UAUTO16K_8 } @@ -1566,8 +1609,16 @@ func autoclass(l int64) int { return C_UAUTO16K } } - if l <= 32760 && (l&7) == 0 { - return C_UAUTO32K + if l <= 32760 { + if l&15 == 0 { + return C_UAUTO32K_16 + } + if l&7 == 0 { + return C_UAUTO32K + } + } + if l <= 65520 && (l&15) == 0 { + return C_UAUTO64K } return C_LAUTO } @@ -1595,6 +1646,8 @@ func (c *ctxt7) offsetshift(p *obj.Prog, v int64, cls int) int64 { s = 2 case C_UAUTO32K, C_UOREG32K: s = 3 + case C_UAUTO64K, C_UOREG64K: + s = 4 default: c.ctxt.Diag("bad class: %v\n%v", DRconv(cls), p) } @@ -2126,46 +2179,66 @@ func cmp(a int, b int) bool { case C_PPAUTO: switch b { - case C_ZAUTO, C_PSAUTO_8: + case C_ZAUTO, C_PSAUTO_8, C_PPAUTO_16: return true } case C_UAUTO4K: switch b { case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, - C_PPAUTO, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8: + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16: return true } case C_UAUTO8K: switch b { - case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PPAUTO, - C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO8K_4, C_UAUTO8K_8: + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16: return true } case C_UAUTO16K: switch b { - case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PPAUTO, - C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO16K_8: + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K_8, C_UAUTO16K_16: return true } case C_UAUTO32K: switch b { case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, - C_PPAUTO, C_UAUTO4K_8, C_UAUTO8K_8, C_UAUTO16K_8: + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K_8, C_UAUTO16K_16, + C_UAUTO32K_16: + return true + } + + case C_UAUTO64K: + switch b { + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, + C_PPAUTO_16, C_UAUTO4K_16, C_UAUTO8K_16, C_UAUTO16K_16, + C_UAUTO32K_16: return true } case C_LAUTO: switch b { - case C_ZAUTO, C_NSAUTO, C_NSAUTO_4, C_NSAUTO_8, C_NPAUTO, - C_NAUTO4K, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PPAUTO, - C_UAUTO4K, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, - C_UAUTO8K, C_UAUTO8K_4, C_UAUTO8K_8, - C_UAUTO16K, C_UAUTO16K_8, - C_UAUTO32K: + case C_ZAUTO, C_NSAUTO, C_NSAUTO_4, C_NSAUTO_8, C_NPAUTO, C_NAUTO4K, + C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K, C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K, C_UAUTO16K_8, C_UAUTO16K_16, + C_UAUTO32K, C_UAUTO32K_16, + C_UAUTO64K: return true } @@ -2192,6 +2265,11 @@ func cmp(a int, b int) bool { return true } + case C_PSOREG_8: + if b == C_ZOREG { + return true + } + case C_PSOREG_4: switch b { case C_ZOREG, C_PSOREG_8: @@ -2206,48 +2284,66 @@ func cmp(a int, b int) bool { case C_PPOREG: switch b { - case C_ZOREG, C_PSOREG_8: + case C_ZOREG, C_PSOREG_8, C_PPOREG_16: return true } case C_UOREG4K: switch b { - case C_ZOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG, - C_PPOREG, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8: + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16: return true } case C_UOREG8K: switch b { - case C_ZOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG, - C_PPOREG, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, - C_UOREG8K_4, C_UOREG8K_8: + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16: return true } case C_UOREG16K: switch b { - case C_ZOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG, - C_PPOREG, C_UOREG4K_4, C_UOREG4K_8, C_UOREG8K_4, - C_UOREG8K_8, C_UOREG16K_8: + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K_8, C_UOREG16K_16: return true } case C_UOREG32K: switch b { - case C_ZOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG, - C_PPOREG, C_UOREG4K_8, C_UOREG8K_8, C_UOREG16K_8: + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K_8, C_UOREG16K_16, + C_UOREG32K_16: + return true + } + + case C_UOREG64K: + switch b { + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG_16, C_UOREG4K_16, C_UOREG8K_16, C_UOREG16K_16, + C_UOREG32K_16: return true } case C_LOREG: switch b { - case C_ZOREG, C_NSOREG, C_NSOREG_4, C_NSOREG_8, C_NPOREG, - C_NOREG4K, C_PSOREG_4, C_PSOREG_8, C_PSOREG, C_PPOREG, - C_UOREG4K, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, - C_UOREG8K, C_UOREG8K_4, C_UOREG8K_8, - C_UOREG16K, C_UOREG16K_8, - C_UOREG32K: + case C_ZOREG, C_NSOREG, C_NSOREG_4, C_NSOREG_8, C_NPOREG, C_NOREG4K, + C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K, C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K, C_UOREG16K_8, C_UOREG16K_16, + C_UOREG32K, C_UOREG32K_16, + C_UOREG64K: return true } @@ -2675,7 +2771,8 @@ func buildop(ctxt *obj.Link) { case AFCSELD: oprangeset(AFCSELS, t) - case AFMOVS, AFMOVD, AVMOVQ, AVMOVD, AVMOVS: + case AFMOVQ, AFMOVD, AFMOVS, + AVMOVQ, AVMOVD, AVMOVS: break case AFCVTZSD: @@ -2802,6 +2899,8 @@ func buildop(ctxt *obj.Link) { case AVUSHR: oprangeset(AVSHL, t) oprangeset(AVSRI, t) + oprangeset(AVSLI, t) + oprangeset(AVUSRA, t) case AVREV32: oprangeset(AVCNT, t) @@ -2829,6 +2928,9 @@ func buildop(ctxt *obj.Link) { case AVEOR3: oprangeset(AVBCAX, t) + case AVUADDW: + oprangeset(AVUADDW2, t) + case ASHA1H, AVCNT, AVMOV, @@ -4805,47 +4907,31 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 |= (uint32(imm5&0x1f) << 16) | (uint32(imm4&0xf) << 11) | (uint32(rf&31) << 5) | uint32(rt&31) - case 93: /* vpmull{2} Vm., Vn., Vd */ - af := int((p.From.Reg >> 5) & 15) - at := int((p.To.Reg >> 5) & 15) - a := int((p.Reg >> 5) & 15) + case 93: /* vpmull{2} Vm., Vn., Vd. */ + af := uint8((p.From.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + a := uint8((p.Reg >> 5) & 15) + if af != a { + c.ctxt.Diag("invalid arrangement: %v", p) + } var Q, size uint32 - if p.As == AVPMULL { - Q = 0 - } else { + if p.As == AVPMULL2 { Q = 1 } - - var fArng int - switch at { - case ARNG_8H: - if Q == 0 { - fArng = ARNG_8B - } else { - fArng = ARNG_16B - } + switch pack(Q, at, af) { + case pack(0, ARNG_8H, ARNG_8B), pack(1, ARNG_8H, ARNG_16B): size = 0 - case ARNG_1Q: - if Q == 0 { - fArng = ARNG_1D - } else { - fArng = ARNG_2D - } + case pack(0, ARNG_1Q, ARNG_1D), pack(1, ARNG_1Q, ARNG_2D): size = 3 default: - c.ctxt.Diag("invalid arrangement on Vd.: %v", p) - } - - if af != a || af != fArng { - c.ctxt.Diag("invalid arrangement: %v", p) + c.ctxt.Diag("operand mismatch: %v\n", p) } o1 = c.oprrr(p, p.As) rf := int((p.From.Reg) & 31) rt := int((p.To.Reg) & 31) r := int((p.Reg) & 31) - o1 |= ((Q & 1) << 30) | ((size & 3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) case 94: /* vext $imm4, Vm., Vn., Vd. */ @@ -4883,7 +4969,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 |= ((Q & 1) << 30) | (uint32(r&31) << 16) | (uint32(index&15) << 11) | (uint32(rf&31) << 5) | uint32(rt&31) - case 95: /* vushr $shift, Vn., Vd. */ + case 95: /* vushr/vshl/vsri/vsli/vusra $shift, Vn., Vd. */ at := int((p.To.Reg >> 5) & 15) af := int((p.Reg >> 5) & 15) shift := int(p.From.Offset) @@ -4920,14 +5006,13 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } imm := 0 - switch p.As { - case AVUSHR, AVSRI: + case AVUSHR, AVSRI, AVUSRA: imm = esize*2 - shift if imm < esize || imm > imax { c.ctxt.Diag("shift out of range: %v", p) } - case AVSHL: + case AVSHL, AVSLI: imm = esize + shift if imm > imax { c.ctxt.Diag("shift out of range: %v", p) @@ -4940,7 +5025,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rt := int((p.To.Reg) & 31) rf := int((p.Reg) & 31) - o1 |= ((Q & 1) << 30) | (uint32(imm&127) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + o1 |= ((Q & 1) << 30) | (uint32(imm&0x7f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) case 96: /* vst1 Vt1.[index], offset(Rn) */ af := int((p.From.Reg >> 5) & 15) @@ -5169,11 +5254,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { shift = 0 } - pack := func(q, x, y uint8) uint32 { - return uint32(q)<<16 | uint32(x)<<8 | uint32(y) - } - - var Q uint8 = uint8(o1>>30) & 1 + Q := (o1 >> 30) & 1 var immh, width uint8 switch pack(Q, af, at) { case pack(0, ARNG_8B, ARNG_8H): @@ -5195,6 +5276,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { c.ctxt.Diag("shift amount out of range: %v\n", p) } o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31) + case 103: /* VEOR3/VBCAX Va.B16, Vm.B16, Vn.B16, Vd.B16 */ ta := (p.From.Reg >> 5) & 15 tm := (p.Reg >> 5) & 15 @@ -5240,6 +5322,35 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 |= (uint32(r&31) << 16) | (uint32(index&63) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + case 105: /* vuaddw{2} Vm., Vn., Vd. */ + af := uint8((p.From.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + a := uint8((p.Reg >> 5) & 15) + if at != a { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + var Q, size uint32 + if p.As == AVUADDW2 { + Q = 1 + } + switch pack(Q, at, af) { + case pack(0, ARNG_8H, ARNG_8B), pack(1, ARNG_8H, ARNG_16B): + size = 0 + case pack(0, ARNG_4S, ARNG_4H), pack(1, ARNG_4S, ARNG_8H): + size = 1 + case pack(0, ARNG_2D, ARNG_2S), pack(1, ARNG_2D, ARNG_4S): + size = 2 + default: + c.ctxt.Diag("operand mismatch: %v\n", p) + } + + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + o1 |= ((Q & 1) << 30) | ((size & 3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) } out[0] = o1 out[1] = o2 @@ -5898,6 +6009,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVUZP2: return 7<<25 | 1<<14 | 3<<11 + + case AVUADDW, AVUADDW2: + return 0x17<<25 | 1<<21 | 1<<12 } c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) @@ -6097,6 +6211,9 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVSRI: return 0x5E<<23 | 17<<10 + case AVSLI: + return 0x5E<<23 | 21<<10 + case AVUSHLL, AVUXTL: return 1<<29 | 15<<24 | 0x29<<10 @@ -6106,6 +6223,9 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVXAR: return 0xCE<<24 | 1<<23 + case AVUSRA: + return 1<<29 | 15<<24 | 5<<10 + case APRFM: return 0xf9<<24 | 2<<22 } @@ -6535,7 +6655,14 @@ func (c *ctxt7) olsr9s(p *obj.Prog, o int32, v int32, b int, r int) uint32 { // pre/post-indexed store. // and the 12-bit and 9-bit are distinguished in olsr12u and oslr9s. func (c *ctxt7) opstr(p *obj.Prog, a obj.As) uint32 { - return LD2STR(c.opldr(p, a)) + enc := c.opldr(p, a) + switch p.As { + case AFMOVQ: + enc = enc &^ (1 << 22) + default: + enc = LD2STR(enc) + } + return enc } // load(immediate) @@ -6571,6 +6698,9 @@ func (c *ctxt7) opldr(p *obj.Prog, a obj.As) uint32 { case AFMOVD: return LDSTR(3, 1, 1) + + case AFMOVQ: + return LDSTR(0, 1, 3) } c.ctxt.Diag("bad opldr %v\n%v", a, p) @@ -7063,10 +7193,13 @@ func (c *ctxt7) maskOpvldvst(p *obj.Prog, o1 uint32) uint32 { */ func movesize(a obj.As) int { switch a { - case AMOVD: + case AFMOVQ: + return 4 + + case AMOVD, AFMOVD: return 3 - case AMOVW, AMOVWU: + case AMOVW, AMOVWU, AFMOVS: return 2 case AMOVH, AMOVHU: @@ -7075,12 +7208,6 @@ func movesize(a obj.As) int { case AMOVB, AMOVBU: return 0 - case AFMOVS: - return 2 - - case AFMOVD: - return 3 - default: return -1 } @@ -7145,3 +7272,8 @@ func (c *ctxt7) encRegShiftOrExt(a *obj.Addr, r int16) uint32 { return 0 } + +// pack returns the encoding of the "Q" field and two arrangement specifiers. +func pack(q uint32, arngA, arngB uint8) uint32 { + return uint32(q)<<16 | uint32(arngA)<<8 | uint32(arngB) +} -- cgit v1.3 From 15131caeaa83e57fd8bdf87dde2801443f5602db Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Fri, 8 May 2020 10:51:29 +0800 Subject: cmd/internal/obj/arm64: add CASx/CASPx instructions This patch adds support for CASx and CASPx atomic instructions. go syntax gnu syntax CASD Rs, (Rn|RSP), Rt => cas Xs, Xt, (Xn|SP) CASALW Rs, (Rn|RSP), Rt => casal Ws, Wt, (Xn|SP) CASPD (Rs, Rs+1), (Rn|RSP), (Rt, Rt+1) => casp Xs, Xs+1, Xt, Xt+1, (Xn|SP) CASPW (Rs, Rs+1), (Rn|RSP), (Rt, Rt+1) => casp Ws, Ws+1, Wt, Wt+1, (Xn|SP) This patch changes the type of prog.RestArgs from "[]Addr" to "[]struct{Addr, Pos}", Pos is a enum, indicating the position of the operand. This patch also adds test cases. Change-Id: Ib971cfda7890b7aa895d17bab22dea326c7fcaa4 Reviewed-on: https://go-review.googlesource.com/c/go/+/233277 Trust: fannie zhang Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/arch/arm64.go | 13 ++- src/cmd/asm/internal/asm/asm.go | 18 +++- src/cmd/asm/internal/asm/testdata/arm64.s | 18 ++++ src/cmd/asm/internal/asm/testdata/arm64error.s | 4 + src/cmd/compile/internal/s390x/ssa.go | 12 +-- src/cmd/internal/obj/arm64/a.out.go | 14 +++ src/cmd/internal/obj/arm64/anames.go | 14 +++ src/cmd/internal/obj/arm64/asm7.go | 132 +++++++++++++++++++------ src/cmd/internal/obj/link.go | 84 +++++++++++----- src/cmd/internal/obj/riscv/obj.go | 8 +- src/cmd/internal/obj/s390x/asmz.go | 2 +- src/cmd/internal/obj/util.go | 15 ++- src/cmd/internal/obj/x86/asm6.go | 6 +- 13 files changed, 266 insertions(+), 74 deletions(-) (limited to 'src/cmd/internal/obj/arm64') diff --git a/src/cmd/asm/internal/arch/arm64.go b/src/cmd/asm/internal/arch/arm64.go index e643889aef..e557630ca6 100644 --- a/src/cmd/asm/internal/arch/arm64.go +++ b/src/cmd/asm/internal/arch/arm64.go @@ -75,7 +75,7 @@ func IsARM64STLXR(op obj.As) bool { arm64.ASTXP, arm64.ASTXPW, arm64.ASTLXP, arm64.ASTLXPW: return true } - // atomic instructions + // LDADDx/SWPx/CASx atomic instructions if arm64.IsAtomicInstruction(op) { return true } @@ -93,6 +93,17 @@ func IsARM64TBL(op obj.As) bool { return false } +// IsARM64CASP reports whether the op (as defined by an arm64.A* +// constant) is one of the CASP-like instructions, and its 2nd +// destination is a register pair that require special handling. +func IsARM64CASP(op obj.As) bool { + switch op { + case arm64.ACASPD, arm64.ACASPW: + return true + } + return false +} + // ARM64Suffix handles the special suffix for the ARM64. // It returns a boolean to indicate success; failure means // cond was unrecognized. diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index b9efa454ed..c4032759bb 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -637,6 +637,18 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { prog.From = a[0] prog.SetFrom3(a[1]) prog.To = a[2] + case arch.IsARM64CASP(op): + prog.From = a[0] + prog.To = a[1] + // both 1st operand and 3rd operand are (Rs, Rs+1) register pair. + // And the register pair must be contiguous. + if (a[0].Type != obj.TYPE_REGREG) || (a[2].Type != obj.TYPE_REGREG) { + p.errorf("invalid addressing modes for 1st or 3rd operand to %s instruction, must be register pair", op) + return + } + // For ARM64 CASP-like instructions, its 2nd destination operand is register pair(Rt, Rt+1) that can + // not fit into prog.RegTo2, so save it to the prog.RestArgs. + prog.SetTo2(a[2]) default: prog.From = a[0] prog.Reg = p.getRegister(prog, op, &a[1]) @@ -725,7 +737,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { } if p.arch.Family == sys.AMD64 { prog.From = a[0] - prog.RestArgs = []obj.Addr{a[1], a[2]} + prog.SetRestArgs([]obj.Addr{a[1], a[2]}) prog.To = a[3] break } @@ -808,13 +820,13 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { } if p.arch.Family == sys.AMD64 { prog.From = a[0] - prog.RestArgs = []obj.Addr{a[1], a[2], a[3]} + prog.SetRestArgs([]obj.Addr{a[1], a[2], a[3]}) prog.To = a[4] break } if p.arch.Family == sys.S390X { prog.From = a[0] - prog.RestArgs = []obj.Addr{a[1], a[2], a[3]} + prog.SetRestArgs([]obj.Addr{a[1], a[2], a[3]}) prog.To = a[4] break } diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 7943990e16..5547cf634c 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -777,6 +777,24 @@ again: LDORLH R5, (RSP), R7 // e7336578 LDORLB R5, (R6), R7 // c7306538 LDORLB R5, (RSP), R7 // e7336538 + CASD R1, (R2), ZR // 5f7ca1c8 + CASW R1, (RSP), ZR // ff7fa188 + CASB ZR, (R5), R3 // a37cbf08 + CASH R3, (RSP), ZR // ff7fa348 + CASW R5, (R7), R6 // e67ca588 + CASLD ZR, (RSP), R8 // e8ffbfc8 + CASLW R9, (R10), ZR // 5ffda988 + CASAD R7, (R11), R15 // 6f7de7c8 + CASAW R10, (RSP), R19 // f37fea88 + CASALD R5, (R6), R7 // c7fce5c8 + CASALD R5, (RSP), R7 // e7ffe5c8 + CASALW R5, (R6), R7 // c7fce588 + CASALW R5, (RSP), R7 // e7ffe588 + CASALH ZR, (R5), R8 // a8fcff48 + CASALB R8, (R9), ZR // 3ffde808 + CASPD (R30, ZR), (RSP), (R8, R9) // e87f3e48 + CASPW (R6, R7), (R8), (R4, R5) // 047d2608 + CASPD (R2, R3), (R2), (R8, R9) // 487c2248 // RET RET diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index c3a617066a..99e4d62d25 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -357,4 +357,8 @@ TEXT errors(SB),$0 VUADDW2 V9.B8, V12.S4, V14.S4 // ERROR "operand mismatch" VSLI $64, V7.D2, V8.D2 // ERROR "shift out of range" VUSRA $0, V7.D2, V8.D2 // ERROR "shift out of range" + CASPD (R3, R4), (R2), (R8, R9) // ERROR "source register pair must start from even register" + CASPD (R2, R3), (R2), (R9, R10) // ERROR "destination register pair must start from even register" + CASPD (R2, R4), (R2), (R8, R9) // ERROR "source register pair must be contiguous" + CASPD (R2, R3), (R2), (R8, R10) // ERROR "destination register pair must be contiguous" RET diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go index e23b31f385..84b9f491e4 100644 --- a/src/cmd/compile/internal/s390x/ssa.go +++ b/src/cmd/compile/internal/s390x/ssa.go @@ -182,11 +182,11 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { i := v.Aux.(s390x.RotateParams) p := s.Prog(v.Op.Asm()) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(i.Start)} - p.RestArgs = []obj.Addr{ + p.SetRestArgs([]obj.Addr{ {Type: obj.TYPE_CONST, Offset: int64(i.End)}, {Type: obj.TYPE_CONST, Offset: int64(i.Amount)}, {Type: obj.TYPE_REG, Reg: r2}, - } + }) p.To = obj.Addr{Type: obj.TYPE_REG, Reg: r1} case ssa.OpS390XADD, ssa.OpS390XADDW, ssa.OpS390XSUB, ssa.OpS390XSUBW, @@ -913,7 +913,7 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { p.From.Type = obj.TYPE_CONST p.From.Offset = int64(s390x.NotEqual & s390x.NotUnordered) // unordered is not possible p.Reg = s390x.REG_R3 - p.RestArgs = []obj.Addr{{Type: obj.TYPE_CONST, Offset: 0}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0}) if b.Succs[0].Block() != next { s.Br(s390x.ABR, b.Succs[0].Block()) } @@ -956,17 +956,17 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { p.From.Type = obj.TYPE_CONST p.From.Offset = int64(mask & s390x.NotUnordered) // unordered is not possible p.Reg = b.Controls[0].Reg() - p.RestArgs = []obj.Addr{{Type: obj.TYPE_REG, Reg: b.Controls[1].Reg()}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: b.Controls[1].Reg()}) case ssa.BlockS390XCGIJ, ssa.BlockS390XCIJ: p.From.Type = obj.TYPE_CONST p.From.Offset = int64(mask & s390x.NotUnordered) // unordered is not possible p.Reg = b.Controls[0].Reg() - p.RestArgs = []obj.Addr{{Type: obj.TYPE_CONST, Offset: int64(int8(b.AuxInt))}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(int8(b.AuxInt))}) case ssa.BlockS390XCLGIJ, ssa.BlockS390XCLIJ: p.From.Type = obj.TYPE_CONST p.From.Offset = int64(mask & s390x.NotUnordered) // unordered is not possible p.Reg = b.Controls[0].Reg() - p.RestArgs = []obj.Addr{{Type: obj.TYPE_CONST, Offset: int64(uint8(b.AuxInt))}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(uint8(b.AuxInt))}) default: b.Fatalf("branch not implemented: %s", b.LongString()) } diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 98504353e2..5844b71ca7 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -843,6 +843,20 @@ const ( ASWPLW ASWPLH ASWPLB + ACASD + ACASW + ACASH + ACASB + ACASAD + ACASAW + ACASLD + ACASLW + ACASALD + ACASALW + ACASALH + ACASALB + ACASPD + ACASPW ABEQ ABNE ABCS diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 126eefd032..fb216f9a94 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -337,6 +337,20 @@ var Anames = []string{ "SWPLW", "SWPLH", "SWPLB", + "CASD", + "CASW", + "CASH", + "CASB", + "CASAD", + "CASAW", + "CASLD", + "CASLW", + "CASALD", + "CASALW", + "CASALH", + "CASALB", + "CASPD", + "CASPW", "BEQ", "BNE", "BCS", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 8cbf5e719f..4fc62d5c7f 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -80,12 +80,17 @@ type Optab struct { } func IsAtomicInstruction(as obj.As) bool { - _, ok := atomicInstructions[as] - return ok + if _, ok := atomicLDADD[as]; ok { + return true + } + if _, ok := atomicSWP[as]; ok { + return true + } + return false } // known field values of an instruction. -var atomicInstructions = map[obj.As]uint32{ +var atomicLDADD = map[obj.As]uint32{ ALDADDAD: 3<<30 | 0x1c5<<21 | 0x00<<10, ALDADDAW: 2<<30 | 0x1c5<<21 | 0x00<<10, ALDADDAH: 1<<30 | 0x1c5<<21 | 0x00<<10, @@ -150,22 +155,41 @@ var atomicInstructions = map[obj.As]uint32{ ALDORLW: 2<<30 | 0x1c3<<21 | 0x0c<<10, ALDORLH: 1<<30 | 0x1c3<<21 | 0x0c<<10, ALDORLB: 0<<30 | 0x1c3<<21 | 0x0c<<10, - ASWPAD: 3<<30 | 0x1c5<<21 | 0x20<<10, - ASWPAW: 2<<30 | 0x1c5<<21 | 0x20<<10, - ASWPAH: 1<<30 | 0x1c5<<21 | 0x20<<10, - ASWPAB: 0<<30 | 0x1c5<<21 | 0x20<<10, - ASWPALD: 3<<30 | 0x1c7<<21 | 0x20<<10, - ASWPALW: 2<<30 | 0x1c7<<21 | 0x20<<10, - ASWPALH: 1<<30 | 0x1c7<<21 | 0x20<<10, - ASWPALB: 0<<30 | 0x1c7<<21 | 0x20<<10, - ASWPD: 3<<30 | 0x1c1<<21 | 0x20<<10, - ASWPW: 2<<30 | 0x1c1<<21 | 0x20<<10, - ASWPH: 1<<30 | 0x1c1<<21 | 0x20<<10, - ASWPB: 0<<30 | 0x1c1<<21 | 0x20<<10, - ASWPLD: 3<<30 | 0x1c3<<21 | 0x20<<10, - ASWPLW: 2<<30 | 0x1c3<<21 | 0x20<<10, - ASWPLH: 1<<30 | 0x1c3<<21 | 0x20<<10, - ASWPLB: 0<<30 | 0x1c3<<21 | 0x20<<10, +} + +var atomicSWP = map[obj.As]uint32{ + ASWPAD: 3<<30 | 0x1c5<<21 | 0x20<<10, + ASWPAW: 2<<30 | 0x1c5<<21 | 0x20<<10, + ASWPAH: 1<<30 | 0x1c5<<21 | 0x20<<10, + ASWPAB: 0<<30 | 0x1c5<<21 | 0x20<<10, + ASWPALD: 3<<30 | 0x1c7<<21 | 0x20<<10, + ASWPALW: 2<<30 | 0x1c7<<21 | 0x20<<10, + ASWPALH: 1<<30 | 0x1c7<<21 | 0x20<<10, + ASWPALB: 0<<30 | 0x1c7<<21 | 0x20<<10, + ASWPD: 3<<30 | 0x1c1<<21 | 0x20<<10, + ASWPW: 2<<30 | 0x1c1<<21 | 0x20<<10, + ASWPH: 1<<30 | 0x1c1<<21 | 0x20<<10, + ASWPB: 0<<30 | 0x1c1<<21 | 0x20<<10, + ASWPLD: 3<<30 | 0x1c3<<21 | 0x20<<10, + ASWPLW: 2<<30 | 0x1c3<<21 | 0x20<<10, + ASWPLH: 1<<30 | 0x1c3<<21 | 0x20<<10, + ASWPLB: 0<<30 | 0x1c3<<21 | 0x20<<10, + ACASD: 3<<30 | 0x45<<21 | 0x1f<<10, + ACASW: 2<<30 | 0x45<<21 | 0x1f<<10, + ACASH: 1<<30 | 0x45<<21 | 0x1f<<10, + ACASB: 0<<30 | 0x45<<21 | 0x1f<<10, + ACASAD: 3<<30 | 0x47<<21 | 0x1f<<10, + ACASAW: 2<<30 | 0x47<<21 | 0x1f<<10, + ACASLD: 3<<30 | 0x45<<21 | 0x3f<<10, + ACASLW: 2<<30 | 0x45<<21 | 0x3f<<10, + ACASALD: 3<<30 | 0x47<<21 | 0x3f<<10, + ACASALW: 2<<30 | 0x47<<21 | 0x3f<<10, + ACASALH: 1<<30 | 0x47<<21 | 0x3f<<10, + ACASALB: 0<<30 | 0x47<<21 | 0x3f<<10, +} +var atomicCASP = map[obj.As]uint32{ + ACASPD: 1<<30 | 0x41<<21 | 0x1f<<10, + ACASPW: 0<<30 | 0x41<<21 | 0x1f<<10, } var oprange [ALAST & obj.AMask][]Optab @@ -794,8 +818,10 @@ var optab = []Optab{ {ASTPW, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, C_XPOST}, {ASTPW, C_PAIR, C_NONE, C_NONE, C_ADDR, 87, 12, 0, 0, 0}, - {ASWPD, C_REG, C_NONE, C_NONE, C_ZOREG, 47, 4, 0, 0, 0}, // RegTo2=C_REG - {ASWPD, C_REG, C_NONE, C_NONE, C_ZAUTO, 47, 4, REGSP, 0, 0}, // RegTo2=C_REG + {ASWPD, C_REG, C_NONE, C_NONE, C_ZOREG, 47, 4, 0, 0, 0}, // RegTo2=C_REG + {ASWPD, C_REG, C_NONE, C_NONE, C_ZAUTO, 47, 4, REGSP, 0, 0}, // RegTo2=C_REG + {ACASPD, C_PAIR, C_NONE, C_NONE, C_ZOREG, 106, 4, 0, 0, 0}, // RegTo2=C_REGREG + {ACASPD, C_PAIR, C_NONE, C_NONE, C_ZAUTO, 106, 4, REGSP, 0, 0}, // RegTo2=C_REGREG {ALDAR, C_ZOREG, C_NONE, C_NONE, C_REG, 58, 4, 0, 0, 0}, {ALDXR, C_ZOREG, C_NONE, C_NONE, C_REG, 58, 4, 0, 0, 0}, {ALDAXR, C_ZOREG, C_NONE, C_NONE, C_REG, 58, 4, 0, 0, 0}, @@ -2011,7 +2037,7 @@ func (c *ctxt7) oplook(p *obj.Prog) *Optab { a1-- a3 := C_NONE + 1 - if p.GetFrom3() != nil { + if p.GetFrom3() != nil && p.RestArgs[0].Pos == 0 { a3 = int(p.GetFrom3().Class) if a3 == 0 { a3 = c.aclass(p.GetFrom3()) + 1 @@ -2496,10 +2522,18 @@ func buildop(ctxt *obj.Link) { oprangeset(AMOVZW, t) case ASWPD: - for i := range atomicInstructions { + for i := range atomicLDADD { + oprangeset(i, t) + } + for i := range atomicSWP { + if i == ASWPD { + continue + } oprangeset(i, t) } + case ACASPD: + oprangeset(ACASPW, t) case ABEQ: oprangeset(ABNE, t) oprangeset(ABCS, t) @@ -3994,17 +4028,27 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 |= uint32(p.From.Reg&31) << 5 o1 |= uint32(p.To.Reg & 31) - case 47: /* SWPx/LDADDx/LDANDx/LDEORx/LDORx Rs, (Rb), Rt */ + case 47: // SWPx/LDADDx/LDANDx/LDEORx/LDORx/CASx Rs, (Rb), Rt rs := p.From.Reg rt := p.RegTo2 rb := p.To.Reg - fields := atomicInstructions[p.As] - // rt can't be sp. rt can't be r31 when field A is 0, A bit is the 23rd bit. - if rt == REG_RSP || (rt == REGZERO && (fields&(1<<23) == 0)) { + // rt can't be sp. + if rt == REG_RSP { c.ctxt.Diag("illegal destination register: %v\n", p) } - o1 |= fields | uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31) + if enc, ok := atomicLDADD[p.As]; ok { + // for LDADDx-like instructions, rt can't be r31 when field.enc A is 0, A bit is the 23rd bit. + if (rt == REGZERO) && (enc&(1<<23) == 0) { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 |= enc + } else if enc, ok := atomicSWP[p.As]; ok { + o1 |= enc + } else { + c.ctxt.Diag("invalid atomic instructions: %v\n", p) + } + o1 |= uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31) case 48: /* ADD $C_ADDCON2, Rm, Rd */ // NOTE: this case does not use REGTMP. If it ever does, @@ -5351,6 +5395,38 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rt := int((p.To.Reg) & 31) r := int((p.Reg) & 31) o1 |= ((Q & 1) << 30) | ((size & 3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 106: // CASPx (Rs, Rs+1), (Rb), (Rt, Rt+1) + rs := p.From.Reg + rt := p.GetTo2().Reg + rb := p.To.Reg + rs1 := int16(p.From.Offset) + rt1 := int16(p.GetTo2().Offset) + + enc, ok := atomicCASP[p.As] + if !ok { + c.ctxt.Diag("invalid CASP-like atomic instructions: %v\n", p) + } + // for CASPx-like instructions, Rs<0> != 1 && Rt<0> != 1 + switch { + case rs&1 != 0: + c.ctxt.Diag("source register pair must start from even register: %v\n", p) + break + case rt&1 != 0: + c.ctxt.Diag("destination register pair must start from even register: %v\n", p) + break + case rs != rs1-1: + c.ctxt.Diag("source register pair must be contiguous: %v\n", p) + break + case rt != rt1-1: + c.ctxt.Diag("destination register pair must be contiguous: %v\n", p) + break + } + // rt can't be sp. + if rt == REG_RSP { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 |= enc | uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31) } out[0] = o1 out[1] = o2 diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index 2037beca72..b578b6a09a 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -283,28 +283,41 @@ func (a *Addr) SetTarget(t *Prog) { // The other fields not yet mentioned are for use by the back ends and should // be left zeroed by creators of Prog lists. type Prog struct { - Ctxt *Link // linker context - Link *Prog // next Prog in linked list - From Addr // first source operand - RestArgs []Addr // can pack any operands that not fit into {Prog.From, Prog.To} - To Addr // destination operand (second is RegTo2 below) - Pool *Prog // constant pool entry, for arm,arm64 back ends - Forwd *Prog // for x86 back end - Rel *Prog // for x86, arm back ends - Pc int64 // for back ends or assembler: virtual or actual program counter, depending on phase - Pos src.XPos // source position of this instruction - Spadj int32 // effect of instruction on stack pointer (increment or decrement amount) - As As // assembler opcode - Reg int16 // 2nd source operand - RegTo2 int16 // 2nd destination operand - Mark uint16 // bitmask of arch-specific items - Optab uint16 // arch-specific opcode index - Scond uint8 // bits that describe instruction suffixes (e.g. ARM conditions) - Back uint8 // for x86 back end: backwards branch state - Ft uint8 // for x86 back end: type index of Prog.From - Tt uint8 // for x86 back end: type index of Prog.To - Isize uint8 // for x86 back end: size of the instruction in bytes -} + Ctxt *Link // linker context + Link *Prog // next Prog in linked list + From Addr // first source operand + RestArgs []AddrPos // can pack any operands that not fit into {Prog.From, Prog.To} + To Addr // destination operand (second is RegTo2 below) + Pool *Prog // constant pool entry, for arm,arm64 back ends + Forwd *Prog // for x86 back end + Rel *Prog // for x86, arm back ends + Pc int64 // for back ends or assembler: virtual or actual program counter, depending on phase + Pos src.XPos // source position of this instruction + Spadj int32 // effect of instruction on stack pointer (increment or decrement amount) + As As // assembler opcode + Reg int16 // 2nd source operand + RegTo2 int16 // 2nd destination operand + Mark uint16 // bitmask of arch-specific items + Optab uint16 // arch-specific opcode index + Scond uint8 // bits that describe instruction suffixes (e.g. ARM conditions) + Back uint8 // for x86 back end: backwards branch state + Ft uint8 // for x86 back end: type index of Prog.From + Tt uint8 // for x86 back end: type index of Prog.To + Isize uint8 // for x86 back end: size of the instruction in bytes +} + +// Pos indicates whether the oprand is the source or the destination. +type AddrPos struct { + Addr + Pos OperandPos +} + +type OperandPos int8 + +const ( + Source OperandPos = iota + Destination +) // From3Type returns p.GetFrom3().Type, or TYPE_NONE when // p.GetFrom3() returns nil. @@ -330,15 +343,36 @@ func (p *Prog) GetFrom3() *Addr { if p.RestArgs == nil { return nil } - return &p.RestArgs[0] + return &p.RestArgs[0].Addr } -// SetFrom3 assigns []Addr{a} to p.RestArgs. +// SetFrom3 assigns []Args{{a, 0}} to p.RestArgs. // In pair with Prog.GetFrom3 it can help in emulation of Prog.From3. // // Deprecated: for the same reasons as Prog.GetFrom3. func (p *Prog) SetFrom3(a Addr) { - p.RestArgs = []Addr{a} + p.RestArgs = []AddrPos{{a, Source}} +} + +// SetTo2 assings []Args{{a, 1}} to p.RestArgs when the second destination +// operand does not fit into prog.RegTo2. +func (p *Prog) SetTo2(a Addr) { + p.RestArgs = []AddrPos{{a, Destination}} +} + +// GetTo2 returns the second destination operand. +func (p *Prog) GetTo2() *Addr { + if p.RestArgs == nil { + return nil + } + return &p.RestArgs[0].Addr +} + +// SetRestArgs assigns more than one source operands to p.RestArgs. +func (p *Prog) SetRestArgs(args []Addr) { + for i := range args { + p.RestArgs = append(p.RestArgs, AddrPos{args[i], Source}) + } } // An As denotes an assembler opcode. diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 5301e44002..0cffa54fa6 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -48,7 +48,7 @@ func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *ob p.As = AAUIPC p.Mark |= NEED_PCREL_ITYPE_RELOC - p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: to.Offset, Sym: to.Sym}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: to.Offset, Sym: to.Sym}) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} p.Reg = 0 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} @@ -234,7 +234,7 @@ func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) { p.As = AAUIPC p.Mark |= NEED_PCREL_ITYPE_RELOC - p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} p.Reg = 0 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: to.Reg} @@ -275,7 +275,7 @@ func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) { p.As = AAUIPC p.Mark |= NEED_PCREL_STYPE_RELOC - p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: p.To.Offset, Sym: p.To.Sym}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: p.To.Offset, Sym: p.To.Sym}) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} p.Reg = 0 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} @@ -340,7 +340,7 @@ func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) { p.As = AAUIPC p.Mark |= NEED_PCREL_ITYPE_RELOC - p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} p.Reg = 0 p.To = to diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go index da14dd3c41..f0f9d5cefc 100644 --- a/src/cmd/internal/obj/s390x/asmz.go +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -718,7 +718,7 @@ func (c *ctxtz) oplook(p *obj.Prog) *Optab { p.From.Class = int8(c.aclass(&p.From) + 1) p.To.Class = int8(c.aclass(&p.To) + 1) for i := range p.RestArgs { - p.RestArgs[i].Class = int8(c.aclass(&p.RestArgs[i]) + 1) + p.RestArgs[i].Addr.Class = int8(c.aclass(&p.RestArgs[i].Addr) + 1) } // Mirrors the argument list in Optab. diff --git a/src/cmd/internal/obj/util.go b/src/cmd/internal/obj/util.go index 21e28807a6..b9bacb7a22 100644 --- a/src/cmd/internal/obj/util.go +++ b/src/cmd/internal/obj/util.go @@ -175,9 +175,11 @@ func (p *Prog) WriteInstructionString(w io.Writer) { sep = ", " } for i := range p.RestArgs { - io.WriteString(w, sep) - WriteDconv(w, p, &p.RestArgs[i]) - sep = ", " + if p.RestArgs[i].Pos == Source { + io.WriteString(w, sep) + WriteDconv(w, p, &p.RestArgs[i].Addr) + sep = ", " + } } if p.As == ATEXT { @@ -198,6 +200,13 @@ func (p *Prog) WriteInstructionString(w io.Writer) { if p.RegTo2 != REG_NONE { fmt.Fprintf(w, "%s%v", sep, Rconv(int(p.RegTo2))) } + for i := range p.RestArgs { + if p.RestArgs[i].Pos == Destination { + io.WriteString(w, sep) + WriteDconv(w, p, &p.RestArgs[i].Addr) + sep = ", " + } + } } func (ctxt *Link) NewProg() *Prog { diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index c412f4945d..94aed44871 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -4270,7 +4270,7 @@ func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { args = append(args, ft) } for i := range p.RestArgs { - args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax) + args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax) } if tt != Ynone*Ymax { args = append(args, tt) @@ -5438,10 +5438,10 @@ func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { // unpackOps4 extracts 4 operands from p. func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) { - return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To + return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To } // unpackOps5 extracts 5 operands from p. func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) { - return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.RestArgs[2], &p.To + return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To } -- cgit v1.3 From 5f0fca1475e042a6e85f81ff6db676ec18805041 Mon Sep 17 00:00:00 2001 From: Jonathan Swinney Date: Mon, 2 Nov 2020 16:36:10 +0000 Subject: cmd/asm: rename arm64 instructions LDANDx to LDCLRx The LDANDx instructions were misleading because they correspond to the mnemonic LDCLRx as defined in the Arm Architecture Reference Manual for Armv8. This changes the assembler to use the same mnemonic as the GNU assembler and the manual. The instruction has the form: LDCLRx Rs, (Rb), Rt: *Rb -> Rt, Rs AND NOT(*Rb) -> *Rb Change-Id: I94ae003e99e817209bba1afe960e612bf3a0b410 Reviewed-on: https://go-review.googlesource.com/c/go/+/267138 Reviewed-by: Cherry Zhang Reviewed-by: fannie zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Trust: fannie zhang --- src/cmd/asm/internal/asm/testdata/arm64.s | 64 +++++++++++++------------- src/cmd/asm/internal/asm/testdata/arm64error.s | 48 +++++++++---------- src/cmd/internal/obj/arm64/a.out.go | 32 ++++++------- src/cmd/internal/obj/arm64/anames.go | 32 ++++++------- src/cmd/internal/obj/arm64/asm7.go | 34 +++++++------- 5 files changed, 105 insertions(+), 105 deletions(-) (limited to 'src/cmd/internal/obj/arm64') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 5547cf634c..91e3a0ca0a 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -681,38 +681,38 @@ again: LDADDLH R5, (RSP), R7 // e7036578 LDADDLB R5, (R6), R7 // c7006538 LDADDLB R5, (RSP), R7 // e7036538 - LDANDAD R5, (R6), R7 // c710a5f8 - LDANDAD R5, (RSP), R7 // e713a5f8 - LDANDAW R5, (R6), R7 // c710a5b8 - LDANDAW R5, (RSP), R7 // e713a5b8 - LDANDAH R5, (R6), R7 // c710a578 - LDANDAH R5, (RSP), R7 // e713a578 - LDANDAB R5, (R6), R7 // c710a538 - LDANDAB R5, (RSP), R7 // e713a538 - LDANDALD R5, (R6), R7 // c710e5f8 - LDANDALD R5, (RSP), R7 // e713e5f8 - LDANDALW R5, (R6), R7 // c710e5b8 - LDANDALW R5, (RSP), R7 // e713e5b8 - LDANDALH R5, (R6), R7 // c710e578 - LDANDALH R5, (RSP), R7 // e713e578 - LDANDALB R5, (R6), R7 // c710e538 - LDANDALB R5, (RSP), R7 // e713e538 - LDANDD R5, (R6), R7 // c71025f8 - LDANDD R5, (RSP), R7 // e71325f8 - LDANDW R5, (R6), R7 // c71025b8 - LDANDW R5, (RSP), R7 // e71325b8 - LDANDH R5, (R6), R7 // c7102578 - LDANDH R5, (RSP), R7 // e7132578 - LDANDB R5, (R6), R7 // c7102538 - LDANDB R5, (RSP), R7 // e7132538 - LDANDLD R5, (R6), R7 // c71065f8 - LDANDLD R5, (RSP), R7 // e71365f8 - LDANDLW R5, (R6), R7 // c71065b8 - LDANDLW R5, (RSP), R7 // e71365b8 - LDANDLH R5, (R6), R7 // c7106578 - LDANDLH R5, (RSP), R7 // e7136578 - LDANDLB R5, (R6), R7 // c7106538 - LDANDLB R5, (RSP), R7 // e7136538 + LDCLRAD R5, (R6), R7 // c710a5f8 + LDCLRAD R5, (RSP), R7 // e713a5f8 + LDCLRAW R5, (R6), R7 // c710a5b8 + LDCLRAW R5, (RSP), R7 // e713a5b8 + LDCLRAH R5, (R6), R7 // c710a578 + LDCLRAH R5, (RSP), R7 // e713a578 + LDCLRAB R5, (R6), R7 // c710a538 + LDCLRAB R5, (RSP), R7 // e713a538 + LDCLRALD R5, (R6), R7 // c710e5f8 + LDCLRALD R5, (RSP), R7 // e713e5f8 + LDCLRALW R5, (R6), R7 // c710e5b8 + LDCLRALW R5, (RSP), R7 // e713e5b8 + LDCLRALH R5, (R6), R7 // c710e578 + LDCLRALH R5, (RSP), R7 // e713e578 + LDCLRALB R5, (R6), R7 // c710e538 + LDCLRALB R5, (RSP), R7 // e713e538 + LDCLRD R5, (R6), R7 // c71025f8 + LDCLRD R5, (RSP), R7 // e71325f8 + LDCLRW R5, (R6), R7 // c71025b8 + LDCLRW R5, (RSP), R7 // e71325b8 + LDCLRH R5, (R6), R7 // c7102578 + LDCLRH R5, (RSP), R7 // e7132578 + LDCLRB R5, (R6), R7 // c7102538 + LDCLRB R5, (RSP), R7 // e7132538 + LDCLRLD R5, (R6), R7 // c71065f8 + LDCLRLD R5, (RSP), R7 // e71365f8 + LDCLRLW R5, (R6), R7 // c71065b8 + LDCLRLW R5, (RSP), R7 // e71365b8 + LDCLRLH R5, (R6), R7 // c7106578 + LDCLRLH R5, (RSP), R7 // e7136578 + LDCLRLB R5, (R6), R7 // c7106538 + LDCLRLB R5, (RSP), R7 // e7136538 LDEORAD R5, (R6), R7 // c720a5f8 LDEORAD R5, (RSP), R7 // e723a5f8 LDEORAW R5, (R6), R7 // c720a5b8 diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 99e4d62d25..e579f20836 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -123,14 +123,14 @@ TEXT errors(SB),$0 LDADDLW R5, (R6), ZR // ERROR "illegal destination register" LDADDLH R5, (R6), ZR // ERROR "illegal destination register" LDADDLB R5, (R6), ZR // ERROR "illegal destination register" - LDANDD R5, (R6), ZR // ERROR "illegal destination register" - LDANDW R5, (R6), ZR // ERROR "illegal destination register" - LDANDH R5, (R6), ZR // ERROR "illegal destination register" - LDANDB R5, (R6), ZR // ERROR "illegal destination register" - LDANDLD R5, (R6), ZR // ERROR "illegal destination register" - LDANDLW R5, (R6), ZR // ERROR "illegal destination register" - LDANDLH R5, (R6), ZR // ERROR "illegal destination register" - LDANDLB R5, (R6), ZR // ERROR "illegal destination register" + LDCLRD R5, (R6), ZR // ERROR "illegal destination register" + LDCLRW R5, (R6), ZR // ERROR "illegal destination register" + LDCLRH R5, (R6), ZR // ERROR "illegal destination register" + LDCLRB R5, (R6), ZR // ERROR "illegal destination register" + LDCLRLD R5, (R6), ZR // ERROR "illegal destination register" + LDCLRLW R5, (R6), ZR // ERROR "illegal destination register" + LDCLRLH R5, (R6), ZR // ERROR "illegal destination register" + LDCLRLB R5, (R6), ZR // ERROR "illegal destination register" LDEORD R5, (R6), ZR // ERROR "illegal destination register" LDEORW R5, (R6), ZR // ERROR "illegal destination register" LDEORH R5, (R6), ZR // ERROR "illegal destination register" @@ -163,22 +163,22 @@ TEXT errors(SB),$0 LDADDLW R5, (R6), RSP // ERROR "illegal destination register" LDADDLH R5, (R6), RSP // ERROR "illegal destination register" LDADDLB R5, (R6), RSP // ERROR "illegal destination register" - LDANDAD R5, (R6), RSP // ERROR "illegal destination register" - LDANDAW R5, (R6), RSP // ERROR "illegal destination register" - LDANDAH R5, (R6), RSP // ERROR "illegal destination register" - LDANDAB R5, (R6), RSP // ERROR "illegal destination register" - LDANDALD R5, (R6), RSP // ERROR "illegal destination register" - LDANDALW R5, (R6), RSP // ERROR "illegal destination register" - LDANDALH R5, (R6), RSP // ERROR "illegal destination register" - LDANDALB R5, (R6), RSP // ERROR "illegal destination register" - LDANDD R5, (R6), RSP // ERROR "illegal destination register" - LDANDW R5, (R6), RSP // ERROR "illegal destination register" - LDANDH R5, (R6), RSP // ERROR "illegal destination register" - LDANDB R5, (R6), RSP // ERROR "illegal destination register" - LDANDLD R5, (R6), RSP // ERROR "illegal destination register" - LDANDLW R5, (R6), RSP // ERROR "illegal destination register" - LDANDLH R5, (R6), RSP // ERROR "illegal destination register" - LDANDLB R5, (R6), RSP // ERROR "illegal destination register" + LDCLRAD R5, (R6), RSP // ERROR "illegal destination register" + LDCLRAW R5, (R6), RSP // ERROR "illegal destination register" + LDCLRAH R5, (R6), RSP // ERROR "illegal destination register" + LDCLRAB R5, (R6), RSP // ERROR "illegal destination register" + LDCLRALD R5, (R6), RSP // ERROR "illegal destination register" + LDCLRALW R5, (R6), RSP // ERROR "illegal destination register" + LDCLRALH R5, (R6), RSP // ERROR "illegal destination register" + LDCLRALB R5, (R6), RSP // ERROR "illegal destination register" + LDCLRD R5, (R6), RSP // ERROR "illegal destination register" + LDCLRW R5, (R6), RSP // ERROR "illegal destination register" + LDCLRH R5, (R6), RSP // ERROR "illegal destination register" + LDCLRB R5, (R6), RSP // ERROR "illegal destination register" + LDCLRLD R5, (R6), RSP // ERROR "illegal destination register" + LDCLRLW R5, (R6), RSP // ERROR "illegal destination register" + LDCLRLH R5, (R6), RSP // ERROR "illegal destination register" + LDCLRLB R5, (R6), RSP // ERROR "illegal destination register" LDEORAD R5, (R6), RSP // ERROR "illegal destination register" LDEORAW R5, (R6), RSP // ERROR "illegal destination register" LDEORAH R5, (R6), RSP // ERROR "illegal destination register" diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 5844b71ca7..1d1bea505c 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -617,22 +617,6 @@ const ( ALDADDLD ALDADDLH ALDADDLW - ALDANDAB - ALDANDAD - ALDANDAH - ALDANDAW - ALDANDALB - ALDANDALD - ALDANDALH - ALDANDALW - ALDANDB - ALDANDD - ALDANDH - ALDANDW - ALDANDLB - ALDANDLD - ALDANDLH - ALDANDLW ALDAR ALDARB ALDARH @@ -643,6 +627,22 @@ const ( ALDAXRB ALDAXRH ALDAXRW + ALDCLRAB + ALDCLRAD + ALDCLRAH + ALDCLRAW + ALDCLRALB + ALDCLRALD + ALDCLRALH + ALDCLRALW + ALDCLRB + ALDCLRD + ALDCLRH + ALDCLRW + ALDCLRLB + ALDCLRLD + ALDCLRLH + ALDCLRLW ALDEORAB ALDEORAD ALDEORAH diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index fb216f9a94..a98f8c7ed5 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -111,22 +111,6 @@ var Anames = []string{ "LDADDLD", "LDADDLH", "LDADDLW", - "LDANDAB", - "LDANDAD", - "LDANDAH", - "LDANDAW", - "LDANDALB", - "LDANDALD", - "LDANDALH", - "LDANDALW", - "LDANDB", - "LDANDD", - "LDANDH", - "LDANDW", - "LDANDLB", - "LDANDLD", - "LDANDLH", - "LDANDLW", "LDAR", "LDARB", "LDARH", @@ -137,6 +121,22 @@ var Anames = []string{ "LDAXRB", "LDAXRH", "LDAXRW", + "LDCLRAB", + "LDCLRAD", + "LDCLRAH", + "LDCLRAW", + "LDCLRALB", + "LDCLRALD", + "LDCLRALH", + "LDCLRALW", + "LDCLRB", + "LDCLRD", + "LDCLRH", + "LDCLRW", + "LDCLRLB", + "LDCLRLD", + "LDCLRLH", + "LDCLRLW", "LDEORAB", "LDEORAD", "LDEORAH", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 4fc62d5c7f..1a359f1921 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -107,22 +107,22 @@ var atomicLDADD = map[obj.As]uint32{ ALDADDLW: 2<<30 | 0x1c3<<21 | 0x00<<10, ALDADDLH: 1<<30 | 0x1c3<<21 | 0x00<<10, ALDADDLB: 0<<30 | 0x1c3<<21 | 0x00<<10, - ALDANDAD: 3<<30 | 0x1c5<<21 | 0x04<<10, - ALDANDAW: 2<<30 | 0x1c5<<21 | 0x04<<10, - ALDANDAH: 1<<30 | 0x1c5<<21 | 0x04<<10, - ALDANDAB: 0<<30 | 0x1c5<<21 | 0x04<<10, - ALDANDALD: 3<<30 | 0x1c7<<21 | 0x04<<10, - ALDANDALW: 2<<30 | 0x1c7<<21 | 0x04<<10, - ALDANDALH: 1<<30 | 0x1c7<<21 | 0x04<<10, - ALDANDALB: 0<<30 | 0x1c7<<21 | 0x04<<10, - ALDANDD: 3<<30 | 0x1c1<<21 | 0x04<<10, - ALDANDW: 2<<30 | 0x1c1<<21 | 0x04<<10, - ALDANDH: 1<<30 | 0x1c1<<21 | 0x04<<10, - ALDANDB: 0<<30 | 0x1c1<<21 | 0x04<<10, - ALDANDLD: 3<<30 | 0x1c3<<21 | 0x04<<10, - ALDANDLW: 2<<30 | 0x1c3<<21 | 0x04<<10, - ALDANDLH: 1<<30 | 0x1c3<<21 | 0x04<<10, - ALDANDLB: 0<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRAD: 3<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRAW: 2<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRAH: 1<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRAB: 0<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRALD: 3<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRALW: 2<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRALH: 1<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRALB: 0<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRD: 3<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRW: 2<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRH: 1<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRB: 0<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRLD: 3<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRLW: 2<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRLH: 1<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRLB: 0<<30 | 0x1c3<<21 | 0x04<<10, ALDEORAD: 3<<30 | 0x1c5<<21 | 0x08<<10, ALDEORAW: 2<<30 | 0x1c5<<21 | 0x08<<10, ALDEORAH: 1<<30 | 0x1c5<<21 | 0x08<<10, @@ -4028,7 +4028,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 |= uint32(p.From.Reg&31) << 5 o1 |= uint32(p.To.Reg & 31) - case 47: // SWPx/LDADDx/LDANDx/LDEORx/LDORx/CASx Rs, (Rb), Rt + case 47: // SWPx/LDADDx/LDCLRx/LDEORx/LDORx/CASx Rs, (Rb), Rt rs := p.From.Reg rt := p.RegTo2 rb := p.To.Reg -- cgit v1.3