diff options
Diffstat (limited to 'src/cmd/internal/obj')
| -rw-r--r-- | src/cmd/internal/obj/arm64/a.out.go | 2 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/anames.go | 2 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/asm7.go | 188 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/doc.go | 54 |
4 files changed, 217 insertions, 29 deletions
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 93322c77e1..3bb897c7a8 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -766,6 +766,8 @@ const ( AVMOVI AVUADDLV AVSUB + AVFMLA + AVFMLS ALAST AB = obj.AJMP ABL = obj.ACALL diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 13dbaae894..c369b66198 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -383,5 +383,7 @@ var Anames = []string{ "VMOVI", "VUADDLV", "VSUB", + "VFMLA", + "VFMLS", "LAST", } diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 423f55f741..9d064806a1 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -146,6 +146,10 @@ func FPOP2S(m uint32, s uint32, type_ uint32, op uint32) uint32 { return m<<31 | s<<29 | 0x1E<<24 | type_<<22 | 1<<21 | op<<12 | 2<<10 } +func FPOP3S(m uint32, s uint32, type_ uint32, op uint32, op2 uint32) uint32 { + return m<<31 | s<<29 | 0x1F<<24 | type_<<22 | op<<21 | op2<<15 +} + func FPCVTI(sf uint32, s uint32, type_ uint32, rmode uint32, op uint32) uint32 { return sf<<31 | s<<29 | 0x1E<<24 | type_<<22 | 1<<21 | rmode<<19 | op<<16 | 0<<10 } @@ -539,6 +543,7 @@ var optab = []Optab{ {AFADDS, C_FREG, C_FREG, C_FREG, 54, 4, 0, 0, 0}, {AFADDS, C_FCON, C_NONE, C_FREG, 54, 4, 0, 0, 0}, {AFADDS, C_FCON, C_FREG, C_FREG, 54, 4, 0, 0, 0}, + {AFMSUBD, C_FREG, C_FREG, C_FREG, 15, 4, 0, 0, 0}, {AFMOVS, C_FCON, C_NONE, C_FREG, 54, 4, 0, 0, 0}, {AFMOVS, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0}, {AFMOVD, C_FCON, C_NONE, C_FREG, 54, 4, 0, 0, 0}, @@ -589,6 +594,7 @@ var optab = []Optab{ {AVLD1, C_ROFF, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, {AVMOV, C_ELEM, C_NONE, C_REG, 73, 4, 0, 0, 0}, {AVMOV, C_REG, C_NONE, C_ARNG, 82, 4, 0, 0, 0}, + {AVMOV, C_ELEM, C_NONE, C_ELEM, 92, 4, 0, 0, 0}, {AVMOV, C_ARNG, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, {AVMOV, C_REG, C_NONE, C_ELEM, 78, 4, 0, 0, 0}, {AVMOV, C_ELEM, C_NONE, C_VREG, 80, 4, 0, 0, 0}, @@ -600,6 +606,7 @@ var optab = []Optab{ {AVADDV, C_ARNG, C_NONE, C_VREG, 85, 4, 0, 0, 0}, {AVCNT, C_ARNG, C_NONE, C_ARNG, 29, 4, 0, 0, 0}, {AVMOVI, C_ADDCON, C_NONE, C_ARNG, 86, 4, 0, 0, 0}, + {AVFMLA, C_ARNG, C_ARNG, C_ARNG, 72, 4, 0, 0, 0}, {obj.AUNDEF, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0}, {obj.APCDATA, C_VCON, C_NONE, C_VCON, 0, 0, 0, 0, 0}, @@ -1987,6 +1994,15 @@ func buildop(ctxt *obj.Link) { oprangeset(AFMINNMS, t) oprangeset(AFDIVD, t) + case AFMSUBD: + oprangeset(AFMSUBS, t) + oprangeset(AFMADDS, t) + oprangeset(AFMADDD, t) + oprangeset(AFNMSUBS, t) + oprangeset(AFNMSUBD, t) + oprangeset(AFNMADDS, t) + oprangeset(AFNMADDD, t) + case AFCVTSD: oprangeset(AFCVTDS, t) oprangeset(AFABSD, t) @@ -2126,6 +2142,9 @@ func buildop(ctxt *obj.Link) { case AVADDV: oprangeset(AVUADDLV, t) + case AVFMLA: + oprangeset(AVFMLS, t) + case ASHA1H, AVCNT, AVMOV, @@ -2189,6 +2208,13 @@ func SYSARG4(op1 int, Cn int, Cm int, op2 int) int { return SYSARG5(0, op1, Cn, Cm, op2) } +/* checkindex checks if index >= 0 && index <= maxindex */ +func (c *ctxt7) checkindex(p *obj.Prog, index, maxindex int) { + if index < 0 || index > maxindex { + c.ctxt.Diag("register element index out of range 0 to %d: %v", maxindex, p) + } +} + func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 := uint32(0) o2 := uint32(0) @@ -2420,7 +2446,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 = 0 } - case 15: /* mul/mneg/umulh/umull r,[r,]r; madd/msub Rm,Ra,Rn,Rd */ + case 15: /* mul/mneg/umulh/umull r,[r,]r; madd/msub/fmadd/fmsub/fnmadd/fnmsub Rm,Ra,Rn,Rd */ o1 = c.oprrr(p, p.As) rf := int(p.From.Reg) @@ -3283,12 +3309,13 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Add = 0 rel.Type = objabi.R_ARM64_GOTPCREL - case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor Vm.<T>, Vn.<T>, Vd.<T> */ + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls Vm.<T>, Vn.<T>, Vd.<T> */ af := int((p.From.Reg >> 5) & 15) af3 := int((p.Reg >> 5) & 15) at := int((p.To.Reg >> 5) & 15) if af != af3 || af != at { - c.ctxt.Diag("invalid arrangement: %v\n", p) + c.ctxt.Diag("operand mismatch: %v", p) + break } o1 = c.oprrr(p, p.As) rf := int((p.From.Reg) & 31) @@ -3320,16 +3347,25 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { Q = 1 size = 1 default: - c.ctxt.Diag("invalid arrangement: %v\n", p) + c.ctxt.Diag("invalid arrangement: %v", p) } if (p.As == AVORR || p.As == AVAND || p.As == AVEOR) && (af != ARNG_16B && af != ARNG_8B) { - c.ctxt.Diag("invalid arrangement on op %v", p.As) + c.ctxt.Diag("invalid arrangement: %v", p) + } else if (p.As == AVFMLA || p.As == AVFMLS) && + (af != ARNG_2D && af != ARNG_2S && af != ARNG_4S) { + c.ctxt.Diag("invalid arrangement: %v", p) } else if p.As == AVORR { size = 2 } else if p.As == AVAND || p.As == AVEOR { size = 0 + } else if (p.As == AVFMLA || p.As == AVFMLS) { + if af == ARNG_2D { + size = 1 + } else { + size = 0 + } } o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) @@ -3339,22 +3375,27 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rt := int(p.To.Reg) imm5 := 0 o1 = 7<<25 | 0xf<<10 + index := int(p.From.Index) switch (p.From.Reg >> 5) & 15 { case ARNG_B: + c.checkindex(p, index, 15) imm5 |= 1 - imm5 |= int(p.From.Index) << 1 + imm5 |= index << 1 case ARNG_H: + c.checkindex(p, index, 7) imm5 |= 2 - imm5 |= int(p.From.Index) << 2 + imm5 |= index << 2 case ARNG_S: + c.checkindex(p, index, 3) imm5 |= 4 - imm5 |= int(p.From.Index) << 3 + imm5 |= index << 3 case ARNG_D: + c.checkindex(p, index, 1) imm5 |= 8 - imm5 |= int(p.From.Index) << 4 + imm5 |= index << 4 o1 |= 1 << 30 default: - c.ctxt.Diag("invalid arrangement on op V.<T>[index], R: %v\n", p) + c.ctxt.Diag("invalid arrangement: %v", p) } o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) @@ -3471,21 +3512,26 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rt := int(p.To.Reg) imm5 := 0 o1 = 1<<30 | 7<<25 | 7<<10 + index :=int(p.From.Index) switch (p.To.Reg >> 5) & 15 { case ARNG_B: + c.checkindex(p, index, 15) imm5 |= 1 - imm5 |= int(p.From.Index) << 1 + imm5 |= index << 1 case ARNG_H: + c.checkindex(p, index, 7) imm5 |= 2 - imm5 |= int(p.From.Index) << 2 + imm5 |= index << 2 case ARNG_S: + c.checkindex(p, index, 3) imm5 |= 4 - imm5 |= int(p.From.Index) << 3 + imm5 |= index << 3 case ARNG_D: + c.checkindex(p, index, 1) imm5 |= 8 - imm5 |= int(p.From.Index) << 4 + imm5 |= index << 4 default: - c.ctxt.Diag("invalid arrangement on op R, V.<T>[index]: %v\n", p) + c.ctxt.Diag("invalid arrangement: %v", p) } o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) @@ -3493,38 +3539,46 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rf := int(p.From.Reg) rt := int(p.To.Reg) o1 = 7<<25 | 1<<10 - var imm5, Q uint32 + var imm5, Q int + index := int(p.From.Index) switch (p.To.Reg >> 5) & 15 { case ARNG_16B: + c.checkindex(p, index, 15) Q = 1 imm5 = 1 - imm5 |= uint32(p.From.Index) << 1 + imm5 |= index << 1 case ARNG_2D: + c.checkindex(p, index, 1) Q = 1 imm5 = 8 - imm5 |= uint32(p.From.Index) << 4 + imm5 |= index << 4 case ARNG_2S: + c.checkindex(p, index, 3) Q = 0 imm5 = 4 - imm5 |= uint32(p.From.Index) << 3 + imm5 |= index << 3 case ARNG_4H: + c.checkindex(p, index, 7) Q = 0 imm5 = 2 - imm5 |= uint32(p.From.Index) << 2 + imm5 |= index << 2 case ARNG_4S: + c.checkindex(p, index, 3) Q = 1 imm5 = 4 - imm5 |= uint32(p.From.Index) << 3 + imm5 |= index << 3 case ARNG_8B: + c.checkindex(p, index, 15) Q = 0 imm5 = 1 - imm5 |= uint32(p.From.Index) << 1 + imm5 |= index << 1 case ARNG_8H: + c.checkindex(p, index, 7) Q = 1 imm5 = 2 - imm5 |= uint32(p.From.Index) << 2 + imm5 |= index << 2 default: - c.ctxt.Diag("invalid arrangement on VDUP Vn.<T>[index], Vd.<T>: %v\n", p) + c.ctxt.Diag("invalid arrangement: %v", p) } o1 |= (uint32(Q&1) << 30) | (uint32(imm5&0x1f) << 16) o1 |= (uint32(rf&31) << 5) | uint32(rt&31) @@ -3533,24 +3587,29 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rf := int(p.From.Reg) rt := int(p.To.Reg) imm5 := 0 + index := int(p.From.Index) switch p.As { case AVMOV: o1 = 1<<30 | 15<<25 | 1<<10 switch (p.From.Reg >> 5) & 15 { case ARNG_B: + c.checkindex(p, index, 15) imm5 |= 1 - imm5 |= int(p.From.Index) << 1 + imm5 |= index << 1 case ARNG_H: + c.checkindex(p, index, 7) imm5 |= 2 - imm5 |= int(p.From.Index) << 2 + imm5 |= index << 2 case ARNG_S: + c.checkindex(p, index, 3) imm5 |= 4 - imm5 |= int(p.From.Index) << 3 + imm5 |= index << 3 case ARNG_D: + c.checkindex(p, index, 1) imm5 |= 8 - imm5 |= int(p.From.Index) << 4 + imm5 |= index << 4 default: - c.ctxt.Diag("invalid arrangement on op V.<T>[index], Vn: %v\n", p) + c.ctxt.Diag("invalid arrangement: %v", p) } default: c.ctxt.Diag("unsupported op %v", p.As) @@ -3759,6 +3818,47 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { case 90: o1 = 0xbea71700 + case 92: /* vmov Vn.<T>[index], Vd.<T>[index] */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + imm4 := 0 + imm5 := 0 + o1 = 3<<29 | 7<<25 | 1<<10 + index1 := int(p.To.Index) + index2 := int(p.From.Index) + if ((p.To.Reg >> 5) & 15) != ((p.From.Reg >> 5) & 15) { + c.ctxt.Diag("operand mismatch: %v", p) + } + switch (p.To.Reg >> 5) & 15 { + case ARNG_B: + c.checkindex(p, index1, 15) + c.checkindex(p, index2, 15) + imm5 |= 1 + imm5 |= index1 << 1 + imm4 |= index2 + case ARNG_H: + c.checkindex(p, index1, 7) + c.checkindex(p, index2, 7) + imm5 |= 2 + imm5 |= index1 << 2 + imm4 |= index2 << 1 + case ARNG_S: + c.checkindex(p, index1, 3) + c.checkindex(p, index2, 3) + imm5 |= 4 + imm5 |= index1 << 3 + imm4 |= index2 << 2 + case ARNG_D: + c.checkindex(p, index1, 1) + c.checkindex(p, index2, 1) + imm5 |= 8 + imm5 |= index1 << 4 + imm4 |= index2 << 3 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + o1 |= (uint32(imm5&0x1f) << 16) | (uint32(imm4&0xf) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + break case 91: /* prfm imm(Rn), <prfop | $imm5> */ @@ -4157,6 +4257,30 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AFSUBD: return FPOP2S(0, 0, 1, 3) + case AFMADDD: + return FPOP3S(0, 0, 1, 0, 0) + + case AFMADDS: + return FPOP3S(0, 0, 0, 0, 0) + + case AFMSUBD: + return FPOP3S(0, 0, 1, 0, 1) + + case AFMSUBS: + return FPOP3S(0, 0, 0, 0, 1) + + case AFNMADDD: + return FPOP3S(0, 0, 1, 1, 0) + + case AFNMADDS: + return FPOP3S(0, 0, 0, 1, 0) + + case AFNMSUBD: + return FPOP3S(0, 0, 1, 1, 1) + + case AFNMSUBS: + return FPOP3S(0, 0, 0, 1, 1) + case AFMULS: return FPOP2S(0, 0, 0, 0) @@ -4345,6 +4469,12 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVUADDLV: return 1<<29 | 7<<25 | 3<<20 | 7<<11 + + case AVFMLA: + return 7<<25 | 0<<23 | 1<<21 | 3<<14 | 3<<10 + + case AVFMLS: + return 7<<25 | 1<<23 | 1<<21 | 3<<14 | 3<<10 } c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) diff --git a/src/cmd/internal/obj/arm64/doc.go b/src/cmd/internal/obj/arm64/doc.go index 3d65541fd2..a808d4c3ad 100644 --- a/src/cmd/internal/obj/arm64/doc.go +++ b/src/cmd/internal/obj/arm64/doc.go @@ -22,6 +22,46 @@ Go Assembly for ARM64 Reference Manual 2. Alphabetical list of float-point instructions // TODO + FMADDD: 64-bit floating-point fused Multiply-Add + FMADDD <Fm>, <Fa>, <Fn>, <Fd> + Multiplies the values of <Fm> and <Fn>, + adds the product to <Fa>, and writes the result to <Fd>. + + FMADDS: 32-bit floating-point fused Multiply-Add + FMADDS <Fm>, <Fa>, <Fn>, <Fd> + Multiplies the values of <Fm> and <Fn>, + adds the product to <Fa>, and writes the result to <Fd>. + + FMSUBD: 64-bit floating-point fused Multiply-Subtract + FMSUBD <Fm>, <Fa>, <Fn>, <Fd> + Multiplies the values of <Fm> and <Fn>, negates the product, + adds the product to <Fa>, and writes the result to <Fd>. + + FMSUBS: 32-bit floating-point fused Multiply-Subtract + FMSUBS <Fm>, <Fa>, <Fn>, <Fd> + Multiplies the values of <Fm> and <Fn>, negates the product, + adds the product to <Fa>, and writes the result to <Fd>. + + FNMADDD: 64-bit floating-point negated fused Multiply-Add + FNMADDD <Fm>, <Fa>, <Fn>, <Fd> + Multiplies the values of <Fm> and <Fn>, negates the product, + subtracts the value of <Fa>, and writes the result to <Fd>. + + FNMADDS: 32-bit floating-point negated fused Multiply-Add + FNMADDS <Fm>, <Fa>, <Fn>, <Fd> + Multiplies the values of <Fm> and <Fn>, negates the product, + subtracts the value of <Fa>, and writes the result to <Fd>. + + FNMSUBD: 64-bit floating-point negated fused Multiply-Subtract + FNMSUBD <Fm>, <Fa>, <Fn>, <Fd> + Multiplies the values of <Fm> and <Fn>, + subtracts the value of <Fa>, and writes the result to <Fd>. + + FNMSUBS: 32-bit floating-point negated fused Multiply-Subtract + FNMSUBS <Fm>, <Fa>, <Fn>, <Fd> + Multiplies the values of <Fm> and <Fn>, + subtracts the value of <Fa>, and writes the result to <Fd>. + 3. Alphabetical list of SIMD instructions VADD: Add (scalar) VADD <Vm>, <Vn>, <Vd> @@ -65,6 +105,16 @@ Go Assembly for ARM64 Reference Manual <T> Is an arrangement specifier and can have the following values: B8, B16 + VFMLA: Floating-point fused Multiply-Add to accumulator (vector) + VFMLA <Vm>.<T>, <Vn>.<T>, <Vd>.<T> + <T> Is an arrangement specifier and can have the following values: + S2, S4, D2 + + VFMLS: Floating-point fused Multiply-Subtract from accumulator (vector) + VFMLS <Vm>.<T>, <Vn>.<T>, <Vd>.<T> + <T> Is an arrangement specifier and can have the following values: + S2, S4, D2 + VLD1: Load multiple single-element structures VLD1 (Rn), [<Vt>.<T>, <Vt2>.<T> ...] // no offset VLD1.P imm(Rn), [<Vt>.<T>, <Vt2>.<T> ...] // immediate offset variant @@ -96,6 +146,10 @@ Go Assembly for ARM64 Reference Manual <T> Is an element size specifier and can have the following values: B, H, S, D + VMOV <Vn>.<T>[index], <Vd>.<T>[index] // Move vector element to another vector element. + <T> Is an element size specifier and can have the following values: + B, H, S, D + VMOVI: Move Immediate (vector). VMOVI $imm8, <Vd>.<T> <T> is an arrangement specifier and can have the following values: |
