diff options
| author | Fangming.Fang <fangming.fang@arm.com> | 2018-03-16 03:19:01 +0000 |
|---|---|---|
| committer | Cherry Zhang <cherryyz@google.com> | 2018-04-03 15:36:31 +0000 |
| commit | ef9bdd11e8e5d93d268f13b54feedbed7e3fa595 (patch) | |
| tree | bd6f3a03106ce352616c1d51d6c3b2787128c30b /src/cmd/internal/obj/arm64 | |
| parent | dcaf3fb134d5ca3b74a85b765c39aa5b632638cd (diff) | |
| download | go-ef9bdd11e8e5d93d268f13b54feedbed7e3fa595.tar.xz | |
cmd/asm: add essential instructions for AES-GCM on ARM64
This change adds VLD1, VST1, VPMULL{2}, VEXT, VRBIT, VUSHR and VSHL instructions
for supporting AES-GCM implementation later.
Fixes #24400
Change-Id: I556feb88067f195cbe25629ec2b7a817acc58709
Reviewed-on: https://go-review.googlesource.com/101095
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/cmd/internal/obj/arm64')
| -rw-r--r-- | src/cmd/internal/obj/arm64/a.out.go | 7 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/anames.go | 6 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/asm7.go | 335 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/doc.go | 44 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/list7.go | 2 |
5 files changed, 387 insertions, 7 deletions
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 5a6c4dc5f1..473ce08fe3 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -877,6 +877,12 @@ const ( AVSUB AVFMLA AVFMLS + AVPMULL + AVPMULL2 + AVEXT + AVRBIT + AVUSHR + AVSHL ALAST AB = obj.AJMP ABL = obj.ACALL @@ -900,6 +906,7 @@ const ( ARNG_2S ARNG_4S ARNG_2D + ARNG_1Q ARNG_B ARNG_H ARNG_S diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 77cd27c212..64348d7534 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -388,5 +388,11 @@ var Anames = []string{ "VSUB", "VFMLA", "VFMLS", + "VPMULL", + "VPMULL2", + "VEXT", + "VRBIT", + "VUSHR", + "VSHL", "LAST", } diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index baf0df0408..a719bd0a74 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -643,6 +643,9 @@ var optab = []Optab{ {AVLD1, C_ZOREG, C_NONE, C_LIST, 81, 4, 0, 0, 0}, {AVLD1, C_LOREG, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, {AVLD1, C_ROFF, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, + {AVLD1, C_LOREG, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST}, + {AVLD1, C_ROFF, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST}, + {AVLD1, C_LOREG, C_NONE, C_ELEM, 97, 4, 0, 0, 0}, {AVMOV, C_ELEM, C_NONE, C_REG, 73, 4, 0, 0, 0}, {AVMOV, C_REG, C_NONE, C_ARNG, 82, 4, 0, 0, 0}, {AVMOV, C_ELEM, C_NONE, C_ELEM, 92, 4, 0, 0, 0}, @@ -653,11 +656,17 @@ var optab = []Optab{ {AVST1, C_LIST, C_NONE, C_ZOREG, 84, 4, 0, 0, 0}, {AVST1, C_LIST, C_NONE, C_LOREG, 84, 4, 0, 0, C_XPOST}, {AVST1, C_LIST, C_NONE, C_ROFF, 84, 4, 0, 0, C_XPOST}, + {AVST1, C_ELEM, C_NONE, C_LOREG, 96, 4, 0, 0, C_XPOST}, + {AVST1, C_ELEM, C_NONE, C_ROFF, 96, 4, 0, 0, C_XPOST}, + {AVST1, C_ELEM, C_NONE, C_LOREG, 96, 4, 0, 0, 0}, {AVDUP, C_ELEM, C_NONE, C_ARNG, 79, 4, 0, 0, 0}, {AVADDV, C_ARNG, C_NONE, C_VREG, 85, 4, 0, 0, 0}, {AVCNT, C_ARNG, C_NONE, 
C_ARNG, 29, 4, 0, 0, 0}, {AVMOVI, C_ADDCON, C_NONE, C_ARNG, 86, 4, 0, 0, 0}, {AVFMLA, C_ARNG, C_ARNG, C_ARNG, 72, 4, 0, 0, 0}, + {AVPMULL, C_ARNG, C_ARNG, C_ARNG, 93, 4, 0, 0, 0}, + {AVEXT, C_VCON, C_ARNG, C_ARNG, 94, 4, 0, 0, 0}, + {AVUSHR, C_VCON, C_ARNG, C_ARNG, 95, 4, 0, 0, 0}, {obj.AUNDEF, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0}, {obj.APCDATA, C_VCON, C_NONE, C_VCON, 0, 0, 0, 0, 0}, @@ -1527,7 +1536,8 @@ func (c *ctxt7) oplook(p *obj.Prog) *Optab { if ops == nil { ops = optab } - return &ops[0] + // Turn illegal instruction into an UNDEF, avoid crashing in asmout + return &Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0} } func cmp(a int, b int) bool { @@ -2231,16 +2241,25 @@ func buildop(ctxt *obj.Link) { case AVFMLA: oprangeset(AVFMLS, t) + case AVPMULL: + oprangeset(AVPMULL2, t) + + case AVUSHR: + oprangeset(AVSHL, t) + + case AVREV32: + oprangeset(AVRBIT, t) + case ASHA1H, AVCNT, AVMOV, AVLD1, - AVREV32, AVST1, AVDUP, AVMOVS, AVMOVI, - APRFM: + APRFM, + AVEXT: break case obj.ANOP, @@ -3758,14 +3777,18 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { c.ctxt.Diag("invalid arrangement: %v\n", p) } - if (p.As == AVMOV) && (af != ARNG_16B && af != ARNG_8B) { - c.ctxt.Diag("invalid arrangement on op %v", p.As) + if (p.As == AVMOV || p.As == AVRBIT) && (af != ARNG_16B && af != ARNG_8B) { + c.ctxt.Diag("invalid arrangement: %v", p) } if p.As == AVMOV { o1 |= uint32(rf&31) << 16 } + if p.As == AVRBIT { + size = 1 + } + o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 5) | uint32(rt&31) case 84: /* vst1 [Vt1.<T>, Vt2.<T>, ...], (Rn) */ @@ -3950,6 +3973,291 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 = c.opldrpp(p, p.As) o1 |= (uint32(r&31) << 5) | ((imm >> 3) & 0xfff << 10) | (v & 31) + case 93: /* vpmull{2} Vm.<T>, Vn.<T>, Vd */ + af := int((p.From.Reg >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + a := int((p.Reg >> 5) & 15) + + var Q, size uint32 + if p.As == AVPMULL { + Q = 0 + } else { + Q 
= 1 + } + + var fArng int + switch at { + case ARNG_8H: + if Q == 0 { + fArng = ARNG_8B + } else { + fArng = ARNG_16B + } + size = 0 + case ARNG_1Q: + if Q == 0 { + fArng = ARNG_1D + } else { + fArng = ARNG_2D + } + size = 3 + default: + c.ctxt.Diag("invalid arrangement on Vd.<T>: %v", p) + } + + if af != a || af != fArng { + c.ctxt.Diag("invalid arrangement: %v", p) + } + + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + + o1 |= ((Q&1) << 30) | ((size&3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 94: /* vext $imm4, Vm.<T>, Vn.<T>, Vd.<T> */ + if p.From3Type() != obj.TYPE_REG { + c.ctxt.Diag("illegal combination: %v", p) + break + } + af := int(((p.GetFrom3().Reg) >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + a := int((p.Reg >> 5) & 15) + index := int(p.From.Offset) + + if af != a || af != at { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + var Q uint32 + var b int + if af == ARNG_8B { + Q = 0 + b = 7 + } else if af == ARNG_16B { + Q = 1 + b = 15 + } else { + c.ctxt.Diag("invalid arrangement, should be 8B or 16B: %v", p) + break + } + + if index < 0 || index > b { + c.ctxt.Diag("illegal offset: %v", p) + } + + o1 = c.opirr(p, p.As) + rf := int((p.GetFrom3().Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + + o1 |= ((Q&1) << 30) | (uint32(r&31) << 16) | (uint32(index&15) << 11) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 95: /* vushr $shift, Vn.<T>, Vd.<T> */ + at := int((p.To.Reg >> 5) & 15) + af := int((p.Reg >> 5) & 15) + shift := int(p.From.Offset) + + if af != at { + c.ctxt.Diag("invalid arrangement on op Vn.<T>, Vd.<T>: %v", p) + } + + var Q uint32 + var imax, esize int + + switch af { + case ARNG_8B, ARNG_4H, ARNG_2S: + Q = 0 + case ARNG_16B, ARNG_8H, ARNG_4S, ARNG_2D: + Q = 1 + default: + c.ctxt.Diag("invalid arrangement on op Vn.<T>, Vd.<T>: %v", p) + } + + switch af { + case ARNG_8B, ARNG_16B: + imax = 15 + esize = 8 + case 
ARNG_4H, ARNG_8H: + imax = 31 + esize = 16 + case ARNG_2S, ARNG_4S: + imax = 63 + esize = 32 + case ARNG_2D: + imax = 127 + esize = 64 + } + + imm := 0 + + if p.As == AVUSHR { + imm = esize*2 - shift + if imm < esize || imm > imax { + c.ctxt.Diag("shift out of range: %v", p) + } + } + + if p.As == AVSHL { + imm = esize + shift + if imm > imax { + c.ctxt.Diag("shift out of range: %v", p) + } + } + + o1 = c.opirr(p, p.As) + rt := int((p.To.Reg) & 31) + rf := int((p.Reg) & 31) + + o1 |= ((Q&1) << 30) | (uint32(imm&127) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 96: /* vst1 Vt1.<T>[index], offset(Rn) */ + af := int((p.From.Reg >> 5) & 15) + rt := int((p.From.Reg) & 31) + rf := int((p.To.Reg) & 31) + r := int(p.To.Index & 31) + index := int(p.From.Index) + offset := int32(c.regoff(&p.To)) + + if o.scond == C_XPOST { + if (p.To.Index != 0) && (offset != 0) { + c.ctxt.Diag("invalid offset: %v", p) + } + if p.To.Index == 0 && offset == 0 { + c.ctxt.Diag("invalid offset: %v", p) + } + } + + if offset != 0 { + r = 31 + } + + var Q, S, size int + var opcode uint32 + switch af { + case ARNG_B: + c.checkindex(p, index, 15) + if o.scond == C_XPOST && offset != 0 && offset != 1 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 3 + S = (index >> 2) & 1 + size = index & 3 + opcode = 0 + case ARNG_H: + c.checkindex(p, index, 7) + if o.scond == C_XPOST && offset != 0 && offset != 2 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 2 + S = (index >> 1) & 1 + size = (index & 1) << 1 + opcode = 2 + case ARNG_S: + c.checkindex(p, index, 3) + if o.scond == C_XPOST && offset != 0 && offset != 4 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 1 + S = index & 1 + size = 0 + opcode = 4 + case ARNG_D: + c.checkindex(p, index, 1) + if o.scond == C_XPOST && offset != 0 && offset != 8 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index + S = 0 + size = 1 + opcode = 4 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + + if o.scond == 
C_XPOST { + o1 |= 27 << 23 + } else { + o1 |= 26 << 23 + } + + o1 |= (uint32(Q&1) << 30) | (uint32(r&31) << 16) | ((opcode&7) << 13) | (uint32(S&1) << 12) | (uint32(size&3) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 97: /* vld1 offset(Rn), vt.<T>[index] */ + at := int((p.To.Reg >> 5) & 15) + rt := int((p.To.Reg) & 31) + rf := int((p.From.Reg) & 31) + r := int(p.From.Index & 31) + index := int(p.To.Index) + offset := int32(c.regoff(&p.From)) + + if o.scond == C_XPOST { + if (p.From.Index != 0) && (offset != 0) { + c.ctxt.Diag("invalid offset: %v", p) + } + if p.From.Index == 0 && offset == 0 { + c.ctxt.Diag("invalid offset: %v", p) + } + } + + if offset != 0 { + r = 31 + } + + Q := 0 + S := 0 + size := 0 + var opcode uint32 + switch at { + case ARNG_B: + c.checkindex(p, index, 15) + if o.scond == C_XPOST && offset != 0 && offset != 1 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 3 + S = (index >> 2) & 1 + size = index & 3 + opcode = 0 + case ARNG_H: + c.checkindex(p, index, 7) + if o.scond == C_XPOST && offset != 0 && offset != 2 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 2 + S = (index >> 1) & 1 + size = (index & 1) << 1 + opcode = 2 + case ARNG_S: + c.checkindex(p, index, 3) + if o.scond == C_XPOST && offset != 0 && offset != 4 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index >> 1 + S = index & 1 + size = 0 + opcode = 4 + case ARNG_D: + c.checkindex(p, index, 1) + if o.scond == C_XPOST && offset != 0 && offset != 8 { + c.ctxt.Diag("invalid offset: %v", p) + } + Q = index + S = 0 + size = 1 + opcode = 4 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + + if o.scond == C_XPOST { + o1 |= 110 << 21 + } else { + o1 |= 106 << 21 + } + + o1 |= (uint32(Q&1) << 30) | (uint32(r&31) << 16) | ((opcode&7) << 13) | (uint32(S&1) << 12) | (uint32(size&3) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) } out[0] = o1 out[1] = o2 @@ -4540,6 +4848,12 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVFMLS: 
return 7<<25 | 1<<23 | 1<<21 | 3<<14 | 3<<10 + + case AVPMULL, AVPMULL2: + return 0xE<<24 | 1<<21 | 0x38<<10 + + case AVRBIT: + return 0x2E<<24 | 1<<22 | 0x10<<17 | 5<<12 | 2<<10 } c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) @@ -4726,6 +5040,15 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AHINT: return SYSOP(0, 0, 3, 2, 0, 0, 0x1F) + + case AVEXT: + return 0x2E<<24 | 0<<23 | 0<<21 | 0<<15 + + case AVUSHR: + return 0x5E<<23 | 1<<10 + + case AVSHL: + return 0x1E<<23 | 21<<10 } c.ctxt.Diag("%v: bad irr %v", p, a) @@ -5522,4 +5845,4 @@ func movesize(a obj.As) int { default: return -1 } -}
\ No newline at end of file +} diff --git a/src/cmd/internal/obj/arm64/doc.go b/src/cmd/internal/obj/arm64/doc.go index 7ed2f242c3..918814ea38 100644 --- a/src/cmd/internal/obj/arm64/doc.go +++ b/src/cmd/internal/obj/arm64/doc.go @@ -170,6 +170,11 @@ Go Assembly for ARM64 Reference Manual <T> Is an arrangement specifier and can have the following values: S2, S4, D2 + VEXT: Extracts vector elements from src SIMD registers to dst SIMD register + VEXT $index, <Vm>.<T>, <Vn>.<T>, <Vd>.<T> + <T> is an arrangement specifier and can be B8, B16 + $index is the lowest numbered byte element to be extracted. + VLD1: Load multiple single-element structures VLD1 (Rn), [<Vt>.<T>, <Vt2>.<T> ...] // no offset VLD1.P imm(Rn), [<Vt>.<T>, <Vt2>.<T> ...] // immediate offset variant @@ -177,6 +182,13 @@ Go Assembly for ARM64 Reference Manual <T> Is an arrangement specifier and can have the following values: B8, B16, H4, H8, S2, S4, D1, D2 + VLD1: Load one single-element structure + VLD1 (Rn), <Vt>.<T>[index] // no offset + VLD1.P imm(Rn), <Vt>.<T>[index] // immediate offset variant + VLD1.P (Rn)(Rm), <Vt>.<T>[index] // register offset variant + <T> is an arrangement specifier and can have the following values: + B, H, S, D + VMOV: move VMOV <Vn>.<T>[index], Rd // Move vector element to general-purpose register. <T> Is a source width specifier and can have the following values: @@ -224,11 +236,21 @@ Go Assembly for ARM64 Reference Manual <T> Is an arrangement specifier and can have the following values: B8, B16 + VRBIT: Reverse bit order (vector) + VRBIT <Vn>.<T>, <Vd>.<T> + <T> is an arrangement specifier and can be B8, B16 + VREV32: Reverse elements in 32-bit words (vector).
REV32 <Vn>.<T>, <Vd>.<T> <T> Is an arrangement specifier and can have the following values: B8, B16, H4, H8 + VSHL: Shift Left (immediate) + VSHL $shift, <Vn>.<T>, <Vd>.<T> + <T> is an arrangement specifier and can have the following values: + B8, B16, H4, H8, S2, S4, D1, D2 + $shift Is the left shift amount + VST1: Store multiple single-element structures VST1 [<Vt>.<T>, <Vt2>.<T> ...], (Rn) // no offset VST1.P [<Vt>.<T>, <Vt2>.<T> ...], imm(Rn) // immediate offset variant @@ -246,8 +268,29 @@ Go Assembly for ARM64 Reference Manual <T> Is an arrangement specifier and can have the following values: 8B, 16B, H4, H8, S4 + VST1: Store one single-element structure + VST1 <Vt>.<T>[index], (Rn) // no offset + VST1.P <Vt>.<T>[index], imm(Rn) // immediate offset variant + VST1.P <Vt>.<T>[index], (Rn)(Rm) // register offset variant + <T> Is an arrangement specifier and can have the following values: + B, H, S, D + + VUSHR: Unsigned shift right (immediate) + VUSHR $shift, <Vn>.<T>, <Vd>.<T> + <T> is an arrangement specifier and can have the following values: + B8, B16, H4, H8, S2, S4, D1, D2 + $shift is the right shift amount + + 4. Alphabetical list of cryptographic extension instructions + VPMULL{2}: Polynomial multiply long. + VPMULL{2} <Vm>.<Tb>, <Vn>.<Tb>, <Vd>.<Ta> + VPMULL multiplies corresponding elements in the lower half of the + vectors of two source SIMD registers and VPMULL2 operates on the upper half. + <Ta> is an arrangement specifier, it can be H8, Q1 + <Tb> is an arrangement specifier, it can be B8, B16, D1, D2 + SHA1C, SHA1M, SHA1P: SHA1 hash update.
SHA1C <Vm>.S4, Vn, Vd SHA1M <Vm>.S4, Vn, Vd @@ -270,5 +313,4 @@ Go Assembly for ARM64 Reference Manual SHA256H <Vm>.S4, Vn, Vd SHA256H2 <Vm>.S4, Vn, Vd - */ diff --git a/src/cmd/internal/obj/arm64/list7.go b/src/cmd/internal/obj/arm64/list7.go index 266e2baaee..1bf20ae71b 100644 --- a/src/cmd/internal/obj/arm64/list7.go +++ b/src/cmd/internal/obj/arm64/list7.go @@ -86,6 +86,8 @@ func arrange(a int) string { return "S" case ARNG_D: return "D" + case ARNG_1Q: + return "Q1" default: return "" } |
