aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/internal/obj/arm64
diff options
context:
space:
mode:
authorFangming.Fang <fangming.fang@arm.com>2018-03-16 03:19:01 +0000
committerCherry Zhang <cherryyz@google.com>2018-04-03 15:36:31 +0000
commitef9bdd11e8e5d93d268f13b54feedbed7e3fa595 (patch)
treebd6f3a03106ce352616c1d51d6c3b2787128c30b /src/cmd/internal/obj/arm64
parentdcaf3fb134d5ca3b74a85b765c39aa5b632638cd (diff)
downloadgo-ef9bdd11e8e5d93d268f13b54feedbed7e3fa595.tar.xz
cmd/asm: add essential instructions for AES-GCM on ARM64
This change adds VLD1, VST1, VPMULL{2}, VEXT, VRBIT, VUSHR and VSHL instructions for supporting AES-GCM implementation later. Fixes #24400 Change-Id: I556feb88067f195cbe25629ec2b7a817acc58709 Reviewed-on: https://go-review.googlesource.com/101095 Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/cmd/internal/obj/arm64')
-rw-r--r--src/cmd/internal/obj/arm64/a.out.go7
-rw-r--r--src/cmd/internal/obj/arm64/anames.go6
-rw-r--r--src/cmd/internal/obj/arm64/asm7.go335
-rw-r--r--src/cmd/internal/obj/arm64/doc.go44
-rw-r--r--src/cmd/internal/obj/arm64/list7.go2
5 files changed, 387 insertions, 7 deletions
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go
index 5a6c4dc5f1..473ce08fe3 100644
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -877,6 +877,12 @@ const (
AVSUB
AVFMLA
AVFMLS
+ AVPMULL
+ AVPMULL2
+ AVEXT
+ AVRBIT
+ AVUSHR
+ AVSHL
ALAST
AB = obj.AJMP
ABL = obj.ACALL
@@ -900,6 +906,7 @@ const (
ARNG_2S
ARNG_4S
ARNG_2D
+ ARNG_1Q
ARNG_B
ARNG_H
ARNG_S
diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go
index 77cd27c212..64348d7534 100644
--- a/src/cmd/internal/obj/arm64/anames.go
+++ b/src/cmd/internal/obj/arm64/anames.go
@@ -388,5 +388,11 @@ var Anames = []string{
"VSUB",
"VFMLA",
"VFMLS",
+ "VPMULL",
+ "VPMULL2",
+ "VEXT",
+ "VRBIT",
+ "VUSHR",
+ "VSHL",
"LAST",
}
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index baf0df0408..a719bd0a74 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -643,6 +643,9 @@ var optab = []Optab{
{AVLD1, C_ZOREG, C_NONE, C_LIST, 81, 4, 0, 0, 0},
{AVLD1, C_LOREG, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD1, C_ROFF, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
+ {AVLD1, C_LOREG, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST},
+ {AVLD1, C_ROFF, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST},
+ {AVLD1, C_LOREG, C_NONE, C_ELEM, 97, 4, 0, 0, 0},
{AVMOV, C_ELEM, C_NONE, C_REG, 73, 4, 0, 0, 0},
{AVMOV, C_REG, C_NONE, C_ARNG, 82, 4, 0, 0, 0},
{AVMOV, C_ELEM, C_NONE, C_ELEM, 92, 4, 0, 0, 0},
@@ -653,11 +656,17 @@ var optab = []Optab{
{AVST1, C_LIST, C_NONE, C_ZOREG, 84, 4, 0, 0, 0},
{AVST1, C_LIST, C_NONE, C_LOREG, 84, 4, 0, 0, C_XPOST},
{AVST1, C_LIST, C_NONE, C_ROFF, 84, 4, 0, 0, C_XPOST},
+ {AVST1, C_ELEM, C_NONE, C_LOREG, 96, 4, 0, 0, C_XPOST},
+ {AVST1, C_ELEM, C_NONE, C_ROFF, 96, 4, 0, 0, C_XPOST},
+ {AVST1, C_ELEM, C_NONE, C_LOREG, 96, 4, 0, 0, 0},
{AVDUP, C_ELEM, C_NONE, C_ARNG, 79, 4, 0, 0, 0},
{AVADDV, C_ARNG, C_NONE, C_VREG, 85, 4, 0, 0, 0},
{AVCNT, C_ARNG, C_NONE, C_ARNG, 29, 4, 0, 0, 0},
{AVMOVI, C_ADDCON, C_NONE, C_ARNG, 86, 4, 0, 0, 0},
{AVFMLA, C_ARNG, C_ARNG, C_ARNG, 72, 4, 0, 0, 0},
+ {AVPMULL, C_ARNG, C_ARNG, C_ARNG, 93, 4, 0, 0, 0},
+ {AVEXT, C_VCON, C_ARNG, C_ARNG, 94, 4, 0, 0, 0},
+ {AVUSHR, C_VCON, C_ARNG, C_ARNG, 95, 4, 0, 0, 0},
{obj.AUNDEF, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0},
{obj.APCDATA, C_VCON, C_NONE, C_VCON, 0, 0, 0, 0, 0},
@@ -1527,7 +1536,8 @@ func (c *ctxt7) oplook(p *obj.Prog) *Optab {
if ops == nil {
ops = optab
}
- return &ops[0]
+ // Turn illegal instruction into an UNDEF, avoid crashing in asmout
+ return &Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0}
}
func cmp(a int, b int) bool {
@@ -2231,16 +2241,25 @@ func buildop(ctxt *obj.Link) {
case AVFMLA:
oprangeset(AVFMLS, t)
+ case AVPMULL:
+ oprangeset(AVPMULL2, t)
+
+ case AVUSHR:
+ oprangeset(AVSHL, t)
+
+ case AVREV32:
+ oprangeset(AVRBIT, t)
+
case ASHA1H,
AVCNT,
AVMOV,
AVLD1,
- AVREV32,
AVST1,
AVDUP,
AVMOVS,
AVMOVI,
- APRFM:
+ APRFM,
+ AVEXT:
break
case obj.ANOP,
@@ -3758,14 +3777,18 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
c.ctxt.Diag("invalid arrangement: %v\n", p)
}
- if (p.As == AVMOV) && (af != ARNG_16B && af != ARNG_8B) {
- c.ctxt.Diag("invalid arrangement on op %v", p.As)
+ if (p.As == AVMOV || p.As == AVRBIT) && (af != ARNG_16B && af != ARNG_8B) {
+ c.ctxt.Diag("invalid arrangement: %v", p)
}
if p.As == AVMOV {
o1 |= uint32(rf&31) << 16
}
+ if p.As == AVRBIT {
+ size = 1
+ }
+
o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 5) | uint32(rt&31)
case 84: /* vst1 [Vt1.<T>, Vt2.<T>, ...], (Rn) */
@@ -3950,6 +3973,291 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
o1 = c.opldrpp(p, p.As)
o1 |= (uint32(r&31) << 5) | ((imm >> 3) & 0xfff << 10) | (v & 31)
+ case 93: /* vpmull{2} Vm.<T>, Vn.<T>, Vd */
+ af := int((p.From.Reg >> 5) & 15)
+ at := int((p.To.Reg >> 5) & 15)
+ a := int((p.Reg >> 5) & 15)
+
+ var Q, size uint32
+ if p.As == AVPMULL {
+ Q = 0
+ } else {
+ Q = 1
+ }
+
+ var fArng int
+ switch at {
+ case ARNG_8H:
+ if Q == 0 {
+ fArng = ARNG_8B
+ } else {
+ fArng = ARNG_16B
+ }
+ size = 0
+ case ARNG_1Q:
+ if Q == 0 {
+ fArng = ARNG_1D
+ } else {
+ fArng = ARNG_2D
+ }
+ size = 3
+ default:
+ c.ctxt.Diag("invalid arrangement on Vd.<T>: %v", p)
+ }
+
+ if af != a || af != fArng {
+ c.ctxt.Diag("invalid arrangement: %v", p)
+ }
+
+ o1 = c.oprrr(p, p.As)
+ rf := int((p.From.Reg) & 31)
+ rt := int((p.To.Reg) & 31)
+ r := int((p.Reg) & 31)
+
+ o1 |= ((Q&1) << 30) | ((size&3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31)
+
+ case 94: /* vext $imm4, Vm.<T>, Vn.<T>, Vd.<T> */
+ if p.From3Type() != obj.TYPE_REG {
+ c.ctxt.Diag("illegal combination: %v", p)
+ break
+ }
+ af := int(((p.GetFrom3().Reg) >> 5) & 15)
+ at := int((p.To.Reg >> 5) & 15)
+ a := int((p.Reg >> 5) & 15)
+ index := int(p.From.Offset)
+
+ if af != a || af != at {
+ c.ctxt.Diag("invalid arrangement: %v", p)
+ break
+ }
+
+ var Q uint32
+ var b int
+ if af == ARNG_8B {
+ Q = 0
+ b = 7
+ } else if af == ARNG_16B {
+ Q = 1
+ b = 15
+ } else {
+ c.ctxt.Diag("invalid arrangement, should be 8B or 16B: %v", p)
+ break
+ }
+
+ if index < 0 || index > b {
+ c.ctxt.Diag("illegal offset: %v", p)
+ }
+
+ o1 = c.opirr(p, p.As)
+ rf := int((p.GetFrom3().Reg) & 31)
+ rt := int((p.To.Reg) & 31)
+ r := int((p.Reg) & 31)
+
+ o1 |= ((Q&1) << 30) | (uint32(r&31) << 16) | (uint32(index&15) << 11) | (uint32(rf&31) << 5) | uint32(rt&31)
+
+ case 95: /* vushr $shift, Vn.<T>, Vd.<T> */
+ at := int((p.To.Reg >> 5) & 15)
+ af := int((p.Reg >> 5) & 15)
+ shift := int(p.From.Offset)
+
+ if af != at {
+ c.ctxt.Diag("invalid arrangement on op Vn.<T>, Vd.<T>: %v", p)
+ }
+
+ var Q uint32
+ var imax, esize int
+
+ switch af {
+ case ARNG_8B, ARNG_4H, ARNG_2S:
+ Q = 0
+ case ARNG_16B, ARNG_8H, ARNG_4S, ARNG_2D:
+ Q = 1
+ default:
+ c.ctxt.Diag("invalid arrangement on op Vn.<T>, Vd.<T>: %v", p)
+ }
+
+ switch af {
+ case ARNG_8B, ARNG_16B:
+ imax = 15
+ esize = 8
+ case ARNG_4H, ARNG_8H:
+ imax = 31
+ esize = 16
+ case ARNG_2S, ARNG_4S:
+ imax = 63
+ esize = 32
+ case ARNG_2D:
+ imax = 127
+ esize = 64
+ }
+
+ imm := 0
+
+ if p.As == AVUSHR {
+ imm = esize*2 - shift
+ if imm < esize || imm > imax {
+ c.ctxt.Diag("shift out of range: %v", p)
+ }
+ }
+
+ if p.As == AVSHL {
+ imm = esize + shift
+ if imm > imax {
+ c.ctxt.Diag("shift out of range: %v", p)
+ }
+ }
+
+ o1 = c.opirr(p, p.As)
+ rt := int((p.To.Reg) & 31)
+ rf := int((p.Reg) & 31)
+
+ o1 |= ((Q&1) << 30) | (uint32(imm&127) << 16) | (uint32(rf&31) << 5) | uint32(rt&31)
+
+ case 96: /* vst1 Vt1.<T>[index], offset(Rn) */
+ af := int((p.From.Reg >> 5) & 15)
+ rt := int((p.From.Reg) & 31)
+ rf := int((p.To.Reg) & 31)
+ r := int(p.To.Index & 31)
+ index := int(p.From.Index)
+ offset := int32(c.regoff(&p.To))
+
+ if o.scond == C_XPOST {
+ if (p.To.Index != 0) && (offset != 0) {
+ c.ctxt.Diag("invalid offset: %v", p)
+ }
+ if p.To.Index == 0 && offset == 0 {
+ c.ctxt.Diag("invalid offset: %v", p)
+ }
+ }
+
+ if offset != 0 {
+ r = 31
+ }
+
+ var Q, S, size int
+ var opcode uint32
+ switch af {
+ case ARNG_B:
+ c.checkindex(p, index, 15)
+ if o.scond == C_XPOST && offset != 0 && offset != 1 {
+ c.ctxt.Diag("invalid offset: %v", p)
+ }
+ Q = index >> 3
+ S = (index >> 2) & 1
+ size = index & 3
+ opcode = 0
+ case ARNG_H:
+ c.checkindex(p, index, 7)
+ if o.scond == C_XPOST && offset != 0 && offset != 2 {
+ c.ctxt.Diag("invalid offset: %v", p)
+ }
+ Q = index >> 2
+ S = (index >> 1) & 1
+ size = (index & 1) << 1
+ opcode = 2
+ case ARNG_S:
+ c.checkindex(p, index, 3)
+ if o.scond == C_XPOST && offset != 0 && offset != 4 {
+ c.ctxt.Diag("invalid offset: %v", p)
+ }
+ Q = index >> 1
+ S = index & 1
+ size = 0
+ opcode = 4
+ case ARNG_D:
+ c.checkindex(p, index, 1)
+ if o.scond == C_XPOST && offset != 0 && offset != 8 {
+ c.ctxt.Diag("invalid offset: %v", p)
+ }
+ Q = index
+ S = 0
+ size = 1
+ opcode = 4
+ default:
+ c.ctxt.Diag("invalid arrangement: %v", p)
+ }
+
+ if o.scond == C_XPOST {
+ o1 |= 27 << 23
+ } else {
+ o1 |= 26 << 23
+ }
+
+ o1 |= (uint32(Q&1) << 30) | (uint32(r&31) << 16) | ((opcode&7) << 13) | (uint32(S&1) << 12) | (uint32(size&3) << 10) | (uint32(rf&31) << 5) | uint32(rt&31)
+
+ case 97: /* vld1 offset(Rn), vt.<T>[index] */
+ at := int((p.To.Reg >> 5) & 15)
+ rt := int((p.To.Reg) & 31)
+ rf := int((p.From.Reg) & 31)
+ r := int(p.From.Index & 31)
+ index := int(p.To.Index)
+ offset := int32(c.regoff(&p.From))
+
+ if o.scond == C_XPOST {
+ if (p.From.Index != 0) && (offset != 0) {
+ c.ctxt.Diag("invalid offset: %v", p)
+ }
+ if p.From.Index == 0 && offset == 0 {
+ c.ctxt.Diag("invalid offset: %v", p)
+ }
+ }
+
+ if offset != 0 {
+ r = 31
+ }
+
+ Q := 0
+ S := 0
+ size := 0
+ var opcode uint32
+ switch at {
+ case ARNG_B:
+ c.checkindex(p, index, 15)
+ if o.scond == C_XPOST && offset != 0 && offset != 1 {
+ c.ctxt.Diag("invalid offset: %v", p)
+ }
+ Q = index >> 3
+ S = (index >> 2) & 1
+ size = index & 3
+ opcode = 0
+ case ARNG_H:
+ c.checkindex(p, index, 7)
+ if o.scond == C_XPOST && offset != 0 && offset != 2 {
+ c.ctxt.Diag("invalid offset: %v", p)
+ }
+ Q = index >> 2
+ S = (index >> 1) & 1
+ size = (index & 1) << 1
+ opcode = 2
+ case ARNG_S:
+ c.checkindex(p, index, 3)
+ if o.scond == C_XPOST && offset != 0 && offset != 4 {
+ c.ctxt.Diag("invalid offset: %v", p)
+ }
+ Q = index >> 1
+ S = index & 1
+ size = 0
+ opcode = 4
+ case ARNG_D:
+ c.checkindex(p, index, 1)
+ if o.scond == C_XPOST && offset != 0 && offset != 8 {
+ c.ctxt.Diag("invalid offset: %v", p)
+ }
+ Q = index
+ S = 0
+ size = 1
+ opcode = 4
+ default:
+ c.ctxt.Diag("invalid arrangement: %v", p)
+ }
+
+ if o.scond == C_XPOST {
+ o1 |= 110 << 21
+ } else {
+ o1 |= 106 << 21
+ }
+
+ o1 |= (uint32(Q&1) << 30) | (uint32(r&31) << 16) | ((opcode&7) << 13) | (uint32(S&1) << 12) | (uint32(size&3) << 10) | (uint32(rf&31) << 5) | uint32(rt&31)
}
out[0] = o1
out[1] = o2
@@ -4540,6 +4848,12 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
case AVFMLS:
return 7<<25 | 1<<23 | 1<<21 | 3<<14 | 3<<10
+
+ case AVPMULL, AVPMULL2:
+ return 0xE<<24 | 1<<21 | 0x38<<10
+
+ case AVRBIT:
+ return 0x2E<<24 | 1<<22 | 0x10<<17 | 5<<12 | 2<<10
}
c.ctxt.Diag("%v: bad rrr %d %v", p, a, a)
@@ -4726,6 +5040,15 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 {
case AHINT:
return SYSOP(0, 0, 3, 2, 0, 0, 0x1F)
+
+ case AVEXT:
+ return 0x2E<<24 | 0<<23 | 0<<21 | 0<<15
+
+ case AVUSHR:
+ return 0x5E<<23 | 1<<10
+
+ case AVSHL:
+ return 0x1E<<23 | 21<<10
}
c.ctxt.Diag("%v: bad irr %v", p, a)
@@ -5522,4 +5845,4 @@ func movesize(a obj.As) int {
default:
return -1
}
-} \ No newline at end of file
+}
diff --git a/src/cmd/internal/obj/arm64/doc.go b/src/cmd/internal/obj/arm64/doc.go
index 7ed2f242c3..918814ea38 100644
--- a/src/cmd/internal/obj/arm64/doc.go
+++ b/src/cmd/internal/obj/arm64/doc.go
@@ -170,6 +170,11 @@ Go Assembly for ARM64 Reference Manual
<T> Is an arrangement specifier and can have the following values:
S2, S4, D2
+ VEXT: Extracts vector elements from src SIMD registers to dst SIMD register
+ VEXT $index, <Vm>.<T>, <Vn>.<T>, <Vd>.<T>
+ <T> is an arrangement specifier and can be B8, B16
+ $index is the lowest numbered byte element to be extracted.
+
VLD1: Load multiple single-element structures
VLD1 (Rn), [<Vt>.<T>, <Vt2>.<T> ...] // no offset
VLD1.P imm(Rn), [<Vt>.<T>, <Vt2>.<T> ...] // immediate offset variant
@@ -177,6 +182,13 @@ Go Assembly for ARM64 Reference Manual
<T> Is an arrangement specifier and can have the following values:
B8, B16, H4, H8, S2, S4, D1, D2
+ VLD1: Load one single-element structure
+ VLD1 (Rn), <Vt>.<T>[index] // no offset
+ VLD1.P imm(Rn), <Vt>.<T>[index] // immediate offset variant
+ VLD1.P (Rn)(Rm), <Vt>.<T>[index] // register offset variant
+ <T> is an arrangement specifier and can have the following values:
+ B, H, S, D
+
VMOV: move
VMOV <Vn>.<T>[index], Rd // Move vector element to general-purpose register.
<T> Is a source width specifier and can have the following values:
@@ -224,11 +236,21 @@ Go Assembly for ARM64 Reference Manual
<T> Is an arrangement specifier and can have the following values:
B8, B16
+ VRBIT: Reverse bit order (vector)
+ VRBIT <Vn>.<T>, <Vd>.<T>
+ <T> is an arrangement specifier and can be B8, B16
+
VREV32: Reverse elements in 32-bit words (vector).
REV32 <Vn>.<T>, <Vd>.<T>
<T> Is an arrangement specifier and can have the following values:
B8, B16, H4, H8
+ VSHL: Shift Left(immediate)
+ VSHL $shift, <Vn>.<T>, <Vd>.<T>
+ <T> is an arrangement specifier and can have the following values:
+ B8, B16, H4, H8, S2, S4, D2
+ $shift Is the left shift amount
+
VST1: Store multiple single-element structures
VST1 [<Vt>.<T>, <Vt2>.<T> ...], (Rn) // no offset
VST1.P [<Vt>.<T>, <Vt2>.<T> ...], imm(Rn) // immediate offset variant
@@ -246,8 +268,29 @@ Go Assembly for ARM64 Reference Manual
<T> Is an arrangement specifier and can have the following values:
8B, 16B, H4, H8, S4
+ VST1: Store one single-element structure
+ VST1 <Vt>.<T>[index], (Rn) // no offset
+ VST1.P <Vt>.<T>[index], imm(Rn) // immediate offset variant
+ VST1.P <Vt>.<T>[index], (Rn)(Rm) // register offset variant
+ <T> Is an arrangement specifier and can have the following values:
+ B, H, S, D
+
+ VUSHR: Unsigned shift right(immediate)
+ VUSHR $shift, <Vn>.<T>, <Vd>.<T>
+ <T> is an arrangement specifier and can have the following values:
+ B8, B16, H4, H8, S2, S4, D2
+ $shift is the right shift amount
+
+
4. Alphabetical list of cryptographic extension instructions
+ VPMULL{2}: Polynomial multiply long.
+ VPMULL{2} <Vm>.<Tb>, <Vn>.<Tb>, <Vd>.<Ta>
+ VPMULL multiplies corresponding elements in the lower half of the
+ vectors of two source SIMD registers and VPMULL2 operates in the upper half.
+ <Ta> is an arrangement specifier, it can be H8, Q1
+ <Tb> is an arrangement specifier, it can be B8, B16, D1, D2
+
SHA1C, SHA1M, SHA1P: SHA1 hash update.
SHA1C <Vm>.S4, Vn, Vd
SHA1M <Vm>.S4, Vn, Vd
@@ -270,5 +313,4 @@ Go Assembly for ARM64 Reference Manual
SHA256H <Vm>.S4, Vn, Vd
SHA256H2 <Vm>.S4, Vn, Vd
-
*/
diff --git a/src/cmd/internal/obj/arm64/list7.go b/src/cmd/internal/obj/arm64/list7.go
index 266e2baaee..1bf20ae71b 100644
--- a/src/cmd/internal/obj/arm64/list7.go
+++ b/src/cmd/internal/obj/arm64/list7.go
@@ -86,6 +86,8 @@ func arrange(a int) string {
return "S"
case ARNG_D:
return "D"
+ case ARNG_1Q:
+ return "Q1"
default:
return ""
}