aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMeng Zhuo <mengzhuo1203@gmail.com>2019-10-02 11:50:34 +0800
committerCherry Zhang <cherryyz@google.com>2019-10-03 20:31:11 +0000
commit2bf7a925712dca5646f9215cda17c5b61eea14ce (patch)
tree9ee32fcbdfb5e30b26cd1313a31f0f8308b78bcd /src
parent5fe3b49a0540aacf685273a43b0fb31b44cf5dd6 (diff)
downloadgo-2bf7a925712dca5646f9215cda17c5b61eea14ce.tar.xz
cmd/asm: add VLD[1-4]R vector instructions on arm64
This change adds VLD1R, VLD2R, VLD3R, VLD4R Change-Id: Ie19e9ae02fdfc94b9344acde8c9938849efb0bf0 Reviewed-on: https://go-review.googlesource.com/c/go/+/181697 Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
Diffstat (limited to 'src')
-rw-r--r--src/cmd/asm/internal/asm/testdata/arm64.s12
-rw-r--r--src/cmd/internal/obj/arm64/a.out.go4
-rw-r--r--src/cmd/internal/obj/arm64/anames.go4
-rw-r--r--src/cmd/internal/obj/arm64/asm7.go52
4 files changed, 53 insertions, 19 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index 9f19ff1e8d..93f70045b7 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -352,6 +352,18 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VLD4 (R15), [V10.H4, V11.H4, V12.H4, V13.H4] // ea05400c
VLD4.P 32(R24), [V31.B8, V0.B8, V1.B8, V2.B8] // 1f03df0c
VLD4.P (R13)(R9), [V14.S2, V15.S2, V16.S2, V17.S2] // VLD4.P (R13)(R9*1), [V14.S2,V15.S2,V16.S2,V17.S2] // ae09c90c
+ VLD1R (R0), [V0.B16] // 00c0404d
+ VLD1R.P 16(R0), [V0.B16] // 00c0df4d
+ VLD1R.P (R15)(R1), [V15.H4] // VLD1R.P (R15)(R1*1), [V15.H4] // efc5c10d
+ VLD2R (R15), [V15.H4, V16.H4] // efc5600d
+ VLD2R.P 32(R0), [V0.D2, V1.D2] // 00ccff4d
+ VLD2R.P (R0)(R5), [V31.D1, V0.D1] // VLD2R.P (R0)(R5*1), [V31.D1, V0.D1] // 1fcce50d
+ VLD3R (RSP), [V31.S2, V0.S2, V1.S2] // ffeb400d
+ VLD3R.P 24(R15), [V15.H4, V16.H4, V17.H4] // efe5df0d
+ VLD3R.P (R15)(R6), [V15.H8, V16.H8, V17.H8] // VLD3R.P (R15)(R6*1), [V15.H8, V16.H8, V17.H8] // efe5c64d
+ VLD4R (R0), [V0.B8, V1.B8, V2.B8, V3.B8] // 00e0600d
+ VLD4R.P 64(RSP), [V31.S4, V0.S4, V1.S4, V2.S4] // ffebff4d
+ VLD4R.P (R15)(R9), [V15.H4, V16.H4, V17.H4, V18.H4] // VLD4R.P (R15)(R9*1), [V15.H4, V16.H4, V17.H4, V18.H4] // efe5e90d
VST1.P [V24.S2], 8(R2) // 58789f0c
VST1 [V29.S2, V30.S2], (R29) // bdab000c
VST1 [V14.H4, V15.H4, V16.H4], (R27) // 6e67000c
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go
index f793cdc4f9..4e5eb75a22 100644
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -956,6 +956,10 @@ const (
AVLD2
AVLD3
AVLD4
+ AVLD1R
+ AVLD2R
+ AVLD3R
+ AVLD4R
AVORR
AVREV32
AVREV64
diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go
index 621af6c195..2c277dfb95 100644
--- a/src/cmd/internal/obj/arm64/anames.go
+++ b/src/cmd/internal/obj/arm64/anames.go
@@ -463,6 +463,10 @@ var Anames = []string{
"VLD2",
"VLD3",
"VLD4",
+ "VLD1R",
+ "VLD2R",
+ "VLD3R",
+ "VLD4R",
"VORR",
"VREV32",
"VREV64",
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 47586ba262..c7fa943e7e 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -784,15 +784,9 @@ var optab = []Optab{
{AVLD1, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
{AVLD1, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD1, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
- {AVLD2, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
- {AVLD2, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
- {AVLD2, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
- {AVLD3, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
- {AVLD3, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
- {AVLD3, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
- {AVLD4, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
- {AVLD4, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
- {AVLD4, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
+ {AVLD1R, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
+ {AVLD1R, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
+ {AVLD1R, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD1, C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST},
{AVLD1, C_ROFF, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST},
{AVLD1, C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, 0},
@@ -2709,13 +2703,18 @@ func buildop(ctxt *obj.Link) {
case AVZIP1:
oprangeset(AVZIP2, t)
+ case AVLD1R:
+ oprangeset(AVLD2, t)
+ oprangeset(AVLD2R, t)
+ oprangeset(AVLD3, t)
+ oprangeset(AVLD3R, t)
+ oprangeset(AVLD4, t)
+ oprangeset(AVLD4R, t)
+
case ASHA1H,
AVCNT,
AVMOV,
AVLD1,
- AVLD2,
- AVLD3,
- AVLD4,
AVST1,
AVST2,
AVST3,
@@ -2803,7 +2802,7 @@ func (c *ctxt7) checkindex(p *obj.Prog, index, maxindex int) {
func (c *ctxt7) checkoffset(p *obj.Prog, as obj.As) {
var offset, list, n, expect int64
switch as {
- case AVLD1, AVLD2, AVLD3, AVLD4:
+ case AVLD1, AVLD2, AVLD3, AVLD4, AVLD1R, AVLD2R, AVLD3R, AVLD4R:
offset = p.From.Offset
list = p.To.Offset
case AVST1, AVST2, AVST3, AVST4:
@@ -2836,11 +2835,13 @@ func (c *ctxt7) checkoffset(p *obj.Prog, as obj.As) {
switch as {
case AVLD1, AVST1:
return
- case AVLD2, AVST2:
+ case AVLD1R:
+ expect = 1
+ case AVLD2, AVST2, AVLD2R:
expect = 2
- case AVLD3, AVST3:
+ case AVLD3, AVST3, AVLD3R:
expect = 3
- case AVLD4, AVST4:
+ case AVLD4, AVST4, AVLD4R:
expect = 4
}
@@ -4344,10 +4345,10 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
}
o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31)
- case 81: /* vld[1-4] (Rn), [Vt1.<T>, Vt2.<T>, ...] */
+ case 81: /* vld[1-4]|vld[1-4]r (Rn), [Vt1.<T>, Vt2.<T>, ...] */
c.checkoffset(p, p.As)
r := int(p.From.Reg)
- o1 = 3<<26 | 1<<22
+ o1 = c.oprrr(p, p.As)
if o.scond == C_XPOST {
o1 |= 1 << 23
if p.From.Index == 0 {
@@ -4358,7 +4359,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
if isRegShiftOrExt(&p.From) {
c.ctxt.Diag("invalid extended register op: %v\n", p)
}
- o1 |= uint32(p.From.Index&31) << 16
+ o1 |= uint32(p.From.Index&0x1f) << 16
}
}
o1 |= uint32(p.To.Offset)
@@ -5591,6 +5592,15 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
case AVRBIT:
return 0x2E<<24 | 1<<22 | 0x10<<17 | 5<<12 | 2<<10
+
+ case AVLD1, AVLD2, AVLD3, AVLD4:
+ return 3<<26 | 1<<22
+
+ case AVLD1R, AVLD3R:
+ return 0xD<<24 | 1<<22
+
+ case AVLD2R, AVLD4R:
+ return 0xD<<24 | 3<<21
}
c.ctxt.Diag("%v: bad rrr %d %v", p, a, a)
@@ -6779,6 +6789,10 @@ func (c *ctxt7) maskOpvldvst(p *obj.Prog, o1 uint32) uint32 {
o1 &^= 0xf000 // mask out "opcode" field (bit 12-15)
switch p.As {
+ case AVLD1R, AVLD2R:
+ o1 |= 0xC << 12
+ case AVLD3R, AVLD4R:
+ o1 |= 0xE << 12
case AVLD2, AVST2:
o1 |= 8 << 12
case AVLD3, AVST3: