diff options
| -rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64.s | 62 | ||||
| -rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64error.s | 16 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/a.out.go | 8 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/anames.go | 8 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/asm7.go | 64 |
5 files changed, 153 insertions, 5 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 2f85308bd3..bb0c9e2c05 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -154,6 +154,26 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VUSHR $8, V1.H8, V2.H8 // 2204186f VUSHR $2, V1.B8, V2.B8 // 22040e2f VUSHR $2, V1.B16, V2.B16 // 22040e6f + VSHRN $7, V1.H8, V0.B8 // 2084090f + VSHRN $15, V1.S4, V0.H4 // 2084110f + VSHRN $31, V1.D2, V0.S2 // 2084210f + VSHRN2 $7, V1.H8, V0.B16 // 2084094f + VSHRN2 $15, V1.S4, V0.H8 // 2084114f + VSHRN2 $31, V1.D2, V0.S4 // 2084214f + VSSHR $2, V0.B8, V1.B8 // 01040e0f + VSSHR $2, V0.B16, V1.B16 // 01040e4f + VSSHR $8, V0.H4, V1.H4 // 0104180f + VSSHR $8, V0.H8, V1.H8 // 0104184f + VSSHR $16, V0.S2, V1.S2 // 0104300f + VSSHR $16, V0.S4, V1.S4 // 0104304f + VSSHR $32, V0.D2, V1.D2 // 0104604f + VSRSHR $2, V0.B8, V1.B8 // 01240e0f + VSRSHR $2, V0.B16, V1.B16 // 01240e4f + VSRSHR $8, V0.H4, V1.H4 // 0124180f + VSRSHR $8, V0.H8, V1.H8 // 0124184f + VSRSHR $16, V0.S2, V1.S2 // 0124300f + VSRSHR $16, V0.S4, V1.S4 // 0124304f + VSRSHR $32, V0.D2, V1.D2 // 0124604f VSHL $56, V1.D2, V2.D2 // 2254784f VSHL $24, V1.S4, V2.S4 // 2254384f VSHL $24, V1.S2, V2.S2 // 2254380f @@ -161,6 +181,48 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VSHL $8, V1.H8, V2.H8 // 2254184f VSHL $2, V1.B8, V2.B8 // 22540a0f VSHL $2, V1.B16, V2.B16 // 22540a4f + VSQSHL $56, V1.D2, V2.D2 // 2274784f + VSQSHL $24, V1.S4, V2.S4 // 2274384f + VSQSHL $24, V1.S2, V2.S2 // 2274380f + VSQSHL $8, V1.H4, V2.H4 // 2274180f + VSQSHL $8, V1.H8, V2.H8 // 2274184f + VSQSHL $2, V1.B8, V2.B8 // 22740a0f + VSQSHL $2, V1.B16, V2.B16 // 22740a4f + VUQSHL $56, V1.D2, V2.D2 // 2274786f + VUQSHL $24, V1.S4, V2.S4 // 2274386f + VUQSHL $24, V1.S2, V2.S2 // 2274382f + VUQSHL $8, V1.H4, V2.H4 // 2274182f + VUQSHL $8, V1.H8, V2.H8 // 2274186f + VUQSHL $2, V1.B8, V2.B8 // 22740a2f + VUQSHL $2, V1.B16, V2.B16 // 22740a6f + VSSHL V1.D2, V2.D2, V3.D2 // 4344e14e + VSSHL V1.S4, V2.S4, V3.S4 // 4344a14e + VSSHL V1.S2, V2.S2, V3.S2 // 4344a10e + VSSHL V1.H4, V2.H4, V3.H4 // 4344610e + VSSHL V1.H8, V2.H8, V3.H8 // 4344614e + VSSHL V1.B8, V2.B8, V3.B8 // 4344210e + VSSHL V1.B16, V2.B16, V3.B16 // 4344214e + VUSHL V1.D2, V2.D2, V3.D2 // 4344e16e + VUSHL V1.S4, V2.S4, V3.S4 // 4344a16e + VUSHL V1.S2, V2.S2, V3.S2 // 4344a12e + VUSHL V1.H4, V2.H4, V3.H4 // 4344612e + VUSHL V1.H8, V2.H8, V3.H8 // 4344616e + VUSHL V1.B8, V2.B8, V3.B8 // 4344212e + VUSHL V1.B16, V2.B16, V3.B16 // 4344216e + VSQSHL V1.D2, V2.D2, V3.D2 // 434ce14e + VSQSHL V1.S4, V2.S4, V3.S4 // 434ca14e + VSQSHL V1.S2, V2.S2, V3.S2 // 434ca10e + VSQSHL V1.H4, V2.H4, V3.H4 // 434c610e + VSQSHL V1.H8, V2.H8, V3.H8 // 434c614e + VSQSHL V1.B8, V2.B8, V3.B8 // 434c210e + VSQSHL V1.B16, V2.B16, V3.B16 // 434c214e + VUQSHL V1.D2, V2.D2, V3.D2 // 434ce16e + VUQSHL V1.S4, V2.S4, V3.S4 // 434ca16e + VUQSHL V1.S2, V2.S2, V3.S2 // 434ca12e + VUQSHL V1.H4, V2.H4, V3.H4 // 434c612e + VUQSHL V1.H8, V2.H8, V3.H8 // 434c616e + VUQSHL V1.B8, V2.B8, V3.B8 // 434c212e + VUQSHL V1.B16, V2.B16, V3.B16 // 434c216e VSRI $56, V1.D2, V2.D2 // 2244486f VSRI $24, V1.S4, V2.S4 // 2244286f VSRI $24, V1.S2, V2.S2 // 2244282f diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 71c025ca55..72a22896c1 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -147,6 +147,10 @@ TEXT errors(SB),$0 VRBIT V1.H4, V2.H4 // ERROR "invalid arrangement" VUSHR $56, V1.D2, V2.H4 // ERROR "invalid arrangement" VUSHR $127, V1.D2, V2.D2 // ERROR "shift out of range" + VSSHR $127, V1.D2, V2.D2 // ERROR "shift out of range" + VSSHR $56, V1.D2, V2.H4 // ERROR "invalid arrangement" + VSRSHR $127, V1.D2, V2.D2 // ERROR "shift out of range" + VSRSHR $56, V1.D2, V2.H4 // ERROR "invalid arrangement" VLD1.P (R8)(R9.SXTX<<2), [V2.B16] // ERROR "invalid extended register" VLD1.P (R8)(R9<<2), [V2.B16] // ERROR "invalid extended register" VST1.P [V1.B16], (R8)(R9.UXTW) // ERROR "invalid extended register" @@ -379,6 +383,10 @@ TEXT errors(SB),$0 VUMIN V1.H4, V2.S4, V3.H4 // ERROR "operand mismatch" VSLI $64, V7.D2, V8.D2 // ERROR "shift out of range" VUSRA $0, V7.D2, V8.D2 // ERROR "shift out of range" + VSSHL V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch" + VUSHL V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch" + VSQSHL V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch" + VUQSHL V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch" CASPD (R3, R4), (R2), (R8, R9) // ERROR "source register pair must start from even register" CASPD (R2, R3), (R2), (R9, R10) // ERROR "destination register pair must start from even register" CASPD (R2, R4), (R2), (R8, R9) // ERROR "source register pair must be contiguous" @@ -429,4 +437,12 @@ TEXT errors(SB),$0 AUTIA1716 $45 // ERROR "illegal combination" AUTIB1716 R0 // ERROR "illegal combination" SB $1 // ERROR "illegal combination" + + // VSHRN/VSHRN2 error test cases - invalid arrangements + VSHRN $8, V1.B8, V0.B8 // ERROR "invalid arrangement" + VSHRN $8, V1.S4, V0.S4 // ERROR "invalid arrangement" + VSHRN $8, V1.H8, V0.H8 // ERROR "invalid arrangement" + VSHRN2 $8, V1.B8, V0.B16 // ERROR "invalid arrangement" + VSHRN2 $8, V1.S4, V0.S4 // ERROR "invalid arrangement" + VSHRN2 $8, V1.H8, V0.H8 // ERROR "invalid arrangement" RET diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index fdc42eabaa..56f68756fd 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -1161,7 +1161,13 @@ const ( AVREV32 AVREV64 AVSHL + AVSHRN + AVSHRN2 AVSLI + AVSQSHL + AVSSHL + AVUSHL + AVUQSHL AVSRI AVST1 AVST2 @@ -1180,6 +1186,8 @@ const ( AVUSHLL AVUSHLL2 AVUSHR + AVSRSHR + AVSSHR AVUSRA AVUXTL AVUXTL2 diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 04986e1748..e40c043edd 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -517,7 +517,13 @@ var Anames = []string{ "VREV32", "VREV64", "VSHL", + "VSHRN", + "VSHRN2", "VSLI", + "VSQSHL", + "VSSHL", + "VUSHL", + "VUQSHL", "VSRI", "VST1", "VST2", @@ -536,6 +542,8 @@ var Anames = []string{ "VUSHLL", "VUSHLL2", "VUSHR", + "VSRSHR", + "VSSHR", "VUSRA", "VUXTL", "VUXTL2", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index ecc62251a0..0d8c1f417e 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -542,7 +542,9 @@ var optab = []Optab{ {AVEXT, C_VCON, C_ARNG, C_ARNG, C_ARNG, C_NONE, 94, 4, 0, 0, 0}, {AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, C_NONE, 100, 4, 0, 0, 0}, {AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 95, 4, 0, 0, 0}, + {AVSQSHL, C_VCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 95, 4, 0, 0, 0}, {AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 72, 4, 0, 0, 0}, + {AVSQSHL, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 72, 4, 0, 0, 0}, {AVUSHLL, C_VCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 102, 4, 0, 0, 0}, {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, C_NONE, 102, 4, 0, 0, 0}, {AVUADDW, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 105, 4, 0, 0, 0}, @@ -3217,6 +3219,8 @@ func buildop(ctxt *obj.Link) { oprangeset(AVBIT, t) oprangeset(AVCMTST, t) oprangeset(AVCMHI, t) + oprangeset(AVSSHL, t) + oprangeset(AVUSHL, t) oprangeset(AVCMHS, t) oprangeset(AVUMAX, t) oprangeset(AVUMIN, t) @@ -3277,6 +3281,13 @@ func buildop(ctxt *obj.Link) { oprangeset(AVSRI, t) oprangeset(AVSLI, t) oprangeset(AVUSRA, t) + oprangeset(AVSSHR, t) + oprangeset(AVSRSHR, t) + oprangeset(AVSHRN, t) + oprangeset(AVSHRN2, t) + + case AVSQSHL: + oprangeset(AVUQSHL, t) case AVREV32: oprangeset(AVCNT, t) @@ -5431,14 +5442,15 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) { af := int((p.Reg >> 5) & 15) shift := int(p.From.Offset) - if af != at { + if af != at && p.As != AVSHRN && p.As != AVSHRN2 { c.ctxt.Diag("invalid arrangement on op Vn.<T>, Vd.<T>: %v", p) + at = af } var Q uint32 var imax, esize int - switch af { + switch at { case ARNG_8B, ARNG_4H, ARNG_2S: Q = 0 case ARNG_16B, ARNG_8H, ARNG_4S, ARNG_2D: @@ -5447,29 +5459,44 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) { c.ctxt.Diag("invalid arrangement on op Vn.<T>, Vd.<T>: %v", p) } - switch af { + atwice := -1 + switch at { case ARNG_8B, ARNG_16B: imax = 15 esize = 8 + atwice = ARNG_8H case ARNG_4H, ARNG_8H: imax = 31 esize = 16 + atwice = ARNG_4S case ARNG_2S, ARNG_4S: imax = 63 esize = 32 + atwice = ARNG_2D case ARNG_2D: imax = 127 esize = 64 } + switch p.As { + case AVSHRN: + if Q != 0 || atwice != af { + c.ctxt.Diag("invalid arrangement on op: %v", p) + } + case AVSHRN2: + if Q != 1 || atwice != af { + c.ctxt.Diag("invalid arrangement on op: %v", p) + } + } + imm := 0 switch p.As { - case AVUSHR, AVSRI, AVUSRA: + case AVUSHR, AVSRI, AVUSRA, AVSSHR, AVSRSHR, AVSHRN, AVSHRN2: imm = esize*2 - shift if imm < esize || imm > imax { c.ctxt.Diag("shift out of range: %v", p) } - case AVSHL, AVSLI: + case AVSHL, AVSLI, AVSQSHL, AVUQSHL: imm = esize + shift if imm > imax { c.ctxt.Diag("shift out of range: %v", p) @@ -6538,9 +6565,21 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 { case AVSUB: op = ASIMDSAME(1, 0, 0x10) + case AVSSHL: + op = ASIMDSAME(0, 0, 0x8) + + case AVUSHL: + op = ASIMDSAME(1, 0, 0x8) + case AVADDP: op = ASIMDSAME(0, 0, 0x17) + case AVSQSHL: + op = ASIMDSAME(0, 0, 0x9) + + case AVUQSHL: + op = ASIMDSAME(1, 0, 0x9) + case AVAND: op = ASIMDSAME(0, 0, 0x03) @@ -6895,9 +6934,24 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVUSHR: return ASIMDSHF(1, 0x00) + case AVSSHR: + return ASIMDSHF(0, 0x00) + + case AVSRSHR: + return ASIMDSHF(0, 0x04) + case AVSHL: return ASIMDSHF(0, 0x0A) + case AVSQSHL: + return ASIMDSHF(0, 0xE) + + case AVUQSHL: + return ASIMDSHF(1, 0xE) + + case AVSHRN, AVSHRN2: + return ASIMDSHF(0, 0x10) + case AVSRI: return ASIMDSHF(1, 0x08) |
