diff options
| -rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64.s | 62 | ||||
| -rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64enc.s | 14 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/a.out.go | 14 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/anames.go | 14 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/asm7.go | 83 |
5 files changed, 180 insertions, 7 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index ad46e479e4..185f31c715 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -199,6 +199,21 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VSRSHR $16, V0.S2, V1.S2 // 0124300f VSRSHR $16, V0.S4, V1.S4 // 0124304f VSRSHR $32, V0.D2, V1.D2 // 0124604f + VFSQRT V0.S2, V1.S2 // 01f8a12e + VFSQRT V0.S4, V1.S4 // 01f8a16e + VFSQRT V0.D2, V1.D2 // 01f8e16e + VFRINTN V0.S2, V1.S2 // 0188210e + VFRINTN V0.S4, V1.S4 // 0188214e + VFRINTN V0.D2, V1.D2 // 0188614e + VFRINTP V0.S2, V1.S2 // 0188a10e + VFRINTP V0.S4, V1.S4 // 0188a14e + VFRINTP V0.D2, V1.D2 // 0188e14e + VFRINTM V0.S2, V1.S2 // 0198210e + VFRINTM V0.S4, V1.S4 // 0198214e + VFRINTM V0.D2, V1.D2 // 0198614e + VFRINTZ V0.S2, V1.S2 // 0198a10e + VFRINTZ V0.S4, V1.S4 // 0198a14e + VFRINTZ V0.D2, V1.D2 // 0198e14e VSHL $56, V1.D2, V2.D2 // 2254784f VSHL $24, V1.S4, V2.S4 // 2254384f VSHL $24, V1.S2, V2.S2 // 2254380f @@ -439,6 +454,53 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VREV16 V7.B16, V5.B16 // e518204e VREV16 V7.B8, V5.B8 // e518200e + VABS V0.B8, V1.B8 // 01b8200e + VABS V0.B16, V1.B16 // 01b8204e + VABS V0.H4, V1.H4 // 01b8600e + VABS V0.H8, V1.H8 // 01b8604e + VABS V0.S2, V1.S2 // 01b8a00e + VABS V0.S4, V1.S4 // 01b8a04e + VABS V0.D2, V1.D2 // 01b8e04e + VCLS V0.B8, V1.B8 // 0148200e + VCLS V0.B16, V1.B16 // 0148204e + VCLS V0.H4, V1.H4 // 0148600e + VCLS V0.H8, V1.H8 // 0148604e + VCLS V0.S2, V1.S2 // 0148a00e + VCLS V0.S4, V1.S4 // 0148a04e + VCLZ V0.B8, V1.B8 // 0148202e + VCLZ V0.B16, V1.B16 // 0148206e + VCLZ V0.H4, V1.H4 // 0148602e + VCLZ V0.H8, V1.H8 // 0148606e + VCLZ V0.S2, V1.S2 // 0148a02e + VCLZ V0.S4, V1.S4 // 0148a06e + VNEG V0.B8, V1.B8 // 01b8202e + VNEG V0.B16, V1.B16 // 01b8206e + VNEG V0.H4, V1.H4 // 01b8602e + VNEG V0.H8, V1.H8 // 01b8606e + VNEG V0.S2, V1.S2 // 01b8a02e + VNEG V0.S4, V1.S4 // 01b8a06e + VNEG V0.D2, V1.D2 // 01b8e06e + VFABS V0.S2, V1.S2 // 01f8a00e + VFABS V0.S4, V1.S4 // 01f8a04e + VFABS V0.D2, V1.D2 // 01f8e04e + VFNEG V0.S2, V1.S2 // 01f8a02e + VFNEG V0.S4, V1.S4 // 01f8a06e + VFNEG V0.D2, V1.D2 // 01f8e06e + VSQABS V0.B8, V1.B8 // 0178200e + VSQABS V0.B16, V1.B16 // 0178204e + VSQABS V0.H4, V1.H4 // 0178600e + VSQABS V0.H8, V1.H8 // 0178604e + VSQABS V0.S2, V1.S2 // 0178a00e + VSQABS V0.S4, V1.S4 // 0178a04e + VSQNEG V0.B8, V1.B8 // 0178202e + VSQNEG V0.B16, V1.B16 // 0178206e + VSQNEG V0.H4, V1.H4 // 0178602e + VSQNEG V0.H8, V1.H8 // 0178606e + VSQNEG V0.S2, V1.S2 // 0178a02e + VSQNEG V0.S4, V1.S4 // 0178a06e + VNOT V0.B8, V1.B8 // 0158202e + VNOT V0.B16, V1.B16 // 0158206e + // logical ops // // make sure constants get encoded into an instruction when it could diff --git a/src/cmd/asm/internal/asm/testdata/arm64enc.s b/src/cmd/asm/internal/asm/testdata/arm64enc.s index 3af0880b78..76151a3570 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64enc.s +++ b/src/cmd/asm/internal/asm/testdata/arm64enc.s @@ -426,7 +426,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8 YIELD // 3f2003d5 //TODO FABD F0, F5, F11 // abd4a07e //TODO VFABD V30.S2, V8.S2, V24.S2 // 18d5be2e - //TODO VFABS V5.S4, V24.S4 // b8f8a04e + VFABS V5.S4, V24.S4 // b8f8a04e FABSS F2, F28 // 5cc0201e FABSD F0, F14 // 0ec0601e //TODO FACGE F25, F16, F0 // 00ee797e @@ -603,7 +603,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8 //TODO VFMULX V12.D[1], V21.D2, V31.D2 // bf9acc6f //TODO FMULX F16, F1, F31 // 3fdc705e //TODO VFMULX V29.S2, V13.S2, V31.S2 // bfdd3d0e - //TODO VFNEG V18.S2, V12.S2 // 4cfaa02e + VFNEG V18.S2, V12.S2 // 4cfaa02e FNEGS F16, F5 // 0542211e FNEGD F31, F31 // ff43611e FNMADDS F17, F22, F6, F20 // d458311f @@ -623,26 +623,26 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8 //TODO VFRINTI V21.D2, V31.D2 // bf9ae16e FRINTIS F17, F17 // 31c2271e FRINTID F9, F15 // 2fc1671e - //TODO VFRINTM V9.D2, V27.D2 // 3b99614e + VFRINTM V9.D2, V27.D2 // 3b99614e FRINTMS F24, F16 // 1043251e FRINTMD F5, F2 // a240651e - //TODO VFRINTN V30.S4, V2.S4 // c28b214e + VFRINTN V30.S4, V2.S4 // c28b214e FRINTNS F26, F14 // 4e43241e FRINTND F28, F12 // 8c43641e - //TODO VFRINTP V27.D2, V31.D2 // 7f8be14e + VFRINTP V27.D2, V31.D2 // 7f8be14e FRINTPS F27, F4 // 64c3241e FRINTPD F6, F22 // d6c0641e //TODO VFRINTX V25.D2, V0.D2 // 209b616e FRINTXS F26, F10 // 4a43271e FRINTXD F16, F12 // 0c42671e - //TODO VFRINTZ V25.S4, V27.S4 // 3b9ba14e + VFRINTZ V25.S4, V27.S4 // 3b9ba14e FRINTZS F3, F28 // 7cc0251e FRINTZD F24, F6 // 06c3651e //TODO FRSQRTE F29, F5 // a5dbe17e //TODO VFRSQRTE V18.S2, V1.S2 // 41daa12e //TODO FRSQRTS F17, F7, F24 // f8fcf15e //TODO VFRSQRTS V14.S2, V10.S2, V24.S2 // 58fdae0e - //TODO VFSQRT V2.D2, V21.D2 // 55f8e16e + VFSQRT V2.D2, V21.D2 // 55f8e16e FSQRTS F0, F9 // 09c0211e FSQRTD F14, F27 // dbc1611e FSUBS F25, F23, F0 // e03a391e diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 3d7173155a..6ef3ac5105 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -1122,6 +1122,8 @@ const ( AVBIT AVBSL AVCMEQ + AVCLS + AVCLZ AVCMGE AVCMGT AVCMHI @@ -1157,6 +1159,8 @@ const ( AVSQADD AVUQADD AVSQSUB + AVSQABS + AVSQNEG AVUQSUB AVUHADD AVURHADD @@ -1186,6 +1190,16 @@ const ( AVREV16 AVREV32 AVREV64 + AVABS + AVFABS + AVFNEG + AVFSQRT + AVFRINTN + AVFRINTP + AVFRINTM + AVFRINTZ + AVNEG + AVNOT AVSHL AVSHRN AVSHRN2 diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 1f07580ae7..a4de13ed1c 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -478,6 +478,8 @@ var Anames = []string{ "VBIT", "VBSL", "VCMEQ", + "VCLS", + "VCLZ", "VCMGE", "VCMGT", "VCMHI", @@ -513,6 +515,8 @@ var Anames = []string{ "VSQADD", "VUQADD", "VSQSUB", + "VSQABS", + "VSQNEG", "VUQSUB", "VUHADD", "VURHADD", @@ -542,6 +546,16 @@ var Anames = []string{ "VREV16", "VREV32", "VREV64", + "VABS", + "VFABS", + "VFNEG", + "VFSQRT", + "VFRINTN", + "VFRINTP", + "VFRINTM", + "VFRINTZ", + "VNEG", + "VNOT", "VSHL", "VSHRN", "VSHRN2", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 4fd79f3b4b..00af21857c 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -3323,9 +3323,23 @@ func buildop(ctxt *obj.Link) { case AVREV32: oprangeset(AVCNT, t) + oprangeset(AVCLS, t) + oprangeset(AVCLZ, t) oprangeset(AVRBIT, t) oprangeset(AVREV64, t) oprangeset(AVREV16, t) + oprangeset(AVABS, t) + oprangeset(AVNEG, t) + oprangeset(AVFABS, t) + oprangeset(AVFNEG, t) + oprangeset(AVFSQRT, t) + oprangeset(AVFRINTN, t) + oprangeset(AVFRINTP, t) + oprangeset(AVFRINTM, t) + oprangeset(AVFRINTZ, t) + oprangeset(AVSQABS, t) + oprangeset(AVSQNEG, t) + oprangeset(AVNOT, t) case AVZIP1: oprangeset(AVZIP2, t) @@ -5198,6 +5212,9 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) { case ARNG_4S: Q = 1 size = 2 + case ARNG_2D: + Q = 1 + size = 3 default: c.ctxt.Diag("invalid arrangement: %v\n", p) } @@ -5214,6 +5231,30 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) { c.ctxt.Diag("invalid arrangement: %v", p) } + if p.As == AVNOT && (af != ARNG_8B && af != ARNG_16B) { + c.ctxt.Diag("invalid arrangement: %v", p) + } + + // VCLS and VCLZ only support integer arrangements (B, H, S), not D arrangements + if (p.As == AVCLS || p.As == AVCLZ) && (af == ARNG_1D || af == ARNG_2D) { + c.ctxt.Diag("invalid arrangement: %v", p) + } + + // Floating-point instructions only allow floating-point arrangements + // and use 1-bit size field: 0 for S arrangements, 1 for D arrangements + if p.As == AVFABS || p.As == AVFNEG || p.As == AVFSQRT || + p.As == AVFRINTN || p.As == AVFRINTP || p.As == AVFRINTM || p.As == AVFRINTZ { + if af != ARNG_2S && af != ARNG_4S && af != ARNG_2D { + c.ctxt.Diag("invalid arrangement: %v", p) + } + // Override size for floating-point instructions: 0 for S, 1 for D + if af == ARNG_2S || af == ARNG_4S { + size = 0 + } else if af == ARNG_2D { + size = 1 + } + } + if p.As == AVRBIT { size = 1 } @@ -6681,6 +6722,12 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 { case AVCNT: op = ASIMDMISC(0, 0, 0x05) + case AVCLS: + op = ASIMDMISC(0, 0, 0x04) + + case AVCLZ: + op = ASIMDMISC(1, 0, 0x04) + case AVZIP1: op = ASIMDPERM(0x3) @@ -6711,6 +6758,42 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 { case AVREV64: op = ASIMDMISC(0, 0, 0x00) + case AVABS: + op = ASIMDMISC(0, 0, 0xB) + + case AVNEG: + op = ASIMDMISC(1, 0, 0xB) + + case AVFABS: + op = ASIMDMISC(0, 2, 0xF) + + case AVFNEG: + op = ASIMDMISC(1, 2, 0xF) + + case AVFSQRT: + op = ASIMDMISC(1, 2, 0x1F) + + case AVFRINTN: + op = ASIMDMISC(0, 0, 0x18) + + case AVFRINTP: + op = ASIMDMISC(0, 2, 0x18) + + case AVFRINTM: + op = ASIMDMISC(0, 0, 0x19) + + case AVFRINTZ: + op = ASIMDMISC(0, 2, 0x19) + + case AVSQABS: + op = ASIMDMISC(0, 0, 0x7) + + case AVSQNEG: + op = ASIMDMISC(1, 0, 0x7) + + case AVNOT: + op = ASIMDMISC(1, 0, 0x5) + case AVMOV: op = 7<<25 | 5<<21 | 7<<10 |
