diff options
| author | erifan01 <eric.fang@arm.com> | 2018-01-26 10:18:50 +0000 |
|---|---|---|
| committer | Brad Fitzpatrick <bradfitz@golang.org> | 2018-02-22 03:55:53 +0000 |
| commit | f5de42001df2e61233c7ec7bbbd014bbaeaee242 (patch) | |
| tree | 639d5fb87f8623a636a81d30c86628ba05127aa2 /src/cmd/asm | |
| parent | c18ff1846592194a6a894f26f782e25b816ae73e (diff) | |
| download | go-f5de42001df2e61233c7ec7bbbd014bbaeaee242.tar.xz | |
cmd/asm: add arm64 instructions for math optimization
Add arm64 HW instructions FMADDD, FMADDS, FMSUBD, FMSUBS, FNMADDD, FNMADDS,
FNMSUBD, FNMSUBS, VFMLA, VFMLS, VMOV (element) for math optimization.
Add a check on the register element index, and add test cases.
Change-Id: Ice07c50b1a02d488ad2cde2a4e8aea93f3e3afff
Reviewed-on: https://go-review.googlesource.com/90876
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Diffstat (limited to 'src/cmd/asm')
| -rw-r--r-- | src/cmd/asm/internal/arch/arm64.go | 21 | ||||
| -rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64.s | 39 | ||||
| -rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64error.s | 56 |
3 files changed, 97 insertions, 19 deletions
diff --git a/src/cmd/asm/internal/arch/arm64.go b/src/cmd/asm/internal/arch/arm64.go index 524a503472..10458b01a0 100644 --- a/src/cmd/asm/internal/arch/arm64.go +++ b/src/cmd/asm/internal/arch/arm64.go @@ -178,18 +178,39 @@ func ARM64RegisterExtension(a *obj.Addr, ext string, reg, num int16, isAmount, i a.Reg = arm64.REG_SXTX + (reg & 31) + int16(num<<5) a.Offset = int64(((rm & 31) << 16) | (7 << 13) | (uint32(num) << 10)) case "B8": + if isIndex { + return errors.New("invalid register extension") + } a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_8B & 15) << 5) case "B16": + if isIndex { + return errors.New("invalid register extension") + } a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_16B & 15) << 5) case "H4": + if isIndex { + return errors.New("invalid register extension") + } a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_4H & 15) << 5) case "H8": + if isIndex { + return errors.New("invalid register extension") + } a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_8H & 15) << 5) case "S2": + if isIndex { + return errors.New("invalid register extension") + } a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_2S & 15) << 5) case "S4": + if isIndex { + return errors.New("invalid register extension") + } a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_4S & 15) << 5) case "D2": + if isIndex { + return errors.New("invalid register extension") + } a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_2D & 15) << 5) case "B": if !isIndex { diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 18527037b3..f74dc29f77 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -68,6 +68,12 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VADD V1, V3, V3 // 6384e15e VSUB V12, V30, V30 // de87ec7e VSUB V12, V20, V30 // 9e86ec7e + VFMLA V1.D2, V12.D2, V1.D2 // 81cd614e + VFMLA V1.S2, V12.S2, V1.S2 // 81cd210e + VFMLA V1.S4, V12.S4, V1.S4 // 81cd214e + VFMLS V1.D2, V12.D2, 
V1.D2 // 81cde14e + VFMLS V1.S2, V12.S2, V1.S2 // 81cda10e + VFMLS V1.S4, V12.S4, V1.S4 // 81cda14e // LTYPE1 imsr ',' spreg ',' // { @@ -204,16 +210,20 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 // outcode($1, &$2, NREG, &$4); // } MOVK $1, R1 - VMOV V8.S[1], R1 // 013d0c0e - VMOV V0.D[0], R11 // 0b3c084e - VMOV V0.D[1], R11 // 0b3c184e - VMOV R20, V1.S[0] // 811e044e - VMOV R1, V9.H4 // 290c020e - VMOV R22, V11.D2 // cb0e084e - VMOV V2.B16, V4.B16 // 441ca24e - VMOV V20.S[0], V20 // 9406045e - VREV32 V5.B16, V5.B16 // a508206e - VDUP V19.S[0], V17.S4 // 7106044e + VMOV V8.S[1], R1 // 013d0c0e + VMOV V0.D[0], R11 // 0b3c084e + VMOV V0.D[1], R11 // 0b3c184e + VMOV R20, V1.S[0] // 811e044e + VMOV R1, V9.H4 // 290c020e + VMOV R22, V11.D2 // cb0e084e + VMOV V2.B16, V4.B16 // 441ca24e + VMOV V20.S[0], V20 // 9406045e + VMOV V12.D[0], V12.D[1] // 8c05186e + VMOV V10.S[0], V12.S[1] // 4c050c6e + VMOV V9.H[0], V12.H[1] // 2c05066e + VMOV V8.B[0], V12.B[1] // 0c05036e + VREV32 V5.B16, V5.B16 // a508206e + VDUP V19.S[0], V17.S4 // 7106044e // // B/BL // @@ -367,6 +377,15 @@ again: // } // MADD R1, R2, R3, R4 + FMADDS F1, F3, F2, F4 // 440c011f + FMADDD F4, F5, F4, F4 // 8414441f + FMSUBS F13, F21, F13, F19 // b3d50d1f + FMSUBD F11, F7, F15, F31 // ff9d4b1f + FNMADDS F1, F3, F2, F4 // 440c211f + FNMADDD F1, F3, F2, F4 // 440c611f + FNMSUBS F1, F3, F2, F4 // 448c211f + FNMSUBD F1, F3, F2, F4 // 448c611f + // DMB, HINT // // LDMB imm diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index e4fad9c741..b77dabd4e1 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -3,13 +3,51 @@ // license that can be found in the LICENSE file. 
TEXT errors(SB),$0 - MOVD.P 300(R2), R3 // ERROR "offset out of range [-255,254]" - MOVD.P R3, 344(R2) // ERROR "offset out of range [-255,254]" - VLD1 (R8)(R13), [V2.B16] // ERROR "illegal combination" - VLD1 8(R9), [V2.B16] // ERROR "illegal combination" - VST1 [V1.B16], (R8)(R13) // ERROR "illegal combination" - VST1 [V1.B16], 9(R2) // ERROR "illegal combination" - VLD1 8(R8)(R13), [V2.B16] // ERROR "illegal combination" - ADD R1.UXTB<<5, R2, R3 // ERROR "shift amount out of range 0 to 4" - ADDS R1.UXTX<<7, R2, R3 // ERROR "shift amount out of range 0 to 4" + MOVD.P 300(R2), R3 // ERROR "offset out of range [-255,254]" + MOVD.P R3, 344(R2) // ERROR "offset out of range [-255,254]" + VLD1 (R8)(R13), [V2.B16] // ERROR "illegal combination" + VLD1 8(R9), [V2.B16] // ERROR "illegal combination" + VST1 [V1.B16], (R8)(R13) // ERROR "illegal combination" + VST1 [V1.B16], 9(R2) // ERROR "illegal combination" + VLD1 8(R8)(R13), [V2.B16] // ERROR "illegal combination" + ADD R1.UXTB<<5, R2, R3 // ERROR "shift amount out of range 0 to 4" + ADDS R1.UXTX<<7, R2, R3 // ERROR "shift amount out of range 0 to 4" + VMOV V8.D[2], V12.D[1] // ERROR "register element index out of range 0 to 1" + VMOV V8.S[4], V12.S[1] // ERROR "register element index out of range 0 to 3" + VMOV V8.H[8], V12.H[1] // ERROR "register element index out of range 0 to 7" + VMOV V8.B[16], V12.B[1] // ERROR "register element index out of range 0 to 15" + VMOV V8.D[0], V12.S[1] // ERROR "operand mismatch" + VMOV V8.D[0], V12.H[1] // ERROR "operand mismatch" + VMOV V8.D[0], V12.B[1] // ERROR "operand mismatch" + VMOV V8.S[0], V12.H[1] // ERROR "operand mismatch" + VMOV V8.S[0], V12.B[1] // ERROR "operand mismatch" + VMOV V8.H[0], V12.B[1] // ERROR "operand mismatch" + VMOV V8.B[16], R3 // ERROR "register element index out of range 0 to 15" + VMOV V8.H[9], R3 // ERROR "register element index out of range 0 to 7" + VMOV V8.S[4], R3 // ERROR "register element index out of range 0 to 3" + VMOV V8.D[2], R3 // ERROR 
"register element index out of range 0 to 1" + VDUP V8.B[16], R3.B16 // ERROR "register element index out of range 0 to 15" + VDUP V8.B[17], R3.B8 // ERROR "register element index out of range 0 to 15" + VDUP V8.H[9], R3.H4 // ERROR "register element index out of range 0 to 7" + VDUP V8.H[9], R3.H8 // ERROR "register element index out of range 0 to 7" + VDUP V8.S[4], R3.S2 // ERROR "register element index out of range 0 to 3" + VDUP V8.S[4], R3.S4 // ERROR "register element index out of range 0 to 3" + VDUP V8.D[2], R3.D2 // ERROR "register element index out of range 0 to 1" + VFMLA V1.D2, V12.D2, V3.S2 // ERROR "operand mismatch" + VFMLA V1.S2, V12.S2, V3.D2 // ERROR "operand mismatch" + VFMLA V1.S4, V12.S2, V3.D2 // ERROR "operand mismatch" + VFMLA V1.H4, V12.H4, V3.D2 // ERROR "operand mismatch" + VFMLS V1.S2, V12.S2, V3.S4 // ERROR "operand mismatch" + VFMLS V1.S2, V12.D2, V3.S4 // ERROR "operand mismatch" + VFMLS V1.S2, V12.S4, V3.D2 // ERROR "operand mismatch" + VFMLA V1.B8, V12.B8, V3.B8 // ERROR "invalid arrangement" + VFMLA V1.B16, V12.B16, V3.B16 // ERROR "invalid arrangement" + VFMLA V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement" + VFMLA V1.H8, V12.H8, V3.H8 // ERROR "invalid arrangement" + VFMLA V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement" + VFMLS V1.B8, V12.B8, V3.B8 // ERROR "invalid arrangement" + VFMLS V1.B16, V12.B16, V3.B16 // ERROR "invalid arrangement" + VFMLS V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement" + VFMLS V1.H8, V12.H8, V3.H8 // ERROR "invalid arrangement" + VFMLS V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement" RET |
