diff options
| author | Alexander Musman <alexander.musman@gmail.com> | 2026-04-02 11:00:59 +0300 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2026-04-08 03:51:51 -0700 |
| commit | b1c8857f95581ef3cb3daa0767985bba9f72320f (patch) | |
| tree | 9ec36f73935b56ff344ffda2cfc797fd23bd4b0d /src/cmd | |
| parent | 9111d85e2f699672d67dcee1d6432a940f5306e1 (diff) | |
| download | go-b1c8857f95581ef3cb3daa0767985bba9f72320f.tar.xz | |
cmd/internal/obj/arm64: add ASIMD arithmetic instructions
Add encoding support for ASIMD three-register instructions covering
floating-point, saturating, halving, integer multiply/accumulate,
min/max (including pairwise variants), and bitwise operations.
These belong to the "Advanced SIMD Three-register (same)" instruction
class defined by the ARM architecture, meaning all three operands (both
source registers and the destination register) use the same element
arrangement (e.g., all .S4 or all .D2). In the assembler they share a
common encoding path using the ASIMDSAME() helper.
New instructions by group:
Floating-point arithmetic: VFADD, VFSUB, VFMUL, VFDIV
Floating-point min/max: VFMAX, VFMAXNM, VFMIN, VFMINNM
Pairwise floating-point: VFADDP, VFMAXP, VFMINP, VFMAXNMP,
VFMINNMP
Saturating arithmetic: VSQADD, VUQADD, VSQSUB, VUQSUB
Average (halving add): VSHADD, VSRHADD, VUHADD, VURHADD
Integer multiply/accum: VMUL, VMLA, VMLS
Integer min/max: VSMAX, VSMIN
Pairwise integer min/max: VSMAXP, VSMINP, VUMAXP, VUMINP
Bitwise: VBIC, VORN
Change-Id: I732c84123ad1f302260514fdfe0d020787da017b
Reviewed-on: https://go-review.googlesource.com/c/go/+/762200
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd')
| -rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64.s | 42 | ||||
| -rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64enc.s | 24 | ||||
| -rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64error.s | 30 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/a.out.go | 32 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/anames.go | 32 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/asm7.go | 138 |
6 files changed, 281 insertions, 17 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index bb0c9e2c05..ad46e479e4 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -127,8 +127,33 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VFCMGT $(0.0), V2.S4, V3.S4 // 43c8a04e VFCMLE $(0.0), V2.S4, V3.S4 // 43d8a06e VFCMLT $(0.0), V2.S4, V3.S4 // 43e8a04e + VFADD V0.S4, V0.S4, V1.S4 // 01d4204e + VFADD V0.D2, V0.D2, V1.D2 // 01d4604e + VFSUB V0.S4, V0.S4, V1.S4 // 01d4a04e + VFSUB V0.D2, V0.D2, V1.D2 // 01d4e04e + VFMUL V0.S4, V0.S4, V1.S4 // 01dc206e + VFMUL V0.D2, V0.D2, V1.D2 // 01dc606e + VFDIV V0.S4, V0.S4, V1.S4 // 01fc206e + VFDIV V0.D2, V0.D2, V1.D2 // 01fc606e + VSQADD V0.S4, V0.S4, V1.S4 // 010ca04e + VSQADD V0.D2, V0.D2, V1.D2 // 010ce04e + VUQADD V0.S4, V0.S4, V1.S4 // 010ca06e + VUQADD V0.D2, V0.D2, V1.D2 // 010ce06e + VSQSUB V0.S4, V0.S4, V1.S4 // 012ca04e + VSQSUB V0.D2, V0.D2, V1.D2 // 012ce04e + VUQSUB V0.S4, V0.S4, V1.S4 // 012ca06e + VUQSUB V0.D2, V0.D2, V1.D2 // 012ce06e + VMUL V0.S4, V0.S4, V1.S4 // 019ca04e + VMLA V0.S4, V0.S4, V1.S4 // 0194a04e + VMLS V0.S4, V0.S4, V1.S4 // 0194a06e VORR V5.B16, V4.B16, V3.B16 // 831ca54e + VBIC V0.B8, V1.B8, V2.B8 // 221c600e + VORN V0.B16, V1.B16, V2.B16 // 221ce04e VADD V16.S4, V5.S4, V9.S4 // a984b04e + VSHADD V0.S4, V1.S4, V2.S4 // 2204a04e + VSRHADD V0.S4, V1.S4, V2.S4 // 2214a04e + VUHADD V0.S4, V1.S4, V2.S4 // 2204a06e + VURHADD V0.S4, V1.S4, V2.S4 // 2214a06e VEOR V0.B16, V1.B16, V0.B16 // 201c206e VADDV V0.S4, V0 // 00b8b14e VMOVI $82, V0.B16 // 40e6024f @@ -337,6 +362,23 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VUMIN V3.H8, V2.H8, V1.H8 // 416c636e VUMIN V3.S2, V2.S2, V1.S2 // 416ca32e VUMIN V3.S4, V2.S4, V1.S4 // 416ca36e + VSMAX V3.S4, V2.S4, V1.S4 // 4164a34e + VSMIN V3.S4, V2.S4, V1.S4 // 416ca34e + VSMAXP V3.S4, V2.S4, V1.S4 // 41a4a34e + VSMINP V3.S4, V2.S4, V1.S4 // 41aca34e + VUMAXP V3.S4, V2.S4, V1.S4 // 41a4a36e + VUMINP V3.S4, V2.S4, V1.S4 // 
41aca36e + VFMAX V3.S4, V2.S4, V1.S4 // 41f4234e + VFMIN V3.S4, V2.S4, V1.S4 // 41f4a34e + VFMAXNM V3.S4, V2.S4, V1.S4 // 41c4234e + VFMINNM V3.S4, V2.S4, V1.S4 // 41c4a34e + VFMAXP V3.S4, V2.S4, V1.S4 // 41f4236e + VFADDP V3.S4, V2.S4, V1.S4 // 41d4236e + VFADDP V3.S2, V2.S2, V1.S2 // 41d4232e + VFADDP V3.D2, V2.D2, V1.D2 // 41d4636e + VFMINP V3.S4, V2.S4, V1.S4 // 41f4a36e + VFMAXNMP V3.S4, V2.S4, V1.S4 // 41c4236e + VFMINNMP V3.S4, V2.S4, V1.S4 // 41c4a36e FCCMPS LT, F1, F2, $1 // 41b4211e FMADDS F1, F3, F2, F4 // 440c011f FMADDD F4, F5, F4, F4 // 8414441f diff --git a/src/cmd/asm/internal/asm/testdata/arm64enc.s b/src/cmd/asm/internal/asm/testdata/arm64enc.s index dca6c78de4..3af0880b78 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64enc.s +++ b/src/cmd/asm/internal/asm/testdata/arm64enc.s @@ -433,11 +433,11 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8 //TODO VFACGE V11.S2, V15.S2, V9.S2 // e9ed2b2e //TODO FACGT F20, F16, F27 // 1beef47e //TODO VFACGT V15.S4, V25.S4, V22.S4 // 36efaf6e - //TODO VFADD V21.D2, V10.D2, V21.D2 // 55d5754e + VFADD V21.D2, V10.D2, V21.D2 // 55d5754e FADDS F12, F2, F10 // 4a282c1e FADDD F24, F14, F12 // cc29781e //TODO VFADDP V4.D2, F13 // 8dd8707e - //TODO VFADDP V30.S4, V3.S4, V11.S4 // 6bd43e6e + VFADDP V30.S4, V3.S4, V11.S4 // 6bd43e6e FCCMPS LE, F17, F12, $14 // 8ed5311e FCCMPD HI, F11, F15, $15 // ef856b1e FCCMPES HS, F28, F13, $13 // bd253c1e @@ -547,34 +547,34 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8 FCVTZUS F12, R29 // 9d01399e FCVTZUDW F27, R22 // 7603791e FCVTZUD F25, R22 // 3603799e - //TODO VFDIV V6.D2, V1.D2, V27.D2 // 3bfc666e + VFDIV V6.D2, V1.D2, V27.D2 // 3bfc666e FDIVS F16, F10, F20 // 5419301e FDIVD F11, F25, F30 // 3e1b6b1e FMADDS F15, F2, F8, F1 // 01090f1f FMADDD F15, F21, F25, F9 // 29574f1f - //TODO VFMAX V23.D2, V27.D2, V14.D2 // 6ef7774e + VFMAX V23.D2, V27.D2, V14.D2 // 6ef7774e FMAXS F5, F28, F27 // 9b4b251e FMAXD F12, F31, F31 // ff4b6c1e - //TODO VFMAXNM V3.D2, V12.D2, V27.D2 // 9bc5634e + VFMAXNM V3.D2, 
V12.D2, V27.D2 // 9bc5634e FMAXNMS F11, F24, F12 // 0c6b2b1e FMAXNMD F20, F6, F16 // d068741e //TODO VFMAXNMP V3.S2, F2 // 62c8307e - //TODO VFMAXNMP V25.S2, V4.S2, V2.S2 // 82c4392e + VFMAXNMP V25.S2, V4.S2, V2.S2 // 82c4392e //TODO VFMAXNMV V14.S4, F15 // cfc9306e //TODO VFMAXP V3.S2, F27 // 7bf8307e - //TODO VFMAXP V29.S2, V30.S2, V9.S2 // c9f73d2e + VFMAXP V29.S2, V30.S2, V9.S2 // c9f73d2e //TODO VFMAXV V13.S4, F14 // aef9306e - //TODO VFMIN V19.D2, V30.D2, V7.D2 // c7f7f34e + VFMIN V19.D2, V30.D2, V7.D2 // c7f7f34e FMINS F26, F18, F30 // 5e5a3a1e FMIND F29, F4, F21 // 95587d1e - //TODO VFMINNM V21.S4, V5.S4, V1.S4 // a1c4b54e + VFMINNM V21.S4, V5.S4, V1.S4 // a1c4b54e FMINNMS F23, F20, F1 // 817a371e FMINNMD F8, F3, F24 // 7878681e //TODO VFMINNMP V16.D2, F12 // 0ccaf07e - //TODO VFMINNMP V10.S4, V25.S4, V27.S4 // 3bc7aa6e + VFMINNMP V10.S4, V25.S4, V27.S4 // 3bc7aa6e //TODO VFMINNMV V8.S4, F3 // 03c9b06e //TODO VFMINP V10.S2, F20 // 54f9b07e - //TODO VFMINP V1.D2, V10.D2, V3.D2 // 43f5e16e + VFMINP V1.D2, V10.D2, V3.D2 // 43f5e16e //TODO VFMINV V11.S4, F9 // 69f9b06e //TODO VFMLA V6.S[0], F2, F14 // 4e10865f //TODO VFMLA V28.S[2], V2.S2, V30.S2 // 5e189c0f @@ -596,7 +596,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8 FMSUBD F11, F7, F15, F31 // ff9d4b1f //TODO VFMUL V9.S[2], F21, F19 // b39a895f //TODO VFMUL V26.S[2], V26.S2, V2.S2 // 429b9a0f - //TODO VFMUL V21.D2, V17.D2, V25.D2 // 39de756e + VFMUL V21.D2, V17.D2, V25.D2 // 39de756e FMULS F0, F6, F24 // d808201e FMULD F5, F29, F9 // a90b651e //TODO VFMULX V26.S[2], F20, F8 // 889a9a7f diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 72a22896c1..e9a6e69a09 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -381,6 +381,20 @@ TEXT errors(SB),$0 VUMIN V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement" VUMAX V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch" VUMIN V1.H4, V2.S4, V3.H4 // ERROR 
"operand mismatch" + VSMAX V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement" + VSMIN V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement" + VSMAX V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch" + VSMIN V1.H4, V2.S4, V3.H4 // ERROR "operand mismatch" + VSMAXP V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement" + VSMINP V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement" + VUMAXP V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement" + VUMINP V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement" + VFMAX V1.B8, V2.B8, V3.B8 // ERROR "invalid arrangement" + VFMIN V1.B16, V2.B16, V3.B16 // ERROR "invalid arrangement" + VFADDP V1.B8, V2.B8, V3.B8 // ERROR "invalid arrangement" + VFADDP V1.B16, V2.B16, V3.B16 // ERROR "invalid arrangement" + VFADDP V1.H4, V2.H4, V3.H4 // ERROR "invalid arrangement" + VFADDP V1.H8, V2.H8, V3.H8 // ERROR "invalid arrangement" VSLI $64, V7.D2, V8.D2 // ERROR "shift out of range" VUSRA $0, V7.D2, V8.D2 // ERROR "shift out of range" VSSHL V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch" @@ -438,6 +452,22 @@ TEXT errors(SB),$0 AUTIB1716 R0 // ERROR "illegal combination" SB $1 // ERROR "illegal combination" + // VMUL family invalid arrangement tests + VMUL V0.D2, V0.D2, V1.D2 // ERROR "invalid arrangement" + VMLA V0.D2, V0.D2, V1.D2 // ERROR "invalid arrangement" + VMLS V0.D2, V0.D2, V1.D2 // ERROR "invalid arrangement" + // VMUL family operand mismatch tests + VMUL V0.S4, V0.D2, V1.S4 // ERROR "operand mismatch" + VMLA V0.H4, V0.S4, V1.H4 // ERROR "operand mismatch" + VMLS V0.B8, V0.H4, V1.B8 // ERROR "operand mismatch" + // VBIC/VORN family invalid arrangement tests + VBIC V0.S4, V0.S4, V1.S4 // ERROR "invalid arrangement" + VBIC V0.H4, V0.H4, V1.H4 // ERROR "invalid arrangement" + VBIC V0.D2, V0.D2, V1.D2 // ERROR "invalid arrangement" + VORN V0.S4, V0.S4, V1.S4 // ERROR "invalid arrangement" + VORN V0.H4, V0.H4, V1.H4 // ERROR "invalid arrangement" + VORN V0.D2, V0.D2, V1.D2 // ERROR "invalid arrangement" + // VSHRN/VSHRN2 error test cases - invalid 
arrangements VSHRN $8, V1.B8, V0.B8 // ERROR "invalid arrangement" VSHRN $8, V1.S4, V0.S4 // ERROR "invalid arrangement" diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 56f68756fd..3d7173155a 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -1113,6 +1113,8 @@ const ( AUXTW AVADD AVADDP + AVSHADD + AVSRHADD AVADDV AVAND AVBCAX @@ -1139,6 +1141,28 @@ const ( AVFCMGT AVFCMLE AVFCMLT + AVFADDP + AVFADD + AVFSUB + AVFMUL + AVFDIV + AVFMAX + AVFMAXNM + AVFMAXP + AVFMIN + AVFMINNM + AVFMINP + AVFMAXNMP + AVFMINNMP + AVSQADD + AVUQADD + AVSQSUB + AVUQSUB + AVUHADD + AVURHADD + AVMUL + AVMLA + AVMLS AVLD1 AVLD1R AVLD2 @@ -1153,6 +1177,8 @@ const ( AVMOVQ AVMOVS AVORR + AVORN + AVBIC AVPMULL AVPMULL2 AVRAX1 @@ -1174,6 +1200,10 @@ const ( AVST3 AVST4 AVSUB + AVSMAX + AVSMIN + AVSMAXP + AVSMINP AVTBL AVTBX AVTRN1 @@ -1183,6 +1213,8 @@ const ( AVUADDW2 AVUMAX AVUMIN + AVUMAXP + AVUMINP AVUSHLL AVUSHLL2 AVUSHR diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index e40c043edd..1f07580ae7 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -469,6 +469,8 @@ var Anames = []string{ "UXTW", "VADD", "VADDP", + "VSHADD", + "VSRHADD", "VADDV", "VAND", "VBCAX", @@ -495,6 +497,28 @@ var Anames = []string{ "VFCMGT", "VFCMLE", "VFCMLT", + "VFADDP", + "VFADD", + "VFSUB", + "VFMUL", + "VFDIV", + "VFMAX", + "VFMAXNM", + "VFMAXP", + "VFMIN", + "VFMINNM", + "VFMINP", + "VFMAXNMP", + "VFMINNMP", + "VSQADD", + "VUQADD", + "VSQSUB", + "VUQSUB", + "VUHADD", + "VURHADD", + "VMUL", + "VMLA", + "VMLS", "VLD1", "VLD1R", "VLD2", @@ -509,6 +533,8 @@ var Anames = []string{ "VMOVQ", "VMOVS", "VORR", + "VORN", + "VBIC", "VPMULL", "VPMULL2", "VRAX1", @@ -530,6 +556,10 @@ var Anames = []string{ "VST3", "VST4", "VSUB", + "VSMAX", + "VSMIN", + "VSMAXP", + "VSMINP", "VTBL", "VTBX", "VTRN1", @@ -539,6 +569,8 @@ var Anames = []string{ "VUADDW2", 
"VUMAX", "VUMIN", + "VUMAXP", + "VUMINP", "VUSHLL", "VUSHLL2", "VUSHR", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 0d8c1f417e..4fd79f3b4b 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -3215,15 +3215,34 @@ func buildop(ctxt *obj.Link) { oprangeset(AVAND, t) oprangeset(AVORR, t) oprangeset(AVEOR, t) + oprangeset(AVBIC, t) + oprangeset(AVORN, t) oprangeset(AVBSL, t) oprangeset(AVBIT, t) oprangeset(AVCMTST, t) oprangeset(AVCMHI, t) + oprangeset(AVSQADD, t) + oprangeset(AVUQADD, t) + oprangeset(AVSQSUB, t) + oprangeset(AVUQSUB, t) + oprangeset(AVMUL, t) + oprangeset(AVMLA, t) + oprangeset(AVMLS, t) + oprangeset(AVSHADD, t) + oprangeset(AVSRHADD, t) oprangeset(AVSSHL, t) oprangeset(AVUSHL, t) + oprangeset(AVUHADD, t) + oprangeset(AVURHADD, t) oprangeset(AVCMHS, t) oprangeset(AVUMAX, t) oprangeset(AVUMIN, t) + oprangeset(AVSMAX, t) + oprangeset(AVSMIN, t) + oprangeset(AVSMAXP, t) + oprangeset(AVSMINP, t) + oprangeset(AVUMAXP, t) + oprangeset(AVUMINP, t) oprangeset(AVUZP1, t) oprangeset(AVUZP2, t) oprangeset(AVBIF, t) @@ -3272,6 +3291,19 @@ func buildop(ctxt *obj.Link) { case AVFMLA: oprangeset(AVFMLS, t) + oprangeset(AVFADD, t) + oprangeset(AVFSUB, t) + oprangeset(AVFMUL, t) + oprangeset(AVFDIV, t) + oprangeset(AVFMAX, t) + oprangeset(AVFMAXNM, t) + oprangeset(AVFMAXP, t) + oprangeset(AVFADDP, t) + oprangeset(AVFMIN, t) + oprangeset(AVFMINNM, t) + oprangeset(AVFMINP, t) + oprangeset(AVFMAXNMP, t) + oprangeset(AVFMINNMP, t) case AVPMULL: oprangeset(AVPMULL2, t) @@ -4782,27 +4814,27 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) { } switch p.As { - case AVORR, AVAND, AVEOR, AVBIT, AVBSL, AVBIF: + case AVORR, AVAND, AVEOR, AVBIT, AVBSL, AVBIF, AVBIC, AVORN: if af != ARNG_16B && af != ARNG_8B { c.ctxt.Diag("invalid arrangement: %v", p) } - case AVFMLA, AVFMLS, AVFCMEQ, AVFCMGE, AVFCMGT: + case AVFMLA, AVFMLS, AVFCMEQ, AVFCMGE, AVFCMGT, AVFADD, AVFSUB, AVFMUL, 
AVFDIV, AVFMAX, AVFMAXNM, AVFMAXP, AVFADDP, AVFMIN, AVFMINNM, AVFMINP, AVFMAXNMP, AVFMINNMP: if af != ARNG_2D && af != ARNG_2S && af != ARNG_4S { c.ctxt.Diag("invalid arrangement: %v", p) } - case AVUMAX, AVUMIN: + case AVUMAX, AVUMIN, AVUMAXP, AVUMINP, AVMUL, AVMLA, AVMLS, AVSMAX, AVSMIN, AVSMAXP, AVSMINP: if af == ARNG_2D { c.ctxt.Diag("invalid arrangement: %v", p) } } switch p.As { - case AVAND, AVEOR: + case AVAND, AVEOR, AVBIC, AVORN: size = 0 case AVBSL: size = 1 case AVORR, AVBIT, AVBIF: size = 2 - case AVFMLA, AVFMLS, AVFCMEQ, AVFCMGE, AVFCMGT: + case AVFMLA, AVFMLS, AVFCMEQ, AVFCMGE, AVFCMGT, AVFADD, AVFSUB, AVFMUL, AVFDIV, AVFMAX, AVFMAXNM, AVFMAXP, AVFADDP, AVFMIN, AVFMINNM, AVFMINP, AVFMAXNMP, AVFMINNMP: if af == ARNG_2D { size = 1 } else { @@ -6565,24 +6597,60 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 { case AVSUB: op = ASIMDSAME(1, 0, 0x10) + case AVSHADD: + op = ASIMDSAME(0, 0, 0x0) + + case AVSRHADD: + op = ASIMDSAME(0, 0, 0x2) + case AVSSHL: op = ASIMDSAME(0, 0, 0x8) case AVUSHL: op = ASIMDSAME(1, 0, 0x8) + case AVUHADD: + op = ASIMDSAME(1, 0, 0x0) + + case AVURHADD: + op = ASIMDSAME(1, 0, 0x2) + case AVADDP: op = ASIMDSAME(0, 0, 0x17) + case AVSQADD: + op = ASIMDSAME(0, 0, 0x1) + + case AVUQADD: + op = ASIMDSAME(1, 0, 0x1) + case AVSQSHL: op = ASIMDSAME(0, 0, 0x9) case AVUQSHL: op = ASIMDSAME(1, 0, 0x9) + case AVSQSUB: + op = ASIMDSAME(0, 0, 0x5) + + case AVUQSUB: + op = ASIMDSAME(1, 0, 0x5) + + case AVMUL: + op = ASIMDSAME(0, 0, 0x13) + + case AVMLA: + op = ASIMDSAME(0, 0, 0x12) + + case AVMLS: + op = ASIMDSAME(1, 0, 0x12) + case AVAND: op = ASIMDSAME(0, 0, 0x03) + case AVBIC: + op = ASIMDSAME(0, 1, 0x03) + case AVBCAX: op = 0xCE<<24 | 1<<21 @@ -6628,6 +6696,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 { case AVORR: op = ASIMDSAME(0, 2, 0x03) + case AVORN: + op = ASIMDSAME(0, 3, 0x03) + case AVRAX1: op = 0xCE<<24 | 3<<21 | 1<<15 | 3<<10 @@ -6655,6 +6726,45 @@ func (c *ctxt7) oprrr(p 
*obj.Prog, a obj.As, rd, rn, rm int16) uint32 { case AVFMLS: op = ASIMDSAME(0, 2, 0x19) + case AVFADD: + op = ASIMDSAME(0, 0, 0x1A) + + case AVFSUB: + op = ASIMDSAME(0, 2, 0x1A) + + case AVFMUL: + op = ASIMDSAME(1, 0, 0x1B) + + case AVFDIV: + op = ASIMDSAME(1, 0, 0x1F) + + case AVFMAX: + op = ASIMDSAME(0, 0, 0x1E) + + case AVFMAXNM: + op = ASIMDSAME(0, 0, 0x18) + + case AVFMAXP: + op = ASIMDSAME(1, 0, 0x1E) + + case AVFADDP: + op = ASIMDSAME(1, 0, 0x1A) + + case AVFMIN: + op = ASIMDSAME(0, 2, 0x1E) + + case AVFMINNM: + op = ASIMDSAME(0, 2, 0x18) + + case AVFMINP: + op = ASIMDSAME(1, 2, 0x1E) + + case AVFMAXNMP: + op = ASIMDSAME(1, 0, 0x18) + + case AVFMINNMP: + op = ASIMDSAME(1, 2, 0x18) + case AVPMULL, AVPMULL2: op = ASIMDDIFF(0, 0xE) @@ -6688,6 +6798,24 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 { case AVUMIN: op = ASIMDSAME(1, 0, 0x0D) + case AVUMAXP: + op = ASIMDSAME(1, 0, 0x14) + + case AVUMINP: + op = ASIMDSAME(1, 0, 0x15) + + case AVSMAX: + op = ASIMDSAME(0, 0, 0x0C) + + case AVSMIN: + op = ASIMDSAME(0, 0, 0x0D) + + case AVSMAXP: + op = ASIMDSAME(0, 0, 0x14) + + case AVSMINP: + op = ASIMDSAME(0, 0, 0x15) + case AVUZP1: op = ASIMDPERM(0x1) |
