aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Alexander Musman <alexander.musman@gmail.com> 2026-04-02 11:00:59 +0300
committer Gopher Robot <gobot@golang.org> 2026-04-08 03:51:51 -0700
commit b1c8857f95581ef3cb3daa0767985bba9f72320f (patch)
tree 9ec36f73935b56ff344ffda2cfc797fd23bd4b0d /src
parent 9111d85e2f699672d67dcee1d6432a940f5306e1 (diff)
download go-b1c8857f95581ef3cb3daa0767985bba9f72320f.tar.xz
cmd/internal/obj/arm64: add ASIMD arithmetic instructions
Add encoding support for ASIMD three-register instructions covering
floating-point, saturating, halving, integer multiply/accumulate,
min/max (including pairwise variants), and bitwise operations.

These belong to the "Advanced SIMD Three-register (same)" instruction
class defined by the ARM architecture, meaning the two source registers
use the same element arrangement (e.g., both .S4 or both .D2). In the
assembler they share a common encoding path using the ASIMDSAME() macro.

New instructions by group:

  Floating-point arithmetic: VFADD, VFSUB, VFMUL, VFDIV
  Floating-point min/max:    VFMAX, VFMAXNM, VFMIN, VFMINNM
  Pairwise floating-point:   VFADDP, VFMAXP, VFMINP, VFMAXNMP, VFMINNMP
  Saturating arithmetic:     VSQADD, VUQADD, VSQSUB, VUQSUB
  Average (halving add):     VSHADD, VSRHADD, VUHADD, VURHADD
  Integer multiply/accum:    VMUL, VMLA, VMLS
  Integer min/max:           VSMAX, VSMIN
  Pairwise integer min/max:  VSMAXP, VSMINP, VUMAXP, VUMINP
  Bitwise:                   VBIC, VORN

Change-Id: I732c84123ad1f302260514fdfe0d020787da017b
Reviewed-on: https://go-review.googlesource.com/c/go/+/762200
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src')
-rw-r--r-- src/cmd/asm/internal/asm/testdata/arm64.s 42
-rw-r--r-- src/cmd/asm/internal/asm/testdata/arm64enc.s 24
-rw-r--r-- src/cmd/asm/internal/asm/testdata/arm64error.s 30
-rw-r--r-- src/cmd/internal/obj/arm64/a.out.go 32
-rw-r--r-- src/cmd/internal/obj/arm64/anames.go 32
-rw-r--r-- src/cmd/internal/obj/arm64/asm7.go 138
6 files changed, 281 insertions, 17 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index bb0c9e2c05..ad46e479e4 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -127,8 +127,33 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VFCMGT $(0.0), V2.S4, V3.S4 // 43c8a04e
VFCMLE $(0.0), V2.S4, V3.S4 // 43d8a06e
VFCMLT $(0.0), V2.S4, V3.S4 // 43e8a04e
+ VFADD V0.S4, V0.S4, V1.S4 // 01d4204e
+ VFADD V0.D2, V0.D2, V1.D2 // 01d4604e
+ VFSUB V0.S4, V0.S4, V1.S4 // 01d4a04e
+ VFSUB V0.D2, V0.D2, V1.D2 // 01d4e04e
+ VFMUL V0.S4, V0.S4, V1.S4 // 01dc206e
+ VFMUL V0.D2, V0.D2, V1.D2 // 01dc606e
+ VFDIV V0.S4, V0.S4, V1.S4 // 01fc206e
+ VFDIV V0.D2, V0.D2, V1.D2 // 01fc606e
+ VSQADD V0.S4, V0.S4, V1.S4 // 010ca04e
+ VSQADD V0.D2, V0.D2, V1.D2 // 010ce04e
+ VUQADD V0.S4, V0.S4, V1.S4 // 010ca06e
+ VUQADD V0.D2, V0.D2, V1.D2 // 010ce06e
+ VSQSUB V0.S4, V0.S4, V1.S4 // 012ca04e
+ VSQSUB V0.D2, V0.D2, V1.D2 // 012ce04e
+ VUQSUB V0.S4, V0.S4, V1.S4 // 012ca06e
+ VUQSUB V0.D2, V0.D2, V1.D2 // 012ce06e
+ VMUL V0.S4, V0.S4, V1.S4 // 019ca04e
+ VMLA V0.S4, V0.S4, V1.S4 // 0194a04e
+ VMLS V0.S4, V0.S4, V1.S4 // 0194a06e
VORR V5.B16, V4.B16, V3.B16 // 831ca54e
+ VBIC V0.B8, V1.B8, V2.B8 // 221c600e
+ VORN V0.B16, V1.B16, V2.B16 // 221ce04e
VADD V16.S4, V5.S4, V9.S4 // a984b04e
+ VSHADD V0.S4, V1.S4, V2.S4 // 2204a04e
+ VSRHADD V0.S4, V1.S4, V2.S4 // 2214a04e
+ VUHADD V0.S4, V1.S4, V2.S4 // 2204a06e
+ VURHADD V0.S4, V1.S4, V2.S4 // 2214a06e
VEOR V0.B16, V1.B16, V0.B16 // 201c206e
VADDV V0.S4, V0 // 00b8b14e
VMOVI $82, V0.B16 // 40e6024f
@@ -337,6 +362,23 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VUMIN V3.H8, V2.H8, V1.H8 // 416c636e
VUMIN V3.S2, V2.S2, V1.S2 // 416ca32e
VUMIN V3.S4, V2.S4, V1.S4 // 416ca36e
+ VSMAX V3.S4, V2.S4, V1.S4 // 4164a34e
+ VSMIN V3.S4, V2.S4, V1.S4 // 416ca34e
+ VSMAXP V3.S4, V2.S4, V1.S4 // 41a4a34e
+ VSMINP V3.S4, V2.S4, V1.S4 // 41aca34e
+ VUMAXP V3.S4, V2.S4, V1.S4 // 41a4a36e
+ VUMINP V3.S4, V2.S4, V1.S4 // 41aca36e
+ VFMAX V3.S4, V2.S4, V1.S4 // 41f4234e
+ VFMIN V3.S4, V2.S4, V1.S4 // 41f4a34e
+ VFMAXNM V3.S4, V2.S4, V1.S4 // 41c4234e
+ VFMINNM V3.S4, V2.S4, V1.S4 // 41c4a34e
+ VFMAXP V3.S4, V2.S4, V1.S4 // 41f4236e
+ VFADDP V3.S4, V2.S4, V1.S4 // 41d4236e
+ VFADDP V3.S2, V2.S2, V1.S2 // 41d4232e
+ VFADDP V3.D2, V2.D2, V1.D2 // 41d4636e
+ VFMINP V3.S4, V2.S4, V1.S4 // 41f4a36e
+ VFMAXNMP V3.S4, V2.S4, V1.S4 // 41c4236e
+ VFMINNMP V3.S4, V2.S4, V1.S4 // 41c4a36e
FCCMPS LT, F1, F2, $1 // 41b4211e
FMADDS F1, F3, F2, F4 // 440c011f
FMADDD F4, F5, F4, F4 // 8414441f
diff --git a/src/cmd/asm/internal/asm/testdata/arm64enc.s b/src/cmd/asm/internal/asm/testdata/arm64enc.s
index dca6c78de4..3af0880b78 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64enc.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64enc.s
@@ -433,11 +433,11 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8
//TODO VFACGE V11.S2, V15.S2, V9.S2 // e9ed2b2e
//TODO FACGT F20, F16, F27 // 1beef47e
//TODO VFACGT V15.S4, V25.S4, V22.S4 // 36efaf6e
- //TODO VFADD V21.D2, V10.D2, V21.D2 // 55d5754e
+ VFADD V21.D2, V10.D2, V21.D2 // 55d5754e
FADDS F12, F2, F10 // 4a282c1e
FADDD F24, F14, F12 // cc29781e
//TODO VFADDP V4.D2, F13 // 8dd8707e
- //TODO VFADDP V30.S4, V3.S4, V11.S4 // 6bd43e6e
+ VFADDP V30.S4, V3.S4, V11.S4 // 6bd43e6e
FCCMPS LE, F17, F12, $14 // 8ed5311e
FCCMPD HI, F11, F15, $15 // ef856b1e
FCCMPES HS, F28, F13, $13 // bd253c1e
@@ -547,34 +547,34 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8
FCVTZUS F12, R29 // 9d01399e
FCVTZUDW F27, R22 // 7603791e
FCVTZUD F25, R22 // 3603799e
- //TODO VFDIV V6.D2, V1.D2, V27.D2 // 3bfc666e
+ VFDIV V6.D2, V1.D2, V27.D2 // 3bfc666e
FDIVS F16, F10, F20 // 5419301e
FDIVD F11, F25, F30 // 3e1b6b1e
FMADDS F15, F2, F8, F1 // 01090f1f
FMADDD F15, F21, F25, F9 // 29574f1f
- //TODO VFMAX V23.D2, V27.D2, V14.D2 // 6ef7774e
+ VFMAX V23.D2, V27.D2, V14.D2 // 6ef7774e
FMAXS F5, F28, F27 // 9b4b251e
FMAXD F12, F31, F31 // ff4b6c1e
- //TODO VFMAXNM V3.D2, V12.D2, V27.D2 // 9bc5634e
+ VFMAXNM V3.D2, V12.D2, V27.D2 // 9bc5634e
FMAXNMS F11, F24, F12 // 0c6b2b1e
FMAXNMD F20, F6, F16 // d068741e
//TODO VFMAXNMP V3.S2, F2 // 62c8307e
- //TODO VFMAXNMP V25.S2, V4.S2, V2.S2 // 82c4392e
+ VFMAXNMP V25.S2, V4.S2, V2.S2 // 82c4392e
//TODO VFMAXNMV V14.S4, F15 // cfc9306e
//TODO VFMAXP V3.S2, F27 // 7bf8307e
- //TODO VFMAXP V29.S2, V30.S2, V9.S2 // c9f73d2e
+ VFMAXP V29.S2, V30.S2, V9.S2 // c9f73d2e
//TODO VFMAXV V13.S4, F14 // aef9306e
- //TODO VFMIN V19.D2, V30.D2, V7.D2 // c7f7f34e
+ VFMIN V19.D2, V30.D2, V7.D2 // c7f7f34e
FMINS F26, F18, F30 // 5e5a3a1e
FMIND F29, F4, F21 // 95587d1e
- //TODO VFMINNM V21.S4, V5.S4, V1.S4 // a1c4b54e
+ VFMINNM V21.S4, V5.S4, V1.S4 // a1c4b54e
FMINNMS F23, F20, F1 // 817a371e
FMINNMD F8, F3, F24 // 7878681e
//TODO VFMINNMP V16.D2, F12 // 0ccaf07e
- //TODO VFMINNMP V10.S4, V25.S4, V27.S4 // 3bc7aa6e
+ VFMINNMP V10.S4, V25.S4, V27.S4 // 3bc7aa6e
//TODO VFMINNMV V8.S4, F3 // 03c9b06e
//TODO VFMINP V10.S2, F20 // 54f9b07e
- //TODO VFMINP V1.D2, V10.D2, V3.D2 // 43f5e16e
+ VFMINP V1.D2, V10.D2, V3.D2 // 43f5e16e
//TODO VFMINV V11.S4, F9 // 69f9b06e
//TODO VFMLA V6.S[0], F2, F14 // 4e10865f
//TODO VFMLA V28.S[2], V2.S2, V30.S2 // 5e189c0f
@@ -596,7 +596,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8
FMSUBD F11, F7, F15, F31 // ff9d4b1f
//TODO VFMUL V9.S[2], F21, F19 // b39a895f
//TODO VFMUL V26.S[2], V26.S2, V2.S2 // 429b9a0f
- //TODO VFMUL V21.D2, V17.D2, V25.D2 // 39de756e
+ VFMUL V21.D2, V17.D2, V25.D2 // 39de756e
FMULS F0, F6, F24 // d808201e
FMULD F5, F29, F9 // a90b651e
//TODO VFMULX V26.S[2], F20, F8 // 889a9a7f
diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s
index 72a22896c1..e9a6e69a09 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64error.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64error.s
@@ -381,6 +381,20 @@ TEXT errors(SB),$0
VUMIN V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement"
VUMAX V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch"
VUMIN V1.H4, V2.S4, V3.H4 // ERROR "operand mismatch"
+ VSMAX V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement"
+ VSMIN V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement"
+ VSMAX V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch"
+ VSMIN V1.H4, V2.S4, V3.H4 // ERROR "operand mismatch"
+ VSMAXP V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement"
+ VSMINP V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement"
+ VUMAXP V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement"
+ VUMINP V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement"
+ VFMAX V1.B8, V2.B8, V3.B8 // ERROR "invalid arrangement"
+ VFMIN V1.B16, V2.B16, V3.B16 // ERROR "invalid arrangement"
+ VFADDP V1.B8, V2.B8, V3.B8 // ERROR "invalid arrangement"
+ VFADDP V1.B16, V2.B16, V3.B16 // ERROR "invalid arrangement"
+ VFADDP V1.H4, V2.H4, V3.H4 // ERROR "invalid arrangement"
+ VFADDP V1.H8, V2.H8, V3.H8 // ERROR "invalid arrangement"
VSLI $64, V7.D2, V8.D2 // ERROR "shift out of range"
VUSRA $0, V7.D2, V8.D2 // ERROR "shift out of range"
VSSHL V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch"
@@ -438,6 +452,22 @@ TEXT errors(SB),$0
AUTIB1716 R0 // ERROR "illegal combination"
SB $1 // ERROR "illegal combination"
+ // VMUL family invalid arrangement tests
+ VMUL V0.D2, V0.D2, V1.D2 // ERROR "invalid arrangement"
+ VMLA V0.D2, V0.D2, V1.D2 // ERROR "invalid arrangement"
+ VMLS V0.D2, V0.D2, V1.D2 // ERROR "invalid arrangement"
+ // VMUL family operand mismatch tests
+ VMUL V0.S4, V0.D2, V1.S4 // ERROR "operand mismatch"
+ VMLA V0.H4, V0.S4, V1.H4 // ERROR "operand mismatch"
+ VMLS V0.B8, V0.H4, V1.B8 // ERROR "operand mismatch"
+ // VBIC/VORN family invalid arrangement tests
+ VBIC V0.S4, V0.S4, V1.S4 // ERROR "invalid arrangement"
+ VBIC V0.H4, V0.H4, V1.H4 // ERROR "invalid arrangement"
+ VBIC V0.D2, V0.D2, V1.D2 // ERROR "invalid arrangement"
+ VORN V0.S4, V0.S4, V1.S4 // ERROR "invalid arrangement"
+ VORN V0.H4, V0.H4, V1.H4 // ERROR "invalid arrangement"
+ VORN V0.D2, V0.D2, V1.D2 // ERROR "invalid arrangement"
+
// VSHRN/VSHRN2 error test cases - invalid arrangements
VSHRN $8, V1.B8, V0.B8 // ERROR "invalid arrangement"
VSHRN $8, V1.S4, V0.S4 // ERROR "invalid arrangement"
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go
index 56f68756fd..3d7173155a 100644
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -1113,6 +1113,8 @@ const (
AUXTW
AVADD
AVADDP
+ AVSHADD
+ AVSRHADD
AVADDV
AVAND
AVBCAX
@@ -1139,6 +1141,28 @@ const (
AVFCMGT
AVFCMLE
AVFCMLT
+ AVFADDP
+ AVFADD
+ AVFSUB
+ AVFMUL
+ AVFDIV
+ AVFMAX
+ AVFMAXNM
+ AVFMAXP
+ AVFMIN
+ AVFMINNM
+ AVFMINP
+ AVFMAXNMP
+ AVFMINNMP
+ AVSQADD
+ AVUQADD
+ AVSQSUB
+ AVUQSUB
+ AVUHADD
+ AVURHADD
+ AVMUL
+ AVMLA
+ AVMLS
AVLD1
AVLD1R
AVLD2
@@ -1153,6 +1177,8 @@ const (
AVMOVQ
AVMOVS
AVORR
+ AVORN
+ AVBIC
AVPMULL
AVPMULL2
AVRAX1
@@ -1174,6 +1200,10 @@ const (
AVST3
AVST4
AVSUB
+ AVSMAX
+ AVSMIN
+ AVSMAXP
+ AVSMINP
AVTBL
AVTBX
AVTRN1
@@ -1183,6 +1213,8 @@ const (
AVUADDW2
AVUMAX
AVUMIN
+ AVUMAXP
+ AVUMINP
AVUSHLL
AVUSHLL2
AVUSHR
diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go
index e40c043edd..1f07580ae7 100644
--- a/src/cmd/internal/obj/arm64/anames.go
+++ b/src/cmd/internal/obj/arm64/anames.go
@@ -469,6 +469,8 @@ var Anames = []string{
"UXTW",
"VADD",
"VADDP",
+ "VSHADD",
+ "VSRHADD",
"VADDV",
"VAND",
"VBCAX",
@@ -495,6 +497,28 @@ var Anames = []string{
"VFCMGT",
"VFCMLE",
"VFCMLT",
+ "VFADDP",
+ "VFADD",
+ "VFSUB",
+ "VFMUL",
+ "VFDIV",
+ "VFMAX",
+ "VFMAXNM",
+ "VFMAXP",
+ "VFMIN",
+ "VFMINNM",
+ "VFMINP",
+ "VFMAXNMP",
+ "VFMINNMP",
+ "VSQADD",
+ "VUQADD",
+ "VSQSUB",
+ "VUQSUB",
+ "VUHADD",
+ "VURHADD",
+ "VMUL",
+ "VMLA",
+ "VMLS",
"VLD1",
"VLD1R",
"VLD2",
@@ -509,6 +533,8 @@ var Anames = []string{
"VMOVQ",
"VMOVS",
"VORR",
+ "VORN",
+ "VBIC",
"VPMULL",
"VPMULL2",
"VRAX1",
@@ -530,6 +556,10 @@ var Anames = []string{
"VST3",
"VST4",
"VSUB",
+ "VSMAX",
+ "VSMIN",
+ "VSMAXP",
+ "VSMINP",
"VTBL",
"VTBX",
"VTRN1",
@@ -539,6 +569,8 @@ var Anames = []string{
"VUADDW2",
"VUMAX",
"VUMIN",
+ "VUMAXP",
+ "VUMINP",
"VUSHLL",
"VUSHLL2",
"VUSHR",
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 0d8c1f417e..4fd79f3b4b 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -3215,15 +3215,34 @@ func buildop(ctxt *obj.Link) {
oprangeset(AVAND, t)
oprangeset(AVORR, t)
oprangeset(AVEOR, t)
+ oprangeset(AVBIC, t)
+ oprangeset(AVORN, t)
oprangeset(AVBSL, t)
oprangeset(AVBIT, t)
oprangeset(AVCMTST, t)
oprangeset(AVCMHI, t)
+ oprangeset(AVSQADD, t)
+ oprangeset(AVUQADD, t)
+ oprangeset(AVSQSUB, t)
+ oprangeset(AVUQSUB, t)
+ oprangeset(AVMUL, t)
+ oprangeset(AVMLA, t)
+ oprangeset(AVMLS, t)
+ oprangeset(AVSHADD, t)
+ oprangeset(AVSRHADD, t)
oprangeset(AVSSHL, t)
oprangeset(AVUSHL, t)
+ oprangeset(AVUHADD, t)
+ oprangeset(AVURHADD, t)
oprangeset(AVCMHS, t)
oprangeset(AVUMAX, t)
oprangeset(AVUMIN, t)
+ oprangeset(AVSMAX, t)
+ oprangeset(AVSMIN, t)
+ oprangeset(AVSMAXP, t)
+ oprangeset(AVSMINP, t)
+ oprangeset(AVUMAXP, t)
+ oprangeset(AVUMINP, t)
oprangeset(AVUZP1, t)
oprangeset(AVUZP2, t)
oprangeset(AVBIF, t)
@@ -3272,6 +3291,19 @@ func buildop(ctxt *obj.Link) {
case AVFMLA:
oprangeset(AVFMLS, t)
+ oprangeset(AVFADD, t)
+ oprangeset(AVFSUB, t)
+ oprangeset(AVFMUL, t)
+ oprangeset(AVFDIV, t)
+ oprangeset(AVFMAX, t)
+ oprangeset(AVFMAXNM, t)
+ oprangeset(AVFMAXP, t)
+ oprangeset(AVFADDP, t)
+ oprangeset(AVFMIN, t)
+ oprangeset(AVFMINNM, t)
+ oprangeset(AVFMINP, t)
+ oprangeset(AVFMAXNMP, t)
+ oprangeset(AVFMINNMP, t)
case AVPMULL:
oprangeset(AVPMULL2, t)
@@ -4782,27 +4814,27 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) {
}
switch p.As {
- case AVORR, AVAND, AVEOR, AVBIT, AVBSL, AVBIF:
+ case AVORR, AVAND, AVEOR, AVBIT, AVBSL, AVBIF, AVBIC, AVORN:
if af != ARNG_16B && af != ARNG_8B {
c.ctxt.Diag("invalid arrangement: %v", p)
}
- case AVFMLA, AVFMLS, AVFCMEQ, AVFCMGE, AVFCMGT:
+ case AVFMLA, AVFMLS, AVFCMEQ, AVFCMGE, AVFCMGT, AVFADD, AVFSUB, AVFMUL, AVFDIV, AVFMAX, AVFMAXNM, AVFMAXP, AVFADDP, AVFMIN, AVFMINNM, AVFMINP, AVFMAXNMP, AVFMINNMP:
if af != ARNG_2D && af != ARNG_2S && af != ARNG_4S {
c.ctxt.Diag("invalid arrangement: %v", p)
}
- case AVUMAX, AVUMIN:
+ case AVUMAX, AVUMIN, AVUMAXP, AVUMINP, AVMUL, AVMLA, AVMLS, AVSMAX, AVSMIN, AVSMAXP, AVSMINP:
if af == ARNG_2D {
c.ctxt.Diag("invalid arrangement: %v", p)
}
}
switch p.As {
- case AVAND, AVEOR:
+ case AVAND, AVEOR, AVBIC, AVORN:
size = 0
case AVBSL:
size = 1
case AVORR, AVBIT, AVBIF:
size = 2
- case AVFMLA, AVFMLS, AVFCMEQ, AVFCMGE, AVFCMGT:
+ case AVFMLA, AVFMLS, AVFCMEQ, AVFCMGE, AVFCMGT, AVFADD, AVFSUB, AVFMUL, AVFDIV, AVFMAX, AVFMAXNM, AVFMAXP, AVFADDP, AVFMIN, AVFMINNM, AVFMINP, AVFMAXNMP, AVFMINNMP:
if af == ARNG_2D {
size = 1
} else {
@@ -6565,24 +6597,60 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 {
case AVSUB:
op = ASIMDSAME(1, 0, 0x10)
+ case AVSHADD:
+ op = ASIMDSAME(0, 0, 0x0)
+
+ case AVSRHADD:
+ op = ASIMDSAME(0, 0, 0x2)
+
case AVSSHL:
op = ASIMDSAME(0, 0, 0x8)
case AVUSHL:
op = ASIMDSAME(1, 0, 0x8)
+ case AVUHADD:
+ op = ASIMDSAME(1, 0, 0x0)
+
+ case AVURHADD:
+ op = ASIMDSAME(1, 0, 0x2)
+
case AVADDP:
op = ASIMDSAME(0, 0, 0x17)
+ case AVSQADD:
+ op = ASIMDSAME(0, 0, 0x1)
+
+ case AVUQADD:
+ op = ASIMDSAME(1, 0, 0x1)
+
case AVSQSHL:
op = ASIMDSAME(0, 0, 0x9)
case AVUQSHL:
op = ASIMDSAME(1, 0, 0x9)
+ case AVSQSUB:
+ op = ASIMDSAME(0, 0, 0x5)
+
+ case AVUQSUB:
+ op = ASIMDSAME(1, 0, 0x5)
+
+ case AVMUL:
+ op = ASIMDSAME(0, 0, 0x13)
+
+ case AVMLA:
+ op = ASIMDSAME(0, 0, 0x12)
+
+ case AVMLS:
+ op = ASIMDSAME(1, 0, 0x12)
+
case AVAND:
op = ASIMDSAME(0, 0, 0x03)
+ case AVBIC:
+ op = ASIMDSAME(0, 1, 0x03)
+
case AVBCAX:
op = 0xCE<<24 | 1<<21
@@ -6628,6 +6696,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 {
case AVORR:
op = ASIMDSAME(0, 2, 0x03)
+ case AVORN:
+ op = ASIMDSAME(0, 3, 0x03)
+
case AVRAX1:
op = 0xCE<<24 | 3<<21 | 1<<15 | 3<<10
@@ -6655,6 +6726,45 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 {
case AVFMLS:
op = ASIMDSAME(0, 2, 0x19)
+ case AVFADD:
+ op = ASIMDSAME(0, 0, 0x1A)
+
+ case AVFSUB:
+ op = ASIMDSAME(0, 2, 0x1A)
+
+ case AVFMUL:
+ op = ASIMDSAME(1, 0, 0x1B)
+
+ case AVFDIV:
+ op = ASIMDSAME(1, 0, 0x1F)
+
+ case AVFMAX:
+ op = ASIMDSAME(0, 0, 0x1E)
+
+ case AVFMAXNM:
+ op = ASIMDSAME(0, 0, 0x18)
+
+ case AVFMAXP:
+ op = ASIMDSAME(1, 0, 0x1E)
+
+ case AVFADDP:
+ op = ASIMDSAME(1, 0, 0x1A)
+
+ case AVFMIN:
+ op = ASIMDSAME(0, 2, 0x1E)
+
+ case AVFMINNM:
+ op = ASIMDSAME(0, 2, 0x18)
+
+ case AVFMINP:
+ op = ASIMDSAME(1, 2, 0x1E)
+
+ case AVFMAXNMP:
+ op = ASIMDSAME(1, 0, 0x18)
+
+ case AVFMINNMP:
+ op = ASIMDSAME(1, 2, 0x18)
+
case AVPMULL, AVPMULL2:
op = ASIMDDIFF(0, 0xE)
@@ -6688,6 +6798,24 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 {
case AVUMIN:
op = ASIMDSAME(1, 0, 0x0D)
+ case AVUMAXP:
+ op = ASIMDSAME(1, 0, 0x14)
+
+ case AVUMINP:
+ op = ASIMDSAME(1, 0, 0x15)
+
+ case AVSMAX:
+ op = ASIMDSAME(0, 0, 0x0C)
+
+ case AVSMIN:
+ op = ASIMDSAME(0, 0, 0x0D)
+
+ case AVSMAXP:
+ op = ASIMDSAME(0, 0, 0x14)
+
+ case AVSMINP:
+ op = ASIMDSAME(0, 0, 0x15)
+
case AVUZP1:
op = ASIMDPERM(0x1)