aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexander Musman <alexander.musman@gmail.com> 2026-04-02 13:34:42 +0300
committer Gopher Robot <gobot@golang.org> 2026-04-08 03:53:59 -0700
commit c1352b7df17574e0f7d3ada9514b36ddac993abb (patch)
tree e017d2416b50d37fa8a9c84c4e562bef8e2c6b44
parent b1c8857f95581ef3cb3daa0767985bba9f72320f (diff)
download go-c1352b7df17574e0f7d3ada9514b36ddac993abb.tar.xz
cmd/internal/obj/arm64: add ASIMD miscellaneous unary instructions
Add support for ASIMD unary miscellaneous instructions that operate on a single source register. These use the ASIMDMISC encoding class from the ARM architecture specification. These instructions need some validation for arrangement constraints: - VNOT only allows .B8/.B16 arrangements - VCLS/VCLZ do not support D arrangements - Floating-point variants (VFABS, VFNEG, VFSQRT, VFRINT*) only allow floating-point arrangements (S and D) New instructions by group: Integer absolute/negate: VABS, VNEG Floating-point abs/negate: VFABS, VFNEG Floating-point sqrt: VFSQRT Floating-point round: VFRINTN, VFRINTP, VFRINTM, VFRINTZ Saturating abs/negate: VSQABS, VSQNEG Bit/count operations: VCLS, VCLZ, VNOT Change-Id: I62242eda31f82cd34119c7d4f97316a030e7663b Reviewed-on: https://go-review.googlesource.com/c/go/+/762201 Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: Keith Randall <khr@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com> Auto-Submit: Keith Randall <khr@golang.org>
-rw-r--r-- src/cmd/asm/internal/asm/testdata/arm64.s 62
-rw-r--r-- src/cmd/asm/internal/asm/testdata/arm64enc.s 14
-rw-r--r-- src/cmd/internal/obj/arm64/a.out.go 14
-rw-r--r-- src/cmd/internal/obj/arm64/anames.go 14
-rw-r--r-- src/cmd/internal/obj/arm64/asm7.go 83
5 files changed, 180 insertions, 7 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index ad46e479e4..185f31c715 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -199,6 +199,21 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VSRSHR $16, V0.S2, V1.S2 // 0124300f
VSRSHR $16, V0.S4, V1.S4 // 0124304f
VSRSHR $32, V0.D2, V1.D2 // 0124604f
+ VFSQRT V0.S2, V1.S2 // 01f8a12e
+ VFSQRT V0.S4, V1.S4 // 01f8a16e
+ VFSQRT V0.D2, V1.D2 // 01f8e16e
+ VFRINTN V0.S2, V1.S2 // 0188210e
+ VFRINTN V0.S4, V1.S4 // 0188214e
+ VFRINTN V0.D2, V1.D2 // 0188614e
+ VFRINTP V0.S2, V1.S2 // 0188a10e
+ VFRINTP V0.S4, V1.S4 // 0188a14e
+ VFRINTP V0.D2, V1.D2 // 0188e14e
+ VFRINTM V0.S2, V1.S2 // 0198210e
+ VFRINTM V0.S4, V1.S4 // 0198214e
+ VFRINTM V0.D2, V1.D2 // 0198614e
+ VFRINTZ V0.S2, V1.S2 // 0198a10e
+ VFRINTZ V0.S4, V1.S4 // 0198a14e
+ VFRINTZ V0.D2, V1.D2 // 0198e14e
VSHL $56, V1.D2, V2.D2 // 2254784f
VSHL $24, V1.S4, V2.S4 // 2254384f
VSHL $24, V1.S2, V2.S2 // 2254380f
@@ -439,6 +454,53 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VREV16 V7.B16, V5.B16 // e518204e
VREV16 V7.B8, V5.B8 // e518200e
+ VABS V0.B8, V1.B8 // 01b8200e
+ VABS V0.B16, V1.B16 // 01b8204e
+ VABS V0.H4, V1.H4 // 01b8600e
+ VABS V0.H8, V1.H8 // 01b8604e
+ VABS V0.S2, V1.S2 // 01b8a00e
+ VABS V0.S4, V1.S4 // 01b8a04e
+ VABS V0.D2, V1.D2 // 01b8e04e
+ VCLS V0.B8, V1.B8 // 0148200e
+ VCLS V0.B16, V1.B16 // 0148204e
+ VCLS V0.H4, V1.H4 // 0148600e
+ VCLS V0.H8, V1.H8 // 0148604e
+ VCLS V0.S2, V1.S2 // 0148a00e
+ VCLS V0.S4, V1.S4 // 0148a04e
+ VCLZ V0.B8, V1.B8 // 0148202e
+ VCLZ V0.B16, V1.B16 // 0148206e
+ VCLZ V0.H4, V1.H4 // 0148602e
+ VCLZ V0.H8, V1.H8 // 0148606e
+ VCLZ V0.S2, V1.S2 // 0148a02e
+ VCLZ V0.S4, V1.S4 // 0148a06e
+ VNEG V0.B8, V1.B8 // 01b8202e
+ VNEG V0.B16, V1.B16 // 01b8206e
+ VNEG V0.H4, V1.H4 // 01b8602e
+ VNEG V0.H8, V1.H8 // 01b8606e
+ VNEG V0.S2, V1.S2 // 01b8a02e
+ VNEG V0.S4, V1.S4 // 01b8a06e
+ VNEG V0.D2, V1.D2 // 01b8e06e
+ VFABS V0.S2, V1.S2 // 01f8a00e
+ VFABS V0.S4, V1.S4 // 01f8a04e
+ VFABS V0.D2, V1.D2 // 01f8e04e
+ VFNEG V0.S2, V1.S2 // 01f8a02e
+ VFNEG V0.S4, V1.S4 // 01f8a06e
+ VFNEG V0.D2, V1.D2 // 01f8e06e
+ VSQABS V0.B8, V1.B8 // 0178200e
+ VSQABS V0.B16, V1.B16 // 0178204e
+ VSQABS V0.H4, V1.H4 // 0178600e
+ VSQABS V0.H8, V1.H8 // 0178604e
+ VSQABS V0.S2, V1.S2 // 0178a00e
+ VSQABS V0.S4, V1.S4 // 0178a04e
+ VSQNEG V0.B8, V1.B8 // 0178202e
+ VSQNEG V0.B16, V1.B16 // 0178206e
+ VSQNEG V0.H4, V1.H4 // 0178602e
+ VSQNEG V0.H8, V1.H8 // 0178606e
+ VSQNEG V0.S2, V1.S2 // 0178a02e
+ VSQNEG V0.S4, V1.S4 // 0178a06e
+ VNOT V0.B8, V1.B8 // 0158202e
+ VNOT V0.B16, V1.B16 // 0158206e
+
// logical ops
//
// make sure constants get encoded into an instruction when it could
diff --git a/src/cmd/asm/internal/asm/testdata/arm64enc.s b/src/cmd/asm/internal/asm/testdata/arm64enc.s
index 3af0880b78..76151a3570 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64enc.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64enc.s
@@ -426,7 +426,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8
YIELD // 3f2003d5
//TODO FABD F0, F5, F11 // abd4a07e
//TODO VFABD V30.S2, V8.S2, V24.S2 // 18d5be2e
- //TODO VFABS V5.S4, V24.S4 // b8f8a04e
+ VFABS V5.S4, V24.S4 // b8f8a04e
FABSS F2, F28 // 5cc0201e
FABSD F0, F14 // 0ec0601e
//TODO FACGE F25, F16, F0 // 00ee797e
@@ -603,7 +603,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8
//TODO VFMULX V12.D[1], V21.D2, V31.D2 // bf9acc6f
//TODO FMULX F16, F1, F31 // 3fdc705e
//TODO VFMULX V29.S2, V13.S2, V31.S2 // bfdd3d0e
- //TODO VFNEG V18.S2, V12.S2 // 4cfaa02e
+ VFNEG V18.S2, V12.S2 // 4cfaa02e
FNEGS F16, F5 // 0542211e
FNEGD F31, F31 // ff43611e
FNMADDS F17, F22, F6, F20 // d458311f
@@ -623,26 +623,26 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8
//TODO VFRINTI V21.D2, V31.D2 // bf9ae16e
FRINTIS F17, F17 // 31c2271e
FRINTID F9, F15 // 2fc1671e
- //TODO VFRINTM V9.D2, V27.D2 // 3b99614e
+ VFRINTM V9.D2, V27.D2 // 3b99614e
FRINTMS F24, F16 // 1043251e
FRINTMD F5, F2 // a240651e
- //TODO VFRINTN V30.S4, V2.S4 // c28b214e
+ VFRINTN V30.S4, V2.S4 // c28b214e
FRINTNS F26, F14 // 4e43241e
FRINTND F28, F12 // 8c43641e
- //TODO VFRINTP V27.D2, V31.D2 // 7f8be14e
+ VFRINTP V27.D2, V31.D2 // 7f8be14e
FRINTPS F27, F4 // 64c3241e
FRINTPD F6, F22 // d6c0641e
//TODO VFRINTX V25.D2, V0.D2 // 209b616e
FRINTXS F26, F10 // 4a43271e
FRINTXD F16, F12 // 0c42671e
- //TODO VFRINTZ V25.S4, V27.S4 // 3b9ba14e
+ VFRINTZ V25.S4, V27.S4 // 3b9ba14e
FRINTZS F3, F28 // 7cc0251e
FRINTZD F24, F6 // 06c3651e
//TODO FRSQRTE F29, F5 // a5dbe17e
//TODO VFRSQRTE V18.S2, V1.S2 // 41daa12e
//TODO FRSQRTS F17, F7, F24 // f8fcf15e
//TODO VFRSQRTS V14.S2, V10.S2, V24.S2 // 58fdae0e
- //TODO VFSQRT V2.D2, V21.D2 // 55f8e16e
+ VFSQRT V2.D2, V21.D2 // 55f8e16e
FSQRTS F0, F9 // 09c0211e
FSQRTD F14, F27 // dbc1611e
FSUBS F25, F23, F0 // e03a391e
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go
index 3d7173155a..6ef3ac5105 100644
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -1122,6 +1122,8 @@ const (
AVBIT
AVBSL
AVCMEQ
+ AVCLS
+ AVCLZ
AVCMGE
AVCMGT
AVCMHI
@@ -1157,6 +1159,8 @@ const (
AVSQADD
AVUQADD
AVSQSUB
+ AVSQABS
+ AVSQNEG
AVUQSUB
AVUHADD
AVURHADD
@@ -1186,6 +1190,16 @@ const (
AVREV16
AVREV32
AVREV64
+ AVABS
+ AVFABS
+ AVFNEG
+ AVFSQRT
+ AVFRINTN
+ AVFRINTP
+ AVFRINTM
+ AVFRINTZ
+ AVNEG
+ AVNOT
AVSHL
AVSHRN
AVSHRN2
diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go
index 1f07580ae7..a4de13ed1c 100644
--- a/src/cmd/internal/obj/arm64/anames.go
+++ b/src/cmd/internal/obj/arm64/anames.go
@@ -478,6 +478,8 @@ var Anames = []string{
"VBIT",
"VBSL",
"VCMEQ",
+ "VCLS",
+ "VCLZ",
"VCMGE",
"VCMGT",
"VCMHI",
@@ -513,6 +515,8 @@ var Anames = []string{
"VSQADD",
"VUQADD",
"VSQSUB",
+ "VSQABS",
+ "VSQNEG",
"VUQSUB",
"VUHADD",
"VURHADD",
@@ -542,6 +546,16 @@ var Anames = []string{
"VREV16",
"VREV32",
"VREV64",
+ "VABS",
+ "VFABS",
+ "VFNEG",
+ "VFSQRT",
+ "VFRINTN",
+ "VFRINTP",
+ "VFRINTM",
+ "VFRINTZ",
+ "VNEG",
+ "VNOT",
"VSHL",
"VSHRN",
"VSHRN2",
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 4fd79f3b4b..00af21857c 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -3323,9 +3323,23 @@ func buildop(ctxt *obj.Link) {
case AVREV32:
oprangeset(AVCNT, t)
+ oprangeset(AVCLS, t)
+ oprangeset(AVCLZ, t)
oprangeset(AVRBIT, t)
oprangeset(AVREV64, t)
oprangeset(AVREV16, t)
+ oprangeset(AVABS, t)
+ oprangeset(AVNEG, t)
+ oprangeset(AVFABS, t)
+ oprangeset(AVFNEG, t)
+ oprangeset(AVFSQRT, t)
+ oprangeset(AVFRINTN, t)
+ oprangeset(AVFRINTP, t)
+ oprangeset(AVFRINTM, t)
+ oprangeset(AVFRINTZ, t)
+ oprangeset(AVSQABS, t)
+ oprangeset(AVSQNEG, t)
+ oprangeset(AVNOT, t)
case AVZIP1:
oprangeset(AVZIP2, t)
@@ -5198,6 +5212,9 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) {
case ARNG_4S:
Q = 1
size = 2
+ case ARNG_2D:
+ Q = 1
+ size = 3
default:
c.ctxt.Diag("invalid arrangement: %v\n", p)
}
@@ -5214,6 +5231,30 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) {
c.ctxt.Diag("invalid arrangement: %v", p)
}
+ if p.As == AVNOT && (af != ARNG_8B && af != ARNG_16B) {
+ c.ctxt.Diag("invalid arrangement: %v", p)
+ }
+
+ // VCLS and VCLZ only support integer arrangements (B, H, S), not D arrangements
+ if (p.As == AVCLS || p.As == AVCLZ) && (af == ARNG_1D || af == ARNG_2D) {
+ c.ctxt.Diag("invalid arrangement: %v", p)
+ }
+
+ // Floating-point instructions only allow floating-point arrangements
+ // and use 1-bit size field: 0 for S arrangements, 1 for D arrangements
+ if p.As == AVFABS || p.As == AVFNEG || p.As == AVFSQRT ||
+ p.As == AVFRINTN || p.As == AVFRINTP || p.As == AVFRINTM || p.As == AVFRINTZ {
+ if af != ARNG_2S && af != ARNG_4S && af != ARNG_2D {
+ c.ctxt.Diag("invalid arrangement: %v", p)
+ }
+ // Override size for floating-point instructions: 0 for S, 1 for D
+ if af == ARNG_2S || af == ARNG_4S {
+ size = 0
+ } else if af == ARNG_2D {
+ size = 1
+ }
+ }
+
if p.As == AVRBIT {
size = 1
}
@@ -6681,6 +6722,12 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 {
case AVCNT:
op = ASIMDMISC(0, 0, 0x05)
+ case AVCLS:
+ op = ASIMDMISC(0, 0, 0x04)
+
+ case AVCLZ:
+ op = ASIMDMISC(1, 0, 0x04)
+
case AVZIP1:
op = ASIMDPERM(0x3)
@@ -6711,6 +6758,42 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 {
case AVREV64:
op = ASIMDMISC(0, 0, 0x00)
+ case AVABS:
+ op = ASIMDMISC(0, 0, 0xB)
+
+ case AVNEG:
+ op = ASIMDMISC(1, 0, 0xB)
+
+ case AVFABS:
+ op = ASIMDMISC(0, 2, 0xF)
+
+ case AVFNEG:
+ op = ASIMDMISC(1, 2, 0xF)
+
+ case AVFSQRT:
+ op = ASIMDMISC(1, 2, 0x1F)
+
+ case AVFRINTN:
+ op = ASIMDMISC(0, 0, 0x18)
+
+ case AVFRINTP:
+ op = ASIMDMISC(0, 2, 0x18)
+
+ case AVFRINTM:
+ op = ASIMDMISC(0, 0, 0x19)
+
+ case AVFRINTZ:
+ op = ASIMDMISC(0, 2, 0x19)
+
+ case AVSQABS:
+ op = ASIMDMISC(0, 0, 0x7)
+
+ case AVSQNEG:
+ op = ASIMDMISC(1, 0, 0x7)
+
+ case AVNOT:
+ op = ASIMDMISC(1, 0, 0x5)
+
case AVMOV:
op = 7<<25 | 5<<21 | 7<<10