aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alexander Musman <alexander.musman@gmail.com> 2026-04-02 14:59:35 +0300
committer: Gopher Robot <gobot@golang.org> 2026-04-08 08:20:20 -0700
commit: f2e70034cc852daec07bebd3f36aaf90aac28173 (patch)
tree: 100e3b304f57325b993c2abbd4917f8f58dbc114
parent: 1ea7966042731bae941511fb2b261b9536ad268f (diff)
download: go-f2e70034cc852daec07bebd3f36aaf90aac28173.tar.xz
cmd/internal/obj/arm64: add ASIMD cross-lane reduction instructions
Add support for ASIMD instructions that reduce a vector to a scalar by
operating across all lanes. These use the ASIMDALL encoding class from
the ARM architecture specification.

Integer cross-lane reductions (.B8, .B16, .H4, .H8, .S4):
  Signed max/min across lanes: VSMAXV, VSMINV
  Unsigned max/min across lanes: VUMAXV, VUMINV

Floating-point cross-lane reductions (.S4 arrangement):
  FP max/min across lanes: VFMAXV, VFMINV
  FP max/min across lanes (NM): VFMAXNMV, VFMINNMV

Change-Id: I6af4462d26803dfc7c78db2ad9df4284083e31e8
Reviewed-on: https://go-review.googlesource.com/c/go/+/762202
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
-rw-r--r-- src/cmd/asm/internal/asm/testdata/arm64.s 26
-rw-r--r-- src/cmd/asm/internal/asm/testdata/arm64error.s 19
-rw-r--r-- src/cmd/internal/obj/arm64/a.out.go 8
-rw-r--r-- src/cmd/internal/obj/arm64/anames.go 8
-rw-r--r-- src/cmd/internal/obj/arm64/asm7.go 40
5 files changed, 101 insertions, 0 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index 185f31c715..3adf402291 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -2100,4 +2100,30 @@ next:
AUTIA1716 // 9f2103d5
AUTIB1716 // df2103d5
+// Vector Reduction Instructions
+ VSMAXV V0.B8, V0 // 00a8300e
+ VSMAXV V0.B16, V0 // 00a8304e
+ VSMAXV V0.H4, V0 // 00a8700e
+ VSMAXV V0.H8, V0 // 00a8704e
+ VSMAXV V0.S4, V0 // 00a8b04e
+ VSMINV V0.B8, V0 // 00a8310e
+ VSMINV V0.B16, V0 // 00a8314e
+ VSMINV V0.H4, V0 // 00a8710e
+ VSMINV V0.H8, V0 // 00a8714e
+ VSMINV V0.S4, V0 // 00a8b14e
+ VUMAXV V0.B8, V0 // 00a8302e
+ VUMAXV V0.B16, V0 // 00a8306e
+ VUMAXV V0.H4, V0 // 00a8702e
+ VUMAXV V0.H8, V0 // 00a8706e
+ VUMAXV V0.S4, V0 // 00a8b06e
+ VUMINV V0.B8, V0 // 00a8312e
+ VUMINV V0.B16, V0 // 00a8316e
+ VUMINV V0.H4, V0 // 00a8712e
+ VUMINV V0.H8, V0 // 00a8716e
+ VUMINV V0.S4, V0 // 00a8b16e
+ VFMAXV V0.S4, V0 // 00f8306e
+ VFMINV V0.S4, V0 // 00f8b06e
+ VFMAXNMV V0.S4, V0 // 00c8306e
+ VFMINNMV V0.S4, V0 // 00c8b06e
+
END
diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s
index e9a6e69a09..dac106fee1 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64error.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64error.s
@@ -467,6 +467,25 @@ TEXT errors(SB),$0
VORN V0.S4, V0.S4, V1.S4 // ERROR "invalid arrangement"
VORN V0.H4, V0.H4, V1.H4 // ERROR "invalid arrangement"
VORN V0.D2, V0.D2, V1.D2 // ERROR "invalid arrangement"
+ VSMAXV V0.B8, V0.S4 // ERROR "illegal combination"
+ VUMAXV V0.S4, V0.H8 // ERROR "illegal combination"
+ VFMAXV V0.S4, V0.D2 // ERROR "illegal combination"
+ VFMAXV V0.D2, V0 // ERROR "invalid arrangement"
+ VFMINV V0.D2, V0 // ERROR "invalid arrangement"
+ VFMAXNMV V0.D2, V0 // ERROR "invalid arrangement"
+ VFMINNMV V0.D2, V0 // ERROR "invalid arrangement"
+ VFMAXV V0.S2, V0 // ERROR "invalid arrangement"
+ VFMINV V0.S2, V0 // ERROR "invalid arrangement"
+ VFMAXNMV V0.S2, V0 // ERROR "invalid arrangement"
+ VFMINNMV V0.S2, V0 // ERROR "invalid arrangement"
+ VFMAXV V0.H4, V0 // ERROR "invalid arrangement"
+ VFMAXV V0.H8, V0 // ERROR "invalid arrangement"
+ VFMINV V0.H4, V0 // ERROR "invalid arrangement"
+ VFMINV V0.H8, V0 // ERROR "invalid arrangement"
+ VFMAXNMV V0.H4, V0 // ERROR "invalid arrangement"
+ VFMAXNMV V0.H8, V0 // ERROR "invalid arrangement"
+ VFMINNMV V0.H4, V0 // ERROR "invalid arrangement"
+ VFMINNMV V0.H8, V0 // ERROR "invalid arrangement"
// VSHRN/VSHRN2 error test cases - invalid arrangements
VSHRN $8, V1.B8, V0.B8 // ERROR "invalid arrangement"
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go
index 6ef3ac5105..f6ef1681f2 100644
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -1116,6 +1116,14 @@ const (
AVSHADD
AVSRHADD
AVADDV
+ AVFMAXV
+ AVFMAXNMV
+ AVFMINV
+ AVFMINNMV
+ AVSMAXV
+ AVSMINV
+ AVUMAXV
+ AVUMINV
AVAND
AVBCAX
AVBIF
diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go
index a4de13ed1c..9592e9246c 100644
--- a/src/cmd/internal/obj/arm64/anames.go
+++ b/src/cmd/internal/obj/arm64/anames.go
@@ -472,6 +472,14 @@ var Anames = []string{
"VSHADD",
"VSRHADD",
"VADDV",
+ "VFMAXV",
+ "VFMAXNMV",
+ "VFMINV",
+ "VFMINNMV",
+ "VSMAXV",
+ "VSMINV",
+ "VUMAXV",
+ "VUMINV",
"VAND",
"VBCAX",
"VBIF",
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 00af21857c..aa1c76368a 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -3288,6 +3288,14 @@ func buildop(ctxt *obj.Link) {
case AVADDV:
oprangeset(AVUADDLV, t)
+ oprangeset(AVFMAXV, t)
+ oprangeset(AVFMAXNMV, t)
+ oprangeset(AVFMINV, t)
+ oprangeset(AVFMINNMV, t)
+ oprangeset(AVSMAXV, t)
+ oprangeset(AVSMINV, t)
+ oprangeset(AVUMAXV, t)
+ oprangeset(AVUMINV, t)
case AVFMLA:
oprangeset(AVFMLS, t)
@@ -5312,6 +5320,14 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) {
default:
c.ctxt.Diag("invalid arrangement: %v\n", p)
}
+ switch p.As {
+ // Floating-point reduction instructions only support .S4 arrangement and don't have a size field.
+ case AVFMAXV, AVFMINV, AVFMAXNMV, AVFMINNMV:
+ if af != ARNG_4S {
+ c.ctxt.Diag("invalid arrangement: %v\n", p)
+ }
+ size = 0
+ }
o1 = c.oprrr(p, p.As, p.To.Reg, p.From.Reg, obj.REG_NONE)
o1 |= uint32(Q&1)<<30 | uint32(size&3)<<22
@@ -6800,6 +6816,30 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 {
case AVADDV:
op = ASIMDALL(0, 0, 0x1B)
+ case AVFMAXV:
+ op = ASIMDALL(1, 0, 0xF)
+
+ case AVFMAXNMV:
+ op = ASIMDALL(1, 0, 0xC)
+
+ case AVFMINV:
+ op = ASIMDALL(1, 2, 0xF)
+
+ case AVFMINNMV:
+ op = ASIMDALL(1, 2, 0xC)
+
+ case AVSMAXV:
+ op = ASIMDALL(0, 0, 0xA)
+
+ case AVSMINV:
+ op = ASIMDALL(0, 0, 0x1A)
+
+ case AVUMAXV:
+ op = ASIMDALL(1, 0, 0xA)
+
+ case AVUMINV:
+ op = ASIMDALL(1, 0, 0x1A)
+
case AVUADDLV:
op = ASIMDALL(1, 0, 0x03)