diff options
| author | Alexander Musman <alexander.musman@gmail.com> | 2026-04-02 14:59:35 +0300 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2026-04-08 08:20:20 -0700 |
| commit | f2e70034cc852daec07bebd3f36aaf90aac28173 (patch) | |
| tree | 100e3b304f57325b993c2abbd4917f8f58dbc114 /src/cmd/asm | |
| parent | 1ea7966042731bae941511fb2b261b9536ad268f (diff) | |
| download | go-f2e70034cc852daec07bebd3f36aaf90aac28173.tar.xz | |
cmd/internal/obj/arm64: add ASIMD cross-lane reduction instructions
Add support for ASIMD instructions that reduce a vector to
a scalar by operating across all lanes. These use the ASIMDALL
encoding class from the ARM architecture specification.
Integer cross-lane reductions (.B8, .B16, .H4, .H8, .S4):
Signed max/min across lanes: VSMAXV, VSMINV
Unsigned max/min across lanes: VUMAXV, VUMINV
Floating-point cross-lane reductions (.S4 arrangement):
FP max/min across lanes: VFMAXV, VFMINV
FP max/min across lanes (NM): VFMAXNMV, VFMINNMV
Change-Id: I6af4462d26803dfc7c78db2ad9df4284083e31e8
Reviewed-on: https://go-review.googlesource.com/c/go/+/762202
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd/asm')
| -rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64.s | 26 | ||||
| -rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64error.s | 19 |
2 files changed, 45 insertions, 0 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index 185f31c715..3adf402291 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -2100,4 +2100,30 @@ next:
 	AUTIA1716                                       // 9f2103d5
 	AUTIB1716                                       // df2103d5
 
+// Vector Reduction Instructions
+	VSMAXV	V0.B8, V0                               // 00a8300e
+	VSMAXV	V0.B16, V0                              // 00a8304e
+	VSMAXV	V0.H4, V0                               // 00a8700e
+	VSMAXV	V0.H8, V0                               // 00a8704e
+	VSMAXV	V0.S4, V0                               // 00a8b04e
+	VSMINV	V0.B8, V0                               // 00a8310e
+	VSMINV	V0.B16, V0                              // 00a8314e
+	VSMINV	V0.H4, V0                               // 00a8710e
+	VSMINV	V0.H8, V0                               // 00a8714e
+	VSMINV	V0.S4, V0                               // 00a8b14e
+	VUMAXV	V0.B8, V0                               // 00a8302e
+	VUMAXV	V0.B16, V0                              // 00a8306e
+	VUMAXV	V0.H4, V0                               // 00a8702e
+	VUMAXV	V0.H8, V0                               // 00a8706e
+	VUMAXV	V0.S4, V0                               // 00a8b06e
+	VUMINV	V0.B8, V0                               // 00a8312e
+	VUMINV	V0.B16, V0                              // 00a8316e
+	VUMINV	V0.H4, V0                               // 00a8712e
+	VUMINV	V0.H8, V0                               // 00a8716e
+	VUMINV	V0.S4, V0                               // 00a8b16e
+	VFMAXV	V0.S4, V0                               // 00f8306e
+	VFMINV	V0.S4, V0                               // 00f8b06e
+	VFMAXNMV	V0.S4, V0                       // 00c8306e
+	VFMINNMV	V0.S4, V0                       // 00c8b06e
+
 	END
diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s
index e9a6e69a09..dac106fee1 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64error.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64error.s
@@ -467,6 +467,25 @@ TEXT errors(SB),$0
 	VORN	V0.S4, V0.S4, V1.S4                     // ERROR "invalid arrangement"
 	VORN	V0.H4, V0.H4, V1.H4                     // ERROR "invalid arrangement"
 	VORN	V0.D2, V0.D2, V1.D2                     // ERROR "invalid arrangement"
+	VSMAXV	V0.B8, V0.S4                            // ERROR "illegal combination"
+	VUMAXV	V0.S4, V0.H8                            // ERROR "illegal combination"
+	VFMAXV	V0.S4, V0.D2                            // ERROR "illegal combination"
+	VFMAXV	V0.D2, V0                               // ERROR "invalid arrangement"
+	VFMINV	V0.D2, V0                               // ERROR "invalid arrangement"
+	VFMAXNMV	V0.D2, V0                       // ERROR "invalid arrangement"
+	VFMINNMV	V0.D2, V0                       // ERROR "invalid arrangement"
+	VFMAXV	V0.S2, V0                               // ERROR "invalid arrangement"
+	VFMINV	V0.S2, V0                               // ERROR "invalid arrangement"
+	VFMAXNMV	V0.S2, V0                       // ERROR "invalid arrangement"
+	VFMINNMV	V0.S2, V0                       // ERROR "invalid arrangement"
+	VFMAXV	V0.H4, V0                               // ERROR "invalid arrangement"
+	VFMAXV	V0.H8, V0                               // ERROR "invalid arrangement"
+	VFMINV	V0.H4, V0                               // ERROR "invalid arrangement"
+	VFMINV	V0.H8, V0                               // ERROR "invalid arrangement"
+	VFMAXNMV	V0.H4, V0                       // ERROR "invalid arrangement"
+	VFMAXNMV	V0.H8, V0                       // ERROR "invalid arrangement"
+	VFMINNMV	V0.H4, V0                       // ERROR "invalid arrangement"
+	VFMINNMV	V0.H8, V0                       // ERROR "invalid arrangement"
 
 	// VSHRN/VSHRN2 error test cases - invalid arrangements
 	VSHRN	$8, V1.B8, V0.B8                        // ERROR "invalid arrangement"
