From f2e70034cc852daec07bebd3f36aaf90aac28173 Mon Sep 17 00:00:00 2001
From: Alexander Musman
Date: Thu, 2 Apr 2026 14:59:35 +0300
Subject: cmd/internal/obj/arm64: add ASIMD cross-lane reduction instructions

Add support for ASIMD instructions that reduce a vector to a scalar by
operating across all lanes. These use the ASIMDALL encoding class from
the ARM architecture specification.

Integer cross-lane reductions (.B8, .B16, .H4, .H8, .S4):
  Signed max/min across lanes: VSMAXV, VSMINV
  Unsigned max/min across lanes: VUMAXV, VUMINV

Floating-point cross-lane reductions (.S4 arrangement):
  FP max/min across lanes: VFMAXV, VFMINV
  FP max/min across lanes (NM): VFMAXNMV, VFMINNMV

Change-Id: I6af4462d26803dfc7c78db2ad9df4284083e31e8
Reviewed-on: https://go-review.googlesource.com/c/go/+/762202
Reviewed-by: David Chase
Reviewed-by: Keith Randall
Reviewed-by: Keith Randall
Auto-Submit: Keith Randall
LUCI-TryBot-Result: Go LUCI
---
 src/cmd/asm/internal/asm/testdata/arm64.s      | 26 ++++++++++++++++++++++++++
 src/cmd/asm/internal/asm/testdata/arm64error.s | 19 +++++++++++++++++++
 2 files changed, 45 insertions(+)

(limited to 'src/cmd/asm/internal')

diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index 185f31c715..3adf402291 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -2100,4 +2100,30 @@ next:
 	AUTIA1716 // 9f2103d5
 	AUTIB1716 // df2103d5
 
+// Vector Reduction Instructions
+	VSMAXV V0.B8, V0 // 00a8300e
+	VSMAXV V0.B16, V0 // 00a8304e
+	VSMAXV V0.H4, V0 // 00a8700e
+	VSMAXV V0.H8, V0 // 00a8704e
+	VSMAXV V0.S4, V0 // 00a8b04e
+	VSMINV V0.B8, V0 // 00a8310e
+	VSMINV V0.B16, V0 // 00a8314e
+	VSMINV V0.H4, V0 // 00a8710e
+	VSMINV V0.H8, V0 // 00a8714e
+	VSMINV V0.S4, V0 // 00a8b14e
+	VUMAXV V0.B8, V0 // 00a8302e
+	VUMAXV V0.B16, V0 // 00a8306e
+	VUMAXV V0.H4, V0 // 00a8702e
+	VUMAXV V0.H8, V0 // 00a8706e
+	VUMAXV V0.S4, V0 // 00a8b06e
+	VUMINV V0.B8, V0 // 00a8312e
+	VUMINV V0.B16, V0 // 00a8316e
+	VUMINV V0.H4, V0 // 00a8712e
+	VUMINV V0.H8, V0 // 00a8716e
+	VUMINV V0.S4, V0 // 00a8b16e
+	VFMAXV V0.S4, V0 // 00f8306e
+	VFMINV V0.S4, V0 // 00f8b06e
+	VFMAXNMV V0.S4, V0 // 00c8306e
+	VFMINNMV V0.S4, V0 // 00c8b06e
+
 	END
diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s
index e9a6e69a09..dac106fee1 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64error.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64error.s
@@ -467,6 +467,25 @@ TEXT errors(SB),$0
 	VORN V0.S4, V0.S4, V1.S4 // ERROR "invalid arrangement"
 	VORN V0.H4, V0.H4, V1.H4 // ERROR "invalid arrangement"
 	VORN V0.D2, V0.D2, V1.D2 // ERROR "invalid arrangement"
+	VSMAXV V0.B8, V0.S4 // ERROR "illegal combination"
+	VUMAXV V0.S4, V0.H8 // ERROR "illegal combination"
+	VFMAXV V0.S4, V0.D2 // ERROR "illegal combination"
+	VFMAXV V0.D2, V0 // ERROR "invalid arrangement"
+	VFMINV V0.D2, V0 // ERROR "invalid arrangement"
+	VFMAXNMV V0.D2, V0 // ERROR "invalid arrangement"
+	VFMINNMV V0.D2, V0 // ERROR "invalid arrangement"
+	VFMAXV V0.S2, V0 // ERROR "invalid arrangement"
+	VFMINV V0.S2, V0 // ERROR "invalid arrangement"
+	VFMAXNMV V0.S2, V0 // ERROR "invalid arrangement"
+	VFMINNMV V0.S2, V0 // ERROR "invalid arrangement"
+	VFMAXV V0.H4, V0 // ERROR "invalid arrangement"
+	VFMAXV V0.H8, V0 // ERROR "invalid arrangement"
+	VFMINV V0.H4, V0 // ERROR "invalid arrangement"
+	VFMINV V0.H8, V0 // ERROR "invalid arrangement"
+	VFMAXNMV V0.H4, V0 // ERROR "invalid arrangement"
+	VFMAXNMV V0.H8, V0 // ERROR "invalid arrangement"
+	VFMINNMV V0.H4, V0 // ERROR "invalid arrangement"
+	VFMINNMV V0.H8, V0 // ERROR "invalid arrangement"
 
 	// VSHRN/VSHRN2 error test cases - invalid arrangements
 	VSHRN $8, V1.B8, V0.B8 // ERROR "invalid arrangement"
-- 
cgit v1.3-6-g1900