aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/asm/internal
diff options
context:
space:
mode:
author: Alexander Musman <alexander.musman@gmail.com>, 2026-04-01 16:23:15 +0300
committer: Gopher Robot <gobot@golang.org>, 2026-04-08 03:51:48 -0700
commit: 9111d85e2f699672d67dcee1d6432a940f5306e1 (patch)
tree: 1b9538168dbf9d5f28d4f50a2f4905bd5c0c847e /src/cmd/asm/internal
parent: 4dffc57944c829d2fb2cf1b25168c27e555a8e5c (diff)
download: go-9111d85e2f699672d67dcee1d6432a940f5306e1.tar.xz
cmd/internal/obj/arm64: add ASIMD shift instructions
Add support for ASIMD shift instructions. These use the ASIMDSHF encoding class from the ARM architecture specification, where the shift amount is encoded as an immediate derived from the element size.

Also add ASIMD shifts-by-vector (3-register form) where the shift amount comes from a second vector register. These use the ASIMDSAME encoding class.

New instructions by group:
- Shift by immediate (signed): VSSHR, VSRSHR
- Shift by immediate (saturating): VSQSHL, VUQSHL
- Narrowing shift by immediate: VSHRN, VSHRN2
- Shift by vector (3-reg): VSSHL, VUSHL, VSQSHL, VUQSHL

Change-Id: I039cc16bc01980b04e6940cc1d4670faf5fa7e3c
Reviewed-on: https://go-review.googlesource.com/c/go/+/762180
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd/asm/internal')
-rw-r--r-- src/cmd/asm/internal/asm/testdata/arm64.s | 62
-rw-r--r-- src/cmd/asm/internal/asm/testdata/arm64error.s | 16
2 files changed, 78 insertions, 0 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index 2f85308bd3..bb0c9e2c05 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -154,6 +154,26 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VUSHR $8, V1.H8, V2.H8 // 2204186f
VUSHR $2, V1.B8, V2.B8 // 22040e2f
VUSHR $2, V1.B16, V2.B16 // 22040e6f
+ VSHRN $7, V1.H8, V0.B8 // 2084090f
+ VSHRN $15, V1.S4, V0.H4 // 2084110f
+ VSHRN $31, V1.D2, V0.S2 // 2084210f
+ VSHRN2 $7, V1.H8, V0.B16 // 2084094f
+ VSHRN2 $15, V1.S4, V0.H8 // 2084114f
+ VSHRN2 $31, V1.D2, V0.S4 // 2084214f
+ VSSHR $2, V0.B8, V1.B8 // 01040e0f
+ VSSHR $2, V0.B16, V1.B16 // 01040e4f
+ VSSHR $8, V0.H4, V1.H4 // 0104180f
+ VSSHR $8, V0.H8, V1.H8 // 0104184f
+ VSSHR $16, V0.S2, V1.S2 // 0104300f
+ VSSHR $16, V0.S4, V1.S4 // 0104304f
+ VSSHR $32, V0.D2, V1.D2 // 0104604f
+ VSRSHR $2, V0.B8, V1.B8 // 01240e0f
+ VSRSHR $2, V0.B16, V1.B16 // 01240e4f
+ VSRSHR $8, V0.H4, V1.H4 // 0124180f
+ VSRSHR $8, V0.H8, V1.H8 // 0124184f
+ VSRSHR $16, V0.S2, V1.S2 // 0124300f
+ VSRSHR $16, V0.S4, V1.S4 // 0124304f
+ VSRSHR $32, V0.D2, V1.D2 // 0124604f
VSHL $56, V1.D2, V2.D2 // 2254784f
VSHL $24, V1.S4, V2.S4 // 2254384f
VSHL $24, V1.S2, V2.S2 // 2254380f
@@ -161,6 +181,48 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VSHL $8, V1.H8, V2.H8 // 2254184f
VSHL $2, V1.B8, V2.B8 // 22540a0f
VSHL $2, V1.B16, V2.B16 // 22540a4f
+ VSQSHL $56, V1.D2, V2.D2 // 2274784f
+ VSQSHL $24, V1.S4, V2.S4 // 2274384f
+ VSQSHL $24, V1.S2, V2.S2 // 2274380f
+ VSQSHL $8, V1.H4, V2.H4 // 2274180f
+ VSQSHL $8, V1.H8, V2.H8 // 2274184f
+ VSQSHL $2, V1.B8, V2.B8 // 22740a0f
+ VSQSHL $2, V1.B16, V2.B16 // 22740a4f
+ VUQSHL $56, V1.D2, V2.D2 // 2274786f
+ VUQSHL $24, V1.S4, V2.S4 // 2274386f
+ VUQSHL $24, V1.S2, V2.S2 // 2274382f
+ VUQSHL $8, V1.H4, V2.H4 // 2274182f
+ VUQSHL $8, V1.H8, V2.H8 // 2274186f
+ VUQSHL $2, V1.B8, V2.B8 // 22740a2f
+ VUQSHL $2, V1.B16, V2.B16 // 22740a6f
+ VSSHL V1.D2, V2.D2, V3.D2 // 4344e14e
+ VSSHL V1.S4, V2.S4, V3.S4 // 4344a14e
+ VSSHL V1.S2, V2.S2, V3.S2 // 4344a10e
+ VSSHL V1.H4, V2.H4, V3.H4 // 4344610e
+ VSSHL V1.H8, V2.H8, V3.H8 // 4344614e
+ VSSHL V1.B8, V2.B8, V3.B8 // 4344210e
+ VSSHL V1.B16, V2.B16, V3.B16 // 4344214e
+ VUSHL V1.D2, V2.D2, V3.D2 // 4344e16e
+ VUSHL V1.S4, V2.S4, V3.S4 // 4344a16e
+ VUSHL V1.S2, V2.S2, V3.S2 // 4344a12e
+ VUSHL V1.H4, V2.H4, V3.H4 // 4344612e
+ VUSHL V1.H8, V2.H8, V3.H8 // 4344616e
+ VUSHL V1.B8, V2.B8, V3.B8 // 4344212e
+ VUSHL V1.B16, V2.B16, V3.B16 // 4344216e
+ VSQSHL V1.D2, V2.D2, V3.D2 // 434ce14e
+ VSQSHL V1.S4, V2.S4, V3.S4 // 434ca14e
+ VSQSHL V1.S2, V2.S2, V3.S2 // 434ca10e
+ VSQSHL V1.H4, V2.H4, V3.H4 // 434c610e
+ VSQSHL V1.H8, V2.H8, V3.H8 // 434c614e
+ VSQSHL V1.B8, V2.B8, V3.B8 // 434c210e
+ VSQSHL V1.B16, V2.B16, V3.B16 // 434c214e
+ VUQSHL V1.D2, V2.D2, V3.D2 // 434ce16e
+ VUQSHL V1.S4, V2.S4, V3.S4 // 434ca16e
+ VUQSHL V1.S2, V2.S2, V3.S2 // 434ca12e
+ VUQSHL V1.H4, V2.H4, V3.H4 // 434c612e
+ VUQSHL V1.H8, V2.H8, V3.H8 // 434c616e
+ VUQSHL V1.B8, V2.B8, V3.B8 // 434c212e
+ VUQSHL V1.B16, V2.B16, V3.B16 // 434c216e
VSRI $56, V1.D2, V2.D2 // 2244486f
VSRI $24, V1.S4, V2.S4 // 2244286f
VSRI $24, V1.S2, V2.S2 // 2244282f
diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s
index 71c025ca55..72a22896c1 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64error.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64error.s
@@ -147,6 +147,10 @@ TEXT errors(SB),$0
VRBIT V1.H4, V2.H4 // ERROR "invalid arrangement"
VUSHR $56, V1.D2, V2.H4 // ERROR "invalid arrangement"
VUSHR $127, V1.D2, V2.D2 // ERROR "shift out of range"
+ VSSHR $127, V1.D2, V2.D2 // ERROR "shift out of range"
+ VSSHR $56, V1.D2, V2.H4 // ERROR "invalid arrangement"
+ VSRSHR $127, V1.D2, V2.D2 // ERROR "shift out of range"
+ VSRSHR $56, V1.D2, V2.H4 // ERROR "invalid arrangement"
VLD1.P (R8)(R9.SXTX<<2), [V2.B16] // ERROR "invalid extended register"
VLD1.P (R8)(R9<<2), [V2.B16] // ERROR "invalid extended register"
VST1.P [V1.B16], (R8)(R9.UXTW) // ERROR "invalid extended register"
@@ -379,6 +383,10 @@ TEXT errors(SB),$0
VUMIN V1.H4, V2.S4, V3.H4 // ERROR "operand mismatch"
VSLI $64, V7.D2, V8.D2 // ERROR "shift out of range"
VUSRA $0, V7.D2, V8.D2 // ERROR "shift out of range"
+ VSSHL V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch"
+ VUSHL V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch"
+ VSQSHL V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch"
+ VUQSHL V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch"
CASPD (R3, R4), (R2), (R8, R9) // ERROR "source register pair must start from even register"
CASPD (R2, R3), (R2), (R9, R10) // ERROR "destination register pair must start from even register"
CASPD (R2, R4), (R2), (R8, R9) // ERROR "source register pair must be contiguous"
@@ -429,4 +437,12 @@ TEXT errors(SB),$0
AUTIA1716 $45 // ERROR "illegal combination"
AUTIB1716 R0 // ERROR "illegal combination"
SB $1 // ERROR "illegal combination"
+
+ // VSHRN/VSHRN2 error test cases - invalid arrangements
+ VSHRN $8, V1.B8, V0.B8 // ERROR "invalid arrangement"
+ VSHRN $8, V1.S4, V0.S4 // ERROR "invalid arrangement"
+ VSHRN $8, V1.H8, V0.H8 // ERROR "invalid arrangement"
+ VSHRN2 $8, V1.B8, V0.B16 // ERROR "invalid arrangement"
+ VSHRN2 $8, V1.S4, V0.S4 // ERROR "invalid arrangement"
+ VSHRN2 $8, V1.H8, V0.H8 // ERROR "invalid arrangement"
RET