about | summary | refs | log | tree | commit | diff
path: root/src/cmd/internal/obj
diff options
context:
space:
mode:
author: Alexander Musman <alexander.musman@gmail.com> 2026-04-01 16:23:15 +0300
committer: Gopher Robot <gobot@golang.org> 2026-04-08 03:51:48 -0700
commit: 9111d85e2f699672d67dcee1d6432a940f5306e1 (patch)
tree: 1b9538168dbf9d5f28d4f50a2f4905bd5c0c847e /src/cmd/internal/obj
parent: 4dffc57944c829d2fb2cf1b25168c27e555a8e5c (diff)
download: go-9111d85e2f699672d67dcee1d6432a940f5306e1.tar.xz
cmd/internal/obj/arm64: add ASIMD shift instructions
Add support for ASIMD shift instructions. These use the ASIMDSHF encoding class from the ARM architecture specification, where the shift amount is encoded as an immediate derived from the element size.

Also add ASIMD shifts-by-vector (3-register form), where the shift amount comes from a second vector register. These use the ASIMDSAME encoding class.

New instructions by group:
- Shift by immediate (signed): VSSHR, VSRSHR
- Shift by immediate (saturating): VSQSHL, VUQSHL
- Narrowing shift by immediate: VSHRN, VSHRN2
- Shift by vector (3-reg): VSSHL, VUSHL, VSQSHL, VUQSHL

Change-Id: I039cc16bc01980b04e6940cc1d4670faf5fa7e3c
Reviewed-on: https://go-review.googlesource.com/c/go/+/762180
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd/internal/obj')
-rw-r--r-- src/cmd/internal/obj/arm64/a.out.go | 8
-rw-r--r-- src/cmd/internal/obj/arm64/anames.go | 8
-rw-r--r-- src/cmd/internal/obj/arm64/asm7.go | 64
3 files changed, 75 insertions, 5 deletions
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go
index fdc42eabaa..56f68756fd 100644
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -1161,7 +1161,13 @@ const (
AVREV32
AVREV64
AVSHL
+ AVSHRN
+ AVSHRN2
AVSLI
+ AVSQSHL
+ AVSSHL
+ AVUSHL
+ AVUQSHL
AVSRI
AVST1
AVST2
@@ -1180,6 +1186,8 @@ const (
AVUSHLL
AVUSHLL2
AVUSHR
+ AVSRSHR
+ AVSSHR
AVUSRA
AVUXTL
AVUXTL2
diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go
index 04986e1748..e40c043edd 100644
--- a/src/cmd/internal/obj/arm64/anames.go
+++ b/src/cmd/internal/obj/arm64/anames.go
@@ -517,7 +517,13 @@ var Anames = []string{
"VREV32",
"VREV64",
"VSHL",
+ "VSHRN",
+ "VSHRN2",
"VSLI",
+ "VSQSHL",
+ "VSSHL",
+ "VUSHL",
+ "VUQSHL",
"VSRI",
"VST1",
"VST2",
@@ -536,6 +542,8 @@ var Anames = []string{
"VUSHLL",
"VUSHLL2",
"VUSHR",
+ "VSRSHR",
+ "VSSHR",
"VUSRA",
"VUXTL",
"VUXTL2",
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index ecc62251a0..0d8c1f417e 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -542,7 +542,9 @@ var optab = []Optab{
{AVEXT, C_VCON, C_ARNG, C_ARNG, C_ARNG, C_NONE, 94, 4, 0, 0, 0},
{AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, C_NONE, 100, 4, 0, 0, 0},
{AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 95, 4, 0, 0, 0},
+ {AVSQSHL, C_VCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 95, 4, 0, 0, 0},
{AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 72, 4, 0, 0, 0},
+ {AVSQSHL, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 72, 4, 0, 0, 0},
{AVUSHLL, C_VCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 102, 4, 0, 0, 0},
{AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, C_NONE, 102, 4, 0, 0, 0},
{AVUADDW, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 105, 4, 0, 0, 0},
@@ -3217,6 +3219,8 @@ func buildop(ctxt *obj.Link) {
oprangeset(AVBIT, t)
oprangeset(AVCMTST, t)
oprangeset(AVCMHI, t)
+ oprangeset(AVSSHL, t)
+ oprangeset(AVUSHL, t)
oprangeset(AVCMHS, t)
oprangeset(AVUMAX, t)
oprangeset(AVUMIN, t)
@@ -3277,6 +3281,13 @@ func buildop(ctxt *obj.Link) {
oprangeset(AVSRI, t)
oprangeset(AVSLI, t)
oprangeset(AVUSRA, t)
+ oprangeset(AVSSHR, t)
+ oprangeset(AVSRSHR, t)
+ oprangeset(AVSHRN, t)
+ oprangeset(AVSHRN2, t)
+
+ case AVSQSHL:
+ oprangeset(AVUQSHL, t)
case AVREV32:
oprangeset(AVCNT, t)
@@ -5431,14 +5442,15 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) {
af := int((p.Reg >> 5) & 15)
shift := int(p.From.Offset)
- if af != at {
+ if af != at && p.As != AVSHRN && p.As != AVSHRN2 {
c.ctxt.Diag("invalid arrangement on op Vn.<T>, Vd.<T>: %v", p)
+ at = af
}
var Q uint32
var imax, esize int
- switch af {
+ switch at {
case ARNG_8B, ARNG_4H, ARNG_2S:
Q = 0
case ARNG_16B, ARNG_8H, ARNG_4S, ARNG_2D:
@@ -5447,29 +5459,44 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) {
c.ctxt.Diag("invalid arrangement on op Vn.<T>, Vd.<T>: %v", p)
}
- switch af {
+ atwice := -1
+ switch at {
case ARNG_8B, ARNG_16B:
imax = 15
esize = 8
+ atwice = ARNG_8H
case ARNG_4H, ARNG_8H:
imax = 31
esize = 16
+ atwice = ARNG_4S
case ARNG_2S, ARNG_4S:
imax = 63
esize = 32
+ atwice = ARNG_2D
case ARNG_2D:
imax = 127
esize = 64
}
+ switch p.As {
+ case AVSHRN:
+ if Q != 0 || atwice != af {
+ c.ctxt.Diag("invalid arrangement on op: %v", p)
+ }
+ case AVSHRN2:
+ if Q != 1 || atwice != af {
+ c.ctxt.Diag("invalid arrangement on op: %v", p)
+ }
+ }
+
imm := 0
switch p.As {
- case AVUSHR, AVSRI, AVUSRA:
+ case AVUSHR, AVSRI, AVUSRA, AVSSHR, AVSRSHR, AVSHRN, AVSHRN2:
imm = esize*2 - shift
if imm < esize || imm > imax {
c.ctxt.Diag("shift out of range: %v", p)
}
- case AVSHL, AVSLI:
+ case AVSHL, AVSLI, AVSQSHL, AVUQSHL:
imm = esize + shift
if imm > imax {
c.ctxt.Diag("shift out of range: %v", p)
@@ -6538,9 +6565,21 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 {
case AVSUB:
op = ASIMDSAME(1, 0, 0x10)
+ case AVSSHL:
+ op = ASIMDSAME(0, 0, 0x8)
+
+ case AVUSHL:
+ op = ASIMDSAME(1, 0, 0x8)
+
case AVADDP:
op = ASIMDSAME(0, 0, 0x17)
+ case AVSQSHL:
+ op = ASIMDSAME(0, 0, 0x9)
+
+ case AVUQSHL:
+ op = ASIMDSAME(1, 0, 0x9)
+
case AVAND:
op = ASIMDSAME(0, 0, 0x03)
@@ -6895,9 +6934,24 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 {
case AVUSHR:
return ASIMDSHF(1, 0x00)
+ case AVSSHR:
+ return ASIMDSHF(0, 0x00)
+
+ case AVSRSHR:
+ return ASIMDSHF(0, 0x04)
+
case AVSHL:
return ASIMDSHF(0, 0x0A)
+ case AVSQSHL:
+ return ASIMDSHF(0, 0xE)
+
+ case AVUQSHL:
+ return ASIMDSHF(1, 0xE)
+
+ case AVSHRN, AVSHRN2:
+ return ASIMDSHF(0, 0x10)
+
case AVSRI:
return ASIMDSHF(1, 0x08)