aboutsummaryrefslogtreecommitdiff
path: root/src/cmd
diff options
context:
space:
mode:
authorAlexander Musman <alexander.musman@gmail.com>2025-11-18 16:59:53 +0300
committerGopher Robot <gobot@golang.org>2026-04-08 03:51:27 -0700
commit4dffc57944c829d2fb2cf1b25168c27e555a8e5c (patch)
treec9993f427c2f1ce6b60974ef1e7111388838398b /src/cmd
parentf91403c2a0fd3fc4367d901bd6f238948c49c265 (diff)
downloadgo-4dffc57944c829d2fb2cf1b25168c27e555a8e5c.tar.xz
cmd/internal/obj/arm64: add remaining ASIMD compare instructions
Add remaining arm64 ASIMD vector compare instructions. Each of these instructions sets every bit of the corresponding result lane to zero (false) or to one (true). Added integer comparison instructions: - VCMEQ (compare to zero) - VCMGE, VCMGT (signed, both two-register and compare to zero) - VCMHI, VCMHS (unsigned two-register compare) - VCMLE, VCMLT (signed compare to zero) Added floating-point comparison instructions: - VFCMEQ, VFCMGE, VFCMGT (both two-register and zero variants) - VFCMLE, VFCMLT (compare to zero) Change-Id: I913165d3934f2556c9bdf38c5103ef56d86383ef Reviewed-on: https://go-review.googlesource.com/c/go/+/721640 Auto-Submit: Keith Randall <khr@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Keith Randall <khr@google.com>
Diffstat (limited to 'src/cmd')
-rw-r--r--src/cmd/asm/internal/asm/testdata/arm64.s17
-rw-r--r--src/cmd/internal/obj/arm64/a.out.go11
-rw-r--r--src/cmd/internal/obj/arm64/anames.go11
-rw-r--r--src/cmd/internal/obj/arm64/asm7.go139
4 files changed, 175 insertions, 3 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index 9c35b4b248..2f85308bd3 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -110,6 +110,23 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VADDP V1.D2, V2.D2, V3.D2 // 43bce14e
VAND V21.B8, V12.B8, V3.B8 // 831d350e
VCMEQ V1.H4, V2.H4, V3.H4 // 438c612e
+ VCMEQ $0, V2.H4, V3.H4 // 4398600e
+ VCMGE V1.H4, V2.H4, V3.H4 // 433c610e
+ VCMGE $0, V2.H4, V3.H4 // 4388602e
+ VCMGT V1.H4, V2.H4, V3.H4 // 4334610e
+ VCMGT $0, V2.H4, V3.H4 // 4388600e
+ VCMHI V1.H4, V2.H4, V3.H4 // 4334612e
+ VCMHS V1.H4, V2.H4, V3.H4 // 433c612e
+ VCMLE $0, V2.H4, V3.H4 // 4398602e
+ VCMLT $0, V2.H4, V3.H4 // 43a8600e
+ VFCMEQ V1.S4, V2.S4, V3.S4 // 43e4214e
+ VFCMEQ $(0.0), V2.S4, V3.S4 // 43d8a04e
+ VFCMGE V1.S4, V2.S4, V3.S4 // 43e4216e
+ VFCMGE $(0.0), V2.S4, V3.S4 // 43c8a06e
+ VFCMGT V1.S4, V2.S4, V3.S4 // 43e4a16e
+ VFCMGT $(0.0), V2.S4, V3.S4 // 43c8a04e
+ VFCMLE $(0.0), V2.S4, V3.S4 // 43d8a06e
+ VFCMLT $(0.0), V2.S4, V3.S4 // 43e8a04e
VORR V5.B16, V4.B16, V3.B16 // 831ca54e
VADD V16.S4, V5.S4, V9.S4 // a984b04e
VEOR V0.B16, V1.B16, V0.B16 // 201c206e
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go
index 3a39f2719d..fdc42eabaa 100644
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -1120,6 +1120,12 @@ const (
AVBIT
AVBSL
AVCMEQ
+ AVCMGE
+ AVCMGT
+ AVCMHI
+ AVCMHS
+ AVCMLE
+ AVCMLT
AVCMTST
AVCNT
AVDUP
@@ -1128,6 +1134,11 @@ const (
AVEXT
AVFMLA
AVFMLS
+ AVFCMEQ
+ AVFCMGE
+ AVFCMGT
+ AVFCMLE
+ AVFCMLT
AVLD1
AVLD1R
AVLD2
diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go
index 564e0faa8b..04986e1748 100644
--- a/src/cmd/internal/obj/arm64/anames.go
+++ b/src/cmd/internal/obj/arm64/anames.go
@@ -476,6 +476,12 @@ var Anames = []string{
"VBIT",
"VBSL",
"VCMEQ",
+ "VCMGE",
+ "VCMGT",
+ "VCMHI",
+ "VCMHS",
+ "VCMLE",
+ "VCMLT",
"VCMTST",
"VCNT",
"VDUP",
@@ -484,6 +490,11 @@ var Anames = []string{
"VEXT",
"VFMLA",
"VFMLS",
+ "VFCMEQ",
+ "VFCMGE",
+ "VFCMGT",
+ "VFCMLE",
+ "VFCMLT",
"VLD1",
"VLD1R",
"VLD2",
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 2c35734f26..ecc62251a0 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -375,6 +375,13 @@ var optab = []Optab{
{AFCMPS, C_FREG, C_FREG, C_NONE, C_NONE, C_NONE, 56, 4, 0, 0, 0},
{AFCMPS, C_FCON, C_FREG, C_NONE, C_NONE, C_NONE, 56, 4, 0, 0, 0},
{AVADDP, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 72, 4, 0, 0, 0},
+ {AVCMEQ, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 72, 4, 0, 0, 0},
+ {AVCMEQ, C_ZCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 109, 4, 0, 0, 0},
+ {AVCMLE, C_ZCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 109, 4, 0, 0, 0},
+ {AVFCMEQ, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 72, 4, 0, 0, 0},
+ {AVFCMEQ, C_FCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 109, 4, 0, 0, 0},
+ {AVFCMLE, C_FCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 109, 4, 0, 0, 0},
+
{AVADD, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 72, 4, 0, 0, 0},
{AVADD, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 89, 4, 0, 0, 0},
{AVADD, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 89, 4, 0, 0, 0},
@@ -3204,18 +3211,33 @@ func buildop(ctxt *obj.Link) {
case AVADDP:
oprangeset(AVAND, t)
- oprangeset(AVCMEQ, t)
oprangeset(AVORR, t)
oprangeset(AVEOR, t)
oprangeset(AVBSL, t)
oprangeset(AVBIT, t)
oprangeset(AVCMTST, t)
+ oprangeset(AVCMHI, t)
+ oprangeset(AVCMHS, t)
oprangeset(AVUMAX, t)
oprangeset(AVUMIN, t)
oprangeset(AVUZP1, t)
oprangeset(AVUZP2, t)
oprangeset(AVBIF, t)
+ case AVCMEQ:
+ oprangeset(AVCMGE, t)
+ oprangeset(AVCMGT, t)
+
+ case AVCMLE:
+ oprangeset(AVCMLT, t)
+
+ case AVFCMEQ:
+ oprangeset(AVFCMGE, t)
+ oprangeset(AVFCMGT, t)
+
+ case AVFCMLE:
+ oprangeset(AVFCMLT, t)
+
case AVADD:
oprangeset(AVSUB, t)
oprangeset(AVRAX1, t)
@@ -4753,7 +4775,7 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) {
if af != ARNG_16B && af != ARNG_8B {
c.ctxt.Diag("invalid arrangement: %v", p)
}
- case AVFMLA, AVFMLS:
+ case AVFMLA, AVFMLS, AVFCMEQ, AVFCMGE, AVFCMGT:
if af != ARNG_2D && af != ARNG_2S && af != ARNG_4S {
c.ctxt.Diag("invalid arrangement: %v", p)
}
@@ -4769,7 +4791,7 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) {
size = 1
case AVORR, AVBIT, AVBIF:
size = 2
- case AVFMLA, AVFMLS:
+ case AVFMLA, AVFMLS, AVFCMEQ, AVFCMGE, AVFCMGT:
if af == ARNG_2D {
size = 1
} else {
@@ -5851,6 +5873,66 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) {
c.ctxt.Diag("illegal argument: %v\n", p)
break
}
+
+ case 109: /* [cm|fcm][eq|ge|gt|le|lt] $0, Vn.<T>, Vd.<T> */
+ // Encoding is same as case 83 (this is a separate case because $0 occupies p.From)
+ if !(p.From.Type == obj.TYPE_CONST && p.From.Offset == 0) &&
+ !(p.From.Type == obj.TYPE_FCONST && p.From.Val.(float64) == 0.0) {
+ c.ctxt.Diag("expected a constant zero immediate operand: %v\n", p)
+ }
+ an := int((p.Reg >> 5) & 15)
+ ad := int((p.To.Reg >> 5) & 15)
+ if an != ad {
+ c.ctxt.Diag("operand mismatch: %v", p)
+ break
+ }
+ var Q, size uint32
+ if p.From.Type == obj.TYPE_FCONST {
+ switch an {
+ case ARNG_2D:
+ Q = 1
+ size = 1
+ case ARNG_2S:
+ Q = 0
+ size = 0
+ case ARNG_4S:
+ Q = 1
+ size = 0
+ default:
+ c.ctxt.Diag("invalid arrangement: %v", p)
+ }
+ } else {
+ switch an {
+ case ARNG_16B:
+ Q = 1
+ size = 0
+ case ARNG_2D:
+ Q = 1
+ size = 3
+ case ARNG_2S:
+ Q = 0
+ size = 2
+ case ARNG_4H:
+ Q = 0
+ size = 1
+ case ARNG_4S:
+ Q = 1
+ size = 2
+ case ARNG_8B:
+ Q = 0
+ size = 0
+ case ARNG_8H:
+ Q = 1
+ size = 1
+ default:
+ c.ctxt.Diag("invalid arrangement: %v", p)
+ }
+ }
+ o1 = c.opirr(p, p.As)
+ rd := uint32(p.To.Reg & 31)
+ rn := uint32(p.Reg & 31)
+ o1 |= Q<<30 | size<<22 | (rn << 5) | (rd)
+
case 127:
// Generic SVE instruction encoding
matched := false
@@ -6468,6 +6550,27 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 {
case AVCMEQ:
op = ASIMDSAME(1, 0, 0x11)
+ case AVCMGE:
+ op = ASIMDSAME(0, 0, 0x07)
+
+ case AVCMGT:
+ op = ASIMDSAME(0, 0, 0x06)
+
+ case AVCMHI:
+ op = ASIMDSAME(1, 0, 0x06)
+
+ case AVCMHS:
+ op = ASIMDSAME(1, 0, 0x07)
+
+ case AVFCMEQ:
+ op = ASIMDSAME(0, 0, 0x1C)
+
+ case AVFCMGE:
+ op = ASIMDSAME(1, 0, 0x1C)
+
+ case AVFCMGT:
+ op = ASIMDSAME(1, 2, 0x1C)
+
case AVCNT:
op = ASIMDMISC(0, 0, 0x05)
@@ -6756,6 +6859,36 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 {
case AHINT:
return SYSHINT(0)
+ case AVCMEQ:
+ return ASIMDMISC(0, 0, 0x09)
+
+ case AVCMGE:
+ return ASIMDMISC(1, 0, 0x08)
+
+ case AVCMGT:
+ return ASIMDMISC(0, 0, 0x08)
+
+ case AVCMLE:
+ return ASIMDMISC(1, 0, 0x09)
+
+ case AVCMLT:
+ return ASIMDMISC(0, 0, 0x0A)
+
+ case AVFCMEQ:
+ return ASIMDMISC(0, 2, 0x0D)
+
+ case AVFCMGE:
+ return ASIMDMISC(1, 2, 0x0C)
+
+ case AVFCMGT:
+ return ASIMDMISC(0, 2, 0x0C)
+
+ case AVFCMLE:
+ return ASIMDMISC(1, 2, 0x0D)
+
+ case AVFCMLT:
+ return ASIMDMISC(0, 2, 0x0E)
+
case AVEXT:
return 0x2E<<24 | 0<<23 | 0<<21 | 0<<15