diff options
| author | Alexander Musman <alexander.musman@gmail.com> | 2025-11-18 16:59:53 +0300 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2026-04-08 03:51:27 -0700 |
| commit | 4dffc57944c829d2fb2cf1b25168c27e555a8e5c (patch) | |
| tree | c9993f427c2f1ce6b60974ef1e7111388838398b /src/cmd | |
| parent | f91403c2a0fd3fc4367d901bd6f238948c49c265 (diff) | |
| download | go-4dffc57944c829d2fb2cf1b25168c27e555a8e5c.tar.xz | |
cmd/internal/obj/arm64: add remaining ASIMD compare instructions
Add remaining arm64 ASIMD vector compare instructions.
Each of these instructions writes all ones (true) or all zeroes (false)
to every bit of the corresponding result lane.
Added integer comparison instructions:
- VCMEQ (compare to zero)
- VCMGE, VCMGT (signed, both two-register and compare to zero)
- VCMHI, VCMHS (unsigned two-register compare)
- VCMLE, VCMLT (signed compare to zero)
Added floating-point comparison instructions:
- VFCMEQ, VFCMGE, VFCMGT (both two-register and zero variants)
- VFCMLE, VFCMLT (compare to zero)
Change-Id: I913165d3934f2556c9bdf38c5103ef56d86383ef
Reviewed-on: https://go-review.googlesource.com/c/go/+/721640
Auto-Submit: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Diffstat (limited to 'src/cmd')
| -rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64.s | 17 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/a.out.go | 11 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/anames.go | 11 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/asm7.go | 139 |
4 files changed, 175 insertions, 3 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 9c35b4b248..2f85308bd3 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -110,6 +110,23 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VADDP V1.D2, V2.D2, V3.D2 // 43bce14e VAND V21.B8, V12.B8, V3.B8 // 831d350e VCMEQ V1.H4, V2.H4, V3.H4 // 438c612e + VCMEQ $0, V2.H4, V3.H4 // 4398600e + VCMGE V1.H4, V2.H4, V3.H4 // 433c610e + VCMGE $0, V2.H4, V3.H4 // 4388602e + VCMGT V1.H4, V2.H4, V3.H4 // 4334610e + VCMGT $0, V2.H4, V3.H4 // 4388600e + VCMHI V1.H4, V2.H4, V3.H4 // 4334612e + VCMHS V1.H4, V2.H4, V3.H4 // 433c612e + VCMLE $0, V2.H4, V3.H4 // 4398602e + VCMLT $0, V2.H4, V3.H4 // 43a8600e + VFCMEQ V1.S4, V2.S4, V3.S4 // 43e4214e + VFCMEQ $(0.0), V2.S4, V3.S4 // 43d8a04e + VFCMGE V1.S4, V2.S4, V3.S4 // 43e4216e + VFCMGE $(0.0), V2.S4, V3.S4 // 43c8a06e + VFCMGT V1.S4, V2.S4, V3.S4 // 43e4a16e + VFCMGT $(0.0), V2.S4, V3.S4 // 43c8a04e + VFCMLE $(0.0), V2.S4, V3.S4 // 43d8a06e + VFCMLT $(0.0), V2.S4, V3.S4 // 43e8a04e VORR V5.B16, V4.B16, V3.B16 // 831ca54e VADD V16.S4, V5.S4, V9.S4 // a984b04e VEOR V0.B16, V1.B16, V0.B16 // 201c206e diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 3a39f2719d..fdc42eabaa 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -1120,6 +1120,12 @@ const ( AVBIT AVBSL AVCMEQ + AVCMGE + AVCMGT + AVCMHI + AVCMHS + AVCMLE + AVCMLT AVCMTST AVCNT AVDUP @@ -1128,6 +1134,11 @@ const ( AVEXT AVFMLA AVFMLS + AVFCMEQ + AVFCMGE + AVFCMGT + AVFCMLE + AVFCMLT AVLD1 AVLD1R AVLD2 diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 564e0faa8b..04986e1748 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -476,6 +476,12 @@ var Anames = []string{ "VBIT", "VBSL", "VCMEQ", + "VCMGE", + "VCMGT", + "VCMHI", + "VCMHS", + "VCMLE", + "VCMLT", 
"VCMTST", "VCNT", "VDUP", @@ -484,6 +490,11 @@ var Anames = []string{ "VEXT", "VFMLA", "VFMLS", + "VFCMEQ", + "VFCMGE", + "VFCMGT", + "VFCMLE", + "VFCMLT", "VLD1", "VLD1R", "VLD2", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 2c35734f26..ecc62251a0 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -375,6 +375,13 @@ var optab = []Optab{ {AFCMPS, C_FREG, C_FREG, C_NONE, C_NONE, C_NONE, 56, 4, 0, 0, 0}, {AFCMPS, C_FCON, C_FREG, C_NONE, C_NONE, C_NONE, 56, 4, 0, 0, 0}, {AVADDP, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 72, 4, 0, 0, 0}, + {AVCMEQ, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 72, 4, 0, 0, 0}, + {AVCMEQ, C_ZCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 109, 4, 0, 0, 0}, + {AVCMLE, C_ZCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 109, 4, 0, 0, 0}, + {AVFCMEQ, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 72, 4, 0, 0, 0}, + {AVFCMEQ, C_FCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 109, 4, 0, 0, 0}, + {AVFCMLE, C_FCON, C_ARNG, C_NONE, C_ARNG, C_NONE, 109, 4, 0, 0, 0}, + {AVADD, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 72, 4, 0, 0, 0}, {AVADD, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 89, 4, 0, 0, 0}, {AVADD, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 89, 4, 0, 0, 0}, @@ -3204,18 +3211,33 @@ func buildop(ctxt *obj.Link) { case AVADDP: oprangeset(AVAND, t) - oprangeset(AVCMEQ, t) oprangeset(AVORR, t) oprangeset(AVEOR, t) oprangeset(AVBSL, t) oprangeset(AVBIT, t) oprangeset(AVCMTST, t) + oprangeset(AVCMHI, t) + oprangeset(AVCMHS, t) oprangeset(AVUMAX, t) oprangeset(AVUMIN, t) oprangeset(AVUZP1, t) oprangeset(AVUZP2, t) oprangeset(AVBIF, t) + case AVCMEQ: + oprangeset(AVCMGE, t) + oprangeset(AVCMGT, t) + + case AVCMLE: + oprangeset(AVCMLT, t) + + case AVFCMEQ: + oprangeset(AVFCMGE, t) + oprangeset(AVFCMGT, t) + + case AVFCMLE: + oprangeset(AVFCMLT, t) + case AVADD: oprangeset(AVSUB, t) oprangeset(AVRAX1, t) @@ -4753,7 +4775,7 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) { if af != ARNG_16B && af != 
ARNG_8B { c.ctxt.Diag("invalid arrangement: %v", p) } - case AVFMLA, AVFMLS: + case AVFMLA, AVFMLS, AVFCMEQ, AVFCMGE, AVFCMGT: if af != ARNG_2D && af != ARNG_2S && af != ARNG_4S { c.ctxt.Diag("invalid arrangement: %v", p) } @@ -4769,7 +4791,7 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) { size = 1 case AVORR, AVBIT, AVBIF: size = 2 - case AVFMLA, AVFMLS: + case AVFMLA, AVFMLS, AVFCMEQ, AVFCMGE, AVFCMGT: if af == ARNG_2D { size = 1 } else { @@ -5851,6 +5873,66 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) { c.ctxt.Diag("illegal argument: %v\n", p) break } + + case 109: /* [cm|fcm][eq|ge|gt|le|lt] $0, Vn.<T>, Vd.<T> */ + // Encoding is same as case 83 (this is a separate case because $0 occupies p.From) + if !(p.From.Type == obj.TYPE_CONST && p.From.Offset == 0) && + !(p.From.Type == obj.TYPE_FCONST && p.From.Val.(float64) == 0.0) { + c.ctxt.Diag("expected a constant zero immediate operand: %v\n", p) + } + an := int((p.Reg >> 5) & 15) + ad := int((p.To.Reg >> 5) & 15) + if an != ad { + c.ctxt.Diag("operand mismatch: %v", p) + break + } + var Q, size uint32 + if p.From.Type == obj.TYPE_FCONST { + switch an { + case ARNG_2D: + Q = 1 + size = 1 + case ARNG_2S: + Q = 0 + size = 0 + case ARNG_4S: + Q = 1 + size = 0 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + } else { + switch an { + case ARNG_16B: + Q = 1 + size = 0 + case ARNG_2D: + Q = 1 + size = 3 + case ARNG_2S: + Q = 0 + size = 2 + case ARNG_4H: + Q = 0 + size = 1 + case ARNG_4S: + Q = 1 + size = 2 + case ARNG_8B: + Q = 0 + size = 0 + case ARNG_8H: + Q = 1 + size = 1 + default: + c.ctxt.Diag("invalid arrangement: %v", p) + } + } + o1 = c.opirr(p, p.As) + rd := uint32(p.To.Reg & 31) + rn := uint32(p.Reg & 31) + o1 |= Q<<30 | size<<22 | (rn << 5) | (rd) + case 127: // Generic SVE instruction encoding matched := false @@ -6468,6 +6550,27 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As, rd, rn, rm int16) uint32 { case AVCMEQ: op = ASIMDSAME(1, 0, 0x11) + case AVCMGE: 
+ op = ASIMDSAME(0, 0, 0x07) + + case AVCMGT: + op = ASIMDSAME(0, 0, 0x06) + + case AVCMHI: + op = ASIMDSAME(1, 0, 0x06) + + case AVCMHS: + op = ASIMDSAME(1, 0, 0x07) + + case AVFCMEQ: + op = ASIMDSAME(0, 0, 0x1C) + + case AVFCMGE: + op = ASIMDSAME(1, 0, 0x1C) + + case AVFCMGT: + op = ASIMDSAME(1, 2, 0x1C) + case AVCNT: op = ASIMDMISC(0, 0, 0x05) @@ -6756,6 +6859,36 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AHINT: return SYSHINT(0) + case AVCMEQ: + return ASIMDMISC(0, 0, 0x09) + + case AVCMGE: + return ASIMDMISC(1, 0, 0x08) + + case AVCMGT: + return ASIMDMISC(0, 0, 0x08) + + case AVCMLE: + return ASIMDMISC(1, 0, 0x09) + + case AVCMLT: + return ASIMDMISC(0, 0, 0x0A) + + case AVFCMEQ: + return ASIMDMISC(0, 2, 0x0D) + + case AVFCMGE: + return ASIMDMISC(1, 2, 0x0C) + + case AVFCMGT: + return ASIMDMISC(0, 2, 0x0C) + + case AVFCMLE: + return ASIMDMISC(1, 2, 0x0D) + + case AVFCMLT: + return ASIMDMISC(0, 2, 0x0E) + case AVEXT: return 0x2E<<24 | 0<<23 | 0<<21 | 0<<15 |
