aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/internal
diff options
context:
space:
mode:
authorGuoqi Chen <chenguoqi@loongson.cn>2025-10-29 15:43:54 +0800
committerabner chenc <chenguoqi@loongson.cn>2025-11-03 17:16:43 -0800
commitdadbac0c9ed3bd8f18dcb7e250f3f8f2808a4927 (patch)
tree6c79f6193fb1d33eb02e28b7e7ade80479783fb1 /src/cmd/internal
parente2c6a2024c9bdd56786feef42a2e2c5c5adeced2 (diff)
downloadgo-dadbac0c9ed3bd8f18dcb7e250f3f8f2808a4927.tar.xz
cmd/internal/obj/loong64: add VPERMI.W, XVPERMI.{W,V,Q} instruction support
Go asm syntax:
	VPERMIW		$0x1b, vj, vd
	XVPERMI{W,V,Q}	$0x1b, xj, xd

Equivalent platform assembler syntax:
	vpermi.w	vd, vj, $0x1b
	xvpermi.{w,d,q}	xd, xj, $0x1b

Change-Id: Ie23b2fdd09b4c93801dc804913206f1c5a496268
Reviewed-on: https://go-review.googlesource.com/c/go/+/716800
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Diffstat (limited to 'src/cmd/internal')
-rw-r--r--src/cmd/internal/obj/loong64/a.out.go5
-rw-r--r--src/cmd/internal/obj/loong64/anames.go4
-rw-r--r--src/cmd/internal/obj/loong64/asm.go12
-rw-r--r--src/cmd/internal/obj/loong64/doc.go17
4 files changed, 38 insertions, 0 deletions
diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go
index 3a676db922..762dc338e3 100644
--- a/src/cmd/internal/obj/loong64/a.out.go
+++ b/src/cmd/internal/obj/loong64/a.out.go
@@ -1115,6 +1115,11 @@ const (
AXVSHUF4IW
AXVSHUF4IV
+ AVPERMIW
+ AXVPERMIW
+ AXVPERMIV
+ AXVPERMIQ
+
AVSETEQV
AVSETNEV
AVSETANYEQB
diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go
index 422ccbd9b0..607e606311 100644
--- a/src/cmd/internal/obj/loong64/anames.go
+++ b/src/cmd/internal/obj/loong64/anames.go
@@ -586,6 +586,10 @@ var Anames = []string{
"XVSHUF4IH",
"XVSHUF4IW",
"XVSHUF4IV",
+ "VPERMIW",
+ "XVPERMIW",
+ "XVPERMIV",
+ "XVPERMIQ",
"VSETEQV",
"VSETNEV",
"VSETANYEQB",
diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go
index 7eb5668d82..8e2393bc1c 100644
--- a/src/cmd/internal/obj/loong64/asm.go
+++ b/src/cmd/internal/obj/loong64/asm.go
@@ -1778,6 +1778,7 @@ func buildop(ctxt *obj.Link) {
opset(AVSHUF4IH, r0)
opset(AVSHUF4IW, r0)
opset(AVSHUF4IV, r0)
+ opset(AVPERMIW, r0)
case AXVANDB:
opset(AXVORB, r0)
@@ -1787,6 +1788,9 @@ func buildop(ctxt *obj.Link) {
opset(AXVSHUF4IH, r0)
opset(AXVSHUF4IW, r0)
opset(AXVSHUF4IV, r0)
+ opset(AXVPERMIW, r0)
+ opset(AXVPERMIV, r0)
+ opset(AXVPERMIQ, r0)
case AVANDV:
opset(AVORV, r0)
@@ -4362,6 +4366,14 @@ func (c *ctxt0) opirr(a obj.As) uint32 {
return 0x1de6 << 18 // xvshuf4i.w
case AXVSHUF4IV:
return 0x1de7 << 18 // xvshuf4i.d
+ case AVPERMIW:
+ return 0x1cf9 << 18 // vpermi.w
+ case AXVPERMIW:
+ return 0x1df9 << 18 // xvpermi.w
+ case AXVPERMIV:
+ return 0x1dfa << 18 // xvpermi.d
+ case AXVPERMIQ:
+ return 0x1dfb << 18 // xvpermi.q
case AVBITCLRB:
return 0x1CC4<<18 | 0x1<<13 // vbitclri.b
case AVBITCLRH:
diff --git a/src/cmd/internal/obj/loong64/doc.go b/src/cmd/internal/obj/loong64/doc.go
index f7e5a4fb42..45f75e6e70 100644
--- a/src/cmd/internal/obj/loong64/doc.go
+++ b/src/cmd/internal/obj/loong64/doc.go
@@ -229,6 +229,23 @@ Note: In the following sections 3.1 to 3.6, "ui4" (4-bit unsigned int immediate)
VMOVQ 8(R4), V5.W4 | vldrepl.w v5, r4, $2
VMOVQ 8(R4), V5.V2 | vldrepl.d v5, r4, $1
+3.8 Vector permutation instruction
+ Instruction format:
+ VPERMIW ui8, Vj, Vd
+
+ Mapping between Go and platform assembly:
+ Go assembly | platform assembly | semantics
+ VPERMIW ui8, Vj, Vd | vpermi.w vd, vj, ui8 | VR[vd].W[0] = VR[vj].W[ui8[1:0]], VR[vd].W[1] = VR[vj].W[ui8[3:2]],
+ | | VR[vd].W[2] = VR[vd].W[ui8[5:4]], VR[vd].W[3] = VR[vd].W[ui8[7:6]]
+ XVPERMIW ui8, Xj, Xd | xvpermi.w xd, xj, ui8 | XR[xd].W[0] = XR[xj].W[ui8[1:0]], XR[xd].W[1] = XR[xj].W[ui8[3:2]],
+ | | XR[xd].W[2] = XR[xd].W[ui8[5:4]], XR[xd].W[3] = XR[xd].W[ui8[7:6]],
+ | | XR[xd].W[4] = XR[xj].W[ui8[1:0]+4], XR[xd].W[5] = XR[xj].W[ui8[3:2]+4],
+ | | XR[xd].W[6] = XR[xd].W[ui8[5:4]+4], XR[xd].W[7] = XR[xd].W[ui8[7:6]+4]
+ XVPERMIV ui8, Xj, Xd | xvpermi.d xd, xj, ui8 | XR[xd].D[0] = XR[xj].D[ui8[1:0]], XR[xd].D[1] = XR[xj].D[ui8[3:2]],
+ | | XR[xd].D[2] = XR[xj].D[ui8[5:4]], XR[xd].D[3] = XR[xj].D[ui8[7:6]]
+ XVPERMIQ ui8, Xj, Xd | xvpermi.q xd, xj, ui8 | vec = {XR[xd], XR[xj]}, XR[xd].Q[0] = vec.Q[ui8[1:0]], XR[xd].Q[1] = vec.Q[ui8[5:4]]
+
+
# Special instruction encoding definition and description on LoongArch
1. DBAR hint encoding for LA664(Loongson 3A6000) and later micro-architectures, paraphrased