about | summary | refs | log | tree | commit | diff
path: root/src/cmd/asm/internal
diff options
context:
space:
mode:
author: Xiaolin Zhao <zhaoxiaolin@loongson.cn> 2025-08-29 16:20:16 +0800
committer: Gopher Robot <gobot@golang.org> 2025-09-04 09:22:33 -0700
commit: b8cc907425c4b851d2b941cf689cf8177ea8a153 (patch)
tree: c6d99ae0cff79fbfa55dcaa69928a1c24ffc474a /src/cmd/asm/internal
parent: 8c27a808905b0611b0a7b7bbff08819206be3b86 (diff)
download: go-b8cc907425c4b851d2b941cf689cf8177ea8a153.tar.xz
cmd/internal/obj/loong64: fix the usage of offset in the instructions [X]VLDREPL.{B/H/W/D}
The previously defined usage of offset was ambiguous and not easy to understand.

For example, to fetch 4 bytes of data from the address base+8 and broadcast it to each word element of vector register V5, the assembly implementation is as follows:

    previous: VMOVQ 2(base), V5.W4
    current:  VMOVQ 8(base), V5.W4

Change-Id: I8bc84e35033ab63bd10f4c61618789f94314f78c
Reviewed-on: https://go-review.googlesource.com/c/go/+/699875
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Auto-Submit: Michael Pratt <mpratt@google.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd/asm/internal')
-rw-r--r-- src/cmd/asm/internal/asm/testdata/loong64enc1.s | 28
1 files changed, 22 insertions, 6 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s
index 63676cc785..c5c6a4479a 100644
--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s
+++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s
@@ -538,13 +538,29 @@ lable2:
// Load data from memory and broadcast to each element of a vector register: VMOVQ offset(Rj), <Vd>.<T>
VMOVQ (R4), V0.B16 // 80008030
- VMOVQ 1(R4), V1.H8 // 81044030
- VMOVQ 2(R4), V2.W4 // 82082030
- VMOVQ 3(R4), V3.V2 // 830c1030
+ VMOVQ 1(R4), V0.B16 // 80048030
+ VMOVQ -3(R4), V0.B16 // 80f4bf30
+ VMOVQ (R4), V1.H8 // 81004030
+ VMOVQ 2(R4), V1.H8 // 81044030
+ VMOVQ -6(R4), V1.H8 // 81f45f30
+ VMOVQ (R4), V2.W4 // 82002030
+ VMOVQ 8(R4), V2.W4 // 82082030
+ VMOVQ -12(R4), V2.W4 // 82f42f30
+ VMOVQ (R4), V3.V2 // 83001030
+ VMOVQ 24(R4), V3.V2 // 830c1030
+ VMOVQ -16(R4), V3.V2 // 83f81730
XVMOVQ (R4), X0.B32 // 80008032
- XVMOVQ 1(R4), X1.H16 // 81044032
- XVMOVQ 2(R4), X2.W8 // 82082032
- XVMOVQ 3(R4), X3.V4 // 830c1032
+ XVMOVQ 1(R4), X0.B32 // 80048032
+ XVMOVQ -5(R4), X0.B32 // 80ecbf32
+ XVMOVQ (R4), X1.H16 // 81004032
+ XVMOVQ 2(R4), X1.H16 // 81044032
+ XVMOVQ -10(R4), X1.H16 // 81ec5f32
+ XVMOVQ (R4), X2.W8 // 82002032
+ XVMOVQ 8(R4), X2.W8 // 82082032
+ XVMOVQ -20(R4), X2.W8 // 82ec2f32
+ XVMOVQ (R4), X3.V4 // 83001032
+ XVMOVQ 24(R4), X3.V4 // 830c1032
+ XVMOVQ -24(R4), X3.V4 // 83f41732
// VSEQ{B,H,W,V}, XVSEQ{B,H,W,V} instruction
VSEQB V1, V2, V3 // 43040070