diff options
| author | Xiaolin Zhao <zhaoxiaolin@loongson.cn> | 2024-11-14 11:35:39 +0800 |
|---|---|---|
| committer | abner chenc <chenguoqi@loongson.cn> | 2025-03-10 17:55:10 -0700 |
| commit | 2a772a2fe7db5602a2932c63a0278ed45e8762cc (patch) | |
| tree | 601d35b885347b62aae54d0f34cde3f39a5a663d /src/cmd | |
| parent | 2abe5ceb1968edb882c8318af674c4133dd0bb75 (diff) | |
| download | go-2a772a2fe7db5602a2932c63a0278ed45e8762cc.tar.xz | |
cmd/compile: optimize shifts of int32 and uint32 on loong64
goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000-HV @ 2500.00MHz
| benchmark | bench.old (sec/op) | bench.new (sec/op) | vs base |
LeadingZeros 1.100n ± 1% 1.101n ± 0% ~ (p=0.566 n=10)
LeadingZeros8 1.501n ± 0% 1.502n ± 0% +0.07% (p=0.000 n=10)
LeadingZeros16 1.501n ± 0% 1.502n ± 0% +0.07% (p=0.000 n=10)
LeadingZeros32 1.2010n ± 0% 0.9511n ± 0% -20.81% (p=0.000 n=10)
LeadingZeros64 1.104n ± 1% 1.119n ± 0% +1.40% (p=0.000 n=10)
TrailingZeros 0.8137n ± 0% 0.8086n ± 0% -0.63% (p=0.001 n=10)
TrailingZeros8 1.031n ± 1% 1.031n ± 1% ~ (p=0.956 n=10)
TrailingZeros16 0.8204n ± 1% 0.8114n ± 0% -1.11% (p=0.000 n=10)
TrailingZeros32 0.8145n ± 0% 0.8090n ± 0% -0.68% (p=0.000 n=10)
TrailingZeros64 0.8159n ± 0% 0.8089n ± 1% -0.86% (p=0.000 n=10)
OnesCount 0.8672n ± 0% 0.8677n ± 0% +0.06% (p=0.000 n=10)
OnesCount8 0.8005n ± 0% 0.8009n ± 0% +0.06% (p=0.000 n=10)
OnesCount16 0.9339n ± 0% 0.9344n ± 0% +0.05% (p=0.000 n=10)
OnesCount32 0.8672n ± 0% 0.8677n ± 0% +0.06% (p=0.000 n=10)
OnesCount64 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10)
RotateLeft 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
RotateLeft8 1.202n ± 0% 1.202n ± 0% ~ (p=0.210 n=10)
RotateLeft16 0.8050n ± 0% 0.8036n ± 0% -0.17% (p=0.002 n=10)
RotateLeft32 0.6674n ± 0% 0.6674n ± 0% ~ (p=1.000 n=10)
RotateLeft64 0.6673n ± 0% 0.6674n ± 0% ~ (p=0.072 n=10)
Reverse 0.4123n ± 0% 0.4067n ± 1% -1.37% (p=0.000 n=10)
Reverse8 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
Reverse16 0.8004n ± 0% 0.8009n ± 0% +0.06% (p=0.000 n=10)
Reverse32 0.8004n ± 0% 0.8009n ± 0% +0.06% (p=0.000 n=10)
Reverse64 0.8004n ± 0% 0.8009n ± 0% +0.06% (p=0.001 n=10)
ReverseBytes 0.4100n ± 1% 0.4057n ± 1% -1.06% (p=0.002 n=10)
ReverseBytes16 0.8004n ± 0% 0.8009n ± 0% +0.07% (p=0.000 n=10)
ReverseBytes32 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
ReverseBytes64 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
Add 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Add32 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10)
Add64 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Add64multiple 1.831n ± 0% 1.832n ± 0% ~ (p=1.000 n=10)
Sub 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Sub32 1.601n ± 0% 1.602n ± 0% +0.06% (p=0.000 n=10)
Sub64 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10)
Sub64multiple 2.400n ± 0% 2.402n ± 0% +0.10% (p=0.000 n=10)
Mul 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
Mul32 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
Mul64 0.8004n ± 0% 0.8008n ± 0% +0.05% (p=0.000 n=10)
Div 9.107n ± 0% 9.083n ± 0% ~ (p=0.255 n=10)
Div32 4.009n ± 0% 4.011n ± 0% +0.05% (p=0.000 n=10)
Div64 9.705n ± 0% 9.711n ± 0% +0.06% (p=0.000 n=10)
geomean 1.089n 1.083n -0.62%
goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A5000 @ 2500.00MHz
| benchmark | bench.old (sec/op) | bench.new (sec/op) | vs base |
LeadingZeros 1.352n ± 0% 1.341n ± 4% -0.81% (p=0.024 n=10)
LeadingZeros8 1.766n ± 0% 1.781n ± 0% +0.88% (p=0.000 n=10)
LeadingZeros16 1.766n ± 0% 1.782n ± 0% +0.88% (p=0.000 n=10)
LeadingZeros32 1.536n ± 0% 1.341n ± 1% -12.73% (p=0.000 n=10)
LeadingZeros64 1.351n ± 1% 1.338n ± 0% -0.96% (p=0.000 n=10)
TrailingZeros 0.9037n ± 0% 0.9025n ± 0% -0.12% (p=0.020 n=10)
TrailingZeros8 1.087n ± 3% 1.056n ± 0% ~ (p=0.060 n=10)
TrailingZeros16 1.101n ± 0% 1.101n ± 0% ~ (p=0.211 n=10)
TrailingZeros32 0.9040n ± 0% 0.9024n ± 1% -0.18% (p=0.017 n=10)
TrailingZeros64 0.9043n ± 0% 0.9028n ± 1% ~ (p=0.118 n=10)
OnesCount 1.503n ± 2% 1.482n ± 1% -1.43% (p=0.001 n=10)
OnesCount8 1.207n ± 0% 1.206n ± 0% -0.12% (p=0.000 n=10)
OnesCount16 1.501n ± 0% 1.534n ± 0% +2.13% (p=0.000 n=10)
OnesCount32 1.483n ± 1% 1.531n ± 1% +3.27% (p=0.000 n=10)
OnesCount64 1.301n ± 0% 1.302n ± 0% +0.08% (p=0.000 n=10)
RotateLeft 0.8136n ± 4% 0.8083n ± 0% -0.66% (p=0.002 n=10)
RotateLeft8 1.311n ± 0% 1.310n ± 0% ~ (p=0.786 n=10)
RotateLeft16 1.165n ± 0% 1.149n ± 0% -1.33% (p=0.001 n=10)
RotateLeft32 0.8138n ± 1% 0.8093n ± 0% -0.57% (p=0.017 n=10)
RotateLeft64 0.8149n ± 1% 0.8088n ± 0% -0.74% (p=0.000 n=10)
Reverse 0.5195n ± 1% 0.5109n ± 0% -1.67% (p=0.000 n=10)
Reverse8 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
Reverse16 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
Reverse32 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.012 n=10)
Reverse64 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.010 n=10)
ReverseBytes 0.5120n ± 1% 0.5122n ± 2% ~ (p=0.306 n=10)
ReverseBytes16 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
ReverseBytes32 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
ReverseBytes64 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
Add 1.201n ± 0% 1.201n ± 4% ~ (p=0.334 n=10)
Add32 1.201n ± 0% 1.201n ± 0% ~ (p=0.563 n=10)
Add64 1.201n ± 0% 1.201n ± 1% ~ (p=0.652 n=10)
Add64multiple 1.909n ± 0% 1.902n ± 0% ~ (p=0.126 n=10)
Sub 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Sub32 1.655n ± 0% 1.654n ± 0% ~ (p=0.589 n=10)
Sub64 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Sub64multiple 2.150n ± 0% 2.180n ± 4% +1.37% (p=0.000 n=10)
Mul 0.9341n ± 0% 0.9345n ± 0% +0.04% (p=0.011 n=10)
Mul32 1.053n ± 0% 1.030n ± 0% -2.23% (p=0.000 n=10)
Mul64 0.9341n ± 0% 0.9345n ± 0% +0.04% (p=0.018 n=10)
Div 11.59n ± 0% 11.57n ± 1% ~ (p=0.091 n=10)
Div32 4.337n ± 0% 4.337n ± 1% ~ (p=0.783 n=10)
Div64 12.81n ± 0% 12.76n ± 0% -0.39% (p=0.001 n=10)
geomean 1.257n 1.252n -0.46%
Change-Id: I9e93ea49736760c19dc6b6463d2aa95878121b7b
Reviewed-on: https://go-review.googlesource.com/c/go/+/627855
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Diffstat (limited to 'src/cmd')
| -rw-r--r-- | src/cmd/compile/internal/loong64/ssa.go | 6 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/LOONG64.rules | 39 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go | 6 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/opGen.go | 90 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewriteLOONG64.go | 427 |
5 files changed, 441 insertions, 127 deletions
diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go index e8b8b27f87..60516d6618 100644 --- a/src/cmd/compile/internal/loong64/ssa.go +++ b/src/cmd/compile/internal/loong64/ssa.go @@ -165,8 +165,11 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpLOONG64OR, ssa.OpLOONG64XOR, ssa.OpLOONG64NOR, + ssa.OpLOONG64SLL, ssa.OpLOONG64SLLV, + ssa.OpLOONG64SRL, ssa.OpLOONG64SRLV, + ssa.OpLOONG64SRA, ssa.OpLOONG64SRAV, ssa.OpLOONG64ROTR, ssa.OpLOONG64ROTRV, @@ -274,8 +277,11 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpLOONG64ORconst, ssa.OpLOONG64XORconst, ssa.OpLOONG64NORconst, + ssa.OpLOONG64SLLconst, ssa.OpLOONG64SLLVconst, + ssa.OpLOONG64SRLconst, ssa.OpLOONG64SRLVconst, + ssa.OpLOONG64SRAconst, ssa.OpLOONG64SRAVconst, ssa.OpLOONG64ROTRconst, ssa.OpLOONG64ROTRVconst, diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules index 7ffd579dc7..41c120c983 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules @@ -62,10 +62,10 @@ (Lsh64x16 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (Lsh64x8 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) -(Lsh32x64 <t> x y) => (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) -(Lsh32x32 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) -(Lsh32x16 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) -(Lsh32x8 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) +(Lsh32x64 <t> x y) => (MASKEQZ (SLL <t> x y) (SGTU (MOVVconst <typ.UInt64> [32]) y)) +(Lsh32x32 <t> x y) => (MASKEQZ (SLL <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt32to64 y))) 
+(Lsh32x16 <t> x y) => (MASKEQZ (SLL <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt16to64 y))) +(Lsh32x8 <t> x y) => (MASKEQZ (SLL <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt8to64 y))) (Lsh16x64 <t> x y) => (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) (Lsh16x32 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) @@ -82,10 +82,10 @@ (Rsh64Ux16 <t> x y) => (MASKEQZ (SRLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (Rsh64Ux8 <t> x y) => (MASKEQZ (SRLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) -(Rsh32Ux64 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt32to64 x) y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) -(Rsh32Ux32 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) -(Rsh32Ux16 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) -(Rsh32Ux8 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt32to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) +(Rsh32Ux64 <t> x y) => (MASKEQZ (SRL <t> x y) (SGTU (MOVVconst <typ.UInt64> [32]) y)) +(Rsh32Ux32 <t> x y) => (MASKEQZ (SRL <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt32to64 y))) +(Rsh32Ux16 <t> x y) => (MASKEQZ (SRL <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt16to64 y))) +(Rsh32Ux8 <t> x y) => (MASKEQZ (SRL <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt8to64 y))) (Rsh16Ux64 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt16to64 x) y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) (Rsh16Ux32 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) @@ -102,10 +102,10 @@ (Rsh64x16 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> 
[63]))) (ZeroExt16to64 y))) (Rsh64x8 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y))) -(Rsh32x64 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y)) -(Rsh32x32 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y))) -(Rsh32x16 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y))) -(Rsh32x8 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y))) +(Rsh32x64 <t> x y) => (SRA x (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [31]))) y)) +(Rsh32x32 <t> x y) => (SRA x (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [31]))) (ZeroExt32to64 y))) +(Rsh32x16 <t> x y) => (SRA x (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [31]))) (ZeroExt16to64 y))) +(Rsh32x8 <t> x y) => (SRA x (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [31]))) (ZeroExt8to64 y))) (Rsh16x64 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y)) (Rsh16x32 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y))) @@ -686,15 +686,30 @@ (XOR x (MOVVconst [c])) && is32Bit(c) => (XORconst [c] x) (NOR x (MOVVconst [c])) && is32Bit(c) => (NORconst [c] x) +(SLL _ (MOVVconst [c])) && uint64(c)>=32 => (MOVVconst [0]) (SLLV _ (MOVVconst [c])) && uint64(c)>=64 => (MOVVconst [0]) +(SRL _ (MOVVconst [c])) && uint64(c)>=32 => (MOVVconst [0]) (SRLV _ (MOVVconst [c])) && uint64(c)>=64 => (MOVVconst [0]) +(SRA x (MOVVconst [c])) && uint64(c)>=32 => (SRAconst x [31]) (SRAV x (MOVVconst [c])) && uint64(c)>=64 => (SRAVconst x [63]) +(SLL x (MOVVconst [c])) && uint64(c) >=0 && uint64(c) <=31 => (SLLconst x [c]) (SLLV x (MOVVconst 
[c])) => (SLLVconst x [c]) +(SRL x (MOVVconst [c])) && uint64(c) >=0 && uint64(c) <=31 => (SRLconst x [c]) (SRLV x (MOVVconst [c])) => (SRLVconst x [c]) +(SRA x (MOVVconst [c])) && uint64(c) >=0 && uint64(c) <=31 => (SRAconst x [c]) (SRAV x (MOVVconst [c])) => (SRAVconst x [c]) (ROTR x (MOVVconst [c])) => (ROTRconst x [c&31]) (ROTRV x (MOVVconst [c])) => (ROTRVconst x [c&63]) +// Avoid unnecessary zero and sign extension when right shifting. +(SRLVconst [rc] (MOVWUreg y)) && rc >= 0 && rc <= 31 => (SRLconst [int64(rc)] y) +(SRAVconst [rc] (MOVWreg y)) && rc >= 0 && rc <= 31 => (SRAconst [int64(rc)] y) + +// Replace right shifts that exceed size of signed type. +(SRAVconst <t> [rc] (MOVBreg y)) && rc >= 8 => (SRAVconst [63] (SLLVconst <t> [56] y)) +(SRAVconst <t> [rc] (MOVHreg y)) && rc >= 16 => (SRAVconst [63] (SLLVconst <t> [48] y)) +(SRAVconst <t> [rc] (MOVWreg y)) && rc >= 32 => (SRAconst [31] y) + // If the shift amount is larger than the datasize(32, 16, 8), we can optimize to constant 0. (MOVWUreg (SLLVconst [lc] x)) && lc >= 32 => (MOVVconst [0]) (MOVHUreg (SLLVconst [lc] x)) && lc >= 16 => (MOVVconst [0]) diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go index 8f17158b64..c68a24ca97 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go @@ -240,11 +240,17 @@ func init() { {name: "FCOPYSGD", argLength: 2, reg: fp21, asm: "FCOPYSGD"}, // float64 // shifts + {name: "SLL", argLength: 2, reg: gp21, asm: "SLL"}, // arg0 << arg1, shift amount is mod 32 {name: "SLLV", argLength: 2, reg: gp21, asm: "SLLV"}, // arg0 << arg1, shift amount is mod 64 + {name: "SLLconst", argLength: 1, reg: gp11, asm: "SLL", aux: "Int64"}, // arg0 << auxInt, auxInt should be in the range 0 to 31. 
{name: "SLLVconst", argLength: 1, reg: gp11, asm: "SLLV", aux: "Int64"}, // arg0 << auxInt + {name: "SRL", argLength: 2, reg: gp21, asm: "SRL"}, // arg0 >> arg1, shift amount is mod 32 {name: "SRLV", argLength: 2, reg: gp21, asm: "SRLV"}, // arg0 >> arg1, unsigned, shift amount is mod 64 + {name: "SRLconst", argLength: 1, reg: gp11, asm: "SRL", aux: "Int64"}, // arg0 >> auxInt, auxInt should be in the range 0 to 31. {name: "SRLVconst", argLength: 1, reg: gp11, asm: "SRLV", aux: "Int64"}, // arg0 >> auxInt, unsigned + {name: "SRA", argLength: 2, reg: gp21, asm: "SRA"}, // arg0 >> arg1, shift amount is mod 32 {name: "SRAV", argLength: 2, reg: gp21, asm: "SRAV"}, // arg0 >> arg1, signed, shift amount is mod 64 + {name: "SRAconst", argLength: 1, reg: gp11, asm: "SRA", aux: "Int64"}, // arg0 >> auxInt, signed, auxInt should be in the range 0 to 31. {name: "SRAVconst", argLength: 1, reg: gp11, asm: "SRAV", aux: "Int64"}, // arg0 >> auxInt, signed {name: "ROTR", argLength: 2, reg: gp21, asm: "ROTR"}, // arg0 right rotate by (arg1 mod 32) bits {name: "ROTRV", argLength: 2, reg: gp21, asm: "ROTRV"}, // arg0 right rotate by (arg1 mod 64) bits diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 8b51015ed8..03ccc0f5d7 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1834,11 +1834,17 @@ const ( OpLOONG64MASKEQZ OpLOONG64MASKNEZ OpLOONG64FCOPYSGD + OpLOONG64SLL OpLOONG64SLLV + OpLOONG64SLLconst OpLOONG64SLLVconst + OpLOONG64SRL OpLOONG64SRLV + OpLOONG64SRLconst OpLOONG64SRLVconst + OpLOONG64SRA OpLOONG64SRAV + OpLOONG64SRAconst OpLOONG64SRAVconst OpLOONG64ROTR OpLOONG64ROTRV @@ -24710,6 +24716,20 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "SLL", + argLen: 2, + asm: loong64.ASLL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + {1, 1073741816}, // R4 R5 R6 R7 R8 R9 
R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, + { name: "SLLV", argLen: 2, asm: loong64.ASLLV, @@ -24724,6 +24744,20 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "SLLconst", + auxType: auxInt64, + argLen: 1, + asm: loong64.ASLL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, + { name: "SLLVconst", auxType: auxInt64, argLen: 1, @@ -24738,6 +24772,20 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "SRL", + argLen: 2, + asm: loong64.ASRL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, + { name: "SRLV", argLen: 2, asm: loong64.ASRLV, @@ -24752,6 +24800,20 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "SRLconst", + auxType: auxInt64, + argLen: 1, + asm: loong64.ASRL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, + { name: "SRLVconst", auxType: auxInt64, argLen: 1, @@ -24766,6 +24828,20 @@ var opcodeTable = [...]opInfo{ 
}, }, { + name: "SRA", + argLen: 2, + asm: loong64.ASRA, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, + { name: "SRAV", argLen: 2, asm: loong64.ASRAV, @@ -24780,6 +24856,20 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "SRAconst", + auxType: auxInt64, + argLen: 1, + asm: loong64.ASRA, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, + { name: "SRAVconst", auxType: auxInt64, argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go index 4499efa01d..01000db4c0 100644 --- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go +++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go @@ -454,14 +454,20 @@ func rewriteValueLOONG64(v *Value) bool { return rewriteValueLOONG64_OpLOONG64SGTUconst(v) case OpLOONG64SGTconst: return rewriteValueLOONG64_OpLOONG64SGTconst(v) + case OpLOONG64SLL: + return rewriteValueLOONG64_OpLOONG64SLL(v) case OpLOONG64SLLV: return rewriteValueLOONG64_OpLOONG64SLLV(v) case OpLOONG64SLLVconst: return rewriteValueLOONG64_OpLOONG64SLLVconst(v) + case OpLOONG64SRA: + return rewriteValueLOONG64_OpLOONG64SRA(v) case OpLOONG64SRAV: return rewriteValueLOONG64_OpLOONG64SRAV(v) case OpLOONG64SRAVconst: return rewriteValueLOONG64_OpLOONG64SRAVconst(v) + case OpLOONG64SRL: + return rewriteValueLOONG64_OpLOONG64SRL(v) case 
OpLOONG64SRLV: return rewriteValueLOONG64_OpLOONG64SRLV(v) case OpLOONG64SRLVconst: @@ -6031,6 +6037,43 @@ func rewriteValueLOONG64_OpLOONG64SGTconst(v *Value) bool { } return false } +func rewriteValueLOONG64_OpLOONG64SLL(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SLL _ (MOVVconst [c])) + // cond: uint64(c)>=32 + // result: (MOVVconst [0]) + for { + if v_1.Op != OpLOONG64MOVVconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + if !(uint64(c) >= 32) { + break + } + v.reset(OpLOONG64MOVVconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + // match: (SLL x (MOVVconst [c])) + // cond: uint64(c) >=0 && uint64(c) <=31 + // result: (SLLconst x [c]) + for { + x := v_0 + if v_1.Op != OpLOONG64MOVVconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + if !(uint64(c) >= 0 && uint64(c) <= 31) { + break + } + v.reset(OpLOONG64SLLconst) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } + return false +} func rewriteValueLOONG64_OpLOONG64SLLV(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -6080,6 +6123,45 @@ func rewriteValueLOONG64_OpLOONG64SLLVconst(v *Value) bool { } return false } +func rewriteValueLOONG64_OpLOONG64SRA(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SRA x (MOVVconst [c])) + // cond: uint64(c)>=32 + // result: (SRAconst x [31]) + for { + x := v_0 + if v_1.Op != OpLOONG64MOVVconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + if !(uint64(c) >= 32) { + break + } + v.reset(OpLOONG64SRAconst) + v.AuxInt = int64ToAuxInt(31) + v.AddArg(x) + return true + } + // match: (SRA x (MOVVconst [c])) + // cond: uint64(c) >=0 && uint64(c) <=31 + // result: (SRAconst x [c]) + for { + x := v_0 + if v_1.Op != OpLOONG64MOVVconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + if !(uint64(c) >= 0 && uint64(c) <= 31) { + break + } + v.reset(OpLOONG64SRAconst) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } + return false +} func rewriteValueLOONG64_OpLOONG64SRAV(v *Value) bool { v_1 := 
v.Args[1] v_0 := v.Args[0] @@ -6117,6 +6199,83 @@ func rewriteValueLOONG64_OpLOONG64SRAV(v *Value) bool { } func rewriteValueLOONG64_OpLOONG64SRAVconst(v *Value) bool { v_0 := v.Args[0] + b := v.Block + // match: (SRAVconst [rc] (MOVWreg y)) + // cond: rc >= 0 && rc <= 31 + // result: (SRAconst [int64(rc)] y) + for { + rc := auxIntToInt64(v.AuxInt) + if v_0.Op != OpLOONG64MOVWreg { + break + } + y := v_0.Args[0] + if !(rc >= 0 && rc <= 31) { + break + } + v.reset(OpLOONG64SRAconst) + v.AuxInt = int64ToAuxInt(int64(rc)) + v.AddArg(y) + return true + } + // match: (SRAVconst <t> [rc] (MOVBreg y)) + // cond: rc >= 8 + // result: (SRAVconst [63] (SLLVconst <t> [56] y)) + for { + t := v.Type + rc := auxIntToInt64(v.AuxInt) + if v_0.Op != OpLOONG64MOVBreg { + break + } + y := v_0.Args[0] + if !(rc >= 8) { + break + } + v.reset(OpLOONG64SRAVconst) + v.AuxInt = int64ToAuxInt(63) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, t) + v0.AuxInt = int64ToAuxInt(56) + v0.AddArg(y) + v.AddArg(v0) + return true + } + // match: (SRAVconst <t> [rc] (MOVHreg y)) + // cond: rc >= 16 + // result: (SRAVconst [63] (SLLVconst <t> [48] y)) + for { + t := v.Type + rc := auxIntToInt64(v.AuxInt) + if v_0.Op != OpLOONG64MOVHreg { + break + } + y := v_0.Args[0] + if !(rc >= 16) { + break + } + v.reset(OpLOONG64SRAVconst) + v.AuxInt = int64ToAuxInt(63) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, t) + v0.AuxInt = int64ToAuxInt(48) + v0.AddArg(y) + v.AddArg(v0) + return true + } + // match: (SRAVconst <t> [rc] (MOVWreg y)) + // cond: rc >= 32 + // result: (SRAconst [31] y) + for { + rc := auxIntToInt64(v.AuxInt) + if v_0.Op != OpLOONG64MOVWreg { + break + } + y := v_0.Args[0] + if !(rc >= 32) { + break + } + v.reset(OpLOONG64SRAconst) + v.AuxInt = int64ToAuxInt(31) + v.AddArg(y) + return true + } // match: (SRAVconst [c] (MOVVconst [d])) // result: (MOVVconst [d>>uint64(c)]) for { @@ -6131,6 +6290,43 @@ func rewriteValueLOONG64_OpLOONG64SRAVconst(v *Value) bool { } return false } +func 
rewriteValueLOONG64_OpLOONG64SRL(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SRL _ (MOVVconst [c])) + // cond: uint64(c)>=32 + // result: (MOVVconst [0]) + for { + if v_1.Op != OpLOONG64MOVVconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + if !(uint64(c) >= 32) { + break + } + v.reset(OpLOONG64MOVVconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + // match: (SRL x (MOVVconst [c])) + // cond: uint64(c) >=0 && uint64(c) <=31 + // result: (SRLconst x [c]) + for { + x := v_0 + if v_1.Op != OpLOONG64MOVVconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + if !(uint64(c) >= 0 && uint64(c) <= 31) { + break + } + v.reset(OpLOONG64SRLconst) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } + return false +} func rewriteValueLOONG64_OpLOONG64SRLV(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -6235,6 +6431,23 @@ func rewriteValueLOONG64_OpLOONG64SRLVconst(v *Value) bool { v.AddArg(x) return true } + // match: (SRLVconst [rc] (MOVWUreg y)) + // cond: rc >= 0 && rc <= 31 + // result: (SRLconst [int64(rc)] y) + for { + rc := auxIntToInt64(v.AuxInt) + if v_0.Op != OpLOONG64MOVWUreg { + break + } + y := v_0.Args[0] + if !(rc >= 0 && rc <= 31) { + break + } + v.reset(OpLOONG64SRLconst) + v.AuxInt = int64ToAuxInt(int64(rc)) + v.AddArg(y) + return true + } // match: (SRLVconst [rc] (MOVWUreg x)) // cond: rc >= 32 // result: (MOVVconst [0]) @@ -7340,19 +7553,19 @@ func rewriteValueLOONG64_OpLsh32x16(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Lsh32x16 <t> x y) - // result: (MASKEQZ (SLLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) + // result: (MASKEQZ (SLL <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 v.reset(OpLOONG64MASKEQZ) - v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) v1.AddArg(y) v0.AddArg2(x, v1) 
v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v3.AuxInt = int64ToAuxInt(64) + v3.AuxInt = int64ToAuxInt(32) v2.AddArg2(v3, v1) v.AddArg2(v0, v2) return true @@ -7364,19 +7577,19 @@ func rewriteValueLOONG64_OpLsh32x32(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Lsh32x32 <t> x y) - // result: (MASKEQZ (SLLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) + // result: (MASKEQZ (SLL <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 v.reset(OpLOONG64MASKEQZ) - v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) v1.AddArg(y) v0.AddArg2(x, v1) v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v3.AuxInt = int64ToAuxInt(64) + v3.AuxInt = int64ToAuxInt(32) v2.AddArg2(v3, v1) v.AddArg2(v0, v2) return true @@ -7388,17 +7601,17 @@ func rewriteValueLOONG64_OpLsh32x64(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Lsh32x64 <t> x y) - // result: (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) + // result: (MASKEQZ (SLL <t> x y) (SGTU (MOVVconst <typ.UInt64> [32]) y)) for { t := v.Type x := v_0 y := v_1 v.reset(OpLOONG64MASKEQZ) - v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) v0.AddArg2(x, y) v1 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) v2 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v2.AuxInt = int64ToAuxInt(64) + v2.AuxInt = int64ToAuxInt(32) v1.AddArg2(v2, y) v.AddArg2(v0, v1) return true @@ -7410,19 +7623,19 @@ func rewriteValueLOONG64_OpLsh32x8(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Lsh32x8 <t> x y) - // result: (MASKEQZ (SLLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) + // result: 
(MASKEQZ (SLL <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 v.reset(OpLOONG64MASKEQZ) - v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) v1.AddArg(y) v0.AddArg2(x, v1) v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v3.AuxInt = int64ToAuxInt(64) + v3.AuxInt = int64ToAuxInt(32) v2.AddArg2(v3, v1) v.AddArg2(v0, v2) return true @@ -8772,23 +8985,21 @@ func rewriteValueLOONG64_OpRsh32Ux16(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Rsh32Ux16 <t> x y) - // result: (MASKEQZ (SRLV <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) + // result: (MASKEQZ (SRL <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 v.reset(OpLOONG64MASKEQZ) - v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) - v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) - v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v4.AuxInt = int64ToAuxInt(64) - v3.AddArg2(v4, v2) - v.AddArg2(v0, v3) + v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) + v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) + v1.AddArg(y) + v0.AddArg2(x, v1) + v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) + v3.AuxInt = int64ToAuxInt(32) + v2.AddArg2(v3, v1) + v.AddArg2(v0, v2) return true } } @@ -8798,23 +9009,21 @@ func rewriteValueLOONG64_OpRsh32Ux32(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Rsh32Ux32 <t> x y) - // result: (MASKEQZ (SRLV <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) + // 
result: (MASKEQZ (SRL <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 v.reset(OpLOONG64MASKEQZ) - v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) + v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) - v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v4.AuxInt = int64ToAuxInt(64) - v3.AddArg2(v4, v2) - v.AddArg2(v0, v3) + v1.AddArg(y) + v0.AddArg2(x, v1) + v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) + v3.AuxInt = int64ToAuxInt(32) + v2.AddArg2(v3, v1) + v.AddArg2(v0, v2) return true } } @@ -8824,21 +9033,19 @@ func rewriteValueLOONG64_OpRsh32Ux64(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Rsh32Ux64 <t> x y) - // result: (MASKEQZ (SRLV <t> (ZeroExt32to64 x) y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) + // result: (MASKEQZ (SRL <t> x y) (SGTU (MOVVconst <typ.UInt64> [32]) y)) for { t := v.Type x := v_0 y := v_1 v.reset(OpLOONG64MASKEQZ) - v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) - v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v1.AddArg(x) - v0.AddArg2(v1, y) - v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) - v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v3.AuxInt = int64ToAuxInt(64) - v2.AddArg2(v3, y) - v.AddArg2(v0, v2) + v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v2 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) + v2.AuxInt = int64ToAuxInt(32) + v1.AddArg2(v2, y) + v.AddArg2(v0, v1) return true } } @@ -8848,23 +9055,21 @@ func rewriteValueLOONG64_OpRsh32Ux8(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Rsh32Ux8 <t> x y) - // result: (MASKEQZ (SRLV <t> (ZeroExt32to64 x) 
(ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) + // result: (MASKEQZ (SRL <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 v.reset(OpLOONG64MASKEQZ) - v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) - v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) - v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v4.AuxInt = int64ToAuxInt(64) - v3.AddArg2(v4, v2) - v.AddArg2(v0, v3) + v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) + v1.AddArg(y) + v0.AddArg2(x, v1) + v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) + v3.AuxInt = int64ToAuxInt(32) + v2.AddArg2(v3, v1) + v.AddArg2(v0, v2) return true } } @@ -8874,25 +9079,23 @@ func rewriteValueLOONG64_OpRsh32x16(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Rsh32x16 <t> x y) - // result: (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y))) + // result: (SRA x (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [31]))) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 - v.reset(OpLOONG64SRAV) - v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpLOONG64OR, t) - v2 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) - v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) - v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) - v4.AddArg(y) - v5 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v5.AuxInt = int64ToAuxInt(63) - v3.AddArg2(v4, v5) - v2.AddArg(v3) - v1.AddArg2(v2, v4) - v.AddArg2(v0, v1) + v.reset(OpLOONG64SRA) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) + v2 := 
b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) + v3.AddArg(y) + v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) + v4.AuxInt = int64ToAuxInt(31) + v2.AddArg2(v3, v4) + v1.AddArg(v2) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) return true } } @@ -8902,25 +9105,23 @@ func rewriteValueLOONG64_OpRsh32x32(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Rsh32x32 <t> x y) - // result: (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y))) + // result: (SRA x (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [31]))) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 - v.reset(OpLOONG64SRAV) - v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpLOONG64OR, t) - v2 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) - v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) - v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v4.AddArg(y) - v5 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v5.AuxInt = int64ToAuxInt(63) - v3.AddArg2(v4, v5) - v2.AddArg(v3) - v1.AddArg2(v2, v4) - v.AddArg2(v0, v1) + v.reset(OpLOONG64SRA) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) + v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) + v3.AddArg(y) + v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) + v4.AuxInt = int64ToAuxInt(31) + v2.AddArg2(v3, v4) + v1.AddArg(v2) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) return true } } @@ -8930,23 +9131,21 @@ func rewriteValueLOONG64_OpRsh32x64(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Rsh32x64 <t> x y) - // result: (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y)) + // result: (SRA x (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [31]))) y)) for { t := v.Type x := v_0 y := v_1 - 
v.reset(OpLOONG64SRAV) - v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpLOONG64OR, t) - v2 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) - v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) - v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v4.AuxInt = int64ToAuxInt(63) - v3.AddArg2(y, v4) - v2.AddArg(v3) - v1.AddArg2(v2, y) - v.AddArg2(v0, v1) + v.reset(OpLOONG64SRA) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) + v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) + v3.AuxInt = int64ToAuxInt(31) + v2.AddArg2(y, v3) + v1.AddArg(v2) + v0.AddArg2(v1, y) + v.AddArg2(x, v0) return true } } @@ -8956,25 +9155,23 @@ func rewriteValueLOONG64_OpRsh32x8(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Rsh32x8 <t> x y) - // result: (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y))) + // result: (SRA x (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [31]))) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 - v.reset(OpLOONG64SRAV) - v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpLOONG64OR, t) - v2 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) - v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) - v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) - v4.AddArg(y) - v5 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v5.AuxInt = int64ToAuxInt(63) - v3.AddArg2(v4, v5) - v2.AddArg(v3) - v1.AddArg2(v2, v4) - v.AddArg2(v0, v1) + v.reset(OpLOONG64SRA) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) + v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) + v3.AddArg(y) + v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) + v4.AuxInt = int64ToAuxInt(31) + v2.AddArg2(v3, v4) + 
v1.AddArg(v2) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) return true } } |
