From ef3e1dae2f151ddca4ba50ed8b9a98381d7e9158 Mon Sep 17 00:00:00 2001
From: Xiaolin Zhao
Date: Thu, 26 Sep 2024 14:17:17 +0800
Subject: cmd/compile: optimize loong64 with register indexed load/store
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

goos: linux
goarch: loong64
pkg: test/bench/go1
cpu: Loongson-3A6000 @ 2500.00MHz
                      | bench.old   | bench.new                          |
                      | sec/op      | sec/op       vs base               |
BinaryTree17            7.766 ± 1%    7.640 ± 2%   -1.62% (p=0.000 n=20)
Fannkuch11              2.649 ± 0%    2.358 ± 0%  -10.96% (p=0.000 n=20)
FmtFprintfEmpty        35.89n ± 0%   35.87n ± 0%   -0.06% (p=0.000 n=20)
FmtFprintfString       59.44n ± 0%   57.25n ± 2%   -3.68% (p=0.000 n=20)
FmtFprintfInt          62.07n ± 0%   60.04n ± 0%   -3.27% (p=0.000 n=20)
FmtFprintfIntInt       97.90n ± 0%   97.26n ± 0%   -0.65% (p=0.000 n=20)
FmtFprintfPrefixedInt  116.7n ± 0%   119.2n ± 0%   +2.14% (p=0.000 n=20)
FmtFprintfFloat        204.5n ± 0%   201.9n ± 0%   -1.30% (p=0.000 n=20)
FmtManyArgs            455.9n ± 0%   466.8n ± 0%   +2.39% (p=0.000 n=20)
GobDecode              7.458m ± 1%   7.138m ± 1%   -4.28% (p=0.000 n=20)
GobEncode              8.573m ± 1%   8.473m ± 1%        ~ (p=0.091 n=20)
Gzip                   280.2m ± 0%   284.9m ± 0%   +1.67% (p=0.000 n=20)
Gunzip                 32.68m ± 0%   32.67m ± 0%        ~ (p=0.211 n=20)
HTTPClientServer       54.22µ ± 0%   53.24µ ± 0%   -1.80% (p=0.000 n=20)
JSONEncode             9.427m ± 1%   9.152m ± 0%   -2.92% (p=0.000 n=20)
JSONDecode             47.08m ± 1%   46.85m ± 1%   -0.49% (p=0.007 n=20)
Mandelbrot200          4.601m ± 0%   4.605m ± 0%   +0.08% (p=0.000 n=20)
GoParse                4.776m ± 0%   4.655m ± 1%   -2.52% (p=0.000 n=20)
RegexpMatchEasy0_32    59.77n ± 0%   57.59n ± 0%   -3.66% (p=0.000 n=20)
RegexpMatchEasy0_1K    458.1n ± 0%   458.8n ± 0%   +0.15% (p=0.000 n=20)
RegexpMatchEasy1_32    59.36n ± 0%   59.24n ± 0%   -0.20% (p=0.000 n=20)
RegexpMatchEasy1_1K    557.7n ± 0%   560.2n ± 0%   +0.46% (p=0.000 n=20)
RegexpMatchMedium_32   803.1n ± 0%   772.8n ± 0%   -3.77% (p=0.000 n=20)
RegexpMatchMedium_1K   27.29µ ± 0%   25.88µ ± 0%   -5.18% (p=0.000 n=20)
RegexpMatchHard_32     1.385µ ± 0%   1.304µ ± 0%   -5.85% (p=0.000 n=20)
RegexpMatchHard_1K     40.92µ ± 0%   39.58µ ± 0%   -3.27% (p=0.000 n=20)
Revcomp                474.3m ± 0%   410.0m ± 0%  -13.56% (p=0.000 n=20)
Template               78.16m ± 0%   76.32m ± 1%   -2.36% (p=0.000 n=20)
TimeParse              271.8n ± 0%   272.1n ± 0%   +0.11% (p=0.000 n=20)
TimeFormat             292.3n ± 0%   294.8n ± 0%   +0.86% (p=0.000 n=20)
geomean                51.98µ        50.82µ        -2.22%

Change-Id: Ia78f1ddee8f1d9ec7192a4b8d2a4ec6058679956
Reviewed-on: https://go-review.googlesource.com/c/go/+/615918
Reviewed-by: Qiqi Huang
Reviewed-by: Dmitri Shuralyov
Reviewed-by: Michael Knyszek
LUCI-TryBot-Result: Go LUCI
Reviewed-by: abner chenc
---
 test/codegen/floats.go     | 2 ++
 test/codegen/memcombine.go | 6 ++++++
 2 files changed, 8 insertions(+)
(limited to 'test/codegen')

diff --git a/test/codegen/floats.go b/test/codegen/floats.go
index d2cf6f2b00..a77843d0e7 100644
--- a/test/codegen/floats.go
+++ b/test/codegen/floats.go
@@ -54,11 +54,13 @@ func DivPow2(f1, f2, f3 float64) (float64, float64, float64) {
 
 func indexLoad(b0 []float32, b1 float32, idx int) float32 {
 	// arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+<<2\),\sF[0-9]+`
+	// loong64:`MOVF\s\(R[0-9]+\)\(R[0-9]+\),\sF[0-9]+`
 	return b0[idx] * b1
 }
 
 func indexStore(b0 []float64, b1 float64, idx int) {
 	// arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<3\)`
+	// loong64:`MOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`
 	b0[idx] = b1
 }
 
diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go
index ff67a442e4..ed319d17db 100644
--- a/test/codegen/memcombine.go
+++ b/test/codegen/memcombine.go
@@ -19,6 +19,7 @@ func load_le64(b []byte) uint64 {
 	// amd64:`MOVQ\s\(.*\),`,-`MOV[BWL]\t[^$]`,-`OR`
 	// s390x:`MOVDBR\s\(.*\),`
 	// arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`
+	// loong64:`MOVBU\s\(R[0-9]+\),`
 	// ppc64le:`MOVD\s`,-`MOV[BHW]Z`
 	// ppc64:`MOVDBR\s`,-`MOV[BHW]Z`
 	return binary.LittleEndian.Uint64(b)
@@ -28,6 +29,7 @@ func load_le64_idx(b []byte, idx int) uint64 {
 	// amd64:`MOVQ\s\(.*\)\(.*\*1\),`,-`MOV[BWL]\t[^$]`,-`OR`
 	// s390x:`MOVDBR\s\(.*\)\(.*\*1\),`
 	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]`
+	// loong64:`MOVBU\s\(R[0-9]+\)\(R[0-9]+\),`
 	// ppc64le:`MOVD\s`,-`MOV[BHW]Z\s`
 	// ppc64:`MOVDBR\s`,-`MOV[BHW]Z\s`
 	return binary.LittleEndian.Uint64(b[idx:])
@@ -38,6 +40,7 @@ func load_le32(b []byte) uint32 {
 	// 386:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
 	// s390x:`MOVWBR\s\(.*\),`
 	// arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`
+	// loong64:`MOVBU\s\(R[0-9]+\),`
 	// ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
 	// ppc64:`MOVWBR\s`,-`MOV[BH]Z\s`
 	return binary.LittleEndian.Uint32(b)
@@ -48,6 +51,7 @@ func load_le32_idx(b []byte, idx int) uint32 {
 	// 386:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
 	// s390x:`MOVWBR\s\(.*\)\(.*\*1\),`
 	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]`
+	// loong64:`MOVBU\s\(R[0-9]+\)\(R[0-9]+\),`
 	// ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
 	// ppc64:`MOVWBR\s`,-`MOV[BH]Z\s'
 	return binary.LittleEndian.Uint32(b[idx:])
@@ -57,6 +61,7 @@ func load_le16(b []byte) uint16 {
 	// amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
 	// ppc64le:`MOVHZ\s`,-`MOVBZ`
 	// arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB`
+	// loong64:`MOVBU\s\(R[0-9]+\),`
 	// s390x:`MOVHBR\s\(.*\),`
 	// ppc64:`MOVHBR\s`,-`MOVBZ`
 	return binary.LittleEndian.Uint16(b)
@@ -67,6 +72,7 @@ func load_le16_idx(b []byte, idx int) uint16 {
 	// ppc64le:`MOVHZ\s`,-`MOVBZ`
 	// ppc64:`MOVHBR\s`,-`MOVBZ`
 	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
+	// loong64:`MOVBU\s\(R[0-9]+\)\(R[0-9]+\),`
 	// s390x:`MOVHBR\s\(.*\)\(.*\*1\),`
 	return binary.LittleEndian.Uint16(b[idx:])
 }
-- 
cgit v1.3