aboutsummaryrefslogtreecommitdiff
path: root/src/internal/bytealg
diff options
context:
space:
mode:
authorMeng Zhuo <mengzhuo1203@gmail.com>2019-10-02 16:06:25 +0800
committerCherry Zhang <cherryyz@google.com>2019-10-03 20:32:21 +0000
commita1b1ba7daf4d612cb08e98567d809a6f202498ce (patch)
treed3f4315d4635db8863e5b04bd54dc0f0d93e846b /src/internal/bytealg
parent2bf7a925712dca5646f9215cda17c5b61eea14ce (diff)
downloadgo-a1b1ba7daf4d612cb08e98567d809a6f202498ce.tar.xz
internal/bytealg: (re)adding mips64x compare implementation
The original CL of mips64x compare function has been reverted due to wrong implement for little endian. Original CL: https://go-review.googlesource.com/c/go/+/196837 name old time/op new time/op delta BytesCompare/1 28.9ns ± 4% 22.1ns ± 0% -23.60% (p=0.000 n=9+8) BytesCompare/2 34.6ns ± 0% 23.1ns ± 0% -33.25% (p=0.000 n=8+10) BytesCompare/4 54.6ns ± 0% 40.8ns ± 0% -25.27% (p=0.000 n=8+8) BytesCompare/8 73.9ns ± 0% 49.1ns ± 0% -33.56% (p=0.000 n=8+8) BytesCompare/16 113ns ± 0% 24ns ± 0% -79.20% (p=0.000 n=9+9) BytesCompare/32 190ns ± 0% 26ns ± 0% -86.53% (p=0.000 n=10+10) BytesCompare/64 345ns ± 0% 44ns ± 0% -87.19% (p=0.000 n=10+8) BytesCompare/128 654ns ± 0% 52ns ± 0% -91.97% (p=0.000 n=9+8) BytesCompare/256 1.27µs ± 0% 0.07µs ± 0% -94.14% (p=0.001 n=8+9) BytesCompare/512 2.51µs ± 0% 0.12µs ± 0% -95.26% (p=0.000 n=9+10) BytesCompare/1024 4.99µs ± 0% 0.21µs ± 0% -95.85% (p=0.000 n=8+10) BytesCompare/2048 9.94µs ± 0% 0.38µs ± 0% -96.14% (p=0.000 n=8+8) CompareBytesEqual 105ns ± 0% 64ns ± 0% -39.43% (p=0.000 n=10+9) CompareBytesToNil 34.8ns ± 1% 38.6ns ± 3% +11.01% (p=0.000 n=10+10) CompareBytesEmpty 33.6ns ± 3% 36.6ns ± 0% +8.77% (p=0.000 n=10+8) CompareBytesIdentical 29.7ns ± 0% 40.5ns ± 1% +36.45% (p=0.000 n=10+8) CompareBytesSameLength 69.1ns ± 0% 51.8ns ± 0% -25.04% (p=0.000 n=10+9) CompareBytesDifferentLength 69.8ns ± 0% 52.5ns ± 0% -24.79% (p=0.000 n=10+8) CompareBytesBigUnaligned 5.15ms ± 0% 2.19ms ± 0% -57.59% (p=0.000 n=9+9) CompareBytesBig 5.28ms ± 0% 0.28ms ± 0% -94.64% (p=0.000 n=8+8) CompareBytesBigIdentical 29.7ns ± 0% 36.9ns ± 2% +24.11% (p=0.000 n=8+10) name old speed new speed delta CompareBytesBigUnaligned 204MB/s ± 0% 480MB/s ± 0% +135.77% (p=0.000 n=9+9) CompareBytesBig 198MB/s ± 0% 3704MB/s ± 0% +1765.97% (p=0.000 n=8+8) CompareBytesBigIdentical 35.3TB/s ± 0% 28.4TB/s ± 2% -19.44% (p=0.000 n=8+10) Fixes #34549 Change-Id: I2ef29f13cdd4229745ac2d018bb53c76f2ff1209 Reviewed-on: https://go-review.googlesource.com/c/go/+/197557 Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/internal/bytealg')
-rw-r--r--src/internal/bytealg/compare_generic.go2
-rw-r--r--src/internal/bytealg/compare_mips64x.s88
-rw-r--r--src/internal/bytealg/compare_native.go2
3 files changed, 90 insertions, 2 deletions
diff --git a/src/internal/bytealg/compare_generic.go b/src/internal/bytealg/compare_generic.go
index 2ac60f3df9..4839df9528 100644
--- a/src/internal/bytealg/compare_generic.go
+++ b/src/internal/bytealg/compare_generic.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!wasm
+// +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!wasm,!mips64,!mips64le
package bytealg
diff --git a/src/internal/bytealg/compare_mips64x.s b/src/internal/bytealg/compare_mips64x.s
new file mode 100644
index 0000000000..4f05fceaca
--- /dev/null
+++ b/src/internal/bytealg/compare_mips64x.s
@@ -0,0 +1,88 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build mips64 mips64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Compare(SB),NOSPLIT,$0-56
+ MOVV a_base+0(FP), R3
+ MOVV b_base+24(FP), R4
+ MOVV a_len+8(FP), R1
+ MOVV b_len+32(FP), R2
+ MOVV $ret+48(FP), R9
+ JMP cmpbody<>(SB)
+
+TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
+ MOVV a_base+0(FP), R3
+ MOVV b_base+16(FP), R4
+ MOVV a_len+8(FP), R1
+ MOVV b_len+24(FP), R2
+ MOVV $ret+32(FP), R9
+ JMP cmpbody<>(SB)
+
+// On entry:
+// R1 length of a
+// R2 length of b
+// R3 points to the start of a
+// R4 points to the start of b
+// R9 points to the return value (-1/0/1)
+TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
+ BEQ R3, R4, samebytes // same start of a and b
+
+ SGTU R1, R2, R7
+ BNE R0, R7, r2_lt_r1
+ MOVV R1, R10
+ JMP entry
+r2_lt_r1:
+ MOVV R2, R10 // R10 is min(R1, R2)
+entry:
+ ADDV R3, R10, R8 // R3 start of a, R8 end of a
+ BEQ R3, R8, samebytes // length is 0
+
+ SRLV $4, R10 // R10 is number of chunks
+ BEQ R0, R10, byte_loop
+
+ // make sure both a and b are aligned.
+ OR R3, R4, R11
+ AND $7, R11
+ BNE R0, R11, byte_loop
+
+chunk16_loop:
+ BEQ R0, R10, byte_loop
+ MOVV (R3), R6
+ MOVV (R4), R7
+ BNE R6, R7, byte_loop
+ MOVV 8(R3), R13
+ MOVV 8(R4), R14
+ ADDV $16, R3
+ ADDV $16, R4
+ SUBVU $1, R10
+ BEQ R13, R14, chunk16_loop
+ SUBV $8, R3
+ SUBV $8, R4
+
+byte_loop:
+ BEQ R3, R8, samebytes
+ MOVBU (R3), R6
+ ADDVU $1, R3
+ MOVBU (R4), R7
+ ADDVU $1, R4
+ BEQ R6, R7, byte_loop
+
+byte_cmp:
+ SGTU R6, R7, R8 // R8 = 1 if (R6 > R7)
+ BNE R0, R8, ret
+ MOVV $-1, R8
+ JMP ret
+
+samebytes:
+ SGTU R1, R2, R6
+ SGTU R2, R1, R7
+ SUBV R7, R6, R8
+
+ret:
+ MOVV R8, (R9)
+ RET
diff --git a/src/internal/bytealg/compare_native.go b/src/internal/bytealg/compare_native.go
index b14aa8c72c..95486e8542 100644
--- a/src/internal/bytealg/compare_native.go
+++ b/src/internal/bytealg/compare_native.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle wasm
+// +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle wasm mips64 mips64le
package bytealg