diff options
| author | Meng Zhuo <mengzhuo1203@gmail.com> | 2019-09-26 00:00:20 +0800 |
|---|---|---|
| committer | Cherry Zhang <cherryyz@google.com> | 2019-09-25 22:03:38 +0000 |
| commit | 78baea836d9f4312972e50e17dcff15ff63d155b (patch) | |
| tree | c286a96de0ebddfb0b24f6b394e5f44e88d995f9 /src | |
| parent | 44e752c38ad507fd2725f58ae58802b39b45f1d8 (diff) | |
| download | go-78baea836d9f4312972e50e17dcff15ff63d155b.tar.xz | |
internal/bytealg: add assembly implementation of Compare/CompareString on mips64x
name old time/op new time/op delta
BytesCompare/1 28.9ns ± 4% 22.8ns ± 0% -21.23% (p=0.000 n=9+10)
BytesCompare/2 34.6ns ± 0% 23.5ns ± 0% -32.01% (p=0.000 n=8+10)
BytesCompare/4 54.6ns ± 0% 41.4ns ± 0% -24.18% (p=0.001 n=8+9)
BytesCompare/8 73.9ns ± 0% 49.7ns ± 0% -32.75% (p=0.002 n=8+10)
BytesCompare/16 113ns ± 0% 23ns ± 0% -79.56% (p=0.000 n=9+10)
BytesCompare/32 190ns ± 0% 26ns ± 0% -86.53% (p=0.000 n=10+10)
BytesCompare/64 345ns ± 0% 44ns ± 0% -87.19% (p=0.000 n=10+10)
BytesCompare/128 654ns ± 0% 52ns ± 0% -91.97% (p=0.000 n=9+8)
BytesCompare/256 1.27µs ± 0% 0.08µs ± 1% -94.10% (p=0.000 n=8+10)
BytesCompare/512 2.51µs ± 0% 0.12µs ± 0% -95.26% (p=0.000 n=9+9)
BytesCompare/1024 4.99µs ± 0% 0.21µs ± 1% -95.84% (p=0.000 n=8+10)
BytesCompare/2048 9.94µs ± 0% 0.38µs ± 0% -96.13% (p=0.000 n=8+10)
CompareBytesEqual 105ns ± 0% 64ns ± 0% -39.05% (p=0.000 n=10+9)
CompareBytesToNil 34.8ns ± 1% 39.5ns ± 3% +13.48% (p=0.000 n=10+10)
CompareBytesEmpty 33.6ns ± 3% 36.6ns ± 0% +8.77% (p=0.000 n=10+10)
CompareBytesIdentical 29.7ns ± 0% 36.6ns ± 0% +23.23% (p=0.000 n=10+10)
CompareBytesSameLength 69.1ns ± 0% 51.1ns ± 0% -26.05% (p=0.000 n=10+10)
CompareBytesDifferentLength 69.8ns ± 0% 51.1ns ± 0% -26.79% (p=0.000 n=10+10)
CompareBytesBigUnaligned 5.15ms ± 0% 2.18ms ± 0% -57.62% (p=0.000 n=9+9)
CompareBytesBig 5.28ms ± 0% 0.28ms ± 0% -94.64% (p=0.000 n=8+10)
CompareBytesBigIdentical 29.7ns ± 0% 36.8ns ± 0% +23.91% (p=0.000 n=8+8)
name old speed new speed delta
CompareBytesBigUnaligned 204MB/s ± 0% 480MB/s ± 0% +135.94% (p=0.000 n=9+9)
CompareBytesBig 198MB/s ± 0% 3703MB/s ± 0% +1765.85% (p=0.000 n=8+10)
CompareBytesBigIdentical 35.3TB/s ± 0% 28.5TB/s ± 0% -19.31% (p=0.000 n=8+8)
Change-Id: I112d9de2324986fd65ed237a86b11856a1c0f4a7
Reviewed-on: https://go-review.googlesource.com/c/go/+/196837
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src')
| -rw-r--r-- | src/internal/bytealg/compare_generic.go | 2 | ||||
| -rw-r--r-- | src/internal/bytealg/compare_mips64x.s | 92 | ||||
| -rw-r--r-- | src/internal/bytealg/compare_native.go | 2 |
3 files changed, 94 insertions, 2 deletions
diff --git a/src/internal/bytealg/compare_generic.go b/src/internal/bytealg/compare_generic.go index 2ac60f3df9..4839df9528 100644 --- a/src/internal/bytealg/compare_generic.go +++ b/src/internal/bytealg/compare_generic.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!wasm +// +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!wasm,!mips64,!mips64le package bytealg diff --git a/src/internal/bytealg/compare_mips64x.s b/src/internal/bytealg/compare_mips64x.s new file mode 100644 index 0000000000..7bed4d24ac --- /dev/null +++ b/src/internal/bytealg/compare_mips64x.s @@ -0,0 +1,92 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build mips64 mips64le + +#include "go_asm.h" +#include "textflag.h" + +TEXT ·Compare(SB),NOSPLIT,$0-56 + MOVV a_base+0(FP), R3 + MOVV b_base+24(FP), R4 + MOVV a_len+8(FP), R1 + MOVV b_len+32(FP), R2 + MOVV $ret+48(FP), R9 + JMP cmpbody<>(SB) + +TEXT runtime·cmpstring(SB),NOSPLIT,$0-40 + MOVV a_base+0(FP), R3 + MOVV b_base+16(FP), R4 + MOVV a_len+8(FP), R1 + MOVV b_len+24(FP), R2 + MOVV $ret+32(FP), R9 + JMP cmpbody<>(SB) + +// On entry: +// R1 length of a +// R2 length of b +// R3 points to the start of a +// R4 points to the start of b +// R9 points to the return value (-1/0/1) +// +// On exit: +// R6, R7, R8, R10, R11, R13, R14 clobbered +TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0 + BEQ R3, R4, samebytes // same start of a and b + + SGTU R1, R2, R7 + BNE R0, R7, r2_lt_r1 + MOVV R1, R10 + JMP entry +r2_lt_r1: + MOVV R2, R10 // R10 is min(R1, R2) +entry: + ADDV R3, R10, R8 // R3 start of a, R8 end of a + BEQ R3, R8, samebytes // length is 0 + + SRLV $4, R10 // R10 is number of chunks + BEQ R0, R10, byte_loop + + // make sure both a and b are aligned. + OR R3, R4, R11 + AND $7, R11 + BNE R0, R11, byte_loop + +chunk16_loop: + BEQ R0, R10, byte_loop + MOVV (R3), R6 + MOVV (R4), R7 + BNE R6, R7, byte_cmp + MOVV 8(R3), R13 + MOVV 8(R4), R14 + ADDV $16, R3 + ADDV $16, R4 + SUBVU $1, R10 + BEQ R13, R14, chunk16_loop + MOVV R13, R6 + MOVV R14, R7 + JMP byte_cmp + +byte_loop: + BEQ R3, R8, samebytes + MOVBU (R3), R6 + ADDVU $1, R3 + MOVBU (R4), R7 + ADDVU $1, R4 + BEQ R6, R7, byte_loop + +byte_cmp: + SGTU R6, R7, R8 // R8 = 1 if (R6 > R7) + BNE R0, R8, ret + MOVV $-1, R8 + JMP ret + +samebytes: + SGTU R1, R2, R6 + SGTU R2, R1, R7 + SUBV R7, R6, R8 + +ret: + MOVV R8, (R9) + RET diff --git a/src/internal/bytealg/compare_native.go b/src/internal/bytealg/compare_native.go index b14aa8c72c..051b2ae079 100644 --- a/src/internal/bytealg/compare_native.go +++ b/src/internal/bytealg/compare_native.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle wasm +// +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle mips64 mips64le wasm package bytealg |
