From 11cdbab9d4f3e4f0ce690d595933c72df54fad33 Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Wed, 23 Sep 2020 03:58:52 -0700 Subject: bytes, internal/bytealg: fix incorrect IndexString usage The IndexString implementation in the bytealg package requires that the string passed into it be in the range '2 <= len(s) <= MaxLen' where MaxLen may be any value (including 0). CL 156998 added calls to bytealg.IndexString where MaxLen was not first checked. This led to an illegal instruction on s390x with the vector facility disabled. This CL guards the calls to bytealg.IndexString with a MaxLen check. If the check fails then the code now falls back to the pre CL 156998 implementation (a loop over the runes in the string). Since the MaxLen check is now in place the generic implementation is no longer called so I have returned it to its original unimplemented state. In future we may want to drop MaxLen to prevent this kind of confusion. Fixes #41552. Change-Id: Ibeb3f08720444a05c08d719ed97f6cef2423bbe9 Reviewed-on: https://go-review.googlesource.com/c/go/+/256717 Run-TryBot: Michael Munday TryBot-Result: Go Bot Trust: Michael Munday Reviewed-by: Keith Randall --- src/internal/bytealg/index_generic.go | 38 ++--------------------------------- 1 file changed, 2 insertions(+), 36 deletions(-) (limited to 'src/internal/bytealg') diff --git a/src/internal/bytealg/index_generic.go b/src/internal/bytealg/index_generic.go index 83345f1013..98e859f925 100644 --- a/src/internal/bytealg/index_generic.go +++ b/src/internal/bytealg/index_generic.go @@ -16,42 +16,8 @@ func Index(a, b []byte) int { // IndexString returns the index of the first instance of b in a, or -1 if b is not present in a. // Requires 2 <= len(b) <= MaxLen. -func IndexString(s, substr string) int { - // This is a partial copy of strings.Index, here because bytes.IndexAny and bytes.LastIndexAny - // call bytealg.IndexString. Some platforms have an optimized assembly version of this function. - // This implementation is used for those that do not. Although the pure Go implementation here - // works for the case of len(b) > MaxLen, we do not require that its assembly implementation also - // supports the case of len(b) > MaxLen. And we do not guarantee that this function supports the - // case of len(b) > MaxLen. - n := len(substr) - c0 := substr[0] - c1 := substr[1] - i := 0 - t := len(s) - n + 1 - fails := 0 - for i < t { - if s[i] != c0 { - o := IndexByteString(s[i:t], c0) - if o < 0 { - return -1 - } - i += o - } - if s[i+1] == c1 && s[i:i+n] == substr { - return i - } - i++ - fails++ - if fails >= 4+i>>4 && i < t { - // See comment in src/bytes/bytes.go. - j := IndexRabinKarp(s[i:], substr) - if j < 0 { - return -1 - } - return i + j - } - } - return -1 +func IndexString(a, b string) int { + panic("unimplemented") } // Cutover reports the number of failures of IndexByte we should tolerate -- cgit v1.3 From e69f6e839315b1df4e6273c38ae1a49e340b8a91 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 12 Oct 2020 11:19:41 +0200 Subject: internal/bytealg: fix typo in IndexRabinKarp{,Bytes} godoc Change-Id: I09ba19e19b195e345a0fe29d542e0d86529b0d31 Reviewed-on: https://go-review.googlesource.com/c/go/+/261359 Trust: Tobias Klauser Run-TryBot: Tobias Klauser TryBot-Result: Go Bot Reviewed-by: Ian Lance Taylor --- src/internal/bytealg/bytealg.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/internal/bytealg') diff --git a/src/internal/bytealg/bytealg.go b/src/internal/bytealg/bytealg.go index 6fd9308369..b30c234436 100644 --- a/src/internal/bytealg/bytealg.go +++ b/src/internal/bytealg/bytealg.go @@ -99,7 +99,7 @@ func HashStrRev(sep string) (uint32, uint32) { } // IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the -// first occurence of substr in s, or -1 if not present. +// first occurrence of substr in s, or -1 if not present. func IndexRabinKarpBytes(s, sep []byte) int { // Rabin-Karp search hashsep, pow := HashStrBytes(sep) @@ -124,7 +124,7 @@ func IndexRabinKarpBytes(s, sep []byte) int { } // IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the -// first occurence of substr in s, or -1 if not present. +// first occurrence of substr in s, or -1 if not present. func IndexRabinKarp(s, substr string) int { // Rabin-Karp search hashss, pow := HashStr(substr) -- cgit v1.3 From 19f6422e005ed3f4b1a9f6850d382519673a3c87 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 19 Oct 2020 11:09:10 +0200 Subject: internal/bytealg: add assembly implementation of Count/CountString for riscv64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simple single-byte loop count for now, to be further improved in future CLs. Benchmark on linux/riscv64 (HiFive Unleashed): name old time/op new time/op delta CountSingle/10-4 190ns ± 1% 145ns ± 1% -23.66% (p=0.000 n=10+9) CountSingle/32-4 422ns ± 1% 268ns ± 0% -36.43% (p=0.000 n=10+7) CountSingle/4K-4 43.3µs ± 0% 23.8µs ± 0% -45.09% (p=0.000 n=8+10) CountSingle/4M-4 54.2ms ± 1% 33.3ms ± 1% -38.48% (p=0.000 n=10+10) CountSingle/64M-4 1.52s ± 1% 1.20s ± 1% -21.20% (p=0.000 n=9+9) name old speed new speed delta CountSingle/10-4 52.7MB/s ± 1% 69.1MB/s ± 1% +31.03% (p=0.000 n=10+9) CountSingle/32-4 75.9MB/s ± 1% 119.5MB/s ± 0% +57.34% (p=0.000 n=10+8) CountSingle/4K-4 94.6MB/s ± 0% 172.2MB/s ± 0% +82.10% (p=0.000 n=8+10) CountSingle/4M-4 77.4MB/s ± 1% 125.8MB/s ± 1% +62.54% (p=0.000 n=10+10) CountSingle/64M-4 44.2MB/s ± 1% 56.1MB/s ± 1% +26.91% (p=0.000 n=9+9) Change-Id: I2a6bd50d22d5f598517bb3c5a50066c54280cac5 Reviewed-on: https://go-review.googlesource.com/c/go/+/263541 Trust: Tobias Klauser Run-TryBot: Tobias Klauser TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Reviewed-by: Joel Sing --- src/internal/bytealg/count_generic.go | 2 +- src/internal/bytealg/count_native.go | 2 +- src/internal/bytealg/count_riscv64.s | 44 +++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 src/internal/bytealg/count_riscv64.s (limited to 'src/internal/bytealg') diff --git a/src/internal/bytealg/count_generic.go b/src/internal/bytealg/count_generic.go index 7cc1d50312..5575e81ab8 100644 --- a/src/internal/bytealg/count_generic.go +++ b/src/internal/bytealg/count_generic.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64,!arm,!arm64,!ppc64le,!ppc64,!s390x +// +build !amd64,!arm,!arm64,!ppc64le,!ppc64,!riscv64,!s390x package bytealg diff --git a/src/internal/bytealg/count_native.go b/src/internal/bytealg/count_native.go index 0448fca9e8..b1ff1d265a 100644 --- a/src/internal/bytealg/count_native.go +++ b/src/internal/bytealg/count_native.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64 arm arm64 ppc64le ppc64 s390x +// +build amd64 arm arm64 ppc64le ppc64 riscv64 s390x package bytealg diff --git a/src/internal/bytealg/count_riscv64.s b/src/internal/bytealg/count_riscv64.s new file mode 100644 index 0000000000..3f4eb23286 --- /dev/null +++ b/src/internal/bytealg/count_riscv64.s @@ -0,0 +1,44 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "go_asm.h" +#include "textflag.h" + +TEXT ·Count(SB),NOSPLIT,$0-40 + MOV b_base+0(FP), A1 + MOV b_len+8(FP), A2 + MOVBU c+24(FP), A3 // byte to count + MOV ZERO, A4 // count + ADD A1, A2 // end + +loop: + BEQ A1, A2, done + MOVBU (A1), A5 + ADD $1, A1 + BNE A3, A5, loop + ADD $1, A4 + JMP loop + +done: + MOV A4, ret+32(FP) + RET + +TEXT ·CountString(SB),NOSPLIT,$0-32 + MOV s_base+0(FP), A1 + MOV s_len+8(FP), A2 + MOVBU c+16(FP), A3 // byte to count + MOV ZERO, A4 // count + ADD A1, A2 // end + +loop: + BEQ A1, A2, done + MOVBU (A1), A5 + ADD $1, A1 + BNE A3, A5, loop + ADD $1, A4 + JMP loop + +done: + MOV A4, ret+24(FP) + RET -- cgit v1.3 From 4f597abe77338588011a5b91ffefb0f7e11aa868 Mon Sep 17 00:00:00 2001 From: Meng Zhuo Date: Thu, 22 Oct 2020 14:03:11 +0800 Subject: internal/bytealg: improve mips64x equal on large size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit name old time/op new time/op delta Equal/0 9.94ns ± 4% 9.12ns ± 5% -8.26% (p=0.000 n=10+10) Equal/1 24.5ns ± 0% 27.2ns ± 1% +11.22% (p=0.000 n=9+10) Equal/6 28.1ns ± 0% 32.1ns ± 1% +14.20% (p=0.000 n=8+10) Equal/9 37.1ns ± 0% 37.8ns ± 1% +1.95% (p=0.000 n=8+9) Equal/15 47.3ns ± 0% 44.3ns ± 0% -6.34% (p=0.000 n=9+10) Equal/16 42.9ns ± 0% 24.6ns ± 0% -42.66% (p=0.000 n=10+7) Equal/20 44.3ns ± 0% 57.4ns ± 0% +29.57% (p=0.000 n=9+10) Equal/32 63.2ns ± 0% 35.8ns ± 0% -43.35% (p=0.000 n=10+10) Equal/4K 6.49µs ± 0% 0.50µs ± 0% -92.27% (p=0.000 n=10+8) Equal/4M 6.70ms ± 0% 0.48ms ± 0% -92.78% (p=0.000 n=8+10) Equal/64M 110ms ± 0% 8ms ± 0% -92.65% (p=0.000 n=9+9) CompareBytesEqual 36.6ns ± 0% 35.9ns ± 0% -1.83% (p=0.000 n=10+9) name old speed new speed delta Equal/1 40.8MB/s ± 0% 36.7MB/s ± 0% -10.16% (p=0.000 n=10+10) Equal/6 213MB/s ± 0% 187MB/s ± 1% -12.32% (p=0.000 n=10+10) Equal/9 243MB/s ± 0% 238MB/s ± 1% -1.94% (p=0.000 n=9+10) Equal/15 317MB/s ± 0% 339MB/s ± 0% +6.86% (p=0.000 n=9+9) Equal/16 373MB/s ± 0% 651MB/s ± 0% +74.70% (p=0.000 n=8+10) Equal/20 452MB/s ± 0% 348MB/s ± 0% -22.90% (p=0.000 n=8+10) Equal/32 506MB/s ± 0% 893MB/s ± 0% +76.53% (p=0.000 n=10+9) Equal/4K 631MB/s ± 0% 8166MB/s ± 0% +1194.73% (p=0.000 n=10+10) Equal/4M 626MB/s ± 0% 8673MB/s ± 0% +1284.94% (p=0.000 n=8+10) Equal/64M 608MB/s ± 0% 8277MB/s ± 0% +1260.83% (p=0.000 n=9+9) Change-Id: I1cd14ade16390a5097a8d4e9721d5e822fa6218f Reviewed-on: https://go-review.googlesource.com/c/go/+/199597 Run-TryBot: Meng Zhuo TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Trust: Meng Zhuo --- src/internal/bytealg/equal_mips64x.s | 72 ++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 4 deletions(-) (limited to 'src/internal/bytealg') diff --git a/src/internal/bytealg/equal_mips64x.s b/src/internal/bytealg/equal_mips64x.s index 58dc4303b4..641e3ff06c 100644 --- a/src/internal/bytealg/equal_mips64x.s +++ b/src/internal/bytealg/equal_mips64x.s @@ -16,18 +16,82 @@ TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 BEQ R1, R2, eq MOVV size+16(FP), R3 ADDV R1, R3, R4 -loop: - BNE R1, R4, test + + // chunk size is 16 + SGTU $16, R3, R8 + BEQ R0, R8, chunk_entry + +byte_loop: + BNE R1, R4, byte_test MOVV $1, R1 MOVB R1, ret+24(FP) RET -test: +byte_test: MOVBU (R1), R6 ADDV $1, R1 MOVBU (R2), R7 ADDV $1, R2 - BEQ R6, R7, loop + BEQ R6, R7, byte_loop + JMP not_eq + +chunk_entry: + // make sure both a and b are aligned + OR R1, R2, R9 + AND $0x7, R9 + BNE R0, R9, byte_loop + JMP chunk_loop_1 + +chunk_loop: + // chunk size is 16 + SGTU $16, R3, R8 + BNE R0, R8, chunk_tail_8 +chunk_loop_1: + MOVV (R1), R6 + MOVV (R2), R7 + BNE R6, R7, not_eq + MOVV 8(R1), R12 + MOVV 8(R2), R13 + ADDV $16, R1 + ADDV $16, R2 + SUBV $16, R3 + BEQ R12, R13, chunk_loop + JMP not_eq + +chunk_tail_8: + AND $8, R3, R14 + BEQ R0, R14, chunk_tail_4 + MOVV (R1), R6 + MOVV (R2), R7 + BNE R6, R7, not_eq + ADDV $8, R1 + ADDV $8, R2 + +chunk_tail_4: + AND $4, R3, R14 + BEQ R0, R14, chunk_tail_2 + MOVWU (R1), R6 + MOVWU (R2), R7 + BNE R6, R7, not_eq + ADDV $4, R1 + ADDV $4, R2 + +chunk_tail_2: + AND $2, R3, R14 + BEQ R0, R14, chunk_tail_1 + MOVHU (R1), R6 + MOVHU (R2), R7 + BNE R6, R7, not_eq + ADDV $2, R1 + ADDV $2, R2 + +chunk_tail_1: + AND $1, R3, R14 + BEQ R0, R14, eq + MOVBU (R1), R6 + MOVBU (R2), R7 + BEQ R6, R7, eq +not_eq: MOVB R0, ret+24(FP) RET eq: -- cgit v1.3