aboutsummaryrefslogtreecommitdiff
path: root/src/internal/bytealg/bytealg.go
diff options
context:
space:
mode:
authorerifan01 <eric.fang@arm.com>2019-11-21 14:38:25 +0800
committerIan Lance Taylor <iant@golang.org>2020-03-04 17:36:05 +0000
commit18a6fd44bb9de9399dba86a4c1ae1e56f967bcda (patch)
tree52d4c6a7c8d9cbaff8f7717971305d7d88378a63 /src/internal/bytealg/bytealg.go
parentcec08794ef325e84f141e1a7b4deca0bedaeab34 (diff)
downloadgo-18a6fd44bb9de9399dba86a4c1ae1e56f967bcda.tar.xz
bytes, strings: moves indexRabinKarp function to internal/bytealg
In order to facilitate optimization of IndexAny and LastIndexAny, this patch moves three Rabin-Karp related functions indexRabinKarp, hashStr and hashStrRev in the strings package to internal/bytealg. There are also three functions in the bytes package with the same names and functions but different parameter types. To highlight this, this patch also moves them to internal/bytealg and gives them slightly different names. Related benchmark changes on amd64 and arm64: name old time/op new time/op delta pkg:strings goos:linux goarch:amd64 Index-16 14.0ns ± 1% 14.1ns ± 2% ~ (p=0.738 n=5+5) LastIndex-16 15.5ns ± 1% 15.7ns ± 4% ~ (p=0.897 n=5+5) pkg:bytes goos:linux goarch:amd64 Index/10-16 26.5ns ± 1% 26.5ns ± 0% ~ (p=0.873 n=5+5) Index/32-16 26.2ns ± 0% 25.7ns ± 0% -1.68% (p=0.008 n=5+5) Index/4K-16 5.12µs ± 4% 5.14µs ± 2% ~ (p=0.841 n=5+5) Index/4M-16 5.44ms ± 3% 5.34ms ± 2% ~ (p=0.056 n=5+5) Index/64M-16 85.8ms ± 3% 84.6ms ± 0% -1.37% (p=0.016 n=5+5) name old speed new speed delta pkg:bytes goos:linux goarch:amd64 Index/10-16 377MB/s ± 1% 377MB/s ± 0% ~ (p=1.000 n=5+5) Index/32-16 1.22GB/s ± 1% 1.24GB/s ± 0% +1.66% (p=0.008 n=5+5) Index/4K-16 800MB/s ± 4% 797MB/s ± 2% ~ (p=0.841 n=5+5) Index/4M-16 771MB/s ± 3% 786MB/s ± 2% ~ (p=0.056 n=5+5) Index/64M-16 783MB/s ± 3% 793MB/s ± 0% +1.36% (p=0.016 n=5+5) name old time/op new time/op delta pkg:strings goos:linux goarch:arm64 Index-8 22.6ns ± 0% 22.5ns ± 0% ~ (p=0.167 n=5+5) LastIndex-8 17.5ns ± 0% 17.5ns ± 0% ~ (all equal) pkg:bytes goos:linux goarch:arm64 Index/10-8 25.0ns ± 0% 25.0ns ± 0% ~ (all equal) Index/32-8 160ns ± 0% 160ns ± 0% ~ (all equal) Index/4K-8 6.26µs ± 0% 6.26µs ± 0% ~ (p=0.167 n=5+5) Index/4M-8 6.30ms ± 0% 6.31ms ± 0% ~ (p=1.000 n=5+5) Index/64M-8 101ms ± 0% 101ms ± 0% ~ (p=0.690 n=5+5) name old speed new speed delta pkg:bytes goos:linux goarch:arm64 Index/10-8 399MB/s ± 0% 400MB/s ± 0% +0.08% (p=0.008 n=5+5) Index/32-8 200MB/s ± 0% 200MB/s ± 0% ~ (p=0.127 n=4+5) Index/4K-8 654MB/s ± 0% 654MB/s ± 0% 
+0.01% (p=0.016 n=5+5) Index/4M-8 665MB/s ± 0% 665MB/s ± 0% ~ (p=0.833 n=5+5) Index/64M-8 665MB/s ± 0% 665MB/s ± 0% ~ (p=0.913 n=5+5) Change-Id: Icce3bc162bb8613ac36dc963a46c51f8e82ab842 Reviewed-on: https://go-review.googlesource.com/c/go/+/208638 Run-TryBot: eric fang <eric.fang@arm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
Diffstat (limited to 'src/internal/bytealg/bytealg.go')
-rw-r--r--src/internal/bytealg/bytealg.go125
1 files changed, 125 insertions, 0 deletions
diff --git a/src/internal/bytealg/bytealg.go b/src/internal/bytealg/bytealg.go
index 9ecd8eb004..4c90cd3671 100644
--- a/src/internal/bytealg/bytealg.go
+++ b/src/internal/bytealg/bytealg.go
@@ -21,3 +21,128 @@ const (
// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
var MaxLen int
+
+// FIXME: HashStrBytes, HashStrRevBytes and IndexRabinKarpBytes duplicate the logic of HashStr,
+// HashStrRev and IndexRabinKarp exactly; only the parameter types differ. Can we eliminate
+// three of them without causing allocation?
+
+// PrimeRK is the prime base used in Rabin-Karp algorithm.
+// The hash functions in this file multiply the running uint32 hash by it
+// before adding each byte.
+const PrimeRK = 16777619
+
+// HashStrBytes computes the Rabin-Karp hash of sep, scanning it front to
+// back, and also returns PrimeRK raised to the power len(sep). Both results
+// are computed in wrapping uint32 arithmetic.
+func HashStrBytes(sep []byte) (uint32, uint32) {
+	var sum uint32
+	for _, c := range sep {
+		sum = sum*PrimeRK + uint32(c)
+	}
+	// Square-and-multiply over the bits of len(sep).
+	factor, base := uint32(1), uint32(PrimeRK)
+	for bits := len(sep); bits > 0; bits >>= 1 {
+		if bits&1 == 1 {
+			factor *= base
+		}
+		base *= base
+	}
+	return sum, factor
+}
+
+// HashStr computes the Rabin-Karp hash of sep, scanning its bytes front to
+// back, and also returns PrimeRK raised to the power len(sep). Both results
+// are computed in wrapping uint32 arithmetic.
+func HashStr(sep string) (uint32, uint32) {
+	var sum uint32
+	// Index byte by byte; ranging over a string would yield runes instead.
+	for k := 0; k != len(sep); k++ {
+		sum = sum*PrimeRK + uint32(sep[k])
+	}
+	// Square-and-multiply over the bits of len(sep).
+	factor, base := uint32(1), uint32(PrimeRK)
+	for bits := len(sep); bits > 0; bits >>= 1 {
+		if bits&1 == 1 {
+			factor *= base
+		}
+		base *= base
+	}
+	return sum, factor
+}
+
+// HashStrRevBytes computes the Rabin-Karp hash of sep scanned back to
+// front (that is, the hash of its reverse), and also returns PrimeRK raised
+// to the power len(sep). Both results are computed in wrapping uint32
+// arithmetic.
+func HashStrRevBytes(sep []byte) (uint32, uint32) {
+	var sum uint32
+	for k := len(sep); k > 0; k-- {
+		sum = sum*PrimeRK + uint32(sep[k-1])
+	}
+	// Square-and-multiply over the bits of len(sep).
+	factor, base := uint32(1), uint32(PrimeRK)
+	for bits := len(sep); bits > 0; bits >>= 1 {
+		if bits&1 == 1 {
+			factor *= base
+		}
+		base *= base
+	}
+	return sum, factor
+}
+
+// HashStrRev computes the Rabin-Karp hash of the bytes of sep scanned back
+// to front (that is, the hash of its reverse), and also returns PrimeRK
+// raised to the power len(sep). Both results are computed in wrapping
+// uint32 arithmetic.
+func HashStrRev(sep string) (uint32, uint32) {
+	var sum uint32
+	for k := len(sep); k > 0; k-- {
+		sum = sum*PrimeRK + uint32(sep[k-1])
+	}
+	// Square-and-multiply over the bits of len(sep).
+	factor, base := uint32(1), uint32(PrimeRK)
+	for bits := len(sep); bits > 0; bits >>= 1 {
+		if bits&1 == 1 {
+			factor *= base
+		}
+		base *= base
+	}
+	return sum, factor
+}
+
+// IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
+// first occurrence of sep in s, or -1 if not present.
+//
+// A rolling uint32 hash of each len(sep)-byte window of s is compared with the
+// hash of sep; a hash hit is confirmed with Equal before it is reported, so a
+// collision cannot produce a false match. If sep is longer than s the result is -1.
+func IndexRabinKarpBytes(s, sep []byte) int {
+	n := len(sep)
+	if n > len(s) {
+		// No window of s can hold sep. Return before the hashing loop
+		// below would index past the end of s.
+		return -1
+	}
+	// Rabin-Karp search
+	hashsep, pow := HashStrBytes(sep)
+	var h uint32
+	for i := 0; i < n; i++ {
+		h = h*PrimeRK + uint32(s[i])
+	}
+	if h == hashsep && Equal(s[:n], sep) {
+		return 0
+	}
+	for i := n; i < len(s); {
+		// Slide the window one byte: bring in s[i], drop s[i-n].
+		h *= PrimeRK
+		h += uint32(s[i])
+		h -= pow * uint32(s[i-n])
+		i++
+		if h == hashsep && Equal(s[i-n:i], sep) {
+			return i - n
+		}
+	}
+	return -1
+}
+
+// IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
+// first occurrence of substr in s, or -1 if not present.
+//
+// A rolling uint32 hash of each len(substr)-byte window of s is compared with the
+// hash of substr; a hash hit is confirmed with a string comparison before it is
+// reported, so a collision cannot produce a false match. If substr is longer than
+// s the result is -1.
+func IndexRabinKarp(s, substr string) int {
+	n := len(substr)
+	if n > len(s) {
+		// No window of s can hold substr. Return before the hashing loop
+		// below would index past the end of s.
+		return -1
+	}
+	// Rabin-Karp search
+	hashss, pow := HashStr(substr)
+	var h uint32
+	for i := 0; i < n; i++ {
+		h = h*PrimeRK + uint32(s[i])
+	}
+	if h == hashss && s[:n] == substr {
+		return 0
+	}
+	for i := n; i < len(s); {
+		// Slide the window one byte: bring in s[i], drop s[i-n].
+		h *= PrimeRK
+		h += uint32(s[i])
+		h -= pow * uint32(s[i-n])
+		i++
+		if h == hashss && s[i-n:i] == substr {
+			return i - n
+		}
+	}
+	return -1
+}