diff options
| author | Keith Randall <khr@golang.org> | 2017-11-04 10:19:53 -0700 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2017-11-15 17:35:09 +0000 |
| commit | a025277505d49fac9a5100ae9305020b063657c2 (patch) | |
| tree | e3d69a79234f52bd9432e3d5abaace46d6990046 /src/strings | |
| parent | 0ffe90b50189f04d820c35991858026204dba256 (diff) | |
| download | go-a025277505d49fac9a5100ae9305020b063657c2.tar.xz | |
bytes,strings: in generic Index, use mix of IndexByte and Rabin-Karp
Use IndexByte first, as it allows us to skip lots of bytes quickly.
If IndexByte is generating a lot of false positives, switch over to Rabin-Karp.
Experiments for ppc64le
bytes:
name old time/op new time/op delta
IndexPeriodic/IndexPeriodic2-2 1.12ms ± 0% 0.18ms ± 0% -83.54% (p=0.000 n=10+9)
IndexPeriodic/IndexPeriodic4-2 635µs ± 0% 184µs ± 0% -71.06% (p=0.000 n=9+9)
IndexPeriodic/IndexPeriodic8-2 289µs ± 0% 184µs ± 0% -36.51% (p=0.000 n=10+9)
IndexPeriodic/IndexPeriodic16-2 133µs ± 0% 183µs ± 0% +37.68% (p=0.000 n=10+9)
IndexPeriodic/IndexPeriodic32-2 68.3µs ± 0% 70.2µs ± 0% +2.76% (p=0.000 n=10+10)
IndexPeriodic/IndexPeriodic64-2 35.8µs ± 0% 36.6µs ± 0% +2.17% (p=0.000 n=8+10)
strings:
name old time/op new time/op delta
IndexPeriodic/IndexPeriodic2-2 184µs ± 0% 184µs ± 0% +0.11% (p=0.029 n=4+4)
IndexPeriodic/IndexPeriodic4-2 184µs ± 0% 184µs ± 0% ~ (p=0.886 n=4+4)
IndexPeriodic/IndexPeriodic8-2 184µs ± 0% 184µs ± 0% ~ (p=0.486 n=4+4)
IndexPeriodic/IndexPeriodic16-2 185µs ± 1% 184µs ± 0% ~ (p=0.343 n=4+4)
IndexPeriodic/IndexPeriodic32-2 184µs ± 0% 69µs ± 0% -62.37% (p=0.029 n=4+4)
IndexPeriodic/IndexPeriodic64-2 184µs ± 0% 37µs ± 0% -80.17% (p=0.029 n=4+4)
Fixes #22578
Change-Id: If2a4d8554cb96bfd699b58149d13ac294615f8b8
Reviewed-on: https://go-review.googlesource.com/76070
Reviewed-by: Alberto Donizetti <alb.donizetti@gmail.com>
Diffstat (limited to 'src/strings')
| -rw-r--r-- | src/strings/strings.go | 24 | ||||
| -rw-r--r-- | src/strings/strings_amd64.go | 20 | ||||
| -rw-r--r-- | src/strings/strings_generic.go | 38 | ||||
| -rw-r--r-- | src/strings/strings_s390x.go | 20 | ||||
| -rw-r--r-- | src/strings/strings_test.go | 15 |
5 files changed, 64 insertions, 53 deletions
diff --git a/src/strings/strings.go b/src/strings/strings.go index 8520f8a732..c66c248c02 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -918,3 +918,27 @@ func EqualFold(s, t string) bool { // One string is empty. Are both? return s == t } + +func indexRabinKarp(s, substr string) int { + // Rabin-Karp search + hashss, pow := hashStr(substr) + n := len(substr) + var h uint32 + for i := 0; i < n; i++ { + h = h*primeRK + uint32(s[i]) + } + if h == hashss && s[:n] == substr { + return 0 + } + for i := n; i < len(s); { + h *= primeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-n]) + i++ + if h == hashss && s[i-n:i] == substr { + return i - n + } + } + return -1 + +} diff --git a/src/strings/strings_amd64.go b/src/strings/strings_amd64.go index a9c01bbf7f..68a1d0125c 100644 --- a/src/strings/strings_amd64.go +++ b/src/strings/strings_amd64.go @@ -75,25 +75,7 @@ func Index(s, substr string) int { } return -1 } - // Rabin-Karp search - hashss, pow := hashStr(substr) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashss && s[:n] == substr { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) - i++ - if h == hashss && s[i-n:i] == substr { - return i - n - } - } - return -1 + return indexRabinKarp(s, substr) } // Count counts the number of non-overlapping instances of substr in s. diff --git a/src/strings/strings_generic.go b/src/strings/strings_generic.go index 5429a74a22..b2af48bec8 100644 --- a/src/strings/strings_generic.go +++ b/src/strings/strings_generic.go @@ -25,22 +25,30 @@ func Index(s, substr string) int { case n > len(s): return -1 } - // Rabin-Karp search - hashss, pow := hashStr(substr) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashss && s[:n] == substr { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) + c := substr[0] + i := 0 + t := s[:len(s)-n+1] + fails := 0 + for i < len(t) { + if t[i] != c { + o := IndexByte(t[i:], c) + if o < 0 { + return -1 + } + i += o + } + if s[i:i+n] == substr { + return i + } i++ - if h == hashss && s[i-n:i] == substr { - return i - n + fails++ + if fails >= 4+i>>4 && i < len(t) { + // See comment in ../bytes/bytes_generic.go. + j := indexRabinKarp(s[i:], substr) + if j < 0 { + return -1 + } + return i + j } } return -1 diff --git a/src/strings/strings_s390x.go b/src/strings/strings_s390x.go index ccf2da632d..67c8e1700d 100644 --- a/src/strings/strings_s390x.go +++ b/src/strings/strings_s390x.go @@ -76,25 +76,7 @@ func Index(s, substr string) int { } return -1 } - // Rabin-Karp search - hashss, pow := hashStr(substr) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashss && s[:n] == substr { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) - i++ - if h == hashss && s[i-n:i] == substr { - return i - n - } - } - return -1 + return indexRabinKarp(s, substr) } // Count counts the number of non-overlapping instances of substr in s. diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go index 289dd92d51..d8fcb62a87 100644 --- a/src/strings/strings_test.go +++ b/src/strings/strings_test.go @@ -125,6 +125,9 @@ var indexTests = []IndexTest{ {"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1}, {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1}, {"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65}, + // test fallback to Rabin-Karp. + {"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22}, + {"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1}, } var lastIndexTests = []IndexTest{ @@ -1641,3 +1644,15 @@ func BenchmarkTrimASCII(b *testing.B) { } } } + +func BenchmarkIndexPeriodic(b *testing.B) { + key := "aa" + for _, skip := range [...]int{2, 4, 8, 16, 32, 64} { + b.Run(fmt.Sprintf("IndexPeriodic%d", skip), func(b *testing.B) { + s := Repeat("a"+Repeat(" ", skip-1), 1<<16/skip) + for i := 0; i < b.N; i++ { + Index(s, key) + } + }) + } +} |
