aboutsummaryrefslogtreecommitdiff
path: root/src/bytes/bytes_generic.go
diff options
context:
space:
mode:
authorKeith Randall <khr@golang.org>2017-11-04 10:19:53 -0700
committerKeith Randall <khr@golang.org>2017-11-15 17:35:09 +0000
commita025277505d49fac9a5100ae9305020b063657c2 (patch)
treee3d69a79234f52bd9432e3d5abaace46d6990046 /src/bytes/bytes_generic.go
parent0ffe90b50189f04d820c35991858026204dba256 (diff)
downloadgo-a025277505d49fac9a5100ae9305020b063657c2.tar.xz
bytes,strings: in generic Index, use mix of IndexByte and Rabin-Karp
Use IndexByte first, as it allows us to skip lots of bytes quickly. If IndexByte is generating a lot of false positives, switch over to Rabin-Karp. Experiments for ppc64le bytes: name old time/op new time/op delta IndexPeriodic/IndexPeriodic2-2 1.12ms ± 0% 0.18ms ± 0% -83.54% (p=0.000 n=10+9) IndexPeriodic/IndexPeriodic4-2 635µs ± 0% 184µs ± 0% -71.06% (p=0.000 n=9+9) IndexPeriodic/IndexPeriodic8-2 289µs ± 0% 184µs ± 0% -36.51% (p=0.000 n=10+9) IndexPeriodic/IndexPeriodic16-2 133µs ± 0% 183µs ± 0% +37.68% (p=0.000 n=10+9) IndexPeriodic/IndexPeriodic32-2 68.3µs ± 0% 70.2µs ± 0% +2.76% (p=0.000 n=10+10) IndexPeriodic/IndexPeriodic64-2 35.8µs ± 0% 36.6µs ± 0% +2.17% (p=0.000 n=8+10) strings: name old time/op new time/op delta IndexPeriodic/IndexPeriodic2-2 184µs ± 0% 184µs ± 0% +0.11% (p=0.029 n=4+4) IndexPeriodic/IndexPeriodic4-2 184µs ± 0% 184µs ± 0% ~ (p=0.886 n=4+4) IndexPeriodic/IndexPeriodic8-2 184µs ± 0% 184µs ± 0% ~ (p=0.486 n=4+4) IndexPeriodic/IndexPeriodic16-2 185µs ± 1% 184µs ± 0% ~ (p=0.343 n=4+4) IndexPeriodic/IndexPeriodic32-2 184µs ± 0% 69µs ± 0% -62.37% (p=0.029 n=4+4) IndexPeriodic/IndexPeriodic64-2 184µs ± 0% 37µs ± 0% -80.17% (p=0.029 n=4+4) Fixes #22578 Change-Id: If2a4d8554cb96bfd699b58149d13ac294615f8b8 Reviewed-on: https://go-review.googlesource.com/76070 Reviewed-by: Alberto Donizetti <alb.donizetti@gmail.com>
Diffstat (limited to 'src/bytes/bytes_generic.go')
-rw-r--r--src/bytes/bytes_generic.go36
1 files changed, 27 insertions, 9 deletions
diff --git a/src/bytes/bytes_generic.go b/src/bytes/bytes_generic.go
index 32abd3b33f..b30e53bf2e 100644
--- a/src/bytes/bytes_generic.go
+++ b/src/bytes/bytes_generic.go
@@ -6,23 +6,25 @@
package bytes
-// TODO: implements short string optimization on non amd64 platforms
-// and get rid of bytes_amd64.go
-
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
func Index(s, sep []byte) int {
n := len(sep)
- if n == 0 {
+ switch {
+ case n == 0:
return 0
- }
- if n > len(s) {
+ case n == 1:
+ return IndexByte(s, sep[0])
+ case n == len(s):
+ if Equal(sep, s) {
+ return 0
+ }
+ return -1
+ case n > len(s):
return -1
}
c := sep[0]
- if n == 1 {
- return IndexByte(s, c)
- }
i := 0
+ fails := 0
t := s[:len(s)-n+1]
for i < len(t) {
if t[i] != c {
@@ -36,6 +38,22 @@ func Index(s, sep []byte) int {
return i
}
i++
+ fails++
+ if fails >= 4+i>>4 && i < len(t) {
+ // Give up on IndexByte, it isn't skipping ahead
+ // far enough to be better than Rabin-Karp.
+ // Experiments (using IndexPeriodic) suggest
+ // the cutover is about 16 byte skips.
+ // TODO: if large prefixes of sep are matching
+ // we should cutover at even larger average skips,
+ // because Equal becomes that much more expensive.
+ // This code does not take that effect into account.
+ j := indexRabinKarp(s[i:], sep)
+ if j < 0 {
+ return -1
+ }
+ return i + j
+ }
}
return -1
}