diff options
Diffstat (limited to 'src/bytes/bytes.go')
| -rw-r--r-- | src/bytes/bytes.go | 58 |
1 files changed, 57 insertions, 1 deletions
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go index 5c03e54d78..ac15ab9b69 100644 --- a/src/bytes/bytes.go +++ b/src/bytes/bytes.go @@ -137,6 +137,7 @@ func LastIndexByte(s []byte, c byte) int { // If r is [utf8.RuneError], it returns the first instance of any // invalid UTF-8 byte sequence. func IndexRune(s []byte, r rune) int { + const haveFastIndex = bytealg.MaxBruteForce > 0 switch { case 0 <= r && r < utf8.RuneSelf: return IndexByte(s, byte(r)) @@ -152,9 +153,64 @@ func IndexRune(s []byte, r rune) int { case !utf8.ValidRune(r): return -1 default: + // Search for rune r using the last byte of its UTF-8 encoded form. + // The distribution of the last byte is more uniform compared to the + // first byte which has a 78% chance of being [240, 243, 244]. var b [utf8.UTFMax]byte n := utf8.EncodeRune(b[:], r) - return Index(s, b[:n]) + last := n - 1 + i := last + fails := 0 + for i < len(s) { + if s[i] != b[last] { + o := IndexByte(s[i+1:], b[last]) + if o < 0 { + return -1 + } + i += o + 1 + } + // Step backwards comparing bytes. + for j := 1; j < n; j++ { + if s[i-j] != b[last-j] { + goto next + } + } + return i - last + next: + fails++ + i++ + if (haveFastIndex && fails > bytealg.Cutover(i)) && i < len(s) || + (!haveFastIndex && fails >= 4+i>>4 && i < len(s)) { + goto fallback + } + } + return -1 + + fallback: + // Switch to bytealg.Index, if available, or a brute for search when + // IndexByte returns too many false positives. + if haveFastIndex { + if j := bytealg.Index(s[i-last:], b[:n]); j >= 0 { + return i + j - last + } + } else { + // If bytealg.Index is not available a brute force search is + // ~1.5-3x faster than Rabin-Karp since n is small. + c0 := b[last] + c1 := b[last-1] // There are at least 2 chars to match + loop: + for ; i < len(s); i++ { + if s[i] == c0 && s[i-1] == c1 { + for k := 2; k < n; k++ { + if s[i-k] != b[last-k] { + continue loop + } + } + return i - last + } + } + } + return -1 } } |
