diff options
| author | Charlie Vieth <charlie.vieth@gmail.com> | 2022-08-24 14:23:28 -0400 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2022-09-21 14:00:37 +0000 |
| commit | c70fd4b30aba5db2df7b5f6b0833c62b909f50eb (patch) | |
| tree | 19030ad14b06d7a36219a7be71eca75c49f0df22 /src/strings/strings.go | |
| parent | 9c916c79011f3af98b5670eb2ba55349ba904522 (diff) | |
| download | go-c70fd4b30aba5db2df7b5f6b0833c62b909f50eb.tar.xz | |
bytes, strings: add ASCII fast path to EqualFold
This commit adds an ASCII fast path to bytes/strings EqualFold that
roughly doubles performance when all characters are ASCII.
It also changes strings.EqualFold to use `for range` for the first
string since this is ~10% faster than using utf8.DecodeRuneInString for
both (see #31666).
Performance (similar results on arm64 and amd64):
name old time/op new time/op delta
EqualFold/Tests-10 238ns ± 0% 172ns ± 1% -27.91% (p=0.000 n=10+10)
EqualFold/ASCII-10 20.5ns ± 0% 9.7ns ± 0% -52.73% (p=0.000 n=10+10)
EqualFold/UnicodePrefix-10 86.5ns ± 0% 77.6ns ± 0% -10.37% (p=0.000 n=10+10)
EqualFold/UnicodeSuffix-10 86.8ns ± 2% 71.3ns ± 0% -17.88% (p=0.000 n=10+8)
Change-Id: I058f3f97a08dc04d65af895674d85420f920abe1
Reviewed-on: https://go-review.googlesource.com/c/go/+/425459
Reviewed-by: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Diffstat (limited to 'src/strings/strings.go')
| -rw-r--r-- | src/strings/strings.go | 49 |
1 files changed, 39 insertions, 10 deletions
diff --git a/src/strings/strings.go b/src/strings/strings.go index 7cf3686569..fee161e4cc 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -1067,15 +1067,44 @@ func ReplaceAll(s, old, new string) string { // are equal under simple Unicode case-folding, which is a more general // form of case-insensitivity. func EqualFold(s, t string) bool { - for s != "" && t != "" { - // Extract first rune from each string. - var sr, tr rune - if s[0] < utf8.RuneSelf { - sr, s = rune(s[0]), s[1:] - } else { - r, size := utf8.DecodeRuneInString(s) - sr, s = r, s[size:] + // ASCII fast path + i := 0 + for ; i < len(s) && i < len(t); i++ { + sr := s[i] + tr := t[i] + if sr|tr >= utf8.RuneSelf { + goto hasUnicode } + + // Easy case. + if tr == sr { + continue + } + + // Make sr < tr to simplify what follows. + if tr < sr { + tr, sr = sr, tr + } + // ASCII only, sr/tr must be upper/lower case + if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' { + continue + } + return false + } + // Check if we've exhausted both strings. + return len(s) == len(t) + +hasUnicode: + s = s[i:] + t = t[i:] + for _, sr := range s { + // If t is exhausted the strings are not equal. + if len(t) == 0 { + return false + } + + // Extract first rune from second string. + var tr rune if t[0] < utf8.RuneSelf { tr, t = rune(t[0]), t[1:] } else { @@ -1115,8 +1144,8 @@ func EqualFold(s, t string) bool { return false } - // One string is empty. Are both? - return s == t + // First string is empty, so check if the second one is also empty. + return len(t) == 0 } // Index returns the index of the first instance of substr in s, or -1 if substr is not present in s. |
