aboutsummaryrefslogtreecommitdiff
path: root/src/strings
diff options
context:
space:
mode:
authorCharlie Vieth <charlie.vieth@gmail.com>2022-08-24 14:23:28 -0400
committerGopher Robot <gobot@golang.org>2022-09-21 14:00:37 +0000
commitc70fd4b30aba5db2df7b5f6b0833c62b909f50eb (patch)
tree19030ad14b06d7a36219a7be71eca75c49f0df22 /src/strings
parent9c916c79011f3af98b5670eb2ba55349ba904522 (diff)
downloadgo-c70fd4b30aba5db2df7b5f6b0833c62b909f50eb.tar.xz
bytes, strings: add ASCII fast path to EqualFold
This commit adds an ASCII fast path to bytes/strings EqualFold that roughly doubles performance when all characters are ASCII. It also changes strings.EqualFold to use `for range` for the first string since this is ~10% faster than using utf8.DecodeRuneInString for both (see #31666). Performance (similar results on arm64 and amd64): name old time/op new time/op delta EqualFold/Tests-10 238ns ± 0% 172ns ± 1% -27.91% (p=0.000 n=10+10) EqualFold/ASCII-10 20.5ns ± 0% 9.7ns ± 0% -52.73% (p=0.000 n=10+10) EqualFold/UnicodePrefix-10 86.5ns ± 0% 77.6ns ± 0% -10.37% (p=0.000 n=10+10) EqualFold/UnicodeSuffix-10 86.8ns ± 2% 71.3ns ± 0% -17.88% (p=0.000 n=10+8) Change-Id: I058f3f97a08dc04d65af895674d85420f920abe1 Reviewed-on: https://go-review.googlesource.com/c/go/+/425459 Reviewed-by: Ian Lance Taylor <iant@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Auto-Submit: Ian Lance Taylor <iant@google.com> Run-TryBot: Ian Lance Taylor <iant@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
Diffstat (limited to 'src/strings')
-rw-r--r--src/strings/strings.go49
-rw-r--r--src/strings/strings_test.go33
2 files changed, 67 insertions, 15 deletions
diff --git a/src/strings/strings.go b/src/strings/strings.go
index 7cf3686569..fee161e4cc 100644
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@@ -1067,15 +1067,44 @@ func ReplaceAll(s, old, new string) string {
// are equal under simple Unicode case-folding, which is a more general
// form of case-insensitivity.
func EqualFold(s, t string) bool {
- for s != "" && t != "" {
- // Extract first rune from each string.
- var sr, tr rune
- if s[0] < utf8.RuneSelf {
- sr, s = rune(s[0]), s[1:]
- } else {
- r, size := utf8.DecodeRuneInString(s)
- sr, s = r, s[size:]
+ // ASCII fast path
+ i := 0
+ for ; i < len(s) && i < len(t); i++ {
+ sr := s[i]
+ tr := t[i]
+ if sr|tr >= utf8.RuneSelf {
+ goto hasUnicode
}
+
+ // Easy case.
+ if tr == sr {
+ continue
+ }
+
+ // Make sr < tr to simplify what follows.
+ if tr < sr {
+ tr, sr = sr, tr
+ }
+ // ASCII only, sr/tr must be upper/lower case
+ if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
+ continue
+ }
+ return false
+ }
+ // Check if we've exhausted both strings.
+ return len(s) == len(t)
+
+hasUnicode:
+ s = s[i:]
+ t = t[i:]
+ for _, sr := range s {
+ // If t is exhausted the strings are not equal.
+ if len(t) == 0 {
+ return false
+ }
+
+ // Extract first rune from second string.
+ var tr rune
if t[0] < utf8.RuneSelf {
tr, t = rune(t[0]), t[1:]
} else {
@@ -1115,8 +1144,8 @@ func EqualFold(s, t string) bool {
return false
}
- // One string is empty. Are both?
- return s == t
+ // First string is empty, so check if the second one is also empty.
+ return len(t) == 0
}
// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go
index 9323ff988d..210bd9e44b 100644
--- a/src/strings/strings_test.go
+++ b/src/strings/strings_test.go
@@ -1556,13 +1556,36 @@ func TestEqualFold(t *testing.T) {
}
func BenchmarkEqualFold(b *testing.B) {
- for i := 0; i < b.N; i++ {
- for _, tt := range EqualFoldTests {
- if out := EqualFold(tt.s, tt.t); out != tt.out {
- b.Fatal("wrong result")
+ b.Run("Tests", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ for _, tt := range EqualFoldTests {
+ if out := EqualFold(tt.s, tt.t); out != tt.out {
+ b.Fatal("wrong result")
+ }
}
}
- }
+ })
+
+ const s1 = "abcdefghijKz"
+ const s2 = "abcDefGhijKz"
+
+ b.Run("ASCII", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ EqualFold(s1, s2)
+ }
+ })
+
+ b.Run("UnicodePrefix", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ EqualFold("αβδ"+s1, "ΑΒΔ"+s2)
+ }
+ })
+
+ b.Run("UnicodeSuffix", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ EqualFold(s1+"αβδ", s2+"ΑΒΔ")
+ }
+ })
}
var CountTests = []struct {