bytes, strings: add ASCII fast path to EqualFold

This commit adds an ASCII fast path to bytes/strings EqualFold that
roughly doubles performance when all characters are ASCII.

It also changes strings.EqualFold to use `for range` for the first
string since this is ~10% faster than using utf8.DecodeRuneInString for
both (see #31666).

Performance (similar results on arm64 and amd64):

name                        old time/op  new time/op  delta
EqualFold/Tests-10          238ns ± 0%   172ns ± 1%   -27.91%  (p=0.000 n=10+10)
EqualFold/ASCII-10          20.5ns ± 0%  9.7ns ± 0%   -52.73%  (p=0.000 n=10+10)
EqualFold/UnicodePrefix-10  86.5ns ± 0%  77.6ns ± 0%  -10.37%  (p=0.000 n=10+10)
EqualFold/UnicodeSuffix-10  86.8ns ± 2%  71.3ns ± 0%  -17.88%  (p=0.000 n=10+8)

Change-Id: I058f3f97a08dc04d65af895674d85420f920abe1
Reviewed-on: https://go-review.googlesource.com/c/go/+/425459
Reviewed-by: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
author: Charlie Vieth <charlie.vieth@gmail.com> 2022-08-24 14:23:28 -0400
committer: Gopher Robot <gobot@golang.org> 2022-09-21 14:00:37 +0000
commit: c70fd4b30aba5db2df7b5f6b0833c62b909f50eb (patch)
tree: 19030ad14b06d7a36219a7be71eca75c49f0df22 /src/bytes/bytes.go
parent: 9c916c79011f3af98b5670eb2ba55349ba904522 (diff)
download: go-c70fd4b30aba5db2df7b5f6b0833c62b909f50eb.tar.xz
1 files changed, 30 insertions, 0 deletions
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index 7b28cf1efc..119b1f62b1 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -1134,6 +1134,36 @@ func ReplaceAll(s, old, new []byte) []byte {
 // are equal under simple Unicode case-folding, which is a more general
 // form of case-insensitivity.
 func EqualFold(s, t []byte) bool {
+	// ASCII fast path
+	i := 0
+	for ; i < len(s) && i < len(t); i++ {
+		sr := s[i]
+		tr := t[i]
+		if sr|tr >= utf8.RuneSelf {
+			goto hasUnicode
+		}
+
+		// Easy case.
+		if tr == sr {
+			continue
+		}
+
+		// Make sr < tr to simplify what follows.
+		if tr < sr {
+			tr, sr = sr, tr
+		}
+		// ASCII only, sr/tr must be upper/lower case
+		if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
+			continue
+		}
+		return false
+	}
+	// Check if we've exhausted both strings.
+	return len(s) == len(t)
+
+hasUnicode:
+	s = s[i:]
+	t = t[i:]
 	for len(s) != 0 && len(t) != 0 {
 		// Extract first rune from each.
 		var sr, tr rune
author	Charlie Vieth <charlie.vieth@gmail.com>	2022-08-24 14:23:28 -0400
committer	Gopher Robot <gobot@golang.org>	2022-09-21 14:00:37 +0000
commit	c70fd4b30aba5db2df7b5f6b0833c62b909f50eb (patch)
tree	19030ad14b06d7a36219a7be71eca75c49f0df22 /src/bytes/bytes.go
parent	9c916c79011f3af98b5670eb2ba55349ba904522 (diff)
download	go-c70fd4b30aba5db2df7b5f6b0833c62b909f50eb.tar.xz