about summary refs log tree commit diff
path: root/src/bytes
diff options
context:
space:
mode:
author Julien Cretel <jub0bsinthecloud@gmail.com> 2025-08-25 20:38:20 +0000
committer Gopher Robot <gobot@golang.org> 2025-08-29 09:34:38 -0700
commit 89d41d254a758f9b5e554761c92508220f4342a5 (patch)
tree ad43e144eef942755282feffdc502148fcd9bcc1 /src/bytes
parent 38204e087267a3408e36aa23d2b5ac36f22feef3 (diff)
download go-89d41d254a758f9b5e554761c92508220f4342a5.tar.xz
bytes, strings: speed up TrimSpace

This change lifts bounds checks out of loops in the TrimSpace functions,
among other micro-optimizations. Here are some benchmark results
(no change to allocations):

goos: darwin
goarch: amd64
pkg: bytes
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                         │     old     │                 new                 │
                         │   sec/op    │   sec/op     vs base                │
TrimSpace/NoTrim-8         4.406n ± 0%   3.829n ± 1%  -13.11% (p=0.000 n=20)
TrimSpace/ASCII-8          7.688n ± 1%   5.872n ± 1%  -23.61% (p=0.000 n=20)
TrimSpace/SomeNonASCII-8   82.25n ± 1%   81.00n ± 1%   -1.51% (p=0.001 n=20)
TrimSpace/JustNonASCII-8   131.6n ± 8%   132.2n ± 1%        ~ (p=0.899 n=20)
geomean                    24.61n        22.15n        -9.99%

pkg: strings
                         │     old     │                 new                 │
                         │   sec/op    │   sec/op     vs base                │
TrimSpace/NoTrim-8         4.178n ± 0%   3.857n ± 2%   -7.68% (p=0.001 n=20)
TrimSpace/ASCII-8          7.708n ± 0%   5.585n ± 1%  -27.55% (p=0.000 n=20)
TrimSpace/SomeNonASCII-8   98.70n ± 1%   88.54n ± 1%  -10.30% (p=0.000 n=20)
TrimSpace/JustNonASCII-8   132.8n ± 2%   123.2n ± 0%   -7.16% (p=0.000 n=20)
geomean                    25.49n        22.02n       -13.61%

Change-Id: I523f03a909c82a51940b44c7b2634985b7447982
GitHub-Last-Rev: 35163f04c63ce2ef5e9e831c4371750504edb892
GitHub-Pull-Request: golang/go#75127
Reviewed-on: https://go-review.googlesource.com/c/go/+/698735
Reviewed-by: Sean Liao <sean@liao.dev>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Sean Liao <sean@liao.dev>
Reviewed-by: Keith Randall <khr@google.com>
Diffstat (limited to 'src/bytes')
-rw-r--r-- src/bytes/bytes.go 51
1 files changed, 22 insertions, 29 deletions
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index a0a8fa0b29..ce2e004910 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -1117,41 +1117,34 @@ func trimRightUnicode(s []byte, cutset string) []byte {
// TrimSpace returns a subslice of s by slicing off all leading and
// trailing white space, as defined by Unicode.
func TrimSpace(s []byte) []byte {
- // Fast path for ASCII: look for the first ASCII non-space byte
- start := 0
- for ; start < len(s); start++ {
- c := s[start]
+ // Fast path for ASCII: look for the first ASCII non-space byte.
+ for lo, c := range s {
if c >= utf8.RuneSelf {
// If we run into a non-ASCII byte, fall back to the
- // slower unicode-aware method on the remaining bytes
- return TrimFunc(s[start:], unicode.IsSpace)
- }
- if asciiSpace[c] == 0 {
- break
+ // slower unicode-aware method on the remaining bytes.
+ return TrimFunc(s[lo:], unicode.IsSpace)
}
- }
-
- // Now look for the first ASCII non-space byte from the end
- stop := len(s)
- for ; stop > start; stop-- {
- c := s[stop-1]
- if c >= utf8.RuneSelf {
- return TrimFunc(s[start:stop], unicode.IsSpace)
+ if asciiSpace[c] != 0 {
+ continue
}
- if asciiSpace[c] == 0 {
- break
+ s = s[lo:]
+ // Now look for the first ASCII non-space byte from the end.
+ for hi := len(s) - 1; hi >= 0; hi-- {
+ c := s[hi]
+ if c >= utf8.RuneSelf {
+ return TrimFunc(s[:hi+1], unicode.IsSpace)
+ }
+ if asciiSpace[c] == 0 {
+ // At this point, s[:hi+1] starts and ends with ASCII
+ // non-space bytes, so we're done. Non-ASCII cases have
+ // already been handled above.
+ return s[:hi+1]
+ }
}
}
-
- // At this point s[start:stop] starts and ends with an ASCII
- // non-space bytes, so we're done. Non-ASCII cases have already
- // been handled above.
- if start == stop {
- // Special case to preserve previous TrimLeftFunc behavior,
- // returning nil instead of empty slice if all spaces.
- return nil
- }
- return s[start:stop]
+ // Special case to preserve previous TrimLeftFunc behavior,
+ // returning nil instead of empty slice if all spaces.
+ return nil
}
// Runes interprets s as a sequence of UTF-8-encoded code points.