aboutsummaryrefslogtreecommitdiff
path: root/src/bytes
diff options
context:
space:
mode:
authorJulien Cretel <jub0bsinthecloud@gmail.com>2025-09-02 22:10:40 +0000
committert hepudds <thepudds1460@gmail.com>2025-09-03 07:04:47 -0700
commit925a3cdcd13472c8f78d51c9ce99a59e77d46eb4 (patch)
tree52488ccff88e1b6a81631eb3e086e41b50d1340b /src/bytes
parent3e596d448fb6b9668d144cffaa65e1d12a5fdce8 (diff)
downloadgo-925a3cdcd13472c8f78d51c9ce99a59e77d46eb4.tar.xz
unicode/utf8: make DecodeRune{,InString} inlineable
This change makes the fast path for ASCII characters inlineable in DecodeRune and DecodeRuneInString and removes most instances of manual inlining at call sites. Here are some benchmark results (no change to allocations): goos: darwin goarch: amd64 pkg: unicode/utf8 cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz │ old │ new │ │ sec/op │ sec/op vs base │ DecodeASCIIRune-8 2.4545n ± 2% 0.6253n ± 2% -74.52% (p=0.000 n=20) DecodeJapaneseRune-8 3.988n ± 1% 4.023n ± 1% +0.86% (p=0.050 n=20) DecodeASCIIRuneInString-8 2.4675n ± 1% 0.6264n ± 2% -74.61% (p=0.000 n=20) DecodeJapaneseRuneInString-8 3.992n ± 1% 4.001n ± 1% ~ (p=0.625 n=20) geomean 3.134n 1.585n -49.43% Note: when #61502 gets resolved, DecodeRune and DecodeRuneInString should be reverted to their idiomatic implementations. Fixes #31666 Updates #48195 Change-Id: I4be25c4f52417dc28b3a7bd72f1b04018470f39d GitHub-Last-Rev: 2e352a0045027e059be79cdb60241b5cf35fec71 GitHub-Pull-Request: golang/go#75181 Reviewed-on: https://go-review.googlesource.com/c/go/+/699675 Reviewed-by: Sean Liao <sean@liao.dev> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Michael Pratt <mpratt@google.com>
Diffstat (limited to 'src/bytes')
-rw-r--r--src/bytes/bytes.go40
-rw-r--r--src/bytes/iter.go6
2 files changed, 9 insertions, 37 deletions
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index ce2e004910..9a7f4ee3c9 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -528,11 +528,7 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
// more efficient, possibly due to cache effects.
start := -1 // valid span start if >= 0
for i := 0; i < len(s); {
- size := 1
- r := rune(s[i])
- if r >= utf8.RuneSelf {
- r, size = utf8.DecodeRune(s[i:])
- }
+ r, size := utf8.DecodeRune(s[i:])
if f(r) {
if start >= 0 {
spans = append(spans, span{start, i})
@@ -614,11 +610,7 @@ func Map(mapping func(r rune) rune, s []byte) []byte {
// fine. It could also shrink but that falls out naturally.
b := make([]byte, 0, len(s))
for i := 0; i < len(s); {
- wid := 1
- r := rune(s[i])
- if r >= utf8.RuneSelf {
- r, wid = utf8.DecodeRune(s[i:])
- }
+ r, wid := utf8.DecodeRune(s[i:])
r = mapping(r)
if r >= 0 {
b = utf8.AppendRune(b, r)
@@ -917,11 +909,7 @@ func LastIndexFunc(s []byte, f func(r rune) bool) int {
func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
start := 0
for start < len(s) {
- wid := 1
- r := rune(s[start])
- if r >= utf8.RuneSelf {
- r, wid = utf8.DecodeRune(s[start:])
- }
+ r, wid := utf8.DecodeRune(s[start:])
if f(r) == truth {
return start
}
@@ -1052,10 +1040,7 @@ func trimLeftASCII(s []byte, as *asciiSet) []byte {
func trimLeftUnicode(s []byte, cutset string) []byte {
for len(s) > 0 {
- r, n := rune(s[0]), 1
- if r >= utf8.RuneSelf {
- r, n = utf8.DecodeRune(s)
- }
+ r, n := utf8.DecodeRune(s)
if !containsRune(cutset, r) {
break
}
@@ -1251,19 +1236,10 @@ hasUnicode:
t = t[i:]
for len(s) != 0 && len(t) != 0 {
// Extract first rune from each.
- var sr, tr rune
- if s[0] < utf8.RuneSelf {
- sr, s = rune(s[0]), s[1:]
- } else {
- r, size := utf8.DecodeRune(s)
- sr, s = r, s[size:]
- }
- if t[0] < utf8.RuneSelf {
- tr, t = rune(t[0]), t[1:]
- } else {
- r, size := utf8.DecodeRune(t)
- tr, t = r, t[size:]
- }
+ sr, size := utf8.DecodeRune(s)
+ s = s[size:]
+ tr, size := utf8.DecodeRune(t)
+ t = t[size:]
// If they match, keep going; if not, return false.
diff --git a/src/bytes/iter.go b/src/bytes/iter.go
index b2abb2c9ba..a4ece881d2 100644
--- a/src/bytes/iter.go
+++ b/src/bytes/iter.go
@@ -117,11 +117,7 @@ func FieldsFuncSeq(s []byte, f func(rune) bool) iter.Seq[[]byte] {
return func(yield func([]byte) bool) {
start := -1
for i := 0; i < len(s); {
- size := 1
- r := rune(s[i])
- if r >= utf8.RuneSelf {
- r, size = utf8.DecodeRune(s[i:])
- }
+ r, size := utf8.DecodeRune(s[i:])
if f(r) {
if start >= 0 {
if !yield(s[start:i:i]) {