From 925a3cdcd13472c8f78d51c9ce99a59e77d46eb4 Mon Sep 17 00:00:00 2001 From: Julien Cretel Date: Tue, 2 Sep 2025 22:10:40 +0000 Subject: unicode/utf8: make DecodeRune{,InString} inlineable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change makes the fast path for ASCII characters inlineable in DecodeRune and DecodeRuneInString and removes most instances of manual inlining at call sites. Here are some benchmark results (no change to allocations): goos: darwin goarch: amd64 pkg: unicode/utf8 cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz │ old │ new │ │ sec/op │ sec/op vs base │ DecodeASCIIRune-8 2.4545n ± 2% 0.6253n ± 2% -74.52% (p=0.000 n=20) DecodeJapaneseRune-8 3.988n ± 1% 4.023n ± 1% +0.86% (p=0.050 n=20) DecodeASCIIRuneInString-8 2.4675n ± 1% 0.6264n ± 2% -74.61% (p=0.000 n=20) DecodeJapaneseRuneInString-8 3.992n ± 1% 4.001n ± 1% ~ (p=0.625 n=20) geomean 3.134n 1.585n -49.43% Note: when #61502 gets resolved, DecodeRune and DecodeRuneInString should be reverted to their idiomatic implementations. Fixes #31666 Updates #48195 Change-Id: I4be25c4f52417dc28b3a7bd72f1b04018470f39d GitHub-Last-Rev: 2e352a0045027e059be79cdb60241b5cf35fec71 GitHub-Pull-Request: golang/go#75181 Reviewed-on: https://go-review.googlesource.com/c/go/+/699675 Reviewed-by: Sean Liao LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui Reviewed-by: Michael Pratt --- src/unicode/utf8/utf8.go | 26 ++++++++++++++++++++++++++ src/unicode/utf8/utf8_test.go | 27 +++++++++++++++++++++++---- 2 files changed, 49 insertions(+), 4 deletions(-) (limited to 'src/unicode') diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go index 01cad1cc81..68283341d9 100644 --- a/src/unicode/utf8/utf8.go +++ b/src/unicode/utf8/utf8.go @@ -155,6 +155,20 @@ func FullRuneInString(s string) bool { // out of range, or is not the shortest possible UTF-8 encoding for the // value. No other validation is performed. func DecodeRune(p []byte) (r rune, size int) { + // Inlineable fast path for ASCII characters; see #48195. + // This implementation is weird but effective at rendering the + // function inlineable. + for _, b := range p { + if b < RuneSelf { + return rune(b), 1 + } + break + } + r, size = decodeRuneSlow(p) + return +} + +func decodeRuneSlow(p []byte) (r rune, size int) { n := len(p) if n < 1 { return RuneError, 0 @@ -203,6 +217,18 @@ func DecodeRune(p []byte) (r rune, size int) { // out of range, or is not the shortest possible UTF-8 encoding for the // value. No other validation is performed. func DecodeRuneInString(s string) (r rune, size int) { + // Inlineable fast path for ASCII characters; see #48195. + // This implementation is a bit weird but effective at rendering the + // function inlineable. + if s != "" && s[0] < RuneSelf { + return rune(s[0]), 1 + } else { + r, size = decodeRuneInStringSlow(s) + } + return +} + +func decodeRuneInStringSlow(s string) (rune, int) { n := len(s) if n < 1 { return RuneError, 0 diff --git a/src/unicode/utf8/utf8_test.go b/src/unicode/utf8/utf8_test.go index aece0fab73..bf4f074ffd 100644 --- a/src/unicode/utf8/utf8_test.go +++ b/src/unicode/utf8/utf8_test.go @@ -747,18 +747,37 @@ func BenchmarkAppendInvalidRuneNegative(b *testing.B) { func BenchmarkDecodeASCIIRune(b *testing.B) { a := []byte{'a'} - for i := 0; i < b.N; i++ { - DecodeRune(a) + for range b.N { + runeSink, sizeSink = DecodeRune(a) } } func BenchmarkDecodeJapaneseRune(b *testing.B) { nihon := []byte("本") - for i := 0; i < b.N; i++ { - DecodeRune(nihon) + for range b.N { + runeSink, sizeSink = DecodeRune(nihon) + } +} + +func BenchmarkDecodeASCIIRuneInString(b *testing.B) { + a := "a" + for range b.N { + runeSink, sizeSink = DecodeRuneInString(a) } } +func BenchmarkDecodeJapaneseRuneInString(b *testing.B) { + nihon := "本" + for range b.N { + runeSink, sizeSink = DecodeRuneInString(nihon) + } +} + +var ( + runeSink rune + sizeSink int +) + // boolSink is used to reference the return value of benchmarked // functions to avoid dead code elimination. var boolSink bool -- cgit v1.3