about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/bufio/bufio.go | 5
-rw-r--r-- src/bytes/bytes.go | 40
-rw-r--r-- src/bytes/iter.go | 6
-rw-r--r-- src/cmd/compile/internal/test/inl_test.go | 2
-rw-r--r-- src/encoding/json/decode.go | 4
-rw-r--r-- src/fmt/format.go | 5
-rw-r--r-- src/fmt/print.go | 5
-rw-r--r-- src/regexp/regexp.go | 28
-rw-r--r-- src/strconv/quote.go | 8
-rw-r--r-- src/strings/iter.go | 6
-rw-r--r-- src/strings/reader.go | 4
-rw-r--r-- src/strings/strings.go | 16
-rw-r--r-- src/unicode/utf8/utf8.go | 26
-rw-r--r-- src/unicode/utf8/utf8_test.go | 27
14 files changed, 74 insertions, 108 deletions
diff --git a/src/bufio/bufio.go b/src/bufio/bufio.go
index 5244ce2e0c..141a9a1a2a 100644
--- a/src/bufio/bufio.go
+++ b/src/bufio/bufio.go
@@ -311,10 +311,7 @@ func (b *Reader) ReadRune() (r rune, size int, err error) {
if b.r == b.w {
return 0, 0, b.readErr()
}
- r, size = rune(b.buf[b.r]), 1
- if r >= utf8.RuneSelf {
- r, size = utf8.DecodeRune(b.buf[b.r:b.w])
- }
+ r, size = utf8.DecodeRune(b.buf[b.r:b.w])
b.r += size
b.lastByte = int(b.buf[b.r-1])
b.lastRuneSize = size
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index ce2e004910..9a7f4ee3c9 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -528,11 +528,7 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
// more efficient, possibly due to cache effects.
start := -1 // valid span start if >= 0
for i := 0; i < len(s); {
- size := 1
- r := rune(s[i])
- if r >= utf8.RuneSelf {
- r, size = utf8.DecodeRune(s[i:])
- }
+ r, size := utf8.DecodeRune(s[i:])
if f(r) {
if start >= 0 {
spans = append(spans, span{start, i})
@@ -614,11 +610,7 @@ func Map(mapping func(r rune) rune, s []byte) []byte {
// fine. It could also shrink but that falls out naturally.
b := make([]byte, 0, len(s))
for i := 0; i < len(s); {
- wid := 1
- r := rune(s[i])
- if r >= utf8.RuneSelf {
- r, wid = utf8.DecodeRune(s[i:])
- }
+ r, wid := utf8.DecodeRune(s[i:])
r = mapping(r)
if r >= 0 {
b = utf8.AppendRune(b, r)
@@ -917,11 +909,7 @@ func LastIndexFunc(s []byte, f func(r rune) bool) int {
func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
start := 0
for start < len(s) {
- wid := 1
- r := rune(s[start])
- if r >= utf8.RuneSelf {
- r, wid = utf8.DecodeRune(s[start:])
- }
+ r, wid := utf8.DecodeRune(s[start:])
if f(r) == truth {
return start
}
@@ -1052,10 +1040,7 @@ func trimLeftASCII(s []byte, as *asciiSet) []byte {
func trimLeftUnicode(s []byte, cutset string) []byte {
for len(s) > 0 {
- r, n := rune(s[0]), 1
- if r >= utf8.RuneSelf {
- r, n = utf8.DecodeRune(s)
- }
+ r, n := utf8.DecodeRune(s)
if !containsRune(cutset, r) {
break
}
@@ -1251,19 +1236,10 @@ hasUnicode:
t = t[i:]
for len(s) != 0 && len(t) != 0 {
// Extract first rune from each.
- var sr, tr rune
- if s[0] < utf8.RuneSelf {
- sr, s = rune(s[0]), s[1:]
- } else {
- r, size := utf8.DecodeRune(s)
- sr, s = r, s[size:]
- }
- if t[0] < utf8.RuneSelf {
- tr, t = rune(t[0]), t[1:]
- } else {
- r, size := utf8.DecodeRune(t)
- tr, t = r, t[size:]
- }
+ sr, size := utf8.DecodeRune(s)
+ s = s[size:]
+ tr, size := utf8.DecodeRune(t)
+ t = t[size:]
// If they match, keep going; if not, return false.
diff --git a/src/bytes/iter.go b/src/bytes/iter.go
index b2abb2c9ba..a4ece881d2 100644
--- a/src/bytes/iter.go
+++ b/src/bytes/iter.go
@@ -117,11 +117,7 @@ func FieldsFuncSeq(s []byte, f func(rune) bool) iter.Seq[[]byte] {
return func(yield func([]byte) bool) {
start := -1
for i := 0; i < len(s); {
- size := 1
- r := rune(s[i])
- if r >= utf8.RuneSelf {
- r, size = utf8.DecodeRune(s[i:])
- }
+ r, size := utf8.DecodeRune(s[i:])
if f(r) {
if start >= 0 {
if !yield(s[start:i:i]) {
diff --git a/src/cmd/compile/internal/test/inl_test.go b/src/cmd/compile/internal/test/inl_test.go
index eda6084b48..a49cd767db 100644
--- a/src/cmd/compile/internal/test/inl_test.go
+++ b/src/cmd/compile/internal/test/inl_test.go
@@ -125,6 +125,8 @@ func TestIntendedInlining(t *testing.T) {
"assemble64",
},
"unicode/utf8": {
+ "DecodeRune",
+ "DecodeRuneInString",
"FullRune",
"FullRuneInString",
"RuneLen",
diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go
index 70885a517e..fc29296c0f 100644
--- a/src/encoding/json/decode.go
+++ b/src/encoding/json/decode.go
@@ -1214,10 +1214,6 @@ func unquoteBytes(s []byte) (t []byte, ok bool) {
if c == '\\' || c == '"' || c < ' ' {
break
}
- if c < utf8.RuneSelf {
- r++
- continue
- }
rr, size := utf8.DecodeRune(s[r:])
if rr == utf8.RuneError && size == 1 {
break
diff --git a/src/fmt/format.go b/src/fmt/format.go
index 90e18cd696..334a94e298 100644
--- a/src/fmt/format.go
+++ b/src/fmt/format.go
@@ -346,10 +346,7 @@ func (f *fmt) truncate(b []byte) []byte {
if n < 0 {
return b[:i]
}
- wid := 1
- if b[i] >= utf8.RuneSelf {
- _, wid = utf8.DecodeRune(b[i:])
- }
+ _, wid := utf8.DecodeRune(b[i:])
i += wid
}
}
diff --git a/src/fmt/print.go b/src/fmt/print.go
index 155218046f..01cfa1a1c7 100644
--- a/src/fmt/print.go
+++ b/src/fmt/print.go
@@ -1145,10 +1145,7 @@ formatLoop:
break
}
- verb, size := rune(format[i]), 1
- if verb >= utf8.RuneSelf {
- verb, size = utf8.DecodeRuneInString(format[i:])
- }
+ verb, size := utf8.DecodeRuneInString(format[i:])
i += size
switch {
diff --git a/src/regexp/regexp.go b/src/regexp/regexp.go
index 253415fb6a..66c7369399 100644
--- a/src/regexp/regexp.go
+++ b/src/regexp/regexp.go
@@ -384,10 +384,6 @@ type inputString struct {
func (i *inputString) step(pos int) (rune, int) {
if pos < len(i.str) {
- c := i.str[pos]
- if c < utf8.RuneSelf {
- return rune(c), 1
- }
return utf8.DecodeRuneInString(i.str[pos:])
}
return endOfText, 0
@@ -409,17 +405,11 @@ func (i *inputString) context(pos int) lazyFlag {
r1, r2 := endOfText, endOfText
// 0 < pos && pos <= len(i.str)
if uint(pos-1) < uint(len(i.str)) {
- r1 = rune(i.str[pos-1])
- if r1 >= utf8.RuneSelf {
- r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
- }
+ r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
}
// 0 <= pos && pos < len(i.str)
if uint(pos) < uint(len(i.str)) {
- r2 = rune(i.str[pos])
- if r2 >= utf8.RuneSelf {
- r2, _ = utf8.DecodeRuneInString(i.str[pos:])
- }
+ r2, _ = utf8.DecodeRuneInString(i.str[pos:])
}
return newLazyFlag(r1, r2)
}
@@ -431,10 +421,6 @@ type inputBytes struct {
func (i *inputBytes) step(pos int) (rune, int) {
if pos < len(i.str) {
- c := i.str[pos]
- if c < utf8.RuneSelf {
- return rune(c), 1
- }
return utf8.DecodeRune(i.str[pos:])
}
return endOfText, 0
@@ -456,17 +442,11 @@ func (i *inputBytes) context(pos int) lazyFlag {
r1, r2 := endOfText, endOfText
// 0 < pos && pos <= len(i.str)
if uint(pos-1) < uint(len(i.str)) {
- r1 = rune(i.str[pos-1])
- if r1 >= utf8.RuneSelf {
- r1, _ = utf8.DecodeLastRune(i.str[:pos])
- }
+ r1, _ = utf8.DecodeLastRune(i.str[:pos])
}
// 0 <= pos && pos < len(i.str)
if uint(pos) < uint(len(i.str)) {
- r2 = rune(i.str[pos])
- if r2 >= utf8.RuneSelf {
- r2, _ = utf8.DecodeRune(i.str[pos:])
- }
+ r2, _ = utf8.DecodeRune(i.str[pos:])
}
return newLazyFlag(r1, r2)
}
diff --git a/src/strconv/quote.go b/src/strconv/quote.go
index 99c292a8ed..da2325647d 100644
--- a/src/strconv/quote.go
+++ b/src/strconv/quote.go
@@ -37,12 +37,8 @@ func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly b
buf = nBuf
}
buf = append(buf, quote)
- for width := 0; len(s) > 0; s = s[width:] {
- r := rune(s[0])
- width = 1
- if r >= utf8.RuneSelf {
- r, width = utf8.DecodeRuneInString(s)
- }
+ for r, width := rune(0), 0; len(s) > 0; s = s[width:] {
+ r, width = utf8.DecodeRuneInString(s)
if width == 1 && r == utf8.RuneError {
buf = append(buf, `\x`...)
buf = append(buf, lowerhex[s[0]>>4])
diff --git a/src/strings/iter.go b/src/strings/iter.go
index 69fe031739..84e763a834 100644
--- a/src/strings/iter.go
+++ b/src/strings/iter.go
@@ -117,11 +117,7 @@ func FieldsFuncSeq(s string, f func(rune) bool) iter.Seq[string] {
return func(yield func(string) bool) {
start := -1
for i := 0; i < len(s); {
- size := 1
- r := rune(s[i])
- if r >= utf8.RuneSelf {
- r, size = utf8.DecodeRuneInString(s[i:])
- }
+ r, size := utf8.DecodeRuneInString(s[i:])
if f(r) {
if start >= 0 {
if !yield(s[start:i]) {
diff --git a/src/strings/reader.go b/src/strings/reader.go
index 497ffb7a39..f12c9b18b3 100644
--- a/src/strings/reader.go
+++ b/src/strings/reader.go
@@ -90,10 +90,6 @@ func (r *Reader) ReadRune() (ch rune, size int, err error) {
return 0, 0, io.EOF
}
r.prevRune = int(r.i)
- if c := r.s[r.i]; c < utf8.RuneSelf {
- r.i++
- return rune(c), 1, nil
- }
ch, size = utf8.DecodeRuneInString(r.s[r.i:])
r.i += int64(size)
return
diff --git a/src/strings/strings.go b/src/strings/strings.go
index 74007977d9..3cc3e79f98 100644
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@@ -896,7 +896,7 @@ func TrimLeftFunc(s string, f func(rune) bool) string {
// Unicode code points c satisfying f(c) removed.
func TrimRightFunc(s string, f func(rune) bool) string {
i := lastIndexFunc(s, f, false)
- if i >= 0 && s[i] >= utf8.RuneSelf {
+ if i >= 0 {
_, wid := utf8.DecodeRuneInString(s[i:])
i += wid
} else {
@@ -1028,10 +1028,7 @@ func trimLeftASCII(s string, as *asciiSet) string {
func trimLeftUnicode(s, cutset string) string {
for len(s) > 0 {
- r, n := rune(s[0]), 1
- if r >= utf8.RuneSelf {
- r, n = utf8.DecodeRuneInString(s)
- }
+ r, n := utf8.DecodeRuneInString(s)
if !ContainsRune(cutset, r) {
break
}
@@ -1224,13 +1221,8 @@ hasUnicode:
}
// Extract first rune from second string.
- var tr rune
- if t[0] < utf8.RuneSelf {
- tr, t = rune(t[0]), t[1:]
- } else {
- r, size := utf8.DecodeRuneInString(t)
- tr, t = r, t[size:]
- }
+ tr, size := utf8.DecodeRuneInString(t)
+ t = t[size:]
// If they match, keep going; if not, return false.
diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go
index 01cad1cc81..68283341d9 100644
--- a/src/unicode/utf8/utf8.go
+++ b/src/unicode/utf8/utf8.go
@@ -155,6 +155,20 @@ func FullRuneInString(s string) bool {
// out of range, or is not the shortest possible UTF-8 encoding for the
// value. No other validation is performed.
func DecodeRune(p []byte) (r rune, size int) {
+ // Inlineable fast path for ASCII characters; see #48195.
+ // The range loop below runs at most once (note the unconditional break);
+ // this shape is weird but effective at rendering the function inlineable.
+ for _, b := range p {
+ if b < RuneSelf {
+ return rune(b), 1
+ }
+ break
+ }
+ r, size = decodeRuneSlow(p)
+ return
+}
+
+func decodeRuneSlow(p []byte) (r rune, size int) {
n := len(p)
if n < 1 {
return RuneError, 0
@@ -203,6 +217,18 @@ func DecodeRune(p []byte) (r rune, size int) {
// out of range, or is not the shortest possible UTF-8 encoding for the
// value. No other validation is performed.
func DecodeRuneInString(s string) (r rune, size int) {
+ // Inlineable fast path for ASCII characters; see #48195.
+ // The else branch followed by a bare return looks redundant, but this
+ // shape is effective at rendering the function inlineable.
+ if s != "" && s[0] < RuneSelf {
+ return rune(s[0]), 1
+ } else {
+ r, size = decodeRuneInStringSlow(s)
+ }
+ return
+}
+
+func decodeRuneInStringSlow(s string) (rune, int) {
n := len(s)
if n < 1 {
return RuneError, 0
diff --git a/src/unicode/utf8/utf8_test.go b/src/unicode/utf8/utf8_test.go
index aece0fab73..bf4f074ffd 100644
--- a/src/unicode/utf8/utf8_test.go
+++ b/src/unicode/utf8/utf8_test.go
@@ -747,18 +747,37 @@ func BenchmarkAppendInvalidRuneNegative(b *testing.B) {
func BenchmarkDecodeASCIIRune(b *testing.B) {
a := []byte{'a'}
- for i := 0; i < b.N; i++ {
- DecodeRune(a)
+ for range b.N {
+ runeSink, sizeSink = DecodeRune(a)
}
}
func BenchmarkDecodeJapaneseRune(b *testing.B) {
nihon := []byte("本")
- for i := 0; i < b.N; i++ {
- DecodeRune(nihon)
+ for range b.N {
+ runeSink, sizeSink = DecodeRune(nihon)
+ }
+}
+
+func BenchmarkDecodeASCIIRuneInString(b *testing.B) {
+ a := "a"
+ for range b.N {
+ runeSink, sizeSink = DecodeRuneInString(a)
}
}
+func BenchmarkDecodeJapaneseRuneInString(b *testing.B) {
+ nihon := "本"
+ for range b.N {
+ runeSink, sizeSink = DecodeRuneInString(nihon)
+ }
+}
+
+var (
+ runeSink rune
+ sizeSink int
+)
+
// boolSink is used to reference the return value of benchmarked
// functions to avoid dead code elimination.
var boolSink bool