diff options
| author | Cuong Manh Le <cuong.manhle.vn@gmail.com> | 2024-09-13 10:15:51 +0700 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2024-09-15 02:05:37 +0000 |
| commit | 3d33437c450aa74014ea1d41cd986b6ee6266984 (patch) | |
| tree | 4d63703ee7dfacf326f5215a48e7374be46533d9 /src/unicode/utf8/utf8.go | |
| parent | 9bd34ebb7e820922458929c9a1146fb0a5b0ca4d (diff) | |
| download | go-3d33437c450aa74014ea1d41cd986b6ee6266984.tar.xz | |
unicode/utf8: speedup RuneCount
CL 612617 did speedup RuneCountInString, thus we can now use it to
speedup RuneCount, too.
name old time/op new time/op delta
RuneCountTenASCIIChars-8 8.69ns ± 1% 3.59ns ± 2% -58.66% (p=0.000 n=9+9)
RuneCountTenJapaneseChars-8 49.8ns ± 2% 40.9ns ± 0% -17.94% (p=0.000 n=10+8)
Change-Id: I311750c00efc79af35fb0ca3b482a5d94e0a7977
Reviewed-on: https://go-review.googlesource.com/c/go/+/612955
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Auto-Submit: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Reviewed-by: Tim King <taking@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/unicode/utf8/utf8.go')
| -rw-r--r-- | src/unicode/utf8/utf8.go | 32 |
1 files changed, 4 insertions, 28 deletions
diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go index 9743b74258..180c008ed5 100644 --- a/src/unicode/utf8/utf8.go +++ b/src/unicode/utf8/utf8.go @@ -414,35 +414,11 @@ func appendRuneNonASCII(p []byte, r rune) []byte { func RuneCount(p []byte) int { np := len(p) var n int - for i := 0; i < np; { - n++ - c := p[i] - if c < RuneSelf { - // ASCII fast path - i++ - continue - } - x := first[c] - if x == xx { - i++ // invalid. - continue - } - size := int(x & 7) - if i+size > np { - i++ // Short or invalid. - continue + for ; n < np; n++ { + if c := p[n]; c >= RuneSelf { + // non-ASCII slow path + return n + RuneCountInString(string(p[n:])) } - accept := acceptRanges[x>>4] - if c := p[i+1]; c < accept.lo || accept.hi < c { - size = 1 - } else if size == 2 { - } else if c := p[i+2]; c < locb || hicb < c { - size = 1 - } else if size == 3 { - } else if c := p[i+3]; c < locb || hicb < c { - size = 1 - } - i += size } return n } |
