diff options
| author | qmuntal <quimmuntal@gmail.com> | 2022-11-25 14:22:36 +0100 |
|---|---|---|
| committer | Quim Muntal <quimmuntal@gmail.com> | 2023-01-23 20:59:01 +0000 |
| commit | 2423370136d4b1915d06bb1aaacbedaa900bc5c7 (patch) | |
| tree | 09605c862dd25a23c19b0795e2b0fad141ff893a /src/unicode/utf16 | |
| parent | 0c8480acfe39a2ce093487ee3f6c614d6875a2bc (diff) | |
| download | go-2423370136d4b1915d06bb1aaacbedaa900bc5c7.tar.xz | |
utf16: reduce utf16.Decode allocations
This CL avoids allocating in utf16.Decode for code point sequences
with less than 64 elements. It does so by splitting the function in two,
one that can be inlined that preallocates a buffer and the other that
does the heavy-lifting.
The mid-stack inliner will allocate the buffer in the caller stack,
and in many cases this will be enough to avoid the allocation.
unicode/utf16 benchmarks:
name old time/op new time/op delta
DecodeValidASCII-12 60.1ns ± 3% 16.0ns ±20% -73.40% (p=0.000 n=8+10)
DecodeValidJapaneseChars-12 61.3ns ±10% 14.9ns ±39% -75.71% (p=0.000 n=10+10)
name old alloc/op new alloc/op delta
DecodeValidASCII-12 48.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10)
DecodeValidJapaneseChars-12 48.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10)
name old allocs/op new allocs/op delta
DecodeValidASCII-12 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
DecodeValidJapaneseChars-12 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
I've also benchmarked os.File.ReadDir with this change applied
to demonstrate that it does make a difference in the caller site, in this
case via syscall.UTF16ToString:
name old time/op new time/op delta
ReadDir-12 592µs ± 8% 620µs ±16% ~ (p=0.280 n=10+10)
name old alloc/op new alloc/op delta
ReadDir-12 30.4kB ± 0% 22.4kB ± 0% -26.10% (p=0.000 n=8+10)
name old allocs/op new allocs/op delta
ReadDir-12 402 ± 0% 272 ± 0% -32.34% (p=0.000 n=10+10)
Change-Id: I65cf5caa3fd3b3a466c0ed837a50a96e975bbe6b
Reviewed-on: https://go-review.googlesource.com/c/go/+/453415
Reviewed-by: Damien Neil <dneil@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Alex Brainman <alex.brainman@gmail.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Quim Muntal <quimmuntal@gmail.com>
Diffstat (limited to 'src/unicode/utf16')
| -rw-r--r-- | src/unicode/utf16/utf16.go | 22 | ||||
| -rw-r--r-- | src/unicode/utf16/utf16_test.go | 17 |
2 files changed, 32 insertions, 7 deletions
diff --git a/src/unicode/utf16/utf16.go b/src/unicode/utf16/utf16.go index 38d8be6060..1c6d2c66c3 100644 --- a/src/unicode/utf16/utf16.go +++ b/src/unicode/utf16/utf16.go @@ -103,23 +103,31 @@ func AppendRune(a []uint16, r rune) []uint16 { // Decode returns the Unicode code point sequence represented // by the UTF-16 encoding s. func Decode(s []uint16) []rune { - a := make([]rune, len(s)) - n := 0 + // Preallocate capacity to hold up to 64 runes. + // Decode inlines, so the allocation can live on the stack. + buf := make([]rune, 0, 64) + return decode(s, buf) +} + +// decode appends to buf the Unicode code point sequence represented +// by the UTF-16 encoding s and return the extended buffer. +func decode(s []uint16, buf []rune) []rune { for i := 0; i < len(s); i++ { + var ar rune switch r := s[i]; { case r < surr1, surr3 <= r: // normal rune - a[n] = rune(r) + ar = rune(r) case surr1 <= r && r < surr2 && i+1 < len(s) && surr2 <= s[i+1] && s[i+1] < surr3: // valid surrogate sequence - a[n] = DecodeRune(rune(r), rune(s[i+1])) + ar = DecodeRune(rune(r), rune(s[i+1])) i++ default: // invalid surrogate sequence - a[n] = replacementChar + ar = replacementChar } - n++ + buf = append(buf, ar) } - return a[:n] + return buf } diff --git a/src/unicode/utf16/utf16_test.go b/src/unicode/utf16/utf16_test.go index be339b1fdf..a5a503d387 100644 --- a/src/unicode/utf16/utf16_test.go +++ b/src/unicode/utf16/utf16_test.go @@ -5,6 +5,7 @@ package utf16_test import ( + "internal/testenv" "reflect" "testing" "unicode" @@ -103,6 +104,22 @@ var decodeTests = []decodeTest{ {[]uint16{0xdfff}, []rune{0xfffd}}, } +func TestAllocationsDecode(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + + for _, tt := range decodeTests { + allocs := testing.AllocsPerRun(10, func() { + out := Decode(tt.in) + if out == nil { + t.Errorf("Decode(%x) = nil", tt.in) + } + }) + if allocs > 0 { + t.Errorf("Decode allocated %v times", allocs) + } + } +} + func TestDecode(t *testing.T) { for _, tt := range decodeTests { out := Decode(tt.in) |
