From a1762c2cc67822d86cb37747a56f0d4a07d24ced Mon Sep 17 00:00:00 2001 From: eric fang Date: Wed, 13 May 2020 06:38:39 +0000 Subject: unicode/utf8: refactor benchmarks for FullRune function BenchmarkFullASCIIRune tests the performance of function utf8.FullRune, which will be inlined in BenchmarkFullASCIIRune. Since the return value of FullRune is not referenced, it will be removed as dead code. This CL assigns FullRune's return value to a global variable so that the call cannot be eliminated. In addition, this CL adds one more benchmark to cover more code paths, and puts them together as sub-benchmarks of BenchmarkFullRune. Change-Id: I6e79f4c087adf70e351498a4b58d7482dcd1ec4a Reviewed-on: https://go-review.googlesource.com/c/go/+/233979 Run-TryBot: eric fang TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/unicode/utf8/utf8_test.go | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'src/unicode') diff --git a/src/unicode/utf8/utf8_test.go b/src/unicode/utf8/utf8_test.go index 359461bd05..eaf1b5ffee 100644 --- a/src/unicode/utf8/utf8_test.go +++ b/src/unicode/utf8/utf8_test.go @@ -597,16 +597,24 @@ func BenchmarkDecodeJapaneseRune(b *testing.B) { } } -func BenchmarkFullASCIIRune(b *testing.B) { - a := []byte{'a'} - for i := 0; i < b.N; i++ { - FullRune(a) - } -} - -func BenchmarkFullJapaneseRune(b *testing.B) { - nihon := []byte("本") - for i := 0; i < b.N; i++ { - FullRune(nihon) +// boolSink is used to reference the return value of benchmarked +// functions to avoid dead code elimination. 
+var boolSink bool + +func BenchmarkFullRune(b *testing.B) { + benchmarks := []struct { + name string + data []byte + }{ + {"ASCII", []byte("a")}, + {"Incomplete", []byte("\xf0\x90\x80")}, + {"Japanese", []byte("本")}, + } + for _, bm := range benchmarks { + b.Run(bm.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + boolSink = FullRune(bm.data) + } + }) } } -- cgit v1.3 From a3868028ac8470d1ab7782614707bb90925e7fe3 Mon Sep 17 00:00:00 2001 From: Ainar Garipov Date: Fri, 18 Sep 2020 21:01:34 +0300 Subject: unicode/utf8: document the handling of runes out of range in EncodeRune Document the way EncodeRune currently handles runes which are out of range. Also add an example showing that behaviour. Change-Id: I0f8e7645ae053474ec319085a2bb6d7f73bc137c Reviewed-on: https://go-review.googlesource.com/c/go/+/255998 Reviewed-by: Rob Pike Reviewed-by: Giovanni Bajo Trust: Giovanni Bajo Run-TryBot: Giovanni Bajo TryBot-Result: Go Bot --- src/unicode/utf8/example_test.go | 20 ++++++++++++++++++++ src/unicode/utf8/utf8.go | 1 + 2 files changed, 21 insertions(+) (limited to 'src/unicode') diff --git a/src/unicode/utf8/example_test.go b/src/unicode/utf8/example_test.go index 7b3e7ac742..5cd931d242 100644 --- a/src/unicode/utf8/example_test.go +++ b/src/unicode/utf8/example_test.go @@ -107,6 +107,26 @@ func ExampleEncodeRune() { // 3 } +func ExampleEncodeRune_outOfRange() { + runes := []rune{ + // Less than 0, out of range. + -1, + // Greater than 0x10FFFF, out of range. + 0x110000, + // The Unicode replacement character. 
+ utf8.RuneError, + } + for i, c := range runes { + buf := make([]byte, 3) + size := utf8.EncodeRune(buf, c) + fmt.Printf("%d: %d %[2]s %d\n", i, buf, size) + } + // Output: + // 0: [239 191 189] � 3 + // 1: [239 191 189] � 3 + // 2: [239 191 189] � 3 +} + func ExampleFullRune() { buf := []byte{228, 184, 150} // 世 fmt.Println(utf8.FullRune(buf)) diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go index ef0d740960..557e8a7770 100644 --- a/src/unicode/utf8/utf8.go +++ b/src/unicode/utf8/utf8.go @@ -337,6 +337,7 @@ func RuneLen(r rune) int { } // EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune. +// If the rune is out of range, it writes the encoding of RuneError. // It returns the number of bytes written. func EncodeRune(p []byte, r rune) int { // Negative values are erroneous. Making it unsigned addresses the problem. -- cgit v1.3