aboutsummaryrefslogtreecommitdiff
path: root/src/unicode/utf8
diff options
context:
space:
mode:
authorKatie Hockman <katie@golang.org>2020-12-14 10:03:05 -0500
committerKatie Hockman <katie@golang.org>2020-12-14 10:06:13 -0500
commit0345ede87ee12698988973884cfc0fd3d499dffd (patch)
tree7123cff141ee5661208d2f5f437b8f5252ac7f6a /src/unicode/utf8
parent4651d6b267818b0e0d128a5443289717c4bb8cbc (diff)
parent0a02371b0576964e81c3b40d328db9a3ef3b031b (diff)
downloadgo-0345ede87ee12698988973884cfc0fd3d499dffd.tar.xz
[dev.fuzz] all: merge master into dev.fuzz
Change-Id: I5d8c8329ccc9d747bd81ade6b1cb7cb8ae2e94b2
Diffstat (limited to 'src/unicode/utf8')
-rw-r--r--src/unicode/utf8/example_test.go20
-rw-r--r--src/unicode/utf8/utf8.go1
-rw-r--r--src/unicode/utf8/utf8_test.go28
3 files changed, 39 insertions, 10 deletions
diff --git a/src/unicode/utf8/example_test.go b/src/unicode/utf8/example_test.go
index 7b3e7ac742..5cd931d242 100644
--- a/src/unicode/utf8/example_test.go
+++ b/src/unicode/utf8/example_test.go
@@ -107,6 +107,26 @@ func ExampleEncodeRune() {
// 3
}
+func ExampleEncodeRune_outOfRange() {
+ runes := []rune{
+ // Less than 0, out of range.
+ -1,
+ // Greater than 0x10FFFF, out of range.
+ 0x110000,
+ // The Unicode replacement character.
+ utf8.RuneError,
+ }
+ for i, c := range runes {
+ buf := make([]byte, 3)
+ size := utf8.EncodeRune(buf, c)
+ fmt.Printf("%d: %d %[2]s %d\n", i, buf, size)
+ }
+ // Output:
+ // 0: [239 191 189] � 3
+ // 1: [239 191 189] � 3
+ // 2: [239 191 189] � 3
+}
+
func ExampleFullRune() {
buf := []byte{228, 184, 150} // 世
fmt.Println(utf8.FullRune(buf))
diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go
index ef0d740960..557e8a7770 100644
--- a/src/unicode/utf8/utf8.go
+++ b/src/unicode/utf8/utf8.go
@@ -337,6 +337,7 @@ func RuneLen(r rune) int {
}
// EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune.
+// If the rune is out of range, it writes the encoding of RuneError.
// It returns the number of bytes written.
func EncodeRune(p []byte, r rune) int {
// Negative values are erroneous. Making it unsigned addresses the problem.
diff --git a/src/unicode/utf8/utf8_test.go b/src/unicode/utf8/utf8_test.go
index 359461bd05..eaf1b5ffee 100644
--- a/src/unicode/utf8/utf8_test.go
+++ b/src/unicode/utf8/utf8_test.go
@@ -597,16 +597,24 @@ func BenchmarkDecodeJapaneseRune(b *testing.B) {
}
}
-func BenchmarkFullASCIIRune(b *testing.B) {
- a := []byte{'a'}
- for i := 0; i < b.N; i++ {
- FullRune(a)
- }
-}
+// boolSink is used to reference the return value of benchmarked
+// functions to avoid dead code elimination.
+var boolSink bool
-func BenchmarkFullJapaneseRune(b *testing.B) {
- nihon := []byte("本")
- for i := 0; i < b.N; i++ {
- FullRune(nihon)
+func BenchmarkFullRune(b *testing.B) {
+ benchmarks := []struct {
+ name string
+ data []byte
+ }{
+ {"ASCII", []byte("a")},
+ {"Incomplete", []byte("\xf0\x90\x80")},
+ {"Japanese", []byte("本")},
+ }
+ for _, bm := range benchmarks {
+ b.Run(bm.name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ boolSink = FullRune(bm.data)
+ }
+ })
}
}