aboutsummaryrefslogtreecommitdiff
path: root/src/pkg
diff options
context:
space:
mode:
author Rob Pike <r@golang.org> 2012-07-19 11:58:14 -0700
committer Rob Pike <r@golang.org> 2012-07-19 11:58:14 -0700
commit fc360f238137717e7246cc0fde908b71a3f1e5c7 (patch)
tree c712e35f63c31e4882598962a0885757d2a5cbf1 /src/pkg
parent 55ff3f7076e9ef45f8c853eece8acf36e891d885 (diff)
download go-fc360f238137717e7246cc0fde908b71a3f1e5c7.tar.xz
unicode/utf8: reject out-of-range runes.
Surrogates are still admitted, but I have sent mail to golang-dev on that topic. Fixes #3785. R=golang-dev, rogpeppe, iant CC=golang-dev https://golang.org/cl/6398049
Diffstat (limited to 'src/pkg')
-rw-r--r-- src/pkg/unicode/utf8/utf8.go 16
-rw-r--r-- src/pkg/unicode/utf8/utf8_test.go 5
2 files changed, 19 insertions, 2 deletions
diff --git a/src/pkg/unicode/utf8/utf8.go b/src/pkg/unicode/utf8/utf8.go
index 57ea19e96d..cd9c80c5a5 100644
--- a/src/pkg/unicode/utf8/utf8.go
+++ b/src/pkg/unicode/utf8/utf8.go
@@ -102,7 +102,7 @@ func decodeRuneInternal(p []byte) (r rune, size int, short bool) {
// 4-byte, 21-bit sequence?
if c0 < t5 {
r = rune(c0&mask4)<<18 | rune(c1&maskx)<<12 | rune(c2&maskx)<<6 | rune(c3&maskx)
- if r <= rune3Max {
+ if r <= rune3Max || MaxRune < r {
return RuneError, 1, false
}
return r, 4, false
@@ -177,7 +177,7 @@ func decodeRuneInStringInternal(s string) (r rune, size int, short bool) {
// 4-byte, 21-bit sequence?
if c0 < t5 {
r = rune(c0&mask4)<<18 | rune(c1&maskx)<<12 | rune(c2&maskx)<<6 | rune(c3&maskx)
- if r <= rune3Max {
+ if r <= rune3Max || MaxRune < r {
return RuneError, 1, false
}
return r, 4, false
@@ -202,6 +202,9 @@ func FullRuneInString(s string) bool {
// DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and its width in bytes.
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
func DecodeRune(p []byte) (r rune, size int) {
r, size, _ = decodeRuneInternal(p)
return
@@ -209,6 +212,9 @@ func DecodeRune(p []byte) (r rune, size int) {
// DecodeRuneInString is like DecodeRune but its input is a string.
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
func DecodeRuneInString(s string) (r rune, size int) {
r, size, _ = decodeRuneInStringInternal(s)
return
@@ -216,6 +222,9 @@ func DecodeRuneInString(s string) (r rune, size int) {
// DecodeLastRune unpacks the last UTF-8 encoding in p and returns the rune and its width in bytes.
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
func DecodeLastRune(p []byte) (r rune, size int) {
end := len(p)
if end == 0 {
@@ -250,6 +259,9 @@ func DecodeLastRune(p []byte) (r rune, size int) {
// DecodeLastRuneInString is like DecodeLastRune but its input is a string.
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
func DecodeLastRuneInString(s string) (r rune, size int) {
end := len(s)
if end == 0 {
diff --git a/src/pkg/unicode/utf8/utf8_test.go b/src/pkg/unicode/utf8/utf8_test.go
index 4f73c8fb81..65e6c7e8b3 100644
--- a/src/pkg/unicode/utf8/utf8_test.go
+++ b/src/pkg/unicode/utf8/utf8_test.go
@@ -311,6 +311,11 @@ var validTests = []ValidTest{
{string([]byte{66, 250}), false},
{string([]byte{66, 250, 67}), false},
{"a\uFFFDb", true},
+ {string("\xF7\xBF\xBF\xBF"), false}, // 0x1FFFFF; out of range
+ {string("\xFB\xBF\xBF\xBF\xBF"), false}, // 0x3FFFFFF; out of range
+ {string("\xc0\x80"), false}, // U+0000 encoded in two bytes: incorrect
+ // TODO {string("\xed\xa0\x80"), false }, // U+D800 high surrogate (sic)
+ // TODO {string("\xed\xbf\xbf"), false }, // U+DFFF low surrogate (sic)
}
func TestValid(t *testing.T) {