diff options
Diffstat (limited to 'src/encoding/json/decode.go')
| -rw-r--r-- | src/encoding/json/decode.go | 69 |
1 files changed, 37 insertions, 32 deletions
diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go index 86d8a69db7..b43484692e 100644 --- a/src/encoding/json/decode.go +++ b/src/encoding/json/decode.go @@ -213,6 +213,9 @@ type decodeState struct { savedError error useNumber bool disallowUnknownFields bool + // safeUnquote is the number of current string literal bytes that don't + // need to be unquoted. When negative, no bytes need unquoting. + safeUnquote int } // readIndex returns the position of the last byte read. @@ -314,13 +317,27 @@ func (d *decodeState) rescanLiteral() { Switch: switch data[i-1] { case '"': // string + // safeUnquote is initialized at -1, which means that all bytes + // checked so far can be unquoted at a later time with no work + // at all. When reaching the closing '"', if safeUnquote is + // still -1, all bytes can be unquoted with no work. Otherwise, + // only those bytes up until the first '\\' or non-ascii rune + // can be safely unquoted. + safeUnquote := -1 for ; i < len(data); i++ { - switch data[i] { - case '\\': + if c := data[i]; c == '\\' { + if safeUnquote < 0 { // first unsafe byte + safeUnquote = int(i - d.off) + } i++ // escaped char - case '"': + } else if c == '"' { + d.safeUnquote = safeUnquote i++ // tokenize the closing quote too break Switch + } else if c >= utf8.RuneSelf { + if safeUnquote < 0 { // first unsafe byte + safeUnquote = int(i - d.off) + } } } case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number @@ -674,7 +691,7 @@ func (d *decodeState) object(v reflect.Value) error { start := d.readIndex() d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := unquoteBytes(item) + key, ok := d.unquoteBytes(item) if !ok { panic(phasePanicMsg) } @@ -875,7 +892,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) return nil } - s, ok := unquoteBytes(item) + s, ok := d.unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -926,7 +943,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool } case '"': // string - s, ok := unquoteBytes(item) + s, ok := d.unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -1086,7 +1103,7 @@ func (d *decodeState) objectInterface() map[string]interface{} { start := d.readIndex() d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := unquote(item) + key, ok := d.unquote(item) if !ok { panic(phasePanicMsg) } @@ -1135,7 +1152,7 @@ func (d *decodeState) literalInterface() interface{} { return c == 't' case '"': // string - s, ok := unquote(item) + s, ok := d.unquote(item) if !ok { panic(phasePanicMsg) } @@ -1178,38 +1195,26 @@ func getu4(s []byte) rune { // unquote converts a quoted JSON string literal s into an actual string t. // The rules are different than for Go, so cannot use strconv.Unquote. -func unquote(s []byte) (t string, ok bool) { - s, ok = unquoteBytes(s) +// The first byte in s must be '"'. +func (d *decodeState) unquote(s []byte) (t string, ok bool) { + s, ok = d.unquoteBytes(s) t = string(s) return } -func unquoteBytes(s []byte) (t []byte, ok bool) { - if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { +func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) { + // We already know that s[0] == '"'. However, we don't know that the + // closing quote exists in all cases, such as when the string is nested + // via the ",string" option. + if len(s) < 2 || s[len(s)-1] != '"' { return } s = s[1 : len(s)-1] - // Check for unusual characters. If there are none, - // then no unquoting is needed, so return a slice of the - // original bytes. - r := 0 - for r < len(s) { - c := s[r] - if c == '\\' || c == '"' || c < ' ' { - break - } - if c < utf8.RuneSelf { - r++ - continue - } - rr, size := utf8.DecodeRune(s[r:]) - if rr == utf8.RuneError && size == 1 { - break - } - r += size - } - if r == len(s) { + // If there are no unusual characters, no unquoting is needed, so return + // a slice of the original bytes. + r := d.safeUnquote + if r == -1 { return s, true } |
