diff options
Diffstat (limited to 'src/encoding')
| -rw-r--r-- | src/encoding/json/bench_test.go | 19 | ||||
| -rw-r--r-- | src/encoding/json/decode.go | 69 | ||||
| -rw-r--r-- | src/encoding/json/encode.go | 15 | ||||
| -rw-r--r-- | src/encoding/json/indent.go | 16 | ||||
| -rw-r--r-- | src/encoding/json/scanner.go | 36 | ||||
| -rw-r--r-- | src/encoding/xml/xml.go | 5 | ||||
| -rw-r--r-- | src/encoding/xml/xml_test.go | 45 |
7 files changed, 155 insertions, 50 deletions
diff --git a/src/encoding/json/bench_test.go b/src/encoding/json/bench_test.go index f92d39f0c6..4a5fe7ec84 100644 --- a/src/encoding/json/bench_test.go +++ b/src/encoding/json/bench_test.go @@ -389,3 +389,22 @@ func BenchmarkTypeFieldsCache(b *testing.B) { }) } } + +func BenchmarkEncodeMarshaler(b *testing.B) { + b.ReportAllocs() + + m := struct { + A int + B RawMessage + }{} + + b.RunParallel(func(pb *testing.PB) { + enc := NewEncoder(ioutil.Discard) + + for pb.Next() { + if err := enc.Encode(&m); err != nil { + b.Fatal("Encode:", err) + } + } + }) +} diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go index 86d8a69db7..b43484692e 100644 --- a/src/encoding/json/decode.go +++ b/src/encoding/json/decode.go @@ -213,6 +213,9 @@ type decodeState struct { savedError error useNumber bool disallowUnknownFields bool + // safeUnquote is the number of current string literal bytes that don't + // need to be unquoted. When negative, no bytes need unquoting. + safeUnquote int } // readIndex returns the position of the last byte read. @@ -314,13 +317,27 @@ func (d *decodeState) rescanLiteral() { Switch: switch data[i-1] { case '"': // string + // safeUnquote is initialized at -1, which means that all bytes + // checked so far can be unquoted at a later time with no work + // at all. When reaching the closing '"', if safeUnquote is + // still -1, all bytes can be unquoted with no work. Otherwise, + // only those bytes up until the first '\\' or non-ascii rune + // can be safely unquoted. + safeUnquote := -1 for ; i < len(data); i++ { - switch data[i] { - case '\\': + if c := data[i]; c == '\\' { + if safeUnquote < 0 { // first unsafe byte + safeUnquote = int(i - d.off) + } i++ // escaped char - case '"': + } else if c == '"' { + d.safeUnquote = safeUnquote i++ // tokenize the closing quote too break Switch + } else if c >= utf8.RuneSelf { + if safeUnquote < 0 { // first unsafe byte + safeUnquote = int(i - d.off) + } } } case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number @@ -674,7 +691,7 @@ func (d *decodeState) object(v reflect.Value) error { start := d.readIndex() d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := unquoteBytes(item) + key, ok := d.unquoteBytes(item) if !ok { panic(phasePanicMsg) } @@ -875,7 +892,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) return nil } - s, ok := unquoteBytes(item) + s, ok := d.unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -926,7 +943,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool } case '"': // string - s, ok := unquoteBytes(item) + s, ok := d.unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -1086,7 +1103,7 @@ func (d *decodeState) objectInterface() map[string]interface{} { start := d.readIndex() d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := unquote(item) + key, ok := d.unquote(item) if !ok { panic(phasePanicMsg) } @@ -1135,7 +1152,7 @@ func (d *decodeState) literalInterface() interface{} { return c == 't' case '"': // string - s, ok := unquote(item) + s, ok := d.unquote(item) if !ok { panic(phasePanicMsg) } @@ -1178,38 +1195,26 @@ func getu4(s []byte) rune { // unquote converts a quoted JSON string literal s into an actual string t. // The rules are different than for Go, so cannot use strconv.Unquote. -func unquote(s []byte) (t string, ok bool) { - s, ok = unquoteBytes(s) +// The first byte in s must be '"'. +func (d *decodeState) unquote(s []byte) (t string, ok bool) { + s, ok = d.unquoteBytes(s) t = string(s) return } -func unquoteBytes(s []byte) (t []byte, ok bool) { - if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { +func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) { + // We already know that s[0] == '"'. However, we don't know that the + // closing quote exists in all cases, such as when the string is nested + // via the ",string" option. + if len(s) < 2 || s[len(s)-1] != '"' { return } s = s[1 : len(s)-1] - // Check for unusual characters. If there are none, - // then no unquoting is needed, so return a slice of the - // original bytes. - r := 0 - for r < len(s) { - c := s[r] - if c == '\\' || c == '"' || c < ' ' { - break - } - if c < utf8.RuneSelf { - r++ - continue - } - rr, size := utf8.DecodeRune(s[r:]) - if rr == utf8.RuneError && size == 1 { - break - } - r += size - } - if r == len(s) { + // If there are no unusual characters, no unquoting is needed, so return + // a slice of the original bytes. + r := d.safeUnquote + if r == -1 { return s, true } diff --git a/src/encoding/json/encode.go b/src/encoding/json/encode.go index a7473a7eba..b81e505866 100644 --- a/src/encoding/json/encode.go +++ b/src/encoding/json/encode.go @@ -399,19 +399,22 @@ var ( // newTypeEncoder constructs an encoderFunc for a type. // The returned encoder only checks CanAddr when allowAddr is true. func newTypeEncoder(t reflect.Type, allowAddr bool) encoderFunc { - if t.Implements(marshalerType) { - return marshalerEncoder - } + // If we have a non-pointer value whose type implements + // Marshaler with a value receiver, then we're better off taking + // the address of the value - otherwise we end up with an + // allocation as we cast the value to an interface. if t.Kind() != reflect.Ptr && allowAddr && reflect.PtrTo(t).Implements(marshalerType) { return newCondAddrEncoder(addrMarshalerEncoder, newTypeEncoder(t, false)) } - - if t.Implements(textMarshalerType) { - return textMarshalerEncoder + if t.Implements(marshalerType) { + return marshalerEncoder } if t.Kind() != reflect.Ptr && allowAddr && reflect.PtrTo(t).Implements(textMarshalerType) { return newCondAddrEncoder(addrTextMarshalerEncoder, newTypeEncoder(t, false)) } + if t.Implements(textMarshalerType) { + return textMarshalerEncoder + } switch t.Kind() { case reflect.Bool: diff --git a/src/encoding/json/indent.go b/src/encoding/json/indent.go index 06adfc1263..2924d3b49b 100644 --- a/src/encoding/json/indent.go +++ b/src/encoding/json/indent.go @@ -4,7 +4,9 @@ package json -import "bytes" +import ( + "bytes" +) // Compact appends to dst the JSON-encoded src with // insignificant space characters elided. @@ -14,8 +16,8 @@ func Compact(dst *bytes.Buffer, src []byte) error { func compact(dst *bytes.Buffer, src []byte, escape bool) error { origLen := dst.Len() - var scan scanner - scan.reset() + scan := newScanner() + defer freeScanner(scan) start := 0 for i, c := range src { if escape && (c == '<' || c == '>' || c == '&') { @@ -36,7 +38,7 @@ func compact(dst *bytes.Buffer, src []byte, escape bool) error { dst.WriteByte(hex[src[i+2]&0xF]) start = i + 3 } - v := scan.step(&scan, c) + v := scan.step(scan, c) if v >= scanSkipSpace { if v == scanError { break @@ -78,13 +80,13 @@ func newline(dst *bytes.Buffer, prefix, indent string, depth int) { // if src ends in a trailing newline, so will dst. func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { origLen := dst.Len() - var scan scanner - scan.reset() + scan := newScanner() + defer freeScanner(scan) needIndent := false depth := 0 for _, c := range src { scan.bytes++ - v := scan.step(&scan, c) + v := scan.step(scan, c) if v == scanSkipSpace { continue } diff --git a/src/encoding/json/scanner.go b/src/encoding/json/scanner.go index 88572245fc..552bd70360 100644 --- a/src/encoding/json/scanner.go +++ b/src/encoding/json/scanner.go @@ -13,11 +13,16 @@ package json // This file starts with two simple examples using the scanner // before diving into the scanner itself. -import "strconv" +import ( + "strconv" + "sync" +) // Valid reports whether data is a valid JSON encoding. func Valid(data []byte) bool { - return checkValid(data, &scanner{}) == nil + scan := newScanner() + defer freeScanner(scan) + return checkValid(data, scan) == nil } // checkValid verifies that data is valid JSON-encoded data. @@ -45,7 +50,7 @@ type SyntaxError struct { func (e *SyntaxError) Error() string { return e.msg } // A scanner is a JSON scanning state machine. -// Callers call scan.reset() and then pass bytes in one at a time +// Callers call scan.reset and then pass bytes in one at a time // by calling scan.step(&scan, c) for each byte. // The return value, referred to as an opcode, tells the // caller about significant parsing events like beginning @@ -72,10 +77,33 @@ type scanner struct { // Error that happened, if any. err error - // total bytes consumed, updated by decoder.Decode + // total bytes consumed, updated by decoder.Decode (and deliberately + // not set to zero by scan.reset) bytes int64 } +var scannerPool = sync.Pool{ + New: func() interface{} { + return &scanner{} + }, +} + +func newScanner() *scanner { + scan := scannerPool.Get().(*scanner) + // scan.reset by design doesn't set bytes to zero + scan.bytes = 0 + scan.reset() + return scan +} + +func freeScanner(scan *scanner) { + // Avoid hanging on to too much memory in extreme cases. + if len(scan.parseState) > 1024 { + scan.parseState = nil + } + scannerPool.Put(scan) +} + // These values are returned by the state transition functions // assigned to scanner.state and the method scanner.eof. // They give details about the current state of the scan that diff --git a/src/encoding/xml/xml.go b/src/encoding/xml/xml.go index ca059440a1..5e73dcf731 100644 --- a/src/encoding/xml/xml.go +++ b/src/encoding/xml/xml.go @@ -286,7 +286,10 @@ func (d *Decoder) Token() (Token, error) { t = d.nextToken d.nextToken = nil } else if t, err = d.rawToken(); err != nil { - if err == io.EOF && d.stk != nil && d.stk.kind != stkEOF { + switch { + case err == io.EOF && d.t != nil: + err = nil + case err == io.EOF && d.stk != nil && d.stk.kind != stkEOF: err = d.syntaxError("unexpected EOF") } return t, err diff --git a/src/encoding/xml/xml_test.go b/src/encoding/xml/xml_test.go index ee4ffa2420..efddca43e9 100644 --- a/src/encoding/xml/xml_test.go +++ b/src/encoding/xml/xml_test.go @@ -14,6 +14,51 @@ import ( "unicode/utf8" ) +type toks struct { + earlyEOF bool + t []Token +} + +func (t *toks) Token() (Token, error) { + if len(t.t) == 0 { + return nil, io.EOF + } + var tok Token + tok, t.t = t.t[0], t.t[1:] + if t.earlyEOF && len(t.t) == 0 { + return tok, io.EOF + } + return tok, nil +} + +func TestDecodeEOF(t *testing.T) { + start := StartElement{Name: Name{Local: "test"}} + t.Run("EarlyEOF", func(t *testing.T) { + d := NewTokenDecoder(&toks{earlyEOF: true, t: []Token{ + start, + start.End(), + }}) + err := d.Decode(&struct { + XMLName Name `xml:"test"` + }{}) + if err != nil { + t.Error(err) + } + }) + t.Run("LateEOF", func(t *testing.T) { + d := NewTokenDecoder(&toks{t: []Token{ + start, + start.End(), + }}) + err := d.Decode(&struct { + XMLName Name `xml:"test"` + }{}) + if err != nil { + t.Error(err) + } + }) +} + const testInput = ` <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
