From 22d377077c01ced185f5f8d27f608e8c6dcb473c Mon Sep 17 00:00:00 2001 From: Phil Pearl Date: Sun, 13 Oct 2019 13:01:58 +0100 Subject: encoding/json: improve performance of Compact MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change improves performance of Compact by using a sync.Pool to allow re-use of a scanner. This also has the side-effect of removing an allocation for each field that implements Marshaler when marshalling JSON. name old time/op new time/op delta EncodeMarshaler-8 118ns ± 2% 104ns ± 1% -12.21% (p=0.001 n=7+7) name old alloc/op new alloc/op delta EncodeMarshaler-8 100B ± 0% 36B ± 0% -64.00% (p=0.000 n=8+8) name old allocs/op new allocs/op delta EncodeMarshaler-8 3.00 ± 0% 2.00 ± 0% -33.33% (p=0.000 n=8+8) Change-Id: Ic70c61a0a6354823da5220f5aad04b94c054f233 Reviewed-on: https://go-review.googlesource.com/c/go/+/200864 Reviewed-by: Daniel Martí Run-TryBot: Daniel Martí TryBot-Result: Gobot Gobot --- src/encoding/json/bench_test.go | 19 +++++++++++++++++++ src/encoding/json/indent.go | 16 +++++++++------- src/encoding/json/scanner.go | 36 ++++++++++++++++++++++++++++++++---- 3 files changed, 60 insertions(+), 11 deletions(-) (limited to 'src/encoding') diff --git a/src/encoding/json/bench_test.go b/src/encoding/json/bench_test.go index f92d39f0c6..4a5fe7ec84 100644 --- a/src/encoding/json/bench_test.go +++ b/src/encoding/json/bench_test.go @@ -389,3 +389,22 @@ func BenchmarkTypeFieldsCache(b *testing.B) { }) } } + +func BenchmarkEncodeMarshaler(b *testing.B) { + b.ReportAllocs() + + m := struct { + A int + B RawMessage + }{} + + b.RunParallel(func(pb *testing.PB) { + enc := NewEncoder(ioutil.Discard) + + for pb.Next() { + if err := enc.Encode(&m); err != nil { + b.Fatal("Encode:", err) + } + } + }) +} diff --git a/src/encoding/json/indent.go b/src/encoding/json/indent.go index 06adfc1263..2924d3b49b 100644 --- a/src/encoding/json/indent.go +++ b/src/encoding/json/indent.go @@ -4,7 +4,9 @@ package json -import "bytes" +import ( + "bytes" +) // Compact appends to dst the JSON-encoded src with // insignificant space characters elided. @@ -14,8 +16,8 @@ func Compact(dst *bytes.Buffer, src []byte) error { func compact(dst *bytes.Buffer, src []byte, escape bool) error { origLen := dst.Len() - var scan scanner - scan.reset() + scan := newScanner() + defer freeScanner(scan) start := 0 for i, c := range src { if escape && (c == '<' || c == '>' || c == '&') { @@ -36,7 +38,7 @@ func compact(dst *bytes.Buffer, src []byte, escape bool) error { dst.WriteByte(hex[src[i+2]&0xF]) start = i + 3 } - v := scan.step(&scan, c) + v := scan.step(scan, c) if v >= scanSkipSpace { if v == scanError { break @@ -78,13 +80,13 @@ func newline(dst *bytes.Buffer, prefix, indent string, depth int) { // if src ends in a trailing newline, so will dst. func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { origLen := dst.Len() - var scan scanner - scan.reset() + scan := newScanner() + defer freeScanner(scan) needIndent := false depth := 0 for _, c := range src { scan.bytes++ - v := scan.step(&scan, c) + v := scan.step(scan, c) if v == scanSkipSpace { continue } diff --git a/src/encoding/json/scanner.go b/src/encoding/json/scanner.go index 88572245fc..552bd70360 100644 --- a/src/encoding/json/scanner.go +++ b/src/encoding/json/scanner.go @@ -13,11 +13,16 @@ package json // This file starts with two simple examples using the scanner // before diving into the scanner itself. -import "strconv" +import ( + "strconv" + "sync" +) // Valid reports whether data is a valid JSON encoding. func Valid(data []byte) bool { - return checkValid(data, &scanner{}) == nil + scan := newScanner() + defer freeScanner(scan) + return checkValid(data, scan) == nil } // checkValid verifies that data is valid JSON-encoded data. @@ -45,7 +50,7 @@ type SyntaxError struct { func (e *SyntaxError) Error() string { return e.msg } // A scanner is a JSON scanning state machine. -// Callers call scan.reset() and then pass bytes in one at a time +// Callers call scan.reset and then pass bytes in one at a time // by calling scan.step(&scan, c) for each byte. // The return value, referred to as an opcode, tells the // caller about significant parsing events like beginning @@ -72,10 +77,33 @@ type scanner struct { // Error that happened, if any. err error - // total bytes consumed, updated by decoder.Decode + // total bytes consumed, updated by decoder.Decode (and deliberately + // not set to zero by scan.reset) bytes int64 } +var scannerPool = sync.Pool{ + New: func() interface{} { + return &scanner{} + }, +} + +func newScanner() *scanner { + scan := scannerPool.Get().(*scanner) + // scan.reset by design doesn't set bytes to zero + scan.bytes = 0 + scan.reset() + return scan +} + +func freeScanner(scan *scanner) { + // Avoid hanging on to too much memory in extreme cases. + if len(scan.parseState) > 1024 { + scan.parseState = nil + } + scannerPool.Put(scan) +} + // These values are returned by the state transition functions // assigned to scanner.state and the method scanner.eof. // They give details about the current state of the scan that -- cgit v1.3 From acbed0372ea000db8b1ea69eca9d7acecdf89469 Mon Sep 17 00:00:00 2001 From: Phil Pearl Date: Sun, 27 Oct 2019 16:05:54 +0000 Subject: encoding/json: remove allocation when using a Marshaler with value receiver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we marshal a non-pointer struct field whose type implements Marshaler with a non-pointer receiver, then we avoid an allocation if we take the address of the field before casting it to an interface. name old time/op new time/op delta EncodeMarshaler-8 104ns ± 1% 92ns ± 2% -11.72% (p=0.001 n=7+7) name old alloc/op new alloc/op delta EncodeMarshaler-8 36.0B ± 0% 4.0B ± 0% -88.89% (p=0.000 n=8+8) name old allocs/op new allocs/op delta EncodeMarshaler-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=8+8) Test coverage already looks good enough for this change. TestRefValMarshal already covers all possible combinations of value & pointer receivers on value and pointer struct fields. Change-Id: I6fc7f72396396d98f9a90c3c86e813690f41c099 Reviewed-on: https://go-review.googlesource.com/c/go/+/203608 Reviewed-by: Daniel Martí Run-TryBot: Daniel Martí TryBot-Result: Gobot Gobot --- src/encoding/json/encode.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'src/encoding') diff --git a/src/encoding/json/encode.go b/src/encoding/json/encode.go index a7473a7eba..b81e505866 100644 --- a/src/encoding/json/encode.go +++ b/src/encoding/json/encode.go @@ -399,19 +399,22 @@ var ( // newTypeEncoder constructs an encoderFunc for a type. // The returned encoder only checks CanAddr when allowAddr is true. func newTypeEncoder(t reflect.Type, allowAddr bool) encoderFunc { - if t.Implements(marshalerType) { - return marshalerEncoder - } + // If we have a non-pointer value whose type implements + // Marshaler with a value receiver, then we're better off taking + // the address of the value - otherwise we end up with an + // allocation as we cast the value to an interface. if t.Kind() != reflect.Ptr && allowAddr && reflect.PtrTo(t).Implements(marshalerType) { return newCondAddrEncoder(addrMarshalerEncoder, newTypeEncoder(t, false)) } - - if t.Implements(textMarshalerType) { - return textMarshalerEncoder + if t.Implements(marshalerType) { + return marshalerEncoder } if t.Kind() != reflect.Ptr && allowAddr && reflect.PtrTo(t).Implements(textMarshalerType) { return newCondAddrEncoder(addrTextMarshalerEncoder, newTypeEncoder(t, false)) } + if t.Implements(textMarshalerType) { + return textMarshalerEncoder + } switch t.Kind() { case reflect.Bool: -- cgit v1.3 From a05934639bde593326f8d7ed9eb3f73f9ba6eb53 Mon Sep 17 00:00:00 2001 From: Sam Whited Date: Tue, 21 Aug 2018 17:11:30 -0500 Subject: encoding/xml: fix token decoder on early EOF The documentation for TokenReader suggests that implementations of the interface may return a token and io.EOF together, indicating that it is the last token in the stream. This is similar to io.Reader. However, if you wrap such a TokenReader in a Decoder it complained about the EOF. A test was added to ensure this behavior on Decoder's. Change-Id: I9083c91d9626180d3bcf5c069a017050f3c7c4a8 Reviewed-on: https://go-review.googlesource.com/c/go/+/130556 Run-TryBot: Sam Whited TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- src/encoding/xml/xml.go | 5 ++++- src/encoding/xml/xml_test.go | 45 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) (limited to 'src/encoding') diff --git a/src/encoding/xml/xml.go b/src/encoding/xml/xml.go index ca059440a1..5e73dcf731 100644 --- a/src/encoding/xml/xml.go +++ b/src/encoding/xml/xml.go @@ -286,7 +286,10 @@ func (d *Decoder) Token() (Token, error) { t = d.nextToken d.nextToken = nil } else if t, err = d.rawToken(); err != nil { - if err == io.EOF && d.stk != nil && d.stk.kind != stkEOF { + switch { + case err == io.EOF && d.t != nil: + err = nil + case err == io.EOF && d.stk != nil && d.stk.kind != stkEOF: err = d.syntaxError("unexpected EOF") } return t, err diff --git a/src/encoding/xml/xml_test.go b/src/encoding/xml/xml_test.go index ee4ffa2420..efddca43e9 100644 --- a/src/encoding/xml/xml_test.go +++ b/src/encoding/xml/xml_test.go @@ -14,6 +14,51 @@ import ( "unicode/utf8" ) +type toks struct { + earlyEOF bool + t []Token +} + +func (t *toks) Token() (Token, error) { + if len(t.t) == 0 { + return nil, io.EOF + } + var tok Token + tok, t.t = t.t[0], t.t[1:] + if t.earlyEOF && len(t.t) == 0 { + return tok, io.EOF + } + return tok, nil +} + +func TestDecodeEOF(t *testing.T) { + start := StartElement{Name: Name{Local: "test"}} + t.Run("EarlyEOF", func(t *testing.T) { + d := NewTokenDecoder(&toks{earlyEOF: true, t: []Token{ + start, + start.End(), + }}) + err := d.Decode(&struct { + XMLName Name `xml:"test"` + }{}) + if err != nil { + t.Error(err) + } + }) + t.Run("LateEOF", func(t *testing.T) { + d := NewTokenDecoder(&toks{t: []Token{ + start, + start.End(), + }}) + err := d.Decode(&struct { + XMLName Name `xml:"test"` + }{}) + if err != nil { + t.Error(err) + } + }) +} + const testInput = ` Date: Wed, 21 Aug 2019 18:22:24 +0200 Subject: encoding/json: avoid work when unquoting strings, take 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a re-submission of CL 151157, since it was reverted in CL 190909 due to an introduced crash found by a fuzzer. The revert CL included regression tests, while this CL includes a fixed version of the original change. In particular, what we forgot in the original optimization was that we still need the length and trailing quote checks at the beginning of unquoteBytes. Without those, we could end up in a crash later on. We can work out how many bytes can be unquoted trivially in rescanLiteral, which already iterates over a string's bytes. Removing the extra loop in unquoteBytes simplifies the function and speeds it up, especially when decoding simple strings, which are common. While at it, we can remove the check that s[0]=='"', since all call sites already meet that condition. name old time/op new time/op delta CodeDecoder-8 10.6ms ± 2% 10.5ms ± 1% -1.01% (p=0.004 n=20+10) name old speed new speed delta CodeDecoder-8 183MB/s ± 2% 185MB/s ± 1% +1.02% (p=0.003 n=20+10) Updates #28923. Change-Id: I8c6b13302bcd86a364bc998d72451332c0809cde Reviewed-on: https://go-review.googlesource.com/c/go/+/190659 Run-TryBot: Daniel Martí TryBot-Result: Gobot Gobot Reviewed-by: Peter Weinberger --- src/encoding/json/decode.go | 69 ++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 32 deletions(-) (limited to 'src/encoding') diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go index 86d8a69db7..b43484692e 100644 --- a/src/encoding/json/decode.go +++ b/src/encoding/json/decode.go @@ -213,6 +213,9 @@ type decodeState struct { savedError error useNumber bool disallowUnknownFields bool + // safeUnquote is the number of current string literal bytes that don't + // need to be unquoted. When negative, no bytes need unquoting. + safeUnquote int } // readIndex returns the position of the last byte read. @@ -314,13 +317,27 @@ func (d *decodeState) rescanLiteral() { Switch: switch data[i-1] { case '"': // string + // safeUnquote is initialized at -1, which means that all bytes + // checked so far can be unquoted at a later time with no work + // at all. When reaching the closing '"', if safeUnquote is + // still -1, all bytes can be unquoted with no work. Otherwise, + // only those bytes up until the first '\\' or non-ascii rune + // can be safely unquoted. + safeUnquote := -1 for ; i < len(data); i++ { - switch data[i] { - case '\\': + if c := data[i]; c == '\\' { + if safeUnquote < 0 { // first unsafe byte + safeUnquote = int(i - d.off) + } i++ // escaped char - case '"': + } else if c == '"' { + d.safeUnquote = safeUnquote i++ // tokenize the closing quote too break Switch + } else if c >= utf8.RuneSelf { + if safeUnquote < 0 { // first unsafe byte + safeUnquote = int(i - d.off) + } } } case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number @@ -674,7 +691,7 @@ func (d *decodeState) object(v reflect.Value) error { start := d.readIndex() d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := unquoteBytes(item) + key, ok := d.unquoteBytes(item) if !ok { panic(phasePanicMsg) } @@ -875,7 +892,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) return nil } - s, ok := unquoteBytes(item) + s, ok := d.unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -926,7 +943,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool } case '"': // string - s, ok := unquoteBytes(item) + s, ok := d.unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -1086,7 +1103,7 @@ func (d *decodeState) objectInterface() map[string]interface{} { start := d.readIndex() d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := unquote(item) + key, ok := d.unquote(item) if !ok { panic(phasePanicMsg) } @@ -1135,7 +1152,7 @@ func (d *decodeState) literalInterface() interface{} { return c == 't' case '"': // string - s, ok := unquote(item) + s, ok := d.unquote(item) if !ok { panic(phasePanicMsg) } @@ -1178,38 +1195,26 @@ func getu4(s []byte) rune { // unquote converts a quoted JSON string literal s into an actual string t. // The rules are different than for Go, so cannot use strconv.Unquote. -func unquote(s []byte) (t string, ok bool) { - s, ok = unquoteBytes(s) +// The first byte in s must be '"'. +func (d *decodeState) unquote(s []byte) (t string, ok bool) { + s, ok = d.unquoteBytes(s) t = string(s) return } -func unquoteBytes(s []byte) (t []byte, ok bool) { - if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { +func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) { + // We already know that s[0] == '"'. However, we don't know that the + // closing quote exists in all cases, such as when the string is nested + // via the ",string" option. + if len(s) < 2 || s[len(s)-1] != '"' { return } s = s[1 : len(s)-1] - // Check for unusual characters. If there are none, - // then no unquoting is needed, so return a slice of the - // original bytes. - r := 0 - for r < len(s) { - c := s[r] - if c == '\\' || c == '"' || c < ' ' { - break - } - if c < utf8.RuneSelf { - r++ - continue - } - rr, size := utf8.DecodeRune(s[r:]) - if rr == utf8.RuneError && size == 1 { - break - } - r += size - } - if r == len(s) { + // If there are no unusual characters, no unquoting is needed, so return + // a slice of the original bytes. + r := d.safeUnquote + if r == -1 { return s, true } -- cgit v1.3