From 22d377077c01ced185f5f8d27f608e8c6dcb473c Mon Sep 17 00:00:00 2001
From: Phil Pearl <philip.j.r.pearl@gmail.com>
Date: Sun, 13 Oct 2019 13:01:58 +0100
Subject: encoding/json: improve performance of Compact
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change improves performance of Compact by using a sync.Pool to allow re-use
of a scanner. This also has the side-effect of removing an allocation for each
field that implements Marshaler when marshalling JSON.

name               old time/op    new time/op    delta
EncodeMarshaler-8     118ns ± 2%     104ns ± 1%  -12.21%  (p=0.001 n=7+7)

name               old alloc/op   new alloc/op   delta
EncodeMarshaler-8      100B ± 0%       36B ± 0%  -64.00%  (p=0.000 n=8+8)

name               old allocs/op  new allocs/op  delta
EncodeMarshaler-8      3.00 ± 0%      2.00 ± 0%  -33.33%  (p=0.000 n=8+8)

Change-Id: Ic70c61a0a6354823da5220f5aad04b94c054f233
Reviewed-on: https://go-review.googlesource.com/c/go/+/200864
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/encoding/json/bench_test.go | 19 +++++++++++++++++++
 src/encoding/json/indent.go     | 16 +++++++++-------
 src/encoding/json/scanner.go    | 36 ++++++++++++++++++++++++++++++++----
 3 files changed, 60 insertions(+), 11 deletions(-)

(limited to 'src/encoding')

diff --git a/src/encoding/json/bench_test.go b/src/encoding/json/bench_test.go
index f92d39f0c6..4a5fe7ec84 100644
--- a/src/encoding/json/bench_test.go
+++ b/src/encoding/json/bench_test.go
@@ -389,3 +389,22 @@ func BenchmarkTypeFieldsCache(b *testing.B) {
 		})
 	}
 }
+
+func BenchmarkEncodeMarshaler(b *testing.B) {
+	b.ReportAllocs()
+
+	m := struct {
+		A int
+		B RawMessage
+	}{}
+
+	b.RunParallel(func(pb *testing.PB) {
+		enc := NewEncoder(ioutil.Discard)
+
+		for pb.Next() {
+			if err := enc.Encode(&m); err != nil {
+				b.Fatal("Encode:", err)
+			}
+		}
+	})
+}
diff --git a/src/encoding/json/indent.go b/src/encoding/json/indent.go
index 06adfc1263..2924d3b49b 100644
--- a/src/encoding/json/indent.go
+++ b/src/encoding/json/indent.go
@@ -4,7 +4,9 @@
 
 package json
 
-import "bytes"
+import (
+	"bytes"
+)
 
 // Compact appends to dst the JSON-encoded src with
 // insignificant space characters elided.
@@ -14,8 +16,8 @@ func Compact(dst *bytes.Buffer, src []byte) error {
 
 func compact(dst *bytes.Buffer, src []byte, escape bool) error {
 	origLen := dst.Len()
-	var scan scanner
-	scan.reset()
+	scan := newScanner()
+	defer freeScanner(scan)
 	start := 0
 	for i, c := range src {
 		if escape && (c == '<' || c == '>' || c == '&') {
@@ -36,7 +38,7 @@ func compact(dst *bytes.Buffer, src []byte, escape bool) error {
 			dst.WriteByte(hex[src[i+2]&0xF])
 			start = i + 3
 		}
-		v := scan.step(&scan, c)
+		v := scan.step(scan, c)
 		if v >= scanSkipSpace {
 			if v == scanError {
 				break
@@ -78,13 +80,13 @@ func newline(dst *bytes.Buffer, prefix, indent string, depth int) {
 // if src ends in a trailing newline, so will dst.
 func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error {
 	origLen := dst.Len()
-	var scan scanner
-	scan.reset()
+	scan := newScanner()
+	defer freeScanner(scan)
 	needIndent := false
 	depth := 0
 	for _, c := range src {
 		scan.bytes++
-		v := scan.step(&scan, c)
+		v := scan.step(scan, c)
 		if v == scanSkipSpace {
 			continue
 		}
diff --git a/src/encoding/json/scanner.go b/src/encoding/json/scanner.go
index 88572245fc..552bd70360 100644
--- a/src/encoding/json/scanner.go
+++ b/src/encoding/json/scanner.go
@@ -13,11 +13,16 @@ package json
 // This file starts with two simple examples using the scanner
 // before diving into the scanner itself.
 
-import "strconv"
+import (
+	"strconv"
+	"sync"
+)
 
 // Valid reports whether data is a valid JSON encoding.
 func Valid(data []byte) bool {
-	return checkValid(data, &scanner{}) == nil
+	scan := newScanner()
+	defer freeScanner(scan)
+	return checkValid(data, scan) == nil
 }
 
 // checkValid verifies that data is valid JSON-encoded data.
@@ -45,7 +50,7 @@ type SyntaxError struct {
 func (e *SyntaxError) Error() string { return e.msg }
 
 // A scanner is a JSON scanning state machine.
-// Callers call scan.reset() and then pass bytes in one at a time
+// Callers call scan.reset and then pass bytes in one at a time
 // by calling scan.step(&scan, c) for each byte.
 // The return value, referred to as an opcode, tells the
 // caller about significant parsing events like beginning
@@ -72,10 +77,33 @@ type scanner struct {
 	// Error that happened, if any.
 	err error
 
-	// total bytes consumed, updated by decoder.Decode
+	// total bytes consumed, updated by decoder.Decode (and deliberately
+	// not set to zero by scan.reset)
 	bytes int64
 }
 
+var scannerPool = sync.Pool{
+	New: func() interface{} {
+		return &scanner{}
+	},
+}
+
+func newScanner() *scanner {
+	scan := scannerPool.Get().(*scanner)
+	// scan.reset by design doesn't set bytes to zero
+	scan.bytes = 0
+	scan.reset()
+	return scan
+}
+
+func freeScanner(scan *scanner) {
+	// Avoid hanging on to too much memory in extreme cases.
+	if len(scan.parseState) > 1024 {
+		scan.parseState = nil
+	}
+	scannerPool.Put(scan)
+}
+
 // These values are returned by the state transition functions
 // assigned to scanner.state and the method scanner.eof.
 // They give details about the current state of the scan that
-- 
cgit v1.3


From acbed0372ea000db8b1ea69eca9d7acecdf89469 Mon Sep 17 00:00:00 2001
From: Phil Pearl <philip.j.r.pearl@gmail.com>
Date: Sun, 27 Oct 2019 16:05:54 +0000
Subject: encoding/json: remove allocation when using a Marshaler with value
 receiver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we marshal a non-pointer struct field whose type implements Marshaler with
a non-pointer receiver, then we avoid an allocation if we take the address of
the field before converting it to an interface value.

name               old time/op    new time/op    delta
EncodeMarshaler-8     104ns ± 1%      92ns ± 2%  -11.72%  (p=0.001 n=7+7)

name               old alloc/op   new alloc/op   delta
EncodeMarshaler-8     36.0B ± 0%      4.0B ± 0%  -88.89%  (p=0.000 n=8+8)

name               old allocs/op  new allocs/op  delta
EncodeMarshaler-8      2.00 ± 0%      1.00 ± 0%  -50.00%  (p=0.000 n=8+8)

Test coverage already looks good enough for this change. TestRefValMarshal
already covers all possible combinations of value & pointer receivers on
value and pointer struct fields.

Change-Id: I6fc7f72396396d98f9a90c3c86e813690f41c099
Reviewed-on: https://go-review.googlesource.com/c/go/+/203608
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/encoding/json/encode.go | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'src/encoding')

diff --git a/src/encoding/json/encode.go b/src/encoding/json/encode.go
index a7473a7eba..b81e505866 100644
--- a/src/encoding/json/encode.go
+++ b/src/encoding/json/encode.go
@@ -399,19 +399,22 @@ var (
 // newTypeEncoder constructs an encoderFunc for a type.
 // The returned encoder only checks CanAddr when allowAddr is true.
 func newTypeEncoder(t reflect.Type, allowAddr bool) encoderFunc {
-	if t.Implements(marshalerType) {
-		return marshalerEncoder
-	}
+	// If we have a non-pointer value whose type implements
+	// Marshaler with a value receiver, then we're better off taking
+	// the address of the value - otherwise we end up with an
+	// allocation as we cast the value to an interface.
 	if t.Kind() != reflect.Ptr && allowAddr && reflect.PtrTo(t).Implements(marshalerType) {
 		return newCondAddrEncoder(addrMarshalerEncoder, newTypeEncoder(t, false))
 	}
-
-	if t.Implements(textMarshalerType) {
-		return textMarshalerEncoder
+	if t.Implements(marshalerType) {
+		return marshalerEncoder
 	}
 	if t.Kind() != reflect.Ptr && allowAddr && reflect.PtrTo(t).Implements(textMarshalerType) {
 		return newCondAddrEncoder(addrTextMarshalerEncoder, newTypeEncoder(t, false))
 	}
+	if t.Implements(textMarshalerType) {
+		return textMarshalerEncoder
+	}
 
 	switch t.Kind() {
 	case reflect.Bool:
-- 
cgit v1.3


From a05934639bde593326f8d7ed9eb3f73f9ba6eb53 Mon Sep 17 00:00:00 2001
From: Sam Whited <sam@samwhited.com>
Date: Tue, 21 Aug 2018 17:11:30 -0500
Subject: encoding/xml: fix token decoder on early EOF

The documentation for TokenReader suggests that implementations of the
interface may return a token and io.EOF together, indicating that it is
the last token in the stream. This is similar to io.Reader. However, if
you wrapped such a TokenReader in a Decoder, the Decoder complained about
the EOF instead of accepting the final token. A test was added to ensure
that Decoders handle this case.

Change-Id: I9083c91d9626180d3bcf5c069a017050f3c7c4a8
Reviewed-on: https://go-review.googlesource.com/c/go/+/130556
Run-TryBot: Sam Whited <sam@samwhited.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
---
 src/encoding/xml/xml.go      |  5 ++++-
 src/encoding/xml/xml_test.go | 45 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)

(limited to 'src/encoding')

diff --git a/src/encoding/xml/xml.go b/src/encoding/xml/xml.go
index ca059440a1..5e73dcf731 100644
--- a/src/encoding/xml/xml.go
+++ b/src/encoding/xml/xml.go
@@ -286,7 +286,10 @@ func (d *Decoder) Token() (Token, error) {
 		t = d.nextToken
 		d.nextToken = nil
 	} else if t, err = d.rawToken(); err != nil {
-		if err == io.EOF && d.stk != nil && d.stk.kind != stkEOF {
+		switch {
+		case err == io.EOF && d.t != nil:
+			err = nil
+		case err == io.EOF && d.stk != nil && d.stk.kind != stkEOF:
 			err = d.syntaxError("unexpected EOF")
 		}
 		return t, err
diff --git a/src/encoding/xml/xml_test.go b/src/encoding/xml/xml_test.go
index ee4ffa2420..efddca43e9 100644
--- a/src/encoding/xml/xml_test.go
+++ b/src/encoding/xml/xml_test.go
@@ -14,6 +14,51 @@ import (
 	"unicode/utf8"
 )
 
+type toks struct {
+	earlyEOF bool
+	t        []Token
+}
+
+func (t *toks) Token() (Token, error) {
+	if len(t.t) == 0 {
+		return nil, io.EOF
+	}
+	var tok Token
+	tok, t.t = t.t[0], t.t[1:]
+	if t.earlyEOF && len(t.t) == 0 {
+		return tok, io.EOF
+	}
+	return tok, nil
+}
+
+func TestDecodeEOF(t *testing.T) {
+	start := StartElement{Name: Name{Local: "test"}}
+	t.Run("EarlyEOF", func(t *testing.T) {
+		d := NewTokenDecoder(&toks{earlyEOF: true, t: []Token{
+			start,
+			start.End(),
+		}})
+		err := d.Decode(&struct {
+			XMLName Name `xml:"test"`
+		}{})
+		if err != nil {
+			t.Error(err)
+		}
+	})
+	t.Run("LateEOF", func(t *testing.T) {
+		d := NewTokenDecoder(&toks{t: []Token{
+			start,
+			start.End(),
+		}})
+		err := d.Decode(&struct {
+			XMLName Name `xml:"test"`
+		}{})
+		if err != nil {
+			t.Error(err)
+		}
+	})
+}
+
 const testInput = `
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
-- 
cgit v1.3


From 54697702e435bddb69c0b76b25b3209c78d2120a Mon Sep 17 00:00:00 2001
From: Daniel Martí <mvdan@mvdan.cc>
Date: Wed, 21 Aug 2019 18:22:24 +0200
Subject: encoding/json: avoid work when unquoting strings, take 2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a re-submission of CL 151157, since it was reverted in CL 190909
due to an introduced crash found by a fuzzer. The revert CL included
regression tests, while this CL includes a fixed version of the original
change.

In particular, what we forgot in the original optimization was that we
still need the length and trailing quote checks at the beginning of
unquoteBytes. Without those, we could end up in a crash later on.

We can work out how many bytes can be unquoted trivially in
rescanLiteral, which already iterates over a string's bytes.

Removing the extra loop in unquoteBytes simplifies the function and
speeds it up, especially when decoding simple strings, which are common.

While at it, we can remove the check that s[0]=='"', since all call
sites already meet that condition.

name           old time/op    new time/op    delta
CodeDecoder-8    10.6ms ± 2%    10.5ms ± 1%  -1.01%  (p=0.004 n=20+10)

name           old speed      new speed      delta
CodeDecoder-8   183MB/s ± 2%   185MB/s ± 1%  +1.02%  (p=0.003 n=20+10)

Updates #28923.

Change-Id: I8c6b13302bcd86a364bc998d72451332c0809cde
Reviewed-on: https://go-review.googlesource.com/c/go/+/190659
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Peter Weinberger <pjw@google.com>
---
 src/encoding/json/decode.go | 69 ++++++++++++++++++++++++---------------------
 1 file changed, 37 insertions(+), 32 deletions(-)

(limited to 'src/encoding')

diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go
index 86d8a69db7..b43484692e 100644
--- a/src/encoding/json/decode.go
+++ b/src/encoding/json/decode.go
@@ -213,6 +213,9 @@ type decodeState struct {
 	savedError            error
 	useNumber             bool
 	disallowUnknownFields bool
+	// safeUnquote is the number of current string literal bytes that don't
+	// need to be unquoted. When negative, no bytes need unquoting.
+	safeUnquote int
 }
 
 // readIndex returns the position of the last byte read.
@@ -314,13 +317,27 @@ func (d *decodeState) rescanLiteral() {
 Switch:
 	switch data[i-1] {
 	case '"': // string
+		// safeUnquote is initialized at -1, which means that all bytes
+		// checked so far can be unquoted at a later time with no work
+		// at all. When reaching the closing '"', if safeUnquote is
+		// still -1, all bytes can be unquoted with no work. Otherwise,
+		// only those bytes up until the first '\\' or non-ascii rune
+		// can be safely unquoted.
+		safeUnquote := -1
 		for ; i < len(data); i++ {
-			switch data[i] {
-			case '\\':
+			if c := data[i]; c == '\\' {
+				if safeUnquote < 0 { // first unsafe byte
+					safeUnquote = int(i - d.off)
+				}
 				i++ // escaped char
-			case '"':
+			} else if c == '"' {
+				d.safeUnquote = safeUnquote
 				i++ // tokenize the closing quote too
 				break Switch
+			} else if c >= utf8.RuneSelf {
+				if safeUnquote < 0 { // first unsafe byte
+					safeUnquote = int(i - d.off)
+				}
 			}
 		}
 	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number
@@ -674,7 +691,7 @@ func (d *decodeState) object(v reflect.Value) error {
 		start := d.readIndex()
 		d.rescanLiteral()
 		item := d.data[start:d.readIndex()]
-		key, ok := unquoteBytes(item)
+		key, ok := d.unquoteBytes(item)
 		if !ok {
 			panic(phasePanicMsg)
 		}
@@ -875,7 +892,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
 			d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())})
 			return nil
 		}
-		s, ok := unquoteBytes(item)
+		s, ok := d.unquoteBytes(item)
 		if !ok {
 			if fromQuoted {
 				return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
@@ -926,7 +943,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
 		}
 
 	case '"': // string
-		s, ok := unquoteBytes(item)
+		s, ok := d.unquoteBytes(item)
 		if !ok {
 			if fromQuoted {
 				return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
@@ -1086,7 +1103,7 @@ func (d *decodeState) objectInterface() map[string]interface{} {
 		start := d.readIndex()
 		d.rescanLiteral()
 		item := d.data[start:d.readIndex()]
-		key, ok := unquote(item)
+		key, ok := d.unquote(item)
 		if !ok {
 			panic(phasePanicMsg)
 		}
@@ -1135,7 +1152,7 @@ func (d *decodeState) literalInterface() interface{} {
 		return c == 't'
 
 	case '"': // string
-		s, ok := unquote(item)
+		s, ok := d.unquote(item)
 		if !ok {
 			panic(phasePanicMsg)
 		}
@@ -1178,38 +1195,26 @@ func getu4(s []byte) rune {
 
 // unquote converts a quoted JSON string literal s into an actual string t.
 // The rules are different than for Go, so cannot use strconv.Unquote.
-func unquote(s []byte) (t string, ok bool) {
-	s, ok = unquoteBytes(s)
+// The first byte in s must be '"'.
+func (d *decodeState) unquote(s []byte) (t string, ok bool) {
+	s, ok = d.unquoteBytes(s)
 	t = string(s)
 	return
 }
 
-func unquoteBytes(s []byte) (t []byte, ok bool) {
-	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
+func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) {
+	// We already know that s[0] == '"'. However, we don't know that the
+	// closing quote exists in all cases, such as when the string is nested
+	// via the ",string" option.
+	if len(s) < 2 || s[len(s)-1] != '"' {
 		return
 	}
 	s = s[1 : len(s)-1]
 
-	// Check for unusual characters. If there are none,
-	// then no unquoting is needed, so return a slice of the
-	// original bytes.
-	r := 0
-	for r < len(s) {
-		c := s[r]
-		if c == '\\' || c == '"' || c < ' ' {
-			break
-		}
-		if c < utf8.RuneSelf {
-			r++
-			continue
-		}
-		rr, size := utf8.DecodeRune(s[r:])
-		if rr == utf8.RuneError && size == 1 {
-			break
-		}
-		r += size
-	}
-	if r == len(s) {
+	// If there are no unusual characters, no unquoting is needed, so return
+	// a slice of the original bytes.
+	r := d.safeUnquote
+	if r == -1 {
 		return s, true
 	}
 
-- 
cgit v1.3