7 files changed, 155 insertions, 50 deletions
diff --git a/src/encoding/json/bench_test.go b/src/encoding/json/bench_test.go
index f92d39f0c6..4a5fe7ec84 100644
--- a/src/encoding/json/bench_test.go
+++ b/src/encoding/json/bench_test.go
@@ -389,3 +389,22 @@ func BenchmarkTypeFieldsCache(b *testing.B) {
 		})
 	}
 }
+
+func BenchmarkEncodeMarshaler(b *testing.B) {
+	b.ReportAllocs()
+
+	// Zero-valued payload: a plain int next to a RawMessage field.
+	var payload struct {
+		A int
+		B RawMessage
+	}
+
+	b.RunParallel(func(pb *testing.PB) {
+		encoder := NewEncoder(ioutil.Discard) // each invocation of this body builds its own encoder
+		for pb.Next() {
+			if err := encoder.Encode(&payload); err != nil {
+				b.Fatal("Encode:", err)
+			}
+		}
+	})
+}
diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go
index 86d8a69db7..b43484692e 100644
--- a/src/encoding/json/decode.go
+++ b/src/encoding/json/decode.go
@@ -213,6 +213,9 @@ type decodeState struct {
 	savedError            error
 	useNumber             bool
 	disallowUnknownFields bool
+	// safeUnquote is the number of leading bytes of the current string
+	// literal that need no unquoting. When negative, no bytes need unquoting.
+	safeUnquote int
 }
 
 // readIndex returns the position of the last byte read.
@@ -314,13 +317,27 @@ func (d *decodeState) rescanLiteral() {
 Switch:
 	switch data[i-1] {
 	case '"': // string
+		// safeUnquote is initialized at -1, which means that all bytes
+		// checked so far can be unquoted at a later time with no work
+		// at all. When reaching the closing '"', if safeUnquote is
+		// still -1, all bytes can be unquoted with no work. Otherwise,
+		// only those bytes up until the first '\\' or non-ascii rune
+		// can be safely unquoted.
+		safeUnquote := -1
 		for ; i < len(data); i++ {
-			switch data[i] {
-			case '\\':
+			if c := data[i]; c == '\\' {
+				if safeUnquote < 0 { // first unsafe byte
+					safeUnquote = int(i - d.off)
+				}
 				i++ // escaped char
-			case '"':
+			} else if c == '"' {
+				d.safeUnquote = safeUnquote
 				i++ // tokenize the closing quote too
 				break Switch
+			} else if c >= utf8.RuneSelf {
+				if safeUnquote < 0 { // first unsafe byte
+					safeUnquote = int(i - d.off)
+				}
 			}
 		}
 	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number
@@ -674,7 +691,7 @@ func (d *decodeState) object(v reflect.Value) error {
 		start := d.readIndex()
 		d.rescanLiteral()
 		item := d.data[start:d.readIndex()]
-		key, ok := unquoteBytes(item)
+		key, ok := d.unquoteBytes(item)
 		if !ok {
 			panic(phasePanicMsg)
 		}
@@ -875,7 +892,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
 			d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())})
 			return nil
 		}
-		s, ok := unquoteBytes(item)
+		s, ok := d.unquoteBytes(item)
 		if !ok {
 			if fromQuoted {
 				return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
@@ -926,7 +943,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
 		}
 
 	case '"': // string
-		s, ok := unquoteBytes(item)
+		s, ok := d.unquoteBytes(item)
 		if !ok {
 			if fromQuoted {
 				return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
@@ -1086,7 +1103,7 @@ func (d *decodeState) objectInterface() map[string]interface{} {
 		start := d.readIndex()
 		d.rescanLiteral()
 		item := d.data[start:d.readIndex()]
-		key, ok := unquote(item)
+		key, ok := d.unquote(item)
 		if !ok {
 			panic(phasePanicMsg)
 		}
@@ -1135,7 +1152,7 @@ func (d *decodeState) literalInterface() interface{} {
 		return c == 't'
 
 	case '"': // string
-		s, ok := unquote(item)
+		s, ok := d.unquote(item)
 		if !ok {
 			panic(phasePanicMsg)
 		}
@@ -1178,38 +1195,26 @@ func getu4(s []byte) rune {
 
 // unquote converts a quoted JSON string literal s into an actual string t.
 // The rules are different than for Go, so cannot use strconv.Unquote.
-func unquote(s []byte) (t string, ok bool) {
-	s, ok = unquoteBytes(s)
+// The first byte in s must be '"'.
+func (d *decodeState) unquote(s []byte) (t string, ok bool) {
+	s, ok = d.unquoteBytes(s) // reads d.safeUnquote, which rescanLiteral sets
 	t = string(s)
 	return
 }
 
-func unquoteBytes(s []byte) (t []byte, ok bool) {
-	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
+func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) {
+	// We already know that s[0] == '"'. However, we don't know that the
+	// closing quote exists in all cases, such as when the string is nested
+	// via the ",string" option.
+	if len(s) < 2 || s[len(s)-1] != '"' {
 		return
 	}
 	s = s[1 : len(s)-1]
 
-	// Check for unusual characters. If there are none,
-	// then no unquoting is needed, so return a slice of the
-	// original bytes.
-	r := 0
-	for r < len(s) {
-		c := s[r]
-		if c == '\\' || c == '"' || c < ' ' {
-			break
-		}
-		if c < utf8.RuneSelf {
-			r++
-			continue
-		}
-		rr, size := utf8.DecodeRune(s[r:])
-		if rr == utf8.RuneError && size == 1 {
-			break
-		}
-		r += size
-	}
-	if r == len(s) {
+	// If there are no unusual characters, no unquoting is needed, so return
+	// a slice of the original bytes.
+	r := d.safeUnquote
+	if r == -1 {
 		return s, true
 	}
 
diff --git a/src/encoding/json/encode.go b/src/encoding/json/encode.go
index a7473a7eba..b81e505866 100644
--- a/src/encoding/json/encode.go
+++ b/src/encoding/json/encode.go
@@ -399,19 +399,22 @@ var (
 // newTypeEncoder constructs an encoderFunc for a type.
 // The returned encoder only checks CanAddr when allowAddr is true.
 func newTypeEncoder(t reflect.Type, allowAddr bool) encoderFunc {
-	if t.Implements(marshalerType) {
-		return marshalerEncoder
-	}
+	// If we have a non-pointer value whose type implements
+	// Marshaler with a value receiver, then we're better off taking
+	// the address of the value - otherwise we end up with an
+	// allocation as we convert the value to an interface.
 	if t.Kind() != reflect.Ptr && allowAddr && reflect.PtrTo(t).Implements(marshalerType) {
 		return newCondAddrEncoder(addrMarshalerEncoder, newTypeEncoder(t, false))
 	}
-
-	if t.Implements(textMarshalerType) {
-		return textMarshalerEncoder
+	if t.Implements(marshalerType) {
+		return marshalerEncoder
 	}
 	if t.Kind() != reflect.Ptr && allowAddr && reflect.PtrTo(t).Implements(textMarshalerType) {
 		return newCondAddrEncoder(addrTextMarshalerEncoder, newTypeEncoder(t, false))
 	}
+	if t.Implements(textMarshalerType) {
+		return textMarshalerEncoder
+	}
 
 	switch t.Kind() {
 	case reflect.Bool:
diff --git a/src/encoding/json/indent.go b/src/encoding/json/indent.go
index 06adfc1263..2924d3b49b 100644
--- a/src/encoding/json/indent.go
+++ b/src/encoding/json/indent.go
@@ -4,7 +4,9 @@
 
 package json
 
-import "bytes"
+import (
+	"bytes"
+)
 
 // Compact appends to dst the JSON-encoded src with
 // insignificant space characters elided.
@@ -14,8 +16,8 @@ func Compact(dst *bytes.Buffer, src []byte) error {
 
 func compact(dst *bytes.Buffer, src []byte, escape bool) error {
 	origLen := dst.Len()
-	var scan scanner
-	scan.reset()
+	scan := newScanner()
+	defer freeScanner(scan)
 	start := 0
 	for i, c := range src {
 		if escape && (c == '<' || c == '>' || c == '&') {
@@ -36,7 +38,7 @@ func compact(dst *bytes.Buffer, src []byte, escape bool) error {
 			dst.WriteByte(hex[src[i+2]&0xF])
 			start = i + 3
 		}
-		v := scan.step(&scan, c)
+		v := scan.step(scan, c)
 		if v >= scanSkipSpace {
 			if v == scanError {
 				break
@@ -78,13 +80,13 @@ func newline(dst *bytes.Buffer, prefix, indent string, depth int) {
 // if src ends in a trailing newline, so will dst.
 func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error {
 	origLen := dst.Len()
-	var scan scanner
-	scan.reset()
+	scan := newScanner()
+	defer freeScanner(scan)
 	needIndent := false
 	depth := 0
 	for _, c := range src {
 		scan.bytes++
-		v := scan.step(&scan, c)
+		v := scan.step(scan, c)
 		if v == scanSkipSpace {
 			continue
 		}
diff --git a/src/encoding/json/scanner.go b/src/encoding/json/scanner.go
index 88572245fc..552bd70360 100644
--- a/src/encoding/json/scanner.go
+++ b/src/encoding/json/scanner.go
@@ -13,11 +13,16 @@ package json
 // This file starts with two simple examples using the scanner
 // before diving into the scanner itself.
 
-import "strconv"
+import (
+	"strconv"
+	"sync"
+)
 
 // Valid reports whether data is a valid JSON encoding.
 func Valid(data []byte) bool {
-	return checkValid(data, &scanner{}) == nil
+	scan := newScanner() // pooled scanner instead of a fresh allocation per call
+	defer freeScanner(scan)
+	return checkValid(data, scan) == nil
 }
 
 // checkValid verifies that data is valid JSON-encoded data.
@@ -45,7 +50,7 @@ type SyntaxError struct {
 func (e *SyntaxError) Error() string { return e.msg }
 
 // A scanner is a JSON scanning state machine.
-// Callers call scan.reset() and then pass bytes in one at a time
+// Callers call scan.reset and then pass bytes in one at a time
 // by calling scan.step(&scan, c) for each byte.
 // The return value, referred to as an opcode, tells the
 // caller about significant parsing events like beginning
@@ -72,10 +77,33 @@ type scanner struct {
 	// Error that happened, if any.
 	err error
 
-	// total bytes consumed, updated by decoder.Decode
+	// total bytes consumed, updated by decoder.Decode (and deliberately
+	// not set to zero by scan.reset)
 	bytes int64
 }
 
+// scannerPool holds idle scanners for reuse; go through newScanner and
+// freeScanner rather than calling Get and Put directly.
+var scannerPool = sync.Pool{
+	New: func() interface{} { return new(scanner) },
+}
+
+// newScanner fetches a scanner from scannerPool and readies it for reuse.
+func newScanner() *scanner {
+	s := scannerPool.Get().(*scanner)
+	s.bytes = 0 // reset leaves bytes alone by design, so clear it here
+	s.reset()
+	return s
+}
+
+func freeScanner(scan *scanner) {
+	// Don't pool oversized stacks; cap (not len) is the memory actually retained.
+	if cap(scan.parseState) > 1024 {
+		scan.parseState = nil
+	}
+	scannerPool.Put(scan) // newScanner resets the remaining state on reuse
+}
+
 // These values are returned by the state transition functions
 // assigned to scanner.state and the method scanner.eof.
 // They give details about the current state of the scan that
diff --git a/src/encoding/xml/xml.go b/src/encoding/xml/xml.go
index ca059440a1..5e73dcf731 100644
--- a/src/encoding/xml/xml.go
+++ b/src/encoding/xml/xml.go
@@ -286,7 +286,10 @@ func (d *Decoder) Token() (Token, error) {
 		t = d.nextToken
 		d.nextToken = nil
 	} else if t, err = d.rawToken(); err != nil {
-		if err == io.EOF && d.stk != nil && d.stk.kind != stkEOF {
+		switch {
+		case err == io.EOF && d.t != nil:
+			err = nil
+		case err == io.EOF && d.stk != nil && d.stk.kind != stkEOF:
 			err = d.syntaxError("unexpected EOF")
 		}
 		return t, err
diff --git a/src/encoding/xml/xml_test.go b/src/encoding/xml/xml_test.go
index ee4ffa2420..efddca43e9 100644
--- a/src/encoding/xml/xml_test.go
+++ b/src/encoding/xml/xml_test.go
@@ -14,6 +14,51 @@ import (
 	"unicode/utf8"
 )
 
+type toks struct {
+	earlyEOF bool    // if set, io.EOF is returned together with the last token
+	t        []Token // tokens still to be returned, in order
+}
+
+func (t *toks) Token() (Token, error) {
+	if len(t.t) == 0 {
+		return nil, io.EOF // queue drained
+	}
+	next := t.t[0]
+	t.t = t.t[1:]
+	if len(t.t) == 0 && t.earlyEOF {
+		return next, io.EOF // last token and EOF delivered together
+	}
+	return next, nil
+}
+
+// TestDecodeEOF checks that Decode succeeds whether the TokenReader reports
+// io.EOF together with the last token or only on the following call.
+func TestDecodeEOF(t *testing.T) {
+	start := StartElement{Name: Name{Local: "test"}}
+	// Both cases feed the same two tokens; only the EOF timing differs.
+	cases := []struct {
+		name     string
+		earlyEOF bool
+	}{
+		{name: "EarlyEOF", earlyEOF: true},
+		{name: "LateEOF", earlyEOF: false},
+	}
+	for _, tc := range cases {
+		early := tc.earlyEOF
+		t.Run(tc.name, func(t *testing.T) {
+			// A minimal document: a "test" start element and its end.
+			src := &toks{earlyEOF: early, t: []Token{start, start.End()}}
+			var dst struct {
+				XMLName Name `xml:"test"`
+			}
+			err := NewTokenDecoder(src).Decode(&dst)
+			if err != nil {
+				t.Error(err)
+			}
+		})
+	}
+}
+
 const testInput = `
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"