aboutsummaryrefslogtreecommitdiff
path: root/src/encoding/json/internal/jsonwire/encode.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/encoding/json/internal/jsonwire/encode.go')
-rw-r--r-- src/encoding/json/internal/jsonwire/encode.go 294
1 files changed, 294 insertions, 0 deletions
diff --git a/src/encoding/json/internal/jsonwire/encode.go b/src/encoding/json/internal/jsonwire/encode.go
new file mode 100644
index 0000000000..3901ff8bed
--- /dev/null
+++ b/src/encoding/json/internal/jsonwire/encode.go
@@ -0,0 +1,294 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+package jsonwire
+
+import (
+ "math"
+ "slices"
+ "strconv"
+ "unicode/utf16"
+ "unicode/utf8"
+
+ "encoding/json/internal/jsonflags"
+)
+
+// escapeASCII is a lookup table indexed by an ASCII byte value (0x00-0x7f).
+// A non-zero entry marks a character that may need escaping inside a JSON string.
+// The table is conservative: it also flags '<', '>', and '&',
+// which only need escaping when EscapeForHTML is in effect.
+var escapeASCII = [128]uint8{
+	// Control characters 0x00 through 0x1f.
+	0x00: 1, 0x01: 1, 0x02: 1, 0x03: 1, 0x04: 1, 0x05: 1, 0x06: 1, 0x07: 1,
+	0x08: 1, 0x09: 1, 0x0a: 1, 0x0b: 1, 0x0c: 1, 0x0d: 1, 0x0e: 1, 0x0f: 1,
+	0x10: 1, 0x11: 1, 0x12: 1, 0x13: 1, 0x14: 1, 0x15: 1, 0x16: 1, 0x17: 1,
+	0x18: 1, 0x19: 1, 0x1a: 1, 0x1b: 1, 0x1c: 1, 0x1d: 1, 0x1e: 1, 0x1f: 1,
+
+	// Characters with JSON or HTML significance.
+	'"':  1,
+	'&':  1,
+	'<':  1,
+	'>':  1,
+	'\\': 1,
+}
+
+// NeedEscape reports whether src needs escaping of any characters.
+// It conservatively assumes EscapeForHTML and EscapeForJS.
+// It reports true for inputs with invalid UTF-8.
+func NeedEscape[Bytes ~[]byte | ~string](src Bytes) bool {
+ var i int
+ for uint(len(src)) > uint(i) {
+ if c := src[i]; c < utf8.RuneSelf {
+ if escapeASCII[c] > 0 {
+ return true
+ }
+ i++
+ } else {
+ r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[i:])))
+ if r == utf8.RuneError || r == '\u2028' || r == '\u2029' {
+ return true
+ }
+ i += rn
+ }
+ }
+ return false
+}
+
+// AppendQuote appends src to dst as a JSON string per RFC 7159, section 7.
+//
+// It takes in flags and respects the following:
+// - EscapeForHTML escapes '<', '>', and '&'.
+// - EscapeForJS escapes '\u2028' and '\u2029'.
+// - EscapeInvalidUTF8 writes the replacement for invalid bytes as the
+// escape sequence `\ufffd` instead of the UTF-8 encoding of U+FFFD.
+// - AllowInvalidUTF8 avoids reporting an error for invalid UTF-8.
+//
+// Regardless of whether AllowInvalidUTF8 is specified,
+// invalid bytes are replaced with the Unicode replacement character ('\ufffd').
+// If no escape flags are set, then the shortest representable form is used,
+// which is also the canonical form for strings (RFC 8785, section 3.2.2.2).
+//
+// The returned slice always holds the complete quoted string,
+// including the closing quote, even when ErrInvalidUTF8 is returned.
+func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflags.Flags) ([]byte, error) {
+ // i is the start of the pending run of src that has been scanned
+ // but not yet copied to dst; n is the current scan position.
+ var i, n int
+ var hasInvalidUTF8 bool
+ // Reserve room for both quotes and an unescaped copy of src.
+ dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`))
+ dst = append(dst, '"')
+ for uint(len(src)) > uint(n) {
+ if c := src[n]; c < utf8.RuneSelf {
+ // Handle single-byte ASCII.
+ n++
+ if escapeASCII[c] == 0 {
+ continue // no escaping possibly needed
+ }
+ // Handle escaping of single-byte ASCII.
+ // The HTML characters are only escaped when EscapeForHTML is set;
+ // every other flagged character is always escaped.
+ if !(c == '<' || c == '>' || c == '&') || flags.Get(jsonflags.EscapeForHTML) {
+ dst = append(dst, src[i:n-1]...) // flush the pending run that precedes c
+ dst = appendEscapedASCII(dst, c)
+ i = n // the next pending run starts after c
+ }
+ } else {
+ // Handle multi-byte Unicode.
+ r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:])))
+ n += rn
+ if r != utf8.RuneError && r != '\u2028' && r != '\u2029' {
+ continue // no escaping possibly needed
+ }
+ // Handle escaping of multi-byte Unicode.
+ // If neither case below applies, i is left untouched and the rune
+ // is copied verbatim as part of the pending run.
+ switch {
+ // NOTE(review): isInvalidUTF8 is defined elsewhere; presumably it tells a
+ // decoding failure apart from a literal U+FFFD in src — confirm.
+ case isInvalidUTF8(r, rn):
+ hasInvalidUTF8 = true
+ dst = append(dst, src[i:n-rn]...) // flush the pending run before the bad bytes
+ if flags.Get(jsonflags.EscapeInvalidUTF8) {
+ dst = append(dst, `\ufffd`...) // six-byte escape sequence
+ } else {
+ dst = append(dst, "\ufffd"...) // UTF-8 encoding of U+FFFD
+ }
+ i = n
+ case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS):
+ dst = append(dst, src[i:n-rn]...) // flush the pending run before the rune
+ dst = appendEscapedUnicode(dst, r)
+ i = n
+ }
+ }
+ }
+ // Flush whatever remains after the last escape, then close the string.
+ dst = append(dst, src[i:n]...)
+ dst = append(dst, '"')
+ if hasInvalidUTF8 && !flags.Get(jsonflags.AllowInvalidUTF8) {
+ return dst, ErrInvalidUTF8
+ }
+ return dst, nil
+}
+
+func appendEscapedASCII(dst []byte, c byte) []byte {
+ switch c {
+ case '"', '\\':
+ dst = append(dst, '\\', c)
+ case '\b':
+ dst = append(dst, "\\b"...)
+ case '\f':
+ dst = append(dst, "\\f"...)
+ case '\n':
+ dst = append(dst, "\\n"...)
+ case '\r':
+ dst = append(dst, "\\r"...)
+ case '\t':
+ dst = append(dst, "\\t"...)
+ default:
+ dst = appendEscapedUTF16(dst, uint16(c))
+ }
+ return dst
+}
+
+// appendEscapedUnicode appends r to dst using \uXXXX escapes.
+// When utf16.EncodeRune yields a surrogate pair, two escapes are written;
+// otherwise a single escape holding the low 16 bits of r is written.
+func appendEscapedUnicode(dst []byte, r rune) []byte {
+	hi, lo := utf16.EncodeRune(r)
+	if hi == '\ufffd' || lo == '\ufffd' {
+		// EncodeRune produced no surrogate pair for r.
+		return appendEscapedUTF16(dst, uint16(r))
+	}
+	dst = appendEscapedUTF16(dst, uint16(hi))
+	return appendEscapedUTF16(dst, uint16(lo))
+}
+
+// appendEscapedUTF16 appends the six-byte escape \uXXXX for the UTF-16
+// code unit x to dst, using lowercase hexadecimal digits.
+func appendEscapedUTF16(dst []byte, x uint16) []byte {
+	const digits = "0123456789abcdef"
+	esc := [6]byte{
+		'\\', 'u',
+		digits[(x>>12)&0xf],
+		digits[(x>>8)&0xf],
+		digits[(x>>4)&0xf],
+		digits[x&0xf],
+	}
+	return append(dst, esc[:]...)
+}
+
+// ReformatString consumes a JSON string from src and appends it to dst,
+// reformatting it if necessary according to the specified flags.
+// It returns the appended output and the number of consumed input bytes.
+//
+// If ConsumeString reports an error, dst is returned without anything
+// appended, together with the count and error from ConsumeString.
+func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
+ // TODO: Should this update ValueFlags as input?
+ var valFlags ValueFlags
+ // NOTE(review): the third argument is true unless AllowInvalidUTF8 is set;
+ // presumably it asks ConsumeString to reject invalid UTF-8 — confirm.
+ n, err := ConsumeString(&valFlags, src, !flags.Get(jsonflags.AllowInvalidUTF8))
+ if err != nil {
+ return dst, n, err
+ }
+
+ // If the output requires no special escapes, and the input
+ // is already in canonical form or should be preserved verbatim,
+ // then directly copy the input to the output.
+ if !flags.Get(jsonflags.AnyEscape) &&
+ (valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings)) {
+ dst = append(dst, src[:n]...) // copy the string verbatim
+ return dst, n, nil
+ }
+
+ // Under [jsonflags.PreserveRawStrings], any pre-escaped sequences
+ // remain escaped, however we still need to respect the
+ // [jsonflags.EscapeForHTML] and [jsonflags.EscapeForJS] options.
+ if flags.Get(jsonflags.PreserveRawStrings) {
+ // i is the scan position within src[:n];
+ // src[:lastAppendIndex] has already been written to dst.
+ var i, lastAppendIndex int
+ for i < n {
+ if c := src[i]; c < utf8.RuneSelf {
+ if (c == '<' || c == '>' || c == '&') && flags.Get(jsonflags.EscapeForHTML) {
+ dst = append(dst, src[lastAppendIndex:i]...) // flush the bytes before c
+ dst = appendEscapedASCII(dst, c)
+ lastAppendIndex = i + 1
+ }
+ i++
+ } else {
+ r, rn := utf8.DecodeRune(truncateMaxUTF8(src[i:]))
+ if (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS) {
+ dst = append(dst, src[lastAppendIndex:i]...) // flush the bytes before the rune
+ dst = appendEscapedUnicode(dst, r)
+ lastAppendIndex = i + rn
+ }
+ i += rn
+ }
+ }
+ // Copy the remainder that follows the last inserted escape.
+ return append(dst, src[lastAppendIndex:n]...), n, nil
+ }
+
+ // The input contains characters that might need escaping,
+ // unnecessary escape sequences, or invalid UTF-8.
+ // Perform a round-trip unquote and quote to properly reformat
+ // these sequences according to the current flags.
+ // NOTE(review): both errors are discarded; presumably this is safe because
+ // ConsumeString already accepted src[:n] under the same UTF-8 policy — confirm.
+ b, _ := AppendUnquote(nil, src[:n])
+ dst, _ = AppendQuote(dst, b, flags)
+ return dst, n, nil
+}
+
+// AppendFloat appends src to dst as a JSON number per RFC 7159, section 6.
+// It formats numbers similar to the ES6 number-to-string conversion.
+// See https://go.dev/issue/14135.
+//
+// The output is identical to ECMA-262, 6th edition, section 7.1.12.1 and with
+// RFC 8785, section 3.2.2.3 for 64-bit floating-point numbers except for -0,
+// which is formatted as -0 instead of just 0.
+//
+// For 32-bit floating-point numbers,
+// the output is a 32-bit equivalent of the algorithm.
+// Note that ECMA-262 specifies no algorithm for 32-bit numbers.
+func AppendFloat(dst []byte, src float64, bits int) []byte {
+ if bits == 32 {
+ src = float64(float32(src))
+ }
+
+ abs := math.Abs(src)
+ fmt := byte('f')
+ if abs != 0 {
+ if bits == 64 && (float64(abs) < 1e-6 || float64(abs) >= 1e21) ||
+ bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) {
+ fmt = 'e'
+ }
+ }
+ dst = strconv.AppendFloat(dst, src, fmt, -1, bits)
+ if fmt == 'e' {
+ // Clean up e-09 to e-9.
+ n := len(dst)
+ if n >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' {
+ dst[n-2] = dst[n-1]
+ dst = dst[:n-1]
+ }
+ }
+ return dst
+}
+
+// ReformatNumber consumes a JSON number from src and appends it to dst,
+// canonicalizing it if specified.
+// It returns the appended output and the number of consumed input bytes.
+//
+// The number is copied verbatim unless CanonicalizeNumbers is set.
+// With it set, "-0" is always rewritten, numbers with a fraction or exponent
+// are rewritten only under CanonicalizeRawFloats, and integers are rewritten
+// only under CanonicalizeRawInts and only when they are long enough that the
+// canonical form could differ.
+// If ConsumeNumber reports an error, dst is returned without anything appended.
+func ReformatNumber(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
+	n, err := ConsumeNumber(src)
+	if err != nil {
+		return dst, n, err
+	}
+	num := src[:n]
+	if !flags.Get(jsonflags.CanonicalizeNumbers) {
+		return append(dst, num...), n, nil // copy the number verbatim
+	}
+
+	// A fraction or an exponent marks the number as a float;
+	// anything else is treated as an integer.
+	isFloat := slices.ContainsFunc(num, func(c byte) bool {
+		return c == '.' || c == 'e' || c == 'E'
+	})
+
+	// Decide whether this kind of number may be copied without reformatting.
+	var verbatim bool
+	switch {
+	case string(num) == "-0":
+		// Canonicalize -0 as 0 regardless of kind, so never copy verbatim.
+	case isFloat:
+		verbatim = !flags.Get(jsonflags.CanonicalizeRawFloats)
+	default:
+		// As an optimization, integer numbers below 2⁵³ are copied verbatim
+		// since their canonical form is always identical.
+		const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10))
+		verbatim = !flags.Get(jsonflags.CanonicalizeRawInts) || n < maxExactIntegerDigits
+	}
+	if verbatim {
+		return append(dst, num...), n, nil // copy the number verbatim
+	}
+
+	// Parse and reformat the number (which uses a canonical format).
+	// The parse error is deliberately ignored: an out-of-range input
+	// parses to ±Inf, which is clamped to ±MaxFloat64 below.
+	fv, _ := strconv.ParseFloat(string(num), 64)
+	switch {
+	case fv == 0:
+		fv = 0 // normalize negative zero as just zero
+	case math.IsInf(fv, 0):
+		fv = math.Copysign(math.MaxFloat64, fv) // clamp, keeping the sign
+	}
+	return AppendFloat(dst, fv, 64), n, nil
+}