diff options
| author | Damien Neil <dneil@google.com> | 2025-04-11 14:19:51 -0700 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2025-04-18 08:24:07 -0700 |
| commit | 0e17905793cb5e0acc323a0cdf3733199d93976a (patch) | |
| tree | fec117ceb6b56866e6c51e6acd72901cf91717ce /src/encoding/json/jsontext/encode.go | |
| parent | c889004615b40535ebd5054cbcf2deebdb3a299a (diff) | |
| download | go-0e17905793cb5e0acc323a0cdf3733199d93976a.tar.xz | |
encoding/json: add json/v2 with GOEXPERIMENT=jsonv2 guard
This imports the proposed new v2 JSON API implemented in
github.com/go-json-experiment/json as of commit
d3c622f1b874954c355e60c8e6b6baa5f60d2fed.
When GOEXPERIMENT=jsonv2 is set, the encoding/json/v2 and
encoding/jsontext packages are visible, the encoding/json
package is implemented in terms of encoding/json/v2, and
the encoding/json package include various additional APIs.
(See #71497 for details.)
When GOEXPERIMENT=jsonv2 is not set, the new API is not
present and the encoding/json package is unchanged.
The experimental API is not bound by the Go compatibility
promise and is expected to evolve as updates are made to
the json/v2 proposal.
The contents of encoding/json/internal/jsontest/testdata
are compressed with zstd v1.5.7 with the -19 option.
Fixes #71845
For #71497
Change-Id: Ib8c94e5f0586b6aaa22833190b41cf6ef59f4f01
Reviewed-on: https://go-review.googlesource.com/c/go/+/665796
Auto-Submit: Damien Neil <dneil@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Joseph Tsai <joetsai@digital-static.net>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Diffstat (limited to 'src/encoding/json/jsontext/encode.go')
| -rw-r--r-- | src/encoding/json/jsontext/encode.go | 972 |
1 files changed, 972 insertions, 0 deletions
diff --git a/src/encoding/json/jsontext/encode.go b/src/encoding/json/jsontext/encode.go new file mode 100644 index 0000000000..a1e6307adc --- /dev/null +++ b/src/encoding/json/jsontext/encode.go @@ -0,0 +1,972 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.jsonv2 + +package jsontext + +import ( + "bytes" + "io" + "math/bits" + + "encoding/json/internal/jsonflags" + "encoding/json/internal/jsonopts" + "encoding/json/internal/jsonwire" +) + +// Encoder is a streaming encoder from raw JSON tokens and values. +// It is used to write a stream of top-level JSON values, +// each terminated with a newline character. +// +// [Encoder.WriteToken] and [Encoder.WriteValue] calls may be interleaved. +// For example, the following JSON value: +// +// {"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}} +// +// can be composed with the following calls (ignoring errors for brevity): +// +// e.WriteToken(BeginObject) // { +// e.WriteToken(String("name")) // "name" +// e.WriteToken(String("value")) // "value" +// e.WriteValue(Value(`"array"`)) // "array" +// e.WriteToken(BeginArray) // [ +// e.WriteToken(Null) // null +// e.WriteToken(False) // false +// e.WriteValue(Value("true")) // true +// e.WriteToken(Float(3.14159)) // 3.14159 +// e.WriteToken(EndArray) // ] +// e.WriteValue(Value(`"object"`)) // "object" +// e.WriteValue(Value(`{"k":"v"}`)) // {"k":"v"} +// e.WriteToken(EndObject) // } +// +// The above is one of many possible sequence of calls and +// may not represent the most sensible method to call for any given token/value. +// For example, it is probably more common to call [Encoder.WriteToken] with a string +// for object names. +type Encoder struct { + s encoderState +} + +// encoderState is the low-level state of Encoder. +// It has exported fields and method for use by the "json" package. +type encoderState struct { + state + encodeBuffer + jsonopts.Struct + + SeenPointers map[any]struct{} // only used when marshaling; identical to json.seenPointers +} + +// encodeBuffer is a buffer split into 2 segments: +// +// - buf[0:len(buf)] // written (but unflushed) portion of the buffer +// - buf[len(buf):cap(buf)] // unused portion of the buffer +type encodeBuffer struct { + Buf []byte // may alias wr if it is a bytes.Buffer + + // baseOffset is added to len(buf) to obtain the absolute offset + // relative to the start of io.Writer stream. + baseOffset int64 + + wr io.Writer + + // maxValue is the approximate maximum Value size passed to WriteValue. + maxValue int + // unusedCache is the buffer returned by the UnusedBuffer method. + unusedCache []byte + // bufStats is statistics about buffer utilization. + // It is only used with pooled encoders in pools.go. + bufStats bufferStatistics +} + +// NewEncoder constructs a new streaming encoder writing to w +// configured with the provided options. +// It flushes the internal buffer when the buffer is sufficiently full or +// when a top-level value has been written. +// +// If w is a [bytes.Buffer], then the encoder appends directly into the buffer +// without copying the contents from an intermediate buffer. +func NewEncoder(w io.Writer, opts ...Options) *Encoder { + e := new(Encoder) + e.Reset(w, opts...) + return e +} + +// Reset resets an encoder such that it is writing afresh to w and +// configured with the provided options. Reset must not be called on +// a Encoder passed to the [encoding/json/v2.MarshalerTo.MarshalJSONTo] method +// or the [encoding/json/v2.MarshalToFunc] function. +func (e *Encoder) Reset(w io.Writer, opts ...Options) { + switch { + case e == nil: + panic("jsontext: invalid nil Encoder") + case w == nil: + panic("jsontext: invalid nil io.Writer") + case e.s.Flags.Get(jsonflags.WithinArshalCall): + panic("jsontext: cannot reset Encoder passed to json.MarshalerTo") + } + e.s.reset(nil, w, opts...) +} + +func (e *encoderState) reset(b []byte, w io.Writer, opts ...Options) { + e.state.reset() + e.encodeBuffer = encodeBuffer{Buf: b, wr: w, bufStats: e.bufStats} + if bb, ok := w.(*bytes.Buffer); ok && bb != nil { + e.Buf = bb.Bytes()[bb.Len():] // alias the unused buffer of bb + } + opts2 := jsonopts.Struct{} // avoid mutating e.Struct in case it is part of opts + opts2.Join(opts...) + e.Struct = opts2 + if e.Flags.Get(jsonflags.Multiline) { + if !e.Flags.Has(jsonflags.SpaceAfterColon) { + e.Flags.Set(jsonflags.SpaceAfterColon | 1) + } + if !e.Flags.Has(jsonflags.SpaceAfterComma) { + e.Flags.Set(jsonflags.SpaceAfterComma | 0) + } + if !e.Flags.Has(jsonflags.Indent) { + e.Flags.Set(jsonflags.Indent | 1) + e.Indent = "\t" + } + } +} + +// Options returns the options used to construct the decoder and +// may additionally contain semantic options passed to a +// [encoding/json/v2.MarshalEncode] call. +// +// If operating within +// a [encoding/json/v2.MarshalerTo.MarshalJSONTo] method call or +// a [encoding/json/v2.MarshalToFunc] function call, +// then the returned options are only valid within the call. +func (e *Encoder) Options() Options { + return &e.s.Struct +} + +// NeedFlush determines whether to flush at this point. +func (e *encoderState) NeedFlush() bool { + // NOTE: This function is carefully written to be inlinable. + + // Avoid flushing if e.wr is nil since there is no underlying writer. + // Flush if less than 25% of the capacity remains. + // Flushing at some constant fraction ensures that the buffer stops growing + // so long as the largest Token or Value fits within that unused capacity. + return e.wr != nil && (e.Tokens.Depth() == 1 || len(e.Buf) > 3*cap(e.Buf)/4) +} + +// Flush flushes the buffer to the underlying io.Writer. +// It may append a trailing newline after the top-level value. +func (e *encoderState) Flush() error { + if e.wr == nil || e.avoidFlush() { + return nil + } + + // In streaming mode, always emit a newline after the top-level value. + if e.Tokens.Depth() == 1 && !e.Flags.Get(jsonflags.OmitTopLevelNewline) { + e.Buf = append(e.Buf, '\n') + } + + // Inform objectNameStack that we are about to flush the buffer content. + e.Names.copyQuotedBuffer(e.Buf) + + // Specialize bytes.Buffer for better performance. + if bb, ok := e.wr.(*bytes.Buffer); ok { + // If e.buf already aliases the internal buffer of bb, + // then the Write call simply increments the internal offset, + // otherwise Write operates as expected. + // See https://go.dev/issue/42986. + n, _ := bb.Write(e.Buf) // never fails unless bb is nil + e.baseOffset += int64(n) + + // If the internal buffer of bytes.Buffer is too small, + // append operations elsewhere in the Encoder may grow the buffer. + // This would be semantically correct, but hurts performance. + // As such, ensure 25% of the current length is always available + // to reduce the probability that other appends must allocate. + if avail := bb.Available(); avail < bb.Len()/4 { + bb.Grow(avail + 1) + } + + e.Buf = bb.AvailableBuffer() + return nil + } + + // Flush the internal buffer to the underlying io.Writer. + n, err := e.wr.Write(e.Buf) + e.baseOffset += int64(n) + if err != nil { + // In the event of an error, preserve the unflushed portion. + // Thus, write errors aren't fatal so long as the io.Writer + // maintains consistent state after errors. + if n > 0 { + e.Buf = e.Buf[:copy(e.Buf, e.Buf[n:])] + } + return &ioError{action: "write", err: err} + } + e.Buf = e.Buf[:0] + + // Check whether to grow the buffer. + // Note that cap(e.buf) may already exceed maxBufferSize since + // an append elsewhere already grew it to store a large token. + const maxBufferSize = 4 << 10 + const growthSizeFactor = 2 // higher value is faster + const growthRateFactor = 2 // higher value is slower + // By default, grow if below the maximum buffer size. + grow := cap(e.Buf) <= maxBufferSize/growthSizeFactor + // Growing can be expensive, so only grow + // if a sufficient number of bytes have been processed. + grow = grow && int64(cap(e.Buf)) < e.previousOffsetEnd()/growthRateFactor + if grow { + e.Buf = make([]byte, 0, cap(e.Buf)*growthSizeFactor) + } + + return nil +} +func (d *encodeBuffer) offsetAt(pos int) int64 { return d.baseOffset + int64(pos) } +func (e *encodeBuffer) previousOffsetEnd() int64 { return e.baseOffset + int64(len(e.Buf)) } +func (e *encodeBuffer) unflushedBuffer() []byte { return e.Buf } + +// avoidFlush indicates whether to avoid flushing to ensure there is always +// enough in the buffer to unwrite the last object member if it were empty. +func (e *encoderState) avoidFlush() bool { + switch { + case e.Tokens.Last.Length() == 0: + // Never flush after BeginObject or BeginArray since we don't know yet + // if the object or array will end up being empty. + return true + case e.Tokens.Last.needObjectValue(): + // Never flush before the object value since we don't know yet + // if the object value will end up being empty. + return true + case e.Tokens.Last.NeedObjectName() && len(e.Buf) >= 2: + // Never flush after the object value if it does turn out to be empty. + switch string(e.Buf[len(e.Buf)-2:]) { + case `ll`, `""`, `{}`, `[]`: // last two bytes of every empty value + return true + } + } + return false +} + +// UnwriteEmptyObjectMember unwrites the last object member if it is empty +// and reports whether it performed an unwrite operation. +func (e *encoderState) UnwriteEmptyObjectMember(prevName *string) bool { + if last := e.Tokens.Last; !last.isObject() || !last.NeedObjectName() || last.Length() == 0 { + panic("BUG: must be called on an object after writing a value") + } + + // The flushing logic is modified to never flush a trailing empty value. + // The encoder never writes trailing whitespace eagerly. + b := e.unflushedBuffer() + + // Detect whether the last value was empty. + var n int + if len(b) >= 3 { + switch string(b[len(b)-2:]) { + case "ll": // last two bytes of `null` + n = len(`null`) + case `""`: + // It is possible for a non-empty string to have `""` as a suffix + // if the second to the last quote was escaped. + if b[len(b)-3] == '\\' { + return false // e.g., `"\""` is not empty + } + n = len(`""`) + case `{}`: + n = len(`{}`) + case `[]`: + n = len(`[]`) + } + } + if n == 0 { + return false + } + + // Unwrite the value, whitespace, colon, name, whitespace, and comma. + b = b[:len(b)-n] + b = jsonwire.TrimSuffixWhitespace(b) + b = jsonwire.TrimSuffixByte(b, ':') + b = jsonwire.TrimSuffixString(b) + b = jsonwire.TrimSuffixWhitespace(b) + b = jsonwire.TrimSuffixByte(b, ',') + e.Buf = b // store back truncated unflushed buffer + + // Undo state changes. + e.Tokens.Last.decrement() // for object member value + e.Tokens.Last.decrement() // for object member name + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + if e.Tokens.Last.isActiveNamespace() { + e.Namespaces.Last().removeLast() + } + } + e.Names.clearLast() + if prevName != nil { + e.Names.copyQuotedBuffer(e.Buf) // required by objectNameStack.replaceLastUnquotedName + e.Names.replaceLastUnquotedName(*prevName) + } + return true +} + +// UnwriteOnlyObjectMemberName unwrites the only object member name +// and returns the unquoted name. +func (e *encoderState) UnwriteOnlyObjectMemberName() string { + if last := e.Tokens.Last; !last.isObject() || last.Length() != 1 { + panic("BUG: must be called on an object after writing first name") + } + + // Unwrite the name and whitespace. + b := jsonwire.TrimSuffixString(e.Buf) + isVerbatim := bytes.IndexByte(e.Buf[len(b):], '\\') < 0 + name := string(jsonwire.UnquoteMayCopy(e.Buf[len(b):], isVerbatim)) + e.Buf = jsonwire.TrimSuffixWhitespace(b) + + // Undo state changes. + e.Tokens.Last.decrement() + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + if e.Tokens.Last.isActiveNamespace() { + e.Namespaces.Last().removeLast() + } + } + e.Names.clearLast() + return name +} + +// WriteToken writes the next token and advances the internal write offset. +// +// The provided token kind must be consistent with the JSON grammar. +// For example, it is an error to provide a number when the encoder +// is expecting an object name (which is always a string), or +// to provide an end object delimiter when the encoder is finishing an array. +// If the provided token is invalid, then it reports a [SyntacticError] and +// the internal state remains unchanged. The offset reported +// in [SyntacticError] will be relative to the [Encoder.OutputOffset]. +func (e *Encoder) WriteToken(t Token) error { + return e.s.WriteToken(t) +} +func (e *encoderState) WriteToken(t Token) error { + k := t.Kind() + b := e.Buf // use local variable to avoid mutating e in case of error + + // Append any delimiters or optional whitespace. + b = e.Tokens.MayAppendDelim(b, k) + if e.Flags.Get(jsonflags.AnyWhitespace) { + b = e.appendWhitespace(b, k) + } + pos := len(b) // offset before the token + + // Append the token to the output and to the state machine. + var err error + switch k { + case 'n': + b = append(b, "null"...) + err = e.Tokens.appendLiteral() + case 'f': + b = append(b, "false"...) + err = e.Tokens.appendLiteral() + case 't': + b = append(b, "true"...) + err = e.Tokens.appendLiteral() + case '"': + if b, err = t.appendString(b, &e.Flags); err != nil { + break + } + if e.Tokens.Last.NeedObjectName() { + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + if !e.Tokens.Last.isValidNamespace() { + err = errInvalidNamespace + break + } + if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) { + err = wrapWithObjectName(ErrDuplicateName, b[pos:]) + break + } + } + e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds + } + err = e.Tokens.appendString() + case '0': + if b, err = t.appendNumber(b, &e.Flags); err != nil { + break + } + err = e.Tokens.appendNumber() + case '{': + b = append(b, '{') + if err = e.Tokens.pushObject(); err != nil { + break + } + e.Names.push() + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + e.Namespaces.push() + } + case '}': + b = append(b, '}') + if err = e.Tokens.popObject(); err != nil { + break + } + e.Names.pop() + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + e.Namespaces.pop() + } + case '[': + b = append(b, '[') + err = e.Tokens.pushArray() + case ']': + b = append(b, ']') + err = e.Tokens.popArray() + default: + err = errInvalidToken + } + if err != nil { + return wrapSyntacticError(e, err, pos, +1) + } + + // Finish off the buffer and store it back into e. + e.Buf = b + if e.NeedFlush() { + return e.Flush() + } + return nil +} + +// AppendRaw appends either a raw string (without double quotes) or number. +// Specify safeASCII if the string output is guaranteed to be ASCII +// without any characters (including '<', '>', and '&') that need escaping, +// otherwise this will validate whether the string needs escaping. +// The appended bytes for a JSON number must be valid. +// +// This is a specialized implementation of Encoder.WriteValue +// that allows appending directly into the buffer. +// It is only called from marshal logic in the "json" package. +func (e *encoderState) AppendRaw(k Kind, safeASCII bool, appendFn func([]byte) ([]byte, error)) error { + b := e.Buf // use local variable to avoid mutating e in case of error + + // Append any delimiters or optional whitespace. + b = e.Tokens.MayAppendDelim(b, k) + if e.Flags.Get(jsonflags.AnyWhitespace) { + b = e.appendWhitespace(b, k) + } + pos := len(b) // offset before the token + + var err error + switch k { + case '"': + // Append directly into the encoder buffer by assuming that + // most of the time none of the characters need escaping. + b = append(b, '"') + if b, err = appendFn(b); err != nil { + return err + } + b = append(b, '"') + + // Check whether we need to escape the string and if necessary + // copy it to a scratch buffer and then escape it back. + isVerbatim := safeASCII || !jsonwire.NeedEscape(b[pos+len(`"`):len(b)-len(`"`)]) + if !isVerbatim { + var err error + b2 := append(e.unusedCache, b[pos+len(`"`):len(b)-len(`"`)]...) + b, err = jsonwire.AppendQuote(b[:pos], string(b2), &e.Flags) + e.unusedCache = b2[:0] + if err != nil { + return wrapSyntacticError(e, err, pos, +1) + } + } + + // Update the state machine. + if e.Tokens.Last.NeedObjectName() { + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + if !e.Tokens.Last.isValidNamespace() { + return wrapSyntacticError(e, err, pos, +1) + } + if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], isVerbatim) { + err = wrapWithObjectName(ErrDuplicateName, b[pos:]) + return wrapSyntacticError(e, err, pos, +1) + } + } + e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds + } + if err := e.Tokens.appendString(); err != nil { + return wrapSyntacticError(e, err, pos, +1) + } + case '0': + if b, err = appendFn(b); err != nil { + return err + } + if err := e.Tokens.appendNumber(); err != nil { + return wrapSyntacticError(e, err, pos, +1) + } + default: + panic("BUG: invalid kind") + } + + // Finish off the buffer and store it back into e. + e.Buf = b + if e.NeedFlush() { + return e.Flush() + } + return nil +} + +// WriteValue writes the next raw value and advances the internal write offset. +// The Encoder does not simply copy the provided value verbatim, but +// parses it to ensure that it is syntactically valid and reformats it +// according to how the Encoder is configured to format whitespace and strings. +// If [AllowInvalidUTF8] is specified, then any invalid UTF-8 is mangled +// as the Unicode replacement character, U+FFFD. +// +// The provided value kind must be consistent with the JSON grammar +// (see examples on [Encoder.WriteToken]). If the provided value is invalid, +// then it reports a [SyntacticError] and the internal state remains unchanged. +// The offset reported in [SyntacticError] will be relative to the +// [Encoder.OutputOffset] plus the offset into v of any encountered syntax error. +func (e *Encoder) WriteValue(v Value) error { + return e.s.WriteValue(v) +} +func (e *encoderState) WriteValue(v Value) error { + e.maxValue |= len(v) // bitwise OR is a fast approximation of max + + k := v.Kind() + b := e.Buf // use local variable to avoid mutating e in case of error + + // Append any delimiters or optional whitespace. + b = e.Tokens.MayAppendDelim(b, k) + if e.Flags.Get(jsonflags.AnyWhitespace) { + b = e.appendWhitespace(b, k) + } + pos := len(b) // offset before the value + + // Append the value the output. + var n int + n += jsonwire.ConsumeWhitespace(v[n:]) + b, m, err := e.reformatValue(b, v[n:], e.Tokens.Depth()) + if err != nil { + return wrapSyntacticError(e, err, pos+n+m, +1) + } + n += m + n += jsonwire.ConsumeWhitespace(v[n:]) + if len(v) > n { + err = jsonwire.NewInvalidCharacterError(v[n:], "after top-level value") + return wrapSyntacticError(e, err, pos+n, 0) + } + + // Append the kind to the state machine. + switch k { + case 'n', 'f', 't': + err = e.Tokens.appendLiteral() + case '"': + if e.Tokens.Last.NeedObjectName() { + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + if !e.Tokens.Last.isValidNamespace() { + err = errInvalidNamespace + break + } + if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) { + err = wrapWithObjectName(ErrDuplicateName, b[pos:]) + break + } + } + e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds + } + err = e.Tokens.appendString() + case '0': + err = e.Tokens.appendNumber() + case '{': + if err = e.Tokens.pushObject(); err != nil { + break + } + if err = e.Tokens.popObject(); err != nil { + panic("BUG: popObject should never fail immediately after pushObject: " + err.Error()) + } + if e.Flags.Get(jsonflags.ReorderRawObjects) { + mustReorderObjects(b[pos:]) + } + case '[': + if err = e.Tokens.pushArray(); err != nil { + break + } + if err = e.Tokens.popArray(); err != nil { + panic("BUG: popArray should never fail immediately after pushArray: " + err.Error()) + } + if e.Flags.Get(jsonflags.ReorderRawObjects) { + mustReorderObjects(b[pos:]) + } + } + if err != nil { + return wrapSyntacticError(e, err, pos, +1) + } + + // Finish off the buffer and store it back into e. + e.Buf = b + if e.NeedFlush() { + return e.Flush() + } + return nil +} + +// CountNextDelimWhitespace counts the number of bytes of delimiter and +// whitespace bytes assuming the upcoming token is a JSON value. +// This method is used for error reporting at the semantic layer. +func (e *encoderState) CountNextDelimWhitespace() (n int) { + const next = Kind('"') // arbitrary kind as next JSON value + delim := e.Tokens.needDelim(next) + if delim > 0 { + n += len(",") | len(":") + } + if delim == ':' { + if e.Flags.Get(jsonflags.SpaceAfterColon) { + n += len(" ") + } + } else { + if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) { + n += len(" ") + } + if e.Flags.Get(jsonflags.Multiline) { + if m := e.Tokens.NeedIndent(next); m > 0 { + n += len("\n") + len(e.IndentPrefix) + (m-1)*len(e.Indent) + } + } + } + return n +} + +// appendWhitespace appends whitespace that immediately precedes the next token. +func (e *encoderState) appendWhitespace(b []byte, next Kind) []byte { + if delim := e.Tokens.needDelim(next); delim == ':' { + if e.Flags.Get(jsonflags.SpaceAfterColon) { + b = append(b, ' ') + } + } else { + if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) { + b = append(b, ' ') + } + if e.Flags.Get(jsonflags.Multiline) { + b = e.AppendIndent(b, e.Tokens.NeedIndent(next)) + } + } + return b +} + +// AppendIndent appends the appropriate number of indentation characters +// for the current nested level, n. +func (e *encoderState) AppendIndent(b []byte, n int) []byte { + if n == 0 { + return b + } + b = append(b, '\n') + b = append(b, e.IndentPrefix...) + for ; n > 1; n-- { + b = append(b, e.Indent...) + } + return b +} + +// reformatValue parses a JSON value from the start of src and +// appends it to the end of dst, reformatting whitespace and strings as needed. +// It returns the extended dst buffer and the number of consumed input bytes. +func (e *encoderState) reformatValue(dst []byte, src Value, depth int) ([]byte, int, error) { + // TODO: Should this update ValueFlags as input? + if len(src) == 0 { + return dst, 0, io.ErrUnexpectedEOF + } + switch k := Kind(src[0]).normalize(); k { + case 'n': + if jsonwire.ConsumeNull(src) == 0 { + n, err := jsonwire.ConsumeLiteral(src, "null") + return dst, n, err + } + return append(dst, "null"...), len("null"), nil + case 'f': + if jsonwire.ConsumeFalse(src) == 0 { + n, err := jsonwire.ConsumeLiteral(src, "false") + return dst, n, err + } + return append(dst, "false"...), len("false"), nil + case 't': + if jsonwire.ConsumeTrue(src) == 0 { + n, err := jsonwire.ConsumeLiteral(src, "true") + return dst, n, err + } + return append(dst, "true"...), len("true"), nil + case '"': + if n := jsonwire.ConsumeSimpleString(src); n > 0 { + dst = append(dst, src[:n]...) // copy simple strings verbatim + return dst, n, nil + } + return jsonwire.ReformatString(dst, src, &e.Flags) + case '0': + if n := jsonwire.ConsumeSimpleNumber(src); n > 0 && !e.Flags.Get(jsonflags.CanonicalizeNumbers) { + dst = append(dst, src[:n]...) // copy simple numbers verbatim + return dst, n, nil + } + return jsonwire.ReformatNumber(dst, src, &e.Flags) + case '{': + return e.reformatObject(dst, src, depth) + case '[': + return e.reformatArray(dst, src, depth) + default: + return dst, 0, jsonwire.NewInvalidCharacterError(src, "at start of value") + } +} + +// reformatObject parses a JSON object from the start of src and +// appends it to the end of src, reformatting whitespace and strings as needed. +// It returns the extended dst buffer and the number of consumed input bytes. +func (e *encoderState) reformatObject(dst []byte, src Value, depth int) ([]byte, int, error) { + // Append object start. + if len(src) == 0 || src[0] != '{' { + panic("BUG: reformatObject must be called with a buffer that starts with '{'") + } else if depth == maxNestingDepth+1 { + return dst, 0, errMaxDepth + } + dst = append(dst, '{') + n := len("{") + + // Append (possible) object end. + n += jsonwire.ConsumeWhitespace(src[n:]) + if uint(len(src)) <= uint(n) { + return dst, n, io.ErrUnexpectedEOF + } + if src[n] == '}' { + dst = append(dst, '}') + n += len("}") + return dst, n, nil + } + + var err error + var names *objectNamespace + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + e.Namespaces.push() + defer e.Namespaces.pop() + names = e.Namespaces.Last() + } + depth++ + for { + // Append optional newline and indentation. + if e.Flags.Get(jsonflags.Multiline) { + dst = e.AppendIndent(dst, depth) + } + + // Append object name. + n += jsonwire.ConsumeWhitespace(src[n:]) + if uint(len(src)) <= uint(n) { + return dst, n, io.ErrUnexpectedEOF + } + m := jsonwire.ConsumeSimpleString(src[n:]) + isVerbatim := m > 0 + if isVerbatim { + dst = append(dst, src[n:n+m]...) + } else { + dst, m, err = jsonwire.ReformatString(dst, src[n:], &e.Flags) + if err != nil { + return dst, n + m, err + } + } + quotedName := src[n : n+m] + if !e.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(quotedName, isVerbatim) { + return dst, n, wrapWithObjectName(ErrDuplicateName, quotedName) + } + n += m + + // Append colon. + n += jsonwire.ConsumeWhitespace(src[n:]) + if uint(len(src)) <= uint(n) { + return dst, n, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName) + } + if src[n] != ':' { + err = jsonwire.NewInvalidCharacterError(src[n:], "after object name (expecting ':')") + return dst, n, wrapWithObjectName(err, quotedName) + } + dst = append(dst, ':') + n += len(":") + if e.Flags.Get(jsonflags.SpaceAfterColon) { + dst = append(dst, ' ') + } + + // Append object value. + n += jsonwire.ConsumeWhitespace(src[n:]) + if uint(len(src)) <= uint(n) { + return dst, n, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName) + } + dst, m, err = e.reformatValue(dst, src[n:], depth) + if err != nil { + return dst, n + m, wrapWithObjectName(err, quotedName) + } + n += m + + // Append comma or object end. + n += jsonwire.ConsumeWhitespace(src[n:]) + if uint(len(src)) <= uint(n) { + return dst, n, io.ErrUnexpectedEOF + } + switch src[n] { + case ',': + dst = append(dst, ',') + if e.Flags.Get(jsonflags.SpaceAfterComma) { + dst = append(dst, ' ') + } + n += len(",") + continue + case '}': + if e.Flags.Get(jsonflags.Multiline) { + dst = e.AppendIndent(dst, depth-1) + } + dst = append(dst, '}') + n += len("}") + return dst, n, nil + default: + return dst, n, jsonwire.NewInvalidCharacterError(src[n:], "after object value (expecting ',' or '}')") + } + } +} + +// reformatArray parses a JSON array from the start of src and +// appends it to the end of dst, reformatting whitespace and strings as needed. +// It returns the extended dst buffer and the number of consumed input bytes. +func (e *encoderState) reformatArray(dst []byte, src Value, depth int) ([]byte, int, error) { + // Append array start. + if len(src) == 0 || src[0] != '[' { + panic("BUG: reformatArray must be called with a buffer that starts with '['") + } else if depth == maxNestingDepth+1 { + return dst, 0, errMaxDepth + } + dst = append(dst, '[') + n := len("[") + + // Append (possible) array end. + n += jsonwire.ConsumeWhitespace(src[n:]) + if uint(len(src)) <= uint(n) { + return dst, n, io.ErrUnexpectedEOF + } + if src[n] == ']' { + dst = append(dst, ']') + n += len("]") + return dst, n, nil + } + + var idx int64 + var err error + depth++ + for { + // Append optional newline and indentation. + if e.Flags.Get(jsonflags.Multiline) { + dst = e.AppendIndent(dst, depth) + } + + // Append array value. + n += jsonwire.ConsumeWhitespace(src[n:]) + if uint(len(src)) <= uint(n) { + return dst, n, io.ErrUnexpectedEOF + } + var m int + dst, m, err = e.reformatValue(dst, src[n:], depth) + if err != nil { + return dst, n + m, wrapWithArrayIndex(err, idx) + } + n += m + + // Append comma or array end. + n += jsonwire.ConsumeWhitespace(src[n:]) + if uint(len(src)) <= uint(n) { + return dst, n, io.ErrUnexpectedEOF + } + switch src[n] { + case ',': + dst = append(dst, ',') + if e.Flags.Get(jsonflags.SpaceAfterComma) { + dst = append(dst, ' ') + } + n += len(",") + idx++ + continue + case ']': + if e.Flags.Get(jsonflags.Multiline) { + dst = e.AppendIndent(dst, depth-1) + } + dst = append(dst, ']') + n += len("]") + return dst, n, nil + default: + return dst, n, jsonwire.NewInvalidCharacterError(src[n:], "after array value (expecting ',' or ']')") + } + } +} + +// OutputOffset returns the current output byte offset. It gives the location +// of the next byte immediately after the most recently written token or value. +// The number of bytes actually written to the underlying [io.Writer] may be less +// than this offset due to internal buffering effects. +func (e *Encoder) OutputOffset() int64 { + return e.s.previousOffsetEnd() +} + +// UnusedBuffer returns a zero-length buffer with a possible non-zero capacity. +// This buffer is intended to be used to populate a [Value] +// being passed to an immediately succeeding [Encoder.WriteValue] call. +// +// Example usage: +// +// b := d.UnusedBuffer() +// b = append(b, '"') +// b = appendString(b, v) // append the string formatting of v +// b = append(b, '"') +// ... := d.WriteValue(b) +// +// It is the user's responsibility to ensure that the value is valid JSON. +func (e *Encoder) UnusedBuffer() []byte { + // NOTE: We don't return e.buf[len(e.buf):cap(e.buf)] since WriteValue would + // need to take special care to avoid mangling the data while reformatting. + // WriteValue can't easily identify whether the input Value aliases e.buf + // without using unsafe.Pointer. Thus, we just return a different buffer. + // Should this ever alias e.buf, we need to consider how it operates with + // the specialized performance optimization for bytes.Buffer. + n := 1 << bits.Len(uint(e.s.maxValue|63)) // fast approximation for max length + if cap(e.s.unusedCache) < n { + e.s.unusedCache = make([]byte, 0, n) + } + return e.s.unusedCache +} + +// StackDepth returns the depth of the state machine for written JSON data. +// Each level on the stack represents a nested JSON object or array. +// It is incremented whenever an [BeginObject] or [BeginArray] token is encountered +// and decremented whenever an [EndObject] or [EndArray] token is encountered. +// The depth is zero-indexed, where zero represents the top-level JSON value. +func (e *Encoder) StackDepth() int { + // NOTE: Keep in sync with Decoder.StackDepth. + return e.s.Tokens.Depth() - 1 +} + +// StackIndex returns information about the specified stack level. +// It must be a number between 0 and [Encoder.StackDepth], inclusive. +// For each level, it reports the kind: +// +// - 0 for a level of zero, +// - '{' for a level representing a JSON object, and +// - '[' for a level representing a JSON array. +// +// It also reports the length of that JSON object or array. +// Each name and value in a JSON object is counted separately, +// so the effective number of members would be half the length. +// A complete JSON object must have an even length. +func (e *Encoder) StackIndex(i int) (Kind, int64) { + // NOTE: Keep in sync with Decoder.StackIndex. + switch s := e.s.Tokens.index(i); { + case i > 0 && s.isObject(): + return '{', s.Length() + case i > 0 && s.isArray(): + return '[', s.Length() + default: + return 0, s.Length() + } +} + +// StackPointer returns a JSON Pointer (RFC 6901) to the most recently written value. +func (e *Encoder) StackPointer() Pointer { + return Pointer(e.s.AppendStackPointer(nil, -1)) +} + +func (e *encoderState) AppendStackPointer(b []byte, where int) []byte { + e.Names.copyQuotedBuffer(e.Buf) + return e.state.appendStackPointer(b, where) +} |
