aboutsummaryrefslogtreecommitdiff
path: root/src/encoding/json/jsontext/decode.go
diff options
context:
space:
mode:
authorDamien Neil <dneil@google.com>2025-04-11 14:19:51 -0700
committerGopher Robot <gobot@golang.org>2025-04-18 08:24:07 -0700
commit0e17905793cb5e0acc323a0cdf3733199d93976a (patch)
treefec117ceb6b56866e6c51e6acd72901cf91717ce /src/encoding/json/jsontext/decode.go
parentc889004615b40535ebd5054cbcf2deebdb3a299a (diff)
downloadgo-0e17905793cb5e0acc323a0cdf3733199d93976a.tar.xz
encoding/json: add json/v2 with GOEXPERIMENT=jsonv2 guard
This imports the proposed new v2 JSON API implemented in github.com/go-json-experiment/json as of commit d3c622f1b874954c355e60c8e6b6baa5f60d2fed. When GOEXPERIMENT=jsonv2 is set, the encoding/json/v2 and encoding/jsontext packages are visible, the encoding/json package is implemented in terms of encoding/json/v2, and the encoding/json package include various additional APIs. (See #71497 for details.) When GOEXPERIMENT=jsonv2 is not set, the new API is not present and the encoding/json package is unchanged. The experimental API is not bound by the Go compatibility promise and is expected to evolve as updates are made to the json/v2 proposal. The contents of encoding/json/internal/jsontest/testdata are compressed with zstd v1.5.7 with the -19 option. Fixes #71845 For #71497 Change-Id: Ib8c94e5f0586b6aaa22833190b41cf6ef59f4f01 Reviewed-on: https://go-review.googlesource.com/c/go/+/665796 Auto-Submit: Damien Neil <dneil@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Joseph Tsai <joetsai@digital-static.net> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Diffstat (limited to 'src/encoding/json/jsontext/decode.go')
-rw-r--r--src/encoding/json/jsontext/decode.go1168
1 files changed, 1168 insertions, 0 deletions
diff --git a/src/encoding/json/jsontext/decode.go b/src/encoding/json/jsontext/decode.go
new file mode 100644
index 0000000000..784ae4709a
--- /dev/null
+++ b/src/encoding/json/jsontext/decode.go
@@ -0,0 +1,1168 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.jsonv2
+
+package jsontext
+
+import (
+ "bytes"
+ "errors"
+ "io"
+
+ "encoding/json/internal/jsonflags"
+ "encoding/json/internal/jsonopts"
+ "encoding/json/internal/jsonwire"
+)
+
+// NOTE: The logic for decoding is complicated by the fact that reading from
+// an io.Reader into a temporary buffer means that the buffer may contain a
+// truncated portion of some valid input, requiring the need to fetch more data.
+//
+// This file is structured in the following way:
+//
+// - consumeXXX functions parse an exact JSON token from a []byte.
+// If the buffer appears truncated, then it returns io.ErrUnexpectedEOF.
+// The consumeSimpleXXX functions are so named because they only handle
+// a subset of the grammar for the JSON token being parsed.
+// They do not handle the full grammar to keep these functions inlinable.
+//
+// - Decoder.consumeXXX methods parse the next JSON token from Decoder.buf,
+// automatically fetching more input if necessary. These methods take
+// a position relative to the start of Decoder.buf as an argument and
+// return the end of the consumed JSON token as a position,
+// also relative to the start of Decoder.buf.
+//
+// - In the event of an I/O errors or state machine violations,
+// the implementation avoids mutating the state of Decoder
+// (aside from the book-keeping needed to implement Decoder.fetch).
+// For this reason, only Decoder.ReadToken and Decoder.ReadValue are
+// responsible for updated Decoder.prevStart and Decoder.prevEnd.
+//
+// - For performance, much of the implementation uses the pattern of calling
+// the inlinable consumeXXX functions first, and if more work is necessary,
+// then it calls the slower Decoder.consumeXXX methods.
+// TODO: Revisit this pattern if the Go compiler provides finer control
+// over exactly which calls are inlined or not.
+
+// Decoder is a streaming decoder for raw JSON tokens and values.
+// It is used to read a stream of top-level JSON values,
+// each separated by optional whitespace characters.
+//
+// [Decoder.ReadToken] and [Decoder.ReadValue] calls may be interleaved.
+// For example, the following JSON value:
+//
+// {"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
+//
+// can be parsed with the following calls (ignoring errors for brevity):
+//
+// d.ReadToken() // {
+// d.ReadToken() // "name"
+// d.ReadToken() // "value"
+// d.ReadValue() // "array"
+// d.ReadToken() // [
+// d.ReadToken() // null
+// d.ReadToken() // false
+// d.ReadValue() // true
+// d.ReadToken() // 3.14159
+// d.ReadToken() // ]
+// d.ReadValue() // "object"
+// d.ReadValue() // {"k":"v"}
+// d.ReadToken() // }
+//
+// The above is one of many possible sequences of calls and
+// may not represent the most sensible method to call for any given token/value.
+// For example, it is probably more common to call [Decoder.ReadToken] to obtain a
+// string token for object names.
+type Decoder struct {
+ s decoderState // low-level state that the Decoder methods delegate to
+}
+
+// decoderState is the low-level state of Decoder.
+// It has exported fields and methods for use by the "json" package.
+type decoderState struct {
+ state // state machine; reinitialized by reset
+ decodeBuffer // input buffer and read positions
+ jsonopts.Struct // configured options; replaced wholesale by reset
+
+ StringCache *[256]string // only used when unmarshaling; identical to json.stringCache
+}
+
+// decodeBuffer is a buffer split into 4 segments:
+//
+// - buf[0:prevEnd] // already read portion of the buffer
+// - buf[prevStart:prevEnd] // previously read value
+// - buf[prevEnd:len(buf)] // unread portion of the buffer
+// - buf[len(buf):cap(buf)] // unused portion of the buffer
+//
+// Invariants:
+//
+// 0 ≤ prevStart ≤ prevEnd ≤ len(buf) ≤ cap(buf)
+type decodeBuffer struct {
+ peekPos int // positive if a cached offset into buf for the start of the next token; -1 if peekErr is set; 0 if nothing is cached
+ peekErr error // error cached by PeekKind; non-nil implies peekPos is -1
+
+ buf []byte // may alias rd if it is a bytes.Buffer
+ prevStart int // start of the previously read value within buf
+ prevEnd int // end of the previously read value within buf; unread data starts here
+
+ // baseOffset is added to prevStart and prevEnd to obtain
+ // the absolute offset relative to the start of io.Reader stream.
+ baseOffset int64
+
+ rd io.Reader // source that fetch reads from; fetch reports io.ErrUnexpectedEOF when it is nil
+}
+
+// NewDecoder returns a streaming decoder that consumes JSON from r
+// and is configured with opts.
+//
+// When r is a [bytes.Buffer], parsing happens directly on the contents of
+// the buffer rather than on a copy held in an intermediate buffer.
+// The buffer must therefore not be written to while the decoder is in use.
+func NewDecoder(r io.Reader, opts ...Options) *Decoder {
+	dec := &Decoder{}
+	dec.Reset(r, opts...)
+	return dec
+}
+
+// Reset discards the current state of the decoder so that it reads afresh
+// from r, configured with the provided options.
+// It panics if d or r is nil.
+//
+// Reset must not be called on a Decoder passed to the
+// [encoding/json/v2.UnmarshalerFrom.UnmarshalJSONFrom] method
+// or the [encoding/json/v2.UnmarshalFromFunc] function; doing so panics.
+func (d *Decoder) Reset(r io.Reader, opts ...Options) {
+	if d == nil {
+		panic("jsontext: invalid nil Decoder")
+	}
+	if r == nil {
+		panic("jsontext: invalid nil io.Reader")
+	}
+	if d.s.Flags.Get(jsonflags.WithinArshalCall) {
+		panic("jsontext: cannot reset Decoder passed to json.UnmarshalerFrom")
+	}
+	d.s.reset(nil, r, opts...)
+}
+
+func (d *decoderState) reset(b []byte, r io.Reader, opts ...Options) {
+ d.state.reset()
+ d.decodeBuffer = decodeBuffer{buf: b, rd: r}
+ opts2 := jsonopts.Struct{} // avoid mutating d.Struct in case it is part of opts
+ opts2.Join(opts...)
+ d.Struct = opts2
+}
+
+// Options returns the options used to construct the decoder and
+// may additionally contain semantic options passed to a
+// [encoding/json/v2.UnmarshalDecode] call.
+//
+// If operating within
+// a [encoding/json/v2.UnmarshalerFrom.UnmarshalJSONFrom] method call or
+// a [encoding/json/v2.UnmarshalFromFunc] function call,
+// then the returned options are only valid within the call.
+func (d *Decoder) Options() Options {
+ return &d.s.Struct // pointer to the decoder's own options struct, not a copy
+}
+
+var errBufferWriteAfterNext = errors.New("invalid bytes.Buffer.Write call after calling bytes.Buffer.Next")
+
+// fetch reads at least 1 byte from the underlying io.Reader.
+// It returns io.ErrUnexpectedEOF if zero bytes were read and io.EOF was seen. Any other read error is wrapped in an ioError.
+func (d *decoderState) fetch() error {
+ if d.rd == nil {
+ return io.ErrUnexpectedEOF // no reader to fetch from
+ }
+
+ // Inform objectNameStack that we are about to fetch new buffer content.
+ d.Names.copyQuotedBuffer(d.buf)
+
+ // Specialize bytes.Buffer for better performance.
+ if bb, ok := d.rd.(*bytes.Buffer); ok {
+ switch {
+ case bb.Len() == 0:
+ return io.ErrUnexpectedEOF
+ case len(d.buf) == 0:
+ d.buf = bb.Next(bb.Len()) // "read" all data in the buffer
+ return nil
+ default:
+ // This only occurs if a partially filled bytes.Buffer was provided
+ // and more data is written to it while Decoder is reading from it.
+ // This practice will lead to data corruption since future writes
+ // may overwrite the contents of the current buffer.
+ //
+ // The user is trying to use a bytes.Buffer as a pipe,
+ // but a bytes.Buffer is a poor implementation of a pipe,
+ // the purpose-built io.Pipe should be used instead.
+ return &ioError{action: "read", err: errBufferWriteAfterNext}
+ }
+ }
+
+ // Allocate initial buffer if empty.
+ if cap(d.buf) == 0 {
+ d.buf = make([]byte, 0, 64)
+ }
+
+ // Check whether to grow the buffer.
+ const maxBufferSize = 4 << 10
+ const growthSizeFactor = 2 // higher value is faster
+ const growthRateFactor = 2 // higher value is slower
+ // By default, grow if below the maximum buffer size.
+ grow := cap(d.buf) <= maxBufferSize/growthSizeFactor
+ // Growing can be expensive, so only grow
+ // if a sufficient number of bytes have been processed.
+ grow = grow && int64(cap(d.buf)) < d.previousOffsetEnd()/growthRateFactor
+ // If prevStart==0, then fetch was called in order to fetch more data
+ // to finish consuming a large JSON value contiguously.
+ // Grow if less than 25% of the remaining capacity is available.
+ // Note that this may cause the input buffer to exceed maxBufferSize.
+ grow = grow || (d.prevStart == 0 && len(d.buf) >= 3*cap(d.buf)/4)
+
+ if grow {
+ // Allocate a new buffer and copy the contents of the old buffer over.
+ // TODO: Provide a hard limit on the maximum internal buffer size?
+ buf := make([]byte, 0, cap(d.buf)*growthSizeFactor)
+ d.buf = append(buf, d.buf[d.prevStart:]...) // bytes before prevStart are dropped
+ } else {
+ // Move unread portion of the data to the front.
+ n := copy(d.buf[:cap(d.buf)], d.buf[d.prevStart:])
+ d.buf = d.buf[:n]
+ }
+ d.baseOffset += int64(d.prevStart) // buf[0] now corresponds to the old buf[prevStart]
+ d.prevEnd -= d.prevStart // rebase so that prevEnd-prevStart is unchanged
+ d.prevStart = 0
+
+ // Read more data into the internal buffer.
+ for {
+ n, err := d.rd.Read(d.buf[len(d.buf):cap(d.buf)]) // read into the unused tail of the buffer
+ switch {
+ case n > 0:
+ d.buf = d.buf[:len(d.buf)+n]
+ return nil // ignore errors if any bytes are read
+ case err == io.EOF:
+ return io.ErrUnexpectedEOF
+ case err != nil:
+ return &ioError{action: "read", err: err}
+ default:
+ continue // Read returned (0, nil)
+ }
+ }
+}
+
+const invalidateBufferByte = '#' // invalid starting character for JSON grammar
+
+// invalidatePreviousRead invalidates buffers returned by Peek and Read calls
+// so that the first byte is an invalid character.
+// This Hyrum-proofs the API against faulty application code that assumes
+// values returned by ReadValue remain valid past subsequent Read calls.
+func (d *decodeBuffer) invalidatePreviousRead() {
+ // Avoid mutating the buffer if d.rd is nil which implies that d.buf
+ // is provided by the user code and may not expect mutations. A *bytes.Buffer reader is skipped too, since d.buf may alias its contents.
+ isBytesBuffer := func(r io.Reader) bool {
+ _, ok := r.(*bytes.Buffer)
+ return ok
+ }
+ if d.rd != nil && !isBytesBuffer(d.rd) && d.prevStart < d.prevEnd && uint(d.prevStart) < uint(len(d.buf)) {
+ d.buf[d.prevStart] = invalidateBufferByte
+ d.prevStart = d.prevEnd // previous span is now empty, so a repeated call is a no-op
+ }
+}
+
+// needMore reports whether pos is at the end of d.buf, meaning there are no more unread bytes at pos.
+func (d *decodeBuffer) needMore(pos int) bool {
+ // NOTE: The arguments and logic are kept simple to keep this inlinable.
+ return pos == len(d.buf)
+}
+
+// offsetAt converts pos, a position within buf, into an absolute offset
+// by adding baseOffset.
+func (d *decodeBuffer) offsetAt(pos int) int64 {
+	return int64(pos) + d.baseOffset
+}
+
+// previousOffsetStart is the absolute offset of prevStart.
+func (d *decodeBuffer) previousOffsetStart() int64 {
+	return int64(d.prevStart) + d.baseOffset
+}
+
+// previousOffsetEnd is the absolute offset of prevEnd.
+func (d *decodeBuffer) previousOffsetEnd() int64 {
+	return int64(d.prevEnd) + d.baseOffset
+}
+
+// previousBuffer is the segment of buf between prevStart and prevEnd,
+// which holds the previously read value.
+func (d *decodeBuffer) previousBuffer() []byte {
+	return d.buf[d.prevStart:d.prevEnd]
+}
+
+// unreadBuffer is the segment of buf from prevEnd onward,
+// which holds the data that has not been read yet.
+func (d *decodeBuffer) unreadBuffer() []byte {
+	return d.buf[d.prevEnd:]
+}
+
+// PreviousTokenOrValue returns the previously read token or value
+// unless it has been invalidated by a call to PeekKind.
+// If a token is just a delimiter, then this returns a 1-byte buffer.
+// This method is used for error reporting at the semantic layer.
+func (d *decodeBuffer) PreviousTokenOrValue() []byte {
+ b := d.previousBuffer()
+ // If peek was called, then the previous token or buffer is invalidated.
+ if d.peekPos > 0 || len(b) > 0 && b[0] == invalidateBufferByte {
+ return nil
+ }
+ // ReadToken does not preserve the buffer for null, bools, or delimiters.
+ // Manually re-construct that buffer.
+ if len(b) == 0 {
+ b = d.buf[:d.prevEnd] // entirety of the previous buffer
+ for _, tok := range []string{"null", "false", "true", "{", "}", "[", "]"} {
+ if len(b) >= len(tok) && string(b[len(b)-len(tok):]) == tok { // b ends with tok
+ return b[len(b)-len(tok):]
+ }
+ }
+ }
+ return b // NOTE: if the span was empty and no literal matched, this is still buf[:prevEnd]
+}
+
+// PeekKind retrieves the next token kind, but does not advance the read offset.
+//
+// It returns 0 if an error occurs. Any such error is cached until
+// the next read call and it is the caller's responsibility to eventually
+// follow up a PeekKind call with a read call.
+func (d *Decoder) PeekKind() Kind {
+ return d.s.PeekKind()
+}
+func (d *decoderState) PeekKind() Kind { // implementation of Decoder.PeekKind
+ // Check whether we have a cached peek result.
+ if d.peekPos > 0 {
+ return Kind(d.buf[d.peekPos]).normalize()
+ }
+
+ var err error
+ d.invalidatePreviousRead() // the previously returned token or value is no longer valid once we peek
+ pos := d.prevEnd // start scanning at the first unread byte
+
+ // Consume leading whitespace.
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+ if d.needMore(pos) {
+ if pos, err = d.consumeWhitespace(pos); err != nil {
+ if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 {
+ err = io.EOF // EOF possibly if no Tokens present after top-level value
+ }
+ d.peekPos, d.peekErr = -1, wrapSyntacticError(d, err, pos, 0)
+ return invalidKind // the cached error is reported by the next ReadToken or ReadValue call
+ }
+ }
+
+ // Consume colon or comma.
+ var delim byte
+ if c := d.buf[pos]; c == ':' || c == ',' {
+ delim = c
+ pos += 1
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+ if d.needMore(pos) {
+ if pos, err = d.consumeWhitespace(pos); err != nil {
+ err = wrapSyntacticError(d, err, pos, 0)
+ d.peekPos, d.peekErr = -1, d.checkDelimBeforeIOError(delim, err)
+ return invalidKind
+ }
+ }
+ }
+ next := Kind(d.buf[pos]).normalize()
+ if d.Tokens.needDelim(next) != delim { // the delimiter seen (or its absence) is not what the state machine requires
+ d.peekPos, d.peekErr = -1, d.checkDelim(delim, next)
+ return invalidKind
+ }
+
+ // This may set peekPos to zero, which is indistinguishable from
+ // the uninitialized state. While a small hit to performance, it is correct
+ // since ReadValue and ReadToken will disregard the cached result and
+ // recompute the next kind.
+ d.peekPos, d.peekErr = pos, nil
+ return next
+}
+
+// checkDelimBeforeIOError validates delim before an I/O error that occurred
+// after it is reported. It returns err unchanged when delim is acceptable,
+// and otherwise returns the delimiter error in its place.
+func (d *decoderState) checkDelimBeforeIOError(delim byte, err error) error {
+	// The I/O error prevented us from learning the kind of the next token,
+	// yet that kind is needed to judge whether delim could be valid.
+	// A JSON string is always permitted as the next token,
+	// so conservatively validate as though a string came next.
+	const assumedNext = Kind('"')
+	if d.Tokens.needDelim(assumedNext) == delim {
+		return err
+	}
+	return d.checkDelim(delim, assumedNext)
+}
+
+// CountNextDelimWhitespace reports how many of the upcoming unread bytes
+// are delimiter (',' or ':') or whitespace characters.
+// It is used for error reporting at the semantic layer.
+func (d *decoderState) CountNextDelimWhitespace() int {
+	d.PeekKind() // populate unreadBuffer
+	unread := d.unreadBuffer()
+	rest := bytes.TrimLeft(unread, ",: \n\r\t")
+	return len(unread) - len(rest)
+}
+
+// checkDelim checks whether delim is valid for the given next kind.
+func (d *decoderState) checkDelim(delim byte, next Kind) error {
+ where := "at start of value"
+ switch d.Tokens.needDelim(next) {
+ case delim:
+ return nil
+ case ':':
+ where = "after object name (expecting ':')"
+ case ',':
+ if d.Tokens.Last.isObject() {
+ where = "after object value (expecting ',' or '}')"
+ } else {
+ where = "after array element (expecting ',' or ']')"
+ }
+ }
+ pos := d.prevEnd // restore position to right after leading whitespace
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+ err := jsonwire.NewInvalidCharacterError(d.buf[pos:], where)
+ return wrapSyntacticError(d, err, pos, 0)
+}
+
+// SkipValue behaves like calling [Decoder.ReadValue] and throwing away the
+// result, except that no memory is wasted holding the entire value.
+func (d *Decoder) SkipValue() error {
+	return d.s.SkipValue()
+}
+
+// SkipValue is the implementation of [Decoder.SkipValue].
+func (d *decoderState) SkipValue() error {
+	if kind := d.PeekKind(); kind == '{' || kind == '[' {
+		// For objects and arrays, read tokens one at a time until
+		// the depth has returned to what it was before the opening token.
+		startDepth := d.Tokens.Depth()
+		for {
+			if _, err := d.ReadToken(); err != nil {
+				return err
+			}
+			if d.Tokens.Depth() <= startDepth {
+				return nil
+			}
+		}
+	}
+
+	// Anything else is read as a single value. If the next token is
+	// a '}' or ']', then the error is reported by ReadValue here.
+	var flags jsonwire.ValueFlags
+	_, err := d.ReadValue(&flags)
+	return err
+}
+
+// SkipValueRemainder skips the rest of a value whose opening '{' or '['
+// token has just been read. It does nothing unless the decoder is nested
+// below the top level and the current container has a length of zero.
+func (d *decoderState) SkipValueRemainder() error {
+	if d.Tokens.Depth() <= 1 || d.Tokens.Last.Length() != 0 {
+		return nil
+	}
+	// Read tokens until the depth drops below where we started.
+	startDepth := d.Tokens.Depth()
+	for d.Tokens.Depth() >= startDepth {
+		if _, err := d.ReadToken(); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// SkipUntil reads and discards tokens until the state machine
+// has reached or passed the given depth and length.
+func (d *decoderState) SkipUntil(depth int, length int64) error {
+	for {
+		cur := d.Tokens.Depth()
+		if cur < depth || (cur == depth && d.Tokens.Last.Length() >= length) {
+			return nil
+		}
+		if _, err := d.ReadToken(); err != nil {
+			return err
+		}
+	}
+}
+
+// ReadToken reads the next [Token], advancing the read offset.
+// The returned token is only valid until the next Peek, Read, or Skip call.
+// It returns [io.EOF] if there are no more tokens.
+func (d *Decoder) ReadToken() (Token, error) {
+ return d.s.ReadToken()
+}
+func (d *decoderState) ReadToken() (Token, error) { // implementation of Decoder.ReadToken
+ // Determine the next kind.
+ var err error
+ var next Kind
+ pos := d.peekPos
+ if pos != 0 { // a PeekKind result (a position, or -1 with an error) is cached
+ // Use cached peek result.
+ if d.peekErr != nil {
+ err := d.peekErr
+ d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error
+ return Token{}, err
+ }
+ next = Kind(d.buf[pos]).normalize()
+ d.peekPos = 0 // reset cache
+ } else {
+ d.invalidatePreviousRead()
+ pos = d.prevEnd
+
+ // Consume leading whitespace.
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+ if d.needMore(pos) {
+ if pos, err = d.consumeWhitespace(pos); err != nil {
+ if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 {
+ err = io.EOF // EOF possibly if no Tokens present after top-level value
+ }
+ return Token{}, wrapSyntacticError(d, err, pos, 0)
+ }
+ }
+
+ // Consume colon or comma.
+ var delim byte
+ if c := d.buf[pos]; c == ':' || c == ',' {
+ delim = c
+ pos += 1
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+ if d.needMore(pos) {
+ if pos, err = d.consumeWhitespace(pos); err != nil {
+ err = wrapSyntacticError(d, err, pos, 0)
+ return Token{}, d.checkDelimBeforeIOError(delim, err)
+ }
+ }
+ }
+ next = Kind(d.buf[pos]).normalize()
+ if d.Tokens.needDelim(next) != delim {
+ return Token{}, d.checkDelim(delim, next)
+ }
+ }
+
+ // Handle the next token.
+ var n int
+ switch next {
+ case 'n':
+ if jsonwire.ConsumeNull(d.buf[pos:]) == 0 { // fast path failed; fall back to the method that can fetch more input
+ pos, err = d.consumeLiteral(pos, "null")
+ if err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos, +1)
+ }
+ } else {
+ pos += len("null")
+ }
+ if err = d.Tokens.appendLiteral(); err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos-len("null"), +1) // report position at start of literal
+ }
+ d.prevStart, d.prevEnd = pos, pos // empty span: the literal's bytes are not preserved
+ return Null, nil
+
+ case 'f':
+ if jsonwire.ConsumeFalse(d.buf[pos:]) == 0 {
+ pos, err = d.consumeLiteral(pos, "false")
+ if err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos, +1)
+ }
+ } else {
+ pos += len("false")
+ }
+ if err = d.Tokens.appendLiteral(); err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos-len("false"), +1) // report position at start of literal
+ }
+ d.prevStart, d.prevEnd = pos, pos
+ return False, nil
+
+ case 't':
+ if jsonwire.ConsumeTrue(d.buf[pos:]) == 0 {
+ pos, err = d.consumeLiteral(pos, "true")
+ if err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos, +1)
+ }
+ } else {
+ pos += len("true")
+ }
+ if err = d.Tokens.appendLiteral(); err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos-len("true"), +1) // report position at start of literal
+ }
+ d.prevStart, d.prevEnd = pos, pos
+ return True, nil
+
+ case '"':
+ var flags jsonwire.ValueFlags // TODO: Preserve this in Token?
+ if n = jsonwire.ConsumeSimpleString(d.buf[pos:]); n == 0 {
+ oldAbsPos := d.baseOffset + int64(pos) // absolute offsets stay meaningful even if fetch shifts the buffer
+ pos, err = d.consumeString(&flags, pos)
+ newAbsPos := d.baseOffset + int64(pos)
+ n = int(newAbsPos - oldAbsPos) // length of the consumed string
+ if err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos, +1)
+ }
+ } else {
+ pos += n
+ }
+ if d.Tokens.Last.NeedObjectName() {
+ if !d.Flags.Get(jsonflags.AllowDuplicateNames) {
+ if !d.Tokens.Last.isValidNamespace() {
+ return Token{}, wrapSyntacticError(d, errInvalidNamespace, pos-n, +1)
+ }
+ if d.Tokens.Last.isActiveNamespace() && !d.Namespaces.Last().insertQuoted(d.buf[pos-n:pos], flags.IsVerbatim()) {
+ err = wrapWithObjectName(ErrDuplicateName, d.buf[pos-n:pos])
+ return Token{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of string
+ }
+ }
+ d.Names.ReplaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds
+ }
+ if err = d.Tokens.appendString(); err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of string
+ }
+ d.prevStart, d.prevEnd = pos-n, pos // previous span covers the string
+ return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil
+
+ case '0':
+ // NOTE: Since JSON numbers are not self-terminating,
+ // we need to make sure that the next byte is not part of a number.
+ if n = jsonwire.ConsumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) {
+ oldAbsPos := d.baseOffset + int64(pos)
+ pos, err = d.consumeNumber(pos)
+ newAbsPos := d.baseOffset + int64(pos)
+ n = int(newAbsPos - oldAbsPos)
+ if err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos, +1)
+ }
+ } else {
+ pos += n
+ }
+ if err = d.Tokens.appendNumber(); err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos-n, +1) // report position at start of number
+ }
+ d.prevStart, d.prevEnd = pos-n, pos
+ return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil
+
+ case '{':
+ if err = d.Tokens.pushObject(); err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos, +1)
+ }
+ d.Names.push()
+ if !d.Flags.Get(jsonflags.AllowDuplicateNames) {
+ d.Namespaces.push() // namespaces are only tracked when duplicate names are rejected
+ }
+ pos += 1
+ d.prevStart, d.prevEnd = pos, pos
+ return BeginObject, nil
+
+ case '}':
+ if err = d.Tokens.popObject(); err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos, +1)
+ }
+ d.Names.pop()
+ if !d.Flags.Get(jsonflags.AllowDuplicateNames) {
+ d.Namespaces.pop()
+ }
+ pos += 1
+ d.prevStart, d.prevEnd = pos, pos
+ return EndObject, nil
+
+ case '[':
+ if err = d.Tokens.pushArray(); err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos, +1)
+ }
+ pos += 1
+ d.prevStart, d.prevEnd = pos, pos
+ return BeginArray, nil
+
+ case ']':
+ if err = d.Tokens.popArray(); err != nil {
+ return Token{}, wrapSyntacticError(d, err, pos, +1)
+ }
+ pos += 1
+ d.prevStart, d.prevEnd = pos, pos
+ return EndArray, nil
+
+ default:
+ err = jsonwire.NewInvalidCharacterError(d.buf[pos:], "at start of value")
+ return Token{}, wrapSyntacticError(d, err, pos, +1)
+ }
+}
+
+// ReadValue returns the next raw JSON value, advancing the read offset.
+// The value is stripped of any leading or trailing whitespace and
+// contains the exact bytes of the input, which may contain invalid UTF-8
+// if [AllowInvalidUTF8] is specified.
+//
+// The returned value is only valid until the next Peek, Read, or Skip call and
+// may not be mutated while the Decoder remains in use.
+// If the decoder is currently at the end token for an object or array,
+// then it reports a [SyntacticError] and the internal state remains unchanged.
+// It returns [io.EOF] if there are no more values.
+func (d *Decoder) ReadValue() (Value, error) {
+ var flags jsonwire.ValueFlags // the flags are discarded by this wrapper
+ return d.s.ReadValue(&flags)
+}
+func (d *decoderState) ReadValue(flags *jsonwire.ValueFlags) (Value, error) { // implementation of Decoder.ReadValue; flags is passed on to consumeValue
+ // Determine the next kind.
+ var err error
+ var next Kind
+ pos := d.peekPos
+ if pos != 0 { // a PeekKind result (a position, or -1 with an error) is cached
+ // Use cached peek result.
+ if d.peekErr != nil {
+ err := d.peekErr
+ d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error
+ return nil, err
+ }
+ next = Kind(d.buf[pos]).normalize()
+ d.peekPos = 0 // reset cache
+ } else {
+ d.invalidatePreviousRead()
+ pos = d.prevEnd
+
+ // Consume leading whitespace.
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+ if d.needMore(pos) {
+ if pos, err = d.consumeWhitespace(pos); err != nil {
+ if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 {
+ err = io.EOF // EOF possibly if no Tokens present after top-level value
+ }
+ return nil, wrapSyntacticError(d, err, pos, 0)
+ }
+ }
+
+ // Consume colon or comma.
+ var delim byte
+ if c := d.buf[pos]; c == ':' || c == ',' {
+ delim = c
+ pos += 1
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+ if d.needMore(pos) {
+ if pos, err = d.consumeWhitespace(pos); err != nil {
+ err = wrapSyntacticError(d, err, pos, 0)
+ return nil, d.checkDelimBeforeIOError(delim, err)
+ }
+ }
+ }
+ next = Kind(d.buf[pos]).normalize()
+ if d.Tokens.needDelim(next) != delim {
+ return nil, d.checkDelim(delim, next)
+ }
+ }
+
+ // Handle the next value.
+ oldAbsPos := d.baseOffset + int64(pos) // absolute offsets stay meaningful even if fetch shifts the buffer
+ pos, err = d.consumeValue(flags, pos, d.Tokens.Depth())
+ newAbsPos := d.baseOffset + int64(pos)
+ n := int(newAbsPos - oldAbsPos) // length of the consumed value
+ if err != nil {
+ return nil, wrapSyntacticError(d, err, pos, +1)
+ }
+ switch next {
+ case 'n', 't', 'f':
+ err = d.Tokens.appendLiteral()
+ case '"':
+ if d.Tokens.Last.NeedObjectName() {
+ if !d.Flags.Get(jsonflags.AllowDuplicateNames) {
+ if !d.Tokens.Last.isValidNamespace() {
+ err = errInvalidNamespace
+ break
+ }
+ if d.Tokens.Last.isActiveNamespace() && !d.Namespaces.Last().insertQuoted(d.buf[pos-n:pos], flags.IsVerbatim()) {
+ err = wrapWithObjectName(ErrDuplicateName, d.buf[pos-n:pos])
+ break
+ }
+ }
+ d.Names.ReplaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds
+ }
+ err = d.Tokens.appendString()
+ case '0':
+ err = d.Tokens.appendNumber()
+ case '{': // the entire object was consumed by consumeValue above, so push and immediately pop
+ if err = d.Tokens.pushObject(); err != nil {
+ break
+ }
+ if err = d.Tokens.popObject(); err != nil {
+ panic("BUG: popObject should never fail immediately after pushObject: " + err.Error())
+ }
+ case '[': // the entire array was consumed by consumeValue above, so push and immediately pop
+ if err = d.Tokens.pushArray(); err != nil {
+ break
+ }
+ if err = d.Tokens.popArray(); err != nil {
+ panic("BUG: popArray should never fail immediately after pushArray: " + err.Error())
+ }
+ }
+ if err != nil {
+ return nil, wrapSyntacticError(d, err, pos-n, +1) // report position at start of value
+ }
+ d.prevEnd = pos
+ d.prevStart = pos - n
+ return d.buf[pos-n : pos : pos], nil // capacity is capped at pos so an append by the caller cannot overwrite unread data
+}
+
+// CheckNextValue verifies that the next value is syntactically valid
+// without advancing the read offset.
+// Any cached peek result is cleared before returning.
+func (d *decoderState) CheckNextValue() error {
+	d.PeekKind() // populates d.peekPos and d.peekErr
+	peekPos, peekErr := d.peekPos, d.peekErr
+	d.peekPos, d.peekErr = 0, nil
+	if peekErr != nil {
+		return peekErr
+	}
+
+	var flags jsonwire.ValueFlags
+	endPos, err := d.consumeValue(&flags, peekPos, d.Tokens.Depth())
+	if err != nil {
+		return wrapSyntacticError(d, err, endPos, +1)
+	}
+	return nil
+}
+
+// CheckEOF verifies that nothing other than whitespace remains in the input.
+// It returns nil if the input ends, a syntactic error if another character
+// is found, and any other error from consuming whitespace as is.
+func (d *decoderState) CheckEOF() error {
+	pos, err := d.consumeWhitespace(d.prevEnd)
+	if err == io.ErrUnexpectedEOF {
+		return nil // ran out of input, which is what we want
+	}
+	if err != nil {
+		return err
+	}
+	// A nil error means that there is at least one more unread byte.
+	charErr := jsonwire.NewInvalidCharacterError(d.buf[pos:], "after top-level value")
+	return wrapSyntacticError(d, charErr, pos, 0)
+}
+
+// consumeWhitespace consumes all whitespace starting at d.buf[pos:].
+// It returns the new position in d.buf immediately after the last whitespace.
+// If it returns a nil error, there is guaranteed to at least be one unread byte.
+//
+// The following pattern is common in this implementation:
+//
+// pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+// if d.needMore(pos) {
+// if pos, err = d.consumeWhitespace(pos); err != nil {
+// return ...
+// }
+// }
+//
+// It is difficult to simplify this without sacrificing performance since
+// consumeWhitespace must be inlined. The body of the if statement is
+// executed only in rare situations where we need to fetch more data.
+// Since fetching may return an error, we also need to check the error.
+func (d *decoderState) consumeWhitespace(pos int) (newPos int, err error) {
+ for {
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+ if d.needMore(pos) {
+ absPos := d.baseOffset + int64(pos) // remember the absolute offset, since fetch may change baseOffset
+ err = d.fetch() // will mutate d.buf and invalidate pos
+ pos = int(absPos - d.baseOffset) // recompute pos relative to the new baseOffset
+ if err != nil {
+ return pos, err
+ }
+ continue
+ }
+ return pos, nil
+ }
+}
+
+// consumeValue consumes a single JSON value starting at d.buf[pos:].
+// It returns the new position in d.buf immediately after the value.
+func (d *decoderState) consumeValue(flags *jsonwire.ValueFlags, pos, depth int) (newPos int, err error) {
+ for {
+ var n int
+ var err error // NOTE: shadows the named result err inside the loop
+ switch next := Kind(d.buf[pos]).normalize(); next {
+ case 'n':
+ if n = jsonwire.ConsumeNull(d.buf[pos:]); n == 0 {
+ n, err = jsonwire.ConsumeLiteral(d.buf[pos:], "null")
+ }
+ case 'f':
+ if n = jsonwire.ConsumeFalse(d.buf[pos:]); n == 0 {
+ n, err = jsonwire.ConsumeLiteral(d.buf[pos:], "false")
+ }
+ case 't':
+ if n = jsonwire.ConsumeTrue(d.buf[pos:]); n == 0 {
+ n, err = jsonwire.ConsumeLiteral(d.buf[pos:], "true")
+ }
+ case '"':
+ if n = jsonwire.ConsumeSimpleString(d.buf[pos:]); n == 0 {
+ return d.consumeString(flags, pos) // fall back to the method that can fetch more input
+ }
+ case '0':
+ // NOTE: Since JSON numbers are not self-terminating,
+ // we need to make sure that the next byte is not part of a number.
+ if n = jsonwire.ConsumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) {
+ return d.consumeNumber(pos)
+ }
+ case '{':
+ return d.consumeObject(flags, pos, depth)
+ case '[':
+ return d.consumeArray(flags, pos, depth)
+ default:
+ if (d.Tokens.Last.isObject() && next == ']') || (d.Tokens.Last.isArray() && next == '}') {
+ return pos, errMismatchDelim // closing delimiter does not match the kind of the current container
+ }
+ return pos, jsonwire.NewInvalidCharacterError(d.buf[pos:], "at start of value")
+ }
+ if err == io.ErrUnexpectedEOF { // the literal ran past the end of the buffer; fetch more and retry
+ absPos := d.baseOffset + int64(pos)
+ err = d.fetch() // will mutate d.buf and invalidate pos
+ pos = int(absPos - d.baseOffset)
+ if err != nil {
+ return pos + n, err
+ }
+ continue
+ }
+ return pos + n, err
+ }
+}
+
+// consumeLiteral consumes a single JSON literal starting at d.buf[pos:].
+// It returns the new position in d.buf immediately after the literal.
+func (d *decoderState) consumeLiteral(pos int, lit string) (newPos int, err error) {
+ for {
+ n, err := jsonwire.ConsumeLiteral(d.buf[pos:], lit) // NOTE: this err shadows the named result inside the loop
+ if err == io.ErrUnexpectedEOF { // the buffer ended before the literal did; fetch more and retry
+ absPos := d.baseOffset + int64(pos) // remember the absolute offset, since fetch may change baseOffset
+ err = d.fetch() // will mutate d.buf and invalidate pos
+ pos = int(absPos - d.baseOffset)
+ if err != nil {
+ return pos + n, err
+ }
+ continue
+ }
+ return pos + n, err
+ }
+}
+
+// consumeString scans one JSON string beginning at d.buf[pos:]
+// and reports the position in d.buf just past it.
+// The scan is resumable: the byte count returned by
+// jsonwire.ConsumeStringResumable is fed back in after each fetch.
+func (d *decoderState) consumeString(flags *jsonwire.ValueFlags, pos int) (newPos int, err error) {
+	var consumed int
+	for {
+		strictUTF8 := !d.Flags.Get(jsonflags.AllowInvalidUTF8)
+		consumed, err = jsonwire.ConsumeStringResumable(flags, d.buf[pos:], consumed, strictUTF8)
+		if err != io.ErrUnexpectedEOF {
+			return pos + consumed, err
+		}
+
+		// d.fetch mutates d.buf and invalidates pos, so pos is carried
+		// across the call as an absolute offset.
+		offset := d.baseOffset + int64(pos)
+		err = d.fetch()
+		pos = int(offset - d.baseOffset)
+		if err != nil {
+			return pos + consumed, err
+		}
+	}
+}
+
+// consumeNumber scans one JSON number beginning at d.buf[pos:]
+// and reports the position in d.buf just past it.
+// The scan is resumable: both the byte count and the scanner state
+// returned by jsonwire.ConsumeNumberResumable are fed back in after each fetch.
+func (d *decoderState) consumeNumber(pos int) (newPos int, err error) {
+	var (
+		consumed int
+		state    jsonwire.ConsumeNumberState
+	)
+	for {
+		consumed, state, err = jsonwire.ConsumeNumberResumable(d.buf[pos:], consumed, state)
+		// NOTE: JSON numbers are not self-terminating, so the byte after
+		// the number must be available to rule out more digits.
+		if err != io.ErrUnexpectedEOF && !d.needMore(pos+consumed) {
+			return pos + consumed, err
+		}
+
+		// A scan that ended without error may already be a whole number;
+		// remember that in case the input simply ends here.
+		complete := err == nil
+
+		// d.fetch mutates d.buf and invalidates pos, so pos is carried
+		// across the call as an absolute offset.
+		offset := d.baseOffset + int64(pos)
+		err = d.fetch()
+		pos = int(offset - d.baseOffset)
+		switch {
+		case err == nil:
+			// More input arrived; resume scanning.
+		case complete && err == io.ErrUnexpectedEOF:
+			return pos + consumed, nil
+		default:
+			return pos, err
+		}
+	}
+}
+
+// consumeObject scans one complete JSON object whose '{' is at d.buf[pos]
+// and reports the position in d.buf just past the closing '}'.
+//
+// It panics if d.buf[pos] is not '{' and returns errMaxDepth when depth
+// equals maxNestingDepth+1. Unless jsonflags.AllowDuplicateNames is set,
+// every member name is inserted into a namespace pushed for the duration
+// of the call, and a failed insertion is reported as ErrDuplicateName.
+// Most errors raised after a name has been read are wrapped with that name.
+func (d *decoderState) consumeObject(flags *jsonwire.ValueFlags, pos, depth int) (newPos int, err error) {
+	var names *objectNamespace
+	if !d.Flags.Get(jsonflags.AllowDuplicateNames) {
+		d.Namespaces.push()
+		defer d.Namespaces.pop()
+		names = d.Namespaces.Last()
+	}
+
+	// Check the opening delimiter and the nesting limit.
+	if uint(pos) >= uint(len(d.buf)) || d.buf[pos] != '{' {
+		panic("BUG: consumeObject must be called with a buffer that starts with '{'")
+	}
+	if depth == maxNestingDepth+1 {
+		return pos, errMaxDepth
+	}
+	pos++
+
+	// Skip whitespace after '{'; an immediate '}' ends an empty object.
+	pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+	if d.needMore(pos) {
+		pos, err = d.consumeWhitespace(pos)
+		if err != nil {
+			return pos, err
+		}
+	}
+	if d.buf[pos] == '}' {
+		return pos + 1, nil
+	}
+
+	depth++
+	for {
+		// Skip whitespace before the member name.
+		pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+		if d.needMore(pos) {
+			pos, err = d.consumeWhitespace(pos)
+			if err != nil {
+				return pos, err
+			}
+		}
+
+		// Read the member name, trying the simple string consumer first.
+		var nameFlags jsonwire.ValueFlags
+		nameLen := jsonwire.ConsumeSimpleString(d.buf[pos:])
+		if nameLen != 0 {
+			pos += nameLen
+		} else {
+			// The name length is measured in absolute offsets because
+			// d.baseOffset is re-read after the call.
+			startAbs := d.baseOffset + int64(pos)
+			pos, err = d.consumeString(&nameFlags, pos)
+			endAbs := d.baseOffset + int64(pos)
+			nameLen = int(endAbs - startAbs)
+			flags.Join(nameFlags)
+			if err != nil {
+				return pos, err
+			}
+		}
+		quotedName := d.buf[pos-nameLen : pos]
+		if !d.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(quotedName, nameFlags.IsVerbatim()) {
+			return pos - nameLen, wrapWithObjectName(ErrDuplicateName, quotedName)
+		}
+
+		// Skip whitespace after the name and require the ':' separator.
+		pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+		if d.needMore(pos) {
+			pos, err = d.consumeWhitespace(pos)
+			if err != nil {
+				return pos, wrapWithObjectName(err, quotedName)
+			}
+		}
+		if d.buf[pos] != ':' {
+			syntaxErr := jsonwire.NewInvalidCharacterError(d.buf[pos:], "after object name (expecting ':')")
+			return pos, wrapWithObjectName(syntaxErr, quotedName)
+		}
+		pos++
+
+		// Skip whitespace before the member value, then read the value.
+		pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+		if d.needMore(pos) {
+			pos, err = d.consumeWhitespace(pos)
+			if err != nil {
+				return pos, wrapWithObjectName(err, quotedName)
+			}
+		}
+		pos, err = d.consumeValue(flags, pos, depth)
+		if err != nil {
+			return pos, wrapWithObjectName(err, quotedName)
+		}
+
+		// Skip whitespace after the value and decide how to proceed.
+		pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+		if d.needMore(pos) {
+			pos, err = d.consumeWhitespace(pos)
+			if err != nil {
+				return pos, err
+			}
+		}
+		switch d.buf[pos] {
+		case ',':
+			// Another member follows; loop around for its name.
+			pos++
+		case '}':
+			return pos + 1, nil
+		default:
+			return pos, jsonwire.NewInvalidCharacterError(d.buf[pos:], "after object value (expecting ',' or '}')")
+		}
+	}
+}
+
+// consumeArray consumes a single JSON array starting at d.buf[pos:].
+// It returns the new position in d.buf immediately after the array.
+func (d *decoderState) consumeArray(flags *jsonwire.ValueFlags, pos, depth int) (newPos int, err error) {
+ // Handle before start.
+ if uint(pos) >= uint(len(d.buf)) || d.buf[pos] != '[' {
+ panic("BUG: consumeArray must be called with a buffer that starts with '['")
+ } else if depth == maxNestingDepth+1 {
+ return pos, errMaxDepth
+ }
+ pos++
+
+ // Handle after start.
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+ if d.needMore(pos) {
+ if pos, err = d.consumeWhitespace(pos); err != nil {
+ return pos, err
+ }
+ }
+ if d.buf[pos] == ']' {
+ pos++
+ return pos, nil
+ }
+
+ var idx int64
+ depth++
+ for {
+ // Handle before value.
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+ if d.needMore(pos) {
+ if pos, err = d.consumeWhitespace(pos); err != nil {
+ return pos, err
+ }
+ }
+ pos, err = d.consumeValue(flags, pos, depth)
+ if err != nil {
+ return pos, wrapWithArrayIndex(err, idx)
+ }
+
+ // Handle after value.
+ pos += jsonwire.ConsumeWhitespace(d.buf[pos:])
+ if d.needMore(pos) {
+ if pos, err = d.consumeWhitespace(pos); err != nil {
+ return pos, err
+ }
+ }
+ switch d.buf[pos] {
+ case ',':
+ pos++
+ idx++
+ continue
+ case ']':
+ pos++
+ return pos, nil
+ default:
+ return pos, jsonwire.NewInvalidCharacterError(d.buf[pos:], "after array element (expecting ',' or ']')")
+ }
+ }
+}
+
+// InputOffset reports the current byte offset into the input: the location
+// of the byte immediately following the most recently returned token or value.
+// Because of internal buffering, more bytes than this may already have been
+// read from the underlying [io.Reader].
+func (d *Decoder) InputOffset() int64 {
+	offset := d.s.previousOffsetEnd()
+	return offset
+}
+
+// UnreadBuffer returns whatever data is left in the unread buffer,
+// which may be empty.
+// Callers must not mutate the returned buffer while the Decoder is still
+// in use, and its contents are only valid until the next Peek, Read,
+// or Skip call.
+func (d *Decoder) UnreadBuffer() []byte {
+	unread := d.s.unreadBuffer()
+	return unread
+}
+
+// StackDepth reports the depth of the state machine for the JSON data read so far.
+// Each stack level corresponds to one nested JSON object or array.
+// The depth grows by one whenever a [BeginObject] or [BeginArray] token is
+// encountered and shrinks by one whenever an [EndObject] or [EndArray] token
+// is encountered.
+// The depth is zero-indexed, with zero denoting the top-level JSON value.
+func (d *Decoder) StackDepth() int {
+	// NOTE: Keep in sync with Encoder.StackDepth.
+	levels := d.s.Tokens.Depth()
+	return levels - 1
+}
+
+// StackIndex reports information about stack level i,
+// which must lie between 0 and [Decoder.StackDepth], inclusive.
+// The reported kind is:
+//
+//   - 0 for a level of zero,
+//   - '{' for a level representing a JSON object, and
+//   - '[' for a level representing a JSON array.
+//
+// The second result is the length of that JSON object or array.
+// Object names and values are counted separately, so the number of
+// members is half the length, and a complete JSON object always has
+// an even length.
+func (d *Decoder) StackIndex(i int) (Kind, int64) {
+	// NOTE: Keep in sync with Encoder.StackIndex.
+	level := d.s.Tokens.index(i)
+	if i > 0 {
+		if level.isObject() {
+			return '{', level.Length()
+		}
+		if level.isArray() {
+			return '[', level.Length()
+		}
+	}
+	return 0, level.Length()
+}
+
+// StackPointer returns a JSON Pointer (RFC 6901) that refers to
+// the most recently read value.
+func (d *Decoder) StackPointer() Pointer {
+	raw := d.s.AppendStackPointer(nil, -1)
+	return Pointer(raw)
+}
+
+// AppendStackPointer appends the stack pointer selected by where to b
+// and returns the extended slice, delegating to d.state.appendStackPointer.
+// Before delegating it passes d.buf to d.Names.copyQuotedBuffer.
+// NOTE(review): presumably that keeps recorded object names usable
+// independently of d.buf; confirm against copyQuotedBuffer.
+func (d *decoderState) AppendStackPointer(b []byte, where int) []byte {
+	d.Names.copyQuotedBuffer(d.buf)
+	b = d.state.appendStackPointer(b, where)
+	return b
+}