state.go

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.jsonv2
     6  
     7  package jsontext
     8  
     9  import (
    10  	"errors"
    11  	"iter"
    12  	"math"
    13  	"strconv"
    14  	"strings"
    15  	"unicode/utf8"
    16  
    17  	"encoding/json/internal/jsonwire"
    18  )
    19  
    20  // ErrDuplicateName indicates that a JSON token could not be
    21  // encoded or decoded because it results in a duplicate JSON object name.
    22  // This error is directly wrapped within a [SyntacticError] when produced.
    23  //
    24  // The name of a duplicate JSON object member can be extracted as:
    25  //
    26  //	err := ...
    27  //	var serr jsontext.SyntacticError
    28  //	if errors.As(err, &serr) && serr.Err == jsontext.ErrDuplicateName {
    29  //		ptr := serr.JSONPointer // JSON pointer to duplicate name
    30  //		name := ptr.LastToken() // duplicate name itself
    31  //		...
    32  //	}
    33  //
    34  // This error is only returned if [AllowDuplicateNames] is false.
    35  var ErrDuplicateName = errors.New("duplicate object member name")
    36  
    37  // ErrNonStringName indicates that a JSON token could not be
    38  // encoded or decoded because it is not a string,
    39  // as required for JSON object names according to RFC 8259, section 4.
    40  // This error is directly wrapped within a [SyntacticError] when produced.
    41  var ErrNonStringName = errors.New("object member name must be a string")
    42  
    43  var (
    44  	errMissingValue  = errors.New("missing value after object name")
    45  	errMismatchDelim = errors.New("mismatching structural token for object or array")
    46  	errMaxDepth      = errors.New("exceeded max depth")
    47  
    48  	errInvalidNamespace = errors.New("object namespace is in an invalid state")
    49  )
    50  
    51  // Per RFC 8259, section 9, implementations may enforce a maximum depth.
    52  // Such a limit is necessary to prevent stack overflows.
    53  const maxNestingDepth = 10000
    54  
    55  type state struct {
    56  	// Tokens validates whether the next token kind is valid.
    57  	Tokens stateMachine
    58  
    59  	// Names is a stack of object names.
    60  	Names objectNameStack
    61  
    62  	// Namespaces is a stack of object namespaces.
    63  	// For performance reasons, Encoder or Decoder may not update this
    64  	// if Marshal or Unmarshal is able to track names in a more efficient way.
    65  	// See makeMapArshaler and makeStructArshaler.
    66  	// Not used if AllowDuplicateNames is true.
    67  	Namespaces objectNamespaceStack
    68  }
    69  
    70  // needObjectValue reports whether the next token should be an object value.
    71  // This method is used by [wrapSyntacticError].
    72  func (s *state) needObjectValue() bool {
    73  	return s.Tokens.Last.needObjectValue()
    74  }
    75  
    76  func (s *state) reset() {
    77  	s.Tokens.reset()
    78  	s.Names.reset()
    79  	s.Namespaces.reset()
    80  }
    81  
    82  // Pointer is a JSON Pointer (RFC 6901) that references a particular JSON value
    83  // relative to the root of the top-level JSON value.
    84  //
    85  // A Pointer is a slash-separated list of tokens, where each token is
    86  // either a JSON object name or an index to a JSON array element
    87  // encoded as a base-10 integer value.
    88  // It is impossible to distinguish between an array index and an object name
    89  // (that happens to be an base-10 encoded integer) without also knowing
    90  // the structure of the top-level JSON value that the pointer refers to.
    91  //
    92  // There is exactly one representation of a pointer to a particular value,
    93  // so comparability of Pointer values is equivalent to checking whether
    94  // they both point to the exact same value.
    95  type Pointer string
    96  
    97  // IsValid reports whether p is a valid JSON Pointer according to RFC 6901.
    98  // Note that the concatenation of two valid pointers produces a valid pointer.
    99  func (p Pointer) IsValid() bool {
   100  	for i, r := range p {
   101  		switch {
   102  		case r == '~' && (i+1 == len(p) || (p[i+1] != '0' && p[i+1] != '1')):
   103  			return false // invalid escape
   104  		case r == '\ufffd' && !strings.HasPrefix(string(p[i:]), "\ufffd"):
   105  			return false // invalid UTF-8
   106  		}
   107  	}
   108  	return len(p) == 0 || p[0] == '/'
   109  }
   110  
   111  // Contains reports whether the JSON value that p points to
   112  // is equal to or contains the JSON value that pc points to.
   113  func (p Pointer) Contains(pc Pointer) bool {
   114  	// Invariant: len(p) <= len(pc) if p.Contains(pc)
   115  	suffix, ok := strings.CutPrefix(string(pc), string(p))
   116  	return ok && (suffix == "" || suffix[0] == '/')
   117  }
   118  
   119  // Parent strips off the last token and returns the remaining pointer.
   120  // The parent of an empty p is an empty string.
   121  func (p Pointer) Parent() Pointer {
   122  	return p[:max(strings.LastIndexByte(string(p), '/'), 0)]
   123  }
   124  
   125  // LastToken returns the last token in the pointer.
   126  // The last token of an empty p is an empty string.
   127  func (p Pointer) LastToken() string {
   128  	last := p[max(strings.LastIndexByte(string(p), '/'), 0):]
   129  	return unescapePointerToken(strings.TrimPrefix(string(last), "/"))
   130  }
   131  
   132  // AppendToken appends a token to the end of p and returns the full pointer.
   133  func (p Pointer) AppendToken(tok string) Pointer {
   134  	return Pointer(appendEscapePointerName([]byte(p+"/"), tok))
   135  }
   136  
   137  // TODO: Add Pointer.AppendTokens,
   138  // but should this take in a ...string or an iter.Seq[string]?
   139  
   140  // Tokens returns an iterator over the reference tokens in the JSON pointer,
   141  // starting from the first token until the last token (unless stopped early).
   142  func (p Pointer) Tokens() iter.Seq[string] {
   143  	return func(yield func(string) bool) {
   144  		for len(p) > 0 {
   145  			p = Pointer(strings.TrimPrefix(string(p), "/"))
   146  			i := min(uint(strings.IndexByte(string(p), '/')), uint(len(p)))
   147  			if !yield(unescapePointerToken(string(p)[:i])) {
   148  				return
   149  			}
   150  			p = p[i:]
   151  		}
   152  	}
   153  }
   154  
   155  func unescapePointerToken(token string) string {
   156  	if strings.Contains(token, "~") {
   157  		// Per RFC 6901, section 3, unescape '~' and '/' characters.
   158  		token = strings.ReplaceAll(token, "~1", "/")
   159  		token = strings.ReplaceAll(token, "~0", "~")
   160  	}
   161  	return token
   162  }
   163  
   164  // appendStackPointer appends a JSON Pointer (RFC 6901) to the current value.
   165  //
   166  //   - If where is -1, then it points to the previously processed token.
   167  //
   168  //   - If where is 0, then it points to the parent JSON object or array,
   169  //     or an object member if in-between an object member key and value.
   170  //     This is useful when the position is ambiguous whether
   171  //     we are interested in the previous or next token, or
   172  //     when we are uncertain whether the next token
   173  //     continues or terminates the current object or array.
   174  //
   175  //   - If where is +1, then it points to the next expected value,
   176  //     assuming that it continues the current JSON object or array.
   177  //     As a special case, if the next token is a JSON object name,
   178  //     then it points to the parent JSON object.
   179  //
   180  // Invariant: Must call s.names.copyQuotedBuffer beforehand.
   181  func (s state) appendStackPointer(b []byte, where int) []byte {
   182  	var objectDepth int
   183  	for i := 1; i < s.Tokens.Depth(); i++ {
   184  		e := s.Tokens.index(i)
   185  		arrayDelta := -1 // by default point to previous array element
   186  		if isLast := i == s.Tokens.Depth()-1; isLast {
   187  			switch {
   188  			case where < 0 && e.Length() == 0 || where == 0 && !e.needObjectValue() || where > 0 && e.NeedObjectName():
   189  				return b
   190  			case where > 0 && e.isArray():
   191  				arrayDelta = 0 // point to next array element
   192  			}
   193  		}
   194  		switch {
   195  		case e.isObject():
   196  			b = appendEscapePointerName(append(b, '/'), s.Names.getUnquoted(objectDepth))
   197  			objectDepth++
   198  		case e.isArray():
   199  			b = strconv.AppendUint(append(b, '/'), uint64(e.Length()+int64(arrayDelta)), 10)
   200  		}
   201  	}
   202  	return b
   203  }
   204  
   205  func appendEscapePointerName[Bytes ~[]byte | ~string](b []byte, name Bytes) []byte {
   206  	for _, r := range string(name) {
   207  		// Per RFC 6901, section 3, escape '~' and '/' characters.
   208  		switch r {
   209  		case '~':
   210  			b = append(b, "~0"...)
   211  		case '/':
   212  			b = append(b, "~1"...)
   213  		default:
   214  			b = utf8.AppendRune(b, r)
   215  		}
   216  	}
   217  	return b
   218  }
   219  
   220  // stateMachine is a push-down automaton that validates whether
   221  // a sequence of tokens is valid or not according to the JSON grammar.
   222  // It is useful for both encoding and decoding.
   223  //
   224  // It is a stack where each entry represents a nested JSON object or array.
   225  // The stack has a minimum depth of 1 where the first level is a
   226  // virtual JSON array to handle a stream of top-level JSON values.
   227  // The top-level virtual JSON array is special in that it doesn't require commas
   228  // between each JSON value.
   229  //
   230  // For performance, most methods are carefully written to be inlinable.
   231  // The zero value is a valid state machine ready for use.
   232  type stateMachine struct {
   233  	Stack []stateEntry
   234  	Last  stateEntry
   235  }
   236  
   237  // reset resets the state machine.
   238  // The machine always starts with a minimum depth of 1.
   239  func (m *stateMachine) reset() {
   240  	m.Stack = m.Stack[:0]
   241  	if cap(m.Stack) > 1<<10 {
   242  		m.Stack = nil
   243  	}
   244  	m.Last = stateTypeArray
   245  }
   246  
   247  // Depth is the current nested depth of JSON objects and arrays.
   248  // It is one-indexed (i.e., top-level values have a depth of 1).
   249  func (m stateMachine) Depth() int {
   250  	return len(m.Stack) + 1
   251  }
   252  
   253  // index returns a reference to the ith entry.
   254  // It is only valid until the next push method call.
   255  func (m *stateMachine) index(i int) *stateEntry {
   256  	if i == len(m.Stack) {
   257  		return &m.Last
   258  	}
   259  	return &m.Stack[i]
   260  }
   261  
   262  // DepthLength reports the current nested depth and
   263  // the length of the last JSON object or array.
   264  func (m stateMachine) DepthLength() (int, int64) {
   265  	return m.Depth(), m.Last.Length()
   266  }
   267  
   268  // appendLiteral appends a JSON literal as the next token in the sequence.
   269  // If an error is returned, the state is not mutated.
   270  func (m *stateMachine) appendLiteral() error {
   271  	switch {
   272  	case m.Last.NeedObjectName():
   273  		return ErrNonStringName
   274  	case !m.Last.isValidNamespace():
   275  		return errInvalidNamespace
   276  	default:
   277  		m.Last.Increment()
   278  		return nil
   279  	}
   280  }
   281  
   282  // appendString appends a JSON string as the next token in the sequence.
   283  // If an error is returned, the state is not mutated.
   284  func (m *stateMachine) appendString() error {
   285  	switch {
   286  	case !m.Last.isValidNamespace():
   287  		return errInvalidNamespace
   288  	default:
   289  		m.Last.Increment()
   290  		return nil
   291  	}
   292  }
   293  
   294  // appendNumber appends a JSON number as the next token in the sequence.
   295  // If an error is returned, the state is not mutated.
   296  func (m *stateMachine) appendNumber() error {
   297  	return m.appendLiteral()
   298  }
   299  
   300  // pushObject appends a JSON begin object token as next in the sequence.
   301  // If an error is returned, the state is not mutated.
   302  func (m *stateMachine) pushObject() error {
   303  	switch {
   304  	case m.Last.NeedObjectName():
   305  		return ErrNonStringName
   306  	case !m.Last.isValidNamespace():
   307  		return errInvalidNamespace
   308  	case len(m.Stack) == maxNestingDepth:
   309  		return errMaxDepth
   310  	default:
   311  		m.Last.Increment()
   312  		m.Stack = append(m.Stack, m.Last)
   313  		m.Last = stateTypeObject
   314  		return nil
   315  	}
   316  }
   317  
   318  // popObject appends a JSON end object token as next in the sequence.
   319  // If an error is returned, the state is not mutated.
   320  func (m *stateMachine) popObject() error {
   321  	switch {
   322  	case !m.Last.isObject():
   323  		return errMismatchDelim
   324  	case m.Last.needObjectValue():
   325  		return errMissingValue
   326  	case !m.Last.isValidNamespace():
   327  		return errInvalidNamespace
   328  	default:
   329  		m.Last = m.Stack[len(m.Stack)-1]
   330  		m.Stack = m.Stack[:len(m.Stack)-1]
   331  		return nil
   332  	}
   333  }
   334  
   335  // pushArray appends a JSON begin array token as next in the sequence.
   336  // If an error is returned, the state is not mutated.
   337  func (m *stateMachine) pushArray() error {
   338  	switch {
   339  	case m.Last.NeedObjectName():
   340  		return ErrNonStringName
   341  	case !m.Last.isValidNamespace():
   342  		return errInvalidNamespace
   343  	case len(m.Stack) == maxNestingDepth:
   344  		return errMaxDepth
   345  	default:
   346  		m.Last.Increment()
   347  		m.Stack = append(m.Stack, m.Last)
   348  		m.Last = stateTypeArray
   349  		return nil
   350  	}
   351  }
   352  
   353  // popArray appends a JSON end array token as next in the sequence.
   354  // If an error is returned, the state is not mutated.
   355  func (m *stateMachine) popArray() error {
   356  	switch {
   357  	case !m.Last.isArray() || len(m.Stack) == 0: // forbid popping top-level virtual JSON array
   358  		return errMismatchDelim
   359  	case !m.Last.isValidNamespace():
   360  		return errInvalidNamespace
   361  	default:
   362  		m.Last = m.Stack[len(m.Stack)-1]
   363  		m.Stack = m.Stack[:len(m.Stack)-1]
   364  		return nil
   365  	}
   366  }
   367  
   368  // NeedIndent reports whether indent whitespace should be injected.
   369  // A zero value means that no whitespace should be injected.
   370  // A positive value means '\n', indentPrefix, and (n-1) copies of indentBody
   371  // should be appended to the output immediately before the next token.
   372  func (m stateMachine) NeedIndent(next Kind) (n int) {
   373  	willEnd := next == '}' || next == ']'
   374  	switch {
   375  	case m.Depth() == 1:
   376  		return 0 // top-level values are never indented
   377  	case m.Last.Length() == 0 && willEnd:
   378  		return 0 // an empty object or array is never indented
   379  	case m.Last.Length() == 0 || m.Last.needImplicitComma(next):
   380  		return m.Depth()
   381  	case willEnd:
   382  		return m.Depth() - 1
   383  	default:
   384  		return 0
   385  	}
   386  }
   387  
   388  // MayAppendDelim appends a colon or comma that may precede the next token.
   389  func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte {
   390  	switch {
   391  	case m.Last.needImplicitColon():
   392  		return append(b, ':')
   393  	case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
   394  		return append(b, ',')
   395  	default:
   396  		return b
   397  	}
   398  }
   399  
   400  // needDelim reports whether a colon or comma token should be implicitly emitted
   401  // before the next token of the specified kind.
   402  // A zero value means no delimiter should be emitted.
   403  func (m stateMachine) needDelim(next Kind) (delim byte) {
   404  	switch {
   405  	case m.Last.needImplicitColon():
   406  		return ':'
   407  	case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
   408  		return ','
   409  	default:
   410  		return 0
   411  	}
   412  }
   413  
   414  // InvalidateDisabledNamespaces marks all disabled namespaces as invalid.
   415  //
   416  // For efficiency, Marshal and Unmarshal may disable namespaces since there are
   417  // more efficient ways to track duplicate names. However, if an error occurs,
   418  // the namespaces in Encoder or Decoder will be left in an inconsistent state.
   419  // Mark the namespaces as invalid so that future method calls on
   420  // Encoder or Decoder will return an error.
   421  func (m *stateMachine) InvalidateDisabledNamespaces() {
   422  	for i := range m.Depth() {
   423  		e := m.index(i)
   424  		if !e.isActiveNamespace() {
   425  			e.invalidateNamespace()
   426  		}
   427  	}
   428  }
   429  
   430  // stateEntry encodes several artifacts within a single unsigned integer:
   431  //   - whether this represents a JSON object or array,
   432  //   - whether this object should check for duplicate names, and
   433  //   - how many elements are in this JSON object or array.
   434  type stateEntry uint64
   435  
   436  const (
   437  	// The type mask (1 bit) records whether this is a JSON object or array.
   438  	stateTypeMask   stateEntry = 0x8000_0000_0000_0000
   439  	stateTypeObject stateEntry = 0x8000_0000_0000_0000
   440  	stateTypeArray  stateEntry = 0x0000_0000_0000_0000
   441  
   442  	// The name check mask (2 bit) records whether to update
   443  	// the namespaces for the current JSON object and
   444  	// whether the namespace is valid.
   445  	stateNamespaceMask    stateEntry = 0x6000_0000_0000_0000
   446  	stateDisableNamespace stateEntry = 0x4000_0000_0000_0000
   447  	stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000
   448  
   449  	// The count mask (61 bits) records the number of elements.
   450  	stateCountMask    stateEntry = 0x1fff_ffff_ffff_ffff
   451  	stateCountLSBMask stateEntry = 0x0000_0000_0000_0001
   452  	stateCountOdd     stateEntry = 0x0000_0000_0000_0001
   453  	stateCountEven    stateEntry = 0x0000_0000_0000_0000
   454  )
   455  
   456  // Length reports the number of elements in the JSON object or array.
   457  // Each name and value in an object entry is treated as a separate element.
   458  func (e stateEntry) Length() int64 {
   459  	return int64(e & stateCountMask)
   460  }
   461  
   462  // isObject reports whether this is a JSON object.
   463  func (e stateEntry) isObject() bool {
   464  	return e&stateTypeMask == stateTypeObject
   465  }
   466  
   467  // isArray reports whether this is a JSON array.
   468  func (e stateEntry) isArray() bool {
   469  	return e&stateTypeMask == stateTypeArray
   470  }
   471  
   472  // NeedObjectName reports whether the next token must be a JSON string,
   473  // which is necessary for JSON object names.
   474  func (e stateEntry) NeedObjectName() bool {
   475  	return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven
   476  }
   477  
   478  // needImplicitColon reports whether an colon should occur next,
   479  // which always occurs after JSON object names.
   480  func (e stateEntry) needImplicitColon() bool {
   481  	return e.needObjectValue()
   482  }
   483  
   484  // needObjectValue reports whether the next token must be a JSON value,
   485  // which is necessary after every JSON object name.
   486  func (e stateEntry) needObjectValue() bool {
   487  	return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountOdd
   488  }
   489  
   490  // needImplicitComma reports whether an comma should occur next,
   491  // which always occurs after a value in a JSON object or array
   492  // before the next value (or name).
   493  func (e stateEntry) needImplicitComma(next Kind) bool {
   494  	return !e.needObjectValue() && e.Length() > 0 && next != '}' && next != ']'
   495  }
   496  
   497  // Increment increments the number of elements for the current object or array.
   498  // This assumes that overflow won't practically be an issue since
   499  // 1<<bits.OnesCount(stateCountMask) is sufficiently large.
   500  func (e *stateEntry) Increment() {
   501  	(*e)++
   502  }
   503  
   504  // decrement decrements the number of elements for the current object or array.
   505  // It is the callers responsibility to ensure that e.length > 0.
   506  func (e *stateEntry) decrement() {
   507  	(*e)--
   508  }
   509  
   510  // DisableNamespace disables the JSON object namespace such that the
   511  // Encoder or Decoder no longer updates the namespace.
   512  func (e *stateEntry) DisableNamespace() {
   513  	*e |= stateDisableNamespace
   514  }
   515  
   516  // isActiveNamespace reports whether the JSON object namespace is actively
   517  // being updated and used for duplicate name checks.
   518  func (e stateEntry) isActiveNamespace() bool {
   519  	return e&(stateDisableNamespace) == 0
   520  }
   521  
   522  // invalidateNamespace marks the JSON object namespace as being invalid.
   523  func (e *stateEntry) invalidateNamespace() {
   524  	*e |= stateInvalidNamespace
   525  }
   526  
   527  // isValidNamespace reports whether the JSON object namespace is valid.
   528  func (e stateEntry) isValidNamespace() bool {
   529  	return e&(stateInvalidNamespace) == 0
   530  }
   531  
   532  // objectNameStack is a stack of names when descending into a JSON object.
   533  // In contrast to objectNamespaceStack, this only has to remember a single name
   534  // per JSON object.
   535  //
   536  // This data structure may contain offsets to encodeBuffer or decodeBuffer.
   537  // It violates clean abstraction of layers, but is significantly more efficient.
   538  // This ensures that popping and pushing in the common case is a trivial
   539  // push/pop of an offset integer.
   540  //
   541  // The zero value is an empty names stack ready for use.
   542  type objectNameStack struct {
   543  	// offsets is a stack of offsets for each name.
   544  	// A non-negative offset is the ending offset into the local names buffer.
   545  	// A negative offset is the bit-wise inverse of a starting offset into
   546  	// a remote buffer (e.g., encodeBuffer or decodeBuffer).
   547  	// A math.MinInt offset at the end implies that the last object is empty.
   548  	// Invariant: Positive offsets always occur before negative offsets.
   549  	offsets []int
   550  	// unquotedNames is a back-to-back concatenation of names.
   551  	unquotedNames []byte
   552  }
   553  
   554  func (ns *objectNameStack) reset() {
   555  	ns.offsets = ns.offsets[:0]
   556  	ns.unquotedNames = ns.unquotedNames[:0]
   557  	if cap(ns.offsets) > 1<<6 {
   558  		ns.offsets = nil // avoid pinning arbitrarily large amounts of memory
   559  	}
   560  	if cap(ns.unquotedNames) > 1<<10 {
   561  		ns.unquotedNames = nil // avoid pinning arbitrarily large amounts of memory
   562  	}
   563  }
   564  
   565  func (ns *objectNameStack) length() int {
   566  	return len(ns.offsets)
   567  }
   568  
   569  // getUnquoted retrieves the ith unquoted name in the stack.
   570  // It returns an empty string if the last object is empty.
   571  //
   572  // Invariant: Must call copyQuotedBuffer beforehand.
   573  func (ns *objectNameStack) getUnquoted(i int) []byte {
   574  	ns.ensureCopiedBuffer()
   575  	if i == 0 {
   576  		return ns.unquotedNames[:ns.offsets[0]]
   577  	} else {
   578  		return ns.unquotedNames[ns.offsets[i-1]:ns.offsets[i-0]]
   579  	}
   580  }
   581  
   582  // invalidOffset indicates that the last JSON object currently has no name.
   583  const invalidOffset = math.MinInt
   584  
   585  // push descends into a nested JSON object.
   586  func (ns *objectNameStack) push() {
   587  	ns.offsets = append(ns.offsets, invalidOffset)
   588  }
   589  
   590  // ReplaceLastQuotedOffset replaces the last name with the starting offset
   591  // to the quoted name in some remote buffer. All offsets provided must be
   592  // relative to the same buffer until copyQuotedBuffer is called.
   593  func (ns *objectNameStack) ReplaceLastQuotedOffset(i int) {
   594  	// Use bit-wise inversion instead of naive multiplication by -1 to avoid
   595  	// ambiguity regarding zero (which is a valid offset into the names field).
   596  	// Bit-wise inversion is mathematically equivalent to -i-1,
   597  	// such that 0 becomes -1, 1 becomes -2, and so forth.
   598  	// This ensures that remote offsets are always negative.
   599  	ns.offsets[len(ns.offsets)-1] = ^i
   600  }
   601  
   602  // replaceLastUnquotedName replaces the last name with the provided name.
   603  //
   604  // Invariant: Must call copyQuotedBuffer beforehand.
   605  func (ns *objectNameStack) replaceLastUnquotedName(s string) {
   606  	ns.ensureCopiedBuffer()
   607  	var startOffset int
   608  	if len(ns.offsets) > 1 {
   609  		startOffset = ns.offsets[len(ns.offsets)-2]
   610  	}
   611  	ns.unquotedNames = append(ns.unquotedNames[:startOffset], s...)
   612  	ns.offsets[len(ns.offsets)-1] = len(ns.unquotedNames)
   613  }
   614  
   615  // clearLast removes any name in the last JSON object.
   616  // It is semantically equivalent to ns.push followed by ns.pop.
   617  func (ns *objectNameStack) clearLast() {
   618  	ns.offsets[len(ns.offsets)-1] = invalidOffset
   619  }
   620  
   621  // pop ascends out of a nested JSON object.
   622  func (ns *objectNameStack) pop() {
   623  	ns.offsets = ns.offsets[:len(ns.offsets)-1]
   624  }
   625  
   626  // copyQuotedBuffer copies names from the remote buffer into the local names
   627  // buffer so that there are no more offset references into the remote buffer.
   628  // This allows the remote buffer to change contents without affecting
   629  // the names that this data structure is trying to remember.
   630  func (ns *objectNameStack) copyQuotedBuffer(b []byte) {
   631  	// Find the first negative offset.
   632  	var i int
   633  	for i = len(ns.offsets) - 1; i >= 0 && ns.offsets[i] < 0; i-- {
   634  		continue
   635  	}
   636  
   637  	// Copy each name from the remote buffer into the local buffer.
   638  	for i = i + 1; i < len(ns.offsets); i++ {
   639  		if i == len(ns.offsets)-1 && ns.offsets[i] == invalidOffset {
   640  			if i == 0 {
   641  				ns.offsets[i] = 0
   642  			} else {
   643  				ns.offsets[i] = ns.offsets[i-1]
   644  			}
   645  			break // last JSON object had a push without any names
   646  		}
   647  
   648  		// As a form of Hyrum proofing, we write an invalid character into the
   649  		// buffer to make misuse of Decoder.ReadToken more obvious.
   650  		// We need to undo that mutation here.
   651  		quotedName := b[^ns.offsets[i]:]
   652  		if quotedName[0] == invalidateBufferByte {
   653  			quotedName[0] = '"'
   654  		}
   655  
   656  		// Append the unquoted name to the local buffer.
   657  		var startOffset int
   658  		if i > 0 {
   659  			startOffset = ns.offsets[i-1]
   660  		}
   661  		if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 {
   662  			ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...)
   663  		} else {
   664  			ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName)
   665  		}
   666  		ns.offsets[i] = len(ns.unquotedNames)
   667  	}
   668  }
   669  
   670  func (ns *objectNameStack) ensureCopiedBuffer() {
   671  	if len(ns.offsets) > 0 && ns.offsets[len(ns.offsets)-1] < 0 {
   672  		panic("BUG: copyQuotedBuffer not called beforehand")
   673  	}
   674  }
   675  
   676  // objectNamespaceStack is a stack of object namespaces.
   677  // This data structure assists in detecting duplicate names.
   678  type objectNamespaceStack []objectNamespace
   679  
   680  // reset resets the object namespace stack.
   681  func (nss *objectNamespaceStack) reset() {
   682  	if cap(*nss) > 1<<10 {
   683  		*nss = nil
   684  	}
   685  	*nss = (*nss)[:0]
   686  }
   687  
   688  // push starts a new namespace for a nested JSON object.
   689  func (nss *objectNamespaceStack) push() {
   690  	if cap(*nss) > len(*nss) {
   691  		*nss = (*nss)[:len(*nss)+1]
   692  		nss.Last().reset()
   693  	} else {
   694  		*nss = append(*nss, objectNamespace{})
   695  	}
   696  }
   697  
   698  // Last returns a pointer to the last JSON object namespace.
   699  func (nss objectNamespaceStack) Last() *objectNamespace {
   700  	return &nss[len(nss)-1]
   701  }
   702  
   703  // pop terminates the namespace for a nested JSON object.
   704  func (nss *objectNamespaceStack) pop() {
   705  	*nss = (*nss)[:len(*nss)-1]
   706  }
   707  
   708  // objectNamespace is the namespace for a JSON object.
   709  // In contrast to objectNameStack, this needs to remember a all names
   710  // per JSON object.
   711  //
   712  // The zero value is an empty namespace ready for use.
   713  type objectNamespace struct {
   714  	// It relies on a linear search over all the names before switching
   715  	// to use a Go map for direct lookup.
   716  
   717  	// endOffsets is a list of offsets to the end of each name in buffers.
   718  	// The length of offsets is the number of names in the namespace.
   719  	endOffsets []uint
   720  	// allUnquotedNames is a back-to-back concatenation of every name in the namespace.
   721  	allUnquotedNames []byte
   722  	// mapNames is a Go map containing every name in the namespace.
   723  	// Only valid if non-nil.
   724  	mapNames map[string]struct{}
   725  }
   726  
   727  // reset resets the namespace to be empty.
   728  func (ns *objectNamespace) reset() {
   729  	ns.endOffsets = ns.endOffsets[:0]
   730  	ns.allUnquotedNames = ns.allUnquotedNames[:0]
   731  	ns.mapNames = nil
   732  	if cap(ns.endOffsets) > 1<<6 {
   733  		ns.endOffsets = nil // avoid pinning arbitrarily large amounts of memory
   734  	}
   735  	if cap(ns.allUnquotedNames) > 1<<10 {
   736  		ns.allUnquotedNames = nil // avoid pinning arbitrarily large amounts of memory
   737  	}
   738  }
   739  
   740  // length reports the number of names in the namespace.
   741  func (ns *objectNamespace) length() int {
   742  	return len(ns.endOffsets)
   743  }
   744  
   745  // getUnquoted retrieves the ith unquoted name in the namespace.
   746  func (ns *objectNamespace) getUnquoted(i int) []byte {
   747  	if i == 0 {
   748  		return ns.allUnquotedNames[:ns.endOffsets[0]]
   749  	} else {
   750  		return ns.allUnquotedNames[ns.endOffsets[i-1]:ns.endOffsets[i-0]]
   751  	}
   752  }
   753  
   754  // lastUnquoted retrieves the last name in the namespace.
   755  func (ns *objectNamespace) lastUnquoted() []byte {
   756  	return ns.getUnquoted(ns.length() - 1)
   757  }
   758  
   759  // insertQuoted inserts a name and reports whether it was inserted,
   760  // which only occurs if name is not already in the namespace.
   761  // The provided name must be a valid JSON string.
   762  func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool {
   763  	if isVerbatim {
   764  		name = name[len(`"`) : len(name)-len(`"`)]
   765  	}
   766  	return ns.insert(name, !isVerbatim)
   767  }
   768  func (ns *objectNamespace) InsertUnquoted(name []byte) bool {
   769  	return ns.insert(name, false)
   770  }
   771  func (ns *objectNamespace) insert(name []byte, quoted bool) bool {
   772  	var allNames []byte
   773  	if quoted {
   774  		allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name)
   775  	} else {
   776  		allNames = append(ns.allUnquotedNames, name...)
   777  	}
   778  	name = allNames[len(ns.allUnquotedNames):]
   779  
   780  	// Switch to a map if the buffer is too large for linear search.
   781  	// This does not add the current name to the map.
   782  	if ns.mapNames == nil && (ns.length() > 64 || len(ns.allUnquotedNames) > 1024) {
   783  		ns.mapNames = make(map[string]struct{})
   784  		var startOffset uint
   785  		for _, endOffset := range ns.endOffsets {
   786  			name := ns.allUnquotedNames[startOffset:endOffset]
   787  			ns.mapNames[string(name)] = struct{}{} // allocates a new string
   788  			startOffset = endOffset
   789  		}
   790  	}
   791  
   792  	if ns.mapNames == nil {
   793  		// Perform linear search over the buffer to find matching names.
   794  		// It provides O(n) lookup, but does not require any allocations.
   795  		var startOffset uint
   796  		for _, endOffset := range ns.endOffsets {
   797  			if string(ns.allUnquotedNames[startOffset:endOffset]) == string(name) {
   798  				return false
   799  			}
   800  			startOffset = endOffset
   801  		}
   802  	} else {
   803  		// Use the map if it is populated.
   804  		// It provides O(1) lookup, but requires a string allocation per name.
   805  		if _, ok := ns.mapNames[string(name)]; ok {
   806  			return false
   807  		}
   808  		ns.mapNames[string(name)] = struct{}{} // allocates a new string
   809  	}
   810  
   811  	ns.allUnquotedNames = allNames
   812  	ns.endOffsets = append(ns.endOffsets, uint(len(ns.allUnquotedNames)))
   813  	return true
   814  }
   815  
   816  // removeLast removes the last name in the namespace.
   817  func (ns *objectNamespace) removeLast() {
   818  	if ns.mapNames != nil {
   819  		delete(ns.mapNames, string(ns.lastUnquoted()))
   820  	}
   821  	if ns.length()-1 == 0 {
   822  		ns.endOffsets = ns.endOffsets[:0]
   823  		ns.allUnquotedNames = ns.allUnquotedNames[:0]
   824  	} else {
   825  		ns.endOffsets = ns.endOffsets[:ns.length()-1]
   826  		ns.allUnquotedNames = ns.allUnquotedNames[:ns.endOffsets[ns.length()-1]]
   827  	}
   828  }
   829
View as plain text