Source file src/encoding/json/jsontext/state.go

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.jsonv2
     6  
     7  package jsontext
     8  
     9  import (
    10  	"errors"
    11  	"iter"
    12  	"math"
    13  	"strconv"
    14  	"strings"
    15  	"unicode/utf8"
    16  
    17  	"encoding/json/internal/jsonwire"
    18  )
    19  
    20  // ErrDuplicateName indicates that a JSON token could not be
    21  // encoded or decoded because it results in a duplicate JSON object name.
    22  // This error is directly wrapped within a [SyntacticError] when produced.
    23  //
    24  // The name of a duplicate JSON object member can be extracted as:
    25  //
    26  //	err := ...
    27  //	if serr, ok := errors.AsType[jsontext.SyntacticError](err); ok && serr.Err == jsontext.ErrDuplicateName {
    28  //		ptr := serr.JSONPointer // JSON pointer to duplicate name
    29  //		name := ptr.LastToken() // duplicate name itself
    30  //		...
    31  //	}
    32  //
    33  // This error is only returned if [AllowDuplicateNames] is false.
    34  var ErrDuplicateName = errors.New("duplicate object member name")
    35  
    36  // ErrNonStringName indicates that a JSON token could not be
    37  // encoded or decoded because it is not a string,
    38  // as required for JSON object names according to RFC 8259, section 4.
    39  // This error is directly wrapped within a [SyntacticError] when produced.
    40  var ErrNonStringName = errors.New("object member name must be a string")
    41  
    42  var (
    43  	errMissingValue  = errors.New("missing value after object name")
    44  	errMismatchDelim = errors.New("mismatching structural token for object or array")
    45  	errMaxDepth      = errors.New("exceeded max depth")
    46  
    47  	errInvalidNamespace = errors.New("object namespace is in an invalid state")
    48  )
    49  
    50  // Per RFC 8259, section 9, implementations may enforce a maximum depth.
    51  // Such a limit is necessary to prevent stack overflows.
    52  const maxNestingDepth = 10000
    53  
    54  type state struct {
    55  	// Tokens validates whether the next token kind is valid.
    56  	Tokens stateMachine
    57  
    58  	// Names is a stack of object names.
    59  	Names objectNameStack
    60  
    61  	// Namespaces is a stack of object namespaces.
    62  	// For performance reasons, Encoder or Decoder may not update this
    63  	// if Marshal or Unmarshal is able to track names in a more efficient way.
    64  	// See makeMapArshaler and makeStructArshaler.
    65  	// Not used if AllowDuplicateNames is true.
    66  	Namespaces objectNamespaceStack
    67  }
    68  
    69  // needObjectValue reports whether the next token should be an object value.
    70  // This method is used by [wrapSyntacticError].
    71  func (s *state) needObjectValue() bool {
    72  	return s.Tokens.Last.needObjectValue()
    73  }
    74  
    75  func (s *state) reset() {
    76  	s.Tokens.reset()
    77  	s.Names.reset()
    78  	s.Namespaces.reset()
    79  }
    80  
    81  // Pointer is a JSON Pointer (RFC 6901) that references a particular JSON value
    82  // relative to the root of the top-level JSON value.
    83  //
    84  // A Pointer is a slash-separated list of tokens, where each token is
    85  // either a JSON object name or an index to a JSON array element
    86  // encoded as a base-10 integer value.
    87  // It is impossible to distinguish between an array index and an object name
    88  // (that happens to be an base-10 encoded integer) without also knowing
    89  // the structure of the top-level JSON value that the pointer refers to.
    90  //
    91  // There is exactly one representation of a pointer to a particular value,
    92  // so comparability of Pointer values is equivalent to checking whether
    93  // they both point to the exact same value.
    94  type Pointer string
    95  
    96  // IsValid reports whether p is a valid JSON Pointer according to RFC 6901.
    97  // Note that the concatenation of two valid pointers produces a valid pointer.
    98  func (p Pointer) IsValid() bool {
    99  	for i, r := range p {
   100  		switch {
   101  		case r == '~' && (i+1 == len(p) || (p[i+1] != '0' && p[i+1] != '1')):
   102  			return false // invalid escape
   103  		case r == '\ufffd' && !strings.HasPrefix(string(p[i:]), "\ufffd"):
   104  			return false // invalid UTF-8
   105  		}
   106  	}
   107  	return len(p) == 0 || p[0] == '/'
   108  }
   109  
   110  // Contains reports whether the JSON value that p points to
   111  // is equal to or contains the JSON value that pc points to.
   112  func (p Pointer) Contains(pc Pointer) bool {
   113  	// Invariant: len(p) <= len(pc) if p.Contains(pc)
   114  	suffix, ok := strings.CutPrefix(string(pc), string(p))
   115  	return ok && (suffix == "" || suffix[0] == '/')
   116  }
   117  
   118  // Parent strips off the last token and returns the remaining pointer.
   119  // The parent of an empty p is an empty string.
   120  func (p Pointer) Parent() Pointer {
   121  	return p[:max(strings.LastIndexByte(string(p), '/'), 0)]
   122  }
   123  
   124  // LastToken returns the last token in the pointer.
   125  // The last token of an empty p is an empty string.
   126  func (p Pointer) LastToken() string {
   127  	last := p[max(strings.LastIndexByte(string(p), '/'), 0):]
   128  	return unescapePointerToken(strings.TrimPrefix(string(last), "/"))
   129  }
   130  
   131  // AppendToken appends a token to the end of p and returns the full pointer.
   132  func (p Pointer) AppendToken(tok string) Pointer {
   133  	return Pointer(appendEscapePointerName([]byte(p+"/"), tok))
   134  }
   135  
   136  // TODO: Add Pointer.AppendTokens,
   137  // but should this take in a ...string or an iter.Seq[string]?
   138  
   139  // Tokens returns an iterator over the reference tokens in the JSON pointer,
   140  // starting from the first token until the last token (unless stopped early).
   141  func (p Pointer) Tokens() iter.Seq[string] {
   142  	return func(yield func(string) bool) {
   143  		for len(p) > 0 {
   144  			p = Pointer(strings.TrimPrefix(string(p), "/"))
   145  			i := min(uint(strings.IndexByte(string(p), '/')), uint(len(p)))
   146  			if !yield(unescapePointerToken(string(p)[:i])) {
   147  				return
   148  			}
   149  			p = p[i:]
   150  		}
   151  	}
   152  }
   153  
   154  func unescapePointerToken(token string) string {
   155  	if strings.Contains(token, "~") {
   156  		// Per RFC 6901, section 3, unescape '~' and '/' characters.
   157  		token = strings.ReplaceAll(token, "~1", "/")
   158  		token = strings.ReplaceAll(token, "~0", "~")
   159  	}
   160  	return token
   161  }
   162  
   163  // appendStackPointer appends a JSON Pointer (RFC 6901) to the current value.
   164  //
   165  //   - If where is -1, then it points to the previously processed token.
   166  //
   167  //   - If where is 0, then it points to the parent JSON object or array,
   168  //     or an object member if in-between an object member key and value.
   169  //     This is useful when the position is ambiguous whether
   170  //     we are interested in the previous or next token, or
   171  //     when we are uncertain whether the next token
   172  //     continues or terminates the current object or array.
   173  //
   174  //   - If where is +1, then it points to the next expected value,
   175  //     assuming that it continues the current JSON object or array.
   176  //     As a special case, if the next token is a JSON object name,
   177  //     then it points to the parent JSON object.
   178  //
   179  // Invariant: Must call s.names.copyQuotedBuffer beforehand.
   180  func (s state) appendStackPointer(b []byte, where int) []byte {
   181  	var objectDepth int
   182  	for i := 1; i < s.Tokens.Depth(); i++ {
   183  		e := s.Tokens.index(i)
   184  		arrayDelta := -1 // by default point to previous array element
   185  		if isLast := i == s.Tokens.Depth()-1; isLast {
   186  			switch {
   187  			case where < 0 && e.Length() == 0 || where == 0 && !e.needObjectValue() || where > 0 && e.NeedObjectName():
   188  				return b
   189  			case where > 0 && e.isArray():
   190  				arrayDelta = 0 // point to next array element
   191  			}
   192  		}
   193  		switch {
   194  		case e.isObject():
   195  			b = appendEscapePointerName(append(b, '/'), s.Names.getUnquoted(objectDepth))
   196  			objectDepth++
   197  		case e.isArray():
   198  			b = strconv.AppendUint(append(b, '/'), uint64(e.Length()+int64(arrayDelta)), 10)
   199  		}
   200  	}
   201  	return b
   202  }
   203  
   204  func appendEscapePointerName[Bytes ~[]byte | ~string](b []byte, name Bytes) []byte {
   205  	for _, r := range string(name) {
   206  		// Per RFC 6901, section 3, escape '~' and '/' characters.
   207  		switch r {
   208  		case '~':
   209  			b = append(b, "~0"...)
   210  		case '/':
   211  			b = append(b, "~1"...)
   212  		default:
   213  			b = utf8.AppendRune(b, r)
   214  		}
   215  	}
   216  	return b
   217  }
   218  
   219  // stateMachine is a push-down automaton that validates whether
   220  // a sequence of tokens is valid or not according to the JSON grammar.
   221  // It is useful for both encoding and decoding.
   222  //
   223  // It is a stack where each entry represents a nested JSON object or array.
   224  // The stack has a minimum depth of 1 where the first level is a
   225  // virtual JSON array to handle a stream of top-level JSON values.
   226  // The top-level virtual JSON array is special in that it doesn't require commas
   227  // between each JSON value.
   228  //
   229  // For performance, most methods are carefully written to be inlinable.
   230  // The zero value is a valid state machine ready for use.
   231  type stateMachine struct {
   232  	Stack []stateEntry
   233  	Last  stateEntry
   234  }
   235  
   236  // reset resets the state machine.
   237  // The machine always starts with a minimum depth of 1.
   238  func (m *stateMachine) reset() {
   239  	m.Stack = m.Stack[:0]
   240  	if cap(m.Stack) > 1<<10 {
   241  		m.Stack = nil
   242  	}
   243  	m.Last = stateTypeArray
   244  }
   245  
   246  // Depth is the current nested depth of JSON objects and arrays.
   247  // It is one-indexed (i.e., top-level values have a depth of 1).
   248  func (m stateMachine) Depth() int {
   249  	return len(m.Stack) + 1
   250  }
   251  
   252  // index returns a reference to the ith entry.
   253  // It is only valid until the next push method call.
   254  func (m *stateMachine) index(i int) *stateEntry {
   255  	if i == len(m.Stack) {
   256  		return &m.Last
   257  	}
   258  	return &m.Stack[i]
   259  }
   260  
   261  // DepthLength reports the current nested depth and
   262  // the length of the last JSON object or array.
   263  func (m stateMachine) DepthLength() (int, int64) {
   264  	return m.Depth(), m.Last.Length()
   265  }
   266  
   267  // appendLiteral appends a JSON literal as the next token in the sequence.
   268  // If an error is returned, the state is not mutated.
   269  func (m *stateMachine) appendLiteral() error {
   270  	switch {
   271  	case m.Last.NeedObjectName():
   272  		return ErrNonStringName
   273  	case !m.Last.isValidNamespace():
   274  		return errInvalidNamespace
   275  	default:
   276  		m.Last.Increment()
   277  		return nil
   278  	}
   279  }
   280  
   281  // appendString appends a JSON string as the next token in the sequence.
   282  // If an error is returned, the state is not mutated.
   283  func (m *stateMachine) appendString() error {
   284  	switch {
   285  	case !m.Last.isValidNamespace():
   286  		return errInvalidNamespace
   287  	default:
   288  		m.Last.Increment()
   289  		return nil
   290  	}
   291  }
   292  
   293  // appendNumber appends a JSON number as the next token in the sequence.
   294  // If an error is returned, the state is not mutated.
   295  func (m *stateMachine) appendNumber() error {
   296  	return m.appendLiteral()
   297  }
   298  
   299  // pushObject appends a JSON begin object token as next in the sequence.
   300  // If an error is returned, the state is not mutated.
   301  func (m *stateMachine) pushObject() error {
   302  	switch {
   303  	case m.Last.NeedObjectName():
   304  		return ErrNonStringName
   305  	case !m.Last.isValidNamespace():
   306  		return errInvalidNamespace
   307  	case len(m.Stack) == maxNestingDepth:
   308  		return errMaxDepth
   309  	default:
   310  		m.Last.Increment()
   311  		m.Stack = append(m.Stack, m.Last)
   312  		m.Last = stateTypeObject
   313  		return nil
   314  	}
   315  }
   316  
   317  // popObject appends a JSON end object token as next in the sequence.
   318  // If an error is returned, the state is not mutated.
   319  func (m *stateMachine) popObject() error {
   320  	switch {
   321  	case !m.Last.isObject():
   322  		return errMismatchDelim
   323  	case m.Last.needObjectValue():
   324  		return errMissingValue
   325  	case !m.Last.isValidNamespace():
   326  		return errInvalidNamespace
   327  	default:
   328  		m.Last = m.Stack[len(m.Stack)-1]
   329  		m.Stack = m.Stack[:len(m.Stack)-1]
   330  		return nil
   331  	}
   332  }
   333  
   334  // pushArray appends a JSON begin array token as next in the sequence.
   335  // If an error is returned, the state is not mutated.
   336  func (m *stateMachine) pushArray() error {
   337  	switch {
   338  	case m.Last.NeedObjectName():
   339  		return ErrNonStringName
   340  	case !m.Last.isValidNamespace():
   341  		return errInvalidNamespace
   342  	case len(m.Stack) == maxNestingDepth:
   343  		return errMaxDepth
   344  	default:
   345  		m.Last.Increment()
   346  		m.Stack = append(m.Stack, m.Last)
   347  		m.Last = stateTypeArray
   348  		return nil
   349  	}
   350  }
   351  
   352  // popArray appends a JSON end array token as next in the sequence.
   353  // If an error is returned, the state is not mutated.
   354  func (m *stateMachine) popArray() error {
   355  	switch {
   356  	case !m.Last.isArray() || len(m.Stack) == 0: // forbid popping top-level virtual JSON array
   357  		return errMismatchDelim
   358  	case !m.Last.isValidNamespace():
   359  		return errInvalidNamespace
   360  	default:
   361  		m.Last = m.Stack[len(m.Stack)-1]
   362  		m.Stack = m.Stack[:len(m.Stack)-1]
   363  		return nil
   364  	}
   365  }
   366  
   367  // NeedIndent reports whether indent whitespace should be injected.
   368  // A zero value means that no whitespace should be injected.
   369  // A positive value means '\n', indentPrefix, and (n-1) copies of indentBody
   370  // should be appended to the output immediately before the next token.
   371  func (m stateMachine) NeedIndent(next Kind) (n int) {
   372  	willEnd := next == '}' || next == ']'
   373  	switch {
   374  	case m.Depth() == 1:
   375  		return 0 // top-level values are never indented
   376  	case m.Last.Length() == 0 && willEnd:
   377  		return 0 // an empty object or array is never indented
   378  	case m.Last.Length() == 0 || m.Last.needImplicitComma(next):
   379  		return m.Depth()
   380  	case willEnd:
   381  		return m.Depth() - 1
   382  	default:
   383  		return 0
   384  	}
   385  }
   386  
   387  // MayAppendDelim appends a colon or comma that may precede the next token.
   388  func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte {
   389  	switch {
   390  	case m.Last.needImplicitColon():
   391  		return append(b, ':')
   392  	case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
   393  		return append(b, ',')
   394  	default:
   395  		return b
   396  	}
   397  }
   398  
   399  // needDelim reports whether a colon or comma token should be implicitly emitted
   400  // before the next token of the specified kind.
   401  // A zero value means no delimiter should be emitted.
   402  func (m stateMachine) needDelim(next Kind) (delim byte) {
   403  	switch {
   404  	case m.Last.needImplicitColon():
   405  		return ':'
   406  	case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
   407  		return ','
   408  	default:
   409  		return 0
   410  	}
   411  }
   412  
   413  // InvalidateDisabledNamespaces marks all disabled namespaces as invalid.
   414  //
   415  // For efficiency, Marshal and Unmarshal may disable namespaces since there are
   416  // more efficient ways to track duplicate names. However, if an error occurs,
   417  // the namespaces in Encoder or Decoder will be left in an inconsistent state.
   418  // Mark the namespaces as invalid so that future method calls on
   419  // Encoder or Decoder will return an error.
   420  func (m *stateMachine) InvalidateDisabledNamespaces() {
   421  	for i := range m.Depth() {
   422  		e := m.index(i)
   423  		if !e.isActiveNamespace() {
   424  			e.invalidateNamespace()
   425  		}
   426  	}
   427  }
   428  
   429  // stateEntry encodes several artifacts within a single unsigned integer:
   430  //   - whether this represents a JSON object or array,
   431  //   - whether this object should check for duplicate names, and
   432  //   - how many elements are in this JSON object or array.
   433  type stateEntry uint64
   434  
   435  const (
   436  	// The type mask (1 bit) records whether this is a JSON object or array.
   437  	stateTypeMask   stateEntry = 0x8000_0000_0000_0000
   438  	stateTypeObject stateEntry = 0x8000_0000_0000_0000
   439  	stateTypeArray  stateEntry = 0x0000_0000_0000_0000
   440  
   441  	// The name check mask (2 bit) records whether to update
   442  	// the namespaces for the current JSON object and
   443  	// whether the namespace is valid.
   444  	stateNamespaceMask    stateEntry = 0x6000_0000_0000_0000
   445  	stateDisableNamespace stateEntry = 0x4000_0000_0000_0000
   446  	stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000
   447  
   448  	// The count mask (61 bits) records the number of elements.
   449  	stateCountMask    stateEntry = 0x1fff_ffff_ffff_ffff
   450  	stateCountLSBMask stateEntry = 0x0000_0000_0000_0001
   451  	stateCountOdd     stateEntry = 0x0000_0000_0000_0001
   452  	stateCountEven    stateEntry = 0x0000_0000_0000_0000
   453  )
   454  
   455  // Length reports the number of elements in the JSON object or array.
   456  // Each name and value in an object entry is treated as a separate element.
   457  func (e stateEntry) Length() int64 {
   458  	return int64(e & stateCountMask)
   459  }
   460  
   461  // isObject reports whether this is a JSON object.
   462  func (e stateEntry) isObject() bool {
   463  	return e&stateTypeMask == stateTypeObject
   464  }
   465  
   466  // isArray reports whether this is a JSON array.
   467  func (e stateEntry) isArray() bool {
   468  	return e&stateTypeMask == stateTypeArray
   469  }
   470  
   471  // NeedObjectName reports whether the next token must be a JSON string,
   472  // which is necessary for JSON object names.
   473  func (e stateEntry) NeedObjectName() bool {
   474  	return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven
   475  }
   476  
   477  // needImplicitColon reports whether an colon should occur next,
   478  // which always occurs after JSON object names.
   479  func (e stateEntry) needImplicitColon() bool {
   480  	return e.needObjectValue()
   481  }
   482  
   483  // needObjectValue reports whether the next token must be a JSON value,
   484  // which is necessary after every JSON object name.
   485  func (e stateEntry) needObjectValue() bool {
   486  	return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountOdd
   487  }
   488  
   489  // needImplicitComma reports whether an comma should occur next,
   490  // which always occurs after a value in a JSON object or array
   491  // before the next value (or name).
   492  func (e stateEntry) needImplicitComma(next Kind) bool {
   493  	return !e.needObjectValue() && e.Length() > 0 && next != '}' && next != ']'
   494  }
   495  
   496  // Increment increments the number of elements for the current object or array.
   497  // This assumes that overflow won't practically be an issue since
   498  // 1<<bits.OnesCount(stateCountMask) is sufficiently large.
   499  func (e *stateEntry) Increment() {
   500  	(*e)++
   501  }
   502  
   503  // decrement decrements the number of elements for the current object or array.
   504  // It is the callers responsibility to ensure that e.length > 0.
   505  func (e *stateEntry) decrement() {
   506  	(*e)--
   507  }
   508  
   509  // DisableNamespace disables the JSON object namespace such that the
   510  // Encoder or Decoder no longer updates the namespace.
   511  func (e *stateEntry) DisableNamespace() {
   512  	*e |= stateDisableNamespace
   513  }
   514  
   515  // isActiveNamespace reports whether the JSON object namespace is actively
   516  // being updated and used for duplicate name checks.
   517  func (e stateEntry) isActiveNamespace() bool {
   518  	return e&(stateDisableNamespace) == 0
   519  }
   520  
   521  // invalidateNamespace marks the JSON object namespace as being invalid.
   522  func (e *stateEntry) invalidateNamespace() {
   523  	*e |= stateInvalidNamespace
   524  }
   525  
   526  // isValidNamespace reports whether the JSON object namespace is valid.
   527  func (e stateEntry) isValidNamespace() bool {
   528  	return e&(stateInvalidNamespace) == 0
   529  }
   530  
   531  // objectNameStack is a stack of names when descending into a JSON object.
   532  // In contrast to objectNamespaceStack, this only has to remember a single name
   533  // per JSON object.
   534  //
   535  // This data structure may contain offsets to encodeBuffer or decodeBuffer.
   536  // It violates clean abstraction of layers, but is significantly more efficient.
   537  // This ensures that popping and pushing in the common case is a trivial
   538  // push/pop of an offset integer.
   539  //
   540  // The zero value is an empty names stack ready for use.
   541  type objectNameStack struct {
   542  	// offsets is a stack of offsets for each name.
   543  	// A non-negative offset is the ending offset into the local names buffer.
   544  	// A negative offset is the bit-wise inverse of a starting offset into
   545  	// a remote buffer (e.g., encodeBuffer or decodeBuffer).
   546  	// A math.MinInt offset at the end implies that the last object is empty.
   547  	// Invariant: Positive offsets always occur before negative offsets.
   548  	offsets []int
   549  	// unquotedNames is a back-to-back concatenation of names.
   550  	unquotedNames []byte
   551  }
   552  
   553  func (ns *objectNameStack) reset() {
   554  	ns.offsets = ns.offsets[:0]
   555  	ns.unquotedNames = ns.unquotedNames[:0]
   556  	if cap(ns.offsets) > 1<<6 {
   557  		ns.offsets = nil // avoid pinning arbitrarily large amounts of memory
   558  	}
   559  	if cap(ns.unquotedNames) > 1<<10 {
   560  		ns.unquotedNames = nil // avoid pinning arbitrarily large amounts of memory
   561  	}
   562  }
   563  
   564  func (ns *objectNameStack) length() int {
   565  	return len(ns.offsets)
   566  }
   567  
   568  // getUnquoted retrieves the ith unquoted name in the stack.
   569  // It returns an empty string if the last object is empty.
   570  //
   571  // Invariant: Must call copyQuotedBuffer beforehand.
   572  func (ns *objectNameStack) getUnquoted(i int) []byte {
   573  	ns.ensureCopiedBuffer()
   574  	if i == 0 {
   575  		return ns.unquotedNames[:ns.offsets[0]]
   576  	} else {
   577  		return ns.unquotedNames[ns.offsets[i-1]:ns.offsets[i-0]]
   578  	}
   579  }
   580  
   581  // invalidOffset indicates that the last JSON object currently has no name.
   582  const invalidOffset = math.MinInt
   583  
   584  // push descends into a nested JSON object.
   585  func (ns *objectNameStack) push() {
   586  	ns.offsets = append(ns.offsets, invalidOffset)
   587  }
   588  
   589  // ReplaceLastQuotedOffset replaces the last name with the starting offset
   590  // to the quoted name in some remote buffer. All offsets provided must be
   591  // relative to the same buffer until copyQuotedBuffer is called.
   592  func (ns *objectNameStack) ReplaceLastQuotedOffset(i int) {
   593  	// Use bit-wise inversion instead of naive multiplication by -1 to avoid
   594  	// ambiguity regarding zero (which is a valid offset into the names field).
   595  	// Bit-wise inversion is mathematically equivalent to -i-1,
   596  	// such that 0 becomes -1, 1 becomes -2, and so forth.
   597  	// This ensures that remote offsets are always negative.
   598  	ns.offsets[len(ns.offsets)-1] = ^i
   599  }
   600  
   601  // replaceLastUnquotedName replaces the last name with the provided name.
   602  //
   603  // Invariant: Must call copyQuotedBuffer beforehand.
   604  func (ns *objectNameStack) replaceLastUnquotedName(s string) {
   605  	ns.ensureCopiedBuffer()
   606  	var startOffset int
   607  	if len(ns.offsets) > 1 {
   608  		startOffset = ns.offsets[len(ns.offsets)-2]
   609  	}
   610  	ns.unquotedNames = append(ns.unquotedNames[:startOffset], s...)
   611  	ns.offsets[len(ns.offsets)-1] = len(ns.unquotedNames)
   612  }
   613  
   614  // clearLast removes any name in the last JSON object.
   615  // It is semantically equivalent to ns.push followed by ns.pop.
   616  func (ns *objectNameStack) clearLast() {
   617  	ns.offsets[len(ns.offsets)-1] = invalidOffset
   618  }
   619  
   620  // pop ascends out of a nested JSON object.
   621  func (ns *objectNameStack) pop() {
   622  	ns.offsets = ns.offsets[:len(ns.offsets)-1]
   623  }
   624  
   625  // copyQuotedBuffer copies names from the remote buffer into the local names
   626  // buffer so that there are no more offset references into the remote buffer.
   627  // This allows the remote buffer to change contents without affecting
   628  // the names that this data structure is trying to remember.
   629  func (ns *objectNameStack) copyQuotedBuffer(b []byte) {
   630  	// Find the first negative offset.
   631  	var i int
   632  	for i = len(ns.offsets) - 1; i >= 0 && ns.offsets[i] < 0; i-- {
   633  		continue
   634  	}
   635  
   636  	// Copy each name from the remote buffer into the local buffer.
   637  	for i = i + 1; i < len(ns.offsets); i++ {
   638  		if i == len(ns.offsets)-1 && ns.offsets[i] == invalidOffset {
   639  			if i == 0 {
   640  				ns.offsets[i] = 0
   641  			} else {
   642  				ns.offsets[i] = ns.offsets[i-1]
   643  			}
   644  			break // last JSON object had a push without any names
   645  		}
   646  
   647  		// As a form of Hyrum proofing, we write an invalid character into the
   648  		// buffer to make misuse of Decoder.ReadToken more obvious.
   649  		// We need to undo that mutation here.
   650  		quotedName := b[^ns.offsets[i]:]
   651  		if quotedName[0] == invalidateBufferByte {
   652  			quotedName[0] = '"'
   653  		}
   654  
   655  		// Append the unquoted name to the local buffer.
   656  		var startOffset int
   657  		if i > 0 {
   658  			startOffset = ns.offsets[i-1]
   659  		}
   660  		if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 {
   661  			ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...)
   662  		} else {
   663  			ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName)
   664  		}
   665  		ns.offsets[i] = len(ns.unquotedNames)
   666  	}
   667  }
   668  
   669  func (ns *objectNameStack) ensureCopiedBuffer() {
   670  	if len(ns.offsets) > 0 && ns.offsets[len(ns.offsets)-1] < 0 {
   671  		panic("BUG: copyQuotedBuffer not called beforehand")
   672  	}
   673  }
   674  
   675  // objectNamespaceStack is a stack of object namespaces.
   676  // This data structure assists in detecting duplicate names.
   677  type objectNamespaceStack []objectNamespace
   678  
   679  // reset resets the object namespace stack.
   680  func (nss *objectNamespaceStack) reset() {
   681  	if cap(*nss) > 1<<10 {
   682  		*nss = nil
   683  	}
   684  	*nss = (*nss)[:0]
   685  }
   686  
   687  // push starts a new namespace for a nested JSON object.
   688  func (nss *objectNamespaceStack) push() {
   689  	if cap(*nss) > len(*nss) {
   690  		*nss = (*nss)[:len(*nss)+1]
   691  		nss.Last().reset()
   692  	} else {
   693  		*nss = append(*nss, objectNamespace{})
   694  	}
   695  }
   696  
   697  // Last returns a pointer to the last JSON object namespace.
   698  func (nss objectNamespaceStack) Last() *objectNamespace {
   699  	return &nss[len(nss)-1]
   700  }
   701  
   702  // pop terminates the namespace for a nested JSON object.
   703  func (nss *objectNamespaceStack) pop() {
   704  	*nss = (*nss)[:len(*nss)-1]
   705  }
   706  
   707  // objectNamespace is the namespace for a JSON object.
   708  // In contrast to objectNameStack, this needs to remember a all names
   709  // per JSON object.
   710  //
   711  // The zero value is an empty namespace ready for use.
   712  type objectNamespace struct {
   713  	// It relies on a linear search over all the names before switching
   714  	// to use a Go map for direct lookup.
   715  
   716  	// endOffsets is a list of offsets to the end of each name in buffers.
   717  	// The length of offsets is the number of names in the namespace.
   718  	endOffsets []uint
   719  	// allUnquotedNames is a back-to-back concatenation of every name in the namespace.
   720  	allUnquotedNames []byte
   721  	// mapNames is a Go map containing every name in the namespace.
   722  	// Only valid if non-nil.
   723  	mapNames map[string]struct{}
   724  }
   725  
   726  // reset resets the namespace to be empty.
   727  func (ns *objectNamespace) reset() {
   728  	ns.endOffsets = ns.endOffsets[:0]
   729  	ns.allUnquotedNames = ns.allUnquotedNames[:0]
   730  	ns.mapNames = nil
   731  	if cap(ns.endOffsets) > 1<<6 {
   732  		ns.endOffsets = nil // avoid pinning arbitrarily large amounts of memory
   733  	}
   734  	if cap(ns.allUnquotedNames) > 1<<10 {
   735  		ns.allUnquotedNames = nil // avoid pinning arbitrarily large amounts of memory
   736  	}
   737  }
   738  
   739  // length reports the number of names in the namespace.
   740  func (ns *objectNamespace) length() int {
   741  	return len(ns.endOffsets)
   742  }
   743  
   744  // getUnquoted retrieves the ith unquoted name in the namespace.
   745  func (ns *objectNamespace) getUnquoted(i int) []byte {
   746  	if i == 0 {
   747  		return ns.allUnquotedNames[:ns.endOffsets[0]]
   748  	} else {
   749  		return ns.allUnquotedNames[ns.endOffsets[i-1]:ns.endOffsets[i-0]]
   750  	}
   751  }
   752  
   753  // lastUnquoted retrieves the last name in the namespace.
   754  func (ns *objectNamespace) lastUnquoted() []byte {
   755  	return ns.getUnquoted(ns.length() - 1)
   756  }
   757  
   758  // insertQuoted inserts a name and reports whether it was inserted,
   759  // which only occurs if name is not already in the namespace.
   760  // The provided name must be a valid JSON string.
   761  func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool {
   762  	if isVerbatim {
   763  		name = name[len(`"`) : len(name)-len(`"`)]
   764  	}
   765  	return ns.insert(name, !isVerbatim)
   766  }
   767  func (ns *objectNamespace) InsertUnquoted(name []byte) bool {
   768  	return ns.insert(name, false)
   769  }
   770  func (ns *objectNamespace) insert(name []byte, quoted bool) bool {
   771  	var allNames []byte
   772  	if quoted {
   773  		allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name)
   774  	} else {
   775  		allNames = append(ns.allUnquotedNames, name...)
   776  	}
   777  	name = allNames[len(ns.allUnquotedNames):]
   778  
   779  	// Switch to a map if the buffer is too large for linear search.
   780  	// This does not add the current name to the map.
   781  	if ns.mapNames == nil && (ns.length() > 64 || len(ns.allUnquotedNames) > 1024) {
   782  		ns.mapNames = make(map[string]struct{})
   783  		var startOffset uint
   784  		for _, endOffset := range ns.endOffsets {
   785  			name := ns.allUnquotedNames[startOffset:endOffset]
   786  			ns.mapNames[string(name)] = struct{}{} // allocates a new string
   787  			startOffset = endOffset
   788  		}
   789  	}
   790  
   791  	if ns.mapNames == nil {
   792  		// Perform linear search over the buffer to find matching names.
   793  		// It provides O(n) lookup, but does not require any allocations.
   794  		var startOffset uint
   795  		for _, endOffset := range ns.endOffsets {
   796  			if string(ns.allUnquotedNames[startOffset:endOffset]) == string(name) {
   797  				return false
   798  			}
   799  			startOffset = endOffset
   800  		}
   801  	} else {
   802  		// Use the map if it is populated.
   803  		// It provides O(1) lookup, but requires a string allocation per name.
   804  		if _, ok := ns.mapNames[string(name)]; ok {
   805  			return false
   806  		}
   807  		ns.mapNames[string(name)] = struct{}{} // allocates a new string
   808  	}
   809  
   810  	ns.allUnquotedNames = allNames
   811  	ns.endOffsets = append(ns.endOffsets, uint(len(ns.allUnquotedNames)))
   812  	return true
   813  }
   814  
   815  // removeLast removes the last name in the namespace.
   816  func (ns *objectNamespace) removeLast() {
   817  	if ns.mapNames != nil {
   818  		delete(ns.mapNames, string(ns.lastUnquoted()))
   819  	}
   820  	if ns.length()-1 == 0 {
   821  		ns.endOffsets = ns.endOffsets[:0]
   822  		ns.allUnquotedNames = ns.allUnquotedNames[:0]
   823  	} else {
   824  		ns.endOffsets = ns.endOffsets[:ns.length()-1]
   825  		ns.allUnquotedNames = ns.allUnquotedNames[:ns.endOffsets[ns.length()-1]]
   826  	}
   827  }
   828  

View as plain text