Source file src/encoding/json/jsontext/encode.go

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.jsonv2
     6  
     7  package jsontext
     8  
     9  import (
    10  	"bytes"
    11  	"io"
    12  	"math/bits"
    13  
    14  	"encoding/json/internal/jsonflags"
    15  	"encoding/json/internal/jsonopts"
    16  	"encoding/json/internal/jsonwire"
    17  )
    18  
    19  // Encoder is a streaming encoder from raw JSON tokens and values.
    20  // It is used to write a stream of top-level JSON values,
    21  // each terminated with a newline character.
    22  //
    23  // [Encoder.WriteToken] and [Encoder.WriteValue] calls may be interleaved.
    24  // For example, the following JSON value:
    25  //
    26  //	{"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
    27  //
    28  // can be composed with the following calls (ignoring errors for brevity):
    29  //
    30  //	e.WriteToken(BeginObject)        // {
    31  //	e.WriteToken(String("name"))     // "name"
    32  //	e.WriteToken(String("value"))    // "value"
    33  //	e.WriteValue(Value(`"array"`))   // "array"
    34  //	e.WriteToken(BeginArray)         // [
    35  //	e.WriteToken(Null)               // null
    36  //	e.WriteToken(False)              // false
    37  //	e.WriteValue(Value("true"))      // true
    38  //	e.WriteToken(Float(3.14159))     // 3.14159
    39  //	e.WriteToken(EndArray)           // ]
    40  //	e.WriteValue(Value(`"object"`))  // "object"
    41  //	e.WriteValue(Value(`{"k":"v"}`)) // {"k":"v"}
    42  //	e.WriteToken(EndObject)          // }
    43  //
    44  // The above is one of many possible sequences of calls and
    45  // may not represent the most sensible method to call for any given token/value.
    46  // For example, it is probably more common to call [Encoder.WriteToken] with a string
    47  // for object names.
    48  type Encoder struct {
    49  	s encoderState
    50  }
    51  
    52  // encoderState is the low-level state of Encoder.
    53  // It has exported fields and methods for use by the "json" package.
    54  type encoderState struct {
    55  	state
    56  	encodeBuffer
    57  	jsonopts.Struct
    58  
    59  	SeenPointers map[any]struct{} // only used when marshaling; identical to json.seenPointers
    60  }
    61  
    62  // encodeBuffer is a buffer split into 2 segments:
    63  //
    64  //   - buf[0:len(buf)]        // written (but unflushed) portion of the buffer
    65  //   - buf[len(buf):cap(buf)] // unused portion of the buffer
    66  type encodeBuffer struct {
    67  	Buf []byte // may alias wr if it is a bytes.Buffer
    68  
    69  	// baseOffset is added to len(buf) to obtain the absolute offset
    70  	// relative to the start of io.Writer stream.
    71  	baseOffset int64
    72  
    73  	wr io.Writer
    74  
    75  	// maxValue is the approximate maximum Value size passed to WriteValue.
    76  	maxValue int
    77  	// availBuffer is the buffer returned by the AvailableBuffer method.
    78  	availBuffer []byte // always has zero length
    79  	// bufStats is statistics about buffer utilization.
    80  	// It is only used with pooled encoders in pools.go.
    81  	bufStats bufferStatistics
    82  }
    83  
    84  // NewEncoder constructs a new streaming encoder writing to w
    85  // configured with the provided options.
    86  // It flushes the internal buffer when the buffer is sufficiently full or
    87  // when a top-level value has been written.
    88  //
    89  // If w is a [bytes.Buffer], then the encoder appends directly into the buffer
    90  // without copying the contents from an intermediate buffer.
    91  func NewEncoder(w io.Writer, opts ...Options) *Encoder {
    92  	e := new(Encoder)
    93  	e.Reset(w, opts...)
    94  	return e
    95  }
    96  
    97  // Reset resets an encoder such that it is writing afresh to w and
    98  // configured with the provided options. Reset must not be called on
    99  // an Encoder passed to the [encoding/json/v2.MarshalerTo.MarshalJSONTo] method
   100  // or the [encoding/json/v2.MarshalToFunc] function.
   101  func (e *Encoder) Reset(w io.Writer, opts ...Options) {
   102  	switch {
   103  	case e == nil:
   104  		panic("jsontext: invalid nil Encoder")
   105  	case w == nil:
   106  		panic("jsontext: invalid nil io.Writer")
   107  	case e.s.Flags.Get(jsonflags.WithinArshalCall):
   108  		panic("jsontext: cannot reset Encoder passed to json.MarshalerTo")
   109  	}
   110  	// Reuse the buffer if it does not alias a previous [bytes.Buffer].
   111  	b := e.s.Buf[:0]
   112  	if _, ok := e.s.wr.(*bytes.Buffer); ok {
   113  		b = nil
   114  	}
   115  	e.s.reset(b, w, opts...)
   116  }
   117  
   118  func (e *encoderState) reset(b []byte, w io.Writer, opts ...Options) {
   119  	e.state.reset()
   120  	e.encodeBuffer = encodeBuffer{Buf: b, wr: w, availBuffer: e.availBuffer, bufStats: e.bufStats}
   121  	if bb, ok := w.(*bytes.Buffer); ok && bb != nil {
   122  		e.Buf = bb.AvailableBuffer() // alias the unused buffer of bb
   123  	}
   124  	opts2 := jsonopts.Struct{} // avoid mutating e.Struct in case it is part of opts
   125  	opts2.Join(opts...)
   126  	e.Struct = opts2
   127  	if e.Struct.Flags.Get(jsonflags.Multiline) {
   128  		e.Struct.InitializeMultiline()
   129  	}
   130  }
   131  
   132  // Options returns the options used to construct the encoder and
   133  // may additionally contain semantic options passed to a
   134  // [encoding/json/v2.MarshalEncode] call.
   135  //
   136  // If operating within
   137  // a [encoding/json/v2.MarshalerTo.MarshalJSONTo] method call or
   138  // a [encoding/json/v2.MarshalToFunc] function call,
   139  // then the returned options are only valid within the call.
   140  func (e *Encoder) Options() Options {
   141  	return &e.s.Struct
   142  }
   143  
   144  func (e *encoderState) options() *jsonopts.Struct { return &e.Struct }
   145  
   146  // NeedFlush determines whether to flush at this point.
   147  func (e *encoderState) NeedFlush() bool {
   148  	// NOTE: This function is carefully written to be inlinable.
   149  
   150  	// Avoid flushing if e.wr is nil since there is no underlying writer.
   151  	// Flush if less than 25% of the capacity remains.
   152  	// Flushing at some constant fraction ensures that the buffer stops growing
   153  	// so long as the largest Token or Value fits within that unused capacity.
   154  	return e.wr != nil && (e.Tokens.Depth() == 1 || len(e.Buf) > 3*cap(e.Buf)/4)
   155  }
   156  
   157  // Flush flushes the buffer to the underlying io.Writer.
   158  // It may append a trailing newline after the top-level value.
   159  func (e *encoderState) Flush() error {
   160  	if e.wr == nil || e.avoidFlush() {
   161  		return nil
   162  	}
   163  
   164  	// In streaming mode, always emit a newline after the top-level value.
   165  	if e.Tokens.Depth() == 1 && !e.Flags.Get(jsonflags.OmitTopLevelNewline) {
   166  		e.Buf = append(e.Buf, '\n')
   167  	}
   168  
   169  	// Inform objectNameStack that we are about to flush the buffer content.
   170  	e.Names.copyQuotedBuffer(e.Buf)
   171  
   172  	// Specialize bytes.Buffer for better performance.
   173  	if bb, ok := e.wr.(*bytes.Buffer); ok {
   174  		// If e.Buf already aliases the internal buffer of bb,
   175  		// then the Write call simply increments the internal offset,
   176  		// otherwise Write operates as expected.
   177  		// See https://go.dev/issue/42986.
   178  		n, _ := bb.Write(e.Buf) // never fails unless bb is nil
   179  		e.baseOffset += int64(n)
   180  
   181  		// If the internal buffer of bytes.Buffer is too small,
   182  		// append operations elsewhere in the Encoder may grow the buffer.
   183  		// This would be semantically correct, but hurts performance.
   184  		// As such, ensure 25% of the current length is always available
   185  		// to reduce the probability that other appends must allocate.
   186  		if avail := bb.Available(); avail < bb.Len()/4 {
   187  			bb.Grow(avail + 1)
   188  		}
   189  
   190  		e.Buf = bb.AvailableBuffer()
   191  		return nil
   192  	}
   193  
   194  	// Flush the internal buffer to the underlying io.Writer.
   195  	n, err := e.wr.Write(e.Buf)
   196  	e.baseOffset += int64(n)
   197  	if err != nil {
   198  		// In the event of an error, preserve the unflushed portion.
   199  		// Thus, write errors aren't fatal so long as the io.Writer
   200  		// maintains consistent state after errors.
   201  		if n > 0 {
   202  			e.Buf = e.Buf[:copy(e.Buf, e.Buf[n:])]
   203  		}
   204  		return &ioError{action: "write", err: err}
   205  	}
   206  	e.Buf = e.Buf[:0]
   207  
   208  	// Check whether to grow the buffer.
   209  	// Note that cap(e.Buf) may already exceed maxBufferSize since
   210  	// an append elsewhere already grew it to store a large token.
   211  	const maxBufferSize = 4 << 10
   212  	const growthSizeFactor = 2 // higher value is faster
   213  	const growthRateFactor = 2 // higher value is slower
   214  	// By default, grow if below the maximum buffer size.
   215  	grow := cap(e.Buf) <= maxBufferSize/growthSizeFactor
   216  	// Growing can be expensive, so only grow
   217  	// if a sufficient number of bytes have been processed.
   218  	grow = grow && int64(cap(e.Buf)) < e.previousOffsetEnd()/growthRateFactor
   219  	if grow {
   220  		e.Buf = make([]byte, 0, cap(e.Buf)*growthSizeFactor)
   221  	}
   222  
   223  	return nil
   224  }
   225  func (e *encodeBuffer) offsetAt(pos int) int64   { return e.baseOffset + int64(pos) }
   226  func (e *encodeBuffer) previousOffsetEnd() int64 { return e.baseOffset + int64(len(e.Buf)) }
   227  func (e *encodeBuffer) unflushedBuffer() []byte  { return e.Buf }
   228  
   229  // avoidFlush indicates whether to avoid flushing to ensure there is always
   230  // enough in the buffer to unwrite the last object member if it were empty.
   231  func (e *encoderState) avoidFlush() bool {
   232  	switch {
   233  	case e.Tokens.Last.Length() == 0:
   234  		// Never flush after BeginObject or BeginArray since we don't know yet
   235  		// if the object or array will end up being empty.
   236  		return true
   237  	case e.Tokens.Last.needObjectValue():
   238  		// Never flush before the object value since we don't know yet
   239  		// if the object value will end up being empty.
   240  		return true
   241  	case e.Tokens.Last.NeedObjectName() && len(e.Buf) >= 2:
   242  		// Never flush after the object value if it does turn out to be empty.
   243  		switch string(e.Buf[len(e.Buf)-2:]) {
   244  		case `ll`, `""`, `{}`, `[]`: // last two bytes of every empty value
   245  			return true
   246  		}
   247  	}
   248  	return false
   249  }
   250  
   251  // UnwriteEmptyObjectMember unwrites the last object member if it is empty
   252  // and reports whether it performed an unwrite operation.
   253  func (e *encoderState) UnwriteEmptyObjectMember(prevName *string) bool {
   254  	if last := e.Tokens.Last; !last.isObject() || !last.NeedObjectName() || last.Length() == 0 {
   255  		panic("BUG: must be called on an object after writing a value")
   256  	}
   257  
   258  	// The flushing logic is modified to never flush a trailing empty value.
   259  	// The encoder never writes trailing whitespace eagerly.
   260  	b := e.unflushedBuffer()
   261  
   262  	// Detect whether the last value was empty.
   263  	var n int
   264  	if len(b) >= 3 {
   265  		switch string(b[len(b)-2:]) {
   266  		case "ll": // last two bytes of `null`
   267  			n = len(`null`)
   268  		case `""`:
   269  			// It is possible for a non-empty string to have `""` as a suffix
   270  			// if the second to the last quote was escaped.
   271  			if b[len(b)-3] == '\\' {
   272  				return false // e.g., `"\""` is not empty
   273  			}
   274  			n = len(`""`)
   275  		case `{}`:
   276  			n = len(`{}`)
   277  		case `[]`:
   278  			n = len(`[]`)
   279  		}
   280  	}
   281  	if n == 0 {
   282  		return false
   283  	}
   284  
   285  	// Unwrite the value, whitespace, colon, name, whitespace, and comma.
   286  	b = b[:len(b)-n]
   287  	b = jsonwire.TrimSuffixWhitespace(b)
   288  	b = jsonwire.TrimSuffixByte(b, ':')
   289  	b = jsonwire.TrimSuffixString(b)
   290  	b = jsonwire.TrimSuffixWhitespace(b)
   291  	b = jsonwire.TrimSuffixByte(b, ',')
   292  	e.Buf = b // store back truncated unflushed buffer
   293  
   294  	// Undo state changes.
   295  	e.Tokens.Last.decrement() // for object member value
   296  	e.Tokens.Last.decrement() // for object member name
   297  	if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
   298  		if e.Tokens.Last.isActiveNamespace() {
   299  			e.Namespaces.Last().removeLast()
   300  		}
   301  	}
   302  	e.Names.clearLast()
   303  	if prevName != nil {
   304  		e.Names.copyQuotedBuffer(e.Buf) // required by objectNameStack.replaceLastUnquotedName
   305  		e.Names.replaceLastUnquotedName(*prevName)
   306  	}
   307  	return true
   308  }
   309  
   310  // UnwriteOnlyObjectMemberName unwrites the only object member name
   311  // and returns the unquoted name.
   312  func (e *encoderState) UnwriteOnlyObjectMemberName() string {
   313  	if last := e.Tokens.Last; !last.isObject() || last.Length() != 1 {
   314  		panic("BUG: must be called on an object after writing first name")
   315  	}
   316  
   317  	// Unwrite the name and whitespace.
   318  	b := jsonwire.TrimSuffixString(e.Buf)
   319  	isVerbatim := bytes.IndexByte(e.Buf[len(b):], '\\') < 0
   320  	name := string(jsonwire.UnquoteMayCopy(e.Buf[len(b):], isVerbatim))
   321  	e.Buf = jsonwire.TrimSuffixWhitespace(b)
   322  
   323  	// Undo state changes.
   324  	e.Tokens.Last.decrement()
   325  	if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
   326  		if e.Tokens.Last.isActiveNamespace() {
   327  			e.Namespaces.Last().removeLast()
   328  		}
   329  	}
   330  	e.Names.clearLast()
   331  	return name
   332  }
   333  
   334  // WriteToken writes the next token and advances the internal write offset.
   335  //
   336  // The provided token kind must be consistent with the JSON grammar.
   337  // For example, it is an error to provide a number when the encoder
   338  // is expecting an object name (which is always a string), or
   339  // to provide an end object delimiter when the encoder is finishing an array.
   340  // If the provided token is invalid, then it reports a [SyntacticError] and
   341  // the internal state remains unchanged. The offset reported
   342  // in [SyntacticError] will be the [Encoder.OutputOffset] plus any delimiter
   343  // or whitespace characters that would have preceded the provided token.
   344  func (e *Encoder) WriteToken(t Token) error {
   345  	return e.s.WriteToken(t)
   346  }
   347  func (e *encoderState) WriteToken(t Token) error {
   348  	k := t.Kind()
   349  	b := e.Buf // use local variable to avoid mutating e in case of error
   350  
   351  	// Append any delimiters or optional whitespace.
   352  	b = e.Tokens.MayAppendDelim(b, k)
   353  	if e.Flags.Get(jsonflags.AnyWhitespace) {
   354  		b = e.appendWhitespace(b, k)
   355  	}
   356  	pos := len(b) // offset before the token
   357  
   358  	// Append the token to the output and to the state machine.
   359  	var err error
   360  	switch k {
   361  	case 'n':
   362  		b = append(b, "null"...)
   363  		err = e.Tokens.appendLiteral()
   364  	case 'f':
   365  		b = append(b, "false"...)
   366  		err = e.Tokens.appendLiteral()
   367  	case 't':
   368  		b = append(b, "true"...)
   369  		err = e.Tokens.appendLiteral()
   370  	case '"':
   371  		if b, err = t.appendString(b, &e.Flags); err != nil {
   372  			break
   373  		}
   374  		if e.Tokens.Last.NeedObjectName() {
   375  			if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
   376  				if !e.Tokens.Last.isValidNamespace() {
   377  					err = errInvalidNamespace
   378  					break
   379  				}
   380  				if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
   381  					err = wrapWithObjectName(ErrDuplicateName, b[pos:])
   382  					break
   383  				}
   384  			}
   385  			e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
   386  		}
   387  		err = e.Tokens.appendString()
   388  	case '0':
   389  		if b, err = t.appendNumber(b, &e.Flags); err != nil {
   390  			break
   391  		}
   392  		err = e.Tokens.appendNumber()
   393  	case '{':
   394  		b = append(b, '{')
   395  		if err = e.Tokens.pushObject(); err != nil {
   396  			break
   397  		}
   398  		e.Names.push()
   399  		if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
   400  			e.Namespaces.push()
   401  		}
   402  		e.Flags.Clear(jsonflags.TagFlags) // tags only apply to current depth
   403  	case '}':
   404  		b = append(b, '}')
   405  		if err = e.Tokens.popObject(); err != nil {
   406  			break
   407  		}
   408  		e.Names.pop()
   409  		if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
   410  			e.Namespaces.pop()
   411  		}
   412  	case '[':
   413  		b = append(b, '[')
   414  		err = e.Tokens.pushArray()
   415  		e.Flags.Clear(jsonflags.TagFlags) // tags only apply to current depth
   416  	case ']':
   417  		b = append(b, ']')
   418  		err = e.Tokens.popArray()
   419  	default:
   420  		err = errInvalidToken
   421  	}
   422  	if err != nil {
   423  		return wrapSyntacticError(e, err, pos, +1)
   424  	}
   425  
   426  	// Finish off the buffer and store it back into e.
   427  	e.Buf = b
   428  	if e.NeedFlush() {
   429  		return e.Flush()
   430  	}
   431  	return nil
   432  }
   433  
   434  // AppendRaw appends either a raw string (without double quotes) or number.
   435  // Specify safeASCII if the string output is guaranteed to be ASCII
   436  // without any characters (including '<', '>', and '&') that need escaping,
   437  // otherwise this will validate whether the string needs escaping.
   438  // The appended bytes for a JSON number must be valid.
   439  //
   440  // This is a specialized implementation of Encoder.WriteValue
   441  // that allows appending directly into the buffer.
   442  // It is only called from marshal logic in the "json" package.
   443  func (e *encoderState) AppendRaw(k Kind, safeASCII bool, appendFn func([]byte) ([]byte, error)) error {
   444  	b := e.Buf // use local variable to avoid mutating e in case of error
   445  
   446  	// Append any delimiters or optional whitespace.
   447  	b = e.Tokens.MayAppendDelim(b, k)
   448  	if e.Flags.Get(jsonflags.AnyWhitespace) {
   449  		b = e.appendWhitespace(b, k)
   450  	}
   451  	pos := len(b) // offset before the token
   452  
   453  	var err error
   454  	switch k {
   455  	case '"':
   456  		// Append directly into the encoder buffer by assuming that
   457  		// most of the time none of the characters need escaping.
   458  		b = append(b, '"')
   459  		if b, err = appendFn(b); err != nil {
   460  			return err
   461  		}
   462  		b = append(b, '"')
   463  
   464  		// Check whether we need to escape the string and if necessary
   465  		// copy it to a scratch buffer and then escape it back.
   466  		isVerbatim := safeASCII || !jsonwire.NeedEscape(b[pos+len(`"`):len(b)-len(`"`)])
   467  		if !isVerbatim {
   468  			var err error
   469  			b2 := append(e.availBuffer, b[pos+len(`"`):len(b)-len(`"`)]...)
   470  			b, err = jsonwire.AppendQuote(b[:pos], b2, &e.Flags)
   471  			e.availBuffer = b2[:0]
   472  			if err != nil {
   473  				return wrapSyntacticError(e, err, pos, +1)
   474  			}
   475  		}
   476  
   477  		// Update the state machine.
   478  		if e.Tokens.Last.NeedObjectName() {
   479  			if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
   480  				if !e.Tokens.Last.isValidNamespace() {
   481  					return wrapSyntacticError(e, errInvalidNamespace, pos, +1)
   482  				}
   483  				if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], isVerbatim) {
   484  					err = wrapWithObjectName(ErrDuplicateName, b[pos:])
   485  					return wrapSyntacticError(e, err, pos, +1)
   486  				}
   487  			}
   488  			e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
   489  		}
   490  		if err := e.Tokens.appendString(); err != nil {
   491  			return wrapSyntacticError(e, err, pos, +1)
   492  		}
   493  	case '0':
   494  		if b, err = appendFn(b); err != nil {
   495  			return err
   496  		}
   497  		if err := e.Tokens.appendNumber(); err != nil {
   498  			return wrapSyntacticError(e, err, pos, +1)
   499  		}
   500  	default:
   501  		panic("BUG: invalid kind")
   502  	}
   503  
   504  	// Finish off the buffer and store it back into e.
   505  	e.Buf = b
   506  	if e.NeedFlush() {
   507  		return e.Flush()
   508  	}
   509  	return nil
   510  }
   511  
   512  // WriteValue writes the next raw value and advances the internal write offset.
   513  // The Encoder does not simply copy the provided value verbatim, but
   514  // parses it to ensure that it is syntactically valid and reformats it
   515  // according to how the Encoder is configured to format whitespace and strings.
   516  // If [AllowInvalidUTF8] is specified, then any invalid UTF-8 is mangled
   517  // as the Unicode replacement character, U+FFFD.
   518  //
   519  // The provided value kind must be consistent with the JSON grammar
   520  // (see examples on [Encoder.WriteToken]). If the provided value is invalid,
   521  // then it reports a [SyntacticError] and the internal state remains unchanged.
   522  // The offset reported in [SyntacticError] will be the [Encoder.OutputOffset]
   523  // plus the offset into v of any encountered syntax error.
   524  func (e *Encoder) WriteValue(v Value) error {
   525  	return e.s.WriteValue(v)
   526  }
   527  func (e *encoderState) WriteValue(v Value) error {
   528  	e.maxValue |= len(v) // bitwise OR is a fast approximation of max
   529  
   530  	k := v.Kind()
   531  	b := e.Buf // use local variable to avoid mutating e in case of error
   532  
   533  	// Append any delimiters or optional whitespace.
   534  	b = e.Tokens.MayAppendDelim(b, k)
   535  	if e.Flags.Get(jsonflags.AnyWhitespace) {
   536  		b = e.appendWhitespace(b, k)
   537  	}
   538  	pos := len(b) // offset before the value
   539  
   540  	// Append the value to the output.
   541  	var n int
   542  	n += jsonwire.ConsumeWhitespace(v[n:])
   543  	b, m, err := e.reformatValue(b, v[n:], e.Tokens.Depth())
   544  	if err != nil {
   545  		return wrapSyntacticError(e, err, pos+n+m, +1)
   546  	}
   547  	n += m
   548  	n += jsonwire.ConsumeWhitespace(v[n:])
   549  	if len(v) > n {
   550  		err = jsonwire.NewInvalidCharacterError(v[n:], "after top-level value")
   551  		return wrapSyntacticError(e, err, pos+n, 0)
   552  	}
   553  
   554  	// Append the kind to the state machine.
   555  	switch k {
   556  	case 'n', 'f', 't':
   557  		err = e.Tokens.appendLiteral()
   558  	case '"':
   559  		if e.Tokens.Last.NeedObjectName() {
   560  			if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
   561  				if !e.Tokens.Last.isValidNamespace() {
   562  					err = errInvalidNamespace
   563  					break
   564  				}
   565  				if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
   566  					err = wrapWithObjectName(ErrDuplicateName, b[pos:])
   567  					break
   568  				}
   569  			}
   570  			e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
   571  		}
   572  		err = e.Tokens.appendString()
   573  	case '0':
   574  		err = e.Tokens.appendNumber()
   575  	case '{':
   576  		if err = e.Tokens.pushObject(); err != nil {
   577  			break
   578  		}
   579  		if err = e.Tokens.popObject(); err != nil {
   580  			panic("BUG: popObject should never fail immediately after pushObject: " + err.Error())
   581  		}
   582  		if e.Flags.Get(jsonflags.ReorderRawObjects) {
   583  			mustReorderObjects(b[pos:])
   584  		}
   585  	case '[':
   586  		if err = e.Tokens.pushArray(); err != nil {
   587  			break
   588  		}
   589  		if err = e.Tokens.popArray(); err != nil {
   590  			panic("BUG: popArray should never fail immediately after pushArray: " + err.Error())
   591  		}
   592  		if e.Flags.Get(jsonflags.ReorderRawObjects) {
   593  			mustReorderObjects(b[pos:])
   594  		}
   595  	}
   596  	if err != nil {
   597  		return wrapSyntacticError(e, err, pos, +1)
   598  	}
   599  
   600  	// Finish off the buffer and store it back into e.
   601  	e.Buf = b
   602  	if e.NeedFlush() {
   603  		return e.Flush()
   604  	}
   605  	return nil
   606  }
   607  
   608  // CountNextDelimWhitespace counts the number of bytes of delimiter and
   609  // whitespace bytes assuming the upcoming token is a JSON value.
   610  // This method is used for error reporting at the semantic layer.
   611  func (e *encoderState) CountNextDelimWhitespace() (n int) {
   612  	const next = Kind('"') // arbitrary kind as next JSON value
   613  	delim := e.Tokens.needDelim(next)
   614  	if delim > 0 {
   615  		n += len(",") | len(":")
   616  	}
   617  	if delim == ':' {
   618  		if e.Flags.Get(jsonflags.SpaceAfterColon) {
   619  			n += len(" ")
   620  		}
   621  	} else {
   622  		if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) {
   623  			n += len(" ")
   624  		}
   625  		if e.Flags.Get(jsonflags.Multiline) {
   626  			if m := e.Tokens.NeedIndent(next); m > 0 {
   627  				n += len("\n") + len(e.IndentPrefix) + (m-1)*len(e.Indent)
   628  			}
   629  		}
   630  	}
   631  	return n
   632  }
   633  
   634  // appendWhitespace appends whitespace that immediately precedes the next token.
   635  func (e *encoderState) appendWhitespace(b []byte, next Kind) []byte {
   636  	if delim := e.Tokens.needDelim(next); delim == ':' {
   637  		if e.Flags.Get(jsonflags.SpaceAfterColon) {
   638  			b = append(b, ' ')
   639  		}
   640  	} else {
   641  		if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) {
   642  			b = append(b, ' ')
   643  		}
   644  		if e.Flags.Get(jsonflags.Multiline) {
   645  			b = e.AppendIndent(b, e.Tokens.NeedIndent(next))
   646  		}
   647  	}
   648  	return b
   649  }
   650  
   651  // AppendIndent appends the appropriate number of indentation characters
   652  // for the current nested level, n.
   653  func (e *encoderState) AppendIndent(b []byte, n int) []byte {
   654  	if n == 0 {
   655  		return b
   656  	}
   657  	b = append(b, '\n')
   658  	b = append(b, e.IndentPrefix...)
   659  	for ; n > 1; n-- {
   660  		b = append(b, e.Indent...)
   661  	}
   662  	return b
   663  }
   664  
   665  // reformatValue parses a JSON value from the start of src and
   666  // appends it to the end of dst, reformatting whitespace and strings as needed.
   667  // It returns the extended dst buffer and the number of consumed input bytes.
   668  func (e *encoderState) reformatValue(dst []byte, src Value, depth int) ([]byte, int, error) {
   669  	// TODO: Should this update ValueFlags as input?
   670  	if len(src) == 0 {
   671  		return dst, 0, io.ErrUnexpectedEOF
   672  	}
   673  	switch k := Kind(src[0]).normalize(); k {
   674  	case 'n':
   675  		if jsonwire.ConsumeNull(src) == 0 {
   676  			n, err := jsonwire.ConsumeLiteral(src, "null")
   677  			return dst, n, err
   678  		}
   679  		return append(dst, "null"...), len("null"), nil
   680  	case 'f':
   681  		if jsonwire.ConsumeFalse(src) == 0 {
   682  			n, err := jsonwire.ConsumeLiteral(src, "false")
   683  			return dst, n, err
   684  		}
   685  		return append(dst, "false"...), len("false"), nil
   686  	case 't':
   687  		if jsonwire.ConsumeTrue(src) == 0 {
   688  			n, err := jsonwire.ConsumeLiteral(src, "true")
   689  			return dst, n, err
   690  		}
   691  		return append(dst, "true"...), len("true"), nil
   692  	case '"':
   693  		if n := jsonwire.ConsumeSimpleString(src); n > 0 {
   694  			dst = append(dst, src[:n]...) // copy simple strings verbatim
   695  			return dst, n, nil
   696  		}
   697  		return jsonwire.ReformatString(dst, src, &e.Flags)
   698  	case '0':
   699  		if n := jsonwire.ConsumeSimpleNumber(src); n > 0 && !e.Flags.Get(jsonflags.CanonicalizeNumbers) {
   700  			dst = append(dst, src[:n]...) // copy simple numbers verbatim
   701  			return dst, n, nil
   702  		}
   703  		return jsonwire.ReformatNumber(dst, src, &e.Flags)
   704  	case '{':
   705  		return e.reformatObject(dst, src, depth)
   706  	case '[':
   707  		return e.reformatArray(dst, src, depth)
   708  	default:
   709  		return dst, 0, jsonwire.NewInvalidCharacterError(src, "at start of value")
   710  	}
   711  }
   712  
   713  // reformatObject parses a JSON object from the start of src and
   714  // appends it to the end of dst, reformatting whitespace and strings as needed.
   715  // It returns the extended dst buffer and the number of consumed input bytes.
   716  func (e *encoderState) reformatObject(dst []byte, src Value, depth int) ([]byte, int, error) {
   717  	// Append object begin.
   718  	if len(src) == 0 || src[0] != '{' {
   719  		panic("BUG: reformatObject must be called with a buffer that starts with '{'")
   720  	} else if depth == maxNestingDepth+1 {
   721  		return dst, 0, errMaxDepth
   722  	}
   723  	dst = append(dst, '{')
   724  	n := len("{")
   725  
   726  	// Append (possible) object end.
   727  	n += jsonwire.ConsumeWhitespace(src[n:])
   728  	if uint(len(src)) <= uint(n) {
   729  		return dst, n, io.ErrUnexpectedEOF
   730  	}
   731  	if src[n] == '}' {
   732  		dst = append(dst, '}')
   733  		n += len("}")
   734  		return dst, n, nil
   735  	}
   736  
   737  	var err error
   738  	var names *objectNamespace
   739  	if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
   740  		e.Namespaces.push()
   741  		defer e.Namespaces.pop()
   742  		names = e.Namespaces.Last()
   743  	}
   744  	depth++
   745  	for {
   746  		// Append optional newline and indentation.
   747  		if e.Flags.Get(jsonflags.Multiline) {
   748  			dst = e.AppendIndent(dst, depth)
   749  		}
   750  
   751  		// Append object name.
   752  		n += jsonwire.ConsumeWhitespace(src[n:])
   753  		if uint(len(src)) <= uint(n) {
   754  			return dst, n, io.ErrUnexpectedEOF
   755  		}
   756  		m := jsonwire.ConsumeSimpleString(src[n:])
   757  		isVerbatim := m > 0
   758  		if isVerbatim {
   759  			dst = append(dst, src[n:n+m]...)
   760  		} else {
   761  			dst, m, err = jsonwire.ReformatString(dst, src[n:], &e.Flags)
   762  			if err != nil {
   763  				return dst, n + m, err
   764  			}
   765  		}
   766  		quotedName := src[n : n+m]
   767  		if !e.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(quotedName, isVerbatim) {
   768  			return dst, n, wrapWithObjectName(ErrDuplicateName, quotedName)
   769  		}
   770  		n += m
   771  
   772  		// Append colon.
   773  		n += jsonwire.ConsumeWhitespace(src[n:])
   774  		if uint(len(src)) <= uint(n) {
   775  			return dst, n, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName)
   776  		}
   777  		if src[n] != ':' {
   778  			err = jsonwire.NewInvalidCharacterError(src[n:], "after object name (expecting ':')")
   779  			return dst, n, wrapWithObjectName(err, quotedName)
   780  		}
   781  		dst = append(dst, ':')
   782  		n += len(":")
   783  		if e.Flags.Get(jsonflags.SpaceAfterColon) {
   784  			dst = append(dst, ' ')
   785  		}
   786  
   787  		// Append object value.
   788  		n += jsonwire.ConsumeWhitespace(src[n:])
   789  		if uint(len(src)) <= uint(n) {
   790  			return dst, n, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName)
   791  		}
   792  		dst, m, err = e.reformatValue(dst, src[n:], depth)
   793  		if err != nil {
   794  			return dst, n + m, wrapWithObjectName(err, quotedName)
   795  		}
   796  		n += m
   797  
   798  		// Append comma or object end.
   799  		n += jsonwire.ConsumeWhitespace(src[n:])
   800  		if uint(len(src)) <= uint(n) {
   801  			return dst, n, io.ErrUnexpectedEOF
   802  		}
   803  		switch src[n] {
   804  		case ',':
   805  			dst = append(dst, ',')
   806  			if e.Flags.Get(jsonflags.SpaceAfterComma) {
   807  				dst = append(dst, ' ')
   808  			}
   809  			n += len(",")
   810  			continue
   811  		case '}':
   812  			if e.Flags.Get(jsonflags.Multiline) {
   813  				dst = e.AppendIndent(dst, depth-1)
   814  			}
   815  			dst = append(dst, '}')
   816  			n += len("}")
   817  			return dst, n, nil
   818  		default:
   819  			return dst, n, jsonwire.NewInvalidCharacterError(src[n:], "after object value (expecting ',' or '}')")
   820  		}
   821  	}
   822  }
   823  
   824  // reformatArray parses a JSON array from the start of src and
   825  // appends it to the end of dst, reformatting whitespace and strings as needed.
   826  // It returns the extended dst buffer and the number of consumed input bytes.
   827  func (e *encoderState) reformatArray(dst []byte, src Value, depth int) ([]byte, int, error) {
   828  	// Append array begin.
   829  	if len(src) == 0 || src[0] != '[' {
   830  		panic("BUG: reformatArray must be called with a buffer that starts with '['")
   831  	} else if depth == maxNestingDepth+1 {
   832  		return dst, 0, errMaxDepth
   833  	}
   834  	dst = append(dst, '[')
   835  	n := len("[")
   836  
   837  	// Append (possible) array end.
   838  	n += jsonwire.ConsumeWhitespace(src[n:])
   839  	if uint(len(src)) <= uint(n) {
   840  		return dst, n, io.ErrUnexpectedEOF
   841  	}
   842  	if src[n] == ']' {
   843  		dst = append(dst, ']')
   844  		n += len("]")
   845  		return dst, n, nil
   846  	}
   847  
   848  	var idx int64
   849  	var err error
   850  	depth++
   851  	for {
   852  		// Append optional newline and indentation.
   853  		if e.Flags.Get(jsonflags.Multiline) {
   854  			dst = e.AppendIndent(dst, depth)
   855  		}
   856  
   857  		// Append array value.
   858  		n += jsonwire.ConsumeWhitespace(src[n:])
   859  		if uint(len(src)) <= uint(n) {
   860  			return dst, n, io.ErrUnexpectedEOF
   861  		}
   862  		var m int
   863  		dst, m, err = e.reformatValue(dst, src[n:], depth)
   864  		if err != nil {
   865  			return dst, n + m, wrapWithArrayIndex(err, idx)
   866  		}
   867  		n += m
   868  
   869  		// Append comma or array end.
   870  		n += jsonwire.ConsumeWhitespace(src[n:])
   871  		if uint(len(src)) <= uint(n) {
   872  			return dst, n, io.ErrUnexpectedEOF
   873  		}
   874  		switch src[n] {
   875  		case ',':
   876  			dst = append(dst, ',')
   877  			if e.Flags.Get(jsonflags.SpaceAfterComma) {
   878  				dst = append(dst, ' ')
   879  			}
   880  			n += len(",")
   881  			idx++
   882  			continue
   883  		case ']':
   884  			if e.Flags.Get(jsonflags.Multiline) {
   885  				dst = e.AppendIndent(dst, depth-1)
   886  			}
   887  			dst = append(dst, ']')
   888  			n += len("]")
   889  			return dst, n, nil
   890  		default:
   891  			return dst, n, jsonwire.NewInvalidCharacterError(src[n:], "after array value (expecting ',' or ']')")
   892  		}
   893  	}
   894  }
   895  
   896  // OutputOffset returns the current output byte offset. It gives the location
   897  // of the next byte immediately after the most recently written token or value.
   898  // The number of bytes actually written to the underlying [io.Writer] may be less
   899  // than this offset due to internal buffering effects.
   900  func (e *Encoder) OutputOffset() int64 {
   901  	return e.s.previousOffsetEnd()
   902  }
   903  
   904  // AvailableBuffer returns a zero-length buffer with a possible non-zero capacity.
   905  // This buffer is intended to be used to populate a [Value]
   906  // being passed to an immediately succeeding [Encoder.WriteValue] call.
   907  //
   908  // Example usage:
   909  //
   910  //	b := e.AvailableBuffer()
   911  //	b = append(b, '"')
   912  //	b = appendString(b, v) // append the string formatting of v
   913  //	b = append(b, '"')
   914  //	... := e.WriteValue(b)
   915  //
   916  // WriteValue expects a JSON value. Using AvailableBuffer to manually construct
   917  // a value requires caution to avoid producing an invalid JSON value that would
   918  // then cause WriteValue to fail.
   919  func (e *Encoder) AvailableBuffer() []byte {
   920  	// NOTE: We don't return e.Buf[len(e.Buf):cap(e.Buf)] since WriteValue would
   921  	// need to take special care to avoid mangling the data while reformatting.
   922  	// WriteValue can't easily identify whether the input Value aliases e.Buf
   923  	// without using unsafe.Pointer. Thus, we just return a different buffer.
   924  	// Should this ever alias e.Buf, we need to consider how it operates with
   925  	// the specialized performance optimization for bytes.Buffer.
   926  	n := 1 << bits.Len(uint(e.s.maxValue|63)) // fast approximation for max length
   927  	if cap(e.s.availBuffer) < n {
   928  		e.s.availBuffer = make([]byte, 0, n)
   929  	}
   930  	return e.s.availBuffer
   931  }
   932  
   933  // StackDepth returns the depth of the state machine for written JSON data.
   934  // Each level on the stack represents a nested JSON object or array.
   935  // It is incremented whenever a [BeginObject] or [BeginArray] token is encountered
   936  // and decremented whenever an [EndObject] or [EndArray] token is encountered.
   937  //
   938  // StackDepth returns 0 when not inside any object or array.
   939  // In particular, it returns 0 before any tokens have been written,
   940  // after any top-level value has been written, and between values
   941  // when encoding a stream of top-level values (e.g., NDJSON).
   942  // StackDepth returns 1 inside a top-level object or array,
   943  // 2 inside a nested object or array, and so on.
   944  //
   945  // For example, consider encoding the following JSON:
   946  //
   947  //	{"a": [1, 2], "b": {"c": 3}}
   948  //
   949  // While encoding, StackDepth would report the following:
   950  //
   951  //   - At the start, StackDepth reports 0.
   952  //   - After encoding the outer '{', StackDepth reports 1.
   953  //   - After encoding the inner '[', StackDepth reports 2.
   954  //   - After encoding the inner ']', StackDepth reports 1.
   955  //   - After encoding the outer '}', StackDepth reports 0.
   956  func (e *Encoder) StackDepth() int {
   957  	// NOTE: Keep in sync with Decoder.StackDepth.
   958  	return e.s.Tokens.Depth() - 1
   959  }
   960  
   961  // StackIndex returns information about the specified stack level.
   962  // It must be a number between 0 and [Encoder.StackDepth], inclusive.
   963  // For each level, it reports the kind:
   964  //
   965  //   - [KindInvalid] for a level of zero,
   966  //   - [KindBeginObject] for a level representing a JSON object, and
   967  //   - [KindBeginArray] for a level representing a JSON array.
   968  //
   969  // It also reports the length of that JSON object or array encoded so far.
   970  // Each name and value in a JSON object is counted separately,
   971  // so the effective number of members would be half the length.
   972  // A complete JSON object must have an even length.
   973  func (e *Encoder) StackIndex(i int) (Kind, int64) {
   974  	// NOTE: Keep in sync with Decoder.StackIndex.
   975  	switch s := e.s.Tokens.index(i); {
   976  	case i > 0 && s.isObject():
   977  		return '{', s.Length()
   978  	case i > 0 && s.isArray():
   979  		return '[', s.Length()
   980  	default:
   981  		return 0, s.Length()
   982  	}
   983  }
   984  
   985  // StackPointer returns a JSON Pointer (RFC 6901) to the most recently written value.
   986  func (e *Encoder) StackPointer() Pointer {
   987  	return Pointer(e.s.AppendStackPointer(nil, -1))
   988  }
   989  
   990  func (e *encoderState) AppendStackPointer(b []byte, where int) []byte {
   991  	e.Names.copyQuotedBuffer(e.Buf)
   992  	return e.state.appendStackPointer(b, where)
   993  }
   994  

View as plain text