Source file src/encoding/json/v2_stream.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.jsonv2
     6  
     7  package json
     8  
     9  import (
    10  	"bytes"
    11  	"errors"
    12  	"io"
    13  
    14  	"encoding/json/jsontext"
    15  	jsonv2 "encoding/json/v2"
    16  )
    17  
// A Decoder reads and decodes JSON values from an input stream.
//
// It layers the v1 streaming API ([Decoder.Decode], [Decoder.Token],
// [Decoder.More], ...) on top of a [jsontext.Decoder].
type Decoder struct {
	dec  *jsontext.Decoder // underlying reader built by NewDecoder
	opts jsonv2.Options    // options passed to jsonv2.Unmarshal by Decode; adjusted by UseNumber and DisallowUnknownFields
	err  error             // sticky error recorded by Decode or More; once set, Decode and Token return it

	// hadPeeked reports whether [Decoder.More] was called.
	// It is reset by [Decoder.Decode] and [Decoder.Token].
	// [Decoder.InputOffset] consults it to decide whether to skip
	// over whitespace that precedes the next token.
	hadPeeked bool
}
    28  
    29  // NewDecoder returns a new decoder that reads from r.
    30  //
    31  // The decoder introduces its own buffering and may
    32  // read data from r beyond the JSON values requested.
    33  func NewDecoder(r io.Reader) *Decoder {
    34  	// Hide bytes.Buffer from jsontext since it implements optimizations that
    35  	// also limits certain ways it could be used. For example, one cannot write
    36  	// to the bytes.Buffer while it is in use by jsontext.Decoder.
    37  	if _, ok := r.(*bytes.Buffer); ok {
    38  		r = struct{ io.Reader }{r}
    39  	}
    40  
    41  	dec := new(Decoder)
    42  	dec.opts = DefaultOptionsV1()
    43  	dec.dec = jsontext.NewDecoder(r, dec.opts)
    44  	return dec
    45  }
    46  
    47  // UseNumber causes the Decoder to unmarshal a number into an
    48  // interface value as a [Number] instead of as a float64.
    49  func (dec *Decoder) UseNumber() {
    50  	if useNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber); !useNumber {
    51  		dec.opts = jsonv2.JoinOptions(dec.opts, unmarshalAnyWithRawNumber(true))
    52  	}
    53  }
    54  
    55  // DisallowUnknownFields causes the Decoder to return an error when the destination
    56  // is a struct and the input contains object keys which do not match any
    57  // non-ignored, exported fields in the destination.
    58  func (dec *Decoder) DisallowUnknownFields() {
    59  	if reject, _ := jsonv2.GetOption(dec.opts, jsonv2.RejectUnknownMembers); !reject {
    60  		dec.opts = jsonv2.JoinOptions(dec.opts, jsonv2.RejectUnknownMembers(true))
    61  	}
    62  }
    63  
    64  // Decode reads the next JSON-encoded value from its
    65  // input and stores it in the value pointed to by v.
    66  //
    67  // See the documentation for [Unmarshal] for details about
    68  // the conversion of JSON into a Go value.
    69  func (dec *Decoder) Decode(v any) error {
    70  	if dec.err != nil {
    71  		return dec.err
    72  	}
    73  	b, err := dec.dec.ReadValue()
    74  	if err != nil {
    75  		dec.err = transformSyntacticError(err)
    76  		if dec.err.Error() == errUnexpectedEnd.Error() {
    77  			// NOTE: Decode has always been inconsistent with Unmarshal
    78  			// with regard to the exact error value for truncated input.
    79  			dec.err = io.ErrUnexpectedEOF
    80  		}
    81  		return dec.err
    82  	}
    83  	dec.hadPeeked = false
    84  	return jsonv2.Unmarshal(b, v, dec.opts)
    85  }
    86  
    87  // Buffered returns a reader of the data remaining in the Decoder's
    88  // buffer. The reader is valid until the next call to [Decoder.Decode].
    89  func (dec *Decoder) Buffered() io.Reader {
    90  	return bytes.NewReader(dec.dec.UnreadBuffer())
    91  }
    92  
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w    io.Writer      // destination that Encode writes each value to
	opts jsonv2.Options // options used by Encode; adjusted by SetEscapeHTML
	err  error          // sticky error from a failed write; once set, Encode returns it

	// indentBuf is scratch space that Encode resets and reuses
	// to hold the indented form of a value.
	indentBuf bytes.Buffer

	// indentPrefix and indentValue are set by SetIndent.
	// Encode indents its output only when at least one is non-empty.
	indentPrefix string
	indentValue  string
}
   104  
   105  // NewEncoder returns a new encoder that writes to w.
   106  func NewEncoder(w io.Writer) *Encoder {
   107  	enc := new(Encoder)
   108  	enc.w = w
   109  	enc.opts = DefaultOptionsV1()
   110  	return enc
   111  }
   112  
   113  // Encode writes the JSON encoding of v to the stream,
   114  // followed by a newline character.
   115  //
   116  // See the documentation for [Marshal] for details about the
   117  // conversion of Go values to JSON.
   118  func (enc *Encoder) Encode(v any) error {
   119  	if enc.err != nil {
   120  		return enc.err
   121  	}
   122  
   123  	e := export.GetBufferedEncoder(enc.opts)
   124  	defer export.PutBufferedEncoder(e)
   125  	if err := jsonv2.MarshalEncode(e, v); err != nil {
   126  		return err
   127  	}
   128  	b := export.Encoder(e).Buf // b must not leak current scope
   129  	if len(enc.indentPrefix)+len(enc.indentValue) > 0 {
   130  		enc.indentBuf.Reset()
   131  		if err := Indent(&enc.indentBuf, b, enc.indentPrefix, enc.indentValue); err != nil {
   132  			return err
   133  		}
   134  		b = enc.indentBuf.Bytes()
   135  	}
   136  	b = append(b, '\n')
   137  
   138  	if _, err := enc.w.Write(b); err != nil {
   139  		enc.err = err
   140  		return err
   141  	}
   142  	return nil
   143  }
   144  
   145  // SetIndent instructs the encoder to format each subsequent encoded
   146  // value as if indented by the package-level function Indent(dst, src, prefix, indent).
   147  // Calling SetIndent("", "") disables indentation.
   148  func (enc *Encoder) SetIndent(prefix, indent string) {
   149  	// NOTE: Do not rely on the newer [jsontext.WithIndent] option since
   150  	// the v1 [Indent] behavior has historical bugs that cannot be changed
   151  	// for backward compatibility reasons.
   152  	enc.indentPrefix = prefix
   153  	enc.indentValue = indent
   154  }
   155  
   156  // SetEscapeHTML specifies whether problematic HTML characters
   157  // should be escaped inside JSON quoted strings.
   158  // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
   159  // to avoid certain safety problems that can arise when embedding JSON in HTML.
   160  //
   161  // In non-HTML settings where the escaping interferes with the readability
   162  // of the output, SetEscapeHTML(false) disables this behavior.
   163  func (enc *Encoder) SetEscapeHTML(on bool) {
   164  	if escape, _ := jsonv2.GetOption(enc.opts, jsontext.EscapeForHTML); escape != on {
   165  		enc.opts = jsonv2.JoinOptions(enc.opts, jsontext.EscapeForHTML(on))
   166  	}
   167  }
   168  
// RawMessage is a raw encoded JSON value.
// It implements [Marshaler] and [Unmarshaler] and can
// be used to delay JSON decoding or precompute a JSON encoding.
//
// It is declared as an alias of [jsontext.Value], so the two
// names denote the same type.
type RawMessage = jsontext.Value
   173  
// A Token holds a value of one of these types:
//
//   - [Delim], for the four JSON delimiters [ ] { }
//   - bool, for JSON booleans
//   - float64, for JSON numbers (unless [Decoder.UseNumber] was called)
//   - [Number], for JSON numbers (if [Decoder.UseNumber] was called)
//   - string, for JSON string literals
//   - nil, for JSON null
type Token any
   183  
   184  // A Delim is a JSON array or object delimiter, one of [ ] { or }.
   185  type Delim rune
   186  
   187  func (d Delim) String() string {
   188  	return string(d)
   189  }
   190  
   191  // Token returns the next JSON token in the input stream.
   192  // At the end of the input stream, Token returns nil, [io.EOF].
   193  //
   194  // Token guarantees that the delimiters [ ] { } it returns are
   195  // properly nested and matched: if Token encounters an unexpected
   196  // delimiter in the input, it will return an error.
   197  //
   198  // The input stream consists of basic JSON values—bool, string,
   199  // number, and null—along with delimiters [ ] { } of type [Delim]
   200  // to mark the start and end of arrays and objects.
   201  // Commas and colons are elided.
   202  func (dec *Decoder) Token() (Token, error) {
   203  	if dec.err != nil {
   204  		return nil, dec.err
   205  	}
   206  	tok, err := dec.dec.ReadToken()
   207  	if err != nil {
   208  		// Historically, v1 would report just [io.EOF] if
   209  		// the stream is a prefix of a valid JSON value.
   210  		// It reports an unwrapped [io.ErrUnexpectedEOF] if
   211  		// truncated within a JSON token such as a literal, number, or string.
   212  		if errors.Is(err, io.ErrUnexpectedEOF) {
   213  			if len(bytes.Trim(dec.dec.UnreadBuffer(), " \r\n\t,:")) == 0 {
   214  				return nil, io.EOF
   215  			}
   216  			return nil, io.ErrUnexpectedEOF
   217  		}
   218  		return nil, transformSyntacticError(err)
   219  	}
   220  	dec.hadPeeked = false
   221  	switch k := tok.Kind(); k {
   222  	case 'n':
   223  		return nil, nil
   224  	case 'f':
   225  		return false, nil
   226  	case 't':
   227  		return true, nil
   228  	case '"':
   229  		return tok.String(), nil
   230  	case '0':
   231  		if useNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber); useNumber {
   232  			return Number(tok.String()), nil
   233  		}
   234  		return tok.Float(), nil
   235  	case '{', '}', '[', ']':
   236  		return Delim(k), nil
   237  	default:
   238  		panic("unreachable")
   239  	}
   240  }
   241  
   242  // More reports whether there is another element in the
   243  // current array or object being parsed.
   244  func (dec *Decoder) More() bool {
   245  	dec.hadPeeked = true
   246  	k := dec.dec.PeekKind()
   247  	if k == 0 {
   248  		if dec.err == nil {
   249  			// PeekKind doesn't distinguish between EOF and error,
   250  			// so read the next token to see which we get.
   251  			_, err := dec.dec.ReadToken()
   252  			if err == nil {
   253  				// This is only possible if jsontext violates its documentation.
   254  				err = errors.New("json: successful read after failed peek")
   255  			}
   256  			dec.err = transformSyntacticError(err)
   257  		}
   258  		return dec.err != io.EOF
   259  	}
   260  	return k != ']' && k != '}'
   261  }
   262  
   263  // InputOffset returns the input stream byte offset of the current decoder position.
   264  // The offset gives the location of the end of the most recently returned token
   265  // and the beginning of the next token.
   266  func (dec *Decoder) InputOffset() int64 {
   267  	offset := dec.dec.InputOffset()
   268  	if dec.hadPeeked {
   269  		// Historically, InputOffset reported the location of
   270  		// the end of the most recently returned token
   271  		// unless [Decoder.More] is called, in which case, it reported
   272  		// the beginning of the next token.
   273  		unreadBuffer := dec.dec.UnreadBuffer()
   274  		trailingTokens := bytes.TrimLeft(unreadBuffer, " \n\r\t")
   275  		if len(trailingTokens) > 0 {
   276  			leadingWhitespace := len(unreadBuffer) - len(trailingTokens)
   277  			offset += int64(leadingWhitespace)
   278  		}
   279  	}
   280  	return offset
   281  }
   282  

View as plain text