Source file src/encoding/json/v2_stream.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.jsonv2
     6  
     7  package json
     8  
     9  import (
    10  	"bytes"
    11  	"errors"
    12  	"io"
    13  	"reflect"
    14  
    15  	"encoding/json/jsontext"
    16  	jsonv2 "encoding/json/v2"
    17  )
    18  
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	dec  *jsontext.Decoder // underlying jsontext decoder that reads the input stream
	opts jsonv2.Options    // unmarshal options; NewDecoder starts from DefaultOptionsV1
	err  error             // sticky error; once set, Decode and Token return it immediately

	// hadPeeked reports whether [Decoder.More] was called.
	// It is reset by [Decoder.Decode] and [Decoder.Token].
	// [Decoder.InputOffset] consults it to decide whether to
	// skip over whitespace preceding the next token.
	hadPeeked bool
}
    29  
    30  // NewDecoder returns a new decoder that reads from r.
    31  //
    32  // The decoder introduces its own buffering and may
    33  // read data from r beyond the JSON values requested.
    34  func NewDecoder(r io.Reader) *Decoder {
    35  	// Hide bytes.Buffer from jsontext since it implements optimizations that
    36  	// also limits certain ways it could be used. For example, one cannot write
    37  	// to the bytes.Buffer while it is in use by jsontext.Decoder.
    38  	if _, ok := r.(*bytes.Buffer); ok {
    39  		r = struct{ io.Reader }{r}
    40  	}
    41  
    42  	dec := new(Decoder)
    43  	dec.opts = DefaultOptionsV1()
    44  	dec.dec = jsontext.NewDecoder(r, dec.opts)
    45  	return dec
    46  }
    47  
    48  // UseNumber causes the Decoder to unmarshal a number into an
    49  // interface value as a [Number] instead of as a float64.
    50  func (dec *Decoder) UseNumber() {
    51  	if useNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber); !useNumber {
    52  		dec.opts = jsonv2.JoinOptions(dec.opts, unmarshalAnyWithRawNumber(true))
    53  	}
    54  }
    55  
    56  // DisallowUnknownFields causes the Decoder to return an error when the destination
    57  // is a struct and the input contains object keys which do not match any
    58  // non-ignored, exported fields in the destination.
    59  func (dec *Decoder) DisallowUnknownFields() {
    60  	if reject, _ := jsonv2.GetOption(dec.opts, jsonv2.RejectUnknownMembers); !reject {
    61  		dec.opts = jsonv2.JoinOptions(dec.opts, jsonv2.RejectUnknownMembers(true))
    62  	}
    63  }
    64  
    65  // Decode reads the next JSON-encoded value from its
    66  // input and stores it in the value pointed to by v.
    67  //
    68  // See the documentation for [Unmarshal] for details about
    69  // the conversion of JSON into a Go value.
    70  func (dec *Decoder) Decode(v any) error {
    71  	if dec.err != nil {
    72  		return dec.err
    73  	}
    74  	b, err := dec.dec.ReadValue()
    75  	if err != nil {
    76  		dec.err = transformSyntacticError(err)
    77  		if dec.err.Error() == errUnexpectedEnd.Error() {
    78  			// NOTE: Decode has always been inconsistent with Unmarshal
    79  			// with regard to the exact error value for truncated input.
    80  			dec.err = io.ErrUnexpectedEOF
    81  		}
    82  		return dec.err
    83  	}
    84  	dec.hadPeeked = false
    85  	return jsonv2.Unmarshal(b, v, dec.opts)
    86  }
    87  
    88  // Buffered returns a reader of the data remaining in the Decoder's
    89  // buffer. The reader is valid until the next call to [Decoder.Decode].
    90  func (dec *Decoder) Buffered() io.Reader {
    91  	return bytes.NewReader(dec.dec.UnreadBuffer())
    92  }
    93  
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w    io.Writer      // destination that Encode writes to
	opts jsonv2.Options // marshal options; NewEncoder starts from DefaultOptionsV1
	err  error          // sticky error from w.Write; later Encode calls return it

	indentBuf bytes.Buffer // scratch buffer holding indented output, reset on each use

	indentPrefix string // prefix argument passed to Indent; set by SetIndent
	indentValue  string // indent argument passed to Indent; set by SetIndent
}
   105  
   106  // NewEncoder returns a new encoder that writes to w.
   107  func NewEncoder(w io.Writer) *Encoder {
   108  	enc := new(Encoder)
   109  	enc.w = w
   110  	enc.opts = DefaultOptionsV1()
   111  	return enc
   112  }
   113  
   114  // Encode writes the JSON encoding of v to the stream,
   115  // followed by a newline character.
   116  //
   117  // See the documentation for [Marshal] for details about the
   118  // conversion of Go values to JSON.
   119  func (enc *Encoder) Encode(v any) error {
   120  	if enc.err != nil {
   121  		return enc.err
   122  	}
   123  
   124  	e := export.GetBufferedEncoder(enc.opts)
   125  	defer export.PutBufferedEncoder(e)
   126  	if err := jsonv2.MarshalEncode(e, v); err != nil {
   127  		return err
   128  	}
   129  	b := export.Encoder(e).Buf // b must not leak current scope
   130  	if len(enc.indentPrefix)+len(enc.indentValue) > 0 {
   131  		enc.indentBuf.Reset()
   132  		if err := Indent(&enc.indentBuf, b, enc.indentPrefix, enc.indentValue); err != nil {
   133  			return err
   134  		}
   135  		b = enc.indentBuf.Bytes()
   136  	}
   137  	b = append(b, '\n')
   138  
   139  	if _, err := enc.w.Write(b); err != nil {
   140  		enc.err = err
   141  		return err
   142  	}
   143  	return nil
   144  }
   145  
   146  // SetIndent instructs the encoder to format each subsequent encoded
   147  // value as if indented by the package-level function Indent(dst, src, prefix, indent).
   148  // Calling SetIndent("", "") disables indentation.
   149  func (enc *Encoder) SetIndent(prefix, indent string) {
   150  	// NOTE: Do not rely on the newer [jsontext.WithIndent] option since
   151  	// the v1 [Indent] behavior has historical bugs that cannot be changed
   152  	// for backward compatibility reasons.
   153  	enc.indentPrefix = prefix
   154  	enc.indentValue = indent
   155  }
   156  
   157  // SetEscapeHTML specifies whether problematic HTML characters
   158  // should be escaped inside JSON quoted strings.
   159  // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
   160  // to avoid certain safety problems that can arise when embedding JSON in HTML.
   161  //
   162  // In non-HTML settings where the escaping interferes with the readability
   163  // of the output, SetEscapeHTML(false) disables this behavior.
   164  func (enc *Encoder) SetEscapeHTML(on bool) {
   165  	if escape, _ := jsonv2.GetOption(enc.opts, jsontext.EscapeForHTML); escape != on {
   166  		enc.opts = jsonv2.JoinOptions(enc.opts, jsontext.EscapeForHTML(on))
   167  	}
   168  }
   169  
// RawMessage is a raw encoded JSON value.
// It implements [Marshaler] and [Unmarshaler] and can
// be used to delay JSON decoding or precompute a JSON encoding.
//
// RawMessage is declared as an alias of [jsontext.Value],
// so the two names denote the same type.
type RawMessage = jsontext.Value
   174  
// A Token holds a value of one of these types:
//
//   - [Delim], for the four JSON delimiters [ ] { }
//   - bool, for JSON booleans
//   - float64, for JSON numbers (the default)
//   - [Number], for JSON numbers (after [Decoder.UseNumber] has been called)
//   - string, for JSON string literals
//   - nil, for JSON null
type Token any
   184  
   185  // A Delim is a JSON array or object delimiter, one of [ ] { or }.
   186  type Delim rune
   187  
   188  func (d Delim) String() string {
   189  	return string(d)
   190  }
   191  
   192  // Token returns the next JSON token in the input stream.
   193  // At the end of the input stream, Token returns nil, [io.EOF].
   194  //
   195  // Token guarantees that the delimiters [ ] { } it returns are
   196  // properly nested and matched: if Token encounters an unexpected
   197  // delimiter in the input, it will return an error.
   198  //
   199  // The input stream consists of basic JSON values—bool, string,
   200  // number, and null—along with delimiters [ ] { } of type [Delim]
   201  // to mark the start and end of arrays and objects.
   202  // Commas and colons are elided.
   203  func (dec *Decoder) Token() (Token, error) {
   204  	if dec.err != nil {
   205  		return nil, dec.err
   206  	}
   207  	tok, err := dec.dec.ReadToken()
   208  	if err != nil {
   209  		// Historically, v1 would report just [io.EOF] if
   210  		// the stream is a prefix of a valid JSON value.
   211  		// It reports an unwrapped [io.ErrUnexpectedEOF] if
   212  		// truncated within a JSON token such as a literal, number, or string.
   213  		if errors.Is(err, io.ErrUnexpectedEOF) {
   214  			if len(bytes.Trim(dec.dec.UnreadBuffer(), " \r\n\t,:")) == 0 {
   215  				return nil, io.EOF
   216  			}
   217  			return nil, io.ErrUnexpectedEOF
   218  		}
   219  		return nil, transformSyntacticError(err)
   220  	}
   221  	dec.hadPeeked = false
   222  	switch k := tok.Kind(); k {
   223  	case 'n':
   224  		return nil, nil
   225  	case 'f':
   226  		return false, nil
   227  	case 't':
   228  		return true, nil
   229  	case '"':
   230  		return tok.String(), nil
   231  	case '0':
   232  		if useNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber); useNumber {
   233  			return Number(tok.String()), nil
   234  		}
   235  		v, err := tok.Float()
   236  		if err != nil {
   237  			return nil, &UnmarshalTypeError{Value: "number " + tok.String(), Type: reflect.TypeFor[float64](), Offset: dec.InputOffset() - int64(len(tok.String()))}
   238  		}
   239  		return v, nil
   240  	case '{', '}', '[', ']':
   241  		return Delim(k), nil
   242  	default:
   243  		panic("unreachable")
   244  	}
   245  }
   246  
   247  // More reports whether there is another element in the
   248  // current array or object being parsed.
   249  func (dec *Decoder) More() bool {
   250  	dec.hadPeeked = true
   251  	k := dec.dec.PeekKind()
   252  	if k == 0 {
   253  		if dec.err == nil {
   254  			// PeekKind doesn't distinguish between EOF and error,
   255  			// so read the next token to see which we get.
   256  			_, err := dec.dec.ReadToken()
   257  			if err == nil {
   258  				// This is only possible if jsontext violates its documentation.
   259  				err = errors.New("json: successful read after failed peek")
   260  			}
   261  			dec.err = transformSyntacticError(err)
   262  		}
   263  		return dec.err != io.EOF
   264  	}
   265  	return k != ']' && k != '}'
   266  }
   267  
   268  // InputOffset returns the input stream byte offset of the current decoder position.
   269  // The offset gives the location of the end of the most recently returned token
   270  // and the beginning of the next token.
   271  func (dec *Decoder) InputOffset() int64 {
   272  	offset := dec.dec.InputOffset()
   273  	if dec.hadPeeked {
   274  		// Historically, InputOffset reported the location of
   275  		// the end of the most recently returned token
   276  		// unless [Decoder.More] is called, in which case, it reported
   277  		// the beginning of the next token.
   278  		unreadBuffer := dec.dec.UnreadBuffer()
   279  		trailingTokens := bytes.TrimLeft(unreadBuffer, " \n\r\t")
   280  		if len(trailingTokens) > 0 {
   281  			leadingWhitespace := len(unreadBuffer) - len(trailingTokens)
   282  			offset += int64(leadingWhitespace)
   283  		}
   284  	}
   285  	return offset
   286  }
   287  

View as plain text