Source file src/encoding/json/jsontext/value.go

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.jsonv2
     6  
     7  package jsontext
     8  
     9  import (
    10  	"bytes"
    11  	"errors"
    12  	"io"
    13  	"slices"
    14  	"sync"
    15  
    16  	"encoding/json/internal/jsonflags"
    17  	"encoding/json/internal/jsonwire"
    18  )
    19  
    20  // NOTE: Value is analogous to v1 json.RawMessage.
    21  
    22  // AppendFormat formats the JSON value in src and appends it to dst
    23  // according to the specified options.
    24  // See [Value.Format] for more details about the formatting behavior.
    25  //
    26  // The dst and src may overlap.
    27  // If an error is reported, then the entirety of src is appended to dst.
    28  func AppendFormat(dst, src []byte, opts ...Options) ([]byte, error) {
    29  	e := getBufferedEncoder(opts...)
    30  	defer putBufferedEncoder(e)
    31  	e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
    32  	if err := e.s.WriteValue(src); err != nil {
    33  		return append(dst, src...), err
    34  	}
    35  	return append(dst, e.s.Buf...), nil
    36  }
    37  
    38  // Value represents a single raw JSON value, which may be one of the following:
    39  //   - a JSON literal (i.e., null, true, or false)
    40  //   - a JSON string (e.g., "hello, world!")
    41  //   - a JSON number (e.g., 123.456)
    42  //   - an entire JSON object (e.g., {"fizz":"buzz"} )
    43  //   - an entire JSON array (e.g., [1,2,3] )
    44  //
    45  // Value can represent entire array or object values, while [Token] cannot.
    46  // Value may contain leading and/or trailing whitespace.
    47  type Value []byte
    48  
    49  // Clone returns a copy of v.
    50  func (v Value) Clone() Value {
    51  	return bytes.Clone(v)
    52  }
    53  
    54  // String returns the string formatting of v.
    55  func (v Value) String() string {
    56  	if v == nil {
    57  		return "null"
    58  	}
    59  	return string(v)
    60  }
    61  
    62  // IsValid reports whether the raw JSON value is syntactically valid
    63  // according to the specified options.
    64  //
    65  // By default (if no options are specified), it validates according to RFC 7493.
    66  // It verifies whether the input is properly encoded as UTF-8,
    67  // that escape sequences within strings decode to valid Unicode codepoints, and
    68  // that all names in each object are unique.
    69  // It does not verify whether numbers are representable within the limits
    70  // of any common numeric type (e.g., float64, int64, or uint64).
    71  //
    72  // Relevant options include:
    73  //   - [AllowDuplicateNames]
    74  //   - [AllowInvalidUTF8]
    75  //
    76  // All other options are ignored.
    77  func (v Value) IsValid(opts ...Options) bool {
    78  	// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
    79  	d := getBufferedDecoder(v, opts...)
    80  	defer putBufferedDecoder(d)
    81  	_, errVal := d.ReadValue()
    82  	_, errEOF := d.ReadToken()
    83  	return errVal == nil && errEOF == io.EOF
    84  }
    85  
    86  // Format formats the raw JSON value in place.
    87  //
    88  // By default (if no options are specified), it validates according to RFC 7493
    89  // and produces the minimal JSON representation, where
    90  // all whitespace is elided and JSON strings use the shortest encoding.
    91  //
    92  // Relevant options include:
    93  //   - [AllowDuplicateNames]
    94  //   - [AllowInvalidUTF8]
    95  //   - [EscapeForHTML]
    96  //   - [EscapeForJS]
    97  //   - [PreserveRawStrings]
    98  //   - [CanonicalizeRawInts]
    99  //   - [CanonicalizeRawFloats]
   100  //   - [ReorderRawObjects]
   101  //   - [SpaceAfterColon]
   102  //   - [SpaceAfterComma]
   103  //   - [Multiline]
   104  //   - [WithIndent]
   105  //   - [WithIndentPrefix]
   106  //
   107  // All other options are ignored.
   108  //
   109  // It is guaranteed to succeed if the value is valid according to the same options.
   110  // If the value is already formatted, then the buffer is not mutated.
   111  func (v *Value) Format(opts ...Options) error {
   112  	// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
   113  	return v.format(opts, nil)
   114  }
   115  
   116  // format accepts two []Options to avoid the allocation appending them together.
   117  // It is equivalent to v.Format(append(opts1, opts2...)...).
   118  func (v *Value) format(opts1, opts2 []Options) error {
   119  	e := getBufferedEncoder(opts1...)
   120  	defer putBufferedEncoder(e)
   121  	e.s.Join(opts2...)
   122  	e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
   123  	if err := e.s.WriteValue(*v); err != nil {
   124  		return err
   125  	}
   126  	if !bytes.Equal(*v, e.s.Buf) {
   127  		*v = append((*v)[:0], e.s.Buf...)
   128  	}
   129  	return nil
   130  }
   131  
   132  // Compact removes all whitespace from the raw JSON value.
   133  //
   134  // It does not reformat JSON strings or numbers to use any other representation.
   135  // To maximize the set of JSON values that can be formatted,
   136  // this permits values with duplicate names and invalid UTF-8.
   137  //
   138  // Compact is equivalent to calling [Value.Format] with the following options:
   139  //   - [AllowDuplicateNames](true)
   140  //   - [AllowInvalidUTF8](true)
   141  //   - [PreserveRawStrings](true)
   142  //
   143  // Any options specified by the caller are applied after the initial set
   144  // and may deliberately override prior options.
   145  func (v *Value) Compact(opts ...Options) error {
   146  	return v.format([]Options{
   147  		AllowDuplicateNames(true),
   148  		AllowInvalidUTF8(true),
   149  		PreserveRawStrings(true),
   150  	}, opts)
   151  }
   152  
   153  // Indent reformats the whitespace in the raw JSON value so that each element
   154  // in a JSON object or array begins on a indented line according to the nesting.
   155  //
   156  // It does not reformat JSON strings or numbers to use any other representation.
   157  // To maximize the set of JSON values that can be formatted,
   158  // this permits values with duplicate names and invalid UTF-8.
   159  //
   160  // Indent is equivalent to calling [Value.Format] with the following options:
   161  //   - [AllowDuplicateNames](true)
   162  //   - [AllowInvalidUTF8](true)
   163  //   - [PreserveRawStrings](true)
   164  //   - [Multiline](true)
   165  //
   166  // Any options specified by the caller are applied after the initial set
   167  // and may deliberately override prior options.
   168  func (v *Value) Indent(opts ...Options) error {
   169  	return v.format([]Options{
   170  		AllowDuplicateNames(true),
   171  		AllowInvalidUTF8(true),
   172  		PreserveRawStrings(true),
   173  		Multiline(true),
   174  	}, opts)
   175  }
   176  
   177  // Canonicalize canonicalizes the raw JSON value according to the
   178  // JSON Canonicalization Scheme (JCS) as defined by RFC 8785
   179  // where it produces a stable representation of a JSON value.
   180  //
   181  // JSON strings are formatted to use their minimal representation,
   182  // JSON numbers are formatted as double precision numbers according
   183  // to some stable serialization algorithm.
   184  // JSON object members are sorted in ascending order by name.
   185  // All whitespace is removed.
   186  //
   187  // The output stability is dependent on the stability of the application data
   188  // (see RFC 8785, Appendix E). It cannot produce stable output from
   189  // fundamentally unstable input. For example, if the JSON value
   190  // contains ephemeral data (e.g., a frequently changing timestamp),
   191  // then the value is still unstable regardless of whether this is called.
   192  //
   193  // Canonicalize is equivalent to calling [Value.Format] with the following options:
   194  //   - [CanonicalizeRawInts](true)
   195  //   - [CanonicalizeRawFloats](true)
   196  //   - [ReorderRawObjects](true)
   197  //
   198  // Any options specified by the caller are applied after the initial set
   199  // and may deliberately override prior options.
   200  //
   201  // Note that JCS treats all JSON numbers as IEEE 754 double precision numbers.
   202  // Any numbers with precision beyond what is representable by that form
   203  // will lose their precision when canonicalized. For example, integer values
   204  // beyond ±2⁵³ will lose their precision. To preserve the original representation
   205  // of JSON integers, additionally set [CanonicalizeRawInts] to false:
   206  //
   207  //	v.Canonicalize(jsontext.CanonicalizeRawInts(false))
   208  func (v *Value) Canonicalize(opts ...Options) error {
   209  	return v.format([]Options{
   210  		CanonicalizeRawInts(true),
   211  		CanonicalizeRawFloats(true),
   212  		ReorderRawObjects(true),
   213  	}, opts)
   214  }
   215  
   216  // MarshalJSON returns v as the JSON encoding of v.
   217  // It returns the stored value as the raw JSON output without any validation.
   218  // If v is nil, then this returns a JSON null.
   219  func (v Value) MarshalJSON() ([]byte, error) {
   220  	// NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON.
   221  	if v == nil {
   222  		return []byte("null"), nil
   223  	}
   224  	return v, nil
   225  }
   226  
   227  // UnmarshalJSON sets v as the JSON encoding of b.
   228  // It stores a copy of the provided raw JSON input without any validation.
   229  func (v *Value) UnmarshalJSON(b []byte) error {
   230  	// NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON.
   231  	if v == nil {
   232  		return errors.New("jsontext.Value: UnmarshalJSON on nil pointer")
   233  	}
   234  	*v = append((*v)[:0], b...)
   235  	return nil
   236  }
   237  
   238  // Kind returns the starting token kind.
   239  // For a valid value, this will never include '}' or ']'.
   240  func (v Value) Kind() Kind {
   241  	if v := v[jsonwire.ConsumeWhitespace(v):]; len(v) > 0 {
   242  		return Kind(v[0]).normalize()
   243  	}
   244  	return invalidKind
   245  }
   246  
// commaAndWhitespace is the cutset (a comma, space, newline, carriage return,
// and tab) used to trim or extract the separator bytes that precede
// an object member within its raw buffer.
const commaAndWhitespace = ", \n\r\t"

// objectMember describes a single member of a JSON object
// while the members of that object are being reordered.
// Both fields are used when comparing two members (see Compare).
type objectMember struct {
	// name is the unquoted name.
	name []byte // e.g., "name"
	// buffer is the entirety of the raw JSON object member
	// starting from right after the previous member (or opening '{')
	// until right after the member value.
	// Consequently it may begin with a comma and/or whitespace.
	buffer []byte // e.g., `, \n\r\t"name": "value"`
}
   257  
   258  func (x objectMember) Compare(y objectMember) int {
   259  	if c := jsonwire.CompareUTF16(x.name, y.name); c != 0 {
   260  		return c
   261  	}
   262  	// With [AllowDuplicateNames] or [AllowInvalidUTF8],
   263  	// names could be identical, so also sort using the member value.
   264  	return jsonwire.CompareUTF16(
   265  		bytes.TrimLeft(x.buffer, commaAndWhitespace),
   266  		bytes.TrimLeft(y.buffer, commaAndWhitespace))
   267  }
   268  
   269  var objectMemberPool = sync.Pool{New: func() any { return new([]objectMember) }}
   270  
   271  func getObjectMembers() *[]objectMember {
   272  	ns := objectMemberPool.Get().(*[]objectMember)
   273  	*ns = (*ns)[:0]
   274  	return ns
   275  }
   276  func putObjectMembers(ns *[]objectMember) {
   277  	if cap(*ns) < 1<<10 {
   278  		clear(*ns) // avoid pinning name and buffer
   279  		objectMemberPool.Put(ns)
   280  	}
   281  }
   282  
   283  // mustReorderObjects reorders in-place all object members in a JSON value,
   284  // which must be valid otherwise it panics.
   285  func mustReorderObjects(b []byte) {
   286  	// Obtain a buffered encoder just to use its internal buffer as
   287  	// a scratch buffer for reordering object members.
   288  	e2 := getBufferedEncoder()
   289  	defer putBufferedEncoder(e2)
   290  
   291  	// Disable unnecessary checks to syntactically parse the JSON value.
   292  	d := getBufferedDecoder(b)
   293  	defer putBufferedDecoder(d)
   294  	d.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)
   295  	mustReorderObjectsFromDecoder(d, &e2.s.Buf) // per RFC 8785, section 3.2.3
   296  }
   297  
// mustReorderObjectsFromDecoder recursively reorders all object members in place
// according to the ordering specified in RFC 8785, section 3.2.3.
//
// The scratch argument points at a reusable buffer in which the reordered
// members of one object are assembled before being copied back over
// the original bytes. It may be replaced by a larger buffer during the call.
//
// Pre-conditions:
//   - The value is valid (i.e., no decoder errors should ever occur).
//   - Initial call is provided a Decoder reading from the start of v.
//
// Post-conditions:
//   - Exactly one JSON value is read from the Decoder.
//   - All fully-parsed JSON objects are reordered by directly moving
//     the members in the value buffer.
//
// The runtime is approximately O(n·log(n)) + O(m·log(m)),
// where n is len(v) and m is the total number of object members.
func mustReorderObjectsFromDecoder(d *Decoder, scratch *[]byte) {
	// The first token decides how the remainder of the value is consumed.
	// The read error is only inspected in the default case below.
	switch tok, err := d.ReadToken(); tok.Kind() {
	case '{':
		// Iterate and collect the name and offsets for every object member.
		members := getObjectMembers()
		defer putObjectMembers(members)
		var prevMember objectMember
		isSorted := true

		beforeBody := d.InputOffset() // offset after '{'
		for d.PeekKind() != '}' {
			beforeName := d.InputOffset()
			var flags jsonwire.ValueFlags
			// The error is ignored since the value is valid per the pre-conditions.
			name, _ := d.s.ReadValue(&flags)
			name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim())
			// Consume the member value, which also reorders
			// any objects that are nested within it.
			mustReorderObjectsFromDecoder(d, scratch)
			afterValue := d.InputOffset()

			// The member buffer aliases the decoder's input buffer
			// and includes whatever precedes the name since the previous
			// member (i.e., any comma and whitespace).
			currMember := objectMember{name, d.s.buf[beforeName:afterValue]}
			// Track whether the members seen so far are strictly ascending
			// so that the sort and copy can be skipped entirely.
			if isSorted && len(*members) > 0 {
				isSorted = objectMember.Compare(prevMember, currMember) < 0
			}
			*members = append(*members, currMember)
			prevMember = currMember
		}
		afterBody := d.InputOffset() // offset before '}'
		d.ReadToken()                // consume the closing '}'

		// Sort the members; return early if it's already sorted.
		if isSorted {
			return
		}
		// Reaching here means at least two members were collected
		// (isSorted only turns false once a second member is compared),
		// so indexing the first member is safe.
		firstBufferBeforeSorting := (*members)[0].buffer
		slices.SortFunc(*members, objectMember.Compare)
		firstBufferAfterSorting := (*members)[0].buffer

		// Append the reordered members to a new buffer,
		// then copy the reordered members back over the original members.
		// Avoid swapping in place since each member may be a different size
		// where moving a member over a smaller member may corrupt the data
		// for subsequent members before they have been moved.
		//
		// The following invariant must hold:
		//	sum([m.after-m.before for m in members]) == afterBody-beforeBody
		//
		// commaAndWhitespacePrefix returns the leading portion of b
		// that consists solely of comma and whitespace bytes.
		commaAndWhitespacePrefix := func(b []byte) []byte {
			return b[:len(b)-len(bytes.TrimLeft(b, commaAndWhitespace))]
		}
		sorted := (*scratch)[:0]
		for i, member := range *members {
			// Members are identified by the address of the first byte of
			// their buffer, since every buffer aliases the same input.
			// In valid JSON, only the originally-first member lacks a leading
			// comma, so if a different member ends up first, the two members
			// exchange their comma/whitespace prefixes. Exchanging the
			// prefixes keeps the total length unchanged.
			switch {
			case i == 0 && &member.buffer[0] != &firstBufferBeforeSorting[0]:
				// First member after sorting is not the first member before sorting,
				// so use the prefix of the first member before sorting.
				sorted = append(sorted, commaAndWhitespacePrefix(firstBufferBeforeSorting)...)
				sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
			case i != 0 && &member.buffer[0] == &firstBufferBeforeSorting[0]:
				// Later member after sorting is the first member before sorting,
				// so use the prefix of the first member after sorting.
				sorted = append(sorted, commaAndWhitespacePrefix(firstBufferAfterSorting)...)
				sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
			default:
				// The member keeps its own prefix.
				sorted = append(sorted, member.buffer...)
			}
		}
		// Sanity check that reordering neither added nor dropped bytes
		// before overwriting the original object body.
		if int(afterBody-beforeBody) != len(sorted) {
			panic("BUG: length invariant violated")
		}
		copy(d.s.buf[beforeBody:afterBody], sorted)

		// Update scratch buffer to the largest amount ever used.
		// (The appends above may have moved sorted to a larger allocation.)
		if len(sorted) > len(*scratch) {
			*scratch = sorted
		}
	case '[':
		// Arrays keep their element order; only recurse into each element
		// so that objects nested within the array are reordered.
		for d.PeekKind() != ']' {
			mustReorderObjectsFromDecoder(d, scratch)
		}
		d.ReadToken() // consume the closing ']'
	default:
		// Any other token needs no reordering.
		// A read error here violates the validity pre-condition.
		if err != nil {
			panic("BUG: " + err.Error())
		}
	}
}
   396  

View as plain text