Source file src/encoding/json/jsontext/value.go
1 // Copyright 2020 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build goexperiment.jsonv2 6 7 package jsontext 8 9 import ( 10 "bytes" 11 "errors" 12 "io" 13 "slices" 14 "sync" 15 16 "encoding/json/internal/jsonflags" 17 "encoding/json/internal/jsonwire" 18 ) 19 20 // NOTE: Value is analogous to v1 json.RawMessage. 21 22 // AppendFormat formats the JSON value in src and appends it to dst 23 // according to the specified options. 24 // See [Value.Format] for more details about the formatting behavior. 25 // 26 // The dst and src may overlap. 27 // If an error is reported, then the entirety of src is appended to dst. 28 func AppendFormat(dst, src []byte, opts ...Options) ([]byte, error) { 29 e := getBufferedEncoder(opts...) 30 defer putBufferedEncoder(e) 31 e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1) 32 if err := e.s.WriteValue(src); err != nil { 33 return append(dst, src...), err 34 } 35 return append(dst, e.s.Buf...), nil 36 } 37 38 // Value represents a single raw JSON value, which may be one of the following: 39 // - a JSON literal (i.e., null, true, or false) 40 // - a JSON string (e.g., "hello, world!") 41 // - a JSON number (e.g., 123.456) 42 // - an entire JSON object (e.g., {"fizz":"buzz"} ) 43 // - an entire JSON array (e.g., [1,2,3] ) 44 // 45 // Value can represent entire array or object values, while [Token] cannot. 46 // Value may contain leading and/or trailing whitespace. 47 type Value []byte 48 49 // Clone returns a copy of v. 50 func (v Value) Clone() Value { 51 return bytes.Clone(v) 52 } 53 54 // String returns the string formatting of v. 55 func (v Value) String() string { 56 if v == nil { 57 return "null" 58 } 59 return string(v) 60 } 61 62 // IsValid reports whether the raw JSON value is syntactically valid 63 // according to the specified options. 64 // 65 // By default (if no options are specified), it validates according to RFC 7493. 66 // It verifies whether the input is properly encoded as UTF-8, 67 // that escape sequences within strings decode to valid Unicode codepoints, and 68 // that all names in each object are unique. 69 // It does not verify whether numbers are representable within the limits 70 // of any common numeric type (e.g., float64, int64, or uint64). 71 // 72 // Relevant options include: 73 // - [AllowDuplicateNames] 74 // - [AllowInvalidUTF8] 75 // 76 // All other options are ignored. 77 func (v Value) IsValid(opts ...Options) bool { 78 // TODO: Document support for [WithByteLimit] and [WithDepthLimit]. 79 d := getBufferedDecoder(v, opts...) 80 defer putBufferedDecoder(d) 81 _, errVal := d.ReadValue() 82 _, errEOF := d.ReadToken() 83 return errVal == nil && errEOF == io.EOF 84 } 85 86 // Format formats the raw JSON value in place. 87 // 88 // By default (if no options are specified), it validates according to RFC 7493 89 // and produces the minimal JSON representation, where 90 // all whitespace is elided and JSON strings use the shortest encoding. 91 // 92 // Relevant options include: 93 // - [AllowDuplicateNames] 94 // - [AllowInvalidUTF8] 95 // - [EscapeForHTML] 96 // - [EscapeForJS] 97 // - [PreserveRawStrings] 98 // - [CanonicalizeRawInts] 99 // - [CanonicalizeRawFloats] 100 // - [ReorderRawObjects] 101 // - [SpaceAfterColon] 102 // - [SpaceAfterComma] 103 // - [Multiline] 104 // - [WithIndent] 105 // - [WithIndentPrefix] 106 // 107 // All other options are ignored. 108 // 109 // It is guaranteed to succeed if the value is valid according to the same options. 110 // If the value is already formatted, then the buffer is not mutated. 111 func (v *Value) Format(opts ...Options) error { 112 // TODO: Document support for [WithByteLimit] and [WithDepthLimit]. 113 return v.format(opts, nil) 114 } 115 116 // format accepts two []Options to avoid the allocation appending them together. 117 // It is equivalent to v.Format(append(opts1, opts2...)...). 118 func (v *Value) format(opts1, opts2 []Options) error { 119 e := getBufferedEncoder(opts1...) 120 defer putBufferedEncoder(e) 121 e.s.Join(opts2...) 122 e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1) 123 if err := e.s.WriteValue(*v); err != nil { 124 return err 125 } 126 if !bytes.Equal(*v, e.s.Buf) { 127 *v = append((*v)[:0], e.s.Buf...) 128 } 129 return nil 130 } 131 132 // Compact removes all whitespace from the raw JSON value. 133 // 134 // It does not reformat JSON strings or numbers to use any other representation. 135 // To maximize the set of JSON values that can be formatted, 136 // this permits values with duplicate names and invalid UTF-8. 137 // 138 // Compact is equivalent to calling [Value.Format] with the following options: 139 // - [AllowDuplicateNames](true) 140 // - [AllowInvalidUTF8](true) 141 // - [PreserveRawStrings](true) 142 // 143 // Any options specified by the caller are applied after the initial set 144 // and may deliberately override prior options. 145 func (v *Value) Compact(opts ...Options) error { 146 return v.format([]Options{ 147 AllowDuplicateNames(true), 148 AllowInvalidUTF8(true), 149 PreserveRawStrings(true), 150 }, opts) 151 } 152 153 // Indent reformats the whitespace in the raw JSON value so that each element 154 // in a JSON object or array begins on a indented line according to the nesting. 155 // 156 // It does not reformat JSON strings or numbers to use any other representation. 157 // To maximize the set of JSON values that can be formatted, 158 // this permits values with duplicate names and invalid UTF-8. 159 // 160 // Indent is equivalent to calling [Value.Format] with the following options: 161 // - [AllowDuplicateNames](true) 162 // - [AllowInvalidUTF8](true) 163 // - [PreserveRawStrings](true) 164 // - [Multiline](true) 165 // 166 // Any options specified by the caller are applied after the initial set 167 // and may deliberately override prior options. 168 func (v *Value) Indent(opts ...Options) error { 169 return v.format([]Options{ 170 AllowDuplicateNames(true), 171 AllowInvalidUTF8(true), 172 PreserveRawStrings(true), 173 Multiline(true), 174 }, opts) 175 } 176 177 // Canonicalize canonicalizes the raw JSON value according to the 178 // JSON Canonicalization Scheme (JCS) as defined by RFC 8785 179 // where it produces a stable representation of a JSON value. 180 // 181 // JSON strings are formatted to use their minimal representation, 182 // JSON numbers are formatted as double precision numbers according 183 // to some stable serialization algorithm. 184 // JSON object members are sorted in ascending order by name. 185 // All whitespace is removed. 186 // 187 // The output stability is dependent on the stability of the application data 188 // (see RFC 8785, Appendix E). It cannot produce stable output from 189 // fundamentally unstable input. For example, if the JSON value 190 // contains ephemeral data (e.g., a frequently changing timestamp), 191 // then the value is still unstable regardless of whether this is called. 192 // 193 // Canonicalize is equivalent to calling [Value.Format] with the following options: 194 // - [CanonicalizeRawInts](true) 195 // - [CanonicalizeRawFloats](true) 196 // - [ReorderRawObjects](true) 197 // 198 // Any options specified by the caller are applied after the initial set 199 // and may deliberately override prior options. 200 // 201 // Note that JCS treats all JSON numbers as IEEE 754 double precision numbers. 202 // Any numbers with precision beyond what is representable by that form 203 // will lose their precision when canonicalized. For example, integer values 204 // beyond ±2⁵³ will lose their precision. To preserve the original representation 205 // of JSON integers, additionally set [CanonicalizeRawInts] to false: 206 // 207 // v.Canonicalize(jsontext.CanonicalizeRawInts(false)) 208 func (v *Value) Canonicalize(opts ...Options) error { 209 return v.format([]Options{ 210 CanonicalizeRawInts(true), 211 CanonicalizeRawFloats(true), 212 ReorderRawObjects(true), 213 }, opts) 214 } 215 216 // MarshalJSON returns v as the JSON encoding of v. 217 // It returns the stored value as the raw JSON output without any validation. 218 // If v is nil, then this returns a JSON null. 219 func (v Value) MarshalJSON() ([]byte, error) { 220 // NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON. 221 if v == nil { 222 return []byte("null"), nil 223 } 224 return v, nil 225 } 226 227 // UnmarshalJSON sets v as the JSON encoding of b. 228 // It stores a copy of the provided raw JSON input without any validation. 229 func (v *Value) UnmarshalJSON(b []byte) error { 230 // NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON. 231 if v == nil { 232 return errors.New("jsontext.Value: UnmarshalJSON on nil pointer") 233 } 234 *v = append((*v)[:0], b...) 235 return nil 236 } 237 238 // Kind returns the starting token kind. 239 // For a valid value, this will never include '}' or ']'. 240 func (v Value) Kind() Kind { 241 if v := v[jsonwire.ConsumeWhitespace(v):]; len(v) > 0 { 242 return Kind(v[0]).normalize() 243 } 244 return invalidKind 245 } 246 247 const commaAndWhitespace = ", \n\r\t" 248 249 type objectMember struct { 250 // name is the unquoted name. 251 name []byte // e.g., "name" 252 // buffer is the entirety of the raw JSON object member 253 // starting from right after the previous member (or opening '{') 254 // until right after the member value. 255 buffer []byte // e.g., `, \n\r\t"name": "value"` 256 } 257 258 func (x objectMember) Compare(y objectMember) int { 259 if c := jsonwire.CompareUTF16(x.name, y.name); c != 0 { 260 return c 261 } 262 // With [AllowDuplicateNames] or [AllowInvalidUTF8], 263 // names could be identical, so also sort using the member value. 264 return jsonwire.CompareUTF16( 265 bytes.TrimLeft(x.buffer, commaAndWhitespace), 266 bytes.TrimLeft(y.buffer, commaAndWhitespace)) 267 } 268 269 var objectMemberPool = sync.Pool{New: func() any { return new([]objectMember) }} 270 271 func getObjectMembers() *[]objectMember { 272 ns := objectMemberPool.Get().(*[]objectMember) 273 *ns = (*ns)[:0] 274 return ns 275 } 276 func putObjectMembers(ns *[]objectMember) { 277 if cap(*ns) < 1<<10 { 278 clear(*ns) // avoid pinning name and buffer 279 objectMemberPool.Put(ns) 280 } 281 } 282 283 // mustReorderObjects reorders in-place all object members in a JSON value, 284 // which must be valid otherwise it panics. 285 func mustReorderObjects(b []byte) { 286 // Obtain a buffered encoder just to use its internal buffer as 287 // a scratch buffer for reordering object members. 288 e2 := getBufferedEncoder() 289 defer putBufferedEncoder(e2) 290 291 // Disable unnecessary checks to syntactically parse the JSON value. 292 d := getBufferedDecoder(b) 293 defer putBufferedDecoder(d) 294 d.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1) 295 mustReorderObjectsFromDecoder(d, &e2.s.Buf) // per RFC 8785, section 3.2.3 296 } 297 298 // mustReorderObjectsFromDecoder recursively reorders all object members in place 299 // according to the ordering specified in RFC 8785, section 3.2.3. 300 // 301 // Pre-conditions: 302 // - The value is valid (i.e., no decoder errors should ever occur). 303 // - Initial call is provided a Decoder reading from the start of v. 304 // 305 // Post-conditions: 306 // - Exactly one JSON value is read from the Decoder. 307 // - All fully-parsed JSON objects are reordered by directly moving 308 // the members in the value buffer. 309 // 310 // The runtime is approximately O(n·log(n)) + O(m·log(m)), 311 // where n is len(v) and m is the total number of object members. 312 func mustReorderObjectsFromDecoder(d *Decoder, scratch *[]byte) { 313 switch tok, err := d.ReadToken(); tok.Kind() { 314 case '{': 315 // Iterate and collect the name and offsets for every object member. 316 members := getObjectMembers() 317 defer putObjectMembers(members) 318 var prevMember objectMember 319 isSorted := true 320 321 beforeBody := d.InputOffset() // offset after '{' 322 for d.PeekKind() != '}' { 323 beforeName := d.InputOffset() 324 var flags jsonwire.ValueFlags 325 name, _ := d.s.ReadValue(&flags) 326 name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim()) 327 mustReorderObjectsFromDecoder(d, scratch) 328 afterValue := d.InputOffset() 329 330 currMember := objectMember{name, d.s.buf[beforeName:afterValue]} 331 if isSorted && len(*members) > 0 { 332 isSorted = objectMember.Compare(prevMember, currMember) < 0 333 } 334 *members = append(*members, currMember) 335 prevMember = currMember 336 } 337 afterBody := d.InputOffset() // offset before '}' 338 d.ReadToken() 339 340 // Sort the members; return early if it's already sorted. 341 if isSorted { 342 return 343 } 344 firstBufferBeforeSorting := (*members)[0].buffer 345 slices.SortFunc(*members, objectMember.Compare) 346 firstBufferAfterSorting := (*members)[0].buffer 347 348 // Append the reordered members to a new buffer, 349 // then copy the reordered members back over the original members. 350 // Avoid swapping in place since each member may be a different size 351 // where moving a member over a smaller member may corrupt the data 352 // for subsequent members before they have been moved. 353 // 354 // The following invariant must hold: 355 // sum([m.after-m.before for m in members]) == afterBody-beforeBody 356 commaAndWhitespacePrefix := func(b []byte) []byte { 357 return b[:len(b)-len(bytes.TrimLeft(b, commaAndWhitespace))] 358 } 359 sorted := (*scratch)[:0] 360 for i, member := range *members { 361 switch { 362 case i == 0 && &member.buffer[0] != &firstBufferBeforeSorting[0]: 363 // First member after sorting is not the first member before sorting, 364 // so use the prefix of the first member before sorting. 365 sorted = append(sorted, commaAndWhitespacePrefix(firstBufferBeforeSorting)...) 366 sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...) 367 case i != 0 && &member.buffer[0] == &firstBufferBeforeSorting[0]: 368 // Later member after sorting is the first member before sorting, 369 // so use the prefix of the first member after sorting. 370 sorted = append(sorted, commaAndWhitespacePrefix(firstBufferAfterSorting)...) 371 sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...) 372 default: 373 sorted = append(sorted, member.buffer...) 374 } 375 } 376 if int(afterBody-beforeBody) != len(sorted) { 377 panic("BUG: length invariant violated") 378 } 379 copy(d.s.buf[beforeBody:afterBody], sorted) 380 381 // Update scratch buffer to the largest amount ever used. 382 if len(sorted) > len(*scratch) { 383 *scratch = sorted 384 } 385 case '[': 386 for d.PeekKind() != ']' { 387 mustReorderObjectsFromDecoder(d, scratch) 388 } 389 d.ReadToken() 390 default: 391 if err != nil { 392 panic("BUG: " + err.Error()) 393 } 394 } 395 } 396