1
2
3
4
5
6
7 package jsontext
8
9 import (
10 "errors"
11 "iter"
12 "math"
13 "strconv"
14 "strings"
15 "unicode/utf8"
16
17 "encoding/json/internal/jsonwire"
18 )
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
// ErrDuplicateName is the sentinel error for a JSON object that contains
// the same member name more than once.
var ErrDuplicateName = errors.New("duplicate object member name")

// ErrNonStringName is the sentinel error for a token other than a JSON
// string appearing where an object member name is required.
var ErrNonStringName = errors.New("object member name must be a string")

// Unexported sentinel errors returned by the state machine.
var (
	// errMissingValue is returned when an object is closed while a
	// member name is still waiting for its value.
	errMissingValue = errors.New("missing value after object name")
	// errMismatchDelim is returned when a closing delimiter does not
	// match the kind of the container currently open.
	errMismatchDelim = errors.New("mismatching structural token for object or array")
	// errMaxDepth is returned when opening another container would
	// exceed maxNestingDepth.
	errMaxDepth = errors.New("exceeded max depth")

	// errInvalidNamespace is returned when the namespace of the current
	// container has been marked invalid.
	errInvalidNamespace = errors.New("object namespace is in an invalid state")
)

// maxNestingDepth is the maximum number of entries the state machine
// stack may hold before pushObject and pushArray report errMaxDepth.
const maxNestingDepth = 10000
54
55 type state struct {
56
57 Tokens stateMachine
58
59
60 Names objectNameStack
61
62
63
64
65
66
67 Namespaces objectNamespaceStack
68 }
69
70
71
72 func (s *state) needObjectValue() bool {
73 return s.Tokens.Last.needObjectValue()
74 }
75
76 func (s *state) reset() {
77 s.Tokens.reset()
78 s.Names.reset()
79 s.Namespaces.reset()
80 }
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95 type Pointer string
96
97
98
99 func (p Pointer) IsValid() bool {
100 for i, r := range p {
101 switch {
102 case r == '~' && (i+1 == len(p) || (p[i+1] != '0' && p[i+1] != '1')):
103 return false
104 case r == '\ufffd' && !strings.HasPrefix(string(p[i:]), "\ufffd"):
105 return false
106 }
107 }
108 return len(p) == 0 || p[0] == '/'
109 }
110
111
112
113 func (p Pointer) Contains(pc Pointer) bool {
114
115 suffix, ok := strings.CutPrefix(string(pc), string(p))
116 return ok && (suffix == "" || suffix[0] == '/')
117 }
118
119
120
121 func (p Pointer) Parent() Pointer {
122 return p[:max(strings.LastIndexByte(string(p), '/'), 0)]
123 }
124
125
126
127 func (p Pointer) LastToken() string {
128 last := p[max(strings.LastIndexByte(string(p), '/'), 0):]
129 return unescapePointerToken(strings.TrimPrefix(string(last), "/"))
130 }
131
132
133 func (p Pointer) AppendToken(tok string) Pointer {
134 return Pointer(appendEscapePointerName([]byte(p+"/"), tok))
135 }
136
137
138
139
140
141
142 func (p Pointer) Tokens() iter.Seq[string] {
143 return func(yield func(string) bool) {
144 for len(p) > 0 {
145 p = Pointer(strings.TrimPrefix(string(p), "/"))
146 i := min(uint(strings.IndexByte(string(p), '/')), uint(len(p)))
147 if !yield(unescapePointerToken(string(p)[:i])) {
148 return
149 }
150 p = p[i:]
151 }
152 }
153 }
154
// unescapePointerToken converts the escape sequences of a single
// reference token back to the characters they stand for:
// "~1" becomes "/" and, afterwards, "~0" becomes "~".
// Tokens without any '~' are returned unchanged.
func unescapePointerToken(token string) string {
	if !strings.Contains(token, "~") {
		return token
	}
	// "~1" must be handled before "~0" so that "~01" decodes to "~1".
	withSlashes := strings.ReplaceAll(token, "~1", "/")
	return strings.ReplaceAll(withSlashes, "~0", "~")
}
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181 func (s state) appendStackPointer(b []byte, where int) []byte {
182 var objectDepth int
183 for i := 1; i < s.Tokens.Depth(); i++ {
184 e := s.Tokens.index(i)
185 arrayDelta := -1
186 if isLast := i == s.Tokens.Depth()-1; isLast {
187 switch {
188 case where < 0 && e.Length() == 0 || where == 0 && !e.needObjectValue() || where > 0 && e.NeedObjectName():
189 return b
190 case where > 0 && e.isArray():
191 arrayDelta = 0
192 }
193 }
194 switch {
195 case e.isObject():
196 b = appendEscapePointerName(append(b, '/'), s.Names.getUnquoted(objectDepth))
197 objectDepth++
198 case e.isArray():
199 b = strconv.AppendUint(append(b, '/'), uint64(e.Length()+int64(arrayDelta)), 10)
200 }
201 }
202 return b
203 }
204
// appendEscapePointerName appends name to b as a JSON Pointer reference
// token: '~' is written as "~0", '/' as "~1", and every other rune is
// appended in UTF-8. Because name is decoded rune by rune, bytes that
// are not valid UTF-8 come out as U+FFFD.
func appendEscapePointerName[Bytes ~[]byte | ~string](b []byte, name Bytes) []byte {
	for _, r := range string(name) {
		if r == '~' {
			b = append(b, '~', '0')
			continue
		}
		if r == '/' {
			b = append(b, '~', '1')
			continue
		}
		b = utf8.AppendRune(b, r)
	}
	return b
}
219
220
221
222
223
224
225
226
227
228
229
230
231
232 type stateMachine struct {
233 Stack []stateEntry
234 Last stateEntry
235 }
236
237
238
239 func (m *stateMachine) reset() {
240 m.Stack = m.Stack[:0]
241 if cap(m.Stack) > 1<<10 {
242 m.Stack = nil
243 }
244 m.Last = stateTypeArray
245 }
246
247
248
249 func (m stateMachine) Depth() int {
250 return len(m.Stack) + 1
251 }
252
253
254
255 func (m *stateMachine) index(i int) *stateEntry {
256 if i == len(m.Stack) {
257 return &m.Last
258 }
259 return &m.Stack[i]
260 }
261
262
263
264 func (m stateMachine) DepthLength() (int, int64) {
265 return m.Depth(), m.Last.Length()
266 }
267
268
269
270 func (m *stateMachine) appendLiteral() error {
271 switch {
272 case m.Last.NeedObjectName():
273 return ErrNonStringName
274 case !m.Last.isValidNamespace():
275 return errInvalidNamespace
276 default:
277 m.Last.Increment()
278 return nil
279 }
280 }
281
282
283
284 func (m *stateMachine) appendString() error {
285 switch {
286 case !m.Last.isValidNamespace():
287 return errInvalidNamespace
288 default:
289 m.Last.Increment()
290 return nil
291 }
292 }
293
294
295
296 func (m *stateMachine) appendNumber() error {
297 return m.appendLiteral()
298 }
299
300
301
302 func (m *stateMachine) pushObject() error {
303 switch {
304 case m.Last.NeedObjectName():
305 return ErrNonStringName
306 case !m.Last.isValidNamespace():
307 return errInvalidNamespace
308 case len(m.Stack) == maxNestingDepth:
309 return errMaxDepth
310 default:
311 m.Last.Increment()
312 m.Stack = append(m.Stack, m.Last)
313 m.Last = stateTypeObject
314 return nil
315 }
316 }
317
318
319
320 func (m *stateMachine) popObject() error {
321 switch {
322 case !m.Last.isObject():
323 return errMismatchDelim
324 case m.Last.needObjectValue():
325 return errMissingValue
326 case !m.Last.isValidNamespace():
327 return errInvalidNamespace
328 default:
329 m.Last = m.Stack[len(m.Stack)-1]
330 m.Stack = m.Stack[:len(m.Stack)-1]
331 return nil
332 }
333 }
334
335
336
337 func (m *stateMachine) pushArray() error {
338 switch {
339 case m.Last.NeedObjectName():
340 return ErrNonStringName
341 case !m.Last.isValidNamespace():
342 return errInvalidNamespace
343 case len(m.Stack) == maxNestingDepth:
344 return errMaxDepth
345 default:
346 m.Last.Increment()
347 m.Stack = append(m.Stack, m.Last)
348 m.Last = stateTypeArray
349 return nil
350 }
351 }
352
353
354
355 func (m *stateMachine) popArray() error {
356 switch {
357 case !m.Last.isArray() || len(m.Stack) == 0:
358 return errMismatchDelim
359 case !m.Last.isValidNamespace():
360 return errInvalidNamespace
361 default:
362 m.Last = m.Stack[len(m.Stack)-1]
363 m.Stack = m.Stack[:len(m.Stack)-1]
364 return nil
365 }
366 }
367
368
369
370
371
372 func (m stateMachine) NeedIndent(next Kind) (n int) {
373 willEnd := next == '}' || next == ']'
374 switch {
375 case m.Depth() == 1:
376 return 0
377 case m.Last.Length() == 0 && willEnd:
378 return 0
379 case m.Last.Length() == 0 || m.Last.needImplicitComma(next):
380 return m.Depth()
381 case willEnd:
382 return m.Depth() - 1
383 default:
384 return 0
385 }
386 }
387
388
389 func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte {
390 switch {
391 case m.Last.needImplicitColon():
392 return append(b, ':')
393 case m.Last.needImplicitComma(next) && len(m.Stack) != 0:
394 return append(b, ',')
395 default:
396 return b
397 }
398 }
399
400
401
402
403 func (m stateMachine) needDelim(next Kind) (delim byte) {
404 switch {
405 case m.Last.needImplicitColon():
406 return ':'
407 case m.Last.needImplicitComma(next) && len(m.Stack) != 0:
408 return ','
409 default:
410 return 0
411 }
412 }
413
414
415
416
417
418
419
420
421 func (m *stateMachine) InvalidateDisabledNamespaces() {
422 for i := range m.Depth() {
423 e := m.index(i)
424 if !e.isActiveNamespace() {
425 e.invalidateNamespace()
426 }
427 }
428 }
429
430
431
432
433
434 type stateEntry uint64
435
436 const (
437
438 stateTypeMask stateEntry = 0x8000_0000_0000_0000
439 stateTypeObject stateEntry = 0x8000_0000_0000_0000
440 stateTypeArray stateEntry = 0x0000_0000_0000_0000
441
442
443
444
445 stateNamespaceMask stateEntry = 0x6000_0000_0000_0000
446 stateDisableNamespace stateEntry = 0x4000_0000_0000_0000
447 stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000
448
449
450 stateCountMask stateEntry = 0x1fff_ffff_ffff_ffff
451 stateCountLSBMask stateEntry = 0x0000_0000_0000_0001
452 stateCountOdd stateEntry = 0x0000_0000_0000_0001
453 stateCountEven stateEntry = 0x0000_0000_0000_0000
454 )
455
456
457
458 func (e stateEntry) Length() int64 {
459 return int64(e & stateCountMask)
460 }
461
462
463 func (e stateEntry) isObject() bool {
464 return e&stateTypeMask == stateTypeObject
465 }
466
467
468 func (e stateEntry) isArray() bool {
469 return e&stateTypeMask == stateTypeArray
470 }
471
472
473
474 func (e stateEntry) NeedObjectName() bool {
475 return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven
476 }
477
478
479
480 func (e stateEntry) needImplicitColon() bool {
481 return e.needObjectValue()
482 }
483
484
485
486 func (e stateEntry) needObjectValue() bool {
487 return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountOdd
488 }
489
490
491
492
493 func (e stateEntry) needImplicitComma(next Kind) bool {
494 return !e.needObjectValue() && e.Length() > 0 && next != '}' && next != ']'
495 }
496
497
498
499
500 func (e *stateEntry) Increment() {
501 (*e)++
502 }
503
504
505
506 func (e *stateEntry) decrement() {
507 (*e)--
508 }
509
510
511
512 func (e *stateEntry) DisableNamespace() {
513 *e |= stateDisableNamespace
514 }
515
516
517
518 func (e stateEntry) isActiveNamespace() bool {
519 return e&(stateDisableNamespace) == 0
520 }
521
522
523 func (e *stateEntry) invalidateNamespace() {
524 *e |= stateInvalidNamespace
525 }
526
527
528 func (e stateEntry) isValidNamespace() bool {
529 return e&(stateInvalidNamespace) == 0
530 }
531
532
533
534
535
536
537
538
539
540
541
542 type objectNameStack struct {
543
544
545
546
547
548
549 offsets []int
550
551 unquotedNames []byte
552 }
553
554 func (ns *objectNameStack) reset() {
555 ns.offsets = ns.offsets[:0]
556 ns.unquotedNames = ns.unquotedNames[:0]
557 if cap(ns.offsets) > 1<<6 {
558 ns.offsets = nil
559 }
560 if cap(ns.unquotedNames) > 1<<10 {
561 ns.unquotedNames = nil
562 }
563 }
564
565 func (ns *objectNameStack) length() int {
566 return len(ns.offsets)
567 }
568
569
570
571
572
573 func (ns *objectNameStack) getUnquoted(i int) []byte {
574 ns.ensureCopiedBuffer()
575 if i == 0 {
576 return ns.unquotedNames[:ns.offsets[0]]
577 } else {
578 return ns.unquotedNames[ns.offsets[i-1]:ns.offsets[i-0]]
579 }
580 }
581
582
583 const invalidOffset = math.MinInt
584
585
586 func (ns *objectNameStack) push() {
587 ns.offsets = append(ns.offsets, invalidOffset)
588 }
589
590
591
592
593 func (ns *objectNameStack) ReplaceLastQuotedOffset(i int) {
594
595
596
597
598
599 ns.offsets[len(ns.offsets)-1] = ^i
600 }
601
602
603
604
605 func (ns *objectNameStack) replaceLastUnquotedName(s string) {
606 ns.ensureCopiedBuffer()
607 var startOffset int
608 if len(ns.offsets) > 1 {
609 startOffset = ns.offsets[len(ns.offsets)-2]
610 }
611 ns.unquotedNames = append(ns.unquotedNames[:startOffset], s...)
612 ns.offsets[len(ns.offsets)-1] = len(ns.unquotedNames)
613 }
614
615
616
617 func (ns *objectNameStack) clearLast() {
618 ns.offsets[len(ns.offsets)-1] = invalidOffset
619 }
620
621
622 func (ns *objectNameStack) pop() {
623 ns.offsets = ns.offsets[:len(ns.offsets)-1]
624 }
625
626
627
628
629
630 func (ns *objectNameStack) copyQuotedBuffer(b []byte) {
631
632 var i int
633 for i = len(ns.offsets) - 1; i >= 0 && ns.offsets[i] < 0; i-- {
634 continue
635 }
636
637
638 for i = i + 1; i < len(ns.offsets); i++ {
639 if i == len(ns.offsets)-1 && ns.offsets[i] == invalidOffset {
640 if i == 0 {
641 ns.offsets[i] = 0
642 } else {
643 ns.offsets[i] = ns.offsets[i-1]
644 }
645 break
646 }
647
648
649
650
651 quotedName := b[^ns.offsets[i]:]
652 if quotedName[0] == invalidateBufferByte {
653 quotedName[0] = '"'
654 }
655
656
657 var startOffset int
658 if i > 0 {
659 startOffset = ns.offsets[i-1]
660 }
661 if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 {
662 ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...)
663 } else {
664 ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName)
665 }
666 ns.offsets[i] = len(ns.unquotedNames)
667 }
668 }
669
670 func (ns *objectNameStack) ensureCopiedBuffer() {
671 if len(ns.offsets) > 0 && ns.offsets[len(ns.offsets)-1] < 0 {
672 panic("BUG: copyQuotedBuffer not called beforehand")
673 }
674 }
675
676
677
678 type objectNamespaceStack []objectNamespace
679
680
681 func (nss *objectNamespaceStack) reset() {
682 if cap(*nss) > 1<<10 {
683 *nss = nil
684 }
685 *nss = (*nss)[:0]
686 }
687
688
689 func (nss *objectNamespaceStack) push() {
690 if cap(*nss) > len(*nss) {
691 *nss = (*nss)[:len(*nss)+1]
692 nss.Last().reset()
693 } else {
694 *nss = append(*nss, objectNamespace{})
695 }
696 }
697
698
699 func (nss objectNamespaceStack) Last() *objectNamespace {
700 return &nss[len(nss)-1]
701 }
702
703
704 func (nss *objectNamespaceStack) pop() {
705 *nss = (*nss)[:len(*nss)-1]
706 }
707
708
709
710
711
712
// objectNamespace is the set of member names recorded for one JSON
// object, kept in insertion order.
type objectNamespace struct {
	// endOffsets[i] is the end offset of name i in allUnquotedNames;
	// name i starts at endOffsets[i-1], or at 0 for the first name.
	endOffsets []uint
	// allUnquotedNames is the concatenation of all unquoted names.
	allUnquotedNames []byte
	// mapNames, when non-nil, holds the same names as a set. It is
	// created by insert once the namespace grows large.
	mapNames map[string]struct{}
}

// reset empties the namespace. The map is always dropped; the slices
// keep their backing arrays for reuse unless they have grown beyond
// 1<<6 offsets or 1<<10 name bytes.
func (ns *objectNamespace) reset() {
	ns.mapNames = nil
	if cap(ns.endOffsets) > 1<<6 {
		ns.endOffsets = nil
	} else {
		ns.endOffsets = ns.endOffsets[:0]
	}
	if cap(ns.allUnquotedNames) > 1<<10 {
		ns.allUnquotedNames = nil
	} else {
		ns.allUnquotedNames = ns.allUnquotedNames[:0]
	}
}

// length returns the number of names in the namespace.
func (ns *objectNamespace) length() int {
	return len(ns.endOffsets)
}

// getUnquoted returns the i-th name in insertion order.
// The result aliases the internal buffer.
func (ns *objectNamespace) getUnquoted(i int) []byte {
	var start uint
	if i != 0 {
		start = ns.endOffsets[i-1]
	}
	return ns.allUnquotedNames[start:ns.endOffsets[i]]
}

// lastUnquoted returns the most recently inserted name.
// It panics if the namespace is empty.
func (ns *objectNamespace) lastUnquoted() []byte {
	return ns.getUnquoted(len(ns.endOffsets) - 1)
}
758
759
760
761
762 func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool {
763 if isVerbatim {
764 name = name[len(`"`) : len(name)-len(`"`)]
765 }
766 return ns.insert(name, !isVerbatim)
767 }
768 func (ns *objectNamespace) InsertUnquoted(name []byte) bool {
769 return ns.insert(name, false)
770 }
771 func (ns *objectNamespace) insert(name []byte, quoted bool) bool {
772 var allNames []byte
773 if quoted {
774 allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name)
775 } else {
776 allNames = append(ns.allUnquotedNames, name...)
777 }
778 name = allNames[len(ns.allUnquotedNames):]
779
780
781
782 if ns.mapNames == nil && (ns.length() > 64 || len(ns.allUnquotedNames) > 1024) {
783 ns.mapNames = make(map[string]struct{})
784 var startOffset uint
785 for _, endOffset := range ns.endOffsets {
786 name := ns.allUnquotedNames[startOffset:endOffset]
787 ns.mapNames[string(name)] = struct{}{}
788 startOffset = endOffset
789 }
790 }
791
792 if ns.mapNames == nil {
793
794
795 var startOffset uint
796 for _, endOffset := range ns.endOffsets {
797 if string(ns.allUnquotedNames[startOffset:endOffset]) == string(name) {
798 return false
799 }
800 startOffset = endOffset
801 }
802 } else {
803
804
805 if _, ok := ns.mapNames[string(name)]; ok {
806 return false
807 }
808 ns.mapNames[string(name)] = struct{}{}
809 }
810
811 ns.allUnquotedNames = allNames
812 ns.endOffsets = append(ns.endOffsets, uint(len(ns.allUnquotedNames)))
813 return true
814 }
815
816
817 func (ns *objectNamespace) removeLast() {
818 if ns.mapNames != nil {
819 delete(ns.mapNames, string(ns.lastUnquoted()))
820 }
821 if ns.length()-1 == 0 {
822 ns.endOffsets = ns.endOffsets[:0]
823 ns.allUnquotedNames = ns.allUnquotedNames[:0]
824 } else {
825 ns.endOffsets = ns.endOffsets[:ns.length()-1]
826 ns.allUnquotedNames = ns.allUnquotedNames[:ns.endOffsets[ns.length()-1]]
827 }
828 }
829
View as plain text