1
2
3
4
5
6
7 package jsontext
8
9 import (
10 "errors"
11 "iter"
12 "math"
13 "strconv"
14 "strings"
15 "unicode/utf8"
16
17 "encoding/json/internal/jsonwire"
18 )
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// Exported sentinel errors describing violations of JSON object structure.
var (
	// ErrDuplicateName reports that an object member name was repeated
	// within the same JSON object.
	ErrDuplicateName = errors.New("duplicate object member name")

	// ErrNonStringName reports that a non-string token appeared where a
	// JSON object member name was required.
	ErrNonStringName = errors.New("object member name must be a string")
)

// errMissingValue reports that an object was closed while a member name
// was still waiting for its value.
var errMissingValue = errors.New("missing value after object name")

// errMismatchDelim reports that a closing delimiter does not match the
// kind of the currently open object or array.
var errMismatchDelim = errors.New("mismatching structural token for object or array")

// errMaxDepth reports that pushing another object or array would exceed
// maxNestingDepth.
var errMaxDepth = errors.New("exceeded max depth")

// errInvalidNamespace reports that the current object or array carries the
// invalid-namespace flag and therefore rejects further tokens.
var errInvalidNamespace = errors.New("object namespace is in an invalid state")
49
50
51
// maxNestingDepth is the largest number of enclosing objects and arrays
// that may be open at once; pushing beyond it yields errMaxDepth.
const maxNestingDepth = 10_000
53
54 type state struct {
55
56 Tokens stateMachine
57
58
59 Names objectNameStack
60
61
62
63
64
65
66 Namespaces objectNamespaceStack
67 }
68
69
70
71 func (s *state) needObjectValue() bool {
72 return s.Tokens.Last.needObjectValue()
73 }
74
75 func (s *state) reset() {
76 s.Tokens.reset()
77 s.Names.reset()
78 s.Namespaces.reset()
79 }
80
81
82
83
84
85
86
87
88
89
90
91
92
93
// Pointer is a JSON Pointer (RFC 6901): a sequence of '/'-prefixed
// reference tokens in which '~' is escaped as "~0" and '/' as "~1".
// The empty Pointer refers to the whole document.
type Pointer string
95
96
97
98 func (p Pointer) IsValid() bool {
99 for i, r := range p {
100 switch {
101 case r == '~' && (i+1 == len(p) || (p[i+1] != '0' && p[i+1] != '1')):
102 return false
103 case r == '\ufffd' && !strings.HasPrefix(string(p[i:]), "\ufffd"):
104 return false
105 }
106 }
107 return len(p) == 0 || p[0] == '/'
108 }
109
110
111
112 func (p Pointer) Contains(pc Pointer) bool {
113
114 suffix, ok := strings.CutPrefix(string(pc), string(p))
115 return ok && (suffix == "" || suffix[0] == '/')
116 }
117
118
119
120 func (p Pointer) Parent() Pointer {
121 return p[:max(strings.LastIndexByte(string(p), '/'), 0)]
122 }
123
124
125
126 func (p Pointer) LastToken() string {
127 last := p[max(strings.LastIndexByte(string(p), '/'), 0):]
128 return unescapePointerToken(strings.TrimPrefix(string(last), "/"))
129 }
130
131
132 func (p Pointer) AppendToken(tok string) Pointer {
133 return Pointer(appendEscapePointerName([]byte(p+"/"), tok))
134 }
135
136
137
138
139
140
141 func (p Pointer) Tokens() iter.Seq[string] {
142 return func(yield func(string) bool) {
143 for len(p) > 0 {
144 p = Pointer(strings.TrimPrefix(string(p), "/"))
145 i := min(uint(strings.IndexByte(string(p), '/')), uint(len(p)))
146 if !yield(unescapePointerToken(string(p)[:i])) {
147 return
148 }
149 p = p[i:]
150 }
151 }
152 }
153
// unescapePointerToken decodes the RFC 6901 escapes in a single reference
// token: "~1" becomes "/" and "~0" becomes "~". "~1" is decoded first so
// that "~01" correctly becomes "~1" rather than "/".
func unescapePointerToken(token string) string {
	if !strings.Contains(token, "~") {
		return token // fast path: nothing to decode
	}
	withSlashes := strings.ReplaceAll(token, "~1", "/")
	return strings.ReplaceAll(withSlashes, "~0", "~")
}
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180 func (s state) appendStackPointer(b []byte, where int) []byte {
181 var objectDepth int
182 for i := 1; i < s.Tokens.Depth(); i++ {
183 e := s.Tokens.index(i)
184 arrayDelta := -1
185 if isLast := i == s.Tokens.Depth()-1; isLast {
186 switch {
187 case where < 0 && e.Length() == 0 || where == 0 && !e.needObjectValue() || where > 0 && e.NeedObjectName():
188 return b
189 case where > 0 && e.isArray():
190 arrayDelta = 0
191 }
192 }
193 switch {
194 case e.isObject():
195 b = appendEscapePointerName(append(b, '/'), s.Names.getUnquoted(objectDepth))
196 objectDepth++
197 case e.isArray():
198 b = strconv.AppendUint(append(b, '/'), uint64(e.Length()+int64(arrayDelta)), 10)
199 }
200 }
201 return b
202 }
203
// appendEscapePointerName appends name to b as a JSON Pointer reference
// token, escaping '~' as "~0" and '/' as "~1" per RFC 6901, section 3.
// The name is processed rune by rune, so bytes that are not valid UTF-8
// are emitted as the encoding of U+FFFD.
func appendEscapePointerName[Bytes ~[]byte | ~string](b []byte, name Bytes) []byte {
	for _, r := range string(name) {
		if r == '~' {
			b = append(b, '~', '0')
		} else if r == '/' {
			b = append(b, '~', '1')
		} else {
			b = utf8.AppendRune(b, r)
		}
	}
	return b
}
218
219
220
221
222
223
224
225
226
227
228
229
230
231 type stateMachine struct {
232 Stack []stateEntry
233 Last stateEntry
234 }
235
236
237
238 func (m *stateMachine) reset() {
239 m.Stack = m.Stack[:0]
240 if cap(m.Stack) > 1<<10 {
241 m.Stack = nil
242 }
243 m.Last = stateTypeArray
244 }
245
246
247
248 func (m stateMachine) Depth() int {
249 return len(m.Stack) + 1
250 }
251
252
253
254 func (m *stateMachine) index(i int) *stateEntry {
255 if i == len(m.Stack) {
256 return &m.Last
257 }
258 return &m.Stack[i]
259 }
260
261
262
263 func (m stateMachine) DepthLength() (int, int64) {
264 return m.Depth(), m.Last.Length()
265 }
266
267
268
269 func (m *stateMachine) appendLiteral() error {
270 switch {
271 case m.Last.NeedObjectName():
272 return ErrNonStringName
273 case !m.Last.isValidNamespace():
274 return errInvalidNamespace
275 default:
276 m.Last.Increment()
277 return nil
278 }
279 }
280
281
282
283 func (m *stateMachine) appendString() error {
284 switch {
285 case !m.Last.isValidNamespace():
286 return errInvalidNamespace
287 default:
288 m.Last.Increment()
289 return nil
290 }
291 }
292
293
294
295 func (m *stateMachine) appendNumber() error {
296 return m.appendLiteral()
297 }
298
299
300
301 func (m *stateMachine) pushObject() error {
302 switch {
303 case m.Last.NeedObjectName():
304 return ErrNonStringName
305 case !m.Last.isValidNamespace():
306 return errInvalidNamespace
307 case len(m.Stack) == maxNestingDepth:
308 return errMaxDepth
309 default:
310 m.Last.Increment()
311 m.Stack = append(m.Stack, m.Last)
312 m.Last = stateTypeObject
313 return nil
314 }
315 }
316
317
318
319 func (m *stateMachine) popObject() error {
320 switch {
321 case !m.Last.isObject():
322 return errMismatchDelim
323 case m.Last.needObjectValue():
324 return errMissingValue
325 case !m.Last.isValidNamespace():
326 return errInvalidNamespace
327 default:
328 m.Last = m.Stack[len(m.Stack)-1]
329 m.Stack = m.Stack[:len(m.Stack)-1]
330 return nil
331 }
332 }
333
334
335
336 func (m *stateMachine) pushArray() error {
337 switch {
338 case m.Last.NeedObjectName():
339 return ErrNonStringName
340 case !m.Last.isValidNamespace():
341 return errInvalidNamespace
342 case len(m.Stack) == maxNestingDepth:
343 return errMaxDepth
344 default:
345 m.Last.Increment()
346 m.Stack = append(m.Stack, m.Last)
347 m.Last = stateTypeArray
348 return nil
349 }
350 }
351
352
353
354 func (m *stateMachine) popArray() error {
355 switch {
356 case !m.Last.isArray() || len(m.Stack) == 0:
357 return errMismatchDelim
358 case !m.Last.isValidNamespace():
359 return errInvalidNamespace
360 default:
361 m.Last = m.Stack[len(m.Stack)-1]
362 m.Stack = m.Stack[:len(m.Stack)-1]
363 return nil
364 }
365 }
366
367
368
369
370
371 func (m stateMachine) NeedIndent(next Kind) (n int) {
372 willEnd := next == '}' || next == ']'
373 switch {
374 case m.Depth() == 1:
375 return 0
376 case m.Last.Length() == 0 && willEnd:
377 return 0
378 case m.Last.Length() == 0 || m.Last.needImplicitComma(next):
379 return m.Depth()
380 case willEnd:
381 return m.Depth() - 1
382 default:
383 return 0
384 }
385 }
386
387
388 func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte {
389 switch {
390 case m.Last.needImplicitColon():
391 return append(b, ':')
392 case m.Last.needImplicitComma(next) && len(m.Stack) != 0:
393 return append(b, ',')
394 default:
395 return b
396 }
397 }
398
399
400
401
402 func (m stateMachine) needDelim(next Kind) (delim byte) {
403 switch {
404 case m.Last.needImplicitColon():
405 return ':'
406 case m.Last.needImplicitComma(next) && len(m.Stack) != 0:
407 return ','
408 default:
409 return 0
410 }
411 }
412
413
414
415
416
417
418
419
420 func (m *stateMachine) InvalidateDisabledNamespaces() {
421 for i := range m.Depth() {
422 e := m.index(i)
423 if !e.isActiveNamespace() {
424 e.invalidateNamespace()
425 }
426 }
427 }
428
429
430
431
432
// stateEntry packs the state of one JSON object or array into 64 bits:
// the top bit holds the container kind, the next two bits hold namespace
// flags, and the remaining 61 bits count the tokens seen so far.
type stateEntry uint64

const (
	// Bit 63: container kind (set for objects, clear for arrays).
	stateTypeMask   stateEntry = 0x8000_0000_0000_0000
	stateTypeObject stateEntry = 0x8000_0000_0000_0000
	stateTypeArray  stateEntry = 0x0000_0000_0000_0000

	// Bits 62-61: namespace flags. The "disable" flag marks a namespace
	// as inactive; the "invalid" flag makes the container reject tokens.
	stateNamespaceMask    stateEntry = 0x6000_0000_0000_0000
	stateDisableNamespace stateEntry = 0x4000_0000_0000_0000
	stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000

	// Bits 60-0: token count. For objects the lowest bit distinguishes
	// "name expected next" (even) from "value expected next" (odd).
	stateCountMask    stateEntry = 0x1fff_ffff_ffff_ffff
	stateCountLSBMask stateEntry = 0x0000_0000_0000_0001
	stateCountOdd     stateEntry = 0x0000_0000_0000_0001
	stateCountEven    stateEntry = 0x0000_0000_0000_0000
)
454
455
456
457 func (e stateEntry) Length() int64 {
458 return int64(e & stateCountMask)
459 }
460
461
462 func (e stateEntry) isObject() bool {
463 return e&stateTypeMask == stateTypeObject
464 }
465
466
467 func (e stateEntry) isArray() bool {
468 return e&stateTypeMask == stateTypeArray
469 }
470
471
472
473 func (e stateEntry) NeedObjectName() bool {
474 return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven
475 }
476
477
478
479 func (e stateEntry) needImplicitColon() bool {
480 return e.needObjectValue()
481 }
482
483
484
485 func (e stateEntry) needObjectValue() bool {
486 return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountOdd
487 }
488
489
490
491
492 func (e stateEntry) needImplicitComma(next Kind) bool {
493 return !e.needObjectValue() && e.Length() > 0 && next != '}' && next != ']'
494 }
495
496
497
498
499 func (e *stateEntry) Increment() {
500 (*e)++
501 }
502
503
504
505 func (e *stateEntry) decrement() {
506 (*e)--
507 }
508
509
510
511 func (e *stateEntry) DisableNamespace() {
512 *e |= stateDisableNamespace
513 }
514
515
516
517 func (e stateEntry) isActiveNamespace() bool {
518 return e&(stateDisableNamespace) == 0
519 }
520
521
522 func (e *stateEntry) invalidateNamespace() {
523 *e |= stateInvalidNamespace
524 }
525
526
527 func (e stateEntry) isValidNamespace() bool {
528 return e&(stateInvalidNamespace) == 0
529 }
530
531
532
533
534
535
536
537
538
539
540
// objectNameStack holds one member name slot per pushed entry.
type objectNameStack struct {
	// offsets has one element per slot.
	//   - A non-negative value is the end offset of the slot's name within
	//     unquotedNames; the name starts where the previous slot ends.
	//   - A negative value other than invalidOffset is the bitwise
	//     complement of an offset into an external buffer where the
	//     still-quoted name begins (see ReplaceLastQuotedOffset and
	//     copyQuotedBuffer).
	//   - invalidOffset marks a slot that currently has no name.
	offsets []int

	// unquotedNames is the concatenation of all copied names in unquoted
	// form.
	unquotedNames []byte
}
552
553 func (ns *objectNameStack) reset() {
554 ns.offsets = ns.offsets[:0]
555 ns.unquotedNames = ns.unquotedNames[:0]
556 if cap(ns.offsets) > 1<<6 {
557 ns.offsets = nil
558 }
559 if cap(ns.unquotedNames) > 1<<10 {
560 ns.unquotedNames = nil
561 }
562 }
563
564 func (ns *objectNameStack) length() int {
565 return len(ns.offsets)
566 }
567
568
569
570
571
572 func (ns *objectNameStack) getUnquoted(i int) []byte {
573 ns.ensureCopiedBuffer()
574 if i == 0 {
575 return ns.unquotedNames[:ns.offsets[0]]
576 } else {
577 return ns.unquotedNames[ns.offsets[i-1]:ns.offsets[i-0]]
578 }
579 }
580
581
// invalidOffset is the placeholder stored in objectNameStack.offsets for
// a slot that has no name. It is the most negative int.
const invalidOffset = math.MinInt
583
584
585 func (ns *objectNameStack) push() {
586 ns.offsets = append(ns.offsets, invalidOffset)
587 }
588
589
590
591
592 func (ns *objectNameStack) ReplaceLastQuotedOffset(i int) {
593
594
595
596
597
598 ns.offsets[len(ns.offsets)-1] = ^i
599 }
600
601
602
603
604 func (ns *objectNameStack) replaceLastUnquotedName(s string) {
605 ns.ensureCopiedBuffer()
606 var startOffset int
607 if len(ns.offsets) > 1 {
608 startOffset = ns.offsets[len(ns.offsets)-2]
609 }
610 ns.unquotedNames = append(ns.unquotedNames[:startOffset], s...)
611 ns.offsets[len(ns.offsets)-1] = len(ns.unquotedNames)
612 }
613
614
615
616 func (ns *objectNameStack) clearLast() {
617 ns.offsets[len(ns.offsets)-1] = invalidOffset
618 }
619
620
621 func (ns *objectNameStack) pop() {
622 ns.offsets = ns.offsets[:len(ns.offsets)-1]
623 }
624
625
626
627
628
629 func (ns *objectNameStack) copyQuotedBuffer(b []byte) {
630
631 var i int
632 for i = len(ns.offsets) - 1; i >= 0 && ns.offsets[i] < 0; i-- {
633 continue
634 }
635
636
637 for i = i + 1; i < len(ns.offsets); i++ {
638 if i == len(ns.offsets)-1 && ns.offsets[i] == invalidOffset {
639 if i == 0 {
640 ns.offsets[i] = 0
641 } else {
642 ns.offsets[i] = ns.offsets[i-1]
643 }
644 break
645 }
646
647
648
649
650 quotedName := b[^ns.offsets[i]:]
651 if quotedName[0] == invalidateBufferByte {
652 quotedName[0] = '"'
653 }
654
655
656 var startOffset int
657 if i > 0 {
658 startOffset = ns.offsets[i-1]
659 }
660 if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 {
661 ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...)
662 } else {
663 ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName)
664 }
665 ns.offsets[i] = len(ns.unquotedNames)
666 }
667 }
668
669 func (ns *objectNameStack) ensureCopiedBuffer() {
670 if len(ns.offsets) > 0 && ns.offsets[len(ns.offsets)-1] < 0 {
671 panic("BUG: copyQuotedBuffer not called beforehand")
672 }
673 }
674
675
676
677 type objectNamespaceStack []objectNamespace
678
679
680 func (nss *objectNamespaceStack) reset() {
681 if cap(*nss) > 1<<10 {
682 *nss = nil
683 }
684 *nss = (*nss)[:0]
685 }
686
687
688 func (nss *objectNamespaceStack) push() {
689 if cap(*nss) > len(*nss) {
690 *nss = (*nss)[:len(*nss)+1]
691 nss.Last().reset()
692 } else {
693 *nss = append(*nss, objectNamespace{})
694 }
695 }
696
697
698 func (nss objectNamespaceStack) Last() *objectNamespace {
699 return &nss[len(nss)-1]
700 }
701
702
703 func (nss *objectNamespaceStack) pop() {
704 *nss = (*nss)[:len(*nss)-1]
705 }
706
707
708
709
710
711
// objectNamespace is the set of member names inserted into one JSON
// object, kept in insertion order so that repeats can be detected.
type objectNamespace struct {
	// endOffsets holds, for each name, the offset in allUnquotedNames
	// where it ends; a name starts where the previous one ends.
	endOffsets []uint

	// allUnquotedNames is the concatenation of every name in unquoted
	// form.
	allUnquotedNames []byte

	// mapNames indexes the names for fast lookup. It stays nil until the
	// namespace grows past the thresholds checked in insert.
	mapNames map[string]struct{}
}
725
726
727 func (ns *objectNamespace) reset() {
728 ns.endOffsets = ns.endOffsets[:0]
729 ns.allUnquotedNames = ns.allUnquotedNames[:0]
730 ns.mapNames = nil
731 if cap(ns.endOffsets) > 1<<6 {
732 ns.endOffsets = nil
733 }
734 if cap(ns.allUnquotedNames) > 1<<10 {
735 ns.allUnquotedNames = nil
736 }
737 }
738
739
740 func (ns *objectNamespace) length() int {
741 return len(ns.endOffsets)
742 }
743
744
745 func (ns *objectNamespace) getUnquoted(i int) []byte {
746 if i == 0 {
747 return ns.allUnquotedNames[:ns.endOffsets[0]]
748 } else {
749 return ns.allUnquotedNames[ns.endOffsets[i-1]:ns.endOffsets[i-0]]
750 }
751 }
752
753
754 func (ns *objectNamespace) lastUnquoted() []byte {
755 return ns.getUnquoted(ns.length() - 1)
756 }
757
758
759
760
761 func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool {
762 if isVerbatim {
763 name = name[len(`"`) : len(name)-len(`"`)]
764 }
765 return ns.insert(name, !isVerbatim)
766 }
767 func (ns *objectNamespace) InsertUnquoted(name []byte) bool {
768 return ns.insert(name, false)
769 }
770 func (ns *objectNamespace) insert(name []byte, quoted bool) bool {
771 var allNames []byte
772 if quoted {
773 allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name)
774 } else {
775 allNames = append(ns.allUnquotedNames, name...)
776 }
777 name = allNames[len(ns.allUnquotedNames):]
778
779
780
781 if ns.mapNames == nil && (ns.length() > 64 || len(ns.allUnquotedNames) > 1024) {
782 ns.mapNames = make(map[string]struct{})
783 var startOffset uint
784 for _, endOffset := range ns.endOffsets {
785 name := ns.allUnquotedNames[startOffset:endOffset]
786 ns.mapNames[string(name)] = struct{}{}
787 startOffset = endOffset
788 }
789 }
790
791 if ns.mapNames == nil {
792
793
794 var startOffset uint
795 for _, endOffset := range ns.endOffsets {
796 if string(ns.allUnquotedNames[startOffset:endOffset]) == string(name) {
797 return false
798 }
799 startOffset = endOffset
800 }
801 } else {
802
803
804 if _, ok := ns.mapNames[string(name)]; ok {
805 return false
806 }
807 ns.mapNames[string(name)] = struct{}{}
808 }
809
810 ns.allUnquotedNames = allNames
811 ns.endOffsets = append(ns.endOffsets, uint(len(ns.allUnquotedNames)))
812 return true
813 }
814
815
816 func (ns *objectNamespace) removeLast() {
817 if ns.mapNames != nil {
818 delete(ns.mapNames, string(ns.lastUnquoted()))
819 }
820 if ns.length()-1 == 0 {
821 ns.endOffsets = ns.endOffsets[:0]
822 ns.allUnquotedNames = ns.allUnquotedNames[:0]
823 } else {
824 ns.endOffsets = ns.endOffsets[:ns.length()-1]
825 ns.allUnquotedNames = ns.allUnquotedNames[:ns.endOffsets[ns.length()-1]]
826 }
827 }
828
View as plain text