1
2
3
4
5
6
7 package jsonwire
8
9 import (
10 "io"
11 "math"
12 "slices"
13 "strconv"
14 "unicode/utf16"
15 "unicode/utf8"
16 )
17
// ValueFlags is a bit set describing properties of a JSON value that were
// observed while it was being scanned. The zero value has no flags set.
type ValueFlags uint

const (
	// stringNonVerbatim is set by ConsumeStringResumable when the string
	// contains an escape sequence, invalid UTF-8, or a control character,
	// meaning the bytes between the quotes cannot be used as-is.
	stringNonVerbatim ValueFlags = 1 << iota

	// stringNonCanonical is set by ConsumeStringResumable when the string is
	// not written in the form that the scanner treats as canonical.
	stringNonCanonical
)

// Join sets in f every flag that is set in f2.
func (f *ValueFlags) Join(f2 ValueFlags) {
	*f = *f | f2
}

// IsVerbatim reports whether the stringNonVerbatim flag is clear.
func (f ValueFlags) IsVerbatim() bool {
	nonVerbatim := f&stringNonVerbatim != 0
	return !nonVerbatim
}

// IsCanonical reports whether the stringNonCanonical flag is clear.
func (f ValueFlags) IsCanonical() bool {
	nonCanonical := f&stringNonCanonical != 0
	return !nonCanonical
}
31
32
// ConsumeWhitespace returns the length of the leading run of JSON whitespace
// in b, that is, spaces, horizontal tabs, carriage returns, and line feeds.
// It returns 0 if b is empty or does not start with whitespace.
func ConsumeWhitespace(b []byte) (n int) {
	for ; n < len(b); n++ {
		if c := b[n]; c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			break
		}
	}
	return n
}
40
41
42
// ConsumeNull returns the length of the literal "null" if b starts with it,
// and 0 otherwise (including when b is too short to hold the literal).
func ConsumeNull(b []byte) int {
	const want = "null"
	if len(b) < len(want) {
		return 0
	}
	if string(b[:len(want)]) != want {
		return 0
	}
	return len(want)
}
51
52
53
// ConsumeFalse returns the length of the literal "false" if b starts with it,
// and 0 otherwise (including when b is too short to hold the literal).
func ConsumeFalse(b []byte) int {
	const want = "false"
	if len(b) < len(want) {
		return 0
	}
	if string(b[:len(want)]) != want {
		return 0
	}
	return len(want)
}
62
63
64
// ConsumeTrue returns the length of the literal "true" if b starts with it,
// and 0 otherwise (including when b is too short to hold the literal).
func ConsumeTrue(b []byte) int {
	const want = "true"
	if len(b) < len(want) {
		return 0
	}
	if string(b[:len(want)]) != want {
		return 0
	}
	return len(want)
}
73
74
75
76 func ConsumeLiteral(b []byte, lit string) (n int, err error) {
77 for i := 0; i < len(b) && i < len(lit); i++ {
78 if b[i] != lit[i] {
79 return i, NewInvalidCharacterError(b[i:], "in literal "+lit+" (expecting "+strconv.QuoteRune(rune(lit[i]))+")")
80 }
81 }
82 if len(b) < len(lit) {
83 return len(b), io.ErrUnexpectedEOF
84 }
85 return len(lit), nil
86 }
87
88
89
90
91
92
93
94
95
96
97 func ConsumeSimpleString(b []byte) (n int) {
98
99 if len(b) > 0 && b[0] == '"' {
100 n++
101 for len(b) > n && b[n] < utf8.RuneSelf && escapeASCII[b[n]] == 0 {
102 n++
103 }
104 if uint(len(b)) > uint(n) && b[n] == '"' {
105 n++
106 return n
107 }
108 }
109 return 0
110 }
111
112
113
114
115
116
117 func ConsumeString(flags *ValueFlags, b []byte, validateUTF8 bool) (n int, err error) {
118 return ConsumeStringResumable(flags, b, 0, validateUTF8)
119 }
120
121
122
// ConsumeStringResumable consumes a JSON string from the start of b and
// returns the number of bytes consumed. ConsumeString is this function called
// with a resumeOffset of 0.
//
// A positive resumeOffset is taken as the offset in b to continue scanning
// from, in which case the leading double quote is not checked again.
//
// If validateUTF8 is true, invalid UTF-8 is rejected with ErrInvalidUTF8 and
// an escaped UTF-16 surrogate half must be followed by an escape that forms a
// valid pair with it. If validateUTF8 is false, each invalid byte is skipped
// and surrogate escapes are not checked.
//
// Properties observed during the scan are joined into flags. flags is
// dereferenced whenever an escape sequence, invalid UTF-8, or a control
// character is encountered, so it must be non-nil for such input.
//
// If b ends before the closing quote, it returns io.ErrUnexpectedEOF. When
// the input ends in the middle of an escape sequence, the returned offset is
// the position of the backslash that starts the sequence rather than the end
// of b, so that the whole sequence is scanned again on resumption.
// Other errors are returned with the offset of the offending input.
func ConsumeStringResumable(flags *ValueFlags, b []byte, resumeOffset int, validateUTF8 bool) (n int, err error) {
	// Consume the leading double quote, unless resuming past it.
	switch {
	case resumeOffset > 0:
		n = resumeOffset
	case uint(len(b)) == 0:
		return n, io.ErrUnexpectedEOF
	case b[0] == '"':
		n++
	default:
		return n, NewInvalidCharacterError(b[n:], `at start of string (expecting '"')`)
	}

	// Consume the body of the string up to the closing double quote.
	for uint(len(b)) > uint(n) {
		// Skip over a run of ASCII bytes that need no special handling:
		// anything that is not a control character, backslash, or quote.
		noEscape := func(c byte) bool {
			return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
		}
		for uint(len(b)) > uint(n) && noEscape(b[n]) {
			n++
		}
		if uint(len(b)) <= uint(n) {
			return n, io.ErrUnexpectedEOF
		}

		// A double quote terminates the string.
		if b[n] == '"' {
			n++
			return n, nil
		}

		switch r, rn := utf8.DecodeRune(b[n:]); {
		// Multi-byte UTF-8 sequence. Plain ASCII was consumed above, so the
		// single-byte results left for the cases below are a backslash, a
		// control character, or a decoding failure.
		case rn > 1:
			n += rn

		// Escape sequence.
		case r == '\\':
			flags.Join(stringNonVerbatim)
			// Remember where the escape starts; this is the offset reported
			// if the input turns out to be truncated inside the escape.
			resumeOffset = n
			if uint(len(b)) < uint(n+2) {
				return resumeOffset, io.ErrUnexpectedEOF
			}
			switch r := b[n+1]; r {
			case '/':
				// An escaped forward slash is accepted but is not treated
				// as canonical.
				flags.Join(stringNonCanonical)
				n += 2
			case '"', '\\', 'b', 'f', 'n', 'r', 't':
				n += 2
			case 'u':
				if uint(len(b)) < uint(n+6) {
					// Fewer than six bytes remain: distinguish a truncated
					// but so-far-valid \uXXXX from a malformed one.
					if hasEscapedUTF16Prefix(b[n:], false) {
						return resumeOffset, io.ErrUnexpectedEOF
					}
					flags.Join(stringNonCanonical)
					return n, NewInvalidEscapeSequenceError(b[n:])
				}
				v1, ok := parseHexUint16(b[n+2 : n+6])
				if !ok {
					flags.Join(stringNonCanonical)
					return n, NewInvalidEscapeSequenceError(b[n : n+6])
				}

				// Decide whether this use of the \uXXXX form is canonical.
				switch v1 {
				// These characters have a two-character escape, so the
				// \uXXXX form is not canonical for them.
				case '\b', '\f', '\n', '\r', '\t':
					flags.Join(stringNonCanonical)
				default:
					// Only the remaining control characters may use \uXXXX.
					if v1 >= ' ' {
						flags.Join(stringNonCanonical)
					} else {
						// Upper-case hexadecimal digits are not canonical.
						for _, c := range b[n+2 : n+6] {
							if 'A' <= c && c <= 'F' {
								flags.Join(stringNonCanonical)
							}
						}
					}
				}
				n += 6

				// A surrogate half must be followed by a second \uXXXX
				// escape that pairs with it (checked only when validating).
				// On failure the offset reported is that of the first
				// escape, hence the n-6 below.
				r := rune(v1)
				if validateUTF8 && utf16.IsSurrogate(r) {
					if uint(len(b)) < uint(n+6) {
						if hasEscapedUTF16Prefix(b[n:], true) {
							return resumeOffset, io.ErrUnexpectedEOF
						}
						flags.Join(stringNonCanonical)
						return n - 6, NewInvalidEscapeSequenceError(b[n-6:])
					} else if v2, ok := parseHexUint16(b[n+2 : n+6]); b[n] != '\\' || b[n+1] != 'u' || !ok {
						flags.Join(stringNonCanonical)
						return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
					} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
						flags.Join(stringNonCanonical)
						return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
					} else {
						n += 6
					}
				}
			default:
				flags.Join(stringNonCanonical)
				return n, NewInvalidEscapeSequenceError(b[n : n+2])
			}

		// Invalid UTF-8.
		case r == utf8.RuneError:
			// An incomplete encoding at the end of b may simply be
			// truncated input rather than invalid input.
			if !utf8.FullRune(b[n:]) {
				return n, io.ErrUnexpectedEOF
			}
			flags.Join(stringNonVerbatim | stringNonCanonical)
			if validateUTF8 {
				return n, ErrInvalidUTF8
			}
			n++

		// Unescaped control character.
		case r < ' ':
			flags.Join(stringNonVerbatim | stringNonCanonical)
			return n, NewInvalidCharacterError(b[n:], "in string (expecting non-control character)")
		default:
			panic("BUG: unhandled character " + QuoteRune(b[n:]))
		}
	}
	return n, io.ErrUnexpectedEOF
}
252
253
254
255
256
// AppendUnquote appends the unescaped content of the JSON string in src to
// dst and returns the extended buffer.
//
// src must hold exactly one JSON string: it has to start with a double quote,
// and any bytes after the closing quote are reported as an invalid-character
// error. If src ends before the closing quote, the content decoded so far is
// appended and io.ErrUnexpectedEOF is returned.
//
// Invalid UTF-8 is replaced with U+FFFD and recorded as ErrInvalidUTF8, and
// an escaped surrogate half without a valid partner is replaced with U+FFFD
// and recorded as an invalid escape sequence. In both cases decoding
// continues; the recorded error is returned when the closing quote is
// reached, unless a later error takes its place.
// Malformed escape sequences and unescaped control characters stop decoding
// immediately and return what was appended up to that point.
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) (v []byte, err error) {
	// Reserve room for len(src) more bytes up front.
	dst = slices.Grow(dst, len(src))

	// Consume the leading double quote.
	// i marks the start of the pending run of bytes that can be copied to
	// dst unchanged; n is the current scan position.
	var i, n int
	switch {
	case uint(len(src)) == 0:
		return dst, io.ErrUnexpectedEOF
	case src[0] == '"':
		i, n = 1, 1
	default:
		return dst, NewInvalidCharacterError(src, `at start of string (expecting '"')`)
	}

	// Consume the body of the string up to the closing double quote.
	for uint(len(src)) > uint(n) {
		// Skip over a run of ASCII bytes that need no special handling:
		// anything that is not a control character, backslash, or quote.
		noEscape := func(c byte) bool {
			return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
		}
		for uint(len(src)) > uint(n) && noEscape(src[n]) {
			n++
		}
		if uint(len(src)) <= uint(n) {
			dst = append(dst, src[i:n]...)
			return dst, io.ErrUnexpectedEOF
		}

		// A double quote terminates the string; nothing may follow it.
		if src[n] == '"' {
			dst = append(dst, src[i:n]...)
			n++
			if n < len(src) {
				err = NewInvalidCharacterError(src[n:], "after string value")
			}
			return dst, err
		}

		// NOTE(review): truncateMaxUTF8 is defined elsewhere; presumably it
		// caps the input at the longest possible UTF-8 encoding so that the
		// string conversion stays small. Confirm against its definition.
		switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); {
		// Multi-byte UTF-8 sequence. It stays part of the pending run and
		// is copied verbatim later.
		case rn > 1:
			n += rn

		// Escape sequence.
		case r == '\\':
			// Flush the pending run before emitting the decoded character.
			dst = append(dst, src[i:n]...)

			if uint(len(src)) < uint(n+2) {
				return dst, io.ErrUnexpectedEOF
			}
			switch r := src[n+1]; r {
			case '"', '\\', '/':
				dst = append(dst, r)
				n += 2
			case 'b':
				dst = append(dst, '\b')
				n += 2
			case 'f':
				dst = append(dst, '\f')
				n += 2
			case 'n':
				dst = append(dst, '\n')
				n += 2
			case 'r':
				dst = append(dst, '\r')
				n += 2
			case 't':
				dst = append(dst, '\t')
				n += 2
			case 'u':
				if uint(len(src)) < uint(n+6) {
					// Fewer than six bytes remain: distinguish a truncated
					// but so-far-valid \uXXXX from a malformed one.
					if hasEscapedUTF16Prefix(src[n:], false) {
						return dst, io.ErrUnexpectedEOF
					}
					return dst, NewInvalidEscapeSequenceError(src[n:])
				}
				v1, ok := parseHexUint16(src[n+2 : n+6])
				if !ok {
					return dst, NewInvalidEscapeSequenceError(src[n : n+6])
				}
				n += 6

				// A surrogate half needs a second \uXXXX escape to form a
				// full code point. Until a valid pair is decoded, r holds
				// U+FFFD, which is what gets appended on failure.
				r := rune(v1)
				if utf16.IsSurrogate(r) {
					r = utf8.RuneError
					if uint(len(src)) < uint(n+6) {
						if hasEscapedUTF16Prefix(src[n:], true) {
							return utf8.AppendRune(dst, r), io.ErrUnexpectedEOF
						}
						err = NewInvalidEscapeSequenceError(src[n-6:])
					} else if v2, ok := parseHexUint16(src[n+2 : n+6]); src[n] != '\\' || src[n+1] != 'u' || !ok {
						err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
					} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
						err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
					} else {
						n += 6
					}
				}

				dst = utf8.AppendRune(dst, r)
			default:
				return dst, NewInvalidEscapeSequenceError(src[n : n+2])
			}
			// The next pending run starts after the escape sequence.
			i = n

		// Invalid UTF-8.
		case r == utf8.RuneError:
			dst = append(dst, src[i:n]...)
			// An incomplete encoding at the end of src may simply be
			// truncated input rather than invalid input.
			if !utf8.FullRuneInString(string(truncateMaxUTF8(src[n:]))) {
				return dst, io.ErrUnexpectedEOF
			}

			// Substitute the replacement character, record the error, and
			// keep decoding.
			dst = append(dst, "\uFFFD"...)
			n += rn
			i = n
			err = ErrInvalidUTF8

		// Unescaped control character.
		case r < ' ':
			dst = append(dst, src[i:n]...)
			return dst, NewInvalidCharacterError(src[n:], "in string (expecting non-control character)")
		default:
			panic("BUG: unhandled character " + QuoteRune(src[n:]))
		}
	}
	dst = append(dst, src[i:n]...)
	return dst, io.ErrUnexpectedEOF
}
387
388
389
// hasEscapedUTF16Prefix reports whether b is consistent with being the start
// of a \uXXXX escape sequence: a backslash, then 'u', then up to four
// hexadecimal digits. Only the bytes that are present are checked, so an
// empty b is accepted, and bytes beyond the sixth are ignored.
//
// If lowerSurrogateHalf is true, the first hexadecimal digit must be 'd' and
// the second must be in the range 'c' through 'f' (either case), which
// restricts the value to 0xDC00-0xDFFF.
func hasEscapedUTF16Prefix[Bytes ~[]byte | ~string](b Bytes, lowerSurrogateHalf bool) bool {
	isHex := func(c byte) bool {
		return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
	}
	for pos := 0; pos < len(b); pos++ {
		c := b[pos]
		var valid bool
		switch pos {
		case 0:
			valid = c == '\\'
		case 1:
			valid = c == 'u'
		case 2:
			if lowerSurrogateHalf {
				valid = c == 'd' || c == 'D'
			} else {
				valid = isHex(c)
			}
		case 3:
			if lowerSurrogateHalf {
				valid = ('c' <= c && c <= 'f') || ('C' <= c && c <= 'F')
			} else {
				valid = isHex(c)
			}
		case 4, 5:
			valid = isHex(c)
		default:
			// Nothing past the six bytes of the escape is inspected.
			return true
		}
		if !valid {
			return false
		}
	}
	return true
}
407
408
409
410
411
412
413 func UnquoteMayCopy(b []byte, isVerbatim bool) []byte {
414
415 if isVerbatim {
416 return b[len(`"`) : len(b)-len(`"`)]
417 }
418 b, _ = AppendUnquote(nil, b)
419 return b
420 }
421
422
423
424
425
// ConsumeSimpleNumber consumes a JSON number that is a plain non-negative
// integer: either a single '0' or a non-zero digit followed by more digits.
//
// It returns the number of bytes consumed, or 0 if b does not start with a
// digit or if the digits are directly followed by '.', 'e', or 'E' (that is,
// the number has a fraction or exponent). A result of 0 therefore does not
// mean the input is invalid, only that it is not a simple number.
func ConsumeSimpleNumber(b []byte) (n int) {
	if len(b) == 0 {
		return 0
	}
	switch first := b[0]; {
	case first == '0':
		// A leading zero stands alone; no further digits are consumed.
		n = 1
	case '1' <= first && first <= '9':
		for n = 1; n < len(b) && '0' <= b[n] && b[n] <= '9'; n++ {
		}
	default:
		return 0
	}
	if n < len(b) {
		switch b[n] {
		case '.', 'e', 'E':
			return 0
		}
	}
	return n
}
445
// ConsumeNumberState records how far ConsumeNumberResumable has progressed
// through a JSON number, so that scanning can continue from that point.
type ConsumeNumberState uint

// The order of these constants matters: ConsumeNumberResumable increments a
// "within" state to obtain the "before" state of the following component.
const (
	// consumeNumberInit is the zero value and starts a fresh scan.
	consumeNumberInit ConsumeNumberState = iota
	// beforeIntegerDigits: the integer digits are still to be consumed.
	beforeIntegerDigits
	// withinIntegerDigits: integer digits were consumed and more may follow.
	withinIntegerDigits
	// beforeFractionalDigits: a fractional component may follow.
	beforeFractionalDigits
	// withinFractionalDigits: fractional digits were consumed and more may follow.
	withinFractionalDigits
	// beforeExponentDigits: an exponent component may follow.
	beforeExponentDigits
	// withinExponentDigits: exponent digits were consumed and more may follow.
	withinExponentDigits
)
457
458
459
460
461
462
463
464
465 func ConsumeNumber(b []byte) (n int, err error) {
466 n, _, err = ConsumeNumberResumable(b, 0, consumeNumberInit)
467 return n, err
468 }
469
470
471
// ConsumeNumberResumable consumes a JSON number from b, optionally continuing
// an earlier scan. ConsumeNumber is this function called with a resumeOffset
// of 0 and a state of consumeNumberInit.
//
// resumeOffset and state give the position and state to continue from; they
// are intended to be the values returned by a previous call on a shorter
// prefix of the same input. It returns the number of bytes consumed together
// with the state reached.
//
// If b ends where at least one more digit is required (after a minus sign, a
// decimal point, or an exponent marker with optional sign), it returns
// io.ErrUnexpectedEOF along with the offset at which that component started
// and the corresponding "before" state. If a digit is required but another
// byte is found, it returns an invalid-character error with the offset of
// that byte.
func ConsumeNumberResumable(b []byte, resumeOffset int, state ConsumeNumberState) (n int, _ ConsumeNumberState, err error) {
	// When resuming, pick up at resumeOffset and jump to the matching phase.
	n = resumeOffset
	if state > consumeNumberInit {
		switch state {
		case withinIntegerDigits, withinFractionalDigits, withinExponentDigits:
			// Consume any further digits of the current component.
			for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
				n++
			}
			if uint(len(b)) <= uint(n) {
				return n, state, nil // input exhausted; still within the same component
			}
			// Each "within" constant is declared directly before the
			// "before" constant of the next component, so incrementing
			// advances to it. Incrementing withinExponentDigits yields a
			// value with no named constant, which the switch below treats
			// as the end of the number.
			state++
		}
		switch state {
		case beforeIntegerDigits:
			goto beforeInteger
		case beforeFractionalDigits:
			goto beforeFractional
		case beforeExponentDigits:
			goto beforeExponent
		default:
			return n, state, nil
		}
	}

	// Consume the required integer component with its optional minus sign.
beforeInteger:
	resumeOffset = n
	if uint(len(b)) > 0 && b[0] == '-' {
		n++
	}
	switch {
	case uint(len(b)) <= uint(n):
		return resumeOffset, beforeIntegerDigits, io.ErrUnexpectedEOF
	case b[n] == '0':
		// A leading zero cannot be followed by further integer digits, so
		// move straight on to the optional fraction.
		n++
		state = beforeFractionalDigits
	case '1' <= b[n] && b[n] <= '9':
		n++
		for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
			n++
		}
		state = withinIntegerDigits
	default:
		return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
	}

	// Consume the optional fractional component: '.' and at least one digit.
beforeFractional:
	if uint(len(b)) > uint(n) && b[n] == '.' {
		// If the input ends right after the '.', report the offset of the
		// '.' itself so that it is scanned again on resumption.
		resumeOffset = n
		n++
		switch {
		case uint(len(b)) <= uint(n):
			return resumeOffset, beforeFractionalDigits, io.ErrUnexpectedEOF
		case '0' <= b[n] && b[n] <= '9':
			n++
		default:
			return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
		}
		for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
			n++
		}
		state = withinFractionalDigits
	}

	// Consume the optional exponent component: 'e' or 'E', an optional
	// sign, and at least one digit.
beforeExponent:
	if uint(len(b)) > uint(n) && (b[n] == 'e' || b[n] == 'E') {
		// As above, report the offset of the exponent marker if the input
		// ends before any exponent digit.
		resumeOffset = n
		n++
		if uint(len(b)) > uint(n) && (b[n] == '-' || b[n] == '+') {
			n++
		}
		switch {
		case uint(len(b)) <= uint(n):
			return resumeOffset, beforeExponentDigits, io.ErrUnexpectedEOF
		case '0' <= b[n] && b[n] <= '9':
			n++
		default:
			return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
		}
		for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
			n++
		}
		state = withinExponentDigits
	}

	return n, state, nil
}
564
565
566
567
// parseHexUint16 parses b as exactly four hexadecimal digits (upper or lower
// case) and returns their value. It returns (0, false) if b is not four bytes
// long or contains a byte that is not a hexadecimal digit.
func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) {
	if len(b) != 4 {
		return 0, false
	}
	for i := 0; i < 4; i++ {
		var nibble byte
		switch c := b[i]; {
		case c >= '0' && c <= '9':
			nibble = c - '0'
		case c >= 'a' && c <= 'f':
			nibble = c - 'a' + 10
		case c >= 'A' && c <= 'F':
			nibble = c - 'A' + 10
		default:
			return 0, false
		}
		// Four digits of four bits each fill a uint16 exactly.
		v = v<<4 | uint16(nibble)
	}
	return v, true
}
588
589
590
591
592
// ParseUint parses b as an unsigned decimal integer.
//
// The whole of b must consist of decimal digits, and a leading zero is only
// accepted for the single-digit input "0"; otherwise it returns (0, false).
// If the digits are well formed but the value does not fit in a uint64, it
// returns (math.MaxUint64, false).
func ParseUint(b []byte) (v uint64, ok bool) {
	// maxDigits is the number of decimal digits in math.MaxUint64.
	const maxDigits = 20

	// Accumulate digits without checking for overflow; v may wrap around.
	digits := 0
	for digits < len(b) {
		c := b[digits]
		if c < '0' || c > '9' {
			break
		}
		v = v*10 + uint64(c-'0')
		digits++
	}

	if digits == 0 || digits != len(b) {
		return 0, false
	}
	if b[0] == '0' && len(b) > 1 {
		return 0, false
	}
	if digits >= maxDigits {
		// A value of exactly maxDigits digits fits only if it starts with
		// '1' and the accumulator did not wrap: every such value is at least
		// 1e19, whereas a wrapped accumulator ends up below 1e19.
		overflowed := digits > maxDigits || b[0] != '1' || v < 1e19
		if overflowed {
			return math.MaxUint64, false
		}
	}
	return v, true
}
607
608
609
610
611
612
613
// ParseFloat parses b as a floating-point number using strconv.ParseFloat
// with the given bit size. ok is false if strconv.ParseFloat reports an
// error.
//
// If the parsed result is infinite and bits is 32 or 64, the result is
// replaced by the largest finite value of that size with the same sign
// (math.MaxFloat32 or math.MaxFloat64). For any other value of bits an
// infinite result is returned unchanged.
func ParseFloat(b []byte, bits int) (v float64, ok bool) {
	parsed, err := strconv.ParseFloat(string(b), bits)
	ok = err == nil

	// Pick the clamp limit for the requested size.
	var limit float64
	switch bits {
	case 32:
		limit = math.MaxFloat32
	case 64:
		limit = math.MaxFloat64
	default:
		return parsed, ok
	}

	switch {
	case math.IsInf(parsed, +1):
		return +limit, ok
	case math.IsInf(parsed, -1):
		return -limit, ok
	}
	return parsed, ok
}
630
View as plain text