Source file
src/strconv/quote.go
1
2
3
4
5
6
7 package strconv
8
9 import (
10 "unicode/utf8"
11 )
12
// lowerhex maps a 4-bit value (0-15) to its lower-case hexadecimal digit.
const lowerhex = "0123456789abcdef"

// upperhex maps a 4-bit value (0-15) to its upper-case hexadecimal digit.
const upperhex = "0123456789ABCDEF"
17
18
19 func contains(s string, c byte) bool {
20 return index(s, c) != -1
21 }
22
23 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
24 return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly))
25 }
26
27 func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string {
28 return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly))
29 }
30
31 func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte {
32
33
34 if cap(buf)-len(buf) < len(s) {
35 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
36 copy(nBuf, buf)
37 buf = nBuf
38 }
39 buf = append(buf, quote)
40 for r, width := rune(0), 0; len(s) > 0; s = s[width:] {
41 r, width = utf8.DecodeRuneInString(s)
42 if width == 1 && r == utf8.RuneError {
43 buf = append(buf, `\x`...)
44 buf = append(buf, lowerhex[s[0]>>4])
45 buf = append(buf, lowerhex[s[0]&0xF])
46 continue
47 }
48 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
49 }
50 buf = append(buf, quote)
51 return buf
52 }
53
54 func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
55 buf = append(buf, quote)
56 if !utf8.ValidRune(r) {
57 r = utf8.RuneError
58 }
59 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
60 buf = append(buf, quote)
61 return buf
62 }
63
64 func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
65 if r == rune(quote) || r == '\\' {
66 buf = append(buf, '\\')
67 buf = append(buf, byte(r))
68 return buf
69 }
70 if ASCIIonly {
71 if r < utf8.RuneSelf && IsPrint(r) {
72 buf = append(buf, byte(r))
73 return buf
74 }
75 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) {
76 return utf8.AppendRune(buf, r)
77 }
78 switch r {
79 case '\a':
80 buf = append(buf, `\a`...)
81 case '\b':
82 buf = append(buf, `\b`...)
83 case '\f':
84 buf = append(buf, `\f`...)
85 case '\n':
86 buf = append(buf, `\n`...)
87 case '\r':
88 buf = append(buf, `\r`...)
89 case '\t':
90 buf = append(buf, `\t`...)
91 case '\v':
92 buf = append(buf, `\v`...)
93 default:
94 switch {
95 case r < ' ' || r == 0x7f:
96 buf = append(buf, `\x`...)
97 buf = append(buf, lowerhex[byte(r)>>4])
98 buf = append(buf, lowerhex[byte(r)&0xF])
99 case !utf8.ValidRune(r):
100 r = 0xFFFD
101 fallthrough
102 case r < 0x10000:
103 buf = append(buf, `\u`...)
104 for s := 12; s >= 0; s -= 4 {
105 buf = append(buf, lowerhex[r>>uint(s)&0xF])
106 }
107 default:
108 buf = append(buf, `\U`...)
109 for s := 28; s >= 0; s -= 4 {
110 buf = append(buf, lowerhex[r>>uint(s)&0xF])
111 }
112 }
113 }
114 return buf
115 }
116
117
118
119
120
121 func Quote(s string) string {
122 return quoteWith(s, '"', false, false)
123 }
124
125
126
127 func AppendQuote(dst []byte, s string) []byte {
128 return appendQuotedWith(dst, s, '"', false, false)
129 }
130
131
132
133
134 func QuoteToASCII(s string) string {
135 return quoteWith(s, '"', true, false)
136 }
137
138
139
140 func AppendQuoteToASCII(dst []byte, s string) []byte {
141 return appendQuotedWith(dst, s, '"', true, false)
142 }
143
144
145
146
147
148 func QuoteToGraphic(s string) string {
149 return quoteWith(s, '"', false, true)
150 }
151
152
153
154 func AppendQuoteToGraphic(dst []byte, s string) []byte {
155 return appendQuotedWith(dst, s, '"', false, true)
156 }
157
158
159
160
161
162
163 func QuoteRune(r rune) string {
164 return quoteRuneWith(r, '\'', false, false)
165 }
166
167
168
169 func AppendQuoteRune(dst []byte, r rune) []byte {
170 return appendQuotedRuneWith(dst, r, '\'', false, false)
171 }
172
173
174
175
176
177
178
179 func QuoteRuneToASCII(r rune) string {
180 return quoteRuneWith(r, '\'', true, false)
181 }
182
183
184
185 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
186 return appendQuotedRuneWith(dst, r, '\'', true, false)
187 }
188
189
190
191
192
193
194
195 func QuoteRuneToGraphic(r rune) string {
196 return quoteRuneWith(r, '\'', false, true)
197 }
198
199
200
201 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte {
202 return appendQuotedRuneWith(dst, r, '\'', false, true)
203 }
204
205
206
207
// CanBackquote reports whether every rune of s passes the checks below.
// It returns false as soon as it meets:
//   - the byte order mark U+FEFF;
//   - a byte that is not valid UTF-8;
//   - a backquote, DEL (U+007F), or a control character other than tab.
//
// All other multi-byte runes are accepted without further inspection.
func CanBackquote(s string) bool {
	for rest := s; len(rest) > 0; {
		r, size := utf8.DecodeRuneInString(rest)
		rest = rest[size:]

		switch {
		case size > 1:
			// Correctly encoded multi-byte rune: only the BOM is refused.
			if r == '\ufeff' {
				return false
			}
		case r == utf8.RuneError:
			// A one-byte RuneError means the input byte was invalid.
			return false
		case r == '`', r == '\u007F', r < ' ' && r != '\t':
			return false
		}
	}
	return true
}
227
// unhex converts the ASCII hexadecimal digit b (either case) to its numeric
// value. ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	switch c := rune(b); {
	case c >= '0' && c <= '9':
		v, ok = c-'0', true
	case c >= 'a' && c <= 'f':
		v, ok = c-'a'+10, true
	case c >= 'A' && c <= 'F':
		v, ok = c-'A'+10, true
	}
	return v, ok
}
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
256
257 if len(s) == 0 {
258 err = ErrSyntax
259 return
260 }
261 switch c := s[0]; {
262 case c == quote && (quote == '\'' || quote == '"'):
263 err = ErrSyntax
264 return
265 case c >= utf8.RuneSelf:
266 r, size := utf8.DecodeRuneInString(s)
267 return r, true, s[size:], nil
268 case c != '\\':
269 return rune(s[0]), false, s[1:], nil
270 }
271
272
273 if len(s) <= 1 {
274 err = ErrSyntax
275 return
276 }
277 c := s[1]
278 s = s[2:]
279
280 switch c {
281 case 'a':
282 value = '\a'
283 case 'b':
284 value = '\b'
285 case 'f':
286 value = '\f'
287 case 'n':
288 value = '\n'
289 case 'r':
290 value = '\r'
291 case 't':
292 value = '\t'
293 case 'v':
294 value = '\v'
295 case 'x', 'u', 'U':
296 n := 0
297 switch c {
298 case 'x':
299 n = 2
300 case 'u':
301 n = 4
302 case 'U':
303 n = 8
304 }
305 var v rune
306 if len(s) < n {
307 err = ErrSyntax
308 return
309 }
310 for j := 0; j < n; j++ {
311 x, ok := unhex(s[j])
312 if !ok {
313 err = ErrSyntax
314 return
315 }
316 v = v<<4 | x
317 }
318 s = s[n:]
319 if c == 'x' {
320
321 value = v
322 break
323 }
324 if !utf8.ValidRune(v) {
325 err = ErrSyntax
326 return
327 }
328 value = v
329 multibyte = true
330 case '0', '1', '2', '3', '4', '5', '6', '7':
331 v := rune(c) - '0'
332 if len(s) < 2 {
333 err = ErrSyntax
334 return
335 }
336 for j := 0; j < 2; j++ {
337 x := rune(s[j]) - '0'
338 if x < 0 || x > 7 {
339 err = ErrSyntax
340 return
341 }
342 v = (v << 3) | x
343 }
344 s = s[2:]
345 if v > 255 {
346 err = ErrSyntax
347 return
348 }
349 value = v
350 case '\\':
351 value = '\\'
352 case '\'', '"':
353 if c != quote {
354 err = ErrSyntax
355 return
356 }
357 value = rune(c)
358 default:
359 err = ErrSyntax
360 return
361 }
362 tail = s
363 return
364 }
365
366
367
368 func QuotedPrefix(s string) (string, error) {
369 out, _, err := unquote(s, false)
370 return out, err
371 }
372
373
374
375
376
377
378
379 func Unquote(s string) (string, error) {
380 out, rem, err := unquote(s, true)
381 if len(rem) > 0 {
382 return "", ErrSyntax
383 }
384 return out, err
385 }
386
387
388
389
390
391 func unquote(in string, unescape bool) (out, rem string, err error) {
392
393 if len(in) < 2 {
394 return "", in, ErrSyntax
395 }
396 quote := in[0]
397 end := index(in[1:], quote)
398 if end < 0 {
399 return "", in, ErrSyntax
400 }
401 end += 2
402
403 switch quote {
404 case '`':
405 switch {
406 case !unescape:
407 out = in[:end]
408 case !contains(in[:end], '\r'):
409 out = in[len("`") : end-len("`")]
410 default:
411
412
413 buf := make([]byte, 0, end-len("`")-len("\r")-len("`"))
414 for i := len("`"); i < end-len("`"); i++ {
415 if in[i] != '\r' {
416 buf = append(buf, in[i])
417 }
418 }
419 out = string(buf)
420 }
421
422
423
424
425
426 return out, in[end:], nil
427 case '"', '\'':
428
429 if !contains(in[:end], '\\') && !contains(in[:end], '\n') {
430 var valid bool
431 switch quote {
432 case '"':
433 valid = utf8.ValidString(in[len(`"`) : end-len(`"`)])
434 case '\'':
435 r, n := utf8.DecodeRuneInString(in[len("'") : end-len("'")])
436 valid = len("'")+n+len("'") == end && (r != utf8.RuneError || n != 1)
437 }
438 if valid {
439 out = in[:end]
440 if unescape {
441 out = out[1 : end-1]
442 }
443 return out, in[end:], nil
444 }
445 }
446
447
448 var buf []byte
449 in0 := in
450 in = in[1:]
451 if unescape {
452 buf = make([]byte, 0, 3*end/2)
453 }
454 for len(in) > 0 && in[0] != quote {
455
456
457 r, multibyte, rem, err := UnquoteChar(in, quote)
458 if in[0] == '\n' || err != nil {
459 return "", in0, ErrSyntax
460 }
461 in = rem
462
463
464 if unescape {
465 if r < utf8.RuneSelf || !multibyte {
466 buf = append(buf, byte(r))
467 } else {
468 buf = utf8.AppendRune(buf, r)
469 }
470 }
471
472
473 if quote == '\'' {
474 break
475 }
476 }
477
478
479 if !(len(in) > 0 && in[0] == quote) {
480 return "", in0, ErrSyntax
481 }
482 in = in[1:]
483
484 if unescape {
485 return string(buf), in, nil
486 }
487 return in0[:len(in0)-len(in)], in, nil
488 default:
489 return "", in, ErrSyntax
490 }
491 }
492
493
494
// bsearch performs a binary search for v in s, which must be sorted in
// ascending order for the result to be meaningful. It returns the smallest
// index i such that s[i] >= v (len(s) if there is none) and whether s[i]
// equals v.
func bsearch[S ~[]E, E ~uint16 | ~uint32](s S, v E) (int, bool) {
	lo, hi := 0, len(s)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if s[mid] >= v {
			hi = mid
			continue
		}
		lo = mid + 1
	}
	found := lo < len(s) && s[lo] == v
	return lo, found
}
508
509
510
511
512
513
514
515
516
517
518 func IsPrint(r rune) bool {
519
520 if r <= 0xFF {
521 if 0x20 <= r && r <= 0x7E {
522
523 return true
524 }
525 if 0xA1 <= r && r <= 0xFF {
526
527 return r != 0xAD
528 }
529 return false
530 }
531
532
533
534
535
536
537
538 if 0 <= r && r < 1<<16 {
539 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
540 i, _ := bsearch(isPrint, rr)
541 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
542 return false
543 }
544 _, found := bsearch(isNotPrint, rr)
545 return !found
546 }
547
548 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
549 i, _ := bsearch(isPrint, rr)
550 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
551 return false
552 }
553 if r >= 0x20000 {
554 return true
555 }
556 r -= 0x10000
557 _, found := bsearch(isNotPrint, uint16(r))
558 return !found
559 }
560
561
562
563
564 func IsGraphic(r rune) bool {
565 if IsPrint(r) {
566 return true
567 }
568 return isInGraphicList(r)
569 }
570
571
572
573
574 func isInGraphicList(r rune) bool {
575
576 if r > 0xFFFF {
577 return false
578 }
579 _, found := bsearch(isGraphic, uint16(r))
580 return found
581 }
582
View as plain text