Source file src/encoding/json/internal/jsonwire/encode.go

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.jsonv2
     6  
     7  package jsonwire
     8  
     9  import (
    10  	"math"
    11  	"slices"
    12  	"strconv"
    13  	"unicode/utf16"
    14  	"unicode/utf8"
    15  
    16  	"encoding/json/internal/jsonflags"
    17  )
    18  
    19  // escapeASCII reports whether the ASCII character needs to be escaped.
    20  // It conservatively assumes EscapeForHTML.
    21  var escapeASCII = [...]uint8{
    22  	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters
    23  	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters
    24  	0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // escape '"' and '&'
    25  	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, // escape '<' and '>'
    26  	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    27  	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // escape '\\'
    28  	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    29  	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    30  }
    31  
    32  // NeedEscape reports whether src needs escaping of any characters.
    33  // It conservatively assumes EscapeForHTML and EscapeForJS.
    34  // It reports true for inputs with invalid UTF-8.
    35  func NeedEscape[Bytes ~[]byte | ~string](src Bytes) bool {
    36  	var i int
    37  	for uint(len(src)) > uint(i) {
    38  		if c := src[i]; c < utf8.RuneSelf {
    39  			if escapeASCII[c] > 0 {
    40  				return true
    41  			}
    42  			i++
    43  		} else {
    44  			r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[i:])))
    45  			if r == utf8.RuneError || r == '\u2028' || r == '\u2029' {
    46  				return true
    47  			}
    48  			i += rn
    49  		}
    50  	}
    51  	return false
    52  }
    53  
    54  // AppendQuote appends src to dst as a JSON string per RFC 7159, section 7.
    55  //
    56  // It takes in flags and respects the following:
    57  //   - EscapeForHTML escapes '<', '>', and '&'.
    58  //   - EscapeForJS escapes '\u2028' and '\u2029'.
    59  //   - AllowInvalidUTF8 avoids reporting an error for invalid UTF-8.
    60  //
    61  // Regardless of whether AllowInvalidUTF8 is specified,
    62  // invalid bytes are replaced with the Unicode replacement character ('\ufffd').
    63  // If no escape flags are set, then the shortest representable form is used,
    64  // which is also the canonical form for strings (RFC 8785, section 3.2.2.2).
    65  func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflags.Flags) ([]byte, error) {
    66  	var i, n int
    67  	var hasInvalidUTF8 bool
    68  	dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`))
    69  	dst = append(dst, '"')
    70  	for uint(len(src)) > uint(n) {
    71  		if c := src[n]; c < utf8.RuneSelf {
    72  			// Handle single-byte ASCII.
    73  			n++
    74  			if escapeASCII[c] == 0 {
    75  				continue // no escaping possibly needed
    76  			}
    77  			// Handle escaping of single-byte ASCII.
    78  			if !(c == '<' || c == '>' || c == '&') || flags.Get(jsonflags.EscapeForHTML) {
    79  				dst = append(dst, src[i:n-1]...)
    80  				dst = appendEscapedASCII(dst, c)
    81  				i = n
    82  			}
    83  		} else {
    84  			// Handle multi-byte Unicode.
    85  			r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:])))
    86  			n += rn
    87  			if r != utf8.RuneError && r != '\u2028' && r != '\u2029' {
    88  				continue // no escaping possibly needed
    89  			}
    90  			// Handle escaping of multi-byte Unicode.
    91  			switch {
    92  			case isInvalidUTF8(r, rn):
    93  				hasInvalidUTF8 = true
    94  				dst = append(dst, src[i:n-rn]...)
    95  				if flags.Get(jsonflags.EscapeInvalidUTF8) {
    96  					dst = append(dst, `\ufffd`...)
    97  				} else {
    98  					dst = append(dst, "\ufffd"...)
    99  				}
   100  				i = n
   101  			case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS):
   102  				dst = append(dst, src[i:n-rn]...)
   103  				dst = appendEscapedUnicode(dst, r)
   104  				i = n
   105  			}
   106  		}
   107  	}
   108  	dst = append(dst, src[i:n]...)
   109  	dst = append(dst, '"')
   110  	if hasInvalidUTF8 && !flags.Get(jsonflags.AllowInvalidUTF8) {
   111  		return dst, ErrInvalidUTF8
   112  	}
   113  	return dst, nil
   114  }
   115  
   116  func appendEscapedASCII(dst []byte, c byte) []byte {
   117  	switch c {
   118  	case '"', '\\':
   119  		dst = append(dst, '\\', c)
   120  	case '\b':
   121  		dst = append(dst, "\\b"...)
   122  	case '\f':
   123  		dst = append(dst, "\\f"...)
   124  	case '\n':
   125  		dst = append(dst, "\\n"...)
   126  	case '\r':
   127  		dst = append(dst, "\\r"...)
   128  	case '\t':
   129  		dst = append(dst, "\\t"...)
   130  	default:
   131  		dst = appendEscapedUTF16(dst, uint16(c))
   132  	}
   133  	return dst
   134  }
   135  
   136  func appendEscapedUnicode(dst []byte, r rune) []byte {
   137  	if r1, r2 := utf16.EncodeRune(r); r1 != '\ufffd' && r2 != '\ufffd' {
   138  		dst = appendEscapedUTF16(dst, uint16(r1))
   139  		dst = appendEscapedUTF16(dst, uint16(r2))
   140  	} else {
   141  		dst = appendEscapedUTF16(dst, uint16(r))
   142  	}
   143  	return dst
   144  }
   145  
   146  func appendEscapedUTF16(dst []byte, x uint16) []byte {
   147  	const hex = "0123456789abcdef"
   148  	return append(dst, '\\', 'u', hex[(x>>12)&0xf], hex[(x>>8)&0xf], hex[(x>>4)&0xf], hex[(x>>0)&0xf])
   149  }
   150  
   151  // ReformatString consumes a JSON string from src and appends it to dst,
   152  // reformatting it if necessary according to the specified flags.
   153  // It returns the appended output and the number of consumed input bytes.
   154  func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
   155  	// TODO: Should this update ValueFlags as input?
   156  	var valFlags ValueFlags
   157  	n, err := ConsumeString(&valFlags, src, !flags.Get(jsonflags.AllowInvalidUTF8))
   158  	if err != nil {
   159  		return dst, n, err
   160  	}
   161  
   162  	// If the output requires no special escapes, and the input
   163  	// is already in canonical form or should be preserved verbatim,
   164  	// then directly copy the input to the output.
   165  	if !flags.Get(jsonflags.AnyEscape) &&
   166  		(valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings)) {
   167  		dst = append(dst, src[:n]...) // copy the string verbatim
   168  		return dst, n, nil
   169  	}
   170  
   171  	// Under [jsonflags.PreserveRawStrings], any pre-escaped sequences
   172  	// remain escaped, however we still need to respect the
   173  	// [jsonflags.EscapeForHTML] and [jsonflags.EscapeForJS] options.
   174  	if flags.Get(jsonflags.PreserveRawStrings) {
   175  		var i, lastAppendIndex int
   176  		for i < n {
   177  			if c := src[i]; c < utf8.RuneSelf {
   178  				if (c == '<' || c == '>' || c == '&') && flags.Get(jsonflags.EscapeForHTML) {
   179  					dst = append(dst, src[lastAppendIndex:i]...)
   180  					dst = appendEscapedASCII(dst, c)
   181  					lastAppendIndex = i + 1
   182  				}
   183  				i++
   184  			} else {
   185  				r, rn := utf8.DecodeRune(truncateMaxUTF8(src[i:]))
   186  				if (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS) {
   187  					dst = append(dst, src[lastAppendIndex:i]...)
   188  					dst = appendEscapedUnicode(dst, r)
   189  					lastAppendIndex = i + rn
   190  				}
   191  				i += rn
   192  			}
   193  		}
   194  		return append(dst, src[lastAppendIndex:n]...), n, nil
   195  	}
   196  
   197  	// The input contains characters that might need escaping,
   198  	// unnecessary escape sequences, or invalid UTF-8.
   199  	// Perform a round-trip unquote and quote to properly reformat
   200  	// these sequences according the current flags.
   201  	b, _ := AppendUnquote(nil, src[:n])
   202  	dst, _ = AppendQuote(dst, b, flags)
   203  	return dst, n, nil
   204  }
   205  
   206  // AppendFloat appends src to dst as a JSON number per RFC 7159, section 6.
   207  // It formats numbers similar to the ES6 number-to-string conversion.
   208  // See https://go.dev/issue/14135.
   209  //
   210  // The output is identical to ECMA-262, 6th edition, section 7.1.12.1 and with
   211  // RFC 8785, section 3.2.2.3 for 64-bit floating-point numbers except for -0,
   212  // which is formatted as -0 instead of just 0.
   213  //
   214  // For 32-bit floating-point numbers,
   215  // the output is a 32-bit equivalent of the algorithm.
   216  // Note that ECMA-262 specifies no algorithm for 32-bit numbers.
   217  func AppendFloat(dst []byte, src float64, bits int) []byte {
   218  	if bits == 32 {
   219  		src = float64(float32(src))
   220  	}
   221  
   222  	abs := math.Abs(src)
   223  	fmt := byte('f')
   224  	if abs != 0 {
   225  		if bits == 64 && (float64(abs) < 1e-6 || float64(abs) >= 1e21) ||
   226  			bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) {
   227  			fmt = 'e'
   228  		}
   229  	}
   230  	dst = strconv.AppendFloat(dst, src, fmt, -1, bits)
   231  	if fmt == 'e' {
   232  		// Clean up e-09 to e-9.
   233  		n := len(dst)
   234  		if n >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' {
   235  			dst[n-2] = dst[n-1]
   236  			dst = dst[:n-1]
   237  		}
   238  	}
   239  	return dst
   240  }
   241  
   242  // ReformatNumber consumes a JSON string from src and appends it to dst,
   243  // canonicalizing it if specified.
   244  // It returns the appended output and the number of consumed input bytes.
   245  func ReformatNumber(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
   246  	n, err := ConsumeNumber(src)
   247  	if err != nil {
   248  		return dst, n, err
   249  	}
   250  	if !flags.Get(jsonflags.CanonicalizeNumbers) {
   251  		dst = append(dst, src[:n]...) // copy the number verbatim
   252  		return dst, n, nil
   253  	}
   254  
   255  	// Identify the kind of number.
   256  	var isFloat bool
   257  	for _, c := range src[:n] {
   258  		if c == '.' || c == 'e' || c == 'E' {
   259  			isFloat = true // has fraction or exponent
   260  			break
   261  		}
   262  	}
   263  
   264  	// Check if need to canonicalize this kind of number.
   265  	switch {
   266  	case string(src[:n]) == "-0":
   267  		break // canonicalize -0 as 0 regardless of kind
   268  	case isFloat:
   269  		if !flags.Get(jsonflags.CanonicalizeRawFloats) {
   270  			dst = append(dst, src[:n]...) // copy the number verbatim
   271  			return dst, n, nil
   272  		}
   273  	default:
   274  		// As an optimization, we can copy integer numbers below 2⁵³ verbatim
   275  		// since the canonical form is always identical.
   276  		const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10))
   277  		if !flags.Get(jsonflags.CanonicalizeRawInts) || n < maxExactIntegerDigits {
   278  			dst = append(dst, src[:n]...) // copy the number verbatim
   279  			return dst, n, nil
   280  		}
   281  	}
   282  
   283  	// Parse and reformat the number (which uses a canonical format).
   284  	fv, _ := strconv.ParseFloat(string(src[:n]), 64)
   285  	switch {
   286  	case fv == 0:
   287  		fv = 0 // normalize negative zero as just zero
   288  	case math.IsInf(fv, +1):
   289  		fv = +math.MaxFloat64
   290  	case math.IsInf(fv, -1):
   291  		fv = -math.MaxFloat64
   292  	}
   293  	return AppendFloat(dst, fv, 64), n, nil
   294  }
   295  

View as plain text