Source file src/encoding/json/jsontext/token.go

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.jsonv2
     6  
     7  package jsontext
     8  
     9  import (
    10  	"bytes"
    11  	"errors"
    12  	"math"
    13  	"strconv"
    14  
    15  	"encoding/json/internal/jsonflags"
    16  	"encoding/json/internal/jsonwire"
    17  )
    18  
    19  // NOTE: Token is analogous to v1 json.Token.
    20  
    21  const (
    22  	maxInt64  = math.MaxInt64
    23  	minInt64  = math.MinInt64
    24  	maxUint64 = math.MaxUint64
    25  	minUint64 = 0 // for consistency and readability purposes
    26  
    27  	invalidTokenPanic = "invalid jsontext.Token; it has been voided by a subsequent json.Decoder call"
    28  )
    29  
    30  var errInvalidToken = errors.New("invalid jsontext.Token")
    31  
// Token represents a lexical JSON token, which may be one of the following:
//   - a JSON literal (i.e., null, true, or false)
//   - a JSON string (e.g., "hello, world!")
//   - a JSON number (e.g., 123.456)
//   - a start or end delimiter for a JSON object (i.e., { or } )
//   - a start or end delimiter for a JSON array (i.e., [ or ] )
//
// A Token cannot represent entire array or object values, while a [Value] can.
// There is no Token to represent commas and colons since
// these structural tokens can be inferred from the surrounding context.
//
// The zero Token is invalid; [Token.Kind] reports an invalid kind for it.
type Token struct {
	nonComparable

	// Tokens can exist in either a "raw" or an "exact" form.
	// Tokens produced by the Decoder are in the "raw" form.
	// Tokens returned by constructors are usually in the "exact" form.
	// The Encoder accepts Tokens in either the "raw" or "exact" form.
	//
	// The following chart shows the possible values for each Token type:
	//	╔═════════════════╦════════════╤════════════╤════════════╗
	//	║ Token type      ║ raw field  │ str field  │ num field  ║
	//	╠═════════════════╬════════════╪════════════╪════════════╣
	//	║ null   (raw)    ║ "null"     │ ""         │ 0          ║
	//	║ false  (raw)    ║ "false"    │ ""         │ 0          ║
	//	║ true   (raw)    ║ "true"     │ ""         │ 0          ║
	//	║ string (raw)    ║ non-empty  │ ""         │ offset     ║
	//	║ string (string) ║ nil        │ non-empty  │ 0          ║
	//	║ number (raw)    ║ non-empty  │ ""         │ offset     ║
	//	║ number (float)  ║ nil        │ "f"        │ non-zero   ║
	//	║ number (int64)  ║ nil        │ "i"        │ non-zero   ║
	//	║ number (uint64) ║ nil        │ "u"        │ non-zero   ║
	//	║ object (delim)  ║ "{" or "}" │ ""         │ 0          ║
	//	║ array  (delim)  ║ "[" or "]" │ ""         │ 0          ║
	//	╚═════════════════╩════════════╧════════════╧════════════╝
	//
	// Notes:
	//   - For tokens stored in "raw" form, the num field contains the
	//     absolute offset determined by raw.previousOffsetStart().
	//     The buffer itself is stored in raw.previousBuffer().
	//   - JSON literals and structural characters are always in the "raw" form.
	//   - JSON strings and numbers can be in either "raw" or "exact" forms.
	//   - The zero values of JSON strings and numbers (i.e., "" and 0) would
	//     be ambiguous in the "exact" form, since an empty str and a zero num
	//     are also how the absence of a value is encoded. Thus, they are
	//     always represented in the "raw" form.

	// raw contains a reference to the raw decode buffer.
	// If non-nil, then its value takes precedence over str and num.
	// It is only valid if num == raw.previousOffsetStart().
	raw *decodeBuffer

	// str is the unescaped JSON string if num is zero.
	// Otherwise, it is "f", "i", or "u" if num should be interpreted
	// as a float64, int64, or uint64, respectively.
	str string

	// num is a float64, int64, or uint64 stored as a uint64 value.
	// It is non-zero for any JSON number in the "exact" form.
	// For a float64, it holds the IEEE 754 bit pattern of the value.
	num uint64
}
    91  
    92  // TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues?
    93  
    94  var (
    95  	Null  Token = rawToken("null")
    96  	False Token = rawToken("false")
    97  	True  Token = rawToken("true")
    98  
    99  	BeginObject Token = rawToken("{")
   100  	EndObject   Token = rawToken("}")
   101  	BeginArray  Token = rawToken("[")
   102  	EndArray    Token = rawToken("]")
   103  
   104  	zeroString Token = rawToken(`""`)
   105  	zeroNumber Token = rawToken(`0`)
   106  
   107  	nanString  Token = String("NaN")
   108  	pinfString Token = String("Infinity")
   109  	ninfString Token = String("-Infinity")
   110  )
   111  
   112  func rawToken(s string) Token {
   113  	return Token{raw: &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}}
   114  }
   115  
   116  // Bool constructs a Token representing a JSON boolean.
   117  func Bool(b bool) Token {
   118  	if b {
   119  		return True
   120  	}
   121  	return False
   122  }
   123  
   124  // String constructs a Token representing a JSON string.
   125  // The provided string should contain valid UTF-8, otherwise invalid characters
   126  // may be mangled as the Unicode replacement character.
   127  func String(s string) Token {
   128  	if len(s) == 0 {
   129  		return zeroString
   130  	}
   131  	return Token{str: s}
   132  }
   133  
   134  // Float constructs a Token representing a JSON number.
   135  // The values NaN, +Inf, and -Inf will be represented
   136  // as a JSON string with the values "NaN", "Infinity", and "-Infinity".
   137  func Float(n float64) Token {
   138  	switch {
   139  	case math.Float64bits(n) == 0:
   140  		return zeroNumber
   141  	case math.IsNaN(n):
   142  		return nanString
   143  	case math.IsInf(n, +1):
   144  		return pinfString
   145  	case math.IsInf(n, -1):
   146  		return ninfString
   147  	}
   148  	return Token{str: "f", num: math.Float64bits(n)}
   149  }
   150  
   151  // Int constructs a Token representing a JSON number from an int64.
   152  func Int(n int64) Token {
   153  	if n == 0 {
   154  		return zeroNumber
   155  	}
   156  	return Token{str: "i", num: uint64(n)}
   157  }
   158  
   159  // Uint constructs a Token representing a JSON number from a uint64.
   160  func Uint(n uint64) Token {
   161  	if n == 0 {
   162  		return zeroNumber
   163  	}
   164  	return Token{str: "u", num: uint64(n)}
   165  }
   166  
   167  // Clone makes a copy of the Token such that its value remains valid
   168  // even after a subsequent [Decoder.Read] call.
   169  func (t Token) Clone() Token {
   170  	// TODO: Allow caller to avoid any allocations?
   171  	if raw := t.raw; raw != nil {
   172  		// Avoid copying globals.
   173  		if t.raw.prevStart == 0 {
   174  			switch t.raw {
   175  			case Null.raw:
   176  				return Null
   177  			case False.raw:
   178  				return False
   179  			case True.raw:
   180  				return True
   181  			case BeginObject.raw:
   182  				return BeginObject
   183  			case EndObject.raw:
   184  				return EndObject
   185  			case BeginArray.raw:
   186  				return BeginArray
   187  			case EndArray.raw:
   188  				return EndArray
   189  			}
   190  		}
   191  
   192  		if uint64(raw.previousOffsetStart()) != t.num {
   193  			panic(invalidTokenPanic)
   194  		}
   195  		buf := bytes.Clone(raw.previousBuffer())
   196  		return Token{raw: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}}
   197  	}
   198  	return t
   199  }
   200  
   201  // Bool returns the value for a JSON boolean.
   202  // It panics if the token kind is not a JSON boolean.
   203  func (t Token) Bool() bool {
   204  	switch t.raw {
   205  	case True.raw:
   206  		return true
   207  	case False.raw:
   208  		return false
   209  	default:
   210  		panic("invalid JSON token kind: " + t.Kind().String())
   211  	}
   212  }
   213  
   214  // appendString appends a JSON string to dst and returns it.
   215  // It panics if t is not a JSON string.
   216  func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
   217  	if raw := t.raw; raw != nil {
   218  		// Handle raw string value.
   219  		buf := raw.previousBuffer()
   220  		if Kind(buf[0]) == '"' {
   221  			if jsonwire.ConsumeSimpleString(buf) == len(buf) {
   222  				return append(dst, buf...), nil
   223  			}
   224  			dst, _, err := jsonwire.ReformatString(dst, buf, flags)
   225  			return dst, err
   226  		}
   227  	} else if len(t.str) != 0 && t.num == 0 {
   228  		// Handle exact string value.
   229  		return jsonwire.AppendQuote(dst, t.str, flags)
   230  	}
   231  
   232  	panic("invalid JSON token kind: " + t.Kind().String())
   233  }
   234  
   235  // String returns the unescaped string value for a JSON string.
   236  // For other JSON kinds, this returns the raw JSON representation.
   237  func (t Token) String() string {
   238  	// This is inlinable to take advantage of "function outlining".
   239  	// This avoids an allocation for the string(b) conversion
   240  	// if the caller does not use the string in an escaping manner.
   241  	// See https://blog.filippo.io/efficient-go-apis-with-the-inliner/
   242  	s, b := t.string()
   243  	if len(b) > 0 {
   244  		return string(b)
   245  	}
   246  	return s
   247  }
   248  func (t Token) string() (string, []byte) {
   249  	if raw := t.raw; raw != nil {
   250  		if uint64(raw.previousOffsetStart()) != t.num {
   251  			panic(invalidTokenPanic)
   252  		}
   253  		buf := raw.previousBuffer()
   254  		if buf[0] == '"' {
   255  			// TODO: Preserve ValueFlags in Token?
   256  			isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf)
   257  			return "", jsonwire.UnquoteMayCopy(buf, isVerbatim)
   258  		}
   259  		// Handle tokens that are not JSON strings for fmt.Stringer.
   260  		return "", buf
   261  	}
   262  	if len(t.str) != 0 && t.num == 0 {
   263  		return t.str, nil
   264  	}
   265  	// Handle tokens that are not JSON strings for fmt.Stringer.
   266  	if t.num > 0 {
   267  		switch t.str[0] {
   268  		case 'f':
   269  			return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil
   270  		case 'i':
   271  			return strconv.FormatInt(int64(t.num), 10), nil
   272  		case 'u':
   273  			return strconv.FormatUint(uint64(t.num), 10), nil
   274  		}
   275  	}
   276  	return "<invalid jsontext.Token>", nil
   277  }
   278  
   279  // appendNumber appends a JSON number to dst and returns it.
   280  // It panics if t is not a JSON number.
   281  func (t Token) appendNumber(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
   282  	if raw := t.raw; raw != nil {
   283  		// Handle raw number value.
   284  		buf := raw.previousBuffer()
   285  		if Kind(buf[0]).normalize() == '0' {
   286  			dst, _, err := jsonwire.ReformatNumber(dst, buf, flags)
   287  			return dst, err
   288  		}
   289  	} else if t.num != 0 {
   290  		// Handle exact number value.
   291  		switch t.str[0] {
   292  		case 'f':
   293  			return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil
   294  		case 'i':
   295  			return strconv.AppendInt(dst, int64(t.num), 10), nil
   296  		case 'u':
   297  			return strconv.AppendUint(dst, uint64(t.num), 10), nil
   298  		}
   299  	}
   300  
   301  	panic("invalid JSON token kind: " + t.Kind().String())
   302  }
   303  
   304  // Float returns the floating-point value for a JSON number.
   305  // It returns a NaN, +Inf, or -Inf value for any JSON string
   306  // with the values "NaN", "Infinity", or "-Infinity".
   307  // It panics for all other cases.
   308  func (t Token) Float() float64 {
   309  	if raw := t.raw; raw != nil {
   310  		// Handle raw number value.
   311  		if uint64(raw.previousOffsetStart()) != t.num {
   312  			panic(invalidTokenPanic)
   313  		}
   314  		buf := raw.previousBuffer()
   315  		if Kind(buf[0]).normalize() == '0' {
   316  			fv, _ := jsonwire.ParseFloat(buf, 64)
   317  			return fv
   318  		}
   319  	} else if t.num != 0 {
   320  		// Handle exact number value.
   321  		switch t.str[0] {
   322  		case 'f':
   323  			return math.Float64frombits(t.num)
   324  		case 'i':
   325  			return float64(int64(t.num))
   326  		case 'u':
   327  			return float64(uint64(t.num))
   328  		}
   329  	}
   330  
   331  	// Handle string values with "NaN", "Infinity", or "-Infinity".
   332  	if t.Kind() == '"' {
   333  		switch t.String() {
   334  		case "NaN":
   335  			return math.NaN()
   336  		case "Infinity":
   337  			return math.Inf(+1)
   338  		case "-Infinity":
   339  			return math.Inf(-1)
   340  		}
   341  	}
   342  
   343  	panic("invalid JSON token kind: " + t.Kind().String())
   344  }
   345  
   346  // Int returns the signed integer value for a JSON number.
   347  // The fractional component of any number is ignored (truncation toward zero).
   348  // Any number beyond the representation of an int64 will be saturated
   349  // to the closest representable value.
   350  // It panics if the token kind is not a JSON number.
   351  func (t Token) Int() int64 {
   352  	if raw := t.raw; raw != nil {
   353  		// Handle raw integer value.
   354  		if uint64(raw.previousOffsetStart()) != t.num {
   355  			panic(invalidTokenPanic)
   356  		}
   357  		neg := false
   358  		buf := raw.previousBuffer()
   359  		if len(buf) > 0 && buf[0] == '-' {
   360  			neg, buf = true, buf[1:]
   361  		}
   362  		if numAbs, ok := jsonwire.ParseUint(buf); ok {
   363  			if neg {
   364  				if numAbs > -minInt64 {
   365  					return minInt64
   366  				}
   367  				return -1 * int64(numAbs)
   368  			} else {
   369  				if numAbs > +maxInt64 {
   370  					return maxInt64
   371  				}
   372  				return +1 * int64(numAbs)
   373  			}
   374  		}
   375  	} else if t.num != 0 {
   376  		// Handle exact integer value.
   377  		switch t.str[0] {
   378  		case 'i':
   379  			return int64(t.num)
   380  		case 'u':
   381  			if t.num > maxInt64 {
   382  				return maxInt64
   383  			}
   384  			return int64(t.num)
   385  		}
   386  	}
   387  
   388  	// Handle JSON number that is a floating-point value.
   389  	if t.Kind() == '0' {
   390  		switch fv := t.Float(); {
   391  		case fv >= maxInt64:
   392  			return maxInt64
   393  		case fv <= minInt64:
   394  			return minInt64
   395  		default:
   396  			return int64(fv) // truncation toward zero
   397  		}
   398  	}
   399  
   400  	panic("invalid JSON token kind: " + t.Kind().String())
   401  }
   402  
   403  // Uint returns the unsigned integer value for a JSON number.
   404  // The fractional component of any number is ignored (truncation toward zero).
   405  // Any number beyond the representation of an uint64 will be saturated
   406  // to the closest representable value.
   407  // It panics if the token kind is not a JSON number.
   408  func (t Token) Uint() uint64 {
   409  	// NOTE: This accessor returns 0 for any negative JSON number,
   410  	// which might be surprising, but is at least consistent with the behavior
   411  	// of saturating out-of-bounds numbers to the closest representable number.
   412  
   413  	if raw := t.raw; raw != nil {
   414  		// Handle raw integer value.
   415  		if uint64(raw.previousOffsetStart()) != t.num {
   416  			panic(invalidTokenPanic)
   417  		}
   418  		neg := false
   419  		buf := raw.previousBuffer()
   420  		if len(buf) > 0 && buf[0] == '-' {
   421  			neg, buf = true, buf[1:]
   422  		}
   423  		if num, ok := jsonwire.ParseUint(buf); ok {
   424  			if neg {
   425  				return minUint64
   426  			}
   427  			return num
   428  		}
   429  	} else if t.num != 0 {
   430  		// Handle exact integer value.
   431  		switch t.str[0] {
   432  		case 'u':
   433  			return t.num
   434  		case 'i':
   435  			if int64(t.num) < minUint64 {
   436  				return minUint64
   437  			}
   438  			return uint64(int64(t.num))
   439  		}
   440  	}
   441  
   442  	// Handle JSON number that is a floating-point value.
   443  	if t.Kind() == '0' {
   444  		switch fv := t.Float(); {
   445  		case fv >= maxUint64:
   446  			return maxUint64
   447  		case fv <= minUint64:
   448  			return minUint64
   449  		default:
   450  			return uint64(fv) // truncation toward zero
   451  		}
   452  	}
   453  
   454  	panic("invalid JSON token kind: " + t.Kind().String())
   455  }
   456  
   457  // Kind returns the token kind.
   458  func (t Token) Kind() Kind {
   459  	switch {
   460  	case t.raw != nil:
   461  		raw := t.raw
   462  		if uint64(raw.previousOffsetStart()) != t.num {
   463  			panic(invalidTokenPanic)
   464  		}
   465  		return Kind(t.raw.buf[raw.prevStart]).normalize()
   466  	case t.num != 0:
   467  		return '0'
   468  	case len(t.str) != 0:
   469  		return '"'
   470  	default:
   471  		return invalidKind
   472  	}
   473  }
   474  
// Kind represents each possible JSON token kind with a single byte,
// which is conveniently the first byte of that kind's grammar
// with the restriction that numbers always be represented with '0':
//
//   - 'n': null
//   - 'f': false
//   - 't': true
//   - '"': string
//   - '0': number
//   - '{': object start
//   - '}': object end
//   - '[': array start
//   - ']': array end
//
// An invalid kind is usually represented using 0,
// but may be non-zero due to invalid JSON data.
type Kind byte

// invalidKind is the zero Kind. [Token.Kind] returns it for a Token
// that holds neither a raw buffer, an exact number, nor an exact string.
const invalidKind Kind = 0
   494  
   495  // String prints the kind in a humanly readable fashion.
   496  func (k Kind) String() string {
   497  	switch k {
   498  	case 'n':
   499  		return "null"
   500  	case 'f':
   501  		return "false"
   502  	case 't':
   503  		return "true"
   504  	case '"':
   505  		return "string"
   506  	case '0':
   507  		return "number"
   508  	case '{':
   509  		return "{"
   510  	case '}':
   511  		return "}"
   512  	case '[':
   513  		return "["
   514  	case ']':
   515  		return "]"
   516  	default:
   517  		return "<invalid jsontext.Kind: " + jsonwire.QuoteRune(string(k)) + ">"
   518  	}
   519  }
   520  
   521  // normalize coalesces all possible starting characters of a number as just '0'.
   522  func (k Kind) normalize() Kind {
   523  	if k == '-' || ('0' <= k && k <= '9') {
   524  		return '0'
   525  	}
   526  	return k
   527  }
   528  

View as plain text