atoi.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strconv
     6  
     7  import (
     8  	"errors"
     9  	"internal/stringslite"
    10  )
    11  
    12  // lower(c) is a lower-case letter if and only if
    13  // c is either that lower-case letter or the equivalent upper-case letter.
    14  // Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'.
    15  // Note that lower of non-letters can produce other non-letters.
    16  func lower(c byte) byte {
    17  	return c | ('x' - 'X')
    18  }
    19  
    20  // ErrRange indicates that a value is out of range for the target type.
    21  var ErrRange = errors.New("value out of range")
    22  
    23  // ErrSyntax indicates that a value does not have the right syntax for the target type.
    24  var ErrSyntax = errors.New("invalid syntax")
    25  
// A NumError records a failed conversion.
// It carries the name of the function that failed, a copy of the input
// it was given, and the underlying cause.
type NumError struct {
	Func string // the failing function (ParseBool, ParseInt, ParseUint, ParseFloat, ParseComplex, Atoi)
	Num  string // the input
	Err  error  // the reason the conversion failed (e.g. ErrRange, ErrSyntax, etc.)
}
    32  
    33  func (e *NumError) Error() string {
    34  	return "strconv." + e.Func + ": " + "parsing " + Quote(e.Num) + ": " + e.Err.Error()
    35  }
    36  
    37  func (e *NumError) Unwrap() error { return e.Err }
    38  
    39  // All ParseXXX functions allow the input string to escape to the error value.
    40  // This hurts strconv.ParseXXX(string(b)) calls where b is []byte since
    41  // the conversion from []byte must allocate a string on the heap.
    42  // If we assume errors are infrequent, then we can avoid escaping the input
    43  // back to the output by copying it first. This allows the compiler to call
    44  // strconv.ParseXXX without a heap allocation for most []byte to string
    45  // conversions, since it can now prove that the string cannot escape Parse.
    46  
    47  func syntaxError(fn, str string) *NumError {
    48  	return &NumError{fn, stringslite.Clone(str), ErrSyntax}
    49  }
    50  
    51  func rangeError(fn, str string) *NumError {
    52  	return &NumError{fn, stringslite.Clone(str), ErrRange}
    53  }
    54  
    55  func baseError(fn, str string, base int) *NumError {
    56  	return &NumError{fn, stringslite.Clone(str), errors.New("invalid base " + Itoa(base))}
    57  }
    58  
    59  func bitSizeError(fn, str string, bitSize int) *NumError {
    60  	return &NumError{fn, stringslite.Clone(str), errors.New("invalid bit size " + Itoa(bitSize))}
    61  }
    62  
    63  const intSize = 32 << (^uint(0) >> 63)
    64  
    65  // IntSize is the size in bits of an int or uint value.
    66  const IntSize = intSize
    67  
    68  const maxUint64 = 1<<64 - 1
    69  
    70  // ParseUint is like [ParseInt] but for unsigned numbers.
    71  //
    72  // A sign prefix is not permitted.
    73  func ParseUint(s string, base int, bitSize int) (uint64, error) {
    74  	const fnParseUint = "ParseUint"
    75  
    76  	if s == "" {
    77  		return 0, syntaxError(fnParseUint, s)
    78  	}
    79  
    80  	base0 := base == 0
    81  
    82  	s0 := s
    83  	switch {
    84  	case 2 <= base && base <= 36:
    85  		// valid base; nothing to do
    86  
    87  	case base == 0:
    88  		// Look for octal, hex prefix.
    89  		base = 10
    90  		if s[0] == '0' {
    91  			switch {
    92  			case len(s) >= 3 && lower(s[1]) == 'b':
    93  				base = 2
    94  				s = s[2:]
    95  			case len(s) >= 3 && lower(s[1]) == 'o':
    96  				base = 8
    97  				s = s[2:]
    98  			case len(s) >= 3 && lower(s[1]) == 'x':
    99  				base = 16
   100  				s = s[2:]
   101  			default:
   102  				base = 8
   103  				s = s[1:]
   104  			}
   105  		}
   106  
   107  	default:
   108  		return 0, baseError(fnParseUint, s0, base)
   109  	}
   110  
   111  	if bitSize == 0 {
   112  		bitSize = IntSize
   113  	} else if bitSize < 0 || bitSize > 64 {
   114  		return 0, bitSizeError(fnParseUint, s0, bitSize)
   115  	}
   116  
   117  	// Cutoff is the smallest number such that cutoff*base > maxUint64.
   118  	// Use compile-time constants for common cases.
   119  	var cutoff uint64
   120  	switch base {
   121  	case 10:
   122  		cutoff = maxUint64/10 + 1
   123  	case 16:
   124  		cutoff = maxUint64/16 + 1
   125  	default:
   126  		cutoff = maxUint64/uint64(base) + 1
   127  	}
   128  
   129  	maxVal := uint64(1)<<uint(bitSize) - 1
   130  
   131  	underscores := false
   132  	var n uint64
   133  	for _, c := range []byte(s) {
   134  		var d byte
   135  		switch {
   136  		case c == '_' && base0:
   137  			underscores = true
   138  			continue
   139  		case '0' <= c && c <= '9':
   140  			d = c - '0'
   141  		case 'a' <= lower(c) && lower(c) <= 'z':
   142  			d = lower(c) - 'a' + 10
   143  		default:
   144  			return 0, syntaxError(fnParseUint, s0)
   145  		}
   146  
   147  		if d >= byte(base) {
   148  			return 0, syntaxError(fnParseUint, s0)
   149  		}
   150  
   151  		if n >= cutoff {
   152  			// n*base overflows
   153  			return maxVal, rangeError(fnParseUint, s0)
   154  		}
   155  		n *= uint64(base)
   156  
   157  		n1 := n + uint64(d)
   158  		if n1 < n || n1 > maxVal {
   159  			// n+d overflows
   160  			return maxVal, rangeError(fnParseUint, s0)
   161  		}
   162  		n = n1
   163  	}
   164  
   165  	if underscores && !underscoreOK(s0) {
   166  		return 0, syntaxError(fnParseUint, s0)
   167  	}
   168  
   169  	return n, nil
   170  }
   171  
   172  // ParseInt interprets a string s in the given base (0, 2 to 36) and
   173  // bit size (0 to 64) and returns the corresponding value i.
   174  //
   175  // The string may begin with a leading sign: "+" or "-".
   176  //
   177  // If the base argument is 0, the true base is implied by the string's
   178  // prefix following the sign (if present): 2 for "0b", 8 for "0" or "0o",
   179  // 16 for "0x", and 10 otherwise. Also, for argument base 0 only,
   180  // underscore characters are permitted as defined by the Go syntax for
   181  // [integer literals].
   182  //
   183  // The bitSize argument specifies the integer type
   184  // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
   185  // correspond to int, int8, int16, int32, and int64.
   186  // If bitSize is below 0 or above 64, an error is returned.
   187  //
   188  // The errors that ParseInt returns have concrete type [*NumError]
   189  // and include err.Num = s. If s is empty or contains invalid
   190  // digits, err.Err = [ErrSyntax] and the returned value is 0;
   191  // if the value corresponding to s cannot be represented by a
   192  // signed integer of the given size, err.Err = [ErrRange] and the
   193  // returned value is the maximum magnitude integer of the
   194  // appropriate bitSize and sign.
   195  //
   196  // [integer literals]: https://go.dev/ref/spec#Integer_literals
   197  func ParseInt(s string, base int, bitSize int) (i int64, err error) {
   198  	const fnParseInt = "ParseInt"
   199  
   200  	if s == "" {
   201  		return 0, syntaxError(fnParseInt, s)
   202  	}
   203  
   204  	// Pick off leading sign.
   205  	s0 := s
   206  	neg := false
   207  	if s[0] == '+' {
   208  		s = s[1:]
   209  	} else if s[0] == '-' {
   210  		neg = true
   211  		s = s[1:]
   212  	}
   213  
   214  	// Convert unsigned and check range.
   215  	var un uint64
   216  	un, err = ParseUint(s, base, bitSize)
   217  	if err != nil && err.(*NumError).Err != ErrRange {
   218  		err.(*NumError).Func = fnParseInt
   219  		err.(*NumError).Num = stringslite.Clone(s0)
   220  		return 0, err
   221  	}
   222  
   223  	if bitSize == 0 {
   224  		bitSize = IntSize
   225  	}
   226  
   227  	cutoff := uint64(1 << uint(bitSize-1))
   228  	if !neg && un >= cutoff {
   229  		return int64(cutoff - 1), rangeError(fnParseInt, s0)
   230  	}
   231  	if neg && un > cutoff {
   232  		return -int64(cutoff), rangeError(fnParseInt, s0)
   233  	}
   234  	n := int64(un)
   235  	if neg {
   236  		n = -n
   237  	}
   238  	return n, nil
   239  }
   240  
   241  // Atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
   242  func Atoi(s string) (int, error) {
   243  	const fnAtoi = "Atoi"
   244  
   245  	sLen := len(s)
   246  	if intSize == 32 && (0 < sLen && sLen < 10) ||
   247  		intSize == 64 && (0 < sLen && sLen < 19) {
   248  		// Fast path for small integers that fit int type.
   249  		s0 := s
   250  		if s[0] == '-' || s[0] == '+' {
   251  			s = s[1:]
   252  			if len(s) < 1 {
   253  				return 0, syntaxError(fnAtoi, s0)
   254  			}
   255  		}
   256  
   257  		n := 0
   258  		for _, ch := range []byte(s) {
   259  			ch -= '0'
   260  			if ch > 9 {
   261  				return 0, syntaxError(fnAtoi, s0)
   262  			}
   263  			n = n*10 + int(ch)
   264  		}
   265  		if s0[0] == '-' {
   266  			n = -n
   267  		}
   268  		return n, nil
   269  	}
   270  
   271  	// Slow path for invalid, big, or underscored integers.
   272  	i64, err := ParseInt(s, 10, 0)
   273  	if nerr, ok := err.(*NumError); ok {
   274  		nerr.Func = fnAtoi
   275  	}
   276  	return int(i64), err
   277  }
   278  
   279  // underscoreOK reports whether the underscores in s are allowed.
   280  // Checking them in this one function lets all the parsers skip over them simply.
   281  // Underscore must appear only between digits or between a base prefix and a digit.
   282  func underscoreOK(s string) bool {
   283  	// saw tracks the last character (class) we saw:
   284  	// ^ for beginning of number,
   285  	// 0 for a digit or base prefix,
   286  	// _ for an underscore,
   287  	// ! for none of the above.
   288  	saw := '^'
   289  	i := 0
   290  
   291  	// Optional sign.
   292  	if len(s) >= 1 && (s[0] == '-' || s[0] == '+') {
   293  		s = s[1:]
   294  	}
   295  
   296  	// Optional base prefix.
   297  	hex := false
   298  	if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') {
   299  		i = 2
   300  		saw = '0' // base prefix counts as a digit for "underscore as digit separator"
   301  		hex = lower(s[1]) == 'x'
   302  	}
   303  
   304  	// Number proper.
   305  	for ; i < len(s); i++ {
   306  		// Digits are always okay.
   307  		if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' {
   308  			saw = '0'
   309  			continue
   310  		}
   311  		// Underscore must follow digit.
   312  		if s[i] == '_' {
   313  			if saw != '0' {
   314  				return false
   315  			}
   316  			saw = '_'
   317  			continue
   318  		}
   319  		// Underscore must also be followed by digit.
   320  		if saw == '_' {
   321  			return false
   322  		}
   323  		// Saw non-digit, non-underscore.
   324  		saw = '!'
   325  	}
   326  	return saw != '_'
   327  }
   328
View as plain text