// encVersion1 will be the first line of a file with version 1 encoding.
var encVersion1 = "go test fuzz v1"

// marshalCorpusFile encodes an arbitrary number of arguments into the file
// format for the corpus.
//
// The output begins with the encVersion1 header line, followed by one line per
// value, each written as a Go conversion expression such as int(-3),
// string("x") or []byte("x"). It panics if vals is empty or if a value has an
// unsupported type.
func marshalCorpusFile(vals ...any) []byte {
	if len(vals) == 0 {
		panic("must have at least one value to marshal")
	}
	b := bytes.NewBuffer([]byte(encVersion1 + "\n"))
	// TODO(katiehockman): keep uint8 and int32 encoding where applicable,
	// instead of changing to byte and rune respectively.
	for _, val := range vals {
		switch t := val.(type) {
		case int, int8, int16, int64, uint, uint16, uint32, uint64, bool:
			fmt.Fprintf(b, "%T(%v)\n", t, t)
		case float32:
			if math.IsNaN(float64(t)) && math.Float32bits(t) != math.Float32bits(float32(math.NaN())) {
				// We encode unusual NaNs as hex values, because that is how users are
				// likely to encounter them in literature about floating-point encoding.
				// This allows us to reproduce fuzz failures that depend on the specific
				// NaN representation (for float32 there are about 2^24 possibilities!),
				// not just the fact that the value is *a* NaN.
				//
				// Note that the specific value of float32(math.NaN()) can vary based on
				// whether the architecture represents signaling NaNs using a low bit
				// (as is common) or a high bit (as commonly implemented on MIPS
				// hardware before around 2012). We believe that the increase in clarity
				// from identifying "NaN" with math.NaN() is worth the slight ambiguity
				// from a platform-dependent value.
				fmt.Fprintf(b, "math.Float32frombits(0x%x)\n", math.Float32bits(t))
			} else {
				// We encode all other values — including the NaN value that is
				// bitwise-identical to float32(math.NaN()) — using the default
				// formatting, which is equivalent to strconv.FormatFloat with format
				// 'g' and can be parsed by strconv.ParseFloat.
				//
				// For an ordinary floating-point number this format includes
				// sufficiently many digits to reconstruct the exact value. For positive
				// or negative infinity it is the string "+Inf" or "-Inf". For positive
				// or negative zero it is "0" or "-0". For NaN, it is the string "NaN".
				fmt.Fprintf(b, "%T(%v)\n", t, t)
			}
		case float64:
			// Same scheme as float32: only NaNs whose bits differ from
			// math.NaN() are written in hex form.
			if math.IsNaN(t) && math.Float64bits(t) != math.Float64bits(math.NaN()) {
				fmt.Fprintf(b, "math.Float64frombits(0x%x)\n", math.Float64bits(t))
			} else {
				fmt.Fprintf(b, "%T(%v)\n", t, t)
			}
		case string:
			fmt.Fprintf(b, "string(%q)\n", t)
		case rune: // int32
			// Although rune and int32 are represented by the same type, only a subset
			// of valid int32 values can be expressed as rune literals. Notably,
			// negative numbers, surrogate halves, and values above unicode.MaxRune
			// have no quoted representation.
			//
			// fmt with "%q" (and the corresponding functions in the strconv package)
			// would quote out-of-range values to the Unicode replacement character
			// instead of the original value (see https://go.dev/issue/51526), so
			// they must be treated as int32 instead.
			//
			// We arbitrarily draw the line at UTF-8 validity, which biases toward the
			// "rune" interpretation. (However, we accept either format as input.)
			if utf8.ValidRune(t) {
				fmt.Fprintf(b, "rune(%q)\n", t)
			} else {
				fmt.Fprintf(b, "int32(%v)\n", t)
			}
		case byte: // uint8
			// For bytes, we arbitrarily prefer the character interpretation.
			// (Every byte has a valid character encoding.)
			fmt.Fprintf(b, "byte(%q)\n", t)
		case []byte: // []uint8
			fmt.Fprintf(b, "[]byte(%q)\n", t)
		default:
			panic(fmt.Sprintf("unsupported type: %T", t))
		}
	}
	return b.Bytes()
}

// unmarshalCorpusFile decodes corpus bytes into their respective values.
//
// The first line must be the encVersion1 header (a trailing "\r" is
// tolerated). Every following line is trimmed of surrounding whitespace, blank
// lines are skipped, and each remaining line is decoded with parseCorpusValue.
// The first line that fails to decode aborts the whole call; the returned
// error wraps the underlying parse error.
func unmarshalCorpusFile(b []byte) ([]any, error) {
	if len(b) == 0 {
		return nil, fmt.Errorf("cannot unmarshal empty string")
	}
	lines := bytes.Split(b, []byte("\n"))
	if len(lines) < 2 {
		return nil, fmt.Errorf("must include version and at least one value")
	}
	version := strings.TrimSuffix(string(lines[0]), "\r")
	if version != encVersion1 {
		return nil, fmt.Errorf("unknown encoding version: %s", version)
	}
	var vals []any
	for _, line := range lines[1:] {
		line = bytes.TrimSpace(line)
		if len(line) == 0 {
			continue
		}
		v, err := parseCorpusValue(line)
		if err != nil {
			return nil, fmt.Errorf("malformed line %q: %w", line, err)
		}
		vals = append(vals, v)
	}
	return vals, nil
}

// parseCorpusValue decodes a single corpus line into a Go value.
//
// The line is parsed as a Go expression and must be a call with exactly one
// argument, in one of these forms:
//
//   - []byte("...") with a string literal argument;
//   - math.Float32frombits(N) or math.Float64frombits(N) with an integer
//     literal argument;
//   - T(lit), where T is bool, string, byte, rune, a sized or unsized
//     int/uint type, float32 or float64.
//
// The dynamic type of the result corresponds to T.
func parseCorpusValue(line []byte) (any, error) {
	fs := token.NewFileSet()
	expr, err := parser.ParseExprFrom(fs, "(test)", line, 0)
	if err != nil {
		return nil, err
	}
	call, ok := expr.(*ast.CallExpr)
	if !ok {
		return nil, fmt.Errorf("expected call expression")
	}
	if len(call.Args) != 1 {
		return nil, fmt.Errorf("expected call expression with 1 argument; got %d", len(call.Args))
	}
	arg := call.Args[0]

	// []byte("...") is the only composite type accepted.
	if arrayType, ok := call.Fun.(*ast.ArrayType); ok {
		if arrayType.Len != nil {
			return nil, fmt.Errorf("expected []byte or primitive type")
		}
		elt, ok := arrayType.Elt.(*ast.Ident)
		if !ok || elt.Name != "byte" {
			return nil, fmt.Errorf("expected []byte")
		}
		lit, ok := arg.(*ast.BasicLit)
		if !ok || lit.Kind != token.STRING {
			return nil, fmt.Errorf("string literal required for type []byte")
		}
		s, err := strconv.Unquote(lit.Value)
		if err != nil {
			return nil, err
		}
		return []byte(s), nil
	}

	// Determine the target type. The math.FloatNNfrombits selectors are mapped
	// onto synthetic type names so that they can share the switch below.
	var idType *ast.Ident
	if selector, ok := call.Fun.(*ast.SelectorExpr); ok {
		xIdent, ok := selector.X.(*ast.Ident)
		if !ok || xIdent.Name != "math" {
			return nil, fmt.Errorf("invalid selector type")
		}
		switch selector.Sel.Name {
		case "Float64frombits":
			idType = &ast.Ident{Name: "float64-bits"}
		case "Float32frombits":
			idType = &ast.Ident{Name: "float32-bits"}
		default:
			return nil, fmt.Errorf("invalid selector type")
		}
	} else {
		idType, ok = call.Fun.(*ast.Ident)
		if !ok {
			return nil, fmt.Errorf("expected []byte or primitive type")
		}
		if idType.Name == "bool" {
			id, ok := arg.(*ast.Ident)
			if !ok {
				return nil, fmt.Errorf("malformed bool")
			}
			switch id.Name {
			case "true":
				return true, nil
			case "false":
				return false, nil
			default:
				return nil, fmt.Errorf("true or false required for type bool")
			}
		}
	}

	// Reduce the argument to its literal text and token kind.
	var (
		val  string
		kind token.Token
	)
	if op, ok := arg.(*ast.UnaryExpr); ok {
		switch lit := op.X.(type) {
		case *ast.BasicLit:
			if op.Op != token.SUB {
				return nil, fmt.Errorf("unsupported operation on int/float: %v", op.Op)
			}
			// Special case for negative numbers.
			val = op.Op.String() + lit.Value // e.g. "-" + "124"
			kind = lit.Kind
		case *ast.Ident:
			if lit.Name != "Inf" {
				return nil, fmt.Errorf("expected operation on int or float type")
			}
			// Only "-" selects negative infinity; any other unary operator
			// applied to Inf is read as positive infinity.
			if op.Op == token.SUB {
				val = "-Inf"
			} else {
				val = "+Inf"
			}
			kind = token.FLOAT
		default:
			return nil, fmt.Errorf("expected operation on int or float type")
		}
	} else {
		switch lit := arg.(type) {
		case *ast.BasicLit:
			val, kind = lit.Value, lit.Kind
		case *ast.Ident:
			if lit.Name != "NaN" {
				return nil, fmt.Errorf("literal value required for primitive type")
			}
			val, kind = "NaN", token.FLOAT
		default:
			return nil, fmt.Errorf("literal value required for primitive type")
		}
	}

	switch typ := idType.Name; typ {
	case "string":
		if kind != token.STRING {
			return nil, fmt.Errorf("string literal value required for type string")
		}
		return strconv.Unquote(val)
	case "byte", "rune":
		// Both a numeric form (e.g. byte(97)) and a character form
		// (e.g. byte('a')) are accepted.
		if kind == token.INT {
			switch typ {
			case "rune":
				return parseInt(val, typ)
			case "byte":
				return parseUint(val, typ)
			}
		}
		if kind != token.CHAR {
			return nil, fmt.Errorf("character literal required for byte/rune types")
		}
		n := len(val)
		if n < 2 {
			return nil, fmt.Errorf("malformed character literal, missing single quotes")
		}
		code, _, _, err := strconv.UnquoteChar(val[1:n-1], '\'')
		if err != nil {
			return nil, err
		}
		if typ == "rune" {
			return code, nil
		}
		if code >= 256 {
			return nil, fmt.Errorf("can only encode single byte to a byte type")
		}
		return byte(code), nil
	case "int", "int8", "int16", "int32", "int64":
		if kind != token.INT {
			return nil, fmt.Errorf("integer literal required for int types")
		}
		return parseInt(val, typ)
	case "uint", "uint8", "uint16", "uint32", "uint64":
		if kind != token.INT {
			return nil, fmt.Errorf("integer literal required for uint types")
		}
		return parseUint(val, typ)
	case "float32":
		if kind != token.FLOAT && kind != token.INT {
			return nil, fmt.Errorf("float or integer literal required for float32 type")
		}
		v, err := strconv.ParseFloat(val, 32)
		return float32(v), err
	case "float64":
		if kind != token.FLOAT && kind != token.INT {
			return nil, fmt.Errorf("float or integer literal required for float64 type")
		}
		return strconv.ParseFloat(val, 64)
	case "float32-bits":
		if kind != token.INT {
			return nil, fmt.Errorf("integer literal required for math.Float32frombits type")
		}
		bits, err := parseUint(val, "uint32")
		if err != nil {
			return nil, err
		}
		return math.Float32frombits(bits.(uint32)), nil
	case "float64-bits":
		// Only an integer literal is a valid bit pattern. Rejecting float
		// literals here reports the documented error instead of a
		// strconv.ParseUint syntax error, matching the float32 case.
		if kind != token.INT {
			return nil, fmt.Errorf("integer literal required for math.Float64frombits type")
		}
		bits, err := parseUint(val, "uint64")
		if err != nil {
			return nil, err
		}
		return math.Float64frombits(bits.(uint64)), nil
	default:
		return nil, fmt.Errorf("expected []byte or primitive type")
	}
}

// parseInt returns an integer of value val and type typ.
//
// val is parsed with strconv.ParseInt using base 0, so base prefixes such as
// "0x" are accepted. typ must be "int", "int8", "int16", "int32", "rune" or
// "int64"; any other name panics.
func parseInt(val, typ string) (any, error) {
	switch typ {
	case "int":
		// The int type may be either 32 or 64 bits. If 32, the fuzz tests in the
		// corpus may include 64-bit values produced by fuzzing runs on 64-bit
		// architectures. When running those tests, we implicitly wrap the values to
		// fit in a regular int. (The test case is still “interesting”, even if the
		// specific values of its inputs are platform-dependent.)
		i, err := strconv.ParseInt(val, 0, 64)
		return int(i), err
	case "int8":
		i, err := strconv.ParseInt(val, 0, 8)
		return int8(i), err
	case "int16":
		i, err := strconv.ParseInt(val, 0, 16)
		return int16(i), err
	case "int32", "rune":
		i, err := strconv.ParseInt(val, 0, 32)
		return int32(i), err
	case "int64":
		return strconv.ParseInt(val, 0, 64)
	default:
		panic("unreachable")
	}
}

// parseUint returns an unsigned integer of value val and type typ.
//
// val is parsed with strconv.ParseUint using base 0, so base prefixes such as
// "0x" are accepted. typ must be "uint", "uint8", "byte", "uint16", "uint32"
// or "uint64"; any other name panics.
func parseUint(val, typ string) (any, error) {
	switch typ {
	case "uint":
		i, err := strconv.ParseUint(val, 0, 64)
		return uint(i), err
	case "uint8", "byte":
		i, err := strconv.ParseUint(val, 0, 8)
		return uint8(i), err
	case "uint16":
		i, err := strconv.ParseUint(val, 0, 16)
		return uint16(i), err
	case "uint32":
		i, err := strconv.ParseUint(val, 0, 32)
		return uint32(i), err
	case "uint64":
		return strconv.ParseUint(val, 0, 64)
	default:
		panic("unreachable")
	}
}