// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// simdgen is an experiment in generating Go <-> asm SIMD mappings.
//
// Usage: simdgen [-xedPath=path] [-q=query] input.yaml...
//
// If -xedPath is provided, one of the inputs is a sum of op-code definitions
// generated from the Intel XED data at path.
//
// If input YAML files are provided, each file is read as an input value. See
// [unify.Closure.UnmarshalYAML] or "go doc unify.Closure.UnmarshalYAML" for the
// format of these files.
//
// TODO: Example definitions and values.
//
// The command unifies across all of the inputs and prints all possible results
// of this unification.
//
// If the -q flag is provided, its string value is parsed as a value and treated
// as another input to unification. This is intended as a way to "query" the
// result, typically by narrowing it down to a small subset of results.
//
// Typical usage:
//
//	go run . -xedPath $XEDPATH *.yaml
//
// To see just the definitions generated from XED, run:
//
//	go run . -xedPath $XEDPATH
//
// (This works because if there's only one input, there's nothing to unify it
// with, so the result is simply itself.)
//
// To see just the definitions for VPADDQ:
//
//	go run . -xedPath $XEDPATH -q '{asm: VPADDQ}'
//
// simdgen can also generate Go definitions of SIMD mappings.
// To generate Go files into the Go root, run:
//
//	go run . -xedPath $XEDPATH -o godefs -goroot $PATH/TO/go go.yaml categories.yaml types.yaml
//
// types.yaml is already written; it specifies the shapes of vectors.
// categories.yaml and go.yaml contain definitions that unify with types.yaml
// and the XED data. You can find an example in ops/AddSub/.
//
// When generating Go definitions, simdgen does 3 "magic"s:
//   - It splits masked operations (with op's [Masked] field set) into const and
//     non-const variants:
//   - One is a normal masked operation, the original.
//   - The other has its mask operand's [Const] field set to "K0".
//   - This way the user does not need to provide a separate "K0"-masked operation def.
//
//   - It deduplicates intrinsic names that have duplicates:
//   - If there are two operations that share the same signature, where one is
//     AVX512 and the other is before AVX512, the latter (pre-AVX512) one will
//     be selected.
//   - This happens often when some operations are defined both before AVX512 and after.
//     This way the user does not need to provide a separate "K0" operation for the
//     AVX512 counterpart.
//
//   - It copies the op's [ConstImm] field to its immediate operand's [Const] field.
//   - This way the user does not need to provide a verbose op definition when only
//     the const immediate field is different. This is useful to reduce the verbosity
//     of compares with imm control predicates.
//
// These 3 magics can be disabled by passing the -nosplitmask, -nodedup or
// -noconstimmporting flags.
//
// simdgen right now only supports amd64; -arch=$OTHERARCH will trigger a fatal error.
package main

// Big TODOs:
//
// - This can produce duplicates, which can also lead to less efficient
//   environment merging. Add hashing and use it for deduplication. Be careful
//   about how this shows up in debug traces, since it could make things
//   confusing if we don't show it happening.
//
// - Do I need Closure, Value, and Domain? It feels like I should only need two
//   types.
import ( "cmp" "flag" "fmt" "log" "maps" "os" "path/filepath" "runtime/pprof" "slices" "strings" "simd/_gen/unify" "gopkg.in/yaml.v3" ) var ( xedPath = flag.String("xedPath", "", "load XED datafiles from `path`") flagQ = flag.String("q", "", "query: read `def` as another input (skips final validation)") flagO = flag.String("o", "yaml", "output type: yaml, godefs (generate definitions into a Go source tree") flagGoDefRoot = flag.String("goroot", ".", "the path to the Go dev directory that will receive the generated files") FlagNoDedup = flag.Bool("nodedup", false, "disable deduplicating godefs of 2 qualifying operations from different extensions") FlagNoConstImmPorting = flag.Bool("noconstimmporting", false, "disable const immediate porting from op to imm operand") FlagArch = flag.String("arch", "amd64", "the target architecture") Verbose = flag.Bool("v", false, "verbose") flagDebugXED = flag.Bool("debug-xed", false, "show XED instructions") flagDebugUnify = flag.Bool("debug-unify", false, "print unification trace") flagDebugHTML = flag.String("debug-html", "", "write unification trace to `file.html`") FlagReportDup = flag.Bool("reportdup", false, "report the duplicate godefs") flagCPUProfile = flag.String("cpuprofile", "", "write CPU profile to `file`") flagMemProfile = flag.String("memprofile", "", "write memory profile to `file`") ) const simdPackage = "simd" func main() { flag.Parse() if *flagCPUProfile != "" { f, err := os.Create(*flagCPUProfile) if err != nil { log.Fatalf("-cpuprofile: %s", err) } defer f.Close() pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } if *flagMemProfile != "" { f, err := os.Create(*flagMemProfile) if err != nil { log.Fatalf("-memprofile: %s", err) } defer func() { pprof.WriteHeapProfile(f) f.Close() }() } var inputs []unify.Closure if *FlagArch != "amd64" { log.Fatalf("simdgen only supports amd64") } // Load XED into a defs set. 
if *xedPath != "" { xedDefs := loadXED(*xedPath) inputs = append(inputs, unify.NewSum(xedDefs...)) } // Load query. if *flagQ != "" { r := strings.NewReader(*flagQ) def, err := unify.Read(r, "", unify.ReadOpts{}) if err != nil { log.Fatalf("parsing -q: %s", err) } inputs = append(inputs, def) } // Load defs files. must := make(map[*unify.Value]struct{}) for _, path := range flag.Args() { defs, err := unify.ReadFile(path, unify.ReadOpts{}) if err != nil { log.Fatal(err) } inputs = append(inputs, defs) if filepath.Base(path) == "go.yaml" { // These must all be used in the final result for def := range defs.Summands() { must[def] = struct{}{} } } } // Prepare for unification if *flagDebugUnify { unify.Debug.UnifyLog = os.Stderr } if *flagDebugHTML != "" { f, err := os.Create(*flagDebugHTML) if err != nil { log.Fatal(err) } unify.Debug.HTML = f defer f.Close() } // Unify! unified, err := unify.Unify(inputs...) if err != nil { log.Fatal(err) } // Validate results. // // Don't validate if this is a command-line query because that tends to // eliminate lots of required defs and is used in cases where maybe defs // aren't enumerable anyway. if *flagQ == "" && len(must) > 0 { validate(unified, must) } // Print results. switch *flagO { case "yaml": // Produce a result that looks like encoding a slice, but stream it. fmt.Println("!sum") var val1 [1]*unify.Value for val := range unified.All() { val1[0] = val // We have to make a new encoder each time or it'll print a document // separator between each object. 
enc := yaml.NewEncoder(os.Stdout) if err := enc.Encode(val1); err != nil { log.Fatal(err) } enc.Close() } case "godefs": if err := writeGoDefs(*flagGoDefRoot, unified); err != nil { log.Fatalf("Failed writing godefs: %+v", err) } } if !*Verbose && *xedPath != "" { if operandRemarks == 0 { fmt.Fprintf(os.Stderr, "XED decoding generated no errors, which is unusual.\n") } else { fmt.Fprintf(os.Stderr, "XED decoding generated %d \"errors\" which is not cause for alarm, use -v for details.\n", operandRemarks) } } } func validate(cl unify.Closure, required map[*unify.Value]struct{}) { // Validate that: // 1. All final defs are exact // 2. All required defs are used for def := range cl.All() { if _, ok := def.Domain.(unify.Def); !ok { fmt.Fprintf(os.Stderr, "%s: expected Def, got %T\n", def.PosString(), def.Domain) continue } if !def.Exact() { fmt.Fprintf(os.Stderr, "%s: def not reduced to an exact value, why is %s:\n", def.PosString(), def.WhyNotExact()) fmt.Fprintf(os.Stderr, "\t%s\n", strings.ReplaceAll(def.String(), "\n", "\n\t")) } for root := range def.Provenance() { delete(required, root) } } // Report unused defs unused := slices.SortedFunc(maps.Keys(required), func(a, b *unify.Value) int { return cmp.Or( cmp.Compare(a.Pos().Path, b.Pos().Path), cmp.Compare(a.Pos().Line, b.Pos().Line), ) }) for _, def := range unused { // TODO: Can we say anything more actionable? This is always a problem // with unification: if it fails, it's very hard to point a finger at // any particular reason. We could go back and try unifying this again // with each subset of the inputs (starting with individual inputs) to // at least say "it doesn't unify with anything in x.yaml". That's a lot // of work, but if we have trouble debugging unification failure it may // be worth it. fmt.Fprintf(os.Stderr, "%s: def required, but did not unify (%v)\n", def.PosString(), def) } }