Source file src/simd/_gen/simdgen/main.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // simdgen is an experiment in generating Go <-> asm SIMD mappings.
     6  //
     7  // Usage: simdgen [-xedPath=path] [-q=query] input.yaml...
     8  //
     9  // If -xedPath is provided, one of the inputs is a sum of op-code definitions
    10  // generated from the Intel XED data at path.
    11  //
    12  // If input YAML files are provided, each file is read as an input value. See
    13  // [unify.Closure.UnmarshalYAML] or "go doc unify.Closure.UnmarshalYAML" for the
    14  // format of these files.
    15  //
    16  // TODO: Example definitions and values.
    17  //
    18  // The command unifies across all of the inputs and prints all possible results
    19  // of this unification.
    20  //
    21  // If the -q flag is provided, its string value is parsed as a value and treated
    22  // as another input to unification. This is intended as a way to "query" the
    23  // result, typically by narrowing it down to a small subset of results.
    24  //
    25  // Typical usage:
    26  //
    27  //	go run . -xedPath $XEDPATH *.yaml
    28  //
    29  // To see just the definitions generated from XED, run:
    30  //
    31  //	go run . -xedPath $XEDPATH
    32  //
    33  // (This works because if there's only one input, there's nothing to unify it
    34  // with, so the result is simply itself.)
    35  //
    36  // To see just the definitions for VPADDQ:
    37  //
    38  //	go run . -xedPath $XEDPATH -q '{asm: VPADDQ}'
    39  //
    40  // simdgen can also generate Go definitions of SIMD mappings:
    41  // To generate go files to the go root, run:
    42  //
    43  //	go run . -xedPath $XEDPATH -o godefs -goroot $PATH/TO/go go.yaml categories.yaml types.yaml
    44  //
    45  // types.yaml is already written; it specifies the shapes of vectors.
    46  // categories.yaml and go.yaml contain definitions that unify with types.yaml and XED
    47  // data; you can find an example in ops/AddSub/.
    48  //
    49  // When generating Go definitions, simdgen does 3 "magic" things:
    50  // - It splits masked operations (with the op's [Masked] field set) into const and non-const:
    51  //   - One is a normal masked operation, the original
    52  //   - The other has its mask operand's [Const] fields set to "K0".
    53  //   - This way the user does not need to provide a separate "K0"-masked operation def.
    54  //
    55  // - It deduplicates intrinsic names that have duplicates:
    56  //   - If two operations share the same signature, one from AVX512 and the other
    57  //     from before AVX512, the latter (pre-AVX512) one will be selected.
    58  //   - This happens often when some operations are defined both before AVX512 and after.
    59  //     This way the user does not need to provide a separate "K0" operation for the
    60  //     AVX512 counterpart.
    61  //
    62  // - It copies the op's [ConstImm] field to its immediate operand's [Const] field.
    63  //   - This way the user does not need to provide verbose op definition while only
    64  //     the const immediate field is different. This is useful to reduce verbosity of
    65  //     compares with imm control predicates.
    66  //
    67  // These 3 "magic" behaviors can be disabled by passing the -nosplitmask, -nodedup, or
    68  // -noconstimmporting flags, respectively.
    69  //
    70  // simdgen currently supports only amd64; -arch=$OTHERARCH will trigger a fatal error.
    71  package main
    72  
    73  // Big TODOs:
    74  //
    75  // - This can produce duplicates, which can also lead to less efficient
    76  // environment merging. Add hashing and use it for deduplication. Be careful
    77  // about how this shows up in debug traces, since it could make things
    78  // confusing if we don't show it happening.
    79  //
    80  // - Do I need Closure, Value, and Domain? It feels like I should only need two
    81  // types.
    82  
    83  import (
    84  	"cmp"
    85  	"flag"
    86  	"fmt"
    87  	"log"
    88  	"maps"
    89  	"os"
    90  	"path/filepath"
    91  	"runtime/pprof"
    92  	"slices"
    93  	"strings"
    94  
    95  	"simd/_gen/unify"
    96  
    97  	"gopkg.in/yaml.v3"
    98  )
    99  
   100  var (
   101  	xedPath               = flag.String("xedPath", "", "load XED datafiles from `path`")
   102  	flagQ                 = flag.String("q", "", "query: read `def` as another input (skips final validation)")
   103  	flagO                 = flag.String("o", "yaml", "output type: yaml, godefs (generate definitions into a Go source tree")
   104  	flagGoDefRoot         = flag.String("goroot", ".", "the path to the Go dev directory that will receive the generated files")
   105  	FlagNoDedup           = flag.Bool("nodedup", false, "disable deduplicating godefs of 2 qualifying operations from different extensions")
   106  	FlagNoConstImmPorting = flag.Bool("noconstimmporting", false, "disable const immediate porting from op to imm operand")
   107  	FlagArch              = flag.String("arch", "amd64", "the target architecture")
   108  
   109  	Verbose = flag.Bool("v", false, "verbose")
   110  
   111  	flagDebugXED   = flag.Bool("debug-xed", false, "show XED instructions")
   112  	flagDebugUnify = flag.Bool("debug-unify", false, "print unification trace")
   113  	flagDebugHTML  = flag.String("debug-html", "", "write unification trace to `file.html`")
   114  	FlagReportDup  = flag.Bool("reportdup", false, "report the duplicate godefs")
   115  
   116  	flagCPUProfile = flag.String("cpuprofile", "", "write CPU profile to `file`")
   117  	flagMemProfile = flag.String("memprofile", "", "write memory profile to `file`")
   118  )
   119  
   120  const simdPackage = "simd"
   121  
   122  func main() {
   123  	flag.Parse()
   124  
   125  	if *flagCPUProfile != "" {
   126  		f, err := os.Create(*flagCPUProfile)
   127  		if err != nil {
   128  			log.Fatalf("-cpuprofile: %s", err)
   129  		}
   130  		defer f.Close()
   131  		pprof.StartCPUProfile(f)
   132  		defer pprof.StopCPUProfile()
   133  	}
   134  	if *flagMemProfile != "" {
   135  		f, err := os.Create(*flagMemProfile)
   136  		if err != nil {
   137  			log.Fatalf("-memprofile: %s", err)
   138  		}
   139  		defer func() {
   140  			pprof.WriteHeapProfile(f)
   141  			f.Close()
   142  		}()
   143  	}
   144  
   145  	var inputs []unify.Closure
   146  
   147  	if *FlagArch != "amd64" {
   148  		log.Fatalf("simdgen only supports amd64")
   149  	}
   150  
   151  	// Load XED into a defs set.
   152  	if *xedPath != "" {
   153  		xedDefs := loadXED(*xedPath)
   154  		inputs = append(inputs, unify.NewSum(xedDefs...))
   155  	}
   156  
   157  	// Load query.
   158  	if *flagQ != "" {
   159  		r := strings.NewReader(*flagQ)
   160  		def, err := unify.Read(r, "<query>", unify.ReadOpts{})
   161  		if err != nil {
   162  			log.Fatalf("parsing -q: %s", err)
   163  		}
   164  		inputs = append(inputs, def)
   165  	}
   166  
   167  	// Load defs files.
   168  	must := make(map[*unify.Value]struct{})
   169  	for _, path := range flag.Args() {
   170  		defs, err := unify.ReadFile(path, unify.ReadOpts{})
   171  		if err != nil {
   172  			log.Fatal(err)
   173  		}
   174  		inputs = append(inputs, defs)
   175  
   176  		if filepath.Base(path) == "go.yaml" {
   177  			// These must all be used in the final result
   178  			for def := range defs.Summands() {
   179  				must[def] = struct{}{}
   180  			}
   181  		}
   182  	}
   183  
   184  	// Prepare for unification
   185  	if *flagDebugUnify {
   186  		unify.Debug.UnifyLog = os.Stderr
   187  	}
   188  	if *flagDebugHTML != "" {
   189  		f, err := os.Create(*flagDebugHTML)
   190  		if err != nil {
   191  			log.Fatal(err)
   192  		}
   193  		unify.Debug.HTML = f
   194  		defer f.Close()
   195  	}
   196  
   197  	// Unify!
   198  	unified, err := unify.Unify(inputs...)
   199  	if err != nil {
   200  		log.Fatal(err)
   201  	}
   202  
   203  	// Validate results.
   204  	//
   205  	// Don't validate if this is a command-line query because that tends to
   206  	// eliminate lots of required defs and is used in cases where maybe defs
   207  	// aren't enumerable anyway.
   208  	if *flagQ == "" && len(must) > 0 {
   209  		validate(unified, must)
   210  	}
   211  
   212  	// Print results.
   213  	switch *flagO {
   214  	case "yaml":
   215  		// Produce a result that looks like encoding a slice, but stream it.
   216  		fmt.Println("!sum")
   217  		var val1 [1]*unify.Value
   218  		for val := range unified.All() {
   219  			val1[0] = val
   220  			// We have to make a new encoder each time or it'll print a document
   221  			// separator between each object.
   222  			enc := yaml.NewEncoder(os.Stdout)
   223  			if err := enc.Encode(val1); err != nil {
   224  				log.Fatal(err)
   225  			}
   226  			enc.Close()
   227  		}
   228  	case "godefs":
   229  		if err := writeGoDefs(*flagGoDefRoot, unified); err != nil {
   230  			log.Fatalf("Failed writing godefs: %+v", err)
   231  		}
   232  	}
   233  
   234  	if !*Verbose && *xedPath != "" {
   235  		if operandRemarks == 0 {
   236  			fmt.Fprintf(os.Stderr, "XED decoding generated no errors, which is unusual.\n")
   237  		} else {
   238  			fmt.Fprintf(os.Stderr, "XED decoding generated %d \"errors\" which is not cause for alarm, use -v for details.\n", operandRemarks)
   239  		}
   240  	}
   241  }
   242  
   243  func validate(cl unify.Closure, required map[*unify.Value]struct{}) {
   244  	// Validate that:
   245  	// 1. All final defs are exact
   246  	// 2. All required defs are used
   247  	for def := range cl.All() {
   248  		if _, ok := def.Domain.(unify.Def); !ok {
   249  			fmt.Fprintf(os.Stderr, "%s: expected Def, got %T\n", def.PosString(), def.Domain)
   250  			continue
   251  		}
   252  
   253  		if !def.Exact() {
   254  			fmt.Fprintf(os.Stderr, "%s: def not reduced to an exact value, why is %s:\n", def.PosString(), def.WhyNotExact())
   255  			fmt.Fprintf(os.Stderr, "\t%s\n", strings.ReplaceAll(def.String(), "\n", "\n\t"))
   256  		}
   257  
   258  		for root := range def.Provenance() {
   259  			delete(required, root)
   260  		}
   261  	}
   262  	// Report unused defs
   263  	unused := slices.SortedFunc(maps.Keys(required),
   264  		func(a, b *unify.Value) int {
   265  			return cmp.Or(
   266  				cmp.Compare(a.Pos().Path, b.Pos().Path),
   267  				cmp.Compare(a.Pos().Line, b.Pos().Line),
   268  			)
   269  		})
   270  	for _, def := range unused {
   271  		// TODO: Can we say anything more actionable? This is always a problem
   272  		// with unification: if it fails, it's very hard to point a finger at
   273  		// any particular reason. We could go back and try unifying this again
   274  		// with each subset of the inputs (starting with individual inputs) to
   275  		// at least say "it doesn't unify with anything in x.yaml". That's a lot
   276  		// of work, but if we have trouble debugging unification failure it may
   277  		// be worth it.
   278  		fmt.Fprintf(os.Stderr, "%s: def required, but did not unify (%v)\n",
   279  			def.PosString(), def)
   280  	}
   281  }
   282  

View as plain text