Source file src/simd/_gen/simdgen/gen_simdTypes.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"bytes"
     9  	"cmp"
    10  	"fmt"
    11  	"maps"
    12  	"slices"
    13  	"sort"
    14  	"strings"
    15  	"unicode"
    16  )
    17  
// simdType describes one Go SIMD type (a vector or a mask) that the
// generator emits.
//
// parseSIMDTypes builds values of this type with positional struct
// literals, so the order of the fields below is significant.
type simdType struct {
	Name                    string // The go type name of this simd type, for example Int32x4.
	Lanes                   int    // The number of elements in this vector/mask.
	Base                    string // The element's type, like for Int32x4 it will be int32.
	Fields                  string // The struct fields, already formatted; inserted verbatim into the generated struct body.
	Type                    string // Either "mask" or "vreg"
	VectorCounterpart       string // For mask use only: just replacing the "Mask" in [simdType.Name] with "Int"
	ReshapedVectorWithAndOr string // For mask use only: vector AND and OR are only available in some shape with element width 32.
	Size                    int    // The size of the vector type, in bits.
}
    28  
    29  func (x simdType) ElemBits() int {
    30  	return x.Size / x.Lanes
    31  }
    32  
    33  // LanesContainer returns the smallest int/uint bit size that is
    34  // large enough to hold one bit for each lane.  E.g., Mask32x4
    35  // is 4 lanes, and a uint8 is the smallest uint that has 4 bits.
    36  func (x simdType) LanesContainer() int {
    37  	if x.Lanes > 64 {
    38  		panic("too many lanes")
    39  	}
    40  	if x.Lanes > 32 {
    41  		return 64
    42  	}
    43  	if x.Lanes > 16 {
    44  		return 32
    45  	}
    46  	if x.Lanes > 8 {
    47  		return 16
    48  	}
    49  	return 8
    50  }
    51  
    52  // MaskedLoadStoreFilter encodes which simd type type currently
    53  // get masked loads/stores generated, it is used in two places,
    54  // this forces coordination.
    55  func (x simdType) MaskedLoadStoreFilter() bool {
    56  	return x.Size == 512 || x.ElemBits() >= 32 && x.Type != "mask"
    57  }
    58  
    59  func (x simdType) IntelSizeSuffix() string {
    60  	switch x.ElemBits() {
    61  	case 8:
    62  		return "B"
    63  	case 16:
    64  		return "W"
    65  	case 32:
    66  		return "D"
    67  	case 64:
    68  		return "Q"
    69  	}
    70  	panic("oops")
    71  }
    72  
    73  func (x simdType) MaskedLoadDoc() string {
    74  	if x.Size == 512 || x.ElemBits() < 32 {
    75  		return fmt.Sprintf("// Asm: VMOVDQU%d.Z, CPU Feature: AVX512", x.ElemBits())
    76  	} else {
    77  		return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix())
    78  	}
    79  }
    80  
    81  func (x simdType) MaskedStoreDoc() string {
    82  	if x.Size == 512 || x.ElemBits() < 32 {
    83  		return fmt.Sprintf("// Asm: VMOVDQU%d, CPU Feature: AVX512", x.ElemBits())
    84  	} else {
    85  		return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix())
    86  	}
    87  }
    88  
    89  func compareSimdTypes(x, y simdType) int {
    90  	// "vreg" then "mask"
    91  	if c := -compareNatural(x.Type, y.Type); c != 0 {
    92  		return c
    93  	}
    94  	// want "flo" < "int" < "uin" (and then 8 < 16 < 32 < 64),
    95  	// not "int16" < "int32" < "int64" < "int8")
    96  	// so limit comparison to first 3 bytes in string.
    97  	if c := compareNatural(x.Base[:3], y.Base[:3]); c != 0 {
    98  		return c
    99  	}
   100  	// base type size, 8 < 16 < 32 < 64
   101  	if c := x.ElemBits() - y.ElemBits(); c != 0 {
   102  		return c
   103  	}
   104  	// vector size last
   105  	return x.Size - y.Size
   106  }
   107  
// simdTypeMap groups simd types by vector size: parseSIMDTypes keys it by
// the operand's bit size, and the value is every simd type of that size.
type simdTypeMap map[int][]simdType
   109  
// simdTypePair is a (source, destination) pair of simd types. It is the
// data passed to the "vectorConversion" template, which declares a
// conversion method from Tsrc to Tdst.
type simdTypePair struct {
	Tsrc simdType // type being converted from (the method receiver)
	Tdst simdType // type being converted to (the method result)
}
   114  
   115  func compareSimdTypePairs(x, y simdTypePair) int {
   116  	c := compareSimdTypes(x.Tsrc, y.Tsrc)
   117  	if c != 0 {
   118  		return c
   119  	}
   120  	return compareSimdTypes(x.Tdst, y.Tdst)
   121  }
   122  
// simdPackageHeader is written first into every buffer produced by
// writeSIMDTypes, writeSIMDFeatures and writeSIMDStubs. It consists of
// generatedHeader followed by the goexperiment.simd build constraint and
// the "package simd" clause.
const simdPackageHeader = generatedHeader + `
//go:build goexperiment.simd

package simd
`
   128  
// simdTypesTemplates holds the two named templates used by writeSIMDTypes:
//
//   - "sizeTmpl" is executed with a vector size (an int) and declares the
//     v<size> tag type.
//   - "typeTmpl" is executed with a simdType and declares the struct for
//     that type, inserting simdType.Fields verbatim as the struct body.
const simdTypesTemplates = `
{{define "sizeTmpl"}}
// v{{.}} is a tag type that tells the compiler that this is really {{.}}-bit SIMD
type v{{.}} struct {
	_{{.}} [0]func() // uncomparable
}
{{end}}

{{define "typeTmpl"}}
// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}}
type {{.Name}} struct {
{{.Fields}}
}

{{end}}
`
   145  
// simdFeaturesTemplate generates the X86Features type and one boolean
// method per CPU feature. writeSIMDFeatures executes it with a sorted
// slice of values that have GoArch and Feature fields; each method body
// reads the matching cpu.X86.Has<Feature> field. The "AVX512" feature gets
// a longer doc comment describing the bundled F+CD+BW+DQ+VL features.
const simdFeaturesTemplate = `
import "internal/cpu"

type X86Features struct {}

var X86 X86Features

{{range .}}
{{- if eq .Feature "AVX512"}}
// {{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features.
//
// These five CPU features are bundled together, and no use of AVX-512
// is allowed unless all of these features are supported together.
// Nearly every CPU that has shipped with any support for AVX-512 has
// supported all five of these features.
{{- else -}}
// {{.Feature}} returns whether the CPU supports the {{.Feature}} feature.
{{- end}}
//
// {{.Feature}} is defined on all GOARCHes, but will only return true on
// GOARCH {{.GoArch}}.
func (X86Features) {{.Feature}}() bool {
	return cpu.X86.Has{{.Feature}}
}
{{end}}
`
   172  
// simdLoadStoreTemplate declares Len, Load<Name> and Store for one simd
// type. writeSIMDTypes executes it with a simdType for every non-mask type
// it emits; the array length and element type in the signatures come from
// simdType.Lanes and simdType.Base.
const simdLoadStoreTemplate = `
// Len returns the number of elements in a {{.Name}}
func (x {{.Name}}) Len() int { return {{.Lanes}} }

// Load{{.Name}} loads a {{.Name}} from an array
//
//go:noescape
func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}}

// Store stores a {{.Name}} to an array
//
//go:noescape
func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}})
`
   187  
   188  const simdMaskFromValTemplate = `
   189  // {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset.
   190  {{- if ne .Lanes .LanesContainer}}
   191  // Only the lower {{.Lanes}} bits of y are used.
   192  {{- end}}
   193  //
   194  // Asm: KMOV{{.IntelSizeSuffix}}, CPU Feature: AVX512
   195  func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}}
   196  
   197  // ToBits constructs a bitmap from a {{.Name}}, where 1 means set for the indexed element, 0 means unset.
   198  {{- if ne .Lanes .LanesContainer}}
   199  // Only the lower {{.Lanes}} bits of y are used.
   200  {{- end}}
   201  //
   202  // Asm: KMOV{{.IntelSizeSuffix}}, CPU Features: AVX512
   203  func (x {{.Name}}) ToBits() uint{{.LanesContainer}}
   204  `
   205  
// simdMaskedLoadStoreTemplate declares LoadMasked<Name> and StoreMasked
// for one simd type. writeSIMDTypes executes it with a simdType for the
// non-mask types accepted by simdType.MaskedLoadStoreFilter. The "Asm:"
// comment lines are produced by simdType.MaskedLoadDoc and
// simdType.MaskedStoreDoc, and the mask parameter type is named
// Mask<ElemBits>x<Lanes>.
const simdMaskedLoadStoreTemplate = `
// LoadMasked{{.Name}} loads a {{.Name}} from an array,
// at those elements enabled by mask
//
{{.MaskedLoadDoc}}
//
//go:noescape
func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) {{.Name}}

// StoreMasked stores a {{.Name}} to an array,
// at those elements enabled by mask
//
{{.MaskedStoreDoc}}
//
//go:noescape
func (x {{.Name}}) StoreMasked(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}})
`
   223  
   224  const simdStubsTmpl = `
   225  {{define "op1"}}
   226  {{if .Documentation}}{{.Documentation}}
   227  //{{end}}
   228  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   229  func ({{.Op0NameAndType "x"}}) {{.Go}}() {{.GoType}}
   230  {{end}}
   231  
   232  {{define "op2"}}
   233  {{if .Documentation}}{{.Documentation}}
   234  //{{end}}
   235  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   236  func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}}
   237  {{end}}
   238  
   239  {{define "op2_21"}}
   240  {{if .Documentation}}{{.Documentation}}
   241  //{{end}}
   242  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   243  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}}
   244  {{end}}
   245  
   246  {{define "op2_21Type1"}}
   247  {{if .Documentation}}{{.Documentation}}
   248  //{{end}}
   249  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   250  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}}
   251  {{end}}
   252  
   253  {{define "op3"}}
   254  {{if .Documentation}}{{.Documentation}}
   255  //{{end}}
   256  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   257  func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}}
   258  {{end}}
   259  
   260  {{define "op3_31Zero3"}}
   261  {{if .Documentation}}{{.Documentation}}
   262  //{{end}}
   263  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   264  func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}}
   265  {{end}}
   266  
   267  {{define "op3_21"}}
   268  {{if .Documentation}}{{.Documentation}}
   269  //{{end}}
   270  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   271  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}}
   272  {{end}}
   273  
   274  {{define "op3_21Type1"}}
   275  {{if .Documentation}}{{.Documentation}}
   276  //{{end}}
   277  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   278  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}}
   279  {{end}}
   280  
   281  {{define "op3_231Type1"}}
   282  {{if .Documentation}}{{.Documentation}}
   283  //{{end}}
   284  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   285  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}) {{.GoType}}
   286  {{end}}
   287  
   288  {{define "op2VecAsScalar"}}
   289  {{if .Documentation}}{{.Documentation}}
   290  //{{end}}
   291  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   292  func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}}
   293  {{end}}
   294  
   295  {{define "op3VecAsScalar"}}
   296  {{if .Documentation}}{{.Documentation}}
   297  //{{end}}
   298  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   299  func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}}
   300  {{end}}
   301  
   302  {{define "op4"}}
   303  {{if .Documentation}}{{.Documentation}}
   304  //{{end}}
   305  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   306  func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}}
   307  {{end}}
   308  
   309  {{define "op4_231Type1"}}
   310  {{if .Documentation}}{{.Documentation}}
   311  //{{end}}
   312  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   313  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}}
   314  {{end}}
   315  
   316  {{define "op4_31"}}
   317  {{if .Documentation}}{{.Documentation}}
   318  //{{end}}
   319  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   320  func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}}
   321  {{end}}
   322  
   323  {{define "op1Imm8"}}
   324  {{if .Documentation}}{{.Documentation}}
   325  //{{end}}
   326  // {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
   327  //
   328  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   329  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}}
   330  {{end}}
   331  
   332  {{define "op2Imm8"}}
   333  {{if .Documentation}}{{.Documentation}}
   334  //{{end}}
   335  // {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
   336  //
   337  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   338  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
   339  {{end}}
   340  
   341  {{define "op2Imm8_2I"}}
   342  {{if .Documentation}}{{.Documentation}}
   343  //{{end}}
   344  // {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
   345  //
   346  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   347  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}}
   348  {{end}}
   349  
   350  {{define "op2Imm8_II"}}
   351  {{if .Documentation}}{{.Documentation}}
   352  //{{end}}
   353  // {{.ImmName}} result in better performance when they are constants, non-constant values will be translated into a jump table.
   354  // {{.ImmName}} should be between 0 and 3, inclusive; other values may result in a runtime panic.
   355  //
   356  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   357  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
   358  {{end}}
   359  
   360  {{define "op2Imm8_SHA1RNDS4"}}
   361  {{if .Documentation}}{{.Documentation}}
   362  //{{end}}
   363  // {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
   364  //
   365  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   366  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
   367  {{end}}
   368  
   369  {{define "op3Imm8"}}
   370  {{if .Documentation}}{{.Documentation}}
   371  //{{end}}
   372  // {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
   373  //
   374  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   375  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}}
   376  {{end}}
   377  
   378  {{define "op3Imm8_2I"}}
   379  {{if .Documentation}}{{.Documentation}}
   380  //{{end}}
   381  // {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
   382  //
   383  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   384  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}}
   385  {{end}}
   386  
   387  
   388  {{define "op4Imm8"}}
   389  {{if .Documentation}}{{.Documentation}}
   390  //{{end}}
   391  // {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
   392  //
   393  // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
   394  func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}}
   395  {{end}}
   396  
   397  {{define "vectorConversion"}}
   398  // {{.Tdst.Name}} converts from {{.Tsrc.Name}} to {{.Tdst.Name}}
   399  func (from {{.Tsrc.Name}}) As{{.Tdst.Name}}() (to {{.Tdst.Name}})
   400  {{end}}
   401  
   402  {{define "mask"}}
   403  // As{{.VectorCounterpart}} converts from {{.Name}} to {{.VectorCounterpart}}
   404  func (from {{.Name}}) As{{.VectorCounterpart}}() (to {{.VectorCounterpart}})
   405  
   406  // asMask converts from {{.VectorCounterpart}} to {{.Name}}
   407  func (from {{.VectorCounterpart}}) asMask() (to {{.Name}})
   408  
   409  func (x {{.Name}}) And(y {{.Name}}) {{.Name}}
   410  
   411  func (x {{.Name}}) Or(y {{.Name}}) {{.Name}}
   412  {{end}}
   413  `
   414  
   415  // parseSIMDTypes groups go simd types by their vector sizes, and
   416  // returns a map whose key is the vector size, value is the simd type.
   417  func parseSIMDTypes(ops []Operation) simdTypeMap {
   418  	// TODO: maybe instead of going over ops, let's try go over types.yaml.
   419  	ret := map[int][]simdType{}
   420  	seen := map[string]struct{}{}
   421  	processArg := func(arg Operand) {
   422  		if arg.Class == "immediate" || arg.Class == "greg" {
   423  			// Immediates are not encoded as vector types.
   424  			return
   425  		}
   426  		if _, ok := seen[*arg.Go]; ok {
   427  			return
   428  		}
   429  		seen[*arg.Go] = struct{}{}
   430  
   431  		lanes := *arg.Lanes
   432  		base := fmt.Sprintf("%s%d", *arg.Base, *arg.ElemBits)
   433  		tagFieldNameS := fmt.Sprintf("%sx%d", base, lanes)
   434  		tagFieldS := fmt.Sprintf("%s v%d", tagFieldNameS, *arg.Bits)
   435  		valFieldS := fmt.Sprintf("vals%s[%d]%s", strings.Repeat(" ", len(tagFieldNameS)-3), lanes, base)
   436  		fields := fmt.Sprintf("\t%s\n\t%s", tagFieldS, valFieldS)
   437  		if arg.Class == "mask" {
   438  			vectorCounterpart := strings.ReplaceAll(*arg.Go, "Mask", "Int")
   439  			reshapedVectorWithAndOr := fmt.Sprintf("Int32x%d", *arg.Bits/32)
   440  			ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits})
   441  			// In case the vector counterpart of a mask is not present, put its vector counterpart typedef into the map as well.
   442  			if _, ok := seen[vectorCounterpart]; !ok {
   443  				seen[vectorCounterpart] = struct{}{}
   444  				ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits})
   445  			}
   446  		} else {
   447  			ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits})
   448  		}
   449  	}
   450  	for _, op := range ops {
   451  		for _, arg := range op.In {
   452  			processArg(arg)
   453  		}
   454  		for _, arg := range op.Out {
   455  			processArg(arg)
   456  		}
   457  	}
   458  	return ret
   459  }
   460  
   461  func vConvertFromTypeMap(typeMap simdTypeMap) []simdTypePair {
   462  	v := []simdTypePair{}
   463  	for _, ts := range typeMap {
   464  		for i, tsrc := range ts {
   465  			for j, tdst := range ts {
   466  				if i != j && tsrc.Type == tdst.Type && tsrc.Type == "vreg" &&
   467  					tsrc.Lanes > 1 && tdst.Lanes > 1 {
   468  					v = append(v, simdTypePair{tsrc, tdst})
   469  				}
   470  			}
   471  		}
   472  	}
   473  	slices.SortFunc(v, compareSimdTypePairs)
   474  	return v
   475  }
   476  
   477  func masksFromTypeMap(typeMap simdTypeMap) []simdType {
   478  	m := []simdType{}
   479  	for _, ts := range typeMap {
   480  		for _, tsrc := range ts {
   481  			if tsrc.Type == "mask" {
   482  				m = append(m, tsrc)
   483  			}
   484  		}
   485  	}
   486  	slices.SortFunc(m, compareSimdTypes)
   487  	return m
   488  }
   489  
   490  func typesFromTypeMap(typeMap simdTypeMap) []simdType {
   491  	m := []simdType{}
   492  	for _, ts := range typeMap {
   493  		for _, tsrc := range ts {
   494  			if tsrc.Lanes > 1 {
   495  				m = append(m, tsrc)
   496  			}
   497  		}
   498  	}
   499  	slices.SortFunc(m, compareSimdTypes)
   500  	return m
   501  }
   502  
   503  // writeSIMDTypes generates the simd vector types into a bytes.Buffer
   504  func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer {
   505  	t := templateOf(simdTypesTemplates, "types_amd64")
   506  	loadStore := templateOf(simdLoadStoreTemplate, "loadstore_amd64")
   507  	maskedLoadStore := templateOf(simdMaskedLoadStoreTemplate, "maskedloadstore_amd64")
   508  	maskFromVal := templateOf(simdMaskFromValTemplate, "maskFromVal_amd64")
   509  
   510  	buffer := new(bytes.Buffer)
   511  	buffer.WriteString(simdPackageHeader)
   512  
   513  	sizes := make([]int, 0, len(typeMap))
   514  	for size, types := range typeMap {
   515  		slices.SortFunc(types, compareSimdTypes)
   516  		sizes = append(sizes, size)
   517  	}
   518  	sort.Ints(sizes)
   519  
   520  	for _, size := range sizes {
   521  		if size <= 64 {
   522  			// these are scalar
   523  			continue
   524  		}
   525  		if err := t.ExecuteTemplate(buffer, "sizeTmpl", size); err != nil {
   526  			panic(fmt.Errorf("failed to execute size template for size %d: %w", size, err))
   527  		}
   528  		for _, typeDef := range typeMap[size] {
   529  			if typeDef.Lanes == 1 {
   530  				continue
   531  			}
   532  			if err := t.ExecuteTemplate(buffer, "typeTmpl", typeDef); err != nil {
   533  				panic(fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err))
   534  			}
   535  			if typeDef.Type != "mask" {
   536  				if err := loadStore.ExecuteTemplate(buffer, "loadstore_amd64", typeDef); err != nil {
   537  					panic(fmt.Errorf("failed to execute loadstore template for type %s: %w", typeDef.Name, err))
   538  				}
   539  				// restrict to AVX2 masked loads/stores first.
   540  				if typeDef.MaskedLoadStoreFilter() {
   541  					if err := maskedLoadStore.ExecuteTemplate(buffer, "maskedloadstore_amd64", typeDef); err != nil {
   542  						panic(fmt.Errorf("failed to execute maskedloadstore template for type %s: %w", typeDef.Name, err))
   543  					}
   544  				}
   545  			} else {
   546  				if err := maskFromVal.ExecuteTemplate(buffer, "maskFromVal_amd64", typeDef); err != nil {
   547  					panic(fmt.Errorf("failed to execute maskFromVal template for type %s: %w", typeDef.Name, err))
   548  				}
   549  			}
   550  		}
   551  	}
   552  
   553  	return buffer
   554  }
   555  
   556  func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
   557  	// Gather all features
   558  	type featureKey struct {
   559  		GoArch  string
   560  		Feature string
   561  	}
   562  	featureSet := make(map[featureKey]struct{})
   563  	for _, op := range ops {
   564  		// Generate a feature check for each independant feature in a
   565  		// composite feature.
   566  		for feature := range strings.SplitSeq(op.CPUFeature, ",") {
   567  			feature = strings.TrimSpace(feature)
   568  			featureSet[featureKey{op.GoArch, feature}] = struct{}{}
   569  		}
   570  	}
   571  	features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
   572  		if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 {
   573  			return c
   574  		}
   575  		return compareNatural(a.Feature, b.Feature)
   576  	})
   577  
   578  	// If we ever have the same feature name on more than one GOARCH, we'll have
   579  	// to be more careful about this.
   580  	t := templateOf(simdFeaturesTemplate, "features")
   581  
   582  	buffer := new(bytes.Buffer)
   583  	buffer.WriteString(simdPackageHeader)
   584  
   585  	if err := t.Execute(buffer, features); err != nil {
   586  		panic(fmt.Errorf("failed to execute features template: %w", err))
   587  	}
   588  
   589  	return buffer
   590  }
   591  
   592  // writeSIMDStubs returns two bytes.Buffers containing the declarations for the public
   593  // and internal-use vector intrinsics.
   594  func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) (f, fI *bytes.Buffer) {
   595  	t := templateOf(simdStubsTmpl, "simdStubs")
   596  	f = new(bytes.Buffer)
   597  	fI = new(bytes.Buffer)
   598  	f.WriteString(simdPackageHeader)
   599  	fI.WriteString(simdPackageHeader)
   600  
   601  	slices.SortFunc(ops, compareOperations)
   602  
   603  	for i, op := range ops {
   604  		if op.NoTypes != nil && *op.NoTypes == "true" {
   605  			continue
   606  		}
   607  		if op.SkipMaskedMethod() {
   608  			continue
   609  		}
   610  		idxVecAsScalar, err := checkVecAsScalar(op)
   611  		if err != nil {
   612  			panic(err)
   613  		}
   614  		if s, op, err := classifyOp(op); err == nil {
   615  			if idxVecAsScalar != -1 {
   616  				if s == "op2" || s == "op3" {
   617  					s += "VecAsScalar"
   618  				} else {
   619  					panic(fmt.Errorf("simdgen only supports op2 or op3 with TreatLikeAScalarOfSize"))
   620  				}
   621  			}
   622  			if i == 0 || op.Go != ops[i-1].Go {
   623  				if unicode.IsUpper([]rune(op.Go)[0]) {
   624  					fmt.Fprintf(f, "\n/* %s */\n", op.Go)
   625  				} else {
   626  					fmt.Fprintf(fI, "\n/* %s */\n", op.Go)
   627  				}
   628  			}
   629  			if unicode.IsUpper([]rune(op.Go)[0]) {
   630  				if err := t.ExecuteTemplate(f, s, op); err != nil {
   631  					panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err))
   632  				}
   633  			} else {
   634  				if err := t.ExecuteTemplate(fI, s, op); err != nil {
   635  					panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err))
   636  				}
   637  			}
   638  		} else {
   639  			panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err))
   640  		}
   641  	}
   642  
   643  	vectorConversions := vConvertFromTypeMap(typeMap)
   644  	for _, conv := range vectorConversions {
   645  		if err := t.ExecuteTemplate(f, "vectorConversion", conv); err != nil {
   646  			panic(fmt.Errorf("failed to execute vectorConversion template: %w", err))
   647  		}
   648  	}
   649  
   650  	masks := masksFromTypeMap(typeMap)
   651  	for _, mask := range masks {
   652  		if err := t.ExecuteTemplate(f, "mask", mask); err != nil {
   653  			panic(fmt.Errorf("failed to execute mask template for mask %s: %w", mask.Name, err))
   654  		}
   655  	}
   656  
   657  	return
   658  }
   659  

View as plain text