Source file src/simd/_gen/simdgen/gen_simdIntrinsics.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"slices"
    11  )
    12  
// simdIntrinsicsTmpl is the template source that writeSIMDIntrinsics hands to
// templateOf. It is a set of named templates, each executed on its own:
//
//   - "header" and "footer" open and close the generated file: the package
//     clause and imports for package ssagen, the simdPackage constant, and the
//     simdIntrinsics function that wraps every addF call.
//   - The "op…" templates each emit a single addF registration for one
//     operation, keyed "<receiver type>.<method>", using the opLen… builder of
//     the same name. They differ in which input supplies the receiver type:
//     most use In[0]; the "…Type1" variants and every "…Imm8…" variant use
//     In[1]; "op3_31Zero3" and "op4_31" use In[2]. The "…Imm8…" variants also
//     pass In[0].ImmOffset as an extra builder argument.
//   - "vectorConversion" and the first two entries of "mask" register
//     conversions whose builder returns args[0] unchanged.
//   - "loadStore" and "maskedLoadStore" register the load function and the
//     store method for a type.
//   - "mask" additionally registers And, Or, FromBits and ToBits for a mask
//     type.
//
// Every registration is emitted for sys.AMD64 only. The text is part of the
// generated output, so whitespace inside the literal is significant.
const simdIntrinsicsTmpl = `
{{define "header"}}
package ssagen

import (
	"cmd/compile/internal/ir"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/sys"
)

const simdPackage = "` + simdPackage + `"

func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) {
{{end}}

{{define "op1"}}	addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64)
{{end}}
{{define "op2"}}	addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64)
{{end}}
{{define "op2_21"}}	addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64)
{{end}}
{{define "op2_21Type1"}}	addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64)
{{end}}
{{define "op3"}}	addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64)
{{end}}
{{define "op3_21"}}	addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64)
{{end}}
{{define "op3_21Type1"}}	addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64)
{{end}}
{{define "op3_231Type1"}}	addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64)
{{end}}
{{define "op3_31Zero3"}}	addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen3_31Zero3(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64)
{{end}}
{{define "op4"}}	addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64)
{{end}}
{{define "op4_231Type1"}}	addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64)
{{end}}
{{define "op4_31"}}	addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen4_31(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64)
{{end}}
{{define "op1Imm8"}}	addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
{{end}}
{{define "op2Imm8"}}	addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
{{end}}
{{define "op2Imm8_2I"}}	addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
{{end}}
{{define "op2Imm8_II"}}	addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_II(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
{{end}}
{{define "op2Imm8_SHA1RNDS4"}}	addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_SHA1RNDS4(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
{{end}}
{{define "op3Imm8"}}	addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
{{end}}
{{define "op3Imm8_2I"}}	addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
{{end}}
{{define "op4Imm8"}}	addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
{{end}}

{{define "vectorConversion"}}	addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
{{end}}

{{define "loadStore"}}	addF(simdPackage, "Load{{.Name}}", simdLoad(), sys.AMD64)
	addF(simdPackage, "{{.Name}}.Store", simdStore(), sys.AMD64)
{{end}}

{{define "maskedLoadStore"}}	addF(simdPackage, "LoadMasked{{.Name}}", simdMaskedLoad(ssa.OpLoadMasked{{.ElemBits}}), sys.AMD64)
	addF(simdPackage, "{{.Name}}.StoreMasked", simdMaskedStore(ssa.OpStoreMasked{{.ElemBits}}), sys.AMD64)
{{end}}

{{define "mask"}}	addF(simdPackage, "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
	addF(simdPackage, "{{.VectorCounterpart}}.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
	addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64)
	addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64)
	addF(simdPackage, "{{.Name}}FromBits", simdCvtVToMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64)
	addF(simdPackage, "{{.Name}}.ToBits", simdCvtMaskToV({{.ElemBits}}, {{.Lanes}}), sys.AMD64)
{{end}}

{{define "footer"}}}
{{end}}
`
    92  
    93  // writeSIMDIntrinsics generates the intrinsic mappings and writes it to simdintrinsics.go
    94  // within the specified directory.
    95  func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer {
    96  	t := templateOf(simdIntrinsicsTmpl, "simdintrinsics")
    97  	buffer := new(bytes.Buffer)
    98  	buffer.WriteString(generatedHeader)
    99  
   100  	if err := t.ExecuteTemplate(buffer, "header", nil); err != nil {
   101  		panic(fmt.Errorf("failed to execute header template: %w", err))
   102  	}
   103  
   104  	slices.SortFunc(ops, compareOperations)
   105  
   106  	for _, op := range ops {
   107  		if op.NoTypes != nil && *op.NoTypes == "true" {
   108  			continue
   109  		}
   110  		if op.SkipMaskedMethod() {
   111  			continue
   112  		}
   113  		if s, op, err := classifyOp(op); err == nil {
   114  			if err := t.ExecuteTemplate(buffer, s, op); err != nil {
   115  				panic(fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err))
   116  			}
   117  
   118  		} else {
   119  			panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err))
   120  		}
   121  	}
   122  
   123  	for _, conv := range vConvertFromTypeMap(typeMap) {
   124  		if err := t.ExecuteTemplate(buffer, "vectorConversion", conv); err != nil {
   125  			panic(fmt.Errorf("failed to execute vectorConversion template: %w", err))
   126  		}
   127  	}
   128  
   129  	for _, typ := range typesFromTypeMap(typeMap) {
   130  		if typ.Type != "mask" {
   131  			if err := t.ExecuteTemplate(buffer, "loadStore", typ); err != nil {
   132  				panic(fmt.Errorf("failed to execute loadStore template: %w", err))
   133  			}
   134  		}
   135  	}
   136  
   137  	for _, typ := range typesFromTypeMap(typeMap) {
   138  		if typ.MaskedLoadStoreFilter() {
   139  			if err := t.ExecuteTemplate(buffer, "maskedLoadStore", typ); err != nil {
   140  				panic(fmt.Errorf("failed to execute maskedLoadStore template: %w", err))
   141  			}
   142  		}
   143  	}
   144  
   145  	for _, mask := range masksFromTypeMap(typeMap) {
   146  		if err := t.ExecuteTemplate(buffer, "mask", mask); err != nil {
   147  			panic(fmt.Errorf("failed to execute mask template: %w", err))
   148  		}
   149  	}
   150  
   151  	if err := t.ExecuteTemplate(buffer, "footer", nil); err != nil {
   152  		panic(fmt.Errorf("failed to execute footer template: %w", err))
   153  	}
   154  
   155  	return buffer
   156  }
   157  

View as plain text