1
2
3
4
5 package main
6
7 import (
8 "bytes"
9 "cmp"
10 "fmt"
11 "maps"
12 "slices"
13 "sort"
14 "strings"
15 "unicode"
16 )
17
// simdType describes one SIMD vector or mask type for which code is generated.
type simdType struct {
	// Name is the Go name of the type.
	Name string
	// Lanes is the number of elements in the type.
	Lanes int
	// Base is the element type name: the operand's base name followed by
	// its element width in bits.
	Base string
	// Fields is the pre-rendered text of the struct fields emitted for the type.
	Fields string
	// Type is the operand class the type came from; this file compares it
	// against "vreg" and "mask".
	Type string
	// VectorCounterpart is, for masks, Name with "Mask" replaced by "Int".
	// It is empty for non-mask types.
	VectorCounterpart string
	// ReshapedVectorWithAndOr is, for masks, "Int32x<Size/32>".
	// It is empty for non-mask types.
	ReshapedVectorWithAndOr string
	// Size is the total width of the type in bits.
	Size int
}
28
29 func (x simdType) ElemBits() int {
30 return x.Size / x.Lanes
31 }
32
33
34
35
36 func (x simdType) LanesContainer() int {
37 if x.Lanes > 64 {
38 panic("too many lanes")
39 }
40 if x.Lanes > 32 {
41 return 64
42 }
43 if x.Lanes > 16 {
44 return 32
45 }
46 if x.Lanes > 8 {
47 return 16
48 }
49 return 8
50 }
51
52
53
54
55 func (x simdType) MaskedLoadStoreFilter() bool {
56 return x.Size == 512 || x.ElemBits() >= 32 && x.Type != "mask"
57 }
58
59 func (x simdType) IntelSizeSuffix() string {
60 switch x.ElemBits() {
61 case 8:
62 return "B"
63 case 16:
64 return "W"
65 case 32:
66 return "D"
67 case 64:
68 return "Q"
69 }
70 panic("oops")
71 }
72
73 func (x simdType) MaskedLoadDoc() string {
74 if x.Size == 512 || x.ElemBits() < 32 {
75 return fmt.Sprintf("// Asm: VMOVDQU%d.Z, CPU Feature: AVX512", x.ElemBits())
76 } else {
77 return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix())
78 }
79 }
80
81 func (x simdType) MaskedStoreDoc() string {
82 if x.Size == 512 || x.ElemBits() < 32 {
83 return fmt.Sprintf("// Asm: VMOVDQU%d, CPU Feature: AVX512", x.ElemBits())
84 } else {
85 return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix())
86 }
87 }
88
89 func compareSimdTypes(x, y simdType) int {
90
91 if c := -compareNatural(x.Type, y.Type); c != 0 {
92 return c
93 }
94
95
96
97 if c := compareNatural(x.Base[:3], y.Base[:3]); c != 0 {
98 return c
99 }
100
101 if c := x.ElemBits() - y.ElemBits(); c != 0 {
102 return c
103 }
104
105 return x.Size - y.Size
106 }
107
// simdTypeMap groups simdType values by their total size in bits.
type simdTypeMap map[int][]simdType
109
// simdTypePair is the source and destination type of one generated
// vector-to-vector conversion.
type simdTypePair struct {
	// Tsrc is the type converted from.
	Tsrc simdType
	// Tdst is the type converted to.
	Tdst simdType
}
114
115 func compareSimdTypePairs(x, y simdTypePair) int {
116 c := compareSimdTypes(x.Tsrc, y.Tsrc)
117 if c != 0 {
118 return c
119 }
120 return compareSimdTypes(x.Tdst, y.Tdst)
121 }
122
// simdPackageHeader is the preamble written first into every buffer produced
// by the writeSIMD* functions in this file: generatedHeader followed by the
// goexperiment.simd build constraint and the clause for package simd.
const simdPackageHeader = generatedHeader + `
//go:build goexperiment.simd

package simd
`
128
// simdTypesTemplates defines two named templates used by writeSIMDTypes:
//
//   - "sizeTmpl" is executed with a size in bits and emits the v<size> tag type;
//   - "typeTmpl" is executed with a simdType and emits its struct definition,
//     using the pre-rendered Fields text as the struct body.
const simdTypesTemplates = `
{{define "sizeTmpl"}}
// v{{.}} is a tag type that tells the compiler that this is really {{.}}-bit SIMD
type v{{.}} struct {
_{{.}} [0]func() // uncomparable
}
{{end}}

{{define "typeTmpl"}}
// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}}
type {{.Name}} struct {
{{.Fields}}
}

{{end}}
`
145
// simdFeaturesTemplate emits the X86Features type and, for every element of
// the slice it is executed with, one method that reports the matching
// cpu.X86.Has<Feature> flag. Each element must provide Feature and GoArch.
// The feature named "AVX512" gets a longer doc comment explaining that it
// stands for a bundle of five AVX-512 features.
const simdFeaturesTemplate = `
import "internal/cpu"

type X86Features struct {}

var X86 X86Features

{{range .}}
{{- if eq .Feature "AVX512"}}
// {{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features.
//
// These five CPU features are bundled together, and no use of AVX-512
// is allowed unless all of these features are supported together.
// Nearly every CPU that has shipped with any support for AVX-512 has
// supported all five of these features.
{{- else -}}
// {{.Feature}} returns whether the CPU supports the {{.Feature}} feature.
{{- end}}
//
// {{.Feature}} is defined on all GOARCHes, but will only return true on
// GOARCH {{.GoArch}}.
func (X86Features) {{.Feature}}() bool {
return cpu.X86.Has{{.Feature}}
}
{{end}}
`
172
// simdLoadStoreTemplate is executed with a simdType and emits its Len
// method together with the body-less Load<Name> function and Store method,
// both of which take a pointer to an array of Lanes elements of type Base.
const simdLoadStoreTemplate = `
// Len returns the number of elements in a {{.Name}}
func (x {{.Name}}) Len() int { return {{.Lanes}} }

// Load{{.Name}} loads a {{.Name}} from an array
//
//go:noescape
func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}}

// Store stores a {{.Name}} to an array
//
//go:noescape
func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}})
`
187
// simdMaskFromValTemplate is executed with a mask simdType and emits the
// body-less <Name>FromBits function and ToBits method, which convert between
// the mask and an unsigned integer bitmap of LanesContainer bits. When the
// mask has fewer lanes than the container has bits, the generated docs note
// that only the low Lanes bits are significant.
const simdMaskFromValTemplate = `
// {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset.
{{- if ne .Lanes .LanesContainer}}
// Only the lower {{.Lanes}} bits of y are used.
{{- end}}
//
// Asm: KMOV{{.IntelSizeSuffix}}, CPU Feature: AVX512
func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}}

// ToBits constructs a bitmap from a {{.Name}}, where 1 means set for the indexed element, 0 means unset.
{{- if ne .Lanes .LanesContainer}}
// Only the lower {{.Lanes}} bits of the result are used.
{{- end}}
//
// Asm: KMOV{{.IntelSizeSuffix}}, CPU Feature: AVX512
func (x {{.Name}}) ToBits() uint{{.LanesContainer}}
`
205
// simdMaskedLoadStoreTemplate is executed with a simdType and emits the
// body-less LoadMasked<Name> function and StoreMasked method. The mask
// parameter type is Mask<ElemBits>x<Lanes>, and the instruction line of each
// doc comment comes from MaskedLoadDoc and MaskedStoreDoc respectively.
const simdMaskedLoadStoreTemplate = `
// LoadMasked{{.Name}} loads a {{.Name}} from an array,
// at those elements enabled by mask
//
{{.MaskedLoadDoc}}
//
//go:noescape
func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) {{.Name}}

// StoreMasked stores a {{.Name}} to an array,
// at those elements enabled by mask
//
{{.MaskedStoreDoc}}
//
//go:noescape
func (x {{.Name}}) StoreMasked(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}})
`
223
// simdStubsTmpl holds the named templates from which writeSIMDStubs emits
// body-less method declarations. The "op*" templates are executed with an
// operation and differ only in how its operands are mapped onto the receiver
// and the parameters of the generated method; the "*Imm8*" variants add a
// uint8 immediate parameter. "vectorConversion" is executed with a
// simdTypePair and "mask" with a mask simdType.
//
// Generated doc comments start with the name of the declared method, as Go
// doc comments are expected to.
const simdStubsTmpl = `
{{define "op1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}() {{.GoType}}
{{end}}

{{define "op2"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op2_21"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op2_21Type1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op3"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op3_31Zero3"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op3_21"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op3_21Type1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op3_231Type1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op2VecAsScalar"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}}
{{end}}

{{define "op3VecAsScalar"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}}
{{end}}

{{define "op4"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}}
{{end}}

{{define "op4_231Type1"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}}
{{end}}

{{define "op4_31"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}}
{{end}}

{{define "op1Imm8"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}}
{{end}}

{{define "op2Imm8"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op2Imm8_2I"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}}
{{end}}

{{define "op2Imm8_II"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} result in better performance when they are constants, non-constant values will be translated into a jump table.
// {{.ImmName}} should be between 0 and 3, inclusive; other values may result in a runtime panic.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op2Imm8_SHA1RNDS4"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
{{end}}

{{define "op3Imm8"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}}
{{end}}

{{define "op3Imm8_2I"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}}
{{end}}


{{define "op4Imm8"}}
{{if .Documentation}}{{.Documentation}}
//{{end}}
// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}}
{{end}}

{{define "vectorConversion"}}
// As{{.Tdst.Name}} converts from {{.Tsrc.Name}} to {{.Tdst.Name}}
func (from {{.Tsrc.Name}}) As{{.Tdst.Name}}() (to {{.Tdst.Name}})
{{end}}

{{define "mask"}}
// As{{.VectorCounterpart}} converts from {{.Name}} to {{.VectorCounterpart}}
func (from {{.Name}}) As{{.VectorCounterpart}}() (to {{.VectorCounterpart}})

// asMask converts from {{.VectorCounterpart}} to {{.Name}}
func (from {{.VectorCounterpart}}) asMask() (to {{.Name}})

func (x {{.Name}}) And(y {{.Name}}) {{.Name}}

func (x {{.Name}}) Or(y {{.Name}}) {{.Name}}
{{end}}
`
414
415
416
417 func parseSIMDTypes(ops []Operation) simdTypeMap {
418
419 ret := map[int][]simdType{}
420 seen := map[string]struct{}{}
421 processArg := func(arg Operand) {
422 if arg.Class == "immediate" || arg.Class == "greg" {
423
424 return
425 }
426 if _, ok := seen[*arg.Go]; ok {
427 return
428 }
429 seen[*arg.Go] = struct{}{}
430
431 lanes := *arg.Lanes
432 base := fmt.Sprintf("%s%d", *arg.Base, *arg.ElemBits)
433 tagFieldNameS := fmt.Sprintf("%sx%d", base, lanes)
434 tagFieldS := fmt.Sprintf("%s v%d", tagFieldNameS, *arg.Bits)
435 valFieldS := fmt.Sprintf("vals%s[%d]%s", strings.Repeat(" ", len(tagFieldNameS)-3), lanes, base)
436 fields := fmt.Sprintf("\t%s\n\t%s", tagFieldS, valFieldS)
437 if arg.Class == "mask" {
438 vectorCounterpart := strings.ReplaceAll(*arg.Go, "Mask", "Int")
439 reshapedVectorWithAndOr := fmt.Sprintf("Int32x%d", *arg.Bits/32)
440 ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits})
441
442 if _, ok := seen[vectorCounterpart]; !ok {
443 seen[vectorCounterpart] = struct{}{}
444 ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits})
445 }
446 } else {
447 ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits})
448 }
449 }
450 for _, op := range ops {
451 for _, arg := range op.In {
452 processArg(arg)
453 }
454 for _, arg := range op.Out {
455 processArg(arg)
456 }
457 }
458 return ret
459 }
460
461 func vConvertFromTypeMap(typeMap simdTypeMap) []simdTypePair {
462 v := []simdTypePair{}
463 for _, ts := range typeMap {
464 for i, tsrc := range ts {
465 for j, tdst := range ts {
466 if i != j && tsrc.Type == tdst.Type && tsrc.Type == "vreg" &&
467 tsrc.Lanes > 1 && tdst.Lanes > 1 {
468 v = append(v, simdTypePair{tsrc, tdst})
469 }
470 }
471 }
472 }
473 slices.SortFunc(v, compareSimdTypePairs)
474 return v
475 }
476
477 func masksFromTypeMap(typeMap simdTypeMap) []simdType {
478 m := []simdType{}
479 for _, ts := range typeMap {
480 for _, tsrc := range ts {
481 if tsrc.Type == "mask" {
482 m = append(m, tsrc)
483 }
484 }
485 }
486 slices.SortFunc(m, compareSimdTypes)
487 return m
488 }
489
490 func typesFromTypeMap(typeMap simdTypeMap) []simdType {
491 m := []simdType{}
492 for _, ts := range typeMap {
493 for _, tsrc := range ts {
494 if tsrc.Lanes > 1 {
495 m = append(m, tsrc)
496 }
497 }
498 }
499 slices.SortFunc(m, compareSimdTypes)
500 return m
501 }
502
503
504 func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer {
505 t := templateOf(simdTypesTemplates, "types_amd64")
506 loadStore := templateOf(simdLoadStoreTemplate, "loadstore_amd64")
507 maskedLoadStore := templateOf(simdMaskedLoadStoreTemplate, "maskedloadstore_amd64")
508 maskFromVal := templateOf(simdMaskFromValTemplate, "maskFromVal_amd64")
509
510 buffer := new(bytes.Buffer)
511 buffer.WriteString(simdPackageHeader)
512
513 sizes := make([]int, 0, len(typeMap))
514 for size, types := range typeMap {
515 slices.SortFunc(types, compareSimdTypes)
516 sizes = append(sizes, size)
517 }
518 sort.Ints(sizes)
519
520 for _, size := range sizes {
521 if size <= 64 {
522
523 continue
524 }
525 if err := t.ExecuteTemplate(buffer, "sizeTmpl", size); err != nil {
526 panic(fmt.Errorf("failed to execute size template for size %d: %w", size, err))
527 }
528 for _, typeDef := range typeMap[size] {
529 if typeDef.Lanes == 1 {
530 continue
531 }
532 if err := t.ExecuteTemplate(buffer, "typeTmpl", typeDef); err != nil {
533 panic(fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err))
534 }
535 if typeDef.Type != "mask" {
536 if err := loadStore.ExecuteTemplate(buffer, "loadstore_amd64", typeDef); err != nil {
537 panic(fmt.Errorf("failed to execute loadstore template for type %s: %w", typeDef.Name, err))
538 }
539
540 if typeDef.MaskedLoadStoreFilter() {
541 if err := maskedLoadStore.ExecuteTemplate(buffer, "maskedloadstore_amd64", typeDef); err != nil {
542 panic(fmt.Errorf("failed to execute maskedloadstore template for type %s: %w", typeDef.Name, err))
543 }
544 }
545 } else {
546 if err := maskFromVal.ExecuteTemplate(buffer, "maskFromVal_amd64", typeDef); err != nil {
547 panic(fmt.Errorf("failed to execute maskFromVal template for type %s: %w", typeDef.Name, err))
548 }
549 }
550 }
551 }
552
553 return buffer
554 }
555
556 func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
557
558 type featureKey struct {
559 GoArch string
560 Feature string
561 }
562 featureSet := make(map[featureKey]struct{})
563 for _, op := range ops {
564
565
566 for feature := range strings.SplitSeq(op.CPUFeature, ",") {
567 feature = strings.TrimSpace(feature)
568 featureSet[featureKey{op.GoArch, feature}] = struct{}{}
569 }
570 }
571 features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
572 if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 {
573 return c
574 }
575 return compareNatural(a.Feature, b.Feature)
576 })
577
578
579
580 t := templateOf(simdFeaturesTemplate, "features")
581
582 buffer := new(bytes.Buffer)
583 buffer.WriteString(simdPackageHeader)
584
585 if err := t.Execute(buffer, features); err != nil {
586 panic(fmt.Errorf("failed to execute features template: %w", err))
587 }
588
589 return buffer
590 }
591
592
593
594 func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) (f, fI *bytes.Buffer) {
595 t := templateOf(simdStubsTmpl, "simdStubs")
596 f = new(bytes.Buffer)
597 fI = new(bytes.Buffer)
598 f.WriteString(simdPackageHeader)
599 fI.WriteString(simdPackageHeader)
600
601 slices.SortFunc(ops, compareOperations)
602
603 for i, op := range ops {
604 if op.NoTypes != nil && *op.NoTypes == "true" {
605 continue
606 }
607 if op.SkipMaskedMethod() {
608 continue
609 }
610 idxVecAsScalar, err := checkVecAsScalar(op)
611 if err != nil {
612 panic(err)
613 }
614 if s, op, err := classifyOp(op); err == nil {
615 if idxVecAsScalar != -1 {
616 if s == "op2" || s == "op3" {
617 s += "VecAsScalar"
618 } else {
619 panic(fmt.Errorf("simdgen only supports op2 or op3 with TreatLikeAScalarOfSize"))
620 }
621 }
622 if i == 0 || op.Go != ops[i-1].Go {
623 if unicode.IsUpper([]rune(op.Go)[0]) {
624 fmt.Fprintf(f, "\n/* %s */\n", op.Go)
625 } else {
626 fmt.Fprintf(fI, "\n/* %s */\n", op.Go)
627 }
628 }
629 if unicode.IsUpper([]rune(op.Go)[0]) {
630 if err := t.ExecuteTemplate(f, s, op); err != nil {
631 panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err))
632 }
633 } else {
634 if err := t.ExecuteTemplate(fI, s, op); err != nil {
635 panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err))
636 }
637 }
638 } else {
639 panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err))
640 }
641 }
642
643 vectorConversions := vConvertFromTypeMap(typeMap)
644 for _, conv := range vectorConversions {
645 if err := t.ExecuteTemplate(f, "vectorConversion", conv); err != nil {
646 panic(fmt.Errorf("failed to execute vectorConversion template: %w", err))
647 }
648 }
649
650 masks := masksFromTypeMap(typeMap)
651 for _, mask := range masks {
652 if err := t.ExecuteTemplate(f, "mask", mask); err != nil {
653 panic(fmt.Errorf("failed to execute mask template for mask %s: %w", mask.Name, err))
654 }
655 }
656
657 return
658 }
659
View as plain text