// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssagen

import (
	"fmt"
	"internal/abi"
	"internal/buildcfg"

	"cmd/compile/internal/base"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/typecheck"
	"cmd/compile/internal/types"
	"cmd/internal/sys"
)
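
// intrinsics is the global table of intrinsic builders, keyed by
// (architecture, package, function name). It is populated by initIntrinsics.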
var intrinsics intrinsicBuilders

// An intrinsicBuilder converts a call node n into an ssa value that
// implements that call as an intrinsic. args is a list of arguments to the func.
type intrinsicBuilder func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value

type intrinsicKey struct {
	arch *sys.Arch
	pkg  string
	fn   string
}

// intrinsicBuildConfig holds the build settings that determine which
// intrinsics are available and how they are lowered. Tests may supply a
// custom config; otherwise it is populated from buildcfg.
type intrinsicBuildConfig struct {
	instrumenting bool

	go386     string
	goamd64   int
	goarm     buildcfg.GoarmFeatures
	goarm64   buildcfg.Goarm64Features
	gomips    string
	gomips64  string
	goppc64   int
	goriscv64 int
}

type intrinsicBuilders map[intrinsicKey]intrinsicBuilder

// add defines the intrinsic builder b for pkg.fn on the given architecture.
// It panics if a builder has already been registered for that key.
func (ib intrinsicBuilders) add(arch *sys.Arch, pkg, fn string, b intrinsicBuilder) {
	if _, found := ib[intrinsicKey{arch, pkg, fn}]; found {
		panic(fmt.Sprintf("intrinsic already exists for %v.%v on %v", pkg, fn, arch.Name))
	}
	ib[intrinsicKey{arch, pkg, fn}] = b
}

// addForArchs defines the intrinsic builder b for pkg.fn on all of the
// given architectures.
func (ib intrinsicBuilders) addForArchs(pkg, fn string, b intrinsicBuilder, archs ...*sys.Arch) {
	for _, arch := range archs {
		ib.add(arch, pkg, fn, b)
	}
}

// addForFamilies does the same as addForArchs but operates on architecture families.
func (ib intrinsicBuilders) addForFamilies(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily) {
	for _, arch := range sys.Archs {
		if arch.InFamily(archFamilies...) {
			ib.add(arch, pkg, fn, b)
		}
	}
}

// alias aliases pkg.fn to targetPkg.targetFn on every architecture in archs
// for which targetPkg.targetFn is already defined. It panics if the target
// intrinsic does not exist on any of the given architectures.
func (ib intrinsicBuilders) alias(pkg, fn, targetPkg, targetFn string, archs ...*sys.Arch) {
	aliased := false
	for _, arch := range archs {
		if b := ib.lookup(arch, targetPkg, targetFn); b != nil {
			ib.add(arch, pkg, fn, b)
			aliased = true
		}
	}
	if !aliased {
		panic(fmt.Sprintf("attempted to alias undefined intrinsic: %s.%s", pkg, fn))
	}
}

// lookup returns the intrinsic builder for pkg.fn on the given architecture,
// or nil if none is registered.
func (ib intrinsicBuilders) lookup(arch *sys.Arch, pkg, fn string) intrinsicBuilder {
	return ib[intrinsicKey{arch, pkg, fn}]
}

func initIntrinsics(cfg *intrinsicBuildConfig) {
	if cfg == nil {
		cfg = &intrinsicBuildConfig{
			instrumenting: base.Flag.Cfg.Instrumenting,
			go386:         buildcfg.GO386,
			goamd64:       buildcfg.GOAMD64,
			goarm:         buildcfg.GOARM,
			goarm64:       buildcfg.GOARM64,
			gomips:        buildcfg.GOMIPS,
			gomips64:      buildcfg.GOMIPS64,
			goppc64:       buildcfg.GOPPC64,
			goriscv64:     buildcfg.GORISCV64,
		}
	}
	intrinsics = intrinsicBuilders{}

	// p4 collects the architectures with 4-byte pointers, p8 those with
	// 8-byte pointers. lwatomics collects the architectures whose
	// acquire/release atomics are aliased to the plain sequentially
	// consistent operations (every architecture except PPC64, which has
	// dedicated acquire/release intrinsics).
	var p4 []*sys.Arch
	var p8 []*sys.Arch
	var lwatomics []*sys.Arch
	for _, a := range sys.Archs {
		if a.PtrSize == 4 {
			p4 = append(p4, a)
		} else {
			p8 = append(p8, a)
		}
		if a.Family != sys.PPC64 {
			lwatomics = append(lwatomics, a)
		}
	}
	all := sys.Archs[:]

	add := func(pkg, fn string, b intrinsicBuilder, archs ...*sys.Arch) {
		intrinsics.addForArchs(pkg, fn, b, archs...)
	}
	addF := func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily) {
		intrinsics.addForFamilies(pkg, fn, b, archFamilies...)
	}
	alias := func(pkg, fn, pkg2, fn2 string, archs ...*sys.Arch) {
		intrinsics.alias(pkg, fn, pkg2, fn2, archs...)
	}

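	/******** runtime ********/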
	if !cfg.instrumenting {
		add("runtime", "slicebytetostringtmp",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				// Compiler frontend optimizations emit OBYTES2STRTMP nodes
				// for the backend instead of slicebytetostringtmp calls
				// when not instrumenting.
				return s.newValue2(ssa.OpStringMake, n.Type(), args[0], args[1])
			},
			all...)
	}
	addF("internal/runtime/math", "MulUintptr",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			if s.config.PtrSize == 4 {
				return s.newValue2(ssa.OpMul32uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1])
			}
			return s.newValue2(ssa.OpMul64uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1])
		},
		sys.AMD64, sys.I386, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.ARM64)
	add("runtime", "KeepAlive",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			data := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, args[0])
			s.vars[memVar] = s.newValue2(ssa.OpKeepAlive, types.TypeMem, data, s.mem())
			return nil
		},
		all...)

	addF("runtime", "publicationBarrier",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue1(ssa.OpPubBarrier, types.TypeMem, s.mem())
			return nil
		},
		sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64)

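	/******** internal/runtime/sys ********/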
	add("internal/runtime/sys", "GetCallerPC",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue0(ssa.OpGetCallerPC, s.f.Config.Types.Uintptr)
		},
		all...)

	add("internal/runtime/sys", "GetCallerSP",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpGetCallerSP, s.f.Config.Types.Uintptr, s.mem())
		},
		all...)

	add("internal/runtime/sys", "GetClosurePtr",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue0(ssa.OpGetClosurePtr, s.f.Config.Types.Uintptr)
		},
		all...)

	addF("internal/runtime/sys", "Bswap32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0])
		},
		sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X)
	addF("internal/runtime/sys", "Bswap64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0])
		},
		sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X)

	addF("runtime", "memequal",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue4(ssa.OpMemEq, s.f.Config.Types.Bool, args[0], args[1], args[2], s.mem())
		},
		sys.ARM64)

	if cfg.goppc64 >= 10 {
		// Use these only on Power10, where the new byte reverse
		// instructions make them worthwhile as intrinsics.
		addF("internal/runtime/sys", "Bswap32",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0])
			},
			sys.PPC64)
		addF("internal/runtime/sys", "Bswap64",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0])
			},
			sys.PPC64)
	}

	if cfg.goriscv64 >= 22 {
		// GORISCV64=rva22u64 and above guarantees the Zbb extension,
		// which provides byte-reverse instructions.
		addF("internal/runtime/sys", "Bswap32",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0])
			},
			sys.RISCV64)
		addF("internal/runtime/sys", "Bswap64",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0])
			},
			sys.RISCV64)
	}

	makePrefetchFunc := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue2(op, types.TypeMem, args[0], s.mem())
			return nil
		}
	}

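	// Make the Prefetch intrinsics on the supported platforms; on the
	// remaining platforms the calls stay as ordinary (empty) stub calls.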
	addF("internal/runtime/sys", "Prefetch", makePrefetchFunc(ssa.OpPrefetchCache),
		sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64)
	addF("internal/runtime/sys", "PrefetchStreamed", makePrefetchFunc(ssa.OpPrefetchCacheStreamed),
		sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64)

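	/******** internal/runtime/atomic ********/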
	type atomicOpEmitter func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool)

	addF("internal/runtime/atomic", "Load",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue2(ssa.OpAtomicLoad32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
		},
		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Load8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue2(ssa.OpAtomicLoad8, types.NewTuple(types.Types[types.TUINT8], types.TypeMem), args[0], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT8], v)
		},
		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Load64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue2(ssa.OpAtomicLoad64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
		},
		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "LoadAcq",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue2(ssa.OpAtomicLoadAcq32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
		},
		sys.PPC64)
	addF("internal/runtime/atomic", "LoadAcq64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue2(ssa.OpAtomicLoadAcq64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
		},
		sys.PPC64)
	addF("internal/runtime/atomic", "Loadp",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue2(ssa.OpAtomicLoadPtr, types.NewTuple(s.f.Config.Types.BytePtr, types.TypeMem), args[0], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, s.f.Config.Types.BytePtr, v)
		},
		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)

	addF("internal/runtime/atomic", "Store",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore32, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Store8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore8, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Store64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore64, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.ARM64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "StorepNoWB",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStorePtrNoWB, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "StoreRel",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStoreRel32, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.PPC64)
	addF("internal/runtime/atomic", "StoreRel64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStoreRel64, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.PPC64)

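	// makeAtomicStoreGuardedIntrinsicLoong64 emits a runtime check of the
	// loong64 LAM_BH feature bit and dispatches to the LAM variant of the
	// store (op1) when it is available, falling back to op0 otherwise.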
	makeAtomicStoreGuardedIntrinsicLoong64 := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			// The target atomic feature is identified by dynamic detection.
			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLAM_BH, s.sb)
			v := s.load(types.Types[types.TBOOL], addr)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the atomic instruction - use it directly.
			s.startBlock(bTrue)
			emit(s, n, args, op1, typ, false)
			s.endBlock().AddEdgeTo(bEnd)

			// Use the original instruction sequence.
			s.startBlock(bFalse)
			emit(s, n, args, op0, typ, false)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)

			return nil
		}
	}

	atomicStoreEmitterLoong64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
		v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
		if needReturn {
			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
		}
	}

	addF("internal/runtime/atomic", "Store8",
		makeAtomicStoreGuardedIntrinsicLoong64(ssa.OpAtomicStore8, ssa.OpAtomicStore8Variant, types.TUINT8, atomicStoreEmitterLoong64),
		sys.Loong64)
	addF("internal/runtime/atomic", "Store",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore32Variant, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.Loong64)
	addF("internal/runtime/atomic", "Store64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore64Variant, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.Loong64)

	addF("internal/runtime/atomic", "Xchg8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicExchange8, types.NewTuple(types.Types[types.TUINT8], types.TypeMem), args[0], args[1], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT8], v)
		},
		sys.AMD64, sys.PPC64)
	addF("internal/runtime/atomic", "Xchg",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicExchange32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
		},
		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Xchg64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicExchange64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
		},
		sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)

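	// makeAtomicGuardedIntrinsicARM64common emits the LSE atomic op (op1)
	// directly when GOARM64 guarantees LSE, and otherwise emits a runtime
	// check of ARM64HasATOMICS that chooses between the LL/SC op (op0) and
	// the LSE op (op1).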
	makeAtomicGuardedIntrinsicARM64common := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter, needReturn bool) intrinsicBuilder {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			if cfg.goarm64.LSE {
				emit(s, n, args, op1, typ, needReturn)
			} else {
				// The target atomic feature is identified by dynamic detection.
				addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARM64HasATOMICS, s.sb)
				v := s.load(types.Types[types.TBOOL], addr)
				b := s.endBlock()
				b.Kind = ssa.BlockIf
				b.SetControl(v)
				bTrue := s.f.NewBlock(ssa.BlockPlain)
				bFalse := s.f.NewBlock(ssa.BlockPlain)
				bEnd := s.f.NewBlock(ssa.BlockPlain)
				b.AddEdgeTo(bTrue)
				b.AddEdgeTo(bFalse)
				b.Likely = ssa.BranchLikely

				// We have the atomic instruction - use it directly.
				s.startBlock(bTrue)
				emit(s, n, args, op1, typ, needReturn)
				s.endBlock().AddEdgeTo(bEnd)

				// Use the original instruction sequence.
				s.startBlock(bFalse)
				emit(s, n, args, op0, typ, needReturn)
				s.endBlock().AddEdgeTo(bEnd)

				// Merge results.
				s.startBlock(bEnd)
			}
			if needReturn {
				return s.variable(n, types.Types[typ])
			} else {
				return nil
			}
		}
	}
	makeAtomicGuardedIntrinsicARM64 := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder {
		return makeAtomicGuardedIntrinsicARM64common(op0, op1, typ, emit, true)
	}
	makeAtomicGuardedIntrinsicARM64old := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder {
		return makeAtomicGuardedIntrinsicARM64common(op0, op1, typ, emit, false)
	}

	atomicEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
		v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
		if needReturn {
			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
		}
	}
	addF("internal/runtime/atomic", "Xchg8",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange8, ssa.OpAtomicExchange8Variant, types.TUINT8, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Xchg",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange32, ssa.OpAtomicExchange32Variant, types.TUINT32, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Xchg64",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, types.TUINT64, atomicEmitterARM64),
		sys.ARM64)

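	// On loong64 a byte-sized atomic exchange instruction is only available
	// with the LAM_BH feature; otherwise fall back to a call to the pure Go
	// version.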
	makeAtomicXchg8GuardedIntrinsicLoong64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLAM_BH, s.sb)
			v := s.load(types.Types[types.TBOOL], addr)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the atomic instruction - use it directly.
			s.startBlock(bTrue)
			s.vars[n] = s.newValue3(op, types.NewTuple(types.Types[types.TUINT8], types.TypeMem), args[0], args[1], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, s.vars[n])
			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[types.TUINT8], s.vars[n])
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TUINT8])
		}
	}
	addF("internal/runtime/atomic", "Xchg8",
		makeAtomicXchg8GuardedIntrinsicLoong64(ssa.OpAtomicExchange8Variant),
		sys.Loong64)

	addF("internal/runtime/atomic", "Xadd",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicAdd32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
		},
		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Xadd64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicAdd64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
		},
		sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)

	addF("internal/runtime/atomic", "Xadd",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, types.TUINT32, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Xadd64",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, types.TUINT64, atomicEmitterARM64),
		sys.ARM64)

	addF("internal/runtime/atomic", "Cas",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
		},
		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Cas64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue4(ssa.OpAtomicCompareAndSwap64, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
		},
		sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "CasRel",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
		},
		sys.PPC64)

	atomicCasEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
		v := s.newValue4(op, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
		if needReturn {
			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
		}
	}

	addF("internal/runtime/atomic", "Cas",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap32, ssa.OpAtomicCompareAndSwap32Variant, types.TBOOL, atomicCasEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Cas64",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap64, ssa.OpAtomicCompareAndSwap64Variant, types.TBOOL, atomicCasEmitterARM64),
		sys.ARM64)

	atomicCasEmitterLoong64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
		v := s.newValue4(op, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
		if needReturn {
			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
		}
	}

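	// makeAtomicCasGuardedIntrinsicLoong64 emits a runtime check of the
	// loong64 LAMCAS feature bit and dispatches to the LAMCAS variant of
	// compare-and-swap (op1) when available, falling back to op0.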
	makeAtomicCasGuardedIntrinsicLoong64 := func(op0, op1 ssa.Op, emit atomicOpEmitter) intrinsicBuilder {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			// The target atomic feature is identified by dynamic detection.
			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLAMCAS, s.sb)
			v := s.load(types.Types[types.TBOOL], addr)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the atomic instruction - use it directly.
			s.startBlock(bTrue)
			emit(s, n, args, op1, types.TBOOL, true)
			s.endBlock().AddEdgeTo(bEnd)

			// Use the original instruction sequence.
			s.startBlock(bFalse)
			emit(s, n, args, op0, types.TBOOL, true)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)

			return s.variable(n, types.Types[types.TBOOL])
		}
	}

	addF("internal/runtime/atomic", "Cas",
		makeAtomicCasGuardedIntrinsicLoong64(ssa.OpAtomicCompareAndSwap32, ssa.OpAtomicCompareAndSwap32Variant, atomicCasEmitterLoong64),
		sys.Loong64)
	addF("internal/runtime/atomic", "Cas64",
		makeAtomicCasGuardedIntrinsicLoong64(ssa.OpAtomicCompareAndSwap64, ssa.OpAtomicCompareAndSwap64Variant, atomicCasEmitterLoong64),
		sys.Loong64)

	addF("internal/runtime/atomic", "And8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "And",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Or8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Or",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)

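	// On arm64 all of the And/Or intrinsics are implemented with the
	// value-returning atomic operations; the old-style And8/And/Or8/Or
	// entry points simply discard the result.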
	addF("internal/runtime/atomic", "And8",
		makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicAnd8value, ssa.OpAtomicAnd8valueVariant, types.TUINT8, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Or8",
		makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicOr8value, ssa.OpAtomicOr8valueVariant, types.TUINT8, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "And64",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd64value, ssa.OpAtomicAnd64valueVariant, types.TUINT64, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "And32",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32value, ssa.OpAtomicAnd32valueVariant, types.TUINT32, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "And",
		makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicAnd32value, ssa.OpAtomicAnd32valueVariant, types.TUINT32, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Or64",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr64value, ssa.OpAtomicOr64valueVariant, types.TUINT64, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Or32",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32value, ssa.OpAtomicOr32valueVariant, types.TUINT32, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Or",
		makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicOr32value, ssa.OpAtomicOr32valueVariant, types.TUINT32, atomicEmitterARM64),
		sys.ARM64)

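	// amd64 and loong64 implement the value-returning And/Or intrinsics
	// directly with tuple-producing atomic ops.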
	addF("internal/runtime/atomic", "And64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicAnd64value, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
			p0, p1 := s.split(v)
			s.vars[memVar] = p1
			return p0
		},
		sys.AMD64, sys.Loong64)
	addF("internal/runtime/atomic", "And32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicAnd32value, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
			p0, p1 := s.split(v)
			s.vars[memVar] = p1
			return p0
		},
		sys.AMD64, sys.Loong64)
	addF("internal/runtime/atomic", "Or64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicOr64value, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
			p0, p1 := s.split(v)
			s.vars[memVar] = p1
			return p0
		},
		sys.AMD64, sys.Loong64)
	addF("internal/runtime/atomic", "Or32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicOr32value, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
			p0, p1 := s.split(v)
			s.vars[memVar] = p1
			return p0
		},
		sys.AMD64, sys.Loong64)

	// Aliases for atomic load operations
	alias("internal/runtime/atomic", "Loadint32", "internal/runtime/atomic", "Load", all...)
	alias("internal/runtime/atomic", "Loadint64", "internal/runtime/atomic", "Load64", all...)
	alias("internal/runtime/atomic", "Loaduintptr", "internal/runtime/atomic", "Load", p4...)
	alias("internal/runtime/atomic", "Loaduintptr", "internal/runtime/atomic", "Load64", p8...)
	alias("internal/runtime/atomic", "Loaduint", "internal/runtime/atomic", "Load", p4...)
	alias("internal/runtime/atomic", "Loaduint", "internal/runtime/atomic", "Load64", p8...)
	alias("internal/runtime/atomic", "LoadAcq", "internal/runtime/atomic", "Load", lwatomics...)
	alias("internal/runtime/atomic", "LoadAcq64", "internal/runtime/atomic", "Load64", lwatomics...)
	alias("internal/runtime/atomic", "LoadAcquintptr", "internal/runtime/atomic", "LoadAcq", p4...)
	alias("sync", "runtime_LoadAcquintptr", "internal/runtime/atomic", "LoadAcq", p4...)
	alias("internal/runtime/atomic", "LoadAcquintptr", "internal/runtime/atomic", "LoadAcq64", p8...)
	alias("sync", "runtime_LoadAcquintptr", "internal/runtime/atomic", "LoadAcq64", p8...)

	// Aliases for atomic store operations
	alias("internal/runtime/atomic", "Storeint32", "internal/runtime/atomic", "Store", all...)
	alias("internal/runtime/atomic", "Storeint64", "internal/runtime/atomic", "Store64", all...)
	alias("internal/runtime/atomic", "Storeuintptr", "internal/runtime/atomic", "Store", p4...)
	alias("internal/runtime/atomic", "Storeuintptr", "internal/runtime/atomic", "Store64", p8...)
	alias("internal/runtime/atomic", "StoreRel", "internal/runtime/atomic", "Store", lwatomics...)
	alias("internal/runtime/atomic", "StoreRel64", "internal/runtime/atomic", "Store64", lwatomics...)
	alias("internal/runtime/atomic", "StoreReluintptr", "internal/runtime/atomic", "StoreRel", p4...)
	alias("sync", "runtime_StoreReluintptr", "internal/runtime/atomic", "StoreRel", p4...)
	alias("internal/runtime/atomic", "StoreReluintptr", "internal/runtime/atomic", "StoreRel64", p8...)
	alias("sync", "runtime_StoreReluintptr", "internal/runtime/atomic", "StoreRel64", p8...)

	// Aliases for atomic swap operations
	alias("internal/runtime/atomic", "Xchgint32", "internal/runtime/atomic", "Xchg", all...)
	alias("internal/runtime/atomic", "Xchgint64", "internal/runtime/atomic", "Xchg64", all...)
	alias("internal/runtime/atomic", "Xchguintptr", "internal/runtime/atomic", "Xchg", p4...)
	alias("internal/runtime/atomic", "Xchguintptr", "internal/runtime/atomic", "Xchg64", p8...)

	// Aliases for atomic add operations
	alias("internal/runtime/atomic", "Xaddint32", "internal/runtime/atomic", "Xadd", all...)
	alias("internal/runtime/atomic", "Xaddint64", "internal/runtime/atomic", "Xadd64", all...)
	alias("internal/runtime/atomic", "Xadduintptr", "internal/runtime/atomic", "Xadd", p4...)
	alias("internal/runtime/atomic", "Xadduintptr", "internal/runtime/atomic", "Xadd64", p8...)

	// Aliases for atomic CAS operations
	alias("internal/runtime/atomic", "Casint32", "internal/runtime/atomic", "Cas", all...)
	alias("internal/runtime/atomic", "Casint64", "internal/runtime/atomic", "Cas64", all...)
	alias("internal/runtime/atomic", "Casuintptr", "internal/runtime/atomic", "Cas", p4...)
	alias("internal/runtime/atomic", "Casuintptr", "internal/runtime/atomic", "Cas64", p8...)
	alias("internal/runtime/atomic", "Casp1", "internal/runtime/atomic", "Cas", p4...)
	alias("internal/runtime/atomic", "Casp1", "internal/runtime/atomic", "Cas64", p8...)
	alias("internal/runtime/atomic", "CasRel", "internal/runtime/atomic", "Cas", lwatomics...)

	// Aliases for atomic And/Or operations
	alias("internal/runtime/atomic", "Anduintptr", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchLoong64)
	alias("internal/runtime/atomic", "Oruintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchLoong64)

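	/******** math ********/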
	addF("math", "sqrt",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpSqrt, types.Types[types.TFLOAT64], args[0])
		},
		sys.I386, sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm)
	addF("math", "Trunc",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpTrunc, types.Types[types.TFLOAT64], args[0])
		},
		sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math", "Ceil",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpCeil, types.Types[types.TFLOAT64], args[0])
		},
		sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math", "Floor",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpFloor, types.Types[types.TFLOAT64], args[0])
		},
		sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math", "Round",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpRound, types.Types[types.TFLOAT64], args[0])
		},
		sys.ARM64, sys.PPC64, sys.S390X)
	addF("math", "RoundToEven",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpRoundToEven, types.Types[types.TFLOAT64], args[0])
		},
		sys.ARM64, sys.S390X, sys.Wasm)
	addF("math", "Abs",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpAbs, types.Types[types.TFLOAT64], args[0])
		},
		sys.ARM64, sys.ARM, sys.Loong64, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS, sys.MIPS64)
	addF("math", "Copysign",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue2(ssa.OpCopysign, types.Types[types.TFLOAT64], args[0], args[1])
		},
		sys.Loong64, sys.PPC64, sys.RISCV64, sys.Wasm)
	addF("math", "FMA",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
		},
		sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("math", "FMA",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			if cfg.goamd64 >= 3 {
				return s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
			}

			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasFMA)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely // FMA-capable CPUs are common

			// We have the intrinsic - use it directly.
			s.startBlock(bTrue)
			s.vars[n] = s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TFLOAT64])
		},
		sys.AMD64)
	addF("math", "FMA",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARMHasVFPv4, s.sb)
			v := s.load(types.Types[types.TBOOL], addr)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the intrinsic - use it directly.
			s.startBlock(bTrue)
			s.vars[n] = s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TFLOAT64])
		},
		sys.ARM)

	makeRoundAMD64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			if cfg.goamd64 >= 2 {
				return s.newValue1(op, types.Types[types.TFLOAT64], args[0])
			}

			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasSSE41)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely // most machines have sse4.1 nowadays

			// We have the intrinsic - use it directly.
			s.startBlock(bTrue)
			s.vars[n] = s.newValue1(op, types.Types[types.TFLOAT64], args[0])
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TFLOAT64])
		}
	}
	addF("math", "RoundToEven",
		makeRoundAMD64(ssa.OpRoundToEven),
		sys.AMD64)
	addF("math", "Floor",
		makeRoundAMD64(ssa.OpFloor),
		sys.AMD64)
	addF("math", "Ceil",
		makeRoundAMD64(ssa.OpCeil),
		sys.AMD64)
	addF("math", "Trunc",
		makeRoundAMD64(ssa.OpTrunc),
		sys.AMD64)

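	/******** math/bits ********/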
	addF("math/bits", "TrailingZeros64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
	addF("math/bits", "TrailingZeros64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			lo := s.newValue1(ssa.OpInt64Lo, types.Types[types.TUINT32], args[0])
			hi := s.newValue1(ssa.OpInt64Hi, types.Types[types.TUINT32], args[0])
			return s.newValue2(ssa.OpCtz64On32, types.Types[types.TINT], lo, hi)
		},
		sys.I386)
	addF("math/bits", "TrailingZeros32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
	addF("math/bits", "TrailingZeros16",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpCtz16, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.I386, sys.MIPS, sys.Loong64, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math/bits", "TrailingZeros8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.I386, sys.MIPS, sys.Loong64, sys.PPC64, sys.S390X, sys.Wasm)

	if cfg.goriscv64 >= 22 {
		// GORISCV64=rva22u64 and above guarantees the Zbb extension,
		// which provides count-trailing-zeros instructions.
		addF("math/bits", "TrailingZeros64",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
		addF("math/bits", "TrailingZeros32",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
		addF("math/bits", "TrailingZeros16",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpCtz16, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
		addF("math/bits", "TrailingZeros8",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
	}

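	// ReverseBytes64 and ReverseBytes32 are lowered through the
	// internal/runtime/sys byte-swap intrinsics registered above.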
	alias("math/bits", "ReverseBytes64", "internal/runtime/sys", "Bswap64", all...)
	alias("math/bits", "ReverseBytes32", "internal/runtime/sys", "Bswap32", all...)

	addF("math/bits", "ReverseBytes16",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT16], args[0])
		},
		sys.Loong64)
	if cfg.goppc64 >= 10 {
		// On Power10 the BRH instruction makes a 16-bit byte reverse
		// worthwhile as an intrinsic.
		addF("math/bits", "ReverseBytes16",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT16], args[0])
			},
			sys.PPC64)
	}
	if cfg.goriscv64 >= 22 {
		addF("math/bits", "ReverseBytes16",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT16], args[0])
			},
			sys.RISCV64)
	}

	addF("math/bits", "Len64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math/bits", "Len32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math/bits", "Len16",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math/bits", "Len8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)

	if cfg.goriscv64 >= 22 {
		addF("math/bits", "Len64",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
		addF("math/bits", "Len32",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
		addF("math/bits", "Len16",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
		addF("math/bits", "Len8",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
	}

	alias("math/bits", "Len", "math/bits", "Len64", p8...)
	alias("math/bits", "Len", "math/bits", "Len32", p4...)

	addF("math/bits", "Reverse64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0])
		},
		sys.ARM64, sys.Loong64)
	addF("math/bits", "Reverse32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitRev32, types.Types[types.TINT], args[0])
		},
		sys.ARM64, sys.Loong64)
	addF("math/bits", "Reverse16",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitRev16, types.Types[types.TINT], args[0])
		},
		sys.ARM64, sys.Loong64)
	addF("math/bits", "Reverse8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitRev8, types.Types[types.TINT], args[0])
		},
		sys.ARM64, sys.Loong64)
	addF("math/bits", "Reverse",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0])
		},
		sys.ARM64, sys.Loong64)
	addF("math/bits", "RotateLeft8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue2(ssa.OpRotateLeft8, types.Types[types.TUINT8], args[0], args[1])
		},
		sys.AMD64, sys.RISCV64)
	addF("math/bits", "RotateLeft16",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue2(ssa.OpRotateLeft16, types.Types[types.TUINT16], args[0], args[1])
		},
		sys.AMD64, sys.RISCV64)
	addF("math/bits", "RotateLeft32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue2(ssa.OpRotateLeft32, types.Types[types.TUINT32], args[0], args[1])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm)
	addF("math/bits", "RotateLeft64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue2(ssa.OpRotateLeft64, types.Types[types.TUINT64], args[0], args[1])
		},
		sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm)
	alias("math/bits", "RotateLeft", "math/bits", "RotateLeft64", p8...)

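	// The makeOnesCount* helpers guard the population-count op behind a
	// CPU feature check (POPCNT on amd64, LSX on loong64, Zbb on riscv64)
	// and fall back to a call to the pure Go version when the feature is
	// absent.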
	makeOnesCountAMD64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			if cfg.goamd64 >= 2 {
				return s.newValue1(op, types.Types[types.TINT], args[0])
			}

			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasPOPCNT)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely // most machines have popcnt nowadays

			// We have the intrinsic - use it directly.
			s.startBlock(bTrue)
			s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0])
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TINT])
		}
	}

	makeOnesCountLoong64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLSX, s.sb)
			v := s.load(types.Types[types.TBOOL], addr)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the intrinsic - use it directly.
			s.startBlock(bTrue)
			s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0])
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TINT])
		}
	}

	makeOnesCountRISCV64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			if cfg.goriscv64 >= 22 {
				return s.newValue1(op, types.Types[types.TINT], args[0])
			}

			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.RISCV64HasZbb, s.sb)
			v := s.load(types.Types[types.TBOOL], addr)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the intrinsic - use it directly.
			s.startBlock(bTrue)
			s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0])
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TINT])
		}
	}

	addF("math/bits", "OnesCount64",
		makeOnesCountAMD64(ssa.OpPopCount64),
		sys.AMD64)
	addF("math/bits", "OnesCount64",
		makeOnesCountLoong64(ssa.OpPopCount64),
		sys.Loong64)
	addF("math/bits", "OnesCount64",
		makeOnesCountRISCV64(ssa.OpPopCount64),
		sys.RISCV64)
	addF("math/bits", "OnesCount64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpPopCount64, types.Types[types.TINT], args[0])
		},
		sys.PPC64, sys.ARM64, sys.S390X, sys.Wasm)
	addF("math/bits", "OnesCount32",
		makeOnesCountAMD64(ssa.OpPopCount32),
		sys.AMD64)
	addF("math/bits", "OnesCount32",
		makeOnesCountLoong64(ssa.OpPopCount32),
		sys.Loong64)
	addF("math/bits", "OnesCount32",
		makeOnesCountRISCV64(ssa.OpPopCount32),
		sys.RISCV64)
	addF("math/bits", "OnesCount32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpPopCount32, types.Types[types.TINT], args[0])
		},
		sys.PPC64, sys.ARM64, sys.S390X, sys.Wasm)
	addF("math/bits", "OnesCount16",
		makeOnesCountAMD64(ssa.OpPopCount16),
		sys.AMD64)
	addF("math/bits", "OnesCount16",
		makeOnesCountLoong64(ssa.OpPopCount16),
		sys.Loong64)
	addF("math/bits", "OnesCount16",
		makeOnesCountRISCV64(ssa.OpPopCount16),
		sys.RISCV64)
	addF("math/bits", "OnesCount16",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpPopCount16, types.Types[types.TINT], args[0])
		},
		sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm)
	addF("math/bits", "OnesCount8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpPopCount8, types.Types[types.TINT], args[0])
		},
		sys.S390X, sys.PPC64, sys.Wasm)

	if cfg.goriscv64 >= 22 {
		addF("math/bits", "OnesCount8",
			makeOnesCountRISCV64(ssa.OpPopCount8),
			sys.RISCV64)
	}

	alias("math/bits", "OnesCount", "math/bits", "OnesCount64", p8...)

	add("math/bits", "Mul64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1])
		},
		all...)
	alias("math/bits", "Mul", "math/bits", "Mul64", p8...)
	alias("internal/runtime/math", "Mul64", "math/bits", "Mul64", p8...)
	addF("math/bits", "Add64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
		},
		sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64, sys.MIPS64)
	alias("math/bits", "Add", "math/bits", "Add64", p8...)
	alias("internal/runtime/math", "Add64", "math/bits", "Add64", all...)
	addF("math/bits", "Sub64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
		},
		sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64, sys.MIPS64)
	alias("math/bits", "Sub", "math/bits", "Sub64", p8...)
	addF("math/bits", "Div64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			// Check for divide-by-zero (y == 0) and quotient overflow
			// (y <= hi) before emitting the 128-by-64 division.
			cmpZero := s.newValue2(s.ssaOp(ir.ONE, types.Types[types.TUINT64]), types.Types[types.TBOOL], args[2], s.zeroVal(types.Types[types.TUINT64]))
			s.check(cmpZero, ir.Syms.Panicdivide)
			cmpOverflow := s.newValue2(s.ssaOp(ir.OLT, types.Types[types.TUINT64]), types.Types[types.TBOOL], args[0], args[2])
			s.check(cmpOverflow, ir.Syms.Panicoverflow)
			return s.newValue3(ssa.OpDiv128u, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
		},
		sys.AMD64)
	alias("math/bits", "Div", "math/bits", "Div64", sys.ArchAMD64)

	alias("internal/runtime/sys", "TrailingZeros8", "math/bits", "TrailingZeros8", all...)
	alias("internal/runtime/sys", "TrailingZeros32", "math/bits", "TrailingZeros32", all...)
	alias("internal/runtime/sys", "TrailingZeros64", "math/bits", "TrailingZeros64", all...)
	alias("internal/runtime/sys", "Len8", "math/bits", "Len8", all...)
	alias("internal/runtime/sys", "Len64", "math/bits", "Len64", all...)
	alias("internal/runtime/sys", "OnesCount64", "math/bits", "OnesCount64", all...)

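	/******** sync/atomic ********/

	// Note: these are disabled by flag_race in findIntrinsic below.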
	alias("sync/atomic", "LoadInt32", "internal/runtime/atomic", "Load", all...)
	alias("sync/atomic", "LoadInt64", "internal/runtime/atomic", "Load64", all...)
	alias("sync/atomic", "LoadPointer", "internal/runtime/atomic", "Loadp", all...)
	alias("sync/atomic", "LoadUint32", "internal/runtime/atomic", "Load", all...)
	alias("sync/atomic", "LoadUint64", "internal/runtime/atomic", "Load64", all...)
	alias("sync/atomic", "LoadUintptr", "internal/runtime/atomic", "Load", p4...)
	alias("sync/atomic", "LoadUintptr", "internal/runtime/atomic", "Load64", p8...)

	alias("sync/atomic", "StoreInt32", "internal/runtime/atomic", "Store", all...)
	alias("sync/atomic", "StoreInt64", "internal/runtime/atomic", "Store64", all...)
	// Note: not StorePointer, that needs a write barrier. Same below for {CompareAnd}Swap.
	alias("sync/atomic", "StoreUint32", "internal/runtime/atomic", "Store", all...)
	alias("sync/atomic", "StoreUint64", "internal/runtime/atomic", "Store64", all...)
	alias("sync/atomic", "StoreUintptr", "internal/runtime/atomic", "Store", p4...)
	alias("sync/atomic", "StoreUintptr", "internal/runtime/atomic", "Store64", p8...)

	alias("sync/atomic", "SwapInt32", "internal/runtime/atomic", "Xchg", all...)
	alias("sync/atomic", "SwapInt64", "internal/runtime/atomic", "Xchg64", all...)
	alias("sync/atomic", "SwapUint32", "internal/runtime/atomic", "Xchg", all...)
	alias("sync/atomic", "SwapUint64", "internal/runtime/atomic", "Xchg64", all...)
	alias("sync/atomic", "SwapUintptr", "internal/runtime/atomic", "Xchg", p4...)
	alias("sync/atomic", "SwapUintptr", "internal/runtime/atomic", "Xchg64", p8...)

	alias("sync/atomic", "CompareAndSwapInt32", "internal/runtime/atomic", "Cas", all...)
	alias("sync/atomic", "CompareAndSwapInt64", "internal/runtime/atomic", "Cas64", all...)
	alias("sync/atomic", "CompareAndSwapUint32", "internal/runtime/atomic", "Cas", all...)
	alias("sync/atomic", "CompareAndSwapUint64", "internal/runtime/atomic", "Cas64", all...)
	alias("sync/atomic", "CompareAndSwapUintptr", "internal/runtime/atomic", "Cas", p4...)
	alias("sync/atomic", "CompareAndSwapUintptr", "internal/runtime/atomic", "Cas64", p8...)

	alias("sync/atomic", "AddInt32", "internal/runtime/atomic", "Xadd", all...)
	alias("sync/atomic", "AddInt64", "internal/runtime/atomic", "Xadd64", all...)
	alias("sync/atomic", "AddUint32", "internal/runtime/atomic", "Xadd", all...)
	alias("sync/atomic", "AddUint64", "internal/runtime/atomic", "Xadd64", all...)
	alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd", p4...)
	alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd64", p8...)

	alias("sync/atomic", "AndInt32", "internal/runtime/atomic", "And32", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "AndUint32", "internal/runtime/atomic", "And32", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "AndInt64", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "AndUint64", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "AndUintptr", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "OrInt32", "internal/runtime/atomic", "Or32", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "OrUint32", "internal/runtime/atomic", "Or32", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "OrInt64", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "OrUint64", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "OrUintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)

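	/******** math/big ********/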
	alias("math/big", "mulWW", "math/bits", "Mul64", p8...)

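	/******** internal/runtime/maps ********/

	// The intrinsics below back the swiss-map control-group operations.
	// On amd64 the group matching is done with SSE instructions and
	// PMOVMSKB, which yields a packed bitset (one bit per control byte),
	// whereas the portable Go implementation uses an unpacked form (one
	// byte per slot). The bitset helpers therefore also get packed
	// replacements here. With a packed bitset, bitsetFirst is simply the
	// count of trailing zeros.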
	alias("internal/runtime/maps", "bitsetFirst", "internal/runtime/sys", "TrailingZeros64", sys.ArchAMD64)

1339 addF("internal/runtime/maps", "bitsetRemoveBelow",
1340 func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
1341 b := args[0]
1342 i := args[1]
1343
1344
1345
1346
1347
1348 one := s.constInt64(types.Types[types.TUINT64], 1)
1349
1350 mask := s.newValue2(ssa.OpLsh8x8, types.Types[types.TUINT64], one, i)
1351 mask = s.newValue2(ssa.OpSub64, types.Types[types.TUINT64], mask, one)
1352 mask = s.newValue1(ssa.OpCom64, types.Types[types.TUINT64], mask)
1353
1354 return s.newValue2(ssa.OpAnd64, types.Types[types.TUINT64], b, mask)
1355 },
1356 sys.AMD64)

	addF("internal/runtime/maps", "bitsetLowestSet",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			b := args[0]

			// Report whether the lowest bit of b is set:
			//
			//	out = b&1 == 1
			one := s.constInt64(types.Types[types.TUINT64], 1)
			and := s.newValue2(ssa.OpAnd64, types.Types[types.TUINT64], b, one)
			return s.newValue2(ssa.OpEq64, types.Types[types.TBOOL], and, one)
		},
		sys.AMD64)

	addF("internal/runtime/maps", "bitsetShiftOutLowest",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			b := args[0]

			// Discard the lowest bit:
			//
			//	out = b >> 1
			one := s.constInt64(types.Types[types.TUINT64], 1)
			return s.newValue2(ssa.OpRsh64Ux64, types.Types[types.TUINT64], b, one)
		},
		sys.AMD64)

	addF("internal/runtime/maps", "ctrlGroupMatchH2",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			g := args[0]
			h := args[1]

			// Explicit copies to FP registers for the vector
			// instructions below.
			gfp := s.newValue1(ssa.OpAMD64MOVQi2f, types.TypeInt128, g)
			hfp := s.newValue1(ssa.OpAMD64MOVQi2f, types.TypeInt128, h)

			// Broadcast h into every byte of a 128-bit vector.
			var broadcast *ssa.Value
			if buildcfg.GOAMD64 >= 4 {
				// GOAMD64 v4 implies AVX-512, where
				// VPBROADCASTB can take its source directly
				// from a GP register, so the FP copy of h is
				// not needed.
				broadcast = s.newValue1(ssa.OpAMD64VPBROADCASTB, types.TypeInt128, h)
			} else if buildcfg.GOAMD64 >= 2 {
				// PSHUFB with an all-zero control word copies
				// the low byte into every byte.
				broadcast = s.newValue1(ssa.OpAMD64PSHUFBbroadcast, types.TypeInt128, hfp)
			} else {
				// Without SSSE3 there is no single-instruction
				// byte broadcast. Interleave the low bytes of
				// hfp with themselves, duplicating h's low
				// byte into the low 16-bit lane.
				unpack := s.newValue2(ssa.OpAMD64PUNPCKLBW, types.TypeInt128, hfp, hfp)

				// Then copy that 16-bit lane into each of the
				// low four 16-bit lanes. PSHUFLW leaves the
				// upper 64 bits untouched, but only the low
				// 64 bits (the 8 control bytes) matter.
				broadcast = s.newValue1I(ssa.OpAMD64PSHUFLW, types.TypeInt128, 0, unpack)
			}

			// Compare each byte of the control word with h:
			// matching bytes become 0xFF, the rest 0x00.
			eq := s.newValue2(ssa.OpAMD64PCMPEQB, types.TypeInt128, broadcast, gfp)

			// Pack the sign bit of each byte into a 16-bit mask:
			// bit N is set iff byte N matched.
			out := s.newValue1(ssa.OpAMD64PMOVMSKB, types.Types[types.TUINT16], eq)

			// g is only 64 bits, so only the low 8 bits of out
			// can be set; zero-extend the low byte to produce
			// the uint64 result.
			ret := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], out)

			return ret
		},
		sys.AMD64)

	addF("internal/runtime/maps", "ctrlGroupMatchEmpty",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			// An empty slot is 0b10000000, a deleted slot is
			// 0b11111110, and a full slot has its high bit
			// clear.
			g := args[0]

			// Explicit copy to an FP register for the vector
			// instructions below.
			gfp := s.newValue1(ssa.OpAMD64MOVQi2f, types.TypeInt128, g)

			if buildcfg.GOAMD64 >= 2 {
				// PSIGNB(x, x) negates each byte of x whose
				// control byte (here x itself) is negative,
				// zeroes each zero byte, and passes positive
				// bytes through.
				//
				// An empty slot is -128, and negating -128
				// overflows back to -128, so empty slots keep
				// their sign bit. A deleted slot (-2) becomes
				// 2, clearing its sign bit, and full slots
				// are already non-negative, so after PSIGNB
				// only empty slots have the sign bit set.
				sign := s.newValue2(ssa.OpAMD64PSIGNB, types.TypeInt128, gfp, gfp)

				// Pack the sign bits: bit N is set iff slot N
				// is empty. g is only 64 bits, so the upper 8
				// bits are always zero.
				ret := s.newValue1(ssa.OpAMD64PMOVMSKB, types.Types[types.TUINT16], sign)

				return ret
			}

			// Fallback: compare every control byte against a
			// word of repeated empty-slot bytes.
			var ctrlsEmpty uint64 = abi.MapCtrlEmpty
			e := s.constInt64(types.Types[types.TUINT64], int64(ctrlsEmpty))

			efp := s.newValue1(ssa.OpAMD64MOVQi2f, types.TypeInt128, e)

			// Matching (empty) bytes become 0xFF, the rest 0x00.
			eq := s.newValue2(ssa.OpAMD64PCMPEQB, types.TypeInt128, efp, gfp)

			// Pack the byte matches into a bitmask and
			// zero-extend, as in ctrlGroupMatchH2 above.
			out := s.newValue1(ssa.OpAMD64PMOVMSKB, types.Types[types.TUINT16], eq)

			return s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], out)
		},
		sys.AMD64)

	addF("internal/runtime/maps", "ctrlGroupMatchEmptyOrDeleted",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			// Empty and deleted slots are the only slots with
			// their high (sign) bit set, so packing the sign
			// bits is the entire match.
			g := args[0]

			// Explicit copy to an FP register for PMOVMSKB.
			gfp := s.newValue1(ssa.OpAMD64MOVQi2f, types.TypeInt128, g)

			// Bit N of ret is set iff slot N is empty or
			// deleted. g is only 64 bits, so the upper 8 bits
			// are always zero.
			ret := s.newValue1(ssa.OpAMD64PMOVMSKB, types.Types[types.TUINT16], gfp)

			return ret
		},
		sys.AMD64)

	addF("internal/runtime/maps", "ctrlGroupMatchFull",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			// A slot is full iff its high (sign) bit is clear,
			// so the full mask is the inverse of the
			// empty-or-deleted mask.
			g := args[0]

			// Explicit copy to an FP register for PMOVMSKB.
			gfp := s.newValue1(ssa.OpAMD64MOVQi2f, types.TypeInt128, g)

			// Collect the sign bits, set for the empty or
			// deleted slots.
			mask := s.newValue1(ssa.OpAMD64PMOVMSKB, types.Types[types.TUINT16], gfp)

			// Invert to mark the full slots.
			out := s.newValue1(ssa.OpCom16, types.Types[types.TUINT16], mask)

			// Only the low 8 bits correspond to g: the
			// inversion set bits 8-15, and zero-extending from
			// the low byte discards them.
			return s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], out)
		},
		sys.AMD64)

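	/******** crypto/internal/constanttime ********/

	// Select lowers to a branch-free conditional move: it returns x
	// when v != 0 and y otherwise.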
	add("crypto/internal/constanttime", "Select",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v, x, y := args[0], args[1], args[2]

			var checkOp ssa.Op
			var zero *ssa.Value
			switch s.config.PtrSize {
			case 8:
				checkOp = ssa.OpNeq64
				zero = s.constInt64(types.Types[types.TINT], 0)
			case 4:
				checkOp = ssa.OpNeq32
				zero = s.constInt32(types.Types[types.TINT], 0)
			default:
				panic("unreachable")
			}
			check := s.newValue2(checkOp, types.Types[types.TBOOL], zero, v)

			return s.newValue3(ssa.OpCondSelect, types.Types[types.TINT], x, y, check)
		},
		sys.ArchAMD64, sys.ArchARM64, sys.ArchLoong64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchWasm)
	add("crypto/internal/constanttime", "boolToUint8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpCvtBoolToUint8, types.Types[types.TUINT8], args[0])
		},
		all...)

	if buildcfg.Experiment.SIMD {
		simdIntrinsics(addF)

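		// ClearAVXUpperBits emits VZEROUPPER, zeroing the upper bits
		// of the vector registers. It produces no result value, only
		// a machine-state effect, so it is threaded through memory.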
		addF(simdPackage, "ClearAVXUpperBits",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				s.vars[memVar] = s.newValue1(ssa.OpAMD64VZEROUPPER, types.TypeMem, s.mem())
				return nil
			},
			sys.AMD64)

		addF(simdPackage, "Int8x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int16x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int32x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int64x2.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint8x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint16x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint32x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint64x2.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int8x32.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint8x32.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)

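		// sfp4 registers a four-lane SelectFromPair intrinsic: when
		// all four lane selectors are constant, the selection lowers
		// to one or two shuffle ops (see select4FromPair); otherwise
		// it falls back to a normal call.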
		sfp4 := func(method string, hwop ssa.Op, vectype *types.Type) {
			addF(simdPackage, method,
				func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
					x, a, b, c, d, y := args[0], args[1], args[2], args[3], args[4], args[5]
					if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 && c.Op == ssa.OpConst8 && d.Op == ssa.OpConst8 {
						z := select4FromPair(x, a, b, c, d, y, s, hwop, vectype)
						if z != nil {
							return z
						}
					}
					return s.callResult(n, callNormal)
				},
				sys.AMD64)
		}

		sfp4("Int32x4.SelectFromPair", ssa.OpconcatSelectedConstantInt32x4, types.TypeVec128)
		sfp4("Uint32x4.SelectFromPair", ssa.OpconcatSelectedConstantUint32x4, types.TypeVec128)
		sfp4("Float32x4.SelectFromPair", ssa.OpconcatSelectedConstantFloat32x4, types.TypeVec128)

		sfp4("Int32x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt32x8, types.TypeVec256)
		sfp4("Uint32x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint32x8, types.TypeVec256)
		sfp4("Float32x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat32x8, types.TypeVec256)

		sfp4("Int32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt32x16, types.TypeVec512)
		sfp4("Uint32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint32x16, types.TypeVec512)
		sfp4("Float32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat32x16, types.TypeVec512)

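		// sfp2 is the two-lane analogue of sfp4. The cscimm argument
		// encodes the constant lane selectors into the shuffle
		// immediate for the given vector shape.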
		sfp2 := func(method string, hwop ssa.Op, vectype *types.Type, cscimm func(i, j uint8) int64) {
			addF(simdPackage, method,
				func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
					x, a, b, y := args[0], args[1], args[2], args[3]
					if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 {
						z := select2FromPair(x, a, b, y, s, hwop, vectype, cscimm)
						if z != nil {
							return z
						}
					}
					return s.callResult(n, callNormal)
				},
				sys.AMD64)
		}

		sfp2("Uint64x2.SelectFromPair", ssa.OpconcatSelectedConstantUint64x2, types.TypeVec128, cscimm2)
		sfp2("Int64x2.SelectFromPair", ssa.OpconcatSelectedConstantInt64x2, types.TypeVec128, cscimm2)
		sfp2("Float64x2.SelectFromPair", ssa.OpconcatSelectedConstantFloat64x2, types.TypeVec128, cscimm2)

		sfp2("Uint64x4.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint64x4, types.TypeVec256, cscimm2g2)
		sfp2("Int64x4.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt64x4, types.TypeVec256, cscimm2g2)
		sfp2("Float64x4.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat64x4, types.TypeVec256, cscimm2g2)

		sfp2("Uint64x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint64x8, types.TypeVec512, cscimm2g4)
		sfp2("Int64x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt64x8, types.TypeVec512, cscimm2g4)
		sfp2("Float64x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat64x8, types.TypeVec512, cscimm2g4)
	}
}

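// cscimm4 packs four 2-bit lane selectors into a sign-extended 8-bit
// shuffle immediate.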
func cscimm4(a, b, c, d uint8) int64 {
	return se(a + b<<2 + c<<4 + d<<6)
}

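// cscimm2 packs two 1-bit lane selectors into bits 0 and 1 of the
// immediate.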
func cscimm2(a, b uint8) int64 {
	return se(a + b<<1)
}

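// cscimm2g2 replicates a two-lane selector immediate across two 128-bit
// groups.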
func cscimm2g2(a, b uint8) int64 {
	g := cscimm2(a, b)
	return int64(int8(g + g<<2))
}

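// cscimm2g4 replicates a two-lane selector immediate across four 128-bit
// groups.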
func cscimm2g4(a, b uint8) int64 {
	g := cscimm2g2(a, b)
	return int64(int8(g + g<<4))
}

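// The constants below enumerate the possible sources of the result
// lanes: the i'th letter records whether result lane i is selected from
// the low operand x (L) or the high operand y (H).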
const (
	_LLLL = iota
	_HLLL
	_LHLL
	_HHLL
	_LLHL
	_HLHL
	_LHHL
	_HHHL
	_LLLH
	_HLLH
	_LHLH
	_HHLH
	_LLHH
	_HLHH
	_LHHH
	_HHHH
)

const (
	_LL = iota
	_HL
	_LH
	_HH
)

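// select2FromPair lowers a two-lane SelectFromPair whose lane selectors
// a and b are constants in [0, 3]: 0 and 1 select a lane of x, 2 and 3 a
// lane of y. It returns nil if a selector is out of range.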
func select2FromPair(x, _a, _b, y *ssa.Value, s *state, op ssa.Op, t *types.Type, csc func(a, b uint8) int64) *ssa.Value {
	a, b := uint8(_a.AuxInt8()), uint8(_b.AuxInt8())
	if a > 3 || b > 3 {
		return nil
	}
	pattern := (a&2)>>1 + (b & 2)
	a, b = a&1, b&1

	switch pattern {
	case _LL:
		return s.newValue2I(op, t, csc(a, b), x, x)
	case _HH:
		return s.newValue2I(op, t, csc(a, b), y, y)
	case _LH:
		return s.newValue2I(op, t, csc(a, b), x, y)
	case _HL:
		return s.newValue2I(op, t, csc(a, b), y, x)
	}
	panic("The preceding switch should have been exhaustive")
}

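// select4FromPair is the four-lane analogue of select2FromPair, with
// selectors in [0, 7]. Patterns that draw each half of the result from a
// single source lower to one shuffle; the remaining mixed patterns build
// an intermediate and need two.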
func select4FromPair(x, _a, _b, _c, _d, y *ssa.Value, s *state, op ssa.Op, t *types.Type) *ssa.Value {
	a, b, c, d := uint8(_a.AuxInt8()), uint8(_b.AuxInt8()), uint8(_c.AuxInt8()), uint8(_d.AuxInt8())
	if a > 7 || b > 7 || c > 7 || d > 7 {
		return nil
	}
	pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1

	a, b, c, d = a&3, b&3, c&3, d&3

	switch pattern {
	case _LLLL:
		// All four result lanes come from x.
		return s.newValue2I(op, t, cscimm4(a, b, c, d), x, x)
	case _HHHH:
		// All four result lanes come from y.
		return s.newValue2I(op, t, cscimm4(a, b, c, d), y, y)
	case _LLHH:
		return s.newValue2I(op, t, cscimm4(a, b, c, d), x, y)
	case _HHLL:
		return s.newValue2I(op, t, cscimm4(a, b, c, d), y, x)

	// The low half mixes x and y: build it in z, then combine z with
	// the source of the upper half.
	case _HLLL:
		z := s.newValue2I(op, t, cscimm4(a, a, b, b), y, x)
		return s.newValue2I(op, t, cscimm4(0, 2, c, d), z, x)
	case _LHLL:
		z := s.newValue2I(op, t, cscimm4(a, a, b, b), x, y)
		return s.newValue2I(op, t, cscimm4(0, 2, c, d), z, x)
	case _HLHH:
		z := s.newValue2I(op, t, cscimm4(a, a, b, b), y, x)
		return s.newValue2I(op, t, cscimm4(0, 2, c, d), z, y)
	case _LHHH:
		z := s.newValue2I(op, t, cscimm4(a, a, b, b), x, y)
		return s.newValue2I(op, t, cscimm4(0, 2, c, d), z, y)

	// The upper half mixes x and y: build it in z, then combine the
	// source of the low half with z.
	case _LLLH:
		z := s.newValue2I(op, t, cscimm4(c, c, d, d), x, y)
		return s.newValue2I(op, t, cscimm4(a, b, 0, 2), x, z)
	case _LLHL:
		z := s.newValue2I(op, t, cscimm4(c, c, d, d), y, x)
		return s.newValue2I(op, t, cscimm4(a, b, 0, 2), x, z)
	case _HHLH:
		z := s.newValue2I(op, t, cscimm4(c, c, d, d), x, y)
		return s.newValue2I(op, t, cscimm4(a, b, 0, 2), y, z)
	case _HHHL:
		z := s.newValue2I(op, t, cscimm4(c, c, d, d), y, x)
		return s.newValue2I(op, t, cscimm4(a, b, 0, 2), y, z)

	// Both halves mix x and y: gather all four lanes into z, then
	// permute z into place.
	case _LHLH:
		z := s.newValue2I(op, t, cscimm4(a, c, b, d), x, y)
		return s.newValue2I(op, t, se(0b11_01_10_00), z, z)
	case _HLHL:
		z := s.newValue2I(op, t, cscimm4(b, d, a, c), x, y)
		return s.newValue2I(op, t, se(0b01_11_00_10), z, z)
	case _HLLH:
		z := s.newValue2I(op, t, cscimm4(b, c, a, d), x, y)
		return s.newValue2I(op, t, se(0b11_01_00_10), z, z)
	case _LHHL:
		z := s.newValue2I(op, t, cscimm4(a, d, b, c), x, y)
		return s.newValue2I(op, t, se(0b01_11_10_00), z, z)
	}
	panic("The preceding switch should have been exhaustive")
}

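// se sign-extends an 8-bit immediate to the int64 form expected in an
// op's AuxInt.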
func se(x uint8) int64 {
	return int64(int8(x))
}

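// The opLenN helpers build intrinsics that lower to a single SSA op
// taking N arguments; the numeric suffixes (e.g. _21, _231) give the
// order in which the Go arguments are passed to the op.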
func opLen1(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue1(op, t, args[0])
	}
}

func opLen2(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue2(op, t, args[0], args[1])
	}
}

func opLen2_21(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue2(op, t, args[1], args[0])
	}
}

func opLen3(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue3(op, t, args[0], args[1], args[2])
	}
}

var ssaVecBySize = map[int64]*types.Type{
	16: types.TypeVec128,
	32: types.TypeVec256,
	64: types.TypeVec512,
}

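// opLen3_31Zero3 builds a two-argument intrinsic whose op additionally
// takes a zero vector of the operand's size as its first argument.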
func opLen3_31Zero3(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		t, ok := ssaVecBySize[args[1].Type.Size()]
		if !ok {
			panic("unknown simd vector size")
		}
		return s.newValue3(op, t, s.newValue0(ssa.OpZeroSIMD, t), args[1], args[0])
	}
}

func opLen3_21(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue3(op, t, args[1], args[0], args[2])
	}
}

func opLen3_231(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue3(op, t, args[2], args[0], args[1])
	}
}

func opLen4(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue4(op, t, args[0], args[1], args[2], args[3])
	}
}

func opLen4_231(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue4(op, t, args[2], args[0], args[1], args[3])
	}
}

func opLen4_31(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue4(op, t, args[2], args[1], args[0], args[3])
	}
}

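// immJumpTable lowers an intrinsic whose 8-bit immediate operand is not
// a compile-time constant: it emits a 256-way jump table on idx, calling
// genOp(s, i) to generate the body for each possible immediate value i.
// Each generated body stores its result in s.vars for the intrinsic
// call, which is read back in the join block.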
func immJumpTable(s *state, idx *ssa.Value, intrinsicCall *ir.CallExpr, genOp func(*state, int)) *ssa.Value {
	bEnd := s.f.NewBlock(ssa.BlockPlain)

	if !idx.Type.IsKind(types.TUINT8) {
		panic("immJumpTable expects uint8 value")
	}

	// The jump table control value must be uintptr-width.
	t := types.Types[types.TUINTPTR]
	idx = s.conv(nil, idx, idx.Type, t)

	b := s.curBlock
	b.Kind = ssa.BlockJumpTable
	b.Pos = intrinsicCall.Pos()
	if base.Flag.Cfg.SpectreIndex {
		// Clamp idx to the table's range to block speculative
		// out-of-range jumps.
		idx = s.newValue2(ssa.OpSpectreSliceIndex, t, idx, s.uintptrConstant(255))
	}
	b.SetControl(idx)
	targets := [256]*ssa.Block{}
	for i := range 256 {
		t := s.f.NewBlock(ssa.BlockPlain)
		targets[i] = t
		b.AddEdgeTo(t)
	}
	s.endBlock()

	for i, t := range targets {
		s.startBlock(t)
		genOp(s, i)
		if t.Kind != ssa.BlockExit {
			t.AddEdgeTo(bEnd)
		}
		s.endBlock()
	}

	s.startBlock(bEnd)
	ret := s.variable(intrinsicCall, intrinsicCall.Type())
	return ret
}

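// opLen1Imm8 and the opLenNImm8 variants below build intrinsics with an
// 8-bit immediate operand: a constant immediate is shifted by offset and
// encoded directly in AuxInt; a non-constant one is dispatched through
// immJumpTable.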
func opLen1Imm8(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[1].Op == ssa.OpConst8 {
			return s.newValue1I(op, t, args[1].AuxInt<<int64(offset), args[0])
		}
		return immJumpTable(s, args[1], n, func(sNew *state, idx int) {
			s.vars[n] = sNew.newValue1I(op, t, int64(int8(idx<<offset)), args[0])
		})
	}
}

func opLen2Imm8(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[1].Op == ssa.OpConst8 {
			return s.newValue2I(op, t, args[1].AuxInt<<int64(offset), args[0], args[2])
		}
		return immJumpTable(s, args[1], n, func(sNew *state, idx int) {
			s.vars[n] = sNew.newValue2I(op, t, int64(int8(idx<<offset)), args[0], args[2])
		})
	}
}

func opLen3Imm8(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[1].Op == ssa.OpConst8 {
			return s.newValue3I(op, t, args[1].AuxInt<<int64(offset), args[0], args[2], args[3])
		}
		return immJumpTable(s, args[1], n, func(sNew *state, idx int) {
			s.vars[n] = sNew.newValue3I(op, t, int64(int8(idx<<offset)), args[0], args[2], args[3])
		})
	}
}

func opLen2Imm8_2I(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[2].Op == ssa.OpConst8 {
			return s.newValue2I(op, t, args[2].AuxInt<<int64(offset), args[0], args[1])
		}
		return immJumpTable(s, args[2], n, func(sNew *state, idx int) {
			s.vars[n] = sNew.newValue2I(op, t, int64(int8(idx<<offset)), args[0], args[1])
		})
	}
}

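// opLen2Imm8_II combines two immediates, each restricted to [0, 3], into
// a single AuxInt (low and high nibble). Non-constant immediates are
// combined at runtime and dispatched through a jump table whose
// out-of-range entries panic.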
func opLen2Imm8_II(op ssa.Op, t *types.Type, _ int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[1].Op == ssa.OpConst8 && args[2].Op == ssa.OpConst8 && args[1].AuxInt & ^3 == 0 && args[2].AuxInt & ^3 == 0 {
			i1, i2 := args[1].AuxInt, args[2].AuxInt
			return s.newValue2I(op, t, int64(int8(i1+i2<<4)), args[0], args[3])
		}
		four := s.constInt64(types.Types[types.TUINT8], 4)
		shifted := s.newValue2(ssa.OpLsh8x8, types.Types[types.TUINT8], args[2], four)
		combined := s.newValue2(ssa.OpAdd8, types.Types[types.TUINT8], args[1], shifted)
		return immJumpTable(s, combined, n, func(sNew *state, idx int) {
			if idx & ^(3+3<<4) == 0 {
				s.vars[n] = sNew.newValue2I(op, t, int64(int8(idx)), args[0], args[3])
			} else {
				sNew.rtcall(ir.Syms.PanicSimdImm, false, nil)
			}
		})
	}
}

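// opLen2Imm8_SHA1RNDS4 is opLen2Imm8 restricted to the two immediate
// bits accepted by SHA1RNDS4's round-function selector.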
func opLen2Imm8_SHA1RNDS4(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[1].Op == ssa.OpConst8 {
			return s.newValue2I(op, t, (args[1].AuxInt<<int64(offset))&0b11, args[0], args[2])
		}
		return immJumpTable(s, args[1], n, func(sNew *state, idx int) {
			s.vars[n] = sNew.newValue2I(op, t, int64(int8(idx<<offset))&0b11, args[0], args[2])
		})
	}
}

func opLen3Imm8_2I(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[2].Op == ssa.OpConst8 {
			return s.newValue3I(op, t, args[2].AuxInt<<int64(offset), args[0], args[1], args[3])
		}
		return immJumpTable(s, args[2], n, func(sNew *state, idx int) {
			s.vars[n] = sNew.newValue3I(op, t, int64(int8(idx<<offset)), args[0], args[1], args[3])
		})
	}
}

func opLen4Imm8(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[1].Op == ssa.OpConst8 {
			return s.newValue4I(op, t, args[1].AuxInt<<int64(offset), args[0], args[2], args[3], args[4])
		}
		return immJumpTable(s, args[1], n, func(sNew *state, idx int) {
			s.vars[n] = sNew.newValue4I(op, t, int64(int8(idx<<offset)), args[0], args[2], args[3], args[4])
		})
	}
}

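// simdLoad and simdStore lower the SIMD load and store functions to
// plain vector-typed memory operations.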
func simdLoad() func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue2(ssa.OpLoad, n.Type(), args[0], s.mem())
	}
}

func simdStore() func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		s.store(args[0].Type, args[1], args[0])
		return nil
	}
}

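// cvtVToMaskOpcodes and cvtMaskToVOpcodes map element width (in bits)
// and lane count to the ops that convert between integer bitmaps and
// SIMD masks.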
var cvtVToMaskOpcodes = map[int]map[int]ssa.Op{
	8:  {16: ssa.OpCvt16toMask8x16, 32: ssa.OpCvt32toMask8x32, 64: ssa.OpCvt64toMask8x64},
	16: {8: ssa.OpCvt8toMask16x8, 16: ssa.OpCvt16toMask16x16, 32: ssa.OpCvt32toMask16x32},
	32: {4: ssa.OpCvt8toMask32x4, 8: ssa.OpCvt8toMask32x8, 16: ssa.OpCvt16toMask32x16},
	64: {2: ssa.OpCvt8toMask64x2, 4: ssa.OpCvt8toMask64x4, 8: ssa.OpCvt8toMask64x8},
}

var cvtMaskToVOpcodes = map[int]map[int]ssa.Op{
	8:  {16: ssa.OpCvtMask8x16to16, 32: ssa.OpCvtMask8x32to32, 64: ssa.OpCvtMask8x64to64},
	16: {8: ssa.OpCvtMask16x8to8, 16: ssa.OpCvtMask16x16to16, 32: ssa.OpCvtMask16x32to32},
	32: {4: ssa.OpCvtMask32x4to8, 8: ssa.OpCvtMask32x8to8, 16: ssa.OpCvtMask32x16to16},
	64: {2: ssa.OpCvtMask64x2to8, 4: ssa.OpCvtMask64x4to8, 8: ssa.OpCvtMask64x8to8},
}

func simdCvtVToMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		op := cvtVToMaskOpcodes[elemBits][lanes]
		if op == 0 {
			panic(fmt.Sprintf("Unknown mask shape: Mask%dx%d", elemBits, lanes))
		}
		return s.newValue1(op, types.TypeMask, args[0])
	}
}

func simdCvtMaskToV(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		op := cvtMaskToVOpcodes[elemBits][lanes]
		if op == 0 {
			panic(fmt.Sprintf("Unknown mask shape: Mask%dx%d", elemBits, lanes))
		}
		return s.newValue1(op, n.Type(), args[0])
	}
}

func simdMaskedLoad(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue3(op, n.Type(), args[0], args[1], s.mem())
	}
}

func simdMaskedStore(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		s.vars[memVar] = s.newValue4A(op, types.TypeMem, args[0].Type, args[1], args[2], args[0], s.mem())
		return nil
	}
}

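// findIntrinsic returns a function which builds the SSA equivalent of
// the function identified by the symbol sym. If sym is not an intrinsic
// call, it returns nil.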
func findIntrinsic(sym *types.Sym) intrinsicBuilder {
	if sym == nil || sym.Pkg == nil {
		return nil
	}
	pkg := sym.Pkg.Path
	if sym.Pkg == ir.Pkgs.Runtime {
		pkg = "runtime"
	}
	if base.Flag.Race && pkg == "sync/atomic" {
		// The race detector needs to be able to intercept these
		// calls, so we can't intrinsify them.
		return nil
	}
	// Skip intrinsifying math functions (which may contain hard-float
	// instructions) when soft-float.
	if Arch.SoftFloat && pkg == "math" {
		return nil
	}

	fn := sym.Name
	if ssa.IntrinsicsDisable {
		if pkg == "internal/runtime/sys" && (fn == "GetCallerPC" || fn == "GetCallerSP" || fn == "GetClosurePtr") ||
			pkg == simdPackage {
			// These functions have no Go definitions, so they
			// must always be intrinsified.
		} else {
			return nil
		}
	}
	return intrinsics.lookup(Arch.LinkArch.Arch, pkg, fn)
}

// IsIntrinsicCall reports whether n is a call to an intrinsic function.
func IsIntrinsicCall(n *ir.CallExpr) bool {
	if n == nil {
		return false
	}
	name, ok := n.Fun.(*ir.Name)
	if !ok {
		// Method expressions can also be intrinsic.
		if n.Fun.Op() == ir.OMETHEXPR {
			if meth := ir.MethodExprName(n.Fun); meth != nil {
				if fn := meth.Func; fn != nil {
					return IsIntrinsicSym(fn.Sym())
				}
			}
		}
		return false
	}
	return IsIntrinsicSym(name.Sym())
}

// IsIntrinsicSym reports whether sym names an intrinsic function.
func IsIntrinsicSym(sym *types.Sym) bool {
	return findIntrinsic(sym) != nil
}

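// GenIntrinsicBody generates a body for an intrinsic function fn that
// was declared without one. The body is simply a call to fn itself; that
// call is recognized as intrinsic when the body is compiled, so the
// generated definition can be used where a call site cannot be
// intrinsified directly.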
func GenIntrinsicBody(fn *ir.Func) {
	if ir.CurFunc != nil {
		base.FatalfAt(fn.Pos(), "enqueueFunc %v inside %v", fn, ir.CurFunc)
	}

	if base.Flag.LowerR != 0 {
		fmt.Println("generate intrinsic for", ir.FuncName(fn))
	}

	pos := fn.Pos()
	ft := fn.Type()
	var ret ir.Node

	// The body is a call to the function itself, passing its own
	// receiver and parameters as arguments. The call is replaced by
	// the intrinsic's SSA when this body is compiled.
	call := ir.NewCallExpr(pos, ir.OCALLFUNC, fn.Nname, nil)
	call.Args = ir.RecvParamNames(ft)
	call.IsDDD = ft.IsVariadic()
	typecheck.Exprs(call.Args)
	call.SetTypecheck(1)
	call.SetWalked(true)
	ret = call
	if ft.NumResults() > 0 {
		if ft.NumResults() == 1 {
			call.SetType(ft.Result(0).Type)
		} else {
			call.SetType(ft.ResultsTuple())
		}
		n := ir.NewReturnStmt(base.Pos, nil)
		n.Results = []ir.Node{call}
		ret = n
	}
	fn.Body.Append(ret)

	if base.Flag.LowerR != 0 {
		ir.DumpList("generate intrinsic body", fn.Body)
	}

	ir.CurFunc = fn
	typecheck.Stmts(fn.Body)
	ir.CurFunc = nil
}