// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssagen

import (
	"fmt"
	"internal/abi"
	"internal/buildcfg"

	"cmd/compile/internal/base"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/typecheck"
	"cmd/compile/internal/types"
	"cmd/internal/sys"
)

var intrinsics intrinsicBuilders

// An intrinsicBuilder converts a call node n into an ssa value that
// implements that call as an intrinsic. args is a list of arguments to the func.
type intrinsicBuilder func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value

type intrinsicKey struct {
	arch *sys.Arch
	pkg  string
	fn   string
}

// intrinsicBuildConfig specifies the config to use for intrinsic building.
type intrinsicBuildConfig struct {
	instrumenting bool

	go386     string
	goamd64   int
	goarm     buildcfg.GoarmFeatures
	goarm64   buildcfg.Goarm64Features
	gomips    string
	gomips64  string
	goppc64   int
	goriscv64 int
}

type intrinsicBuilders map[intrinsicKey]intrinsicBuilder

// add defines the intrinsic builder b for pkg.fn for the given architecture,
// panicking if one is already registered.
func (ib intrinsicBuilders) add(arch *sys.Arch, pkg, fn string, b intrinsicBuilder) {
	if _, found := ib[intrinsicKey{arch, pkg, fn}]; found {
		panic(fmt.Sprintf("intrinsic already exists for %v.%v on %v", pkg, fn, arch.Name))
	}
	ib[intrinsicKey{arch, pkg, fn}] = b
}

// addForArchs adds the intrinsic builder b for pkg.fn for the given architectures.
func (ib intrinsicBuilders) addForArchs(pkg, fn string, b intrinsicBuilder, archs ...*sys.Arch) {
	for _, arch := range archs {
		ib.add(arch, pkg, fn, b)
	}
}

// addForFamilies does the same as addForArchs but operates on architecture families.
func (ib intrinsicBuilders) addForFamilies(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily) {
	for _, arch := range sys.Archs {
		if arch.InFamily(archFamilies...) {
			ib.add(arch, pkg, fn, b)
		}
	}
}

// alias defines pkg.fn = targetPkg.targetFn for all architectures in archs
// for which targetPkg.targetFn already exists.
func (ib intrinsicBuilders) alias(pkg, fn, targetPkg, targetFn string, archs ...*sys.Arch) {
	// Panic if the alias does not resolve on at least one architecture,
	// which almost certainly indicates a misspelled target.
	aliased := false
	for _, arch := range archs {
		if b := ib.lookup(arch, targetPkg, targetFn); b != nil {
			ib.add(arch, pkg, fn, b)
			aliased = true
		}
	}
	if !aliased {
		panic(fmt.Sprintf("attempted to alias undefined intrinsic: %s.%s", pkg, fn))
	}
}

// lookup returns the intrinsic builder for pkg.fn on the specified
// architecture, or nil if it does not exist.
func (ib intrinsicBuilders) lookup(arch *sys.Arch, pkg, fn string) intrinsicBuilder {
	return ib[intrinsicKey{arch, pkg, fn}]
}
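
// As a sketch of how this table is consumed (the real consumer is
// findIntrinsic, elsewhere in this package): the SSA builder keys a lookup
// on the target architecture and the fully qualified function name, and
// falls back to an ordinary call when no builder is registered.
//
//	if b := intrinsics.lookup(Arch.LinkArch.Arch, "math/bits", "Len64"); b != nil {
//		// The call can be replaced by the value that b constructs.
//	}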

func initIntrinsics(cfg *intrinsicBuildConfig) {
	if cfg == nil {
		cfg = &intrinsicBuildConfig{
			instrumenting: base.Flag.Cfg.Instrumenting,
			go386:         buildcfg.GO386,
			goamd64:       buildcfg.GOAMD64,
			goarm:         buildcfg.GOARM,
			goarm64:       buildcfg.GOARM64,
			gomips:        buildcfg.GOMIPS,
			gomips64:      buildcfg.GOMIPS64,
			goppc64:       buildcfg.GOPPC64,
			goriscv64:     buildcfg.GORISCV64,
		}
	}
	intrinsics = intrinsicBuilders{}

	// p4 and p8 are the architectures with 4- and 8-byte pointers.
	// lwatomics are the architectures on which acquire/release atomics
	// can simply alias the sequentially consistent ones.
	var p4 []*sys.Arch
	var p8 []*sys.Arch
	var lwatomics []*sys.Arch
	for _, a := range sys.Archs {
		if a.PtrSize == 4 {
			p4 = append(p4, a)
		} else {
			p8 = append(p8, a)
		}
		if a.Family != sys.PPC64 {
			lwatomics = append(lwatomics, a)
		}
	}
	all := sys.Archs[:]

	// add defines the intrinsic b for pkg.fn for the given list of architectures.
	add := func(pkg, fn string, b intrinsicBuilder, archs ...*sys.Arch) {
		intrinsics.addForArchs(pkg, fn, b, archs...)
	}
	// addF does the same as add but operates on architecture families.
	addF := func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily) {
		intrinsics.addForFamilies(pkg, fn, b, archFamilies...)
	}
	// alias defines pkg.fn = pkg2.fn2 for all architectures in archs for which pkg2.fn2 exists.
	alias := func(pkg, fn, pkg2, fn2 string, archs ...*sys.Arch) {
		intrinsics.alias(pkg, fn, pkg2, fn2, archs...)
	}

	/******** runtime ********/
	if !cfg.instrumenting {
		add("runtime", "slicebytetostringtmp",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				// Compiler frontend optimizations emit OBYTES2STRTMP nodes
				// for the backend instead of slicebytetostringtmp calls
				// when not instrumenting.
				return s.newValue2(ssa.OpStringMake, n.Type(), args[0], args[1])
			},
			all...)
	}
	addF("internal/runtime/math", "MulUintptr",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			if s.config.PtrSize == 4 {
				return s.newValue2(ssa.OpMul32uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1])
			}
			return s.newValue2(ssa.OpMul64uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1])
		},
		sys.AMD64, sys.I386, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.ARM64)
	add("runtime", "KeepAlive",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			data := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, args[0])
			s.vars[memVar] = s.newValue2(ssa.OpKeepAlive, types.TypeMem, data, s.mem())
			return nil
		},
		all...)

	addF("runtime", "publicationBarrier",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue1(ssa.OpPubBarrier, types.TypeMem, s.mem())
			return nil
		},
		sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64)

	/******** internal/runtime/sys ********/
	add("internal/runtime/sys", "GetCallerPC",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue0(ssa.OpGetCallerPC, s.f.Config.Types.Uintptr)
		},
		all...)

	add("internal/runtime/sys", "GetCallerSP",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpGetCallerSP, s.f.Config.Types.Uintptr, s.mem())
		},
		all...)

	add("internal/runtime/sys", "GetClosurePtr",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue0(ssa.OpGetClosurePtr, s.f.Config.Types.Uintptr)
		},
		all...)

	addF("internal/runtime/sys", "Bswap32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0])
		},
		sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X)
	addF("internal/runtime/sys", "Bswap64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0])
		},
		sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X)

	addF("runtime", "memequal",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue4(ssa.OpMemEq, s.f.Config.Types.Bool, args[0], args[1], args[2], s.mem())
		},
		sys.ARM64)

	if cfg.goppc64 >= 10 {
		// Use these only on Power10: the byte-reverse instructions
		// (brh/brw/brd) added there make them worthwhile as intrinsics.
		addF("internal/runtime/sys", "Bswap32",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0])
			},
			sys.PPC64)
		addF("internal/runtime/sys", "Bswap64",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0])
			},
			sys.PPC64)
	}

	if cfg.goriscv64 >= 22 {
		// GORISCV64=rva22u64 and above guarantees the Zbb extension,
		// which provides the rev8 byte-reverse instruction.
		addF("internal/runtime/sys", "Bswap32",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0])
			},
			sys.RISCV64)
		addF("internal/runtime/sys", "Bswap64",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0])
			},
			sys.RISCV64)
	}

	// makePrefetchFunc returns a builder for a prefetch intrinsic that
	// lowers to the given prefetch op.
	makePrefetchFunc := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue2(op, types.TypeMem, args[0], s.mem())
			return nil
		}
	}
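
	// Note that the prefetch ops take and produce a memory state: threading
	// the result through s.vars[memVar], as above, is what keeps the
	// prefetch ordered with respect to surrounding memory operations.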

	// Make Prefetch intrinsics for supported platforms.
	// On the unsupported platforms the stub function will be eliminated.
	addF("internal/runtime/sys", "Prefetch", makePrefetchFunc(ssa.OpPrefetchCache),
		sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64)
	addF("internal/runtime/sys", "PrefetchStreamed", makePrefetchFunc(ssa.OpPrefetchCacheStreamed),
		sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64)

	/******** internal/runtime/atomic ********/
	type atomicOpEmitter func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool)

	addF("internal/runtime/atomic", "Load",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue2(ssa.OpAtomicLoad32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
		},
		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Load8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue2(ssa.OpAtomicLoad8, types.NewTuple(types.Types[types.TUINT8], types.TypeMem), args[0], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT8], v)
		},
		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Load64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue2(ssa.OpAtomicLoad64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
		},
		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "LoadAcq",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue2(ssa.OpAtomicLoadAcq32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
		},
		sys.PPC64)
	addF("internal/runtime/atomic", "LoadAcq64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue2(ssa.OpAtomicLoadAcq64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
		},
		sys.PPC64)
	addF("internal/runtime/atomic", "Loadp",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue2(ssa.OpAtomicLoadPtr, types.NewTuple(s.f.Config.Types.BytePtr, types.TypeMem), args[0], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, s.f.Config.Types.BytePtr, v)
		},
		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)

	addF("internal/runtime/atomic", "Store",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore32, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Store8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore8, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Store64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore64, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.ARM64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "StorepNoWB",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStorePtrNoWB, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "StoreRel",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStoreRel32, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.PPC64)
	addF("internal/runtime/atomic", "StoreRel64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStoreRel64, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.PPC64)

	makeAtomicStoreGuardedIntrinsicLoong64 := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			// The target atomic feature (LAM_BH) is identified by dynamic detection.
			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLAM_BH, s.sb)
			v := s.load(types.Types[types.TBOOL], addr)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the atomic instruction - use it directly.
			s.startBlock(bTrue)
			emit(s, n, args, op1, typ, false)
			s.endBlock().AddEdgeTo(bEnd)

			// Use the original instruction sequence.
			s.startBlock(bFalse)
			emit(s, n, args, op0, typ, false)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)

			return nil
		}
	}
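
	// Each of these feature-guarded builders expands to the same
	// diamond-shaped CFG, with the runtime-detected CPU feature flag as
	// the branch condition:
	//
	//	   b: if feature
	//	    /        \
	//	 bTrue      bFalse
	//	 (op1)       (op0)
	//	    \        /
	//	      bEnd
	//
	// Values that must survive the join (the result and the memory state)
	// are communicated through s.vars and read back with s.variable in bEnd.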

	atomicStoreEmitterLoong64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
		v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
		if needReturn {
			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
		}
	}

	addF("internal/runtime/atomic", "Store8",
		makeAtomicStoreGuardedIntrinsicLoong64(ssa.OpAtomicStore8, ssa.OpAtomicStore8Variant, types.TUINT8, atomicStoreEmitterLoong64),
		sys.Loong64)
	addF("internal/runtime/atomic", "Store",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore32Variant, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.Loong64)
	addF("internal/runtime/atomic", "Store64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore64Variant, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.Loong64)

	addF("internal/runtime/atomic", "Xchg8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicExchange8, types.NewTuple(types.Types[types.TUINT8], types.TypeMem), args[0], args[1], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT8], v)
		},
		sys.AMD64, sys.PPC64)
	addF("internal/runtime/atomic", "Xchg",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicExchange32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
		},
		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Xchg64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicExchange64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
		},
		sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)

	makeAtomicGuardedIntrinsicARM64common := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter, needReturn bool) intrinsicBuilder {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			if cfg.goarm64.LSE {
				emit(s, n, args, op1, typ, needReturn)
			} else {
				// The target atomic feature (LSE) is identified by dynamic detection.
				addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARM64HasATOMICS, s.sb)
				v := s.load(types.Types[types.TBOOL], addr)
				b := s.endBlock()
				b.Kind = ssa.BlockIf
				b.SetControl(v)
				bTrue := s.f.NewBlock(ssa.BlockPlain)
				bFalse := s.f.NewBlock(ssa.BlockPlain)
				bEnd := s.f.NewBlock(ssa.BlockPlain)
				b.AddEdgeTo(bTrue)
				b.AddEdgeTo(bFalse)
				b.Likely = ssa.BranchLikely

				// We have the atomic instructions - use them directly.
				s.startBlock(bTrue)
				emit(s, n, args, op1, typ, needReturn)
				s.endBlock().AddEdgeTo(bEnd)

				// Use the original instruction sequence.
				s.startBlock(bFalse)
				emit(s, n, args, op0, typ, needReturn)
				s.endBlock().AddEdgeTo(bEnd)

				// Merge results.
				s.startBlock(bEnd)
			}
			if needReturn {
				return s.variable(n, types.Types[typ])
			} else {
				return nil
			}
		}
	}
	makeAtomicGuardedIntrinsicARM64 := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder {
		return makeAtomicGuardedIntrinsicARM64common(op0, op1, typ, emit, true)
	}
	makeAtomicGuardedIntrinsicARM64old := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder {
		return makeAtomicGuardedIntrinsicARM64common(op0, op1, typ, emit, false)
	}

	atomicEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
		v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
		if needReturn {
			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
		}
	}
	addF("internal/runtime/atomic", "Xchg8",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange8, ssa.OpAtomicExchange8Variant, types.TUINT8, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Xchg",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange32, ssa.OpAtomicExchange32Variant, types.TUINT32, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Xchg64",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, types.TUINT64, atomicEmitterARM64),
		sys.ARM64)

	makeAtomicXchg8GuardedIntrinsicLoong64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			// The byte exchange instruction is only available with the
			// LAM_BH feature, which is identified by dynamic detection.
			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLAM_BH, s.sb)
			v := s.load(types.Types[types.TBOOL], addr)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the atomic instruction - use it directly.
			s.startBlock(bTrue)
			x := s.newValue3(op, types.NewTuple(types.Types[types.TUINT8], types.TypeMem), args[0], args[1], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, x)
			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[types.TUINT8], x)
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TUINT8])
		}
	}
	addF("internal/runtime/atomic", "Xchg8",
		makeAtomicXchg8GuardedIntrinsicLoong64(ssa.OpAtomicExchange8Variant),
		sys.Loong64)

	addF("internal/runtime/atomic", "Xadd",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicAdd32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
		},
		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Xadd64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicAdd64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
		},
		sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)

	addF("internal/runtime/atomic", "Xadd",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, types.TUINT32, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Xadd64",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, types.TUINT64, atomicEmitterARM64),
		sys.ARM64)

	addF("internal/runtime/atomic", "Cas",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
		},
		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Cas64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue4(ssa.OpAtomicCompareAndSwap64, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
		},
		sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "CasRel",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
			return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
		},
		sys.PPC64)

	atomicCasEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
		v := s.newValue4(op, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
		if needReturn {
			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
		}
	}

	addF("internal/runtime/atomic", "Cas",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap32, ssa.OpAtomicCompareAndSwap32Variant, types.TBOOL, atomicCasEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Cas64",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap64, ssa.OpAtomicCompareAndSwap64Variant, types.TBOOL, atomicCasEmitterARM64),
		sys.ARM64)

	atomicCasEmitterLoong64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
		v := s.newValue4(op, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
		if needReturn {
			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
		}
	}

	makeAtomicCasGuardedIntrinsicLoong64 := func(op0, op1 ssa.Op, emit atomicOpEmitter) intrinsicBuilder {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			// The target atomic feature (LAMCAS) is identified by dynamic detection.
			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLAMCAS, s.sb)
			v := s.load(types.Types[types.TBOOL], addr)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the atomic instruction - use it directly.
			s.startBlock(bTrue)
			emit(s, n, args, op1, types.TBOOL, true)
			s.endBlock().AddEdgeTo(bEnd)

			// Use the original instruction sequence.
			s.startBlock(bFalse)
			emit(s, n, args, op0, types.TBOOL, true)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)

			return s.variable(n, types.Types[types.TBOOL])
		}
	}

	addF("internal/runtime/atomic", "Cas",
		makeAtomicCasGuardedIntrinsicLoong64(ssa.OpAtomicCompareAndSwap32, ssa.OpAtomicCompareAndSwap32Variant, atomicCasEmitterLoong64),
		sys.Loong64)
	addF("internal/runtime/atomic", "Cas64",
		makeAtomicCasGuardedIntrinsicLoong64(ssa.OpAtomicCompareAndSwap64, ssa.OpAtomicCompareAndSwap64Variant, atomicCasEmitterLoong64),
		sys.Loong64)

	// Old-style atomic logical operations (no return value).
	addF("internal/runtime/atomic", "And8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "And",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Or8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("internal/runtime/atomic", "Or",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			s.vars[memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
			return nil
		},
		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)

	// On ARM64 both the old-style (no result) and the value-returning
	// forms are implemented with the same value-returning ops; the
	// old-style wrappers simply discard the result.
	addF("internal/runtime/atomic", "And8",
		makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicAnd8value, ssa.OpAtomicAnd8valueVariant, types.TUINT8, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Or8",
		makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicOr8value, ssa.OpAtomicOr8valueVariant, types.TUINT8, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "And64",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd64value, ssa.OpAtomicAnd64valueVariant, types.TUINT64, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "And32",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32value, ssa.OpAtomicAnd32valueVariant, types.TUINT32, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "And",
		makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicAnd32value, ssa.OpAtomicAnd32valueVariant, types.TUINT32, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Or64",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr64value, ssa.OpAtomicOr64valueVariant, types.TUINT64, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Or32",
		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32value, ssa.OpAtomicOr32valueVariant, types.TUINT32, atomicEmitterARM64),
		sys.ARM64)
	addF("internal/runtime/atomic", "Or",
		makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicOr32value, ssa.OpAtomicOr32valueVariant, types.TUINT32, atomicEmitterARM64),
		sys.ARM64)

	// Value-returning atomic And/Or, on architectures with native ops for them.
	addF("internal/runtime/atomic", "And64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicAnd64value, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
			p0, p1 := s.split(v)
			s.vars[memVar] = p1
			return p0
		},
		sys.AMD64, sys.Loong64)
	addF("internal/runtime/atomic", "And32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicAnd32value, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
			p0, p1 := s.split(v)
			s.vars[memVar] = p1
			return p0
		},
		sys.AMD64, sys.Loong64)
	addF("internal/runtime/atomic", "Or64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicOr64value, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
			p0, p1 := s.split(v)
			s.vars[memVar] = p1
			return p0
		},
		sys.AMD64, sys.Loong64)
	addF("internal/runtime/atomic", "Or32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v := s.newValue3(ssa.OpAtomicOr32value, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
			p0, p1 := s.split(v)
			s.vars[memVar] = p1
			return p0
		},
		sys.AMD64, sys.Loong64)

	// Aliases for atomic load operations.
	alias("internal/runtime/atomic", "Loadint32", "internal/runtime/atomic", "Load", all...)
	alias("internal/runtime/atomic", "Loadint64", "internal/runtime/atomic", "Load64", all...)
	alias("internal/runtime/atomic", "Loaduintptr", "internal/runtime/atomic", "Load", p4...)
	alias("internal/runtime/atomic", "Loaduintptr", "internal/runtime/atomic", "Load64", p8...)
	alias("internal/runtime/atomic", "Loaduint", "internal/runtime/atomic", "Load", p4...)
	alias("internal/runtime/atomic", "Loaduint", "internal/runtime/atomic", "Load64", p8...)
	alias("internal/runtime/atomic", "LoadAcq", "internal/runtime/atomic", "Load", lwatomics...)
	alias("internal/runtime/atomic", "LoadAcq64", "internal/runtime/atomic", "Load64", lwatomics...)
	alias("internal/runtime/atomic", "LoadAcquintptr", "internal/runtime/atomic", "LoadAcq", p4...)
	alias("internal/runtime/atomic", "LoadAcquintptr", "internal/runtime/atomic", "LoadAcq64", p8...)

	// Aliases for atomic store operations.
	alias("internal/runtime/atomic", "Storeint32", "internal/runtime/atomic", "Store", all...)
	alias("internal/runtime/atomic", "Storeint64", "internal/runtime/atomic", "Store64", all...)
	alias("internal/runtime/atomic", "Storeuintptr", "internal/runtime/atomic", "Store", p4...)
	alias("internal/runtime/atomic", "Storeuintptr", "internal/runtime/atomic", "Store64", p8...)
	alias("internal/runtime/atomic", "StoreRel", "internal/runtime/atomic", "Store", lwatomics...)
	alias("internal/runtime/atomic", "StoreRel64", "internal/runtime/atomic", "Store64", lwatomics...)
	alias("internal/runtime/atomic", "StoreReluintptr", "internal/runtime/atomic", "StoreRel", p4...)
	alias("internal/runtime/atomic", "StoreReluintptr", "internal/runtime/atomic", "StoreRel64", p8...)

	// Aliases for atomic swap operations.
	alias("internal/runtime/atomic", "Xchgint32", "internal/runtime/atomic", "Xchg", all...)
	alias("internal/runtime/atomic", "Xchgint64", "internal/runtime/atomic", "Xchg64", all...)
	alias("internal/runtime/atomic", "Xchguintptr", "internal/runtime/atomic", "Xchg", p4...)
	alias("internal/runtime/atomic", "Xchguintptr", "internal/runtime/atomic", "Xchg64", p8...)

	// Aliases for atomic add operations.
	alias("internal/runtime/atomic", "Xaddint32", "internal/runtime/atomic", "Xadd", all...)
	alias("internal/runtime/atomic", "Xaddint64", "internal/runtime/atomic", "Xadd64", all...)
	alias("internal/runtime/atomic", "Xadduintptr", "internal/runtime/atomic", "Xadd", p4...)
	alias("internal/runtime/atomic", "Xadduintptr", "internal/runtime/atomic", "Xadd64", p8...)

	// Aliases for atomic CAS operations.
	alias("internal/runtime/atomic", "Casint32", "internal/runtime/atomic", "Cas", all...)
	alias("internal/runtime/atomic", "Casint64", "internal/runtime/atomic", "Cas64", all...)
	alias("internal/runtime/atomic", "Casuintptr", "internal/runtime/atomic", "Cas", p4...)
	alias("internal/runtime/atomic", "Casuintptr", "internal/runtime/atomic", "Cas64", p8...)
	alias("internal/runtime/atomic", "Casp1", "internal/runtime/atomic", "Cas", p4...)
	alias("internal/runtime/atomic", "Casp1", "internal/runtime/atomic", "Cas64", p8...)
	alias("internal/runtime/atomic", "CasRel", "internal/runtime/atomic", "Cas", lwatomics...)

	// Aliases for atomic And/Or operations.
	alias("internal/runtime/atomic", "Anduintptr", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchLoong64)
	alias("internal/runtime/atomic", "Oruintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchLoong64)

	/******** math ********/
	addF("math", "sqrt",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpSqrt, types.Types[types.TFLOAT64], args[0])
		},
		sys.I386, sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm)
	addF("math", "Trunc",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpTrunc, types.Types[types.TFLOAT64], args[0])
		},
		sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math", "Ceil",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpCeil, types.Types[types.TFLOAT64], args[0])
		},
		sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math", "Floor",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpFloor, types.Types[types.TFLOAT64], args[0])
		},
		sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math", "Round",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpRound, types.Types[types.TFLOAT64], args[0])
		},
		sys.ARM64, sys.PPC64, sys.S390X)
	addF("math", "RoundToEven",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpRoundToEven, types.Types[types.TFLOAT64], args[0])
		},
		sys.ARM64, sys.S390X, sys.Wasm)
	addF("math", "Abs",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpAbs, types.Types[types.TFLOAT64], args[0])
		},
		sys.ARM64, sys.ARM, sys.Loong64, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS, sys.MIPS64)
	addF("math", "Copysign",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue2(ssa.OpCopysign, types.Types[types.TFLOAT64], args[0], args[1])
		},
		sys.Loong64, sys.PPC64, sys.RISCV64, sys.Wasm)
	addF("math", "FMA",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
		},
		sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("math", "FMA",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			if cfg.goamd64 >= 3 {
				return s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
			}

			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasFMA)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the intrinsic - use it directly.
			s.startBlock(bTrue)
			s.vars[n] = s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TFLOAT64])
		},
		sys.AMD64)
	addF("math", "FMA",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARMHasVFPv4, s.sb)
			v := s.load(types.Types[types.TBOOL], addr)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the intrinsic - use it directly.
			s.startBlock(bTrue)
			s.vars[n] = s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TFLOAT64])
		},
		sys.ARM)

	makeRoundAMD64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			if cfg.goamd64 >= 2 {
				// SSE4.1 (ROUNDSD) is guaranteed from GOAMD64=v2 on.
				return s.newValue1(op, types.Types[types.TFLOAT64], args[0])
			}

			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasSSE41)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the intrinsic - use it directly.
			s.startBlock(bTrue)
			s.vars[n] = s.newValue1(op, types.Types[types.TFLOAT64], args[0])
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TFLOAT64])
		}
	}
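
	// As a sketch, the dynamic-dispatch path above corresponds to code of
	// this shape (X86HasSSE41 is the runtime-detected CPU feature flag):
	//
	//	if X86HasSSE41 {
	//		result = ROUNDSD(x) // the rounding op passed to makeRoundAMD64
	//	} else {
	//		result = math.Floor(x) // or Ceil/Trunc/RoundToEven: pure Go fallback
	//	}
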
	addF("math", "RoundToEven",
		makeRoundAMD64(ssa.OpRoundToEven),
		sys.AMD64)
	addF("math", "Floor",
		makeRoundAMD64(ssa.OpFloor),
		sys.AMD64)
	addF("math", "Ceil",
		makeRoundAMD64(ssa.OpCeil),
		sys.AMD64)
	addF("math", "Trunc",
		makeRoundAMD64(ssa.OpTrunc),
		sys.AMD64)

	/******** math/bits ********/
	addF("math/bits", "TrailingZeros64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
	addF("math/bits", "TrailingZeros64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			// 386 has no 64-bit ctz; compute it from the two 32-bit halves.
			lo := s.newValue1(ssa.OpInt64Lo, types.Types[types.TUINT32], args[0])
			hi := s.newValue1(ssa.OpInt64Hi, types.Types[types.TUINT32], args[0])
			return s.newValue2(ssa.OpCtz64On32, types.Types[types.TINT], lo, hi)
		},
		sys.I386)
	addF("math/bits", "TrailingZeros32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
	addF("math/bits", "TrailingZeros16",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpCtz16, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.I386, sys.MIPS, sys.Loong64, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math/bits", "TrailingZeros8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.I386, sys.MIPS, sys.Loong64, sys.PPC64, sys.S390X, sys.Wasm)

	if cfg.goriscv64 >= 22 {
		// GORISCV64=rva22u64 and above guarantees Zbb (ctz/ctzw).
		addF("math/bits", "TrailingZeros64",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
		addF("math/bits", "TrailingZeros32",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
		addF("math/bits", "TrailingZeros16",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpCtz16, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
		addF("math/bits", "TrailingZeros8",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
	}

	alias("math/bits", "ReverseBytes64", "internal/runtime/sys", "Bswap64", all...)
	alias("math/bits", "ReverseBytes32", "internal/runtime/sys", "Bswap32", all...)

	addF("math/bits", "ReverseBytes16",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT16], args[0])
		},
		sys.Loong64)
	if cfg.goppc64 >= 10 {
		// On Power10 the brh instruction makes this worthwhile as an intrinsic.
		addF("math/bits", "ReverseBytes16",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT16], args[0])
			},
			sys.PPC64)
	}
	if cfg.goriscv64 >= 22 {
		addF("math/bits", "ReverseBytes16",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT16], args[0])
			},
			sys.RISCV64)
	}

	addF("math/bits", "Len64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math/bits", "Len32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math/bits", "Len16",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)
	addF("math/bits", "Len8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)

	if cfg.goriscv64 >= 22 {
		// GORISCV64=rva22u64 and above guarantees Zbb (clz/clzw).
		addF("math/bits", "Len64",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
		addF("math/bits", "Len32",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
		addF("math/bits", "Len16",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
		addF("math/bits", "Len8",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0])
			},
			sys.RISCV64)
	}

	alias("math/bits", "Len", "math/bits", "Len64", p8...)
	alias("math/bits", "Len", "math/bits", "Len32", p4...)

	addF("math/bits", "Reverse64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitRev64, types.Types[types.TUINT64], args[0])
		},
		sys.ARM64, sys.Loong64)
	addF("math/bits", "Reverse32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitRev32, types.Types[types.TUINT32], args[0])
		},
		sys.ARM64, sys.Loong64)
	addF("math/bits", "Reverse16",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitRev16, types.Types[types.TUINT16], args[0])
		},
		sys.ARM64, sys.Loong64)
	addF("math/bits", "Reverse8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitRev8, types.Types[types.TUINT8], args[0])
		},
		sys.ARM64, sys.Loong64)
	addF("math/bits", "Reverse",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpBitRev64, types.Types[types.TUINT], args[0])
		},
		sys.ARM64, sys.Loong64)
	addF("math/bits", "RotateLeft8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue2(ssa.OpRotateLeft8, types.Types[types.TUINT8], args[0], args[1])
		},
		sys.AMD64, sys.RISCV64)
	addF("math/bits", "RotateLeft16",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue2(ssa.OpRotateLeft16, types.Types[types.TUINT16], args[0], args[1])
		},
		sys.AMD64, sys.RISCV64)
	addF("math/bits", "RotateLeft32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue2(ssa.OpRotateLeft32, types.Types[types.TUINT32], args[0], args[1])
		},
		sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm)
	addF("math/bits", "RotateLeft64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue2(ssa.OpRotateLeft64, types.Types[types.TUINT64], args[0], args[1])
		},
		sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm)
	alias("math/bits", "RotateLeft", "math/bits", "RotateLeft64", p8...)

	makeOnesCountAMD64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			if cfg.goamd64 >= 2 {
				// POPCNT is guaranteed from GOAMD64=v2 on.
				return s.newValue1(op, types.Types[types.TINT], args[0])
			}

			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasPOPCNT)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the intrinsic - use it directly.
			s.startBlock(bTrue)
			s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0])
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TINT])
		}
	}

	makeOnesCountLoong64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			// Loong64's popcount instructions are part of the LSX vector
			// extension, which is identified by dynamic detection.
			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLSX, s.sb)
			v := s.load(types.Types[types.TBOOL], addr)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the intrinsic - use it directly.
			s.startBlock(bTrue)
			s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0])
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TINT])
		}
	}

	makeOnesCountRISCV64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			if cfg.goriscv64 >= 22 {
				// GORISCV64=rva22u64 and above guarantees Zbb (cpop/cpopw).
				return s.newValue1(op, types.Types[types.TINT], args[0])
			}

			// Zbb is not guaranteed; detect it dynamically.
			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.RISCV64HasZbb, s.sb)
			v := s.load(types.Types[types.TBOOL], addr)
			b := s.endBlock()
			b.Kind = ssa.BlockIf
			b.SetControl(v)
			bTrue := s.f.NewBlock(ssa.BlockPlain)
			bFalse := s.f.NewBlock(ssa.BlockPlain)
			bEnd := s.f.NewBlock(ssa.BlockPlain)
			b.AddEdgeTo(bTrue)
			b.AddEdgeTo(bFalse)
			b.Likely = ssa.BranchLikely

			// We have the intrinsic - use it directly.
			s.startBlock(bTrue)
			s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0])
			s.endBlock().AddEdgeTo(bEnd)

			// Call the pure Go version.
			s.startBlock(bFalse)
			s.vars[n] = s.callResult(n, callNormal)
			s.endBlock().AddEdgeTo(bEnd)

			// Merge results.
			s.startBlock(bEnd)
			return s.variable(n, types.Types[types.TINT])
		}
	}

	addF("math/bits", "OnesCount64",
		makeOnesCountAMD64(ssa.OpPopCount64),
		sys.AMD64)
	addF("math/bits", "OnesCount64",
		makeOnesCountLoong64(ssa.OpPopCount64),
		sys.Loong64)
	addF("math/bits", "OnesCount64",
		makeOnesCountRISCV64(ssa.OpPopCount64),
		sys.RISCV64)
	addF("math/bits", "OnesCount64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpPopCount64, types.Types[types.TINT], args[0])
		},
		sys.PPC64, sys.ARM64, sys.S390X, sys.Wasm)
	addF("math/bits", "OnesCount32",
		makeOnesCountAMD64(ssa.OpPopCount32),
		sys.AMD64)
	addF("math/bits", "OnesCount32",
		makeOnesCountLoong64(ssa.OpPopCount32),
		sys.Loong64)
	addF("math/bits", "OnesCount32",
		makeOnesCountRISCV64(ssa.OpPopCount32),
		sys.RISCV64)
	addF("math/bits", "OnesCount32",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpPopCount32, types.Types[types.TINT], args[0])
		},
		sys.PPC64, sys.ARM64, sys.S390X, sys.Wasm)
	addF("math/bits", "OnesCount16",
		makeOnesCountAMD64(ssa.OpPopCount16),
		sys.AMD64)
	addF("math/bits", "OnesCount16",
		makeOnesCountLoong64(ssa.OpPopCount16),
		sys.Loong64)
	addF("math/bits", "OnesCount16",
		makeOnesCountRISCV64(ssa.OpPopCount16),
		sys.RISCV64)
	addF("math/bits", "OnesCount16",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpPopCount16, types.Types[types.TINT], args[0])
		},
		sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm)
	addF("math/bits", "OnesCount8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpPopCount8, types.Types[types.TINT], args[0])
		},
		sys.S390X, sys.PPC64, sys.Wasm)

	if cfg.goriscv64 >= 22 {
		addF("math/bits", "OnesCount8",
			makeOnesCountRISCV64(ssa.OpPopCount8),
			sys.RISCV64)
	}

	alias("math/bits", "OnesCount", "math/bits", "OnesCount64", p8...)

	add("math/bits", "Mul64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1])
		},
		all...)
	alias("math/bits", "Mul", "math/bits", "Mul64", p8...)
	addF("math/bits", "Add64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
		},
		sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64, sys.MIPS64)
	alias("math/bits", "Add", "math/bits", "Add64", p8...)
	alias("internal/runtime/math", "Add64", "math/bits", "Add64", all...)
	addF("math/bits", "Sub64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
		},
		sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64, sys.MIPS64)
	alias("math/bits", "Sub", "math/bits", "Sub64", p8...)
	addF("math/bits", "Div64",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			// Check for divide-by-zero/overflow and panic with the appropriate message.
			cmpZero := s.newValue2(s.ssaOp(ir.ONE, types.Types[types.TUINT64]), types.Types[types.TBOOL], args[2], s.zeroVal(types.Types[types.TUINT64]))
			s.check(cmpZero, ir.Syms.Panicdivide)
			cmpOverflow := s.newValue2(s.ssaOp(ir.OLT, types.Types[types.TUINT64]), types.Types[types.TBOOL], args[0], args[2])
			s.check(cmpOverflow, ir.Syms.Panicoverflow)
			return s.newValue3(ssa.OpDiv128u, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
		},
		sys.AMD64)
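
	// The two checks in Div64 above mirror the documented preconditions of
	// bits.Div64(hi, lo, y): y must be nonzero (else a divide panic) and
	// hi < y (else the quotient overflows 64 bits). args[0] is hi and
	// args[2] is y, so the overflow check is simply hi < y.
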
	alias("math/bits", "Div", "math/bits", "Div64", sys.ArchAMD64)

	alias("internal/runtime/sys", "TrailingZeros8", "math/bits", "TrailingZeros8", all...)
	alias("internal/runtime/sys", "TrailingZeros32", "math/bits", "TrailingZeros32", all...)
	alias("internal/runtime/sys", "TrailingZeros64", "math/bits", "TrailingZeros64", all...)
	alias("internal/runtime/sys", "Len8", "math/bits", "Len8", all...)
	alias("internal/runtime/sys", "Len64", "math/bits", "Len64", all...)
	alias("internal/runtime/sys", "OnesCount64", "math/bits", "OnesCount64", all...)

	/******** sync/atomic ********/

	// Note: these are disabled by flag_race in findIntrinsic out of caution.
	alias("sync/atomic", "LoadInt32", "internal/runtime/atomic", "Load", all...)
	alias("sync/atomic", "LoadInt64", "internal/runtime/atomic", "Load64", all...)
	alias("sync/atomic", "LoadPointer", "internal/runtime/atomic", "Loadp", all...)
	alias("sync/atomic", "LoadUint32", "internal/runtime/atomic", "Load", all...)
	alias("sync/atomic", "LoadUint64", "internal/runtime/atomic", "Load64", all...)
	alias("sync/atomic", "LoadUintptr", "internal/runtime/atomic", "Load", p4...)
	alias("sync/atomic", "LoadUintptr", "internal/runtime/atomic", "Load64", p8...)

	alias("sync/atomic", "StoreInt32", "internal/runtime/atomic", "Store", all...)
	alias("sync/atomic", "StoreInt64", "internal/runtime/atomic", "Store64", all...)
	// Note: not StorePointer, that needs a write barrier. Same below for {CompareAnd}Swap.
	alias("sync/atomic", "StoreUint32", "internal/runtime/atomic", "Store", all...)
	alias("sync/atomic", "StoreUint64", "internal/runtime/atomic", "Store64", all...)
	alias("sync/atomic", "StoreUintptr", "internal/runtime/atomic", "Store", p4...)
	alias("sync/atomic", "StoreUintptr", "internal/runtime/atomic", "Store64", p8...)

	alias("sync/atomic", "SwapInt32", "internal/runtime/atomic", "Xchg", all...)
	alias("sync/atomic", "SwapInt64", "internal/runtime/atomic", "Xchg64", all...)
	alias("sync/atomic", "SwapUint32", "internal/runtime/atomic", "Xchg", all...)
	alias("sync/atomic", "SwapUint64", "internal/runtime/atomic", "Xchg64", all...)
	alias("sync/atomic", "SwapUintptr", "internal/runtime/atomic", "Xchg", p4...)
	alias("sync/atomic", "SwapUintptr", "internal/runtime/atomic", "Xchg64", p8...)

	alias("sync/atomic", "CompareAndSwapInt32", "internal/runtime/atomic", "Cas", all...)
	alias("sync/atomic", "CompareAndSwapInt64", "internal/runtime/atomic", "Cas64", all...)
	alias("sync/atomic", "CompareAndSwapUint32", "internal/runtime/atomic", "Cas", all...)
	alias("sync/atomic", "CompareAndSwapUint64", "internal/runtime/atomic", "Cas64", all...)
	alias("sync/atomic", "CompareAndSwapUintptr", "internal/runtime/atomic", "Cas", p4...)
	alias("sync/atomic", "CompareAndSwapUintptr", "internal/runtime/atomic", "Cas64", p8...)

	alias("sync/atomic", "AddInt32", "internal/runtime/atomic", "Xadd", all...)
	alias("sync/atomic", "AddInt64", "internal/runtime/atomic", "Xadd64", all...)
	alias("sync/atomic", "AddUint32", "internal/runtime/atomic", "Xadd", all...)
	alias("sync/atomic", "AddUint64", "internal/runtime/atomic", "Xadd64", all...)
	alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd", p4...)
	alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd64", p8...)

	alias("sync/atomic", "AndInt32", "internal/runtime/atomic", "And32", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "AndUint32", "internal/runtime/atomic", "And32", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "AndInt64", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "AndUint64", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "AndUintptr", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "OrInt32", "internal/runtime/atomic", "Or32", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "OrUint32", "internal/runtime/atomic", "Or32", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "OrInt64", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "OrUint64", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
	alias("sync/atomic", "OrUintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)

	/******** math/big ********/
	alias("math/big", "mulWW", "math/bits", "Mul64", p8...)

	/******** internal/runtime/maps ********/

	// The intrinsics below implement the SIMD control-group probing used
	// by the map runtime on amd64.
	//
	// Note that the ctrlGroupMatch* intrinsics return a *packed* bitset
	// (bit N set means slot N matched), produced by PMOVMSKB, so the
	// bitset helpers (bitsetFirst, bitsetRemoveBelow, ...) are replaced
	// here with packed-representation equivalents as well.

	alias("internal/runtime/maps", "bitsetFirst", "internal/runtime/sys", "TrailingZeros64", sys.ArchAMD64)

	addF("internal/runtime/maps", "bitsetRemoveBelow",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			b := args[0]
			i := args[1]

			// Clear the lower i bits in b.
			//
			// out = b &^ ((1 << i) - 1)

			one := s.constInt64(types.Types[types.TUINT64], 1)

			mask := s.newValue2(ssa.OpLsh64x8, types.Types[types.TUINT64], one, i)
			mask = s.newValue2(ssa.OpSub64, types.Types[types.TUINT64], mask, one)
			mask = s.newValue1(ssa.OpCom64, types.Types[types.TUINT64], mask)

			return s.newValue2(ssa.OpAnd64, types.Types[types.TUINT64], b, mask)
		},
		sys.AMD64)

	addF("internal/runtime/maps", "bitsetLowestSet",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			b := args[0]
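
			// Report whether the lowest bit of b is set:
			//
			//	out = b&1 == 1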

			one := s.constInt64(types.Types[types.TUINT64], 1)
			and := s.newValue2(ssa.OpAnd64, types.Types[types.TUINT64], b, one)
			return s.newValue2(ssa.OpEq64, types.Types[types.TBOOL], and, one)
		},
		sys.AMD64)

	addF("internal/runtime/maps", "bitsetShiftOutLowest",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			b := args[0]
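
			// Shift the lowest bit out of b:
			//
			//	out = b >> 1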

			one := s.constInt64(types.Types[types.TUINT64], 1)
			return s.newValue2(ssa.OpRsh64Ux64, types.Types[types.TUINT64], b, one)
		},
		sys.AMD64)

	addF("internal/runtime/maps", "ctrlGroupMatchH2",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			g := args[0]
			h := args[1]
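
			// Move g and h into FP registers: the byte-compare
			// sequence below is built from SSE instructions that
			// want XMM operands.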
			gfp := s.newValue1(ssa.OpAMD64MOVQi2f, types.TypeInt128, g)
			hfp := s.newValue1(ssa.OpAMD64MOVQi2f, types.TypeInt128, h)

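			// Broadcast the h2 byte into every byte of a 128-bit word.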
			var broadcast *ssa.Value
			if buildcfg.GOAMD64 >= 4 {
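				// With AVX-512 (GOAMD64 v4), VPBROADCASTB can
				// broadcast straight from a GP register, saving
				// the separate GP-to-FP move.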
				broadcast = s.newValue1(ssa.OpAMD64VPBROADCASTB, types.TypeInt128, h)
			} else if buildcfg.GOAMD64 >= 2 {
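				// With SSSE3 (GOAMD64 v2), PSHUFB with an all-zero
				// shuffle mask replicates byte 0 into every byte.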
				broadcast = s.newValue1(ssa.OpAMD64PSHUFBbroadcast, types.TypeInt128, hfp)
			} else {
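				// Baseline SSE2 has no single-byte broadcast.
				// Duplicate the low byte with PUNPCKLBW, then
				// replicate the resulting 16-bit value across the
				// low four 16-bit lanes with PSHUFLW. Only the low
				// eight bytes matter: the control word g is 64 bits
				// and the final mask is truncated to 8 bits.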
				unpack := s.newValue2(ssa.OpAMD64PUNPCKLBW, types.TypeInt128, hfp, hfp)

				broadcast = s.newValue1I(ssa.OpAMD64PSHUFLW, types.TypeInt128, 0, unpack)
			}
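
			// Compare each byte of the control word with h2: matching
			// bytes become 0xFF, others 0x00.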
			eq := s.newValue2(ssa.OpAMD64PCMPEQB, types.TypeInt128, broadcast, gfp)
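
			// PMOVMSKB packs the sign bit of each byte into the low
			// bits of the result, yielding the packed bitset: bit N
			// set means byte N matched.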
			out := s.newValue1(ssa.OpAMD64PMOVMSKB, types.Types[types.TUINT8], eq)
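
			// The control word is only 64 bits, so the mask fits in
			// the low 8 bits; zero-extend it to the uint64 the caller
			// expects.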
			ret := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], out)

			return ret
		},
		sys.AMD64)

	addF("internal/runtime/maps", "ctrlGroupMatchEmpty",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
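			// Control byte encodings (see internal/abi):
			//
			//	empty:   1000 0000
			//	deleted: 1111 1110
			//	full:    0??? ????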

			g := args[0]
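
			// Move g into an FP register for the SSE sequence below.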
			gfp := s.newValue1(ssa.OpAMD64MOVQi2f, types.TypeInt128, g)

			if buildcfg.GOAMD64 >= 2 {
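				// PSIGNB(x, y) negates each byte of x whose
				// corresponding byte in y is negative, zeroes it
				// where y is zero, and leaves it unchanged where y
				// is positive. Passing the control word as both
				// operands:
				//
				//	full    0??? ????  ->  unchanged or zeroed (sign bit clear)
				//	deleted 1111 1110  ->  0000 0010 (sign bit clear)
				//	empty   1000 0000  ->  1000 0000 (negating -128 overflows back to -128)
				//
				// Afterwards only empty slots still have the sign
				// bit set, which PMOVMSKB below packs into a bitset.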
				sign := s.newValue2(ssa.OpAMD64PSIGNB, types.TypeInt128, gfp, gfp)
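
				// Pack the sign bits. The upper 64 bits of gfp are
				// zero, so only the low 8 bits of the mask can be
				// set and no extension is needed.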
				ret := s.newValue1(ssa.OpAMD64PMOVMSKB, types.Types[types.TUINT64], sign)

				return ret
			}
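
			// Without PSIGNB, compare each byte against a word of
			// repeated empty-control bytes instead.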
			var ctrlsEmpty uint64 = abi.MapCtrlEmpty
			e := s.constInt64(types.Types[types.TUINT64], int64(ctrlsEmpty))

			efp := s.newValue1(ssa.OpAMD64MOVQi2f, types.TypeInt128, e)
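
			// Equal bytes become 0xFF; PMOVMSKB packs their sign bits
			// into the low 8 bits, which are then zero-extended to
			// uint64 as above.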
			eq := s.newValue2(ssa.OpAMD64PCMPEQB, types.TypeInt128, efp, gfp)

			out := s.newValue1(ssa.OpAMD64PMOVMSKB, types.Types[types.TUINT8], eq)

			return s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], out)
		},
		sys.AMD64)

	addF("internal/runtime/maps", "ctrlGroupMatchEmptyOrDeleted",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
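			// Both the empty (1000 0000) and deleted (1111 1110)
			// encodings have the sign bit set, and full slots do not,
			// so packing the sign bits with PMOVMSKB is the whole
			// match.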

			g := args[0]

			gfp := s.newValue1(ssa.OpAMD64MOVQi2f, types.TypeInt128, g)

			ret := s.newValue1(ssa.OpAMD64PMOVMSKB, types.Types[types.TUINT64], gfp)

			return ret
		},
		sys.AMD64)

	addF("internal/runtime/maps", "ctrlGroupMatchFull",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
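			// A slot is full iff the sign bit of its control byte is
			// clear, so pack the sign bits and invert the low 8 bits.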

			g := args[0]

			gfp := s.newValue1(ssa.OpAMD64MOVQi2f, types.TypeInt128, g)

			mask := s.newValue1(ssa.OpAMD64PMOVMSKB, types.Types[types.TUINT8], gfp)

			// Invert the low 8 bits to select the full slots.
			out := s.newValue1(ssa.OpCom8, types.Types[types.TUINT8], mask)

			return s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], out)
		},
		sys.AMD64)

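	// crypto/internal/constanttime.Select is only intrinsified on
	// architectures with a conditional-move/select instruction, so that
	// OpCondSelect can lower without branching. On RISC-V that requires
	// the Zicond extension (GORISCV64 >= rva23u64).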
	hasCMOV := []*sys.Arch{sys.ArchAMD64, sys.ArchARM64, sys.ArchLoong64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchWasm}
	if cfg.goriscv64 >= 23 {
		hasCMOV = append(hasCMOV, sys.ArchRISCV64)
	}
	add("crypto/internal/constanttime", "Select",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			v, x, y := args[0], args[1], args[2]

			// Compare v against zero at the native word width, then
			// let OpCondSelect pick x or y without a branch.
			var checkOp ssa.Op
			var zero *ssa.Value
			switch s.config.PtrSize {
			case 8:
				checkOp = ssa.OpNeq64
				zero = s.constInt64(types.Types[types.TINT], 0)
			case 4:
				checkOp = ssa.OpNeq32
				zero = s.constInt32(types.Types[types.TINT], 0)
			default:
				panic("unreachable")
			}
			check := s.newValue2(checkOp, types.Types[types.TBOOL], zero, v)

			return s.newValue3(ssa.OpCondSelect, types.Types[types.TINT], x, y, check)
		}, hasCMOV...)
	add("crypto/internal/constanttime", "boolToUint8",
		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
			return s.newValue1(ssa.OpCvtBoolToUint8, types.Types[types.TUINT8], args[0])
		},
		all...)

	if buildcfg.Experiment.SIMD {
		simdIntrinsics(addF)

		// ClearAVXUpperBits issues VZEROUPPER. It produces no value;
		// the op is threaded through the memory state so it stays
		// ordered with surrounding operations.
		addF(simdPackage, "ClearAVXUpperBits",
			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
				s.vars[memVar] = s.newValue1(ssa.OpAMD64VZEROUPPER, types.TypeMem, s.mem())
				return nil
			},
			sys.AMD64)

		addF(simdPackage, "Int8x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int16x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int32x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int64x2.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint8x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint16x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint32x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint64x2.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int8x32.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Int64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint8x32.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
		addF(simdPackage, "Float32x4.IsNaN", opLen1(ssa.OpIsNaNFloat32x4, types.TypeVec128), sys.AMD64)
		addF(simdPackage, "Float32x8.IsNaN", opLen1(ssa.OpIsNaNFloat32x8, types.TypeVec256), sys.AMD64)
		addF(simdPackage, "Float32x16.IsNaN", opLen1(ssa.OpIsNaNFloat32x16, types.TypeVec512), sys.AMD64)
		addF(simdPackage, "Float64x2.IsNaN", opLen1(ssa.OpIsNaNFloat64x2, types.TypeVec128), sys.AMD64)
		addF(simdPackage, "Float64x4.IsNaN", opLen1(ssa.OpIsNaNFloat64x4, types.TypeVec256), sys.AMD64)
		addF(simdPackage, "Float64x8.IsNaN", opLen1(ssa.OpIsNaNFloat64x8, types.TypeVec512), sys.AMD64)

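		// sfp4 registers a four-index SelectFromPair intrinsic: when all
		// four indices are compile-time constants the call lowers to one
		// or two constant shuffles (see select4FromPair); otherwise it
		// falls back to a normal call.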
		sfp4 := func(method string, hwop ssa.Op, vectype *types.Type) {
			addF(simdPackage, method,
				func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
					x, a, b, c, d, y := args[0], args[1], args[2], args[3], args[4], args[5]
					if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 && c.Op == ssa.OpConst8 && d.Op == ssa.OpConst8 {
						z := select4FromPair(x, a, b, c, d, y, s, hwop, vectype)
						if z != nil {
							return z
						}
					}
					return s.callResult(n, callNormal)
				},
				sys.AMD64)
		}

		sfp4("Int32x4.SelectFromPair", ssa.OpconcatSelectedConstantInt32x4, types.TypeVec128)
		sfp4("Uint32x4.SelectFromPair", ssa.OpconcatSelectedConstantUint32x4, types.TypeVec128)
		sfp4("Float32x4.SelectFromPair", ssa.OpconcatSelectedConstantFloat32x4, types.TypeVec128)

		sfp4("Int32x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt32x8, types.TypeVec256)
		sfp4("Uint32x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint32x8, types.TypeVec256)
		sfp4("Float32x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat32x8, types.TypeVec256)

		sfp4("Int32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt32x16, types.TypeVec512)
		sfp4("Uint32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint32x16, types.TypeVec512)
		sfp4("Float32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat32x16, types.TypeVec512)

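		// sfp2 is the two-index analogue; cscimm computes the shuffle
		// immediate for the target's lane grouping.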
		sfp2 := func(method string, hwop ssa.Op, vectype *types.Type, cscimm func(i, j uint8) int64) {
			addF(simdPackage, method,
				func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
					x, a, b, y := args[0], args[1], args[2], args[3]
					if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 {
						z := select2FromPair(x, a, b, y, s, hwop, vectype, cscimm)
						if z != nil {
							return z
						}
					}
					return s.callResult(n, callNormal)
				},
				sys.AMD64)
		}

		sfp2("Uint64x2.SelectFromPair", ssa.OpconcatSelectedConstantUint64x2, types.TypeVec128, cscimm2)
		sfp2("Int64x2.SelectFromPair", ssa.OpconcatSelectedConstantInt64x2, types.TypeVec128, cscimm2)
		sfp2("Float64x2.SelectFromPair", ssa.OpconcatSelectedConstantFloat64x2, types.TypeVec128, cscimm2)

		sfp2("Uint64x4.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint64x4, types.TypeVec256, cscimm2g2)
		sfp2("Int64x4.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt64x4, types.TypeVec256, cscimm2g2)
		sfp2("Float64x4.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat64x4, types.TypeVec256, cscimm2g2)

		sfp2("Uint64x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint64x8, types.TypeVec512, cscimm2g4)
		sfp2("Int64x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt64x8, types.TypeVec512, cscimm2g4)
		sfp2("Float64x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat64x8, types.TypeVec512, cscimm2g4)
	}
}

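// cscimm4 packs four two-bit lane selectors into a shuffle immediate with
// lane 0 in the low bits: imm = d<<6 | c<<4 | b<<2 | a, sign-extended to
// the AuxInt encoding. For example, cscimm4(1, 0, 3, 2) == se(0b10_11_00_01).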
func cscimm4(a, b, c, d uint8) int64 {
	return se(a + b<<2 + c<<4 + d<<6)
}

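// cscimm2 packs two one-bit lane selectors: imm = b<<1 | a.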
func cscimm2(a, b uint8) int64 {
	return se(a + b<<1)
}

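// cscimm2g2 repeats the two-lane selector pattern across two 128-bit
// groups (immediate bits 0-1 and 2-3).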
func cscimm2g2(a, b uint8) int64 {
	g := cscimm2(a, b)
	return int64(int8(g + g<<2))
}

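// cscimm2g4 repeats the pattern across all four 128-bit groups of a
// 512-bit vector, filling the whole immediate byte.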
func cscimm2g4(a, b uint8) int64 {
	g := cscimm2g2(a, b)
	return int64(int8(g + g<<4))
}

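// Lane-source patterns for the constant SelectFromPair lowering. Reading
// the suffix left to right, letter i says whether result lane i comes from
// the Low (first) or High (second) input of the concatenated pair; the
// iota value packs those bits with lane 0 in bit 0.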
const (
	_LLLL = iota
	_HLLL
	_LHLL
	_HHLL
	_LLHL
	_HLHL
	_LHHL
	_HHHL
	_LLLH
	_HLLH
	_LHLH
	_HHLH
	_LLHH
	_HLHH
	_LHHH
	_HHHH
)

const (
	_LL = iota
	_HL
	_LH
	_HH
)

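// select2FromPair lowers a two-lane SelectFromPair whose indices are the
// constants _a and _b, each selecting one of the four lanes of the
// concatenated pair (x, y) (per 128-bit group, for the Grouped forms). It
// returns nil for out-of-range indices, making the caller fall back to a
// normal call. One two-operand shuffle always suffices; the operand order
// encodes which half each result lane reads from.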
func select2FromPair(x, _a, _b, y *ssa.Value, s *state, op ssa.Op, t *types.Type, csc func(a, b uint8) int64) *ssa.Value {
	a, b := uint8(_a.AuxInt8()), uint8(_b.AuxInt8())
	if a > 3 || b > 3 {
		return nil
	}
	pattern := (a&2)>>1 + (b & 2)
	a, b = a&1, b&1

	switch pattern {
	case _LL:
		return s.newValue2I(op, t, csc(a, b), x, x)
	case _HH:
		return s.newValue2I(op, t, csc(a, b), y, y)
	case _LH:
		return s.newValue2I(op, t, csc(a, b), x, y)
	case _HL:
		return s.newValue2I(op, t, csc(a, b), y, x)
	}
	panic("The preceding switch should have been exhaustive")
}

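// select4FromPair lowers a four-lane SelectFromPair whose indices are the
// constants _a.._d, each selecting one of the eight lanes of the
// concatenated pair (x, y); it returns nil for out-of-range indices. The
// underlying shuffle takes its two low result lanes from one operand and
// its two high lanes from the other, so patterns of that shape need one
// instruction, and every other pattern is assembled from two.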
func select4FromPair(x, _a, _b, _c, _d, y *ssa.Value, s *state, op ssa.Op, t *types.Type) *ssa.Value {
	a, b, c, d := uint8(_a.AuxInt8()), uint8(_b.AuxInt8()), uint8(_c.AuxInt8()), uint8(_d.AuxInt8())
	if a > 7 || b > 7 || c > 7 || d > 7 {
		return nil
	}
	pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1

	a, b, c, d = a&3, b&3, c&3, d&3

	switch pattern {
	case _LLLL:
		// All four lanes from x.
		return s.newValue2I(op, t, cscimm4(a, b, c, d), x, x)
	case _HHHH:
		// All four lanes from y.
		return s.newValue2I(op, t, cscimm4(a, b, c, d), y, y)
	case _LLHH:
		return s.newValue2I(op, t, cscimm4(a, b, c, d), x, y)
	case _HHLL:
		return s.newValue2I(op, t, cscimm4(a, b, c, d), y, x)

	// Lanes 0 and 1 read from different halves: gather them into z
	// first, then combine z with the common source of lanes 2 and 3.
	case _HLLL:
		z := s.newValue2I(op, t, cscimm4(a, a, b, b), y, x)
		return s.newValue2I(op, t, cscimm4(0, 2, c, d), z, x)
	case _LHLL:
		z := s.newValue2I(op, t, cscimm4(a, a, b, b), x, y)
		return s.newValue2I(op, t, cscimm4(0, 2, c, d), z, x)
	case _HLHH:
		z := s.newValue2I(op, t, cscimm4(a, a, b, b), y, x)
		return s.newValue2I(op, t, cscimm4(0, 2, c, d), z, y)
	case _LHHH:
		z := s.newValue2I(op, t, cscimm4(a, a, b, b), x, y)
		return s.newValue2I(op, t, cscimm4(0, 2, c, d), z, y)

	// Lanes 2 and 3 read from different halves: the mirror image.
	case _LLLH:
		z := s.newValue2I(op, t, cscimm4(c, c, d, d), x, y)
		return s.newValue2I(op, t, cscimm4(a, b, 0, 2), x, z)
	case _LLHL:
		z := s.newValue2I(op, t, cscimm4(c, c, d, d), y, x)
		return s.newValue2I(op, t, cscimm4(a, b, 0, 2), x, z)
	case _HHLH:
		z := s.newValue2I(op, t, cscimm4(c, c, d, d), x, y)
		return s.newValue2I(op, t, cscimm4(a, b, 0, 2), y, z)
	case _HHHL:
		z := s.newValue2I(op, t, cscimm4(c, c, d, d), y, x)
		return s.newValue2I(op, t, cscimm4(a, b, 0, 2), y, z)

	// Sources alternate: one shuffle gathers all four values, a second
	// permutes them into place.
	case _LHLH:
		z := s.newValue2I(op, t, cscimm4(a, c, b, d), x, y)
		return s.newValue2I(op, t, se(0b11_01_10_00), z, z)
	case _HLHL:
		z := s.newValue2I(op, t, cscimm4(b, d, a, c), x, y)
		return s.newValue2I(op, t, se(0b01_11_00_10), z, z)
	case _HLLH:
		z := s.newValue2I(op, t, cscimm4(b, c, a, d), x, y)
		return s.newValue2I(op, t, se(0b11_01_00_10), z, z)
	case _LHHL:
		z := s.newValue2I(op, t, cscimm4(a, d, b, c), x, y)
		return s.newValue2I(op, t, se(0b01_11_10_00), z, z)
	}
	panic("The preceding switch should have been exhaustive")
}

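// se sign-extends an 8-bit immediate to the int64 AuxInt encoding.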
func se(x uint8) int64 {
	return int64(int8(x))
}

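// The opLenN helpers build intrinsic builders that pass N arguments to a
// single SSA op. A digit suffix such as _21 or _231 gives the order in
// which the Go arguments are passed to the op.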
func opLen1(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue1(op, t, args[0])
	}
}

func opLen2(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue2(op, t, args[0], args[1])
	}
}

func opLen2_21(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue2(op, t, args[1], args[0])
	}
}

func opLen3(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue3(op, t, args[0], args[1], args[2])
	}
}

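// ssaVecBySize maps a vector's size in bytes to its SSA vector type.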
var ssaVecBySize = map[int64]*types.Type{
	16: types.TypeVec128,
	32: types.TypeVec256,
	64: types.TypeVec512,
}

func opLen3_31Zero3(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		// Pick the SSA vector type from the operand size and pass a
		// zeroed vector of that type as the op's first operand.
		if t, ok := ssaVecBySize[args[1].Type.Size()]; !ok {
			panic("unknown simd vector size")
		} else {
			return s.newValue3(op, t, s.newValue0(ssa.OpZeroSIMD, t), args[1], args[0])
		}
	}
}

func opLen3_21(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue3(op, t, args[1], args[0], args[2])
	}
}

func opLen3_231(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue3(op, t, args[2], args[0], args[1])
	}
}

func opLen4(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue4(op, t, args[0], args[1], args[2], args[3])
	}
}

func opLen4_231(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue4(op, t, args[2], args[0], args[1], args[3])
	}
}

func opLen4_31(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue4(op, t, args[2], args[1], args[0], args[3])
	}
}

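// immJumpTable lowers an intrinsic whose 8-bit immediate operand idx is
// not a compile-time constant: it emits a 256-way jump table on idx, with
// genOp generating the body for each possible immediate value.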
func immJumpTable(s *state, idx *ssa.Value, intrinsicCall *ir.CallExpr, genOp func(*state, int)) *ssa.Value {
	// bEnd is the join block that every jump-table target falls through to.
	bEnd := s.f.NewBlock(ssa.BlockPlain)

	if !idx.Type.IsKind(types.TUINT8) {
		panic("immJumpTable expects uint8 value")
	}

	// The jump table's control value must be uintptr-sized.
	t := types.Types[types.TUINTPTR]
	idx = s.conv(nil, idx, idx.Type, t)

	b := s.curBlock
	b.Kind = ssa.BlockJumpTable
	b.Pos = intrinsicCall.Pos()
	if base.Flag.Cfg.SpectreIndex {
		// Mask the index so speculative execution stays within the table.
		idx = s.newValue2(ssa.OpSpectreSliceIndex, t, idx, s.uintptrConstant(255))
	}
	b.SetControl(idx)
	targets := [256]*ssa.Block{}
	for i := range 256 {
		t := s.f.NewBlock(ssa.BlockPlain)
		targets[i] = t
		b.AddEdgeTo(t)
	}
	s.endBlock()

	// Populate each target: genOp emits the operation with immediate i,
	// then the target falls through to bEnd (unless genOp ended the
	// block, e.g. with a runtime panic).
	for i, t := range targets {
		s.startBlock(t)
		genOp(s, i)
		if t.Kind != ssa.BlockExit {
			t.AddEdgeTo(bEnd)
		}
		s.endBlock()
	}

	s.startBlock(bEnd)
	ret := s.variable(intrinsicCall, intrinsicCall.Type())
	return ret
}

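// The opLenNImm8 builders handle ops with an 8-bit immediate argument: a
// constant immediate is folded into AuxInt (shifted left by offset), and
// anything else falls back to immJumpTable above.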
func opLen1Imm8(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[1].Op == ssa.OpConst8 {
			return s.newValue1I(op, t, args[1].AuxInt<<int64(offset), args[0])
		}
		return immJumpTable(s, args[1], n, func(sNew *state, idx int) {
			// One jump-table target per possible immediate value.
			s.vars[n] = sNew.newValue1I(op, t, int64(int8(idx<<offset)), args[0])
		})
	}
}

func opLen2Imm8(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[1].Op == ssa.OpConst8 {
			return s.newValue2I(op, t, args[1].AuxInt<<int64(offset), args[0], args[2])
		}
		return immJumpTable(s, args[1], n, func(sNew *state, idx int) {
			s.vars[n] = sNew.newValue2I(op, t, int64(int8(idx<<offset)), args[0], args[2])
		})
	}
}

func opLen3Imm8(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[1].Op == ssa.OpConst8 {
			return s.newValue3I(op, t, args[1].AuxInt<<int64(offset), args[0], args[2], args[3])
		}
		return immJumpTable(s, args[1], n, func(sNew *state, idx int) {
			s.vars[n] = sNew.newValue3I(op, t, int64(int8(idx<<offset)), args[0], args[2], args[3])
		})
	}
}

func opLen2Imm8_2I(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[2].Op == ssa.OpConst8 {
			return s.newValue2I(op, t, args[2].AuxInt<<int64(offset), args[0], args[1])
		}
		return immJumpTable(s, args[2], n, func(sNew *state, idx int) {
			s.vars[n] = sNew.newValue2I(op, t, int64(int8(idx<<offset)), args[0], args[1])
		})
	}
}

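// opLen2Imm8_II handles ops taking two 2-bit immediates packed as
// i1 + i2<<4. If either immediate is non-constant, the packed byte drives
// a jump table, and out-of-range values panic at run time.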
func opLen2Imm8_II(op ssa.Op, t *types.Type, _ int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[1].Op == ssa.OpConst8 && args[2].Op == ssa.OpConst8 && args[1].AuxInt & ^3 == 0 && args[2].AuxInt & ^3 == 0 {
			i1, i2 := args[1].AuxInt, args[2].AuxInt
			return s.newValue2I(op, t, int64(int8(i1+i2<<4)), args[0], args[3])
		}
		four := s.constInt64(types.Types[types.TUINT8], 4)
		shifted := s.newValue2(ssa.OpLsh8x8, types.Types[types.TUINT8], args[2], four)
		combined := s.newValue2(ssa.OpAdd8, types.Types[types.TUINT8], args[1], shifted)
		return immJumpTable(s, combined, n, func(sNew *state, idx int) {
			// Only entries with both 2-bit fields in range emit the
			// op; every other immediate panics.
			if idx & ^(3+3<<4) == 0 {
				s.vars[n] = sNew.newValue2I(op, t, int64(int8(idx)), args[0], args[3])
			} else {
				sNew.rtcall(ir.Syms.PanicSimdImm, false, nil)
			}
		})
	}
}

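// opLen2Imm8_SHA1RNDS4 is opLen2Imm8 with the immediate masked to two
// bits, matching SHA1RNDS4's four defined round functions.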
func opLen2Imm8_SHA1RNDS4(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[1].Op == ssa.OpConst8 {
			return s.newValue2I(op, t, (args[1].AuxInt<<int64(offset))&0b11, args[0], args[2])
		}
		return immJumpTable(s, args[1], n, func(sNew *state, idx int) {
			s.vars[n] = sNew.newValue2I(op, t, int64(int8(idx<<offset))&0b11, args[0], args[2])
		})
	}
}

func opLen3Imm8_2I(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[2].Op == ssa.OpConst8 {
			return s.newValue3I(op, t, args[2].AuxInt<<int64(offset), args[0], args[1], args[3])
		}
		return immJumpTable(s, args[2], n, func(sNew *state, idx int) {
			s.vars[n] = sNew.newValue3I(op, t, int64(int8(idx<<offset)), args[0], args[1], args[3])
		})
	}
}

func opLen4Imm8(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		if args[1].Op == ssa.OpConst8 {
			return s.newValue4I(op, t, args[1].AuxInt<<int64(offset), args[0], args[2], args[3], args[4])
		}
		return immJumpTable(s, args[1], n, func(sNew *state, idx int) {
			s.vars[n] = sNew.newValue4I(op, t, int64(int8(idx<<offset)), args[0], args[2], args[3], args[4])
		})
	}
}

func simdLoad() func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue2(ssa.OpLoad, n.Type(), args[0], s.mem())
	}
}

func simdStore() func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		s.store(args[0].Type, args[1], args[0])
		return nil
	}
}

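// The conversion opcode tables are indexed by element bit width, then by
// lane count.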
var cvtVToMaskOpcodes = map[int]map[int]ssa.Op{
	8:  {16: ssa.OpCvt16toMask8x16, 32: ssa.OpCvt32toMask8x32, 64: ssa.OpCvt64toMask8x64},
	16: {8: ssa.OpCvt8toMask16x8, 16: ssa.OpCvt16toMask16x16, 32: ssa.OpCvt32toMask16x32},
	32: {4: ssa.OpCvt8toMask32x4, 8: ssa.OpCvt8toMask32x8, 16: ssa.OpCvt16toMask32x16},
	64: {2: ssa.OpCvt8toMask64x2, 4: ssa.OpCvt8toMask64x4, 8: ssa.OpCvt8toMask64x8},
}

var cvtMaskToVOpcodes = map[int]map[int]ssa.Op{
	8:  {16: ssa.OpCvtMask8x16to16, 32: ssa.OpCvtMask8x32to32, 64: ssa.OpCvtMask8x64to64},
	16: {8: ssa.OpCvtMask16x8to8, 16: ssa.OpCvtMask16x16to16, 32: ssa.OpCvtMask16x32to32},
	32: {4: ssa.OpCvtMask32x4to8, 8: ssa.OpCvtMask32x8to8, 16: ssa.OpCvtMask32x16to16},
	64: {2: ssa.OpCvtMask64x2to8, 4: ssa.OpCvtMask64x4to8, 8: ssa.OpCvtMask64x8to8},
}

func simdCvtVToMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		op := cvtVToMaskOpcodes[elemBits][lanes]
		if op == 0 {
			panic(fmt.Sprintf("Unknown mask shape: Mask%dx%d", elemBits, lanes))
		}
		return s.newValue1(op, types.TypeMask, args[0])
	}
}

func simdCvtMaskToV(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		op := cvtMaskToVOpcodes[elemBits][lanes]
		if op == 0 {
			panic(fmt.Sprintf("Unknown mask shape: Mask%dx%d", elemBits, lanes))
		}
		return s.newValue1(op, n.Type(), args[0])
	}
}

func simdMaskedLoad(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue3(op, n.Type(), args[0], args[1], s.mem())
	}
}

func simdMaskedStore(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		s.vars[memVar] = s.newValue4A(op, types.TypeMem, args[0].Type, args[1], args[2], args[0], s.mem())
		return nil
	}
}

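// findIntrinsic returns a function that builds the SSA equivalent of the
// call identified by sym, or nil if the call must be compiled as an
// ordinary call.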
func findIntrinsic(sym *types.Sym) intrinsicBuilder {
	if sym == nil || sym.Pkg == nil {
		return nil
	}
	pkg := sym.Pkg.Path
	if sym.Pkg == ir.Pkgs.Runtime {
		pkg = "runtime"
	}
	if base.Flag.Race && pkg == "sync/atomic" {
		// The race detector needs to intercept these calls, so they
		// cannot be intrinsified.
		return nil
	}

	// On soft-float platforms, skip the math intrinsics to avoid emitting
	// hardware floating-point instructions.
	if Arch.SoftFloat && pkg == "math" {
		return nil
	}

	fn := sym.Name
	if ssa.IntrinsicsDisable {
		if pkg == "internal/runtime/sys" && (fn == "GetCallerPC" || fn == "GetCallerSP" || fn == "GetClosurePtr") ||
			pkg == simdPackage {
			// These functions have no Go definition, so they must be
			// intrinsified even when intrinsics are disabled.
		} else {
			return nil
		}
	}
	return intrinsics.lookup(Arch.LinkArch.Arch, pkg, fn)
}

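// IsIntrinsicCall reports whether the call can be replaced with an
// intrinsic; method expressions are resolved to their underlying function
// symbol first.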
func IsIntrinsicCall(n *ir.CallExpr) bool {
	if n == nil {
		return false
	}
	name, ok := n.Fun.(*ir.Name)
	if !ok {
		if n.Fun.Op() == ir.OMETHEXPR {
			if meth := ir.MethodExprName(n.Fun); meth != nil {
				if fn := meth.Func; fn != nil {
					return IsIntrinsicSym(fn.Sym())
				}
			}
		}
		return false
	}
	return IsIntrinsicSym(name.Sym())
}

func IsIntrinsicSym(sym *types.Sym) bool {
	return findIntrinsic(sym) != nil
}

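// GenIntrinsicBody generates a body for fn, which is assumed to have been
// declared without one: the body is a single call of fn to itself. That
// call is recognized and expanded as the intrinsic when the body is
// compiled, so the generated wrapper only provides code for the cases
// where fn is not intrinsified, such as when it is used as a func value.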
func GenIntrinsicBody(fn *ir.Func) {
	if ir.CurFunc != nil {
		base.FatalfAt(fn.Pos(), "enqueueFunc %v inside %v", fn, ir.CurFunc)
	}

	if base.Flag.LowerR != 0 {
		fmt.Println("generate intrinsic for", ir.FuncName(fn))
	}

	pos := fn.Pos()
	ft := fn.Type()
	var ret ir.Node

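	// Build a call of fn with fn's own receiver and parameters as
	// arguments. On its face this is infinite recursion, but the call is
	// expanded as the intrinsic during SSA construction, so it never
	// actually recurses.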
	call := ir.NewCallExpr(pos, ir.OCALLFUNC, fn.Nname, nil)
	call.Args = ir.RecvParamNames(ft)
	call.IsDDD = ft.IsVariadic()
	typecheck.Exprs(call.Args)
	call.SetTypecheck(1)
	call.SetWalked(true)
	ret = call
	if ft.NumResults() > 0 {
		if ft.NumResults() == 1 {
			call.SetType(ft.Result(0).Type)
		} else {
			call.SetType(ft.ResultsTuple())
		}
		n := ir.NewReturnStmt(base.Pos, nil)
		n.Results = []ir.Node{call}
		ret = n
	}
	fn.Body.Append(ret)

	if base.Flag.LowerR != 0 {
		ir.DumpList("generate intrinsic body", fn.Body)
	}

	ir.CurFunc = fn
	typecheck.Stmts(fn.Body)
	ir.CurFunc = nil
}