Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/logopt"
    10  	"cmd/compile/internal/reflectdata"
    11  	"cmd/compile/internal/types"
    12  	"cmd/internal/obj"
    13  	"cmd/internal/obj/s390x"
    14  	"cmd/internal/objabi"
    15  	"cmd/internal/src"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"internal/buildcfg"
    19  	"io"
    20  	"math"
    21  	"math/bits"
    22  	"os"
    23  	"path/filepath"
    24  	"strings"
    25  )
    26  
    27  type deadValueChoice bool
    28  
    29  const (
    30  	leaveDeadValues  deadValueChoice = false
    31  	removeDeadValues                 = true
    32  )
    33  
     34  // deadcode indicates whether applyRewrite should try to remove any values that become dead.
    35  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    36  	// repeat rewrites until we find no more rewrites
    37  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    38  	pendingLines.clear()
    39  	debug := f.pass.debug
    40  	if debug > 1 {
    41  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    42  	}
    43  	// if the number of rewrite iterations reaches itersLimit we will
    44  	// at that point turn on cycle detection. Instead of a fixed limit,
    45  	// size the limit according to func size to allow for cases such
    46  	// as the one in issue #66773.
    47  	itersLimit := f.NumBlocks()
    48  	if itersLimit < 20 {
    49  		itersLimit = 20
    50  	}
    51  	var iters int
    52  	var states map[string]bool
    53  	for {
    54  		change := false
    55  		deadChange := false
    56  		for _, b := range f.Blocks {
    57  			var b0 *Block
    58  			if debug > 1 {
    59  				b0 = new(Block)
    60  				*b0 = *b
    61  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    62  			}
    63  			for i, c := range b.ControlValues() {
    64  				for c.Op == OpCopy {
    65  					c = c.Args[0]
    66  					b.ReplaceControl(i, c)
    67  				}
    68  			}
    69  			if rb(b) {
    70  				change = true
    71  				if debug > 1 {
    72  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    73  				}
    74  			}
    75  			for j, v := range b.Values {
    76  				var v0 *Value
    77  				if debug > 1 {
    78  					v0 = new(Value)
    79  					*v0 = *v
    80  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    81  				}
    82  				if v.Uses == 0 && v.removeable() {
    83  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    84  						// Reset any values that are now unused, so that we decrement
     85  						// the use counts of their arguments.
    86  						// Not quite a deadcode pass, because it does not handle cycles.
    87  						// But it should help Uses==1 rules to fire.
    88  						v.reset(OpInvalid)
    89  						deadChange = true
    90  					}
    91  					// No point rewriting values which aren't used.
    92  					continue
    93  				}
    94  
    95  				vchange := phielimValue(v)
    96  				if vchange && debug > 1 {
    97  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
    98  				}
    99  
   100  				// Eliminate copy inputs.
   101  				// If any copy input becomes unused, mark it
   102  				// as invalid and discard its argument. Repeat
   103  				// recursively on the discarded argument.
   104  				// This phase helps remove phantom "dead copy" uses
   105  				// of a value so that a x.Uses==1 rule condition
   106  				// fires reliably.
   107  				for i, a := range v.Args {
   108  					if a.Op != OpCopy {
   109  						continue
   110  					}
   111  					aa := copySource(a)
   112  					v.SetArg(i, aa)
   113  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   114  					// to hold it.  The first candidate is the value that will replace a (aa),
   115  					// if it shares the same block and line and is eligible.
   116  					// The second option is v, which has a as an input.  Because aa is earlier in
   117  					// the data flow, it is the better choice.
   118  					if a.Pos.IsStmt() == src.PosIsStmt {
   119  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   120  							aa.Pos = aa.Pos.WithIsStmt()
   121  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   122  							v.Pos = v.Pos.WithIsStmt()
   123  						} else {
   124  							// Record the lost line and look for a new home after all rewrites are complete.
   125  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   126  							// line to appear in more than one block, but only one block is stored, so if both end
   127  							// up here, then one will be lost.
   128  							pendingLines.set(a.Pos, int32(a.Block.ID))
   129  						}
   130  						a.Pos = a.Pos.WithNotStmt()
   131  					}
   132  					vchange = true
   133  					for a.Uses == 0 {
   134  						b := a.Args[0]
   135  						a.reset(OpInvalid)
   136  						a = b
   137  					}
   138  				}
   139  				if vchange && debug > 1 {
   140  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   141  				}
   142  
   143  				// apply rewrite function
   144  				if rv(v) {
   145  					vchange = true
   146  					// If value changed to a poor choice for a statement boundary, move the boundary
   147  					if v.Pos.IsStmt() == src.PosIsStmt {
   148  						if k := nextGoodStatementIndex(v, j, b); k != j {
   149  							v.Pos = v.Pos.WithNotStmt()
   150  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   151  						}
   152  					}
   153  				}
   154  
   155  				change = change || vchange
   156  				if vchange && debug > 1 {
   157  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   158  				}
   159  			}
   160  		}
   161  		if !change && !deadChange {
   162  			break
   163  		}
   164  		iters++
   165  		if (iters > itersLimit || debug >= 2) && change {
   166  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   167  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   168  			// and the maximum value encountered during make.bash is 12.
   169  			// Start checking for cycles. (This is too expensive to do routinely.)
   170  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   171  			if states == nil {
   172  				states = make(map[string]bool)
   173  			}
   174  			h := f.rewriteHash()
   175  			if _, ok := states[h]; ok {
   176  				// We've found a cycle.
   177  				// To diagnose it, set debug to 2 and start again,
   178  				// so that we'll print all rules applied until we complete another cycle.
   179  				// If debug is already >= 2, we've already done that, so it's time to crash.
   180  				if debug < 2 {
   181  					debug = 2
   182  					states = make(map[string]bool)
   183  				} else {
   184  					f.Fatalf("rewrite cycle detected")
   185  				}
   186  			}
   187  			states[h] = true
   188  		}
   189  	}
   190  	// remove clobbered values
   191  	for _, b := range f.Blocks {
   192  		j := 0
   193  		for i, v := range b.Values {
   194  			vl := v.Pos
   195  			if v.Op == OpInvalid {
   196  				if v.Pos.IsStmt() == src.PosIsStmt {
   197  					pendingLines.set(vl, int32(b.ID))
   198  				}
   199  				f.freeValue(v)
   200  				continue
   201  			}
   202  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
   203  				pendingLines.remove(vl)
   204  				v.Pos = v.Pos.WithIsStmt()
   205  			}
   206  			if i != j {
   207  				b.Values[j] = v
   208  			}
   209  			j++
   210  		}
   211  		if pendingLines.get(b.Pos) == int32(b.ID) {
   212  			b.Pos = b.Pos.WithIsStmt()
   213  			pendingLines.remove(b.Pos)
   214  		}
   215  		b.truncateValues(j)
   216  	}
   217  }
   218  
   219  // Common functions called from rewriting rules
   220  
   221  func is64BitFloat(t *types.Type) bool {
   222  	return t.Size() == 8 && t.IsFloat()
   223  }
   224  
   225  func is32BitFloat(t *types.Type) bool {
   226  	return t.Size() == 4 && t.IsFloat()
   227  }
   228  
   229  func is64BitInt(t *types.Type) bool {
   230  	return t.Size() == 8 && t.IsInteger()
   231  }
   232  
   233  func is32BitInt(t *types.Type) bool {
   234  	return t.Size() == 4 && t.IsInteger()
   235  }
   236  
   237  func is16BitInt(t *types.Type) bool {
   238  	return t.Size() == 2 && t.IsInteger()
   239  }
   240  
   241  func is8BitInt(t *types.Type) bool {
   242  	return t.Size() == 1 && t.IsInteger()
   243  }
   244  
   245  func isPtr(t *types.Type) bool {
   246  	return t.IsPtrShaped()
   247  }
   248  
   249  // mergeSym merges two symbolic offsets. There is no real merging of
    250  // offsets; we just pick the non-nil one.
   251  func mergeSym(x, y Sym) Sym {
   252  	if x == nil {
   253  		return y
   254  	}
   255  	if y == nil {
   256  		return x
   257  	}
   258  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   259  }
   260  
   261  func canMergeSym(x, y Sym) bool {
   262  	return x == nil || y == nil
   263  }
   264  
   265  // canMergeLoadClobber reports whether the load can be merged into target without
   266  // invalidating the schedule.
   267  // It also checks that the other non-load argument x is something we
   268  // are ok with clobbering.
   269  func canMergeLoadClobber(target, load, x *Value) bool {
   270  	// The register containing x is going to get clobbered.
   271  	// Don't merge if we still need the value of x.
   272  	// We don't have liveness information here, but we can
   273  	// approximate x dying with:
   274  	//  1) target is x's only use.
   275  	//  2) target is not in a deeper loop than x.
   276  	if x.Uses != 1 {
   277  		return false
   278  	}
   279  	loopnest := x.Block.Func.loopnest()
   280  	loopnest.calculateDepths()
   281  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   282  		return false
   283  	}
   284  	return canMergeLoad(target, load)
   285  }
   286  
   287  // canMergeLoad reports whether the load can be merged into target without
   288  // invalidating the schedule.
   289  func canMergeLoad(target, load *Value) bool {
   290  	if target.Block.ID != load.Block.ID {
   291  		// If the load is in a different block do not merge it.
   292  		return false
   293  	}
   294  
   295  	// We can't merge the load into the target if the load
   296  	// has more than one use.
   297  	if load.Uses != 1 {
   298  		return false
   299  	}
   300  
   301  	mem := load.MemoryArg()
   302  
   303  	// We need the load's memory arg to still be alive at target. That
   304  	// can't be the case if one of target's args depends on a memory
   305  	// state that is a successor of load's memory arg.
   306  	//
   307  	// For example, it would be invalid to merge load into target in
   308  	// the following situation because newmem has killed oldmem
   309  	// before target is reached:
   310  	//     load = read ... oldmem
   311  	//   newmem = write ... oldmem
   312  	//     arg0 = read ... newmem
   313  	//   target = add arg0 load
   314  	//
   315  	// If the argument comes from a different block then we can exclude
   316  	// it immediately because it must dominate load (which is in the
   317  	// same block as target).
   318  	var args []*Value
   319  	for _, a := range target.Args {
   320  		if a != load && a.Block.ID == target.Block.ID {
   321  			args = append(args, a)
   322  		}
   323  	}
   324  
   325  	// memPreds contains memory states known to be predecessors of load's
   326  	// memory state. It is lazily initialized.
   327  	var memPreds map[*Value]bool
   328  	for i := 0; len(args) > 0; i++ {
   329  		const limit = 100
   330  		if i >= limit {
   331  			// Give up if we have done a lot of iterations.
   332  			return false
   333  		}
   334  		v := args[len(args)-1]
   335  		args = args[:len(args)-1]
   336  		if target.Block.ID != v.Block.ID {
   337  			// Since target and load are in the same block
   338  			// we can stop searching when we leave the block.
   339  			continue
   340  		}
   341  		if v.Op == OpPhi {
   342  			// A Phi implies we have reached the top of the block.
   343  			// The memory phi, if it exists, is always
   344  			// the first logical store in the block.
   345  			continue
   346  		}
   347  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   348  			// We could handle this situation however it is likely
   349  			// to be very rare.
   350  			return false
   351  		}
   352  		if v.Op.SymEffect()&SymAddr != 0 {
   353  			// This case prevents an operation that calculates the
   354  			// address of a local variable from being forced to schedule
   355  			// before its corresponding VarDef.
   356  			// See issue 28445.
   357  			//   v1 = LOAD ...
   358  			//   v2 = VARDEF
   359  			//   v3 = LEAQ
   360  			//   v4 = CMPQ v1 v3
   361  			// We don't want to combine the CMPQ with the load, because
   362  			// that would force the CMPQ to schedule before the VARDEF, which
   363  			// in turn requires the LEAQ to schedule before the VARDEF.
   364  			return false
   365  		}
   366  		if v.Type.IsMemory() {
   367  			if memPreds == nil {
   368  				// Initialise a map containing memory states
   369  				// known to be predecessors of load's memory
   370  				// state.
   371  				memPreds = make(map[*Value]bool)
   372  				m := mem
   373  				const limit = 50
   374  				for i := 0; i < limit; i++ {
   375  					if m.Op == OpPhi {
   376  						// The memory phi, if it exists, is always
   377  						// the first logical store in the block.
   378  						break
   379  					}
   380  					if m.Block.ID != target.Block.ID {
   381  						break
   382  					}
   383  					if !m.Type.IsMemory() {
   384  						break
   385  					}
   386  					memPreds[m] = true
   387  					if len(m.Args) == 0 {
   388  						break
   389  					}
   390  					m = m.MemoryArg()
   391  				}
   392  			}
   393  
   394  			// We can merge if v is a predecessor of mem.
   395  			//
   396  			// For example, we can merge load into target in the
   397  			// following scenario:
   398  			//      x = read ... v
   399  			//    mem = write ... v
   400  			//   load = read ... mem
   401  			// target = add x load
   402  			if memPreds[v] {
   403  				continue
   404  			}
   405  			return false
   406  		}
   407  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   408  			// If v takes mem as an input then we know mem
   409  			// is valid at this point.
   410  			continue
   411  		}
   412  		for _, a := range v.Args {
   413  			if target.Block.ID == a.Block.ID {
   414  				args = append(args, a)
   415  			}
   416  		}
   417  	}
   418  
   419  	return true
   420  }
   421  
   422  // isSameCall reports whether sym is the same as the given named symbol.
   423  func isSameCall(sym interface{}, name string) bool {
   424  	fn := sym.(*AuxCall).Fn
   425  	return fn != nil && fn.String() == name
   426  }
   427  
    428  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   429  func canLoadUnaligned(c *Config) bool {
   430  	return c.ctxt.Arch.Alignment == 1
   431  }
   432  
   433  // nlzX returns the number of leading zeros.
   434  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   435  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   436  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   437  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   438  
   439  // ntzX returns the number of trailing zeros.
   440  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   441  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   442  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   443  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
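         // For example, nlz32(1) == 31 and ntz32(8) == 3; the argument is treated as an
         // unsigned bit pattern, so nlz64(-1) == 0.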
   444  
   445  func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
   446  func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
   447  func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
   448  func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
   449  func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
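         // For example, oneBit(8) == true (exactly one bit set), while oneBit(6) == false and oneBit(0) == false.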
   450  
   451  // nto returns the number of trailing ones.
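         // For example, nto(0b0111) == 3, and nto(-1) == 64 since ^x is then all zeros.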
   452  func nto(x int64) int64 {
   453  	return int64(ntz64(^x))
   454  }
   455  
    456  // logX returns the base-2 logarithm of n.
   457  // n must be a positive power of 2 (isPowerOfTwoX returns true).
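         // For example, log64(8) == 3 and log32(1) == 0.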
   458  func log8(n int8) int64 {
   459  	return int64(bits.Len8(uint8(n))) - 1
   460  }
   461  func log16(n int16) int64 {
   462  	return int64(bits.Len16(uint16(n))) - 1
   463  }
   464  func log32(n int32) int64 {
   465  	return int64(bits.Len32(uint32(n))) - 1
   466  }
   467  func log64(n int64) int64 {
   468  	return int64(bits.Len64(uint64(n))) - 1
   469  }
   470  
    471  // log2uint32 returns the base-2 logarithm of uint32(n), with log2(0) = -1.
   472  // Rounds down.
   473  func log2uint32(n int64) int64 {
   474  	return int64(bits.Len32(uint32(n))) - 1
   475  }
   476  
   477  // isPowerOfTwoX functions report whether n is a power of 2.
   478  func isPowerOfTwo8(n int8) bool {
   479  	return n > 0 && n&(n-1) == 0
   480  }
   481  func isPowerOfTwo16(n int16) bool {
   482  	return n > 0 && n&(n-1) == 0
   483  }
   484  func isPowerOfTwo32(n int32) bool {
   485  	return n > 0 && n&(n-1) == 0
   486  }
   487  func isPowerOfTwo64(n int64) bool {
   488  	return n > 0 && n&(n-1) == 0
   489  }
   490  
   491  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   492  func isUint64PowerOfTwo(in int64) bool {
   493  	n := uint64(in)
   494  	return n > 0 && n&(n-1) == 0
   495  }
   496  
   497  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   498  func isUint32PowerOfTwo(in int64) bool {
   499  	n := uint64(uint32(in))
   500  	return n > 0 && n&(n-1) == 0
   501  }
   502  
   503  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   504  func is32Bit(n int64) bool {
   505  	return n == int64(int32(n))
   506  }
   507  
   508  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   509  func is16Bit(n int64) bool {
   510  	return n == int64(int16(n))
   511  }
   512  
   513  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   514  func is8Bit(n int64) bool {
   515  	return n == int64(int8(n))
   516  }
   517  
   518  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   519  func isU8Bit(n int64) bool {
   520  	return n == int64(uint8(n))
   521  }
   522  
   523  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   524  func isU12Bit(n int64) bool {
   525  	return 0 <= n && n < (1<<12)
   526  }
   527  
   528  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   529  func isU16Bit(n int64) bool {
   530  	return n == int64(uint16(n))
   531  }
   532  
   533  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   534  func isU32Bit(n int64) bool {
   535  	return n == int64(uint32(n))
   536  }
   537  
   538  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   539  func is20Bit(n int64) bool {
   540  	return -(1<<19) <= n && n < (1<<19)
   541  }
   542  
   543  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   544  func b2i(b bool) int64 {
   545  	if b {
   546  		return 1
   547  	}
   548  	return 0
   549  }
   550  
   551  // b2i32 translates a boolean value to 0 or 1.
   552  func b2i32(b bool) int32 {
   553  	if b {
   554  		return 1
   555  	}
   556  	return 0
   557  }
   558  
   559  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   560  // A shift is bounded if it is shifting by less than the width of the shifted value.
   561  func shiftIsBounded(v *Value) bool {
   562  	return v.AuxInt != 0
   563  }
   564  
   565  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   566  // generated code as much as possible.
   567  func canonLessThan(x, y *Value) bool {
   568  	if x.Op != y.Op {
   569  		return x.Op < y.Op
   570  	}
   571  	if !x.Pos.SameFileAndLine(y.Pos) {
   572  		return x.Pos.Before(y.Pos)
   573  	}
   574  	return x.ID < y.ID
   575  }
   576  
   577  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   578  // of the mantissa. It will panic if the truncation results in lost information.
   579  func truncate64Fto32F(f float64) float32 {
   580  	if !isExactFloat32(f) {
   581  		panic("truncate64Fto32F: truncation is not exact")
   582  	}
   583  	if !math.IsNaN(f) {
   584  		return float32(f)
   585  	}
   586  	// NaN bit patterns aren't necessarily preserved across conversion
   587  	// instructions so we need to do the conversion manually.
   588  	b := math.Float64bits(f)
   589  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   590  	//          | sign                  | exponent   | mantissa       |
   591  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   592  	return math.Float32frombits(r)
   593  }
   594  
   595  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   596  // pattern of the mantissa.
   597  func extend32Fto64F(f float32) float64 {
   598  	if !math.IsNaN(float64(f)) {
   599  		return float64(f)
   600  	}
   601  	// NaN bit patterns aren't necessarily preserved across conversion
   602  	// instructions so we need to do the conversion manually.
   603  	b := uint64(math.Float32bits(f))
   604  	//   | sign                  | exponent      | mantissa                    |
   605  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   606  	return math.Float64frombits(r)
   607  }
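         // For example, the float32 quiet NaN bit pattern 0x7fc00001 extends to the float64
         // pattern 0x7ff8000020000000, and truncate64Fto32F maps that back to 0x7fc00001.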
   608  
   609  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   610  func DivisionNeedsFixUp(v *Value) bool {
   611  	return v.AuxInt == 0
   612  }
   613  
   614  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   615  func auxFrom64F(f float64) int64 {
   616  	if f != f {
   617  		panic("can't encode a NaN in AuxInt field")
   618  	}
   619  	return int64(math.Float64bits(f))
   620  }
   621  
   622  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   623  func auxFrom32F(f float32) int64 {
   624  	if f != f {
   625  		panic("can't encode a NaN in AuxInt field")
   626  	}
   627  	return int64(math.Float64bits(extend32Fto64F(f)))
   628  }
   629  
   630  // auxTo32F decodes a float32 from the AuxInt value provided.
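         // Float32 aux values round-trip exactly: the widening in auxFrom32F is lossless,
         // so auxTo32F(auxFrom32F(1.5)) == 1.5.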
   631  func auxTo32F(i int64) float32 {
   632  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   633  }
   634  
   635  // auxTo64F decodes a float64 from the AuxInt value provided.
   636  func auxTo64F(i int64) float64 {
   637  	return math.Float64frombits(uint64(i))
   638  }
   639  
   640  func auxIntToBool(i int64) bool {
   641  	if i == 0 {
   642  		return false
   643  	}
   644  	return true
   645  }
   646  func auxIntToInt8(i int64) int8 {
   647  	return int8(i)
   648  }
   649  func auxIntToInt16(i int64) int16 {
   650  	return int16(i)
   651  }
   652  func auxIntToInt32(i int64) int32 {
   653  	return int32(i)
   654  }
   655  func auxIntToInt64(i int64) int64 {
   656  	return i
   657  }
   658  func auxIntToUint8(i int64) uint8 {
   659  	return uint8(i)
   660  }
   661  func auxIntToFloat32(i int64) float32 {
   662  	return float32(math.Float64frombits(uint64(i)))
   663  }
   664  func auxIntToFloat64(i int64) float64 {
   665  	return math.Float64frombits(uint64(i))
   666  }
   667  func auxIntToValAndOff(i int64) ValAndOff {
   668  	return ValAndOff(i)
   669  }
   670  func auxIntToArm64BitField(i int64) arm64BitField {
   671  	return arm64BitField(i)
   672  }
   673  func auxIntToInt128(x int64) int128 {
   674  	if x != 0 {
   675  		panic("nonzero int128 not allowed")
   676  	}
   677  	return 0
   678  }
   679  func auxIntToFlagConstant(x int64) flagConstant {
   680  	return flagConstant(x)
   681  }
   682  
   683  func auxIntToOp(cc int64) Op {
   684  	return Op(cc)
   685  }
   686  
   687  func boolToAuxInt(b bool) int64 {
   688  	if b {
   689  		return 1
   690  	}
   691  	return 0
   692  }
   693  func int8ToAuxInt(i int8) int64 {
   694  	return int64(i)
   695  }
   696  func int16ToAuxInt(i int16) int64 {
   697  	return int64(i)
   698  }
   699  func int32ToAuxInt(i int32) int64 {
   700  	return int64(i)
   701  }
   702  func int64ToAuxInt(i int64) int64 {
   703  	return int64(i)
   704  }
   705  func uint8ToAuxInt(i uint8) int64 {
   706  	return int64(int8(i))
   707  }
   708  func float32ToAuxInt(f float32) int64 {
   709  	return int64(math.Float64bits(float64(f)))
   710  }
   711  func float64ToAuxInt(f float64) int64 {
   712  	return int64(math.Float64bits(f))
   713  }
   714  func valAndOffToAuxInt(v ValAndOff) int64 {
   715  	return int64(v)
   716  }
   717  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   718  	return int64(v)
   719  }
   720  func int128ToAuxInt(x int128) int64 {
   721  	if x != 0 {
   722  		panic("nonzero int128 not allowed")
   723  	}
   724  	return 0
   725  }
   726  func flagConstantToAuxInt(x flagConstant) int64 {
   727  	return int64(x)
   728  }
   729  
   730  func opToAuxInt(o Op) int64 {
   731  	return int64(o)
   732  }
   733  
   734  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   735  type Aux interface {
   736  	CanBeAnSSAAux()
   737  }
   738  
   739  // for now only used to mark moves that need to avoid clobbering flags
   740  type auxMark bool
   741  
   742  func (auxMark) CanBeAnSSAAux() {}
   743  
   744  var AuxMark auxMark
   745  
   746  // stringAux wraps string values for use in Aux.
   747  type stringAux string
   748  
   749  func (stringAux) CanBeAnSSAAux() {}
   750  
   751  func auxToString(i Aux) string {
   752  	return string(i.(stringAux))
   753  }
   754  func auxToSym(i Aux) Sym {
   755  	// TODO: kind of a hack - allows nil interface through
   756  	s, _ := i.(Sym)
   757  	return s
   758  }
   759  func auxToType(i Aux) *types.Type {
   760  	return i.(*types.Type)
   761  }
   762  func auxToCall(i Aux) *AuxCall {
   763  	return i.(*AuxCall)
   764  }
   765  func auxToS390xCCMask(i Aux) s390x.CCMask {
   766  	return i.(s390x.CCMask)
   767  }
   768  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   769  	return i.(s390x.RotateParams)
   770  }
   771  
   772  func StringToAux(s string) Aux {
   773  	return stringAux(s)
   774  }
   775  func symToAux(s Sym) Aux {
   776  	return s
   777  }
   778  func callToAux(s *AuxCall) Aux {
   779  	return s
   780  }
   781  func typeToAux(t *types.Type) Aux {
   782  	return t
   783  }
   784  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   785  	return c
   786  }
   787  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   788  	return r
   789  }
   790  
   791  // uaddOvf reports whether unsigned a+b would overflow.
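         // For example, uaddOvf(-1, 1) == true because the unsigned sum wraps to 0, while uaddOvf(1, 2) == false.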
   792  func uaddOvf(a, b int64) bool {
   793  	return uint64(a)+uint64(b) < uint64(a)
   794  }
   795  
   796  // loadLSymOffset simulates reading a word at an offset into a
   797  // read-only symbol's runtime memory. If it would read a pointer to
   798  // another symbol, that symbol is returned. Otherwise, it returns nil.
   799  func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
   800  	if lsym.Type != objabi.SRODATA {
   801  		return nil
   802  	}
   803  
   804  	for _, r := range lsym.R {
   805  		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
   806  			return r.Sym
   807  		}
   808  	}
   809  
   810  	return nil
   811  }
   812  
   813  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   814  	v.Op = OpStaticLECall
   815  	auxcall := v.Aux.(*AuxCall)
   816  	auxcall.Fn = sym
   817  	// Remove first arg
   818  	v.Args[0].Uses--
   819  	copy(v.Args[0:], v.Args[1:])
   820  	v.Args[len(v.Args)-1] = nil // aid GC
   821  	v.Args = v.Args[:len(v.Args)-1]
   822  	if f := v.Block.Func; f.pass.debug > 0 {
   823  		f.Warnl(v.Pos, "de-virtualizing call")
   824  	}
   825  	return v
   826  }
   827  
   828  // isSamePtr reports whether p1 and p2 point to the same address.
   829  func isSamePtr(p1, p2 *Value) bool {
   830  	if p1 == p2 {
   831  		return true
   832  	}
   833  	if p1.Op != p2.Op {
   834  		return false
   835  	}
   836  	switch p1.Op {
   837  	case OpOffPtr:
   838  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   839  	case OpAddr, OpLocalAddr:
   840  		return p1.Aux == p2.Aux
   841  	case OpAddPtr:
   842  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   843  	}
   844  	return false
   845  }
   846  
   847  func isStackPtr(v *Value) bool {
   848  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   849  		v = v.Args[0]
   850  	}
   851  	return v.Op == OpSP || v.Op == OpLocalAddr
   852  }
   853  
   854  // disjoint reports whether the memory region specified by [p1:p1+n1)
   855  // does not overlap with [p2:p2+n2).
   856  // A return value of false does not imply the regions overlap.
   857  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   858  	if n1 == 0 || n2 == 0 {
   859  		return true
   860  	}
   861  	if p1 == p2 {
   862  		return false
   863  	}
   864  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   865  		base, offset = ptr, 0
   866  		for base.Op == OpOffPtr {
   867  			offset += base.AuxInt
   868  			base = base.Args[0]
   869  		}
   870  		if opcodeTable[base.Op].nilCheck {
   871  			base = base.Args[0]
   872  		}
   873  		return base, offset
   874  	}
   875  	p1, off1 := baseAndOffset(p1)
   876  	p2, off2 := baseAndOffset(p2)
   877  	if isSamePtr(p1, p2) {
   878  		return !overlap(off1, n1, off2, n2)
   879  	}
   880  	// p1 and p2 are not the same, so if they are both OpAddrs then
   881  	// they point to different variables.
   882  	// If one pointer is on the stack and the other is an argument
   883  	// then they can't overlap.
   884  	switch p1.Op {
   885  	case OpAddr, OpLocalAddr:
   886  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   887  			return true
   888  		}
   889  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   890  	case OpArg, OpArgIntReg:
   891  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   892  			return true
   893  		}
   894  	case OpSP:
   895  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   896  	}
   897  	return false
   898  }
   899  
   900  // moveSize returns the number of bytes an aligned MOV instruction moves.
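         // For example, with an 8-byte pointer size, moveSize(8, c) == 8, moveSize(4, c) == 4,
         // moveSize(6, c) == 2, and moveSize(3, c) == 1.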
   901  func moveSize(align int64, c *Config) int64 {
   902  	switch {
   903  	case align%8 == 0 && c.PtrSize == 8:
   904  		return 8
   905  	case align%4 == 0:
   906  		return 4
   907  	case align%2 == 0:
   908  		return 2
   909  	}
   910  	return 1
   911  }
   912  
   913  // mergePoint finds a block among a's blocks which dominates b and is itself
   914  // dominated by all of a's blocks. Returns nil if it can't find one.
   915  // Might return nil even if one does exist.
   916  func mergePoint(b *Block, a ...*Value) *Block {
   917  	// Walk backward from b looking for one of the a's blocks.
   918  
   919  	// Max distance
   920  	d := 100
   921  
   922  	for d > 0 {
   923  		for _, x := range a {
   924  			if b == x.Block {
   925  				goto found
   926  			}
   927  		}
   928  		if len(b.Preds) > 1 {
   929  			// Don't know which way to go back. Abort.
   930  			return nil
   931  		}
   932  		b = b.Preds[0].b
   933  		d--
   934  	}
   935  	return nil // too far away
   936  found:
    937  	// At this point, b is the block of the first of a's values that we found by walking backwards.
    938  	// If we return anything, it will be this block, recorded as r.
   939  	r := b
   940  
   941  	// Keep going, counting the other a's that we find. They must all dominate r.
   942  	na := 0
   943  	for d > 0 {
   944  		for _, x := range a {
   945  			if b == x.Block {
   946  				na++
   947  			}
   948  		}
   949  		if na == len(a) {
   950  			// Found all of a in a backwards walk. We can return r.
   951  			return r
   952  		}
   953  		if len(b.Preds) > 1 {
   954  			return nil
   955  		}
   956  		b = b.Preds[0].b
   957  		d--
   958  
   959  	}
   960  	return nil // too far away
   961  }
   962  
   963  // clobber invalidates values. Returns true.
   964  // clobber is used by rewrite rules to:
   965  //
   966  //	A) make sure the values are really dead and never used again.
   967  //	B) decrement use counts of the values' args.
   968  func clobber(vv ...*Value) bool {
   969  	for _, v := range vv {
   970  		v.reset(OpInvalid)
   971  		// Note: leave v.Block intact.  The Block field is used after clobber.
   972  	}
   973  	return true
   974  }
   975  
   976  // clobberIfDead resets v when use count is 1. Returns true.
   977  // clobberIfDead is used by rewrite rules to decrement
   978  // use counts of v's args when v is dead and never used.
   979  func clobberIfDead(v *Value) bool {
   980  	if v.Uses == 1 {
   981  		v.reset(OpInvalid)
   982  	}
   983  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
   984  	return true
   985  }
   986  
   987  // noteRule is an easy way to track if a rule is matched when writing
   988  // new ones.  Make the rule of interest also conditional on
   989  //
   990  //	noteRule("note to self: rule of interest matched")
   991  //
   992  // and that message will print when the rule matches.
   993  func noteRule(s string) bool {
   994  	fmt.Println(s)
   995  	return true
   996  }
   997  
   998  // countRule increments Func.ruleMatches[key].
   999  // If Func.ruleMatches is non-nil at the end
  1000  // of compilation, it will be printed to stdout.
  1001  // This is intended to make it easier to find which functions
   1002  // contain lots of rule matches when developing new rules.
  1003  func countRule(v *Value, key string) bool {
  1004  	f := v.Block.Func
  1005  	if f.ruleMatches == nil {
  1006  		f.ruleMatches = make(map[string]int)
  1007  	}
  1008  	f.ruleMatches[key]++
  1009  	return true
  1010  }
  1011  
  1012  // warnRule generates compiler debug output with string s when
  1013  // v is not in autogenerated code, cond is true and the rule has fired.
  1014  func warnRule(cond bool, v *Value, s string) bool {
  1015  	if pos := v.Pos; pos.Line() > 1 && cond {
  1016  		v.Block.Func.Warnl(pos, s)
  1017  	}
  1018  	return true
  1019  }
  1020  
  1021  // for a pseudo-op like (LessThan x), extract x.
  1022  func flagArg(v *Value) *Value {
  1023  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1024  		return nil
  1025  	}
  1026  	return v.Args[0]
  1027  }
  1028  
  1029  // arm64Negate finds the complement to an ARM64 condition code,
  1030  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1031  //
  1032  // For floating point, it's more subtle because NaN is unordered. We do
  1033  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1034  func arm64Negate(op Op) Op {
  1035  	switch op {
  1036  	case OpARM64LessThan:
  1037  		return OpARM64GreaterEqual
  1038  	case OpARM64LessThanU:
  1039  		return OpARM64GreaterEqualU
  1040  	case OpARM64GreaterThan:
  1041  		return OpARM64LessEqual
  1042  	case OpARM64GreaterThanU:
  1043  		return OpARM64LessEqualU
  1044  	case OpARM64LessEqual:
  1045  		return OpARM64GreaterThan
  1046  	case OpARM64LessEqualU:
  1047  		return OpARM64GreaterThanU
  1048  	case OpARM64GreaterEqual:
  1049  		return OpARM64LessThan
  1050  	case OpARM64GreaterEqualU:
  1051  		return OpARM64LessThanU
  1052  	case OpARM64Equal:
  1053  		return OpARM64NotEqual
  1054  	case OpARM64NotEqual:
  1055  		return OpARM64Equal
  1056  	case OpARM64LessThanF:
  1057  		return OpARM64NotLessThanF
  1058  	case OpARM64NotLessThanF:
  1059  		return OpARM64LessThanF
  1060  	case OpARM64LessEqualF:
  1061  		return OpARM64NotLessEqualF
  1062  	case OpARM64NotLessEqualF:
  1063  		return OpARM64LessEqualF
  1064  	case OpARM64GreaterThanF:
  1065  		return OpARM64NotGreaterThanF
  1066  	case OpARM64NotGreaterThanF:
  1067  		return OpARM64GreaterThanF
  1068  	case OpARM64GreaterEqualF:
  1069  		return OpARM64NotGreaterEqualF
  1070  	case OpARM64NotGreaterEqualF:
  1071  		return OpARM64GreaterEqualF
  1072  	default:
  1073  		panic("unreachable")
  1074  	}
  1075  }
  1076  
  1077  // arm64Invert evaluates (InvertFlags op), which
  1078  // is the same as altering the condition codes such
  1079  // that the same result would be produced if the arguments
  1080  // to the flag-generating instruction were reversed, e.g.
  1081  // (InvertFlags (CMP x y)) -> (CMP y x)
  1082  func arm64Invert(op Op) Op {
  1083  	switch op {
  1084  	case OpARM64LessThan:
  1085  		return OpARM64GreaterThan
  1086  	case OpARM64LessThanU:
  1087  		return OpARM64GreaterThanU
  1088  	case OpARM64GreaterThan:
  1089  		return OpARM64LessThan
  1090  	case OpARM64GreaterThanU:
  1091  		return OpARM64LessThanU
  1092  	case OpARM64LessEqual:
  1093  		return OpARM64GreaterEqual
  1094  	case OpARM64LessEqualU:
  1095  		return OpARM64GreaterEqualU
  1096  	case OpARM64GreaterEqual:
  1097  		return OpARM64LessEqual
  1098  	case OpARM64GreaterEqualU:
  1099  		return OpARM64LessEqualU
  1100  	case OpARM64Equal, OpARM64NotEqual:
  1101  		return op
  1102  	case OpARM64LessThanF:
  1103  		return OpARM64GreaterThanF
  1104  	case OpARM64GreaterThanF:
  1105  		return OpARM64LessThanF
  1106  	case OpARM64LessEqualF:
  1107  		return OpARM64GreaterEqualF
  1108  	case OpARM64GreaterEqualF:
  1109  		return OpARM64LessEqualF
  1110  	case OpARM64NotLessThanF:
  1111  		return OpARM64NotGreaterThanF
  1112  	case OpARM64NotGreaterThanF:
  1113  		return OpARM64NotLessThanF
  1114  	case OpARM64NotLessEqualF:
  1115  		return OpARM64NotGreaterEqualF
  1116  	case OpARM64NotGreaterEqualF:
  1117  		return OpARM64NotLessEqualF
  1118  	default:
  1119  		panic("unreachable")
  1120  	}
  1121  }
  1122  
  1123  // evaluate an ARM64 op against a flags value
  1124  // that is potentially constant; return 1 for true,
  1125  // -1 for false, and 0 for not constant.
  1126  func ccARM64Eval(op Op, flags *Value) int {
  1127  	fop := flags.Op
  1128  	if fop == OpARM64InvertFlags {
  1129  		return -ccARM64Eval(op, flags.Args[0])
  1130  	}
  1131  	if fop != OpARM64FlagConstant {
  1132  		return 0
  1133  	}
  1134  	fc := flagConstant(flags.AuxInt)
  1135  	b2i := func(b bool) int {
  1136  		if b {
  1137  			return 1
  1138  		}
  1139  		return -1
  1140  	}
  1141  	switch op {
  1142  	case OpARM64Equal:
  1143  		return b2i(fc.eq())
  1144  	case OpARM64NotEqual:
  1145  		return b2i(fc.ne())
  1146  	case OpARM64LessThan:
  1147  		return b2i(fc.lt())
  1148  	case OpARM64LessThanU:
  1149  		return b2i(fc.ult())
  1150  	case OpARM64GreaterThan:
  1151  		return b2i(fc.gt())
  1152  	case OpARM64GreaterThanU:
  1153  		return b2i(fc.ugt())
  1154  	case OpARM64LessEqual:
  1155  		return b2i(fc.le())
  1156  	case OpARM64LessEqualU:
  1157  		return b2i(fc.ule())
  1158  	case OpARM64GreaterEqual:
  1159  		return b2i(fc.ge())
  1160  	case OpARM64GreaterEqualU:
  1161  		return b2i(fc.uge())
  1162  	}
  1163  	return 0
  1164  }
  1165  
  1166  // logRule logs the use of the rule s. This will only be enabled if
  1167  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1168  func logRule(s string) {
  1169  	if ruleFile == nil {
  1170  		// Open a log file to write log to. We open in append
  1171  		// mode because all.bash runs the compiler lots of times,
  1172  		// and we want the concatenation of all of those logs.
  1173  		// This means, of course, that users need to rm the old log
  1174  		// to get fresh data.
  1175  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1176  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1177  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1178  		if err != nil {
  1179  			panic(err)
  1180  		}
  1181  		ruleFile = w
  1182  	}
  1183  	_, err := fmt.Fprintln(ruleFile, s)
  1184  	if err != nil {
  1185  		panic(err)
  1186  	}
  1187  }
  1188  
  1189  var ruleFile io.Writer
  1190  
  1191  func min(x, y int64) int64 {
  1192  	if x < y {
  1193  		return x
  1194  	}
  1195  	return y
  1196  }
  1197  func max(x, y int64) int64 {
  1198  	if x > y {
  1199  		return x
  1200  	}
  1201  	return y
  1202  }
  1203  
  1204  func isConstZero(v *Value) bool {
  1205  	switch v.Op {
  1206  	case OpConstNil:
  1207  		return true
  1208  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1209  		return v.AuxInt == 0
  1210  	}
  1211  	return false
  1212  }
  1213  
  1214  // reciprocalExact64 reports whether 1/c is exactly representable.
  1215  func reciprocalExact64(c float64) bool {
  1216  	b := math.Float64bits(c)
  1217  	man := b & (1<<52 - 1)
  1218  	if man != 0 {
  1219  		return false // not a power of 2, denormal, or NaN
  1220  	}
  1221  	exp := b >> 52 & (1<<11 - 1)
  1222  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1223  	// changes the exponent to 0x7fe-exp.
  1224  	switch exp {
  1225  	case 0:
  1226  		return false // ±0
  1227  	case 0x7ff:
  1228  		return false // ±inf
  1229  	case 0x7fe:
  1230  		return false // exponent is not representable
  1231  	default:
  1232  		return true
  1233  	}
  1234  }
  1235  
  1236  // reciprocalExact32 reports whether 1/c is exactly representable.
  1237  func reciprocalExact32(c float32) bool {
  1238  	b := math.Float32bits(c)
  1239  	man := b & (1<<23 - 1)
  1240  	if man != 0 {
  1241  		return false // not a power of 2, denormal, or NaN
  1242  	}
  1243  	exp := b >> 23 & (1<<8 - 1)
  1244  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1245  	// changes the exponent to 0xfe-exp.
  1246  	switch exp {
  1247  	case 0:
  1248  		return false // ±0
  1249  	case 0xff:
  1250  		return false // ±inf
  1251  	case 0xfe:
  1252  		return false // exponent is not representable
  1253  	default:
  1254  		return true
  1255  	}
  1256  }
  1257  
   1258  // isARMImmRot reports whether the immediate can be directly encoded into an ARM instruction.
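         // For example, isARMImmRot(0xff) and isARMImmRot(0xff000000) are true (0xff rotated by 8),
         // while isARMImmRot(0x101) is false.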
  1259  func isARMImmRot(v uint32) bool {
  1260  	for i := 0; i < 16; i++ {
  1261  		if v&^0xff == 0 {
  1262  			return true
  1263  		}
  1264  		v = v<<2 | v>>30
  1265  	}
  1266  
  1267  	return false
  1268  }
  1269  
  1270  // overlap reports whether the ranges given by the given offset and
  1271  // size pairs overlap.
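         // For example, overlap(0, 8, 4, 4) == true, while overlap(0, 4, 4, 4) == false
         // because [0,4) and [4,8) only touch.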
  1272  func overlap(offset1, size1, offset2, size2 int64) bool {
  1273  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1274  		return true
  1275  	}
  1276  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1277  		return true
  1278  	}
  1279  	return false
  1280  }
  1281  
  1282  func areAdjacentOffsets(off1, off2, size int64) bool {
  1283  	return off1+size == off2 || off1 == off2+size
  1284  }
  1285  
   1286  // zeroUpper32Bits reports whether the value zeroes out the upper 32 bits of a 64-bit register.
   1287  // depth limits recursion depth. In AMD64.rules 3 is used as the limit
   1288  // because it catches the same number of cases as 4.
  1289  func zeroUpper32Bits(x *Value, depth int) bool {
  1290  	switch x.Op {
  1291  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1292  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1293  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1294  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1295  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1296  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1297  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1298  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1299  		OpAMD64SHLL, OpAMD64SHLLconst:
  1300  		return true
  1301  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1302  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1303  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1304  		return true
  1305  	case OpArg: // note: but not ArgIntReg
  1306  		// amd64 always loads args from the stack unsigned.
  1307  		// most other architectures load them sign/zero extended based on the type.
  1308  		return x.Type.Size() == 4 && (x.Type.IsUnsigned() || x.Block.Func.Config.arch == "amd64")
  1309  	case OpPhi, OpSelect0, OpSelect1:
   1310  		// Phis can use each other as arguments; instead of tracking visited values,
   1311  		// just limit the recursion depth.
  1312  		if depth <= 0 {
  1313  			return false
  1314  		}
  1315  		for i := range x.Args {
  1316  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1317  				return false
  1318  			}
  1319  		}
  1320  		return true
  1321  
  1322  	}
  1323  	return false
  1324  }
  1325  
  1326  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1327  func zeroUpper48Bits(x *Value, depth int) bool {
  1328  	switch x.Op {
  1329  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1330  		return true
  1331  	case OpArg: // note: but not ArgIntReg
  1332  		return x.Type.Size() == 2 && (x.Type.IsUnsigned() || x.Block.Func.Config.arch == "amd64")
  1333  	case OpPhi, OpSelect0, OpSelect1:
   1334  		// Phis can use each other as arguments; instead of tracking visited values,
   1335  		// just limit the recursion depth.
  1336  		if depth <= 0 {
  1337  			return false
  1338  		}
  1339  		for i := range x.Args {
  1340  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1341  				return false
  1342  			}
  1343  		}
  1344  		return true
  1345  
  1346  	}
  1347  	return false
  1348  }
  1349  
  1350  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1351  func zeroUpper56Bits(x *Value, depth int) bool {
  1352  	switch x.Op {
  1353  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1354  		return true
  1355  	case OpArg: // note: but not ArgIntReg
  1356  		return x.Type.Size() == 1 && (x.Type.IsUnsigned() || x.Block.Func.Config.arch == "amd64")
  1357  	case OpPhi, OpSelect0, OpSelect1:
   1358  		// Phis can use each other as arguments; instead of tracking visited values,
   1359  		// just limit the recursion depth.
  1360  		if depth <= 0 {
  1361  			return false
  1362  		}
  1363  		for i := range x.Args {
  1364  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1365  				return false
  1366  			}
  1367  		}
  1368  		return true
  1369  
  1370  	}
  1371  	return false
  1372  }
  1373  
  1374  func isInlinableMemclr(c *Config, sz int64) bool {
  1375  	if sz < 0 {
  1376  		return false
  1377  	}
  1378  	// TODO: expand this check to allow other architectures
  1379  	// see CL 454255 and issue 56997
  1380  	switch c.arch {
  1381  	case "amd64", "arm64":
  1382  		return true
  1383  	case "ppc64le", "ppc64":
  1384  		return sz < 512
  1385  	}
  1386  	return false
  1387  }
  1388  
  1389  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1390  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1391  // safe, either because Move will do all of its loads before any of its stores, or
  1392  // because the arguments are known to be disjoint.
  1393  // This is used as a check for replacing memmove with Move ops.
  1394  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1395  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1396  	// Move ops may or may not be faster for large sizes depending on how the platform
  1397  	// lowers them, so we only perform this optimization on platforms that we know to
  1398  	// have fast Move ops.
  1399  	switch c.arch {
  1400  	case "amd64":
  1401  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1402  	case "386", "arm64":
  1403  		return sz <= 8
  1404  	case "s390x", "ppc64", "ppc64le":
  1405  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1406  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1407  		return sz <= 4
  1408  	}
  1409  	return false
  1410  }
  1411  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1412  	return isInlinableMemmove(dst, src, sz, c)
  1413  }
  1414  
  1415  // logLargeCopy logs the occurrence of a large copy.
  1416  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1417  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1418  func logLargeCopy(v *Value, s int64) bool {
  1419  	if s < 128 {
  1420  		return true
  1421  	}
  1422  	if logopt.Enabled() {
  1423  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1424  	}
  1425  	return true
  1426  }
  1427  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1428  	if s < 128 {
  1429  		return
  1430  	}
  1431  	if logopt.Enabled() {
  1432  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1433  	}
  1434  }
  1435  
  1436  // hasSmallRotate reports whether the architecture has rotate instructions
  1437  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1438  func hasSmallRotate(c *Config) bool {
  1439  	switch c.arch {
  1440  	case "amd64", "386":
  1441  		return true
  1442  	default:
  1443  		return false
  1444  	}
  1445  }
  1446  
  1447  func supportsPPC64PCRel() bool {
  1448  	// PCRel is currently supported for >= power10, linux only
  1449  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1450  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1451  }
  1452  
  1453  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1454  	if sh < 0 || sh >= sz {
  1455  		panic("PPC64 shift arg sh out of range")
  1456  	}
  1457  	if mb < 0 || mb >= sz {
  1458  		panic("PPC64 shift arg mb out of range")
  1459  	}
  1460  	if me < 0 || me >= sz {
  1461  		panic("PPC64 shift arg me out of range")
  1462  	}
  1463  	return int32(sh<<16 | mb<<8 | me)
  1464  }
  1465  
  1466  func GetPPC64Shiftsh(auxint int64) int64 {
  1467  	return int64(int8(auxint >> 16))
  1468  }
  1469  
  1470  func GetPPC64Shiftmb(auxint int64) int64 {
  1471  	return int64(int8(auxint >> 8))
  1472  }
  1473  
  1474  func GetPPC64Shiftme(auxint int64) int64 {
  1475  	return int64(int8(auxint))
  1476  }
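         // For example, newPPC64ShiftAuxInt(3, 4, 29, 32) == 0x3041D, and GetPPC64Shiftsh,
         // GetPPC64Shiftmb and GetPPC64Shiftme recover 3, 4 and 29 from that value.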
  1477  
   1478  // isPPC64WordRotateMask reports whether the value can be encoded as a mask
   1479  // for an rlwinm-like operation. Masks can also extend from the msb and wrap
   1480  // to the lsb. That is, the valid masks are 32 bit strings
   1481  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
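         // For example, 0x000000F0 and the wrapping mask 0xF000000F are valid, while 0x0F0F0F00 is not.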
  1482  func isPPC64WordRotateMask(v64 int64) bool {
  1483  	// Isolate rightmost 1 (if none 0) and add.
  1484  	v := uint32(v64)
  1485  	vp := (v & -v) + v
  1486  	// Likewise, for the wrapping case.
  1487  	vn := ^v
  1488  	vpn := (vn & -vn) + vn
  1489  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1490  }
  1491  
   1492  // Compress mask and shift into a single value of the form
  1493  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1494  // be used to regenerate the input mask.
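         // For example, encodePPC64RotateMask(0, 0xFF, 32) == 0x20001820 (me=32, mb=24, rotate=0, nbits=32).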
  1495  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1496  	var mb, me, mbn, men int
  1497  
  1498  	// Determine boundaries and then decode them
  1499  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1500  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1501  	} else if nbits == 32 {
  1502  		mb = bits.LeadingZeros32(uint32(mask))
  1503  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1504  		mbn = bits.LeadingZeros32(^uint32(mask))
  1505  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1506  	} else {
  1507  		mb = bits.LeadingZeros64(uint64(mask))
  1508  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1509  		mbn = bits.LeadingZeros64(^uint64(mask))
  1510  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1511  	}
  1512  	// Check for a wrapping mask (e.g bits at 0 and 63)
  1513  	if mb == 0 && me == int(nbits) {
  1514  		// swap the inverted values
  1515  		mb, me = men, mbn
  1516  	}
  1517  
  1518  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1519  }
  1520  
  1521  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1522  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1523  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
   1524  // operations can be combined. This function assumes the two opcodes can
  1525  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1526  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1527  	mb := s
  1528  	r := 64 - s
  1529  	// A larger mb is a smaller mask.
  1530  	if (encoded>>8)&0xFF < mb {
  1531  		encoded = (encoded &^ 0xFF00) | mb<<8
  1532  	}
  1533  	// The rotate is expected to be 0.
  1534  	if (encoded & 0xFF0000) != 0 {
  1535  		panic("non-zero rotate")
  1536  	}
  1537  	return encoded | r<<16
  1538  }
  1539  
  1540  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1541  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
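         // For example, DecodePPC64RotateMask(0x20001820) returns rotate=0, mb=24, me=31, mask=0xFF,
         // matching the ISA mask MASK(24, 31).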
  1542  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1543  	auxint := uint64(sauxint)
  1544  	rotate = int64((auxint >> 16) & 0xFF)
  1545  	mb = int64((auxint >> 8) & 0xFF)
  1546  	me = int64((auxint >> 0) & 0xFF)
  1547  	nbits := int64((auxint >> 24) & 0xFF)
  1548  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1549  	if mb > me {
  1550  		mask = ^mask
  1551  	}
  1552  	if nbits == 32 {
  1553  		mask = uint64(uint32(mask))
  1554  	}
  1555  
  1556  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1557  	// is inclusive.
  1558  	me = (me - 1) & (nbits - 1)
  1559  	return
  1560  }
  1561  
   1562  // isPPC64ValidShiftMask reports whether the mask is a set of
   1563  // consecutive bits including the least
   1564  // significant bit.
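         // For example, 0x0000FFFF is a valid shift mask, while 0x0000FF00 is not.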
  1565  func isPPC64ValidShiftMask(v int64) bool {
  1566  	if (v != 0) && ((v+1)&v) == 0 {
  1567  		return true
  1568  	}
  1569  	return false
  1570  }
  1571  
  1572  func getPPC64ShiftMaskLength(v int64) int64 {
  1573  	return int64(bits.Len64(uint64(v)))
  1574  }
  1575  
  1576  // Decompose a shift right into an equivalent rotate/mask,
  1577  // and return mask & m.
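         // For example, mergePPC64RShiftMask(-1, 4, 32) == 0x0FFFFFFF, the bits that survive a 32-bit right shift by 4.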
  1578  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1579  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1580  	return m & int64(smask)
  1581  }
  1582  
  1583  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1584  func mergePPC64AndSrwi(m, s int64) int64 {
  1585  	mask := mergePPC64RShiftMask(m, s, 32)
  1586  	if !isPPC64WordRotateMask(mask) {
  1587  		return 0
  1588  	}
  1589  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1590  }
  1591  
  1592  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1593  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1594  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1595  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
   1596  	// For CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1597  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1598  
  1599  	// Rewrite mask to apply after the final left shift.
  1600  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1601  
  1602  	r_1 := 32 - srw
  1603  	r_2 := GetPPC64Shiftsh(sld)
  1604  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1605  
  1606  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1607  		return 0
  1608  	}
  1609  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1610  }
  1611  
  1612  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1613  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1614  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1615  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1616  	// For CLRLSLDI, it is more convenient to think of it as masking the left bits, then rotating left.
  1617  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1618  
  1619  	// Rewrite mask to apply after the final left shift.
  1620  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1621  
  1622  	r_1 := 64 - srd
  1623  	r_2 := GetPPC64Shiftsh(sld)
  1624  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1625  
  1626  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1627  		return 0
  1628  	}
  1629  	// This combine only works when selecting and shifting the lower 32 bits.
  1630  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1631  	if v1&mask_3 != 0 {
  1632  		return 0
  1633  	}
  1634  	return encodePPC64RotateMask(int64(r_3-32), int64(mask_3), 32)
  1635  }
  1636  
  1637  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1638  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1639  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1640  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1641  	// For CLRLSLDI, it is more convenient to think of it as masking the left bits, then rotating left.
  1642  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1643  
  1644  	// combine the masks, and adjust for the final left shift.
  1645  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1646  	r_2 := GetPPC64Shiftsh(int64(sld))
  1647  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1648  
  1649  	// Verify the result is still a valid bitmask of <= 32 bits.
  1650  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1651  		return 0
  1652  	}
  1653  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1654  }
  1655  
  1656  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1657  // or 0 if they cannot be merged.
  1658  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1659  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1660  	mask_out := (mask_rlw & uint64(mask))
  1661  
  1662  	// Verify the result is still a valid bitmask of <= 32 bits.
  1663  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1664  		return 0
  1665  	}
  1666  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1667  }
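
        // For example (illustrative values): if rlw decodes to rotate=24 with mask
        // 0x000000FF and the ANDconst mask is 0x0F, the combined mask 0x0000000F is
        // still contiguous, so the result encodes rotate=24 with mask 0x0000000F
        // (again assuming isPPC64WordRotateMask accepts contiguous masks).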
  1668  
  1669  // Test if an ANDconst feeding into a RLWINM can be merged. Return the encoded RLWINM constant,
  1670  // or 0 if they cannot be merged.
  1671  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1672  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1673  
  1674  	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
  1675  	r_mask := bits.RotateLeft32(mask, int(r))
  1676  
  1677  	mask_out := (mask_rlw & uint64(r_mask))
  1678  
  1679  	// Verify the result is still a valid bitmask of <= 32 bits.
  1680  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1681  		return 0
  1682  	}
  1683  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1684  }
  1685  
  1686  // Test if a RLWINM feeding into a SLDconst can be merged. Return the encoded RLWINM constant,
  1687  // or 0 if they cannot be merged.
  1688  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1689  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1690  	if mb > me || mb < sldi {
  1691  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1692  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1693  		return 0
  1694  	}
  1695  	// combine the masks, and adjust for the final left shift.
  1696  	mask_3 := mask_1 << sldi
  1697  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1698  
  1699  	// Verify the result is still a valid bitmask of <= 32 bits.
  1700  	if uint64(uint32(mask_3)) != mask_3 {
  1701  		return 0
  1702  	}
  1703  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1704  }
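
        // For example (values chosen for illustration): if rlw decodes to rotate=0,
        // mb=28, me=31 (mask 0x0000000F) and sldi=2, the shifted mask is 0x0000003C
        // and the result encodes rotate=2 with mask 0x0000003C.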
  1705  
  1706  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1707  // or return 0 if they cannot be combined.
  1708  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1709  	if sld > srw || srw >= 32 {
  1710  		return 0
  1711  	}
  1712  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1713  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1714  	mask := (mask_r & mask_l) << uint(sld)
  1715  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1716  }
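
        // For example, sld=2 and srw=4 (an (x>>4)<<2 pattern, shown only as an
        // illustration) produces mask 0x3FFFFFFC and rotate (32-4+2)&31 = 30.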
  1717  
  1718  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1719  // to (Select0 (opCC x y)) without having to explicitly fix up every user
  1720  // of op.
  1721  //
  1722  // E.g. consider the case:
  1723  // a = (ADD x y)
  1724  // b = (CMPconst [0] a)
  1725  // c = (OR a z)
  1726  //
  1727  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1728  // would produce:
  1729  // a  = (ADD x y)
  1730  // a' = (ADDCC x y)
  1731  // a” = (Select0 a')
  1732  // b  = (CMPconst [0] a”)
  1733  // c  = (OR a z)
  1734  //
  1735  // which makes it impossible to rewrite the second user. Instead the result
  1736  // of this conversion is:
  1737  // a' = (ADDCC x y)
  1738  // a  = (Select0 a')
  1739  // b  = (CMPconst [0] a)
  1740  // c  = (OR a z)
  1741  //
  1742  // Which makes it trivial to rewrite b using a lowering rule.
  1743  func convertPPC64OpToOpCC(op *Value) *Value {
  1744  	ccOpMap := map[Op]Op{
  1745  		OpPPC64ADD:      OpPPC64ADDCC,
  1746  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1747  		OpPPC64AND:      OpPPC64ANDCC,
  1748  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1749  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1750  		OpPPC64OR:       OpPPC64ORCC,
  1751  		OpPPC64SUB:      OpPPC64SUBCC,
  1752  		OpPPC64NEG:      OpPPC64NEGCC,
  1753  		OpPPC64NOR:      OpPPC64NORCC,
  1754  		OpPPC64XOR:      OpPPC64XORCC,
  1755  	}
  1756  	b := op.Block
  1757  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1758  	opCC.AddArgs(op.Args...)
  1759  	op.reset(OpSelect0)
  1760  	op.AddArgs(opCC)
  1761  	return op
  1762  }
  1763  
  1764  // Convenience function to rotate a 32-bit constant value by another constant.
  1765  func rotateLeft32(v, rotate int64) int64 {
  1766  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1767  }
  1768  
  1769  func rotateRight64(v, rotate int64) int64 {
  1770  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1771  }
  1772  
  1773  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1774  func armBFAuxInt(lsb, width int64) arm64BitField {
  1775  	if lsb < 0 || lsb > 63 {
  1776  		panic("ARM(64) bit field lsb constant out of range")
  1777  	}
  1778  	if width < 1 || lsb+width > 64 {
  1779  		panic("ARM(64) bit field width constant out of range")
  1780  	}
  1781  	return arm64BitField(width | lsb<<8)
  1782  }
  1783  
  1784  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1785  func (bfc arm64BitField) getARM64BFlsb() int64 {
  1786  	return int64(uint64(bfc) >> 8)
  1787  }
  1788  
  1789  // returns the width part of the auxInt field of arm64 bitfield ops.
  1790  func (bfc arm64BitField) getARM64BFwidth() int64 {
  1791  	return int64(bfc) & 0xff
  1792  }
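
        // A round-trip sketch (example values only): armBFAuxInt(8, 16) packs to
        // 0x810, and the accessors recover getARM64BFlsb() == 8 and
        // getARM64BFwidth() == 16.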
  1793  
  1794  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1795  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1796  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1797  	return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1798  }
  1799  
  1800  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1801  func arm64BFWidth(mask, rshift int64) int64 {
  1802  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1803  	if shiftedMask == 0 {
  1804  		panic("ARM64 BF mask is zero")
  1805  	}
  1806  	return nto(shiftedMask)
  1807  }
  1808  
  1809  // sizeof returns the size of t in bytes.
  1810  // It will panic if t is not a *types.Type.
  1811  func sizeof(t interface{}) int64 {
  1812  	return t.(*types.Type).Size()
  1813  }
  1814  
  1815  // registerizable reports whether t is a primitive type that fits in
  1816  // a register. It assumes float64 values will always fit into registers
  1817  // even if that isn't strictly true.
  1818  func registerizable(b *Block, typ *types.Type) bool {
  1819  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1820  		return true
  1821  	}
  1822  	if typ.IsInteger() {
  1823  		return typ.Size() <= b.Func.Config.RegSize
  1824  	}
  1825  	return false
  1826  }
  1827  
  1828  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  1829  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  1830  	f := v.Block.Func
  1831  	if !f.Config.Race {
  1832  		return false
  1833  	}
  1834  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  1835  		return false
  1836  	}
  1837  	for _, b := range f.Blocks {
  1838  		for _, v := range b.Values {
  1839  			switch v.Op {
  1840  			case OpStaticCall, OpStaticLECall:
  1841  				// A scan for racefuncenter will also encounter racefuncexit and vice versa.
  1842  				// Allow those, and calls to panic*.
  1843  				s := v.Aux.(*AuxCall).Fn.String()
  1844  				switch s {
  1845  				case "runtime.racefuncenter", "runtime.racefuncexit",
  1846  					"runtime.panicdivide", "runtime.panicwrap",
  1847  					"runtime.panicshift":
  1848  					continue
  1849  				}
  1850  				// If we encountered any call, we need to keep racefunc*,
  1851  				// for accurate stacktraces.
  1852  				return false
  1853  			case OpPanicBounds, OpPanicExtend:
  1854  				// Note: these are panic generators that are ok (like the static calls above).
  1855  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  1856  				// We must keep the race functions if there are any other call types.
  1857  				return false
  1858  			}
  1859  		}
  1860  	}
  1861  	if isSameCall(sym, "runtime.racefuncenter") {
  1862  		// TODO REGISTER ABI this needs to be cleaned up.
  1863  		// If we're removing racefuncenter, remove its argument as well.
  1864  		if v.Args[0].Op != OpStore {
  1865  			if v.Op == OpStaticLECall {
  1866  				// there is no store, yet.
  1867  				return true
  1868  			}
  1869  			return false
  1870  		}
  1871  		mem := v.Args[0].Args[2]
  1872  		v.Args[0].reset(OpCopy)
  1873  		v.Args[0].AddArg(mem)
  1874  	}
  1875  	return true
  1876  }
  1877  
  1878  // symIsRO reports whether sym is a read-only global.
  1879  func symIsRO(sym interface{}) bool {
  1880  	lsym := sym.(*obj.LSym)
  1881  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  1882  }
  1883  
  1884  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  1885  func symIsROZero(sym Sym) bool {
  1886  	lsym := sym.(*obj.LSym)
  1887  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  1888  		return false
  1889  	}
  1890  	for _, b := range lsym.P {
  1891  		if b != 0 {
  1892  			return false
  1893  		}
  1894  	}
  1895  	return true
  1896  }
  1897  
  1898  // isFixed32 returns true if the int32 at offset off in symbol sym
  1899  // is known and constant.
  1900  func isFixed32(c *Config, sym Sym, off int64) bool {
  1901  	return isFixed(c, sym, off, 4)
  1902  }
  1903  
  1904  // isFixed returns true if the range [off,off+size] of the symbol sym
  1905  // is known and constant.
  1906  func isFixed(c *Config, sym Sym, off, size int64) bool {
  1907  	lsym := sym.(*obj.LSym)
  1908  	if lsym.Extra == nil {
  1909  		return false
  1910  	}
  1911  	if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  1912  		if off == 2*c.PtrSize && size == 4 {
  1913  			return true // type hash field
  1914  		}
  1915  	}
  1916  	return false
  1917  }
  1918  func fixed32(c *Config, sym Sym, off int64) int32 {
  1919  	lsym := sym.(*obj.LSym)
  1920  	if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  1921  		if off == 2*c.PtrSize {
  1922  			return int32(types.TypeHash(ti.Type.(*types.Type)))
  1923  		}
  1924  	}
  1925  	base.Fatalf("fixed32 data not known for %s:%d", sym, off)
  1926  	return 0
  1927  }
  1928  
  1929  // isFixedSym returns true if the content of sym at the given offset
  1930  // is known and is the constant address of another symbol.
  1931  func isFixedSym(sym Sym, off int64) bool {
  1932  	lsym := sym.(*obj.LSym)
  1933  	switch {
  1934  	case lsym.Type == objabi.SRODATA:
  1935  		// itabs, dictionaries
  1936  	default:
  1937  		return false
  1938  	}
  1939  	for _, r := range lsym.R {
  1940  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  1941  			return true
  1942  		}
  1943  	}
  1944  	return false
  1945  }
  1946  func fixedSym(f *Func, sym Sym, off int64) Sym {
  1947  	lsym := sym.(*obj.LSym)
  1948  	for _, r := range lsym.R {
  1949  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
  1950  			if strings.HasPrefix(r.Sym.Name, "type:") {
  1951  				// In case we're loading a type out of a dictionary, we need to record
  1952  				// that the containing function might put that type in an interface.
  1953  				// That information is currently recorded in relocations in the dictionary,
  1954  				// but if we perform this load at compile time then the dictionary
  1955  				// might be dead.
  1956  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  1957  			} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  1958  				// Same, but if we're using an itab we need to record that the
  1959  				// itab._type might be put in an interface.
  1960  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  1961  			}
  1962  			return r.Sym
  1963  		}
  1964  	}
  1965  	base.Fatalf("fixedSym data not known for %s:%d", sym, off)
  1966  	return nil
  1967  }
  1968  
  1969  // read8 reads one byte from the read-only global sym at offset off.
  1970  func read8(sym interface{}, off int64) uint8 {
  1971  	lsym := sym.(*obj.LSym)
  1972  	if off >= int64(len(lsym.P)) || off < 0 {
  1973  		// Invalid index into the global sym.
  1974  		// This can happen in dead code, so we don't want to panic.
  1975  		// Just return any value, it will eventually get ignored.
  1976  		// See issue 29215.
  1977  		return 0
  1978  	}
  1979  	return lsym.P[off]
  1980  }
  1981  
  1982  // read16 reads two bytes from the read-only global sym at offset off.
  1983  func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
  1984  	lsym := sym.(*obj.LSym)
  1985  	// lsym.P is written lazily.
  1986  	// Bytes requested after the end of lsym.P are 0.
  1987  	var src []byte
  1988  	if 0 <= off && off < int64(len(lsym.P)) {
  1989  		src = lsym.P[off:]
  1990  	}
  1991  	buf := make([]byte, 2)
  1992  	copy(buf, src)
  1993  	return byteorder.Uint16(buf)
  1994  }
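
        // For example, if len(lsym.P) == 3 and off == 2, only lsym.P[2] is copied
        // into buf; the other byte stays zero, matching the lazily-written lsym.P
        // convention noted above.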
  1995  
  1996  // read32 reads four bytes from the read-only global sym at offset off.
  1997  func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
  1998  	lsym := sym.(*obj.LSym)
  1999  	var src []byte
  2000  	if 0 <= off && off < int64(len(lsym.P)) {
  2001  		src = lsym.P[off:]
  2002  	}
  2003  	buf := make([]byte, 4)
  2004  	copy(buf, src)
  2005  	return byteorder.Uint32(buf)
  2006  }
  2007  
  2008  // read64 reads eight bytes from the read-only global sym at offset off.
  2009  func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
  2010  	lsym := sym.(*obj.LSym)
  2011  	var src []byte
  2012  	if 0 <= off && off < int64(len(lsym.P)) {
  2013  		src = lsym.P[off:]
  2014  	}
  2015  	buf := make([]byte, 8)
  2016  	copy(buf, src)
  2017  	return byteorder.Uint64(buf)
  2018  }
  2019  
  2020  // sequentialAddresses reports true if it can prove that x + n == y
  2021  func sequentialAddresses(x, y *Value, n int64) bool {
  2022  	if x == y && n == 0 {
  2023  		return true
  2024  	}
  2025  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2026  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2027  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2028  		return true
  2029  	}
  2030  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2031  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2032  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2033  		return true
  2034  	}
  2035  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2036  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2037  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2038  		return true
  2039  	}
  2040  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2041  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2042  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2043  		return true
  2044  	}
  2045  	return false
  2046  }
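
        // For example (with hypothetical values a and b): x = (ADDQ a b) and
        // y = (LEAQ1 [8] a b) satisfy sequentialAddresses(x, y, 8), since the LEAQ1
        // computes a+b+8.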
  2047  
  2048  // flagConstant represents the result of a compile-time comparison.
  2049  // The sense of these flags does not necessarily represent the hardware's notion
  2050  // of a flags register - these are just a compile-time construct.
  2051  // We happen to match the semantics to those of arm/arm64.
  2052  // Note that these semantics differ from x86: the carry flag has the opposite
  2053  // sense on a subtraction!
  2054  //
  2055  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2056  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2057  //	 (because it does x + ^y + C).
  2058  //
  2059  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2060  type flagConstant uint8
  2061  
  2062  // N reports whether the result of an operation is negative (high bit set).
  2063  func (fc flagConstant) N() bool {
  2064  	return fc&1 != 0
  2065  }
  2066  
  2067  // Z reports whether the result of an operation is 0.
  2068  func (fc flagConstant) Z() bool {
  2069  	return fc&2 != 0
  2070  }
  2071  
  2072  // C reports whether an unsigned add overflowed (carry), or an
  2073  // unsigned subtract did not underflow (borrow).
  2074  func (fc flagConstant) C() bool {
  2075  	return fc&4 != 0
  2076  }
  2077  
  2078  // V reports whether a signed operation overflowed or underflowed.
  2079  func (fc flagConstant) V() bool {
  2080  	return fc&8 != 0
  2081  }
  2082  
  2083  func (fc flagConstant) eq() bool {
  2084  	return fc.Z()
  2085  }
  2086  func (fc flagConstant) ne() bool {
  2087  	return !fc.Z()
  2088  }
  2089  func (fc flagConstant) lt() bool {
  2090  	return fc.N() != fc.V()
  2091  }
  2092  func (fc flagConstant) le() bool {
  2093  	return fc.Z() || fc.lt()
  2094  }
  2095  func (fc flagConstant) gt() bool {
  2096  	return !fc.Z() && fc.ge()
  2097  }
  2098  func (fc flagConstant) ge() bool {
  2099  	return fc.N() == fc.V()
  2100  }
  2101  func (fc flagConstant) ult() bool {
  2102  	return !fc.C()
  2103  }
  2104  func (fc flagConstant) ule() bool {
  2105  	return fc.Z() || fc.ult()
  2106  }
  2107  func (fc flagConstant) ugt() bool {
  2108  	return !fc.Z() && fc.uge()
  2109  }
  2110  func (fc flagConstant) uge() bool {
  2111  	return fc.C()
  2112  }
  2113  
  2114  func (fc flagConstant) ltNoov() bool {
  2115  	return fc.lt() && !fc.V()
  2116  }
  2117  func (fc flagConstant) leNoov() bool {
  2118  	return fc.le() && !fc.V()
  2119  }
  2120  func (fc flagConstant) gtNoov() bool {
  2121  	return fc.gt() && !fc.V()
  2122  }
  2123  func (fc flagConstant) geNoov() bool {
  2124  	return fc.ge() && !fc.V()
  2125  }
  2126  
  2127  func (fc flagConstant) String() string {
  2128  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2129  }
  2130  
  2131  type flagConstantBuilder struct {
  2132  	N bool
  2133  	Z bool
  2134  	C bool
  2135  	V bool
  2136  }
  2137  
  2138  func (fcs flagConstantBuilder) encode() flagConstant {
  2139  	var fc flagConstant
  2140  	if fcs.N {
  2141  		fc |= 1
  2142  	}
  2143  	if fcs.Z {
  2144  		fc |= 2
  2145  	}
  2146  	if fcs.C {
  2147  		fc |= 4
  2148  	}
  2149  	if fcs.V {
  2150  		fc |= 8
  2151  	}
  2152  	return fc
  2153  }
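
        // For instance, flagConstantBuilder{Z: true, C: true}.encode() yields the
        // value 6, for which Z(), C(), eq(), and uge() report true while N() and
        // V() report false.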
  2154  
  2155  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2156  //  - the results of the C flag are different
  2157  //  - the results of the V flag when y==minint are different
  2158  
  2159  // addFlags64 returns the flags that would be set from computing x+y.
  2160  func addFlags64(x, y int64) flagConstant {
  2161  	var fcb flagConstantBuilder
  2162  	fcb.Z = x+y == 0
  2163  	fcb.N = x+y < 0
  2164  	fcb.C = uint64(x+y) < uint64(x)
  2165  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2166  	return fcb.encode()
  2167  }
  2168  
  2169  // subFlags64 returns the flags that would be set from computing x-y.
  2170  func subFlags64(x, y int64) flagConstant {
  2171  	var fcb flagConstantBuilder
  2172  	fcb.Z = x-y == 0
  2173  	fcb.N = x-y < 0
  2174  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2175  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2176  	return fcb.encode()
  2177  }
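
        // For example, subFlags64(0, 1) leaves C clear (a borrow in the arm sense),
        // so ult() reports true; subFlags64(5, 5) sets Z and C, so eq() and uge()
        // report true.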
  2178  
  2179  // addFlags32 returns the flags that would be set from computing x+y.
  2180  func addFlags32(x, y int32) flagConstant {
  2181  	var fcb flagConstantBuilder
  2182  	fcb.Z = x+y == 0
  2183  	fcb.N = x+y < 0
  2184  	fcb.C = uint32(x+y) < uint32(x)
  2185  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2186  	return fcb.encode()
  2187  }
  2188  
  2189  // subFlags32 returns the flags that would be set from computing x-y.
  2190  func subFlags32(x, y int32) flagConstant {
  2191  	var fcb flagConstantBuilder
  2192  	fcb.Z = x-y == 0
  2193  	fcb.N = x-y < 0
  2194  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2195  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2196  	return fcb.encode()
  2197  }
  2198  
  2199  // logicFlags64 returns flags set to the sign/zeroness of x.
  2200  // C and V are set to false.
  2201  func logicFlags64(x int64) flagConstant {
  2202  	var fcb flagConstantBuilder
  2203  	fcb.Z = x == 0
  2204  	fcb.N = x < 0
  2205  	return fcb.encode()
  2206  }
  2207  
  2208  // logicFlags32 returns flags set to the sign/zeroness of x.
  2209  // C and V are set to false.
  2210  func logicFlags32(x int32) flagConstant {
  2211  	var fcb flagConstantBuilder
  2212  	fcb.Z = x == 0
  2213  	fcb.N = x < 0
  2214  	return fcb.encode()
  2215  }
  2216  
  2217  func makeJumpTableSym(b *Block) *obj.LSym {
  2218  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2219  	// The jump table symbol is accessed only from the function symbol.
  2220  	s.Set(obj.AttrStatic, true)
  2221  	return s
  2222  }
  2223  
  2224  // canRotate reports whether the architecture supports
  2225  // rotates of integer registers with the given number of bits.
  2226  func canRotate(c *Config, bits int64) bool {
  2227  	if bits > c.PtrSize*8 {
  2228  		// Don't rewrite to rotates bigger than the machine word.
  2229  		return false
  2230  	}
  2231  	switch c.arch {
  2232  	case "386", "amd64", "arm64", "riscv64":
  2233  		return true
  2234  	case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
  2235  		return bits >= 32
  2236  	default:
  2237  		return false
  2238  	}
  2239  }
  2240  
  2241  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2242  func isARM64bitcon(x uint64) bool {
  2243  	if x == 1<<64-1 || x == 0 {
  2244  		return false
  2245  	}
  2246  	// determine the period and sign-extend a unit to 64 bits
  2247  	switch {
  2248  	case x != x>>32|x<<32:
  2249  		// period is 64
  2250  		// nothing to do
  2251  	case x != x>>16|x<<48:
  2252  		// period is 32
  2253  		x = uint64(int64(int32(x)))
  2254  	case x != x>>8|x<<56:
  2255  		// period is 16
  2256  		x = uint64(int64(int16(x)))
  2257  	case x != x>>4|x<<60:
  2258  		// period is 8
  2259  		x = uint64(int64(int8(x)))
  2260  	default:
  2261  		// period is 4 or 2, always true
  2262  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2263  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2264  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2265  		// 0101, 1010             -- 01   rotate, repeat
  2266  		return true
  2267  	}
  2268  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2269  }
  2270  
  2271  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2272  func sequenceOfOnes(x uint64) bool {
  2273  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2274  	y += x
  2275  	return (y-1)&y == 0
  2276  }
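
        // For example, 0x38 (0b111000) is accepted: its lowest set bit is 0x8 and
        // 0x38+0x8 == 0x40 is a power of two. 0x5 (0b101) is rejected because
        // 0x5+0x1 == 0x6 is not.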
  2277  
  2278  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2279  func isARM64addcon(v int64) bool {
  2280  	/* uimm12 or uimm24? */
  2281  	if v < 0 {
  2282  		return false
  2283  	}
  2284  	if (v & 0xFFF) == 0 {
  2285  		v >>= 12
  2286  	}
  2287  	return v <= 0xFFF
  2288  }
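
        // For example, 0xFFF and 0x123000 are encodable (the latter after shifting
        // out twelve zero bits), while 0x1001 is not.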
  2289  
  2290  // setPos sets the position of v to pos, then returns true.
  2291  // Useful for setting the result of a rewrite's position to
  2292  // something other than the default.
  2293  func setPos(v *Value, pos src.XPos) bool {
  2294  	v.Pos = pos
  2295  	return true
  2296  }
  2297  
