Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/ir"
    10  	"cmd/compile/internal/logopt"
    11  	"cmd/compile/internal/reflectdata"
    12  	"cmd/compile/internal/rttype"
    13  	"cmd/compile/internal/typecheck"
    14  	"cmd/compile/internal/types"
    15  	"cmd/internal/obj"
    16  	"cmd/internal/obj/s390x"
    17  	"cmd/internal/objabi"
    18  	"cmd/internal/src"
    19  	"encoding/binary"
    20  	"fmt"
    21  	"internal/buildcfg"
    22  	"io"
    23  	"math"
    24  	"math/bits"
    25  	"os"
    26  	"path/filepath"
    27  	"strings"
    28  )
    29  
    30  type deadValueChoice bool
    31  
    32  const (
    33  	leaveDeadValues  deadValueChoice = false
    34  	removeDeadValues                 = true
    35  
    36  	repZeroThreshold = 1408 // size beyond which we use REP STOS for zeroing
    37  	repMoveThreshold = 1408 // size beyond which we use REP MOVS for copying
    38  )
    39  
    40  // applyRewrite repeatedly applies rb to f's blocks and rv to f's values until no more rewrites fire. deadcode indicates whether rewrite should try to remove any values that become dead.
    41  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    42  	// repeat rewrites until we find no more rewrites
    43  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    44  	pendingLines.clear()
    45  	debug := f.pass.debug
    46  	if debug > 1 {
    47  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    48  	}
    49  	// if the number of rewrite iterations reaches itersLimit we will
    50  	// at that point turn on cycle detection. Instead of a fixed limit,
    51  	// size the limit according to func size to allow for cases such
    52  	// as the one in issue #66773.
    53  	itersLimit := f.NumBlocks()
    54  	if itersLimit < 20 {
    55  		itersLimit = 20
    56  	}
    57  	var iters int
    58  	var states map[string]bool
    59  	for {
    60  		if debug > 1 {
    61  			fmt.Printf("%s: iter %d\n", f.pass.name, iters)
    62  		}
    63  		change := false
    64  		deadChange := false
    65  		for _, b := range f.Blocks {
    66  			var b0 *Block
    67  			if debug > 1 {
    68  				fmt.Printf("%s: start block\n", f.pass.name)
    69  				b0 = new(Block)
    70  				*b0 = *b
    71  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    72  			}
    73  			for i, c := range b.ControlValues() {
    74  				for c.Op == OpCopy {
    75  					c = c.Args[0]
    76  					b.ReplaceControl(i, c)
    77  				}
    78  			}
    79  			if rb(b) {
    80  				change = true
    81  				if debug > 1 {
    82  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    83  				}
    84  			}
    85  			for j, v := range b.Values {
    86  				if debug > 1 {
    87  					fmt.Printf("%s: consider %v\n", f.pass.name, v.LongString())
    88  				}
    89  				var v0 *Value
    90  				if debug > 1 {
    91  					v0 = new(Value)
    92  					*v0 = *v
    93  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    94  				}
    95  				if v.Uses == 0 && v.removeable() {
    96  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    97  						// Reset any values that are now unused, so that we decrement
    98  					// the use counts of their arguments.
    99  						// Not quite a deadcode pass, because it does not handle cycles.
   100  						// But it should help Uses==1 rules to fire.
   101  						v.reset(OpInvalid)
   102  						deadChange = true
   103  					}
   104  					// No point rewriting values which aren't used.
   105  					continue
   106  				}
   107  
   108  				vchange := phielimValue(v)
   109  				if vchange && debug > 1 {
   110  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   111  				}
   112  
   113  				// Eliminate copy inputs.
   114  				// If any copy input becomes unused, mark it
   115  				// as invalid and discard its argument. Repeat
   116  				// recursively on the discarded argument.
   117  				// This phase helps remove phantom "dead copy" uses
   118  				// of a value so that a x.Uses==1 rule condition
   119  				// fires reliably.
   120  				for i, a := range v.Args {
   121  					if a.Op != OpCopy {
   122  						continue
   123  					}
   124  					aa := copySource(a)
   125  					v.SetArg(i, aa)
   126  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   127  					// to hold it.  The first candidate is the value that will replace a (aa),
   128  					// if it shares the same block and line and is eligible.
   129  					// The second option is v, which has a as an input.  Because aa is earlier in
   130  					// the data flow, it is the better choice.
   131  					if a.Pos.IsStmt() == src.PosIsStmt {
   132  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   133  							aa.Pos = aa.Pos.WithIsStmt()
   134  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   135  							v.Pos = v.Pos.WithIsStmt()
   136  						} else {
   137  							// Record the lost line and look for a new home after all rewrites are complete.
   138  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   139  							// line to appear in more than one block, but only one block is stored, so if both end
   140  							// up here, then one will be lost.
   141  							pendingLines.set(a.Pos, int32(a.Block.ID))
   142  						}
   143  						a.Pos = a.Pos.WithNotStmt()
   144  					}
   145  					vchange = true
   146  					for a.Uses == 0 {
   147  						b := a.Args[0]
   148  						a.reset(OpInvalid)
   149  						a = b
   150  					}
   151  				}
   152  				if vchange && debug > 1 {
   153  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   154  				}
   155  
   156  				// apply rewrite function
   157  				if rv(v) {
   158  					vchange = true
   159  					// If value changed to a poor choice for a statement boundary, move the boundary
   160  					if v.Pos.IsStmt() == src.PosIsStmt {
   161  						if k := nextGoodStatementIndex(v, j, b); k != j {
   162  							v.Pos = v.Pos.WithNotStmt()
   163  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   164  						}
   165  					}
   166  				}
   167  
   168  				change = change || vchange
   169  				if vchange && debug > 1 {
   170  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   171  				}
   172  			}
   173  		}
   174  		if !change && !deadChange {
   175  			break
   176  		}
   177  		iters++
   178  		if (iters > itersLimit || debug >= 2) && change {
   179  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   180  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   181  			// and the maximum value encountered during make.bash is 12.
   182  			// Start checking for cycles. (This is too expensive to do routinely.)
   183  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   184  			if states == nil {
   185  				states = make(map[string]bool)
   186  			}
   187  			h := f.rewriteHash()
   188  			if _, ok := states[h]; ok {
   189  				// We've found a cycle.
   190  				// To diagnose it, set debug to 2 and start again,
   191  				// so that we'll print all rules applied until we complete another cycle.
   192  				// If debug is already >= 2, we've already done that, so it's time to crash.
   193  				if debug < 2 {
   194  					debug = 2
   195  					states = make(map[string]bool)
   196  				} else {
   197  					f.Fatalf("rewrite cycle detected")
   198  				}
   199  			}
   200  			states[h] = true
   201  		}
   202  	}
   203  	// remove clobbered values
   204  	for _, b := range f.Blocks {
   205  		j := 0
   206  		for i, v := range b.Values {
   207  			vl := v.Pos
   208  			if v.Op == OpInvalid {
   209  				if v.Pos.IsStmt() == src.PosIsStmt {
   210  					pendingLines.set(vl, int32(b.ID))
   211  				}
   212  				f.freeValue(v)
   213  				continue
   214  			}
   215  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) {
   216  				if pl, ok := pendingLines.get(vl); ok && pl == int32(b.ID) {
   217  					pendingLines.remove(vl)
   218  					v.Pos = v.Pos.WithIsStmt()
   219  				}
   220  			}
   221  			if i != j {
   222  				b.Values[j] = v
   223  			}
   224  			j++
   225  		}
   226  		if pl, ok := pendingLines.get(b.Pos); ok && pl == int32(b.ID) {
   227  			b.Pos = b.Pos.WithIsStmt()
   228  			pendingLines.remove(b.Pos)
   229  		}
   230  		b.truncateValues(j)
   231  	}
   232  }
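        // For reference, the lowering pass invokes this function roughly as follows
        // (a sketch; see lower.go in this package for the exact call sites):
        //
        //	applyRewrite(f, f.Config.lowerBlock, f.Config.lowerValue, removeDeadValues)
        //
        // where lowerBlock and lowerValue are the machine-generated rewriters for the
        // target architecture.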
   233  
   234  // Common functions called from rewriting rules
   235  
   236  func is64BitFloat(t *types.Type) bool {
   237  	return t.Size() == 8 && t.IsFloat()
   238  }
   239  
   240  func is32BitFloat(t *types.Type) bool {
   241  	return t.Size() == 4 && t.IsFloat()
   242  }
   243  
   244  func is64BitInt(t *types.Type) bool {
   245  	return t.Size() == 8 && t.IsInteger()
   246  }
   247  
   248  func is32BitInt(t *types.Type) bool {
   249  	return t.Size() == 4 && t.IsInteger()
   250  }
   251  
   252  func is16BitInt(t *types.Type) bool {
   253  	return t.Size() == 2 && t.IsInteger()
   254  }
   255  
   256  func is8BitInt(t *types.Type) bool {
   257  	return t.Size() == 1 && t.IsInteger()
   258  }
   259  
   260  func isPtr(t *types.Type) bool {
   261  	return t.IsPtrShaped()
   262  }
   263  
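        // copyCompatibleType reports whether a value of type t2 may be treated as having
        // type t1 for the purposes of a copy: the sizes must match, integers are
        // compatible with integers, pointers with pointers, and otherwise the two types
        // must be identical.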
   264  func copyCompatibleType(t1, t2 *types.Type) bool {
   265  	if t1.Size() != t2.Size() {
   266  		return false
   267  	}
   268  	if t1.IsInteger() {
   269  		return t2.IsInteger()
   270  	}
   271  	if isPtr(t1) {
   272  		return isPtr(t2)
   273  	}
   274  	return t1.Compare(t2) == types.CMPeq
   275  }
   276  
   277  // mergeSym merges two symbolic offsets. There is no real merging of
   278  // offsets, we just pick the non-nil one.
   279  func mergeSym(x, y Sym) Sym {
   280  	if x == nil {
   281  		return y
   282  	}
   283  	if y == nil {
   284  		return x
   285  	}
   286  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   287  }
   288  
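        // canMergeSym reports whether the symbolic offsets x and y can be merged by
        // mergeSym, i.e. at most one of them is non-nil.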
   289  func canMergeSym(x, y Sym) bool {
   290  	return x == nil || y == nil
   291  }
   292  
   293  // canMergeLoadClobber reports whether the load can be merged into target without
   294  // invalidating the schedule.
   295  // It also checks that the other non-load argument x is something we
   296  // are ok with clobbering.
   297  func canMergeLoadClobber(target, load, x *Value) bool {
   298  	// The register containing x is going to get clobbered.
   299  	// Don't merge if we still need the value of x.
   300  	// We don't have liveness information here, but we can
   301  	// approximate x dying with:
   302  	//  1) target is x's only use.
   303  	//  2) target is not in a deeper loop than x.
   304  	switch {
   305  	case x.Uses == 2 && x.Op == OpPhi && len(x.Args) == 2 && (x.Args[0] == target || x.Args[1] == target) && target.Uses == 1:
   306  		// This is a simple detector to determine that x is probably
   307  		// not live after target. (It does not need to be perfect,
   308  		// regalloc will issue a reg-reg move to save it if we are wrong.)
   309  		// We have:
   310  		//   x = Phi(?, target)
   311  		//   target = Op(load, x)
   312  		// Because target has only one use as a Phi argument, we can schedule it
   313  		// very late. Hopefully, later than the other use of x. (The other use died
   314  		// between x and target, or exists on another branch entirely).
   315  	case x.Uses > 1:
   316  		return false
   317  	}
   318  	loopnest := x.Block.Func.loopnest()
   319  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   320  		return false
   321  	}
   322  	return canMergeLoad(target, load)
   323  }
   324  
   325  // canMergeLoad reports whether the load can be merged into target without
   326  // invalidating the schedule.
   327  func canMergeLoad(target, load *Value) bool {
   328  	if target.Block.ID != load.Block.ID {
   329  		// If the load is in a different block do not merge it.
   330  		return false
   331  	}
   332  
   333  	// We can't merge the load into the target if the load
   334  	// has more than one use.
   335  	if load.Uses != 1 {
   336  		return false
   337  	}
   338  
   339  	mem := load.MemoryArg()
   340  
   341  	// We need the load's memory arg to still be alive at target. That
   342  	// can't be the case if one of target's args depends on a memory
   343  	// state that is a successor of load's memory arg.
   344  	//
   345  	// For example, it would be invalid to merge load into target in
   346  	// the following situation because newmem has killed oldmem
   347  	// before target is reached:
   348  	//     load = read ... oldmem
   349  	//   newmem = write ... oldmem
   350  	//     arg0 = read ... newmem
   351  	//   target = add arg0 load
   352  	//
   353  	// If the argument comes from a different block then we can exclude
   354  	// it immediately because it must dominate load (which is in the
   355  	// same block as target).
   356  	var args []*Value
   357  	for _, a := range target.Args {
   358  		if a != load && a.Block.ID == target.Block.ID {
   359  			args = append(args, a)
   360  		}
   361  	}
   362  
   363  	// memPreds contains memory states known to be predecessors of load's
   364  	// memory state. It is lazily initialized.
   365  	var memPreds map[*Value]bool
   366  	for i := 0; len(args) > 0; i++ {
   367  		const limit = 100
   368  		if i >= limit {
   369  			// Give up if we have done a lot of iterations.
   370  			return false
   371  		}
   372  		v := args[len(args)-1]
   373  		args = args[:len(args)-1]
   374  		if target.Block.ID != v.Block.ID {
   375  			// Since target and load are in the same block
   376  			// we can stop searching when we leave the block.
   377  			continue
   378  		}
   379  		if v.Op == OpPhi {
   380  			// A Phi implies we have reached the top of the block.
   381  			// The memory phi, if it exists, is always
   382  			// the first logical store in the block.
   383  			continue
   384  		}
   385  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   386  			// We could handle this situation however it is likely
   387  			// to be very rare.
   388  			return false
   389  		}
   390  		if v.Op.SymEffect()&SymAddr != 0 {
   391  			// This case prevents an operation that calculates the
   392  			// address of a local variable from being forced to schedule
   393  			// before its corresponding VarDef.
   394  			// See issue 28445.
   395  			//   v1 = LOAD ...
   396  			//   v2 = VARDEF
   397  			//   v3 = LEAQ
   398  			//   v4 = CMPQ v1 v3
   399  			// We don't want to combine the CMPQ with the load, because
   400  			// that would force the CMPQ to schedule before the VARDEF, which
   401  			// in turn requires the LEAQ to schedule before the VARDEF.
   402  			return false
   403  		}
   404  		if v.Type.IsMemory() {
   405  			if memPreds == nil {
   406  				// Initialise a map containing memory states
   407  				// known to be predecessors of load's memory
   408  				// state.
   409  				memPreds = make(map[*Value]bool)
   410  				m := mem
   411  				const limit = 50
   412  				for i := 0; i < limit; i++ {
   413  					if m.Op == OpPhi {
   414  						// The memory phi, if it exists, is always
   415  						// the first logical store in the block.
   416  						break
   417  					}
   418  					if m.Block.ID != target.Block.ID {
   419  						break
   420  					}
   421  					if !m.Type.IsMemory() {
   422  						break
   423  					}
   424  					memPreds[m] = true
   425  					if len(m.Args) == 0 {
   426  						break
   427  					}
   428  					m = m.MemoryArg()
   429  				}
   430  			}
   431  
   432  			// We can merge if v is a predecessor of mem.
   433  			//
   434  			// For example, we can merge load into target in the
   435  			// following scenario:
   436  			//      x = read ... v
   437  			//    mem = write ... v
   438  			//   load = read ... mem
   439  			// target = add x load
   440  			if memPreds[v] {
   441  				continue
   442  			}
   443  			return false
   444  		}
   445  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   446  			// If v takes mem as an input then we know mem
   447  			// is valid at this point.
   448  			continue
   449  		}
   450  		for _, a := range v.Args {
   451  			if target.Block.ID == a.Block.ID {
   452  				args = append(args, a)
   453  			}
   454  		}
   455  	}
   456  
   457  	return true
   458  }
   459  
   460  // isSameCall reports whether aux describes a call to the function with the given name.
   461  func isSameCall(aux Aux, name string) bool {
   462  	fn := aux.(*AuxCall).Fn
   463  	return fn != nil && fn.String() == name
   464  }
   465  
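        // isMalloc reports whether aux describes a call to one of the runtime's memory
        // allocation entry points (runtime.newobject or a specialized malloc).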
   466  func isMalloc(aux Aux) bool {
   467  	return isNewObject(aux) || isSpecializedMalloc(aux)
   468  }
   469  
   470  func isNewObject(aux Aux) bool {
   471  	fn := aux.(*AuxCall).Fn
   472  	return fn != nil && fn.String() == "runtime.newobject"
   473  }
   474  
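        // isSpecializedMalloc reports whether aux describes a call to one of the
        // size-class-specialized runtime allocation functions.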
   475  func isSpecializedMalloc(aux Aux) bool {
   476  	fn := aux.(*AuxCall).Fn
   477  	if fn == nil {
   478  		return false
   479  	}
   480  	name := fn.String()
   481  	return strings.HasPrefix(name, "runtime.mallocgcSmallNoScanSC") ||
   482  		strings.HasPrefix(name, "runtime.mallocgcSmallScanNoHeaderSC") ||
   483  		strings.HasPrefix(name, "runtime.mallocTiny")
   484  }
   485  
   486  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   487  func canLoadUnaligned(c *Config) bool {
   488  	return c.ctxt.Arch.Alignment == 1
   489  }
   490  
   491  // nlzX returns the number of leading zeros.
   492  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   493  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   494  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   495  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   496  
   497  // ntzX returns the number of trailing zeros.
   498  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   499  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   500  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   501  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   502  
   503  // oneBit reports whether x contains exactly one set bit.
   504  func oneBit[T int8 | int16 | int32 | int64](x T) bool {
   505  	return x&(x-1) == 0 && x != 0
   506  }
   507  
   508  // nto returns the number of trailing ones.
   509  func nto(x int64) int64 {
   510  	return int64(ntz64(^x))
   511  }
   512  
   513  // logX returns the logarithm of n base 2.
   514  // n must be a positive power of 2 (isPowerOfTwo returns true).
   515  func log8(n int8) int64   { return log8u(uint8(n)) }
   516  func log16(n int16) int64 { return log16u(uint16(n)) }
   517  func log32(n int32) int64 { return log32u(uint32(n)) }
   518  func log64(n int64) int64 { return log64u(uint64(n)) }
   519  
   520  // logXu returns the logarithm of n base 2.
   521  // n must be a power of 2 (isUnsignedPowerOfTwo returns true).
   522  func log8u(n uint8) int64   { return int64(bits.Len8(n)) - 1 }
   523  func log16u(n uint16) int64 { return int64(bits.Len16(n)) - 1 }
   524  func log32u(n uint32) int64 { return int64(bits.Len32(n)) - 1 }
   525  func log64u(n uint64) int64 { return int64(bits.Len64(n)) - 1 }
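        // For example, log64(8) == 3, log32u(1<<31) == 31, and log16u(1) == 0.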
   526  
   527  // isPowerOfTwo reports whether n is a positive power of 2.
   528  func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
   529  	return n > 0 && n&(n-1) == 0
   530  }
   531  
   532  // isUnsignedPowerOfTwo reports whether n is an unsigned power of 2.
   533  func isUnsignedPowerOfTwo[T uint8 | uint16 | uint32 | uint64](n T) bool {
   534  	return n != 0 && n&(n-1) == 0
   535  }
   536  
   537  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   538  func is32Bit(n int64) bool {
   539  	return n == int64(int32(n))
   540  }
   541  
   542  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   543  func is16Bit(n int64) bool {
   544  	return n == int64(int16(n))
   545  }
   546  
   547  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   548  func is8Bit(n int64) bool {
   549  	return n == int64(int8(n))
   550  }
   551  
   552  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   553  func isU8Bit(n int64) bool {
   554  	return n == int64(uint8(n))
   555  }
   556  
   557  // is12Bit reports whether n can be represented as a signed 12 bit integer.
   558  func is12Bit(n int64) bool {
   559  	return -(1<<11) <= n && n < (1<<11)
   560  }
   561  
   562  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   563  func isU12Bit(n int64) bool {
   564  	return 0 <= n && n < (1<<12)
   565  }
   566  
   567  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   568  func isU16Bit(n int64) bool {
   569  	return n == int64(uint16(n))
   570  }
   571  
   572  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   573  func isU32Bit(n int64) bool {
   574  	return n == int64(uint32(n))
   575  }
   576  
   577  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   578  func is20Bit(n int64) bool {
   579  	return -(1<<19) <= n && n < (1<<19)
   580  }
   581  
   582  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   583  func b2i(b bool) int64 {
   584  	if b {
   585  		return 1
   586  	}
   587  	return 0
   588  }
   589  
   590  // b2i32 translates a boolean value to 0 or 1.
   591  func b2i32(b bool) int32 {
   592  	if b {
   593  		return 1
   594  	}
   595  	return 0
   596  }
   597  
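        // canMulStrengthReduce reports whether a multiplication by the constant x can be
        // strength-reduced on this configuration, i.e. whether a recipe for x has been
        // precomputed in config.mulRecipes.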
   598  func canMulStrengthReduce(config *Config, x int64) bool {
   599  	_, ok := config.mulRecipes[x]
   600  	return ok
   601  }
   602  func canMulStrengthReduce32(config *Config, x int32) bool {
   603  	_, ok := config.mulRecipes[int64(x)]
   604  	return ok
   605  }
   606  
   607  // mulStrengthReduce returns v*x evaluated at the location
   608  // (block and source position) of m.
   609  // canMulStrengthReduce must have returned true.
   610  func mulStrengthReduce(m *Value, v *Value, x int64) *Value {
   611  	return v.Block.Func.Config.mulRecipes[x].build(m, v)
   612  }
   613  
   614  // mulStrengthReduce32 returns v*x evaluated at the location
   615  // (block and source position) of m.
   616  // canMulStrengthReduce32 must have returned true.
   617  // The upper 32 bits of m might be set to junk.
   618  func mulStrengthReduce32(m *Value, v *Value, x int32) *Value {
   619  	return v.Block.Func.Config.mulRecipes[int64(x)].build(m, v)
   620  }
   621  
   622  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   623  // A shift is bounded if it is shifting by less than the width of the shifted value.
   624  func shiftIsBounded(v *Value) bool {
   625  	return v.AuxInt != 0
   626  }
   627  
   628  // canonLessThan reports whether x is "ordered" less than y, for the purpose of normalizing
   629  // generated code as much as possible.
   630  func canonLessThan(x, y *Value) bool {
   631  	if x.Op != y.Op {
   632  		return x.Op < y.Op
   633  	}
   634  	if !x.Pos.SameFileAndLine(y.Pos) {
   635  		return x.Pos.Before(y.Pos)
   636  	}
   637  	return x.ID < y.ID
   638  }
   639  
   640  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   641  // of the mantissa. It will panic if the truncation results in lost information.
   642  func truncate64Fto32F(f float64) float32 {
   643  	if !isExactFloat32(f) {
   644  		panic("truncate64Fto32F: truncation is not exact")
   645  	}
   646  	if !math.IsNaN(f) {
   647  		return float32(f)
   648  	}
   649  	// NaN bit patterns aren't necessarily preserved across conversion
   650  	// instructions so we need to do the conversion manually.
   651  	b := math.Float64bits(f)
   652  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   653  	//          | sign                  | exponent   | mantissa       |
   654  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   655  	return math.Float32frombits(r)
   656  }
   657  
   658  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   659  func DivisionNeedsFixUp(v *Value) bool {
   660  	return v.AuxInt == 0
   661  }
   662  
   663  // auxTo32F decodes a float32 from the AuxInt value provided.
   664  func auxTo32F(i int64) float32 {
   665  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   666  }
   667  
   668  func auxIntToBool(i int64) bool {
   669  	if i == 0 {
   670  		return false
   671  	}
   672  	return true
   673  }
   674  func auxIntToInt8(i int64) int8 {
   675  	return int8(i)
   676  }
   677  func auxIntToInt16(i int64) int16 {
   678  	return int16(i)
   679  }
   680  func auxIntToInt32(i int64) int32 {
   681  	return int32(i)
   682  }
   683  func auxIntToInt64(i int64) int64 {
   684  	return i
   685  }
   686  func auxIntToUint8(i int64) uint8 {
   687  	return uint8(i)
   688  }
   689  func auxIntToFloat32(i int64) float32 {
   690  	return float32(math.Float64frombits(uint64(i)))
   691  }
   692  func auxIntToFloat64(i int64) float64 {
   693  	return math.Float64frombits(uint64(i))
   694  }
   695  func auxIntToValAndOff(i int64) ValAndOff {
   696  	return ValAndOff(i)
   697  }
   698  func auxIntToArm64BitField(i int64) arm64BitField {
   699  	return arm64BitField(i)
   700  }
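        // auxIntToArm64ConditionalParams unpacks an arm64ConditionalParams from an
        // AuxInt. The packed layout (see arm64ConditionalParamsToAuxInt) is:
        //
        //	bits  0-15: cond (an Op)
        //	bits 16-19: nzcv
        //	bits 20-24: constValue
        //	bit  25:    ind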
   701  func auxIntToArm64ConditionalParams(i int64) arm64ConditionalParams {
   702  	var params arm64ConditionalParams
   703  	params.cond = Op(i & 0xffff)
   704  	i >>= 16
   705  	params.nzcv = uint8(i & 0x0f)
   706  	i >>= 4
   707  	params.constValue = uint8(i & 0x1f)
   708  	i >>= 5
   709  	params.ind = i == 1
   710  	return params
   711  }
   712  func auxIntToFlagConstant(x int64) flagConstant {
   713  	return flagConstant(x)
   714  }
   715  
   716  func auxIntToOp(cc int64) Op {
   717  	return Op(cc)
   718  }
   719  
   720  func boolToAuxInt(b bool) int64 {
   721  	if b {
   722  		return 1
   723  	}
   724  	return 0
   725  }
   726  func int8ToAuxInt(i int8) int64 {
   727  	return int64(i)
   728  }
   729  func int16ToAuxInt(i int16) int64 {
   730  	return int64(i)
   731  }
   732  func int32ToAuxInt(i int32) int64 {
   733  	return int64(i)
   734  }
   735  func int64ToAuxInt(i int64) int64 {
   736  	return i
   737  }
   738  func uint8ToAuxInt(i uint8) int64 {
   739  	return int64(int8(i))
   740  }
   741  func float32ToAuxInt(f float32) int64 {
   742  	return int64(math.Float64bits(float64(f)))
   743  }
   744  func float64ToAuxInt(f float64) int64 {
   745  	return int64(math.Float64bits(f))
   746  }
   747  func valAndOffToAuxInt(v ValAndOff) int64 {
   748  	return int64(v)
   749  }
   750  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   751  	return int64(v)
   752  }
   753  func arm64ConditionalParamsToAuxInt(v arm64ConditionalParams) int64 {
   754  	if v.cond&^0xffff != 0 {
   755  		panic("condition value exceeds 16 bits")
   756  	}
   757  
   758  	var i int64
   759  	if v.ind {
   760  		i = 1 << 25
   761  	}
   762  	i |= int64(v.constValue) << 20
   763  	i |= int64(v.nzcv) << 16
   764  	i |= int64(v.cond)
   765  	return i
   766  }
   767  
   768  func float64ExactBits(f float64, c float64) bool {
   769  	return math.Float64bits(f) == math.Float64bits(c)
   770  }
   771  
   772  func flagConstantToAuxInt(x flagConstant) int64 {
   773  	return int64(x)
   774  }
   775  
   776  func opToAuxInt(o Op) int64 {
   777  	return int64(o)
   778  }
   779  
   780  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   781  type Aux interface {
   782  	CanBeAnSSAAux()
   783  }
   784  
   785  // auxMark is, for now, only used to mark moves that need to avoid clobbering flags.
   786  type auxMark bool
   787  
   788  func (auxMark) CanBeAnSSAAux() {}
   789  
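        // AuxMark is the canonical auxMark value used to tag such moves.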
   790  var AuxMark auxMark
   791  
   792  // stringAux wraps string values for use in Aux.
   793  type stringAux string
   794  
   795  func (stringAux) CanBeAnSSAAux() {}
   796  
   797  func auxToString(i Aux) string {
   798  	return string(i.(stringAux))
   799  }
   800  func auxToSym(i Aux) Sym {
   801  	// TODO: kind of a hack - allows nil interface through
   802  	s, _ := i.(Sym)
   803  	return s
   804  }
   805  func auxToType(i Aux) *types.Type {
   806  	return i.(*types.Type)
   807  }
   808  func auxToCall(i Aux) *AuxCall {
   809  	return i.(*AuxCall)
   810  }
   811  func auxToS390xCCMask(i Aux) s390x.CCMask {
   812  	return i.(s390x.CCMask)
   813  }
   814  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   815  	return i.(s390x.RotateParams)
   816  }
   817  
   818  func StringToAux(s string) Aux {
   819  	return stringAux(s)
   820  }
   821  func symToAux(s Sym) Aux {
   822  	return s
   823  }
   824  func callToAux(s *AuxCall) Aux {
   825  	return s
   826  }
   827  func typeToAux(t *types.Type) Aux {
   828  	return t
   829  }
   830  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   831  	return c
   832  }
   833  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   834  	return r
   835  }
   836  
   837  // uaddOvf reports whether unsigned a+b would overflow.
   838  func uaddOvf(a, b int64) bool {
   839  	return uint64(a)+uint64(b) < uint64(a)
   840  }
   841  
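        // devirtLECall rewrites the late-expanded interface call v into a static call to
        // sym, dropping v's first argument, which is no longer needed once the target is
        // known.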
   842  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   843  	v.Op = OpStaticLECall
   844  	auxcall := v.Aux.(*AuxCall)
   845  	auxcall.Fn = sym
   846  	// Remove first arg
   847  	v.Args[0].Uses--
   848  	copy(v.Args[0:], v.Args[1:])
   849  	v.Args[len(v.Args)-1] = nil // aid GC
   850  	v.Args = v.Args[:len(v.Args)-1]
   851  	if f := v.Block.Func; f.pass.debug > 0 {
   852  		f.Warnl(v.Pos, "de-virtualizing call")
   853  	}
   854  	return v
   855  }
   856  
   857  // isSamePtr reports whether p1 and p2 point to the same address.
   858  func isSamePtr(p1, p2 *Value) bool {
   859  	if p1 == p2 {
   860  		return true
   861  	}
   862  	if p1.Op != p2.Op {
   863  		for p1.Op == OpOffPtr && p1.AuxInt == 0 {
   864  			p1 = p1.Args[0]
   865  		}
   866  		for p2.Op == OpOffPtr && p2.AuxInt == 0 {
   867  			p2 = p2.Args[0]
   868  		}
   869  		if p1 == p2 {
   870  			return true
   871  		}
   872  		if p1.Op != p2.Op {
   873  			return false
   874  		}
   875  	}
   876  	switch p1.Op {
   877  	case OpOffPtr:
   878  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   879  	case OpAddr, OpLocalAddr:
   880  		return p1.Aux == p2.Aux
   881  	case OpAddPtr:
   882  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   883  	}
   884  	return false
   885  }
   886  
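        // isStackPtr reports whether v, ignoring any constant offsets, is the stack
        // pointer or the address of a local variable.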
   887  func isStackPtr(v *Value) bool {
   888  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   889  		v = v.Args[0]
   890  	}
   891  	return v.Op == OpSP || v.Op == OpLocalAddr
   892  }
   893  
   894  // disjoint reports whether the memory region specified by [p1:p1+n1)
   895  // does not overlap with [p2:p2+n2).
   896  // A return value of false does not imply the regions overlap.
   897  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   898  	if n1 == 0 || n2 == 0 {
   899  		return true
   900  	}
   901  	if p1 == p2 {
   902  		return false
   903  	}
   904  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   905  		base, offset = ptr, 0
   906  		for base.Op == OpOffPtr {
   907  			offset += base.AuxInt
   908  			base = base.Args[0]
   909  		}
   910  		if opcodeTable[base.Op].nilCheck {
   911  			base = base.Args[0]
   912  		}
   913  		return base, offset
   914  	}
   915  
   916  	// Run types-based analysis
   917  	if disjointTypes(p1.Type, p2.Type) {
   918  		return true
   919  	}
   920  
   921  	p1, off1 := baseAndOffset(p1)
   922  	p2, off2 := baseAndOffset(p2)
   923  	if isSamePtr(p1, p2) {
   924  		return !overlap(off1, n1, off2, n2)
   925  	}
   926  	// p1 and p2 are not the same, so if they are both OpAddrs then
   927  	// they point to different variables.
   928  	// If one pointer is on the stack and the other is an argument
   929  	// then they can't overlap.
   930  	switch p1.Op {
   931  	case OpAddr, OpLocalAddr:
   932  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   933  			return true
   934  		}
   935  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   936  	case OpArg, OpArgIntReg:
   937  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   938  			return true
   939  		}
   940  	case OpSP:
   941  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   942  	}
   943  	return false
   944  }
   945  
   946  // disjointTypes reports whether a memory region pointed to by a pointer of type
   947  // t1 does not overlap with a memory region pointed to by a pointer of type t2 --
   948  // based on type aliasing rules.
   949  func disjointTypes(t1 *types.Type, t2 *types.Type) bool {
   950  	// Unsafe pointer can alias with anything.
   951  	if t1.IsUnsafePtr() || t2.IsUnsafePtr() {
   952  		return false
   953  	}
   954  
   955  	if !t1.IsPtr() || !t2.IsPtr() {
   956  		panic("disjointTypes: one of arguments is not a pointer")
   957  	}
   958  
   959  	t1 = t1.Elem()
   960  	t2 = t2.Elem()
   961  
   962  	// Not-in-heap types are not supported -- they are rare and unimportant; also,
   963  	// the type.HasPointers check doesn't work correctly for them.
   964  	if t1.NotInHeap() || t2.NotInHeap() {
   965  		return false
   966  	}
   967  
   968  	isPtrShaped := func(t *types.Type) bool { return int(t.Size()) == types.PtrSize && t.HasPointers() }
   969  
   970  	// Pointers and non-pointers are disjoint (https://pkg.go.dev/unsafe#Pointer).
   971  	if (isPtrShaped(t1) && !t2.HasPointers()) ||
   972  		(isPtrShaped(t2) && !t1.HasPointers()) {
   973  		return true
   974  	}
   975  
   976  	return false
   977  }
   978  
   979  // moveSize returns the number of bytes an aligned MOV instruction moves.
   980  func moveSize(align int64, c *Config) int64 {
   981  	switch {
   982  	case align%8 == 0 && c.PtrSize == 8:
   983  		return 8
   984  	case align%4 == 0:
   985  		return 4
   986  	case align%2 == 0:
   987  		return 2
   988  	}
   989  	return 1
   990  }
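        // For example, with 8-byte pointers moveSize(16, c) == 8, while moveSize(6, c) == 2.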
   991  
   992  // mergePoint finds a block among a's blocks which dominates b and is itself
   993  // dominated by all of a's blocks. Returns nil if it can't find one.
   994  // Might return nil even if one does exist.
   995  func mergePoint(b *Block, a ...*Value) *Block {
   996  	// Walk backward from b looking for one of the a's blocks.
   997  
   998  	// Max distance
   999  	d := 100
  1000  
  1001  	for d > 0 {
  1002  		for _, x := range a {
  1003  			if b == x.Block {
  1004  				goto found
  1005  			}
  1006  		}
  1007  		if len(b.Preds) > 1 {
  1008  			// Don't know which way to go back. Abort.
  1009  			return nil
  1010  		}
  1011  		b = b.Preds[0].b
  1012  		d--
  1013  	}
  1014  	return nil // too far away
  1015  found:
  1016  	// At this point, b is the block containing the first value in a found by walking backwards.
  1017  	// If we return anything, it will be r (which is this block).
  1018  	r := b
  1019  
  1020  	// Keep going, counting the other a's that we find. They must all dominate r.
  1021  	na := 0
  1022  	for d > 0 {
  1023  		for _, x := range a {
  1024  			if b == x.Block {
  1025  				na++
  1026  			}
  1027  		}
  1028  		if na == len(a) {
  1029  			// Found all of a in a backwards walk. We can return r.
  1030  			return r
  1031  		}
  1032  		if len(b.Preds) > 1 {
  1033  			return nil
  1034  		}
  1035  		b = b.Preds[0].b
  1036  		d--
  1037  
  1038  	}
  1039  	return nil // too far away
  1040  }
  1041  
  1042  // clobber invalidates values. Returns true.
  1043  // clobber is used by rewrite rules to:
  1044  //
  1045  //	A) make sure the values are really dead and never used again.
  1046  //	B) decrement use counts of the values' args.
  1047  func clobber(vv ...*Value) bool {
  1048  	for _, v := range vv {
  1049  		v.reset(OpInvalid)
  1050  		// Note: leave v.Block intact.  The Block field is used after clobber.
  1051  	}
  1052  	return true
  1053  }
  1054  
  1055  // resetCopy resets v to be a copy of arg.
  1056  // Always returns true.
  1057  func resetCopy(v *Value, arg *Value) bool {
  1058  	v.reset(OpCopy)
  1059  	v.AddArg(arg)
  1060  	return true
  1061  }
  1062  
  1063  // clobberIfDead resets v when use count is 1. Returns true.
  1064  // clobberIfDead is used by rewrite rules to decrement
  1065  // use counts of v's args when v is dead and never used.
  1066  func clobberIfDead(v *Value) bool {
  1067  	if v.Uses == 1 {
  1068  		v.reset(OpInvalid)
  1069  	}
  1070  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
  1071  	return true
  1072  }
  1073  
  1074  // noteRule is an easy way to track if a rule is matched when writing
  1075  // new ones.  Make the rule of interest also conditional on
  1076  //
  1077  //	noteRule("note to self: rule of interest matched")
  1078  //
  1079  // and that message will print when the rule matches.
  1080  func noteRule(s string) bool {
  1081  	fmt.Println(s)
  1082  	return true
  1083  }
  1084  
  1085  // countRule increments Func.ruleMatches[key].
  1086  // If Func.ruleMatches is non-nil at the end
  1087  // of compilation, it will be printed to stdout.
  1088  // This is intended to make it easier to find functions
  1089  // that contain lots of rule matches when developing new rules.
  1090  func countRule(v *Value, key string) bool {
  1091  	f := v.Block.Func
  1092  	if f.ruleMatches == nil {
  1093  		f.ruleMatches = make(map[string]int)
  1094  	}
  1095  	f.ruleMatches[key]++
  1096  	return true
  1097  }
  1098  
  1099  // warnRule generates compiler debug output with string s when
  1100  // v is not in autogenerated code, cond is true and the rule has fired.
  1101  func warnRule(cond bool, v *Value, s string) bool {
  1102  	if pos := v.Pos; pos.Line() > 1 && cond {
  1103  		v.Block.Func.Warnl(pos, s)
  1104  	}
  1105  	return true
  1106  }
  1107  
  1108  // for a pseudo-op like (LessThan x), extract x.
  1109  func flagArg(v *Value) *Value {
  1110  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1111  		return nil
  1112  	}
  1113  	return v.Args[0]
  1114  }
  1115  
  1116  // arm64Negate finds the complement to an ARM64 condition code,
  1117  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1118  //
  1119  // For floating point, it's more subtle because NaN is unordered. We do
  1120  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1121  func arm64Negate(op Op) Op {
  1122  	switch op {
  1123  	case OpARM64LessThan:
  1124  		return OpARM64GreaterEqual
  1125  	case OpARM64LessThanU:
  1126  		return OpARM64GreaterEqualU
  1127  	case OpARM64GreaterThan:
  1128  		return OpARM64LessEqual
  1129  	case OpARM64GreaterThanU:
  1130  		return OpARM64LessEqualU
  1131  	case OpARM64LessEqual:
  1132  		return OpARM64GreaterThan
  1133  	case OpARM64LessEqualU:
  1134  		return OpARM64GreaterThanU
  1135  	case OpARM64GreaterEqual:
  1136  		return OpARM64LessThan
  1137  	case OpARM64GreaterEqualU:
  1138  		return OpARM64LessThanU
  1139  	case OpARM64Equal:
  1140  		return OpARM64NotEqual
  1141  	case OpARM64NotEqual:
  1142  		return OpARM64Equal
  1143  	case OpARM64LessThanF:
  1144  		return OpARM64NotLessThanF
  1145  	case OpARM64NotLessThanF:
  1146  		return OpARM64LessThanF
  1147  	case OpARM64LessEqualF:
  1148  		return OpARM64NotLessEqualF
  1149  	case OpARM64NotLessEqualF:
  1150  		return OpARM64LessEqualF
  1151  	case OpARM64GreaterThanF:
  1152  		return OpARM64NotGreaterThanF
  1153  	case OpARM64NotGreaterThanF:
  1154  		return OpARM64GreaterThanF
  1155  	case OpARM64GreaterEqualF:
  1156  		return OpARM64NotGreaterEqualF
  1157  	case OpARM64NotGreaterEqualF:
  1158  		return OpARM64GreaterEqualF
  1159  	default:
  1160  		panic("unreachable")
  1161  	}
  1162  }
  1163  
  1164  // arm64Invert evaluates (InvertFlags op), which
  1165  // is the same as altering the condition codes such
  1166  // that the same result would be produced if the arguments
  1167  // to the flag-generating instruction were reversed, e.g.
  1168  // (InvertFlags (CMP x y)) -> (CMP y x)
  1169  func arm64Invert(op Op) Op {
  1170  	switch op {
  1171  	case OpARM64LessThan:
  1172  		return OpARM64GreaterThan
  1173  	case OpARM64LessThanU:
  1174  		return OpARM64GreaterThanU
  1175  	case OpARM64GreaterThan:
  1176  		return OpARM64LessThan
  1177  	case OpARM64GreaterThanU:
  1178  		return OpARM64LessThanU
  1179  	case OpARM64LessEqual:
  1180  		return OpARM64GreaterEqual
  1181  	case OpARM64LessEqualU:
  1182  		return OpARM64GreaterEqualU
  1183  	case OpARM64GreaterEqual:
  1184  		return OpARM64LessEqual
  1185  	case OpARM64GreaterEqualU:
  1186  		return OpARM64LessEqualU
  1187  	case OpARM64Equal, OpARM64NotEqual:
  1188  		return op
  1189  	case OpARM64LessThanF:
  1190  		return OpARM64GreaterThanF
  1191  	case OpARM64GreaterThanF:
  1192  		return OpARM64LessThanF
  1193  	case OpARM64LessEqualF:
  1194  		return OpARM64GreaterEqualF
  1195  	case OpARM64GreaterEqualF:
  1196  		return OpARM64LessEqualF
  1197  	case OpARM64NotLessThanF:
  1198  		return OpARM64NotGreaterThanF
  1199  	case OpARM64NotGreaterThanF:
  1200  		return OpARM64NotLessThanF
  1201  	case OpARM64NotLessEqualF:
  1202  		return OpARM64NotGreaterEqualF
  1203  	case OpARM64NotGreaterEqualF:
  1204  		return OpARM64NotLessEqualF
  1205  	default:
  1206  		panic("unreachable")
  1207  	}
  1208  }
  1209  
  1210  // evaluate an ARM64 op against a flags value
  1211  // that is potentially constant; return 1 for true,
  1212  // -1 for false, and 0 for not constant.
  1213  func ccARM64Eval(op Op, flags *Value) int {
  1214  	fop := flags.Op
  1215  	if fop == OpARM64InvertFlags {
  1216  		return -ccARM64Eval(op, flags.Args[0])
  1217  	}
  1218  	if fop != OpARM64FlagConstant {
  1219  		return 0
  1220  	}
  1221  	fc := flagConstant(flags.AuxInt)
  1222  	b2i := func(b bool) int {
  1223  		if b {
  1224  			return 1
  1225  		}
  1226  		return -1
  1227  	}
  1228  	switch op {
  1229  	case OpARM64Equal:
  1230  		return b2i(fc.eq())
  1231  	case OpARM64NotEqual:
  1232  		return b2i(fc.ne())
  1233  	case OpARM64LessThan:
  1234  		return b2i(fc.lt())
  1235  	case OpARM64LessThanU:
  1236  		return b2i(fc.ult())
  1237  	case OpARM64GreaterThan:
  1238  		return b2i(fc.gt())
  1239  	case OpARM64GreaterThanU:
  1240  		return b2i(fc.ugt())
  1241  	case OpARM64LessEqual:
  1242  		return b2i(fc.le())
  1243  	case OpARM64LessEqualU:
  1244  		return b2i(fc.ule())
  1245  	case OpARM64GreaterEqual:
  1246  		return b2i(fc.ge())
  1247  	case OpARM64GreaterEqualU:
  1248  		return b2i(fc.uge())
  1249  	}
  1250  	return 0
  1251  }
  1252  
  1253  // logRule logs the use of the rule s. This will only be enabled if
  1254  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1255  func logRule(s string) {
  1256  	if ruleFile == nil {
  1257  		// Open a log file to write log to. We open in append
  1258  		// mode because all.bash runs the compiler lots of times,
  1259  		// and we want the concatenation of all of those logs.
  1260  		// This means, of course, that users need to rm the old log
  1261  		// to get fresh data.
  1262  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1263  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1264  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1265  		if err != nil {
  1266  			panic(err)
  1267  		}
  1268  		ruleFile = w
  1269  	}
  1270  	// Ignore errors in case of multiple processes fighting over the file.
  1271  	fmt.Fprintln(ruleFile, s)
  1272  }
  1273  
  1274  var ruleFile io.Writer
  1275  
  1276  func isConstZero(v *Value) bool {
  1277  	switch v.Op {
  1278  	case OpConstNil:
  1279  		return true
  1280  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1281  		return v.AuxInt == 0
  1282  	case OpStringMake, OpIMake, OpComplexMake:
  1283  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1284  	case OpSliceMake:
  1285  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1286  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1287  		return isConstZero(v.Args[0])
  1288  	}
  1289  	return false
  1290  }
  1291  
  1292  // reciprocalExact64 reports whether 1/c is exactly representable.
  1293  func reciprocalExact64(c float64) bool {
  1294  	b := math.Float64bits(c)
  1295  	man := b & (1<<52 - 1)
  1296  	if man != 0 {
  1297  		return false // not a power of 2, denormal, or NaN
  1298  	}
  1299  	exp := b >> 52 & (1<<11 - 1)
  1300  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1301  	// changes the exponent to 0x7fe-exp.
  1302  	switch exp {
  1303  	case 0:
  1304  		return false // ±0
  1305  	case 0x7ff:
  1306  		return false // ±inf
  1307  	case 0x7fe:
  1308  		return false // exponent is not representable
  1309  	default:
  1310  		return true
  1311  	}
  1312  }
  1313  
  1314  // reciprocalExact32 reports whether 1/c is exactly representable.
  1315  func reciprocalExact32(c float32) bool {
  1316  	b := math.Float32bits(c)
  1317  	man := b & (1<<23 - 1)
  1318  	if man != 0 {
  1319  		return false // not a power of 2, denormal, or NaN
  1320  	}
  1321  	exp := b >> 23 & (1<<8 - 1)
  1322  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1323  	// changes the exponent to 0xfe-exp.
  1324  	switch exp {
  1325  	case 0:
  1326  		return false // ±0
  1327  	case 0xff:
  1328  		return false // ±inf
  1329  	case 0xfe:
  1330  		return false // exponent is not representable
  1331  	default:
  1332  		return true
  1333  	}
  1334  }
  1335  
  1336  // isARMImmRot reports whether the immediate v can be directly encoded in an ARM instruction, i.e. as an 8-bit value rotated right by an even number of bits.
  1337  func isARMImmRot(v uint32) bool {
  1338  	for i := 0; i < 16; i++ {
  1339  		if v&^0xff == 0 {
  1340  			return true
  1341  		}
  1342  		v = v<<2 | v>>30
  1343  	}
  1344  
  1345  	return false
  1346  }
  1347  
  1348  // overlap reports whether the ranges given by the given offset and
  1349  // size pairs overlap.
  1350  func overlap(offset1, size1, offset2, size2 int64) bool {
  1351  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1352  		return true
  1353  	}
  1354  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1355  		return true
  1356  	}
  1357  	return false
  1358  }
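        // For example, overlap(0, 8, 4, 4) == true (the second range lies inside the
        // first), while overlap(0, 4, 4, 4) == false (the ranges merely touch).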
  1359  
  1360  // zeroUpper32Bits reports whether the value zeroes out the upper 32 bits of a 64-bit register.
  1361  // depth limits the recursion depth. AMD64.rules uses 3 as the limit,
  1362  // because it catches the same number of cases as 4.
  1363  func zeroUpper32Bits(x *Value, depth int) bool {
  1364  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1365  		// If the value is signed, it might get re-sign-extended
  1366  		// during spill and restore. See issue 68227.
  1367  		return false
  1368  	}
  1369  	switch x.Op {
  1370  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1371  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1372  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1373  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1374  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1375  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1376  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1377  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1378  		OpAMD64SHLL, OpAMD64SHLLconst:
  1379  		return true
  1380  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1381  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1382  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1383  		return true
  1384  	case OpArg: // note: but not ArgIntReg
  1385  		// amd64 always loads args from the stack unsigned.
  1386  		// most other architectures load them sign/zero extended based on the type.
  1387  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1388  	case OpPhi, OpSelect0, OpSelect1:
  1389  		// Phis can use each other as arguments; instead of tracking visited values,
  1390  		// just limit the recursion depth.
  1391  		if depth <= 0 {
  1392  			return false
  1393  		}
  1394  		for i := range x.Args {
  1395  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1396  				return false
  1397  			}
  1398  		}
  1399  		return true
  1400  
  1401  	}
  1402  	return false
  1403  }
  1404  
  1405  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1406  func zeroUpper48Bits(x *Value, depth int) bool {
  1407  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1408  		return false
  1409  	}
  1410  	switch x.Op {
  1411  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1412  		return true
  1413  	case OpArg: // note: but not ArgIntReg
  1414  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1415  	case OpPhi, OpSelect0, OpSelect1:
  1416  		// Phis can use each other as arguments; instead of tracking visited values,
  1417  		// just limit the recursion depth.
  1418  		if depth <= 0 {
  1419  			return false
  1420  		}
  1421  		for i := range x.Args {
  1422  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1423  				return false
  1424  			}
  1425  		}
  1426  		return true
  1427  
  1428  	}
  1429  	return false
  1430  }
  1431  
  1432  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1433  func zeroUpper56Bits(x *Value, depth int) bool {
  1434  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1435  		return false
  1436  	}
  1437  	switch x.Op {
  1438  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1439  		return true
  1440  	case OpArg: // note: but not ArgIntReg
  1441  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1442  	case OpPhi, OpSelect0, OpSelect1:
  1443  		// Phis can use each other as arguments; instead of tracking visited values,
  1444  		// just limit the recursion depth.
  1445  		if depth <= 0 {
  1446  			return false
  1447  		}
  1448  		for i := range x.Args {
  1449  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1450  				return false
  1451  			}
  1452  		}
  1453  		return true
  1454  
  1455  	}
  1456  	return false
  1457  }
  1458  
  1459  func isInlinableMemclr(c *Config, sz int64) bool {
  1460  	if sz < 0 {
  1461  		return false
  1462  	}
  1463  	// TODO: expand this check to allow other architectures
  1464  	// see CL 454255 and issue 56997
  1465  	switch c.arch {
  1466  	case "amd64", "arm64":
  1467  		return true
  1468  	case "ppc64le", "ppc64", "loong64":
  1469  		return sz < 512
  1470  	}
  1471  	return false
  1472  }
  1473  
  1474  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1475  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1476  // safe, either because Move will do all of its loads before any of its stores, or
  1477  // because the arguments are known to be disjoint.
  1478  // This is used as a check for replacing memmove with Move ops.
  1479  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1480  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1481  	// Move ops may or may not be faster for large sizes depending on how the platform
  1482  	// lowers them, so we only perform this optimization on platforms that we know to
  1483  	// have fast Move ops.
  1484  	switch c.arch {
  1485  	case "amd64":
  1486  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1487  	case "arm64":
  1488  		return sz <= 64 || (sz <= 1024 && disjoint(dst, sz, src, sz))
  1489  	case "386":
  1490  		return sz <= 8
  1491  	case "s390x", "ppc64", "ppc64le":
  1492  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1493  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1494  		return sz <= 4
  1495  	}
  1496  	return false
  1497  }
  1498  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1499  	return isInlinableMemmove(dst, src, sz, c)
  1500  }
  1501  
  1502  // logLargeCopy logs the occurrence of a large copy.
  1503  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1504  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1505  func logLargeCopy(v *Value, s int64) bool {
  1506  	if s < 128 {
  1507  		return true
  1508  	}
  1509  	if logopt.Enabled() {
  1510  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1511  	}
  1512  	return true
  1513  }
  1514  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1515  	if s < 128 {
  1516  		return
  1517  	}
  1518  	if logopt.Enabled() {
  1519  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1520  	}
  1521  }
  1522  
  1523  // hasSmallRotate reports whether the architecture has rotate instructions
  1524  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1525  func hasSmallRotate(c *Config) bool {
  1526  	switch c.arch {
  1527  	case "amd64", "386":
  1528  		return true
  1529  	default:
  1530  		return false
  1531  	}
  1532  }
  1533  
  1534  func supportsPPC64PCRel() bool {
  1535  	// PCRel is currently supported for >= power10, linux only
  1536  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1537  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1538  }
  1539  
  1540  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1541  	if sh < 0 || sh >= sz {
  1542  		panic("PPC64 shift arg sh out of range")
  1543  	}
  1544  	if mb < 0 || mb >= sz {
  1545  		panic("PPC64 shift arg mb out of range")
  1546  	}
  1547  	if me < 0 || me >= sz {
  1548  		panic("PPC64 shift arg me out of range")
  1549  	}
  1550  	return int32(sh<<16 | mb<<8 | me)
  1551  }
  1552  
  1553  func GetPPC64Shiftsh(auxint int64) int64 {
  1554  	return int64(int8(auxint >> 16))
  1555  }
  1556  
  1557  func GetPPC64Shiftmb(auxint int64) int64 {
  1558  	return int64(int8(auxint >> 8))
  1559  }
  1560  
  1561  // isPPC64WordRotateMask tests whether this value can be encoded as a mask for an
  1562  // rlwinm-like operation.  Masks may also extend from the msb and wrap around to
  1563  // the lsb.  That is, the valid masks are 32 bit strings
  1564  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1565  //
  1566  // Note: This ignores the upper 32 bits of the input. When a
  1567  // zero-extended result is desired (e.g. a 64 bit result), the
  1568  // user must verify the upper 32 bits are 0 and the mask is
  1569  // contiguous (that is, non-wrapping).
  1570  func isPPC64WordRotateMask(v64 int64) bool {
  1571  	// Isolate rightmost 1 (if none 0) and add.
  1572  	v := uint32(v64)
  1573  	vp := (v & -v) + v
  1574  	// Likewise, for the wrapping case.
  1575  	vn := ^v
  1576  	vpn := (vn & -vn) + vn
  1577  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1578  }
  1579  
  1580  // isPPC64WordRotateMaskNonWrapping tests whether this mask is a valid, contiguous
  1581  // bitmask that can be represented by an RLWNM mask and also clears the upper 32 bits
  1582  // of the register.
  1583  func isPPC64WordRotateMaskNonWrapping(v64 int64) bool {
  1584  	// Isolate rightmost 1 (if none 0) and add.
  1585  	v := uint32(v64)
  1586  	vp := (v & -v) + v
  1587  	return (v&vp == 0) && v != 0 && uint64(uint32(v64)) == uint64(v64)
  1588  }
  1589  
  1590  // Compress mask and shift into a single value of the form
  1591  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1592  // be used to regenerate the input mask.
  1593  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1594  	var mb, me, mbn, men int
  1595  
  1596  	// Determine boundaries and then decode them
  1597  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1598  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1599  	} else if nbits == 32 {
  1600  		mb = bits.LeadingZeros32(uint32(mask))
  1601  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1602  		mbn = bits.LeadingZeros32(^uint32(mask))
  1603  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1604  	} else {
  1605  		mb = bits.LeadingZeros64(uint64(mask))
  1606  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1607  		mbn = bits.LeadingZeros64(^uint64(mask))
  1608  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1609  	}
  1610  	// Check for a wrapping mask (e.g. bits at 0 and 63)
  1611  	if mb == 0 && me == int(nbits) {
  1612  		// swap the inverted values
  1613  		mb, me = men, mbn
  1614  	}
  1615  
  1616  	return int64(me) | int64(mb<<8) | rotate<<16 | nbits<<24
  1617  }
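
        // For example (illustrative only), encodePPC64RotateMask(4, 0x00FFFF00, 32)
        // computes mb = 8 (leading zeros) and me = 24 (32 minus trailing zeros) and
        // returns 24 | 8<<8 | 4<<16 | 32<<24 == 0x20040818.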
  1618  
  1619  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1620  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1621  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1622  // operations can be combined. This function assumes the two opcodes can
  1623  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1624  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1625  	mb := s
  1626  	r := 64 - s
  1627  	// A larger mb is a smaller mask.
  1628  	if (encoded>>8)&0xFF < mb {
  1629  		encoded = (encoded &^ 0xFF00) | mb<<8
  1630  	}
  1631  	// The rotate is expected to be 0.
  1632  	if (encoded & 0xFF0000) != 0 {
  1633  		panic("non-zero rotate")
  1634  	}
  1635  	return encoded | r<<16
  1636  }
  1637  
  1638  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1639  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1640  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1641  	auxint := uint64(sauxint)
  1642  	rotate = int64((auxint >> 16) & 0xFF)
  1643  	mb = int64((auxint >> 8) & 0xFF)
  1644  	me = int64((auxint >> 0) & 0xFF)
  1645  	nbits := int64((auxint >> 24) & 0xFF)
  1646  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1647  	if mb > me {
  1648  		mask = ^mask
  1649  	}
  1650  	if nbits == 32 {
  1651  		mask = uint64(uint32(mask))
  1652  	}
  1653  
  1654  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1655  	// is inclusive.
  1656  	me = (me - 1) & (nbits - 1)
  1657  	return
  1658  }
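
        // Decoding the example value from encodePPC64RotateMask above (illustrative
        // only): DecodePPC64RotateMask(0x20040818) yields rotate = 4, mb = 8,
        // nbits = 32, and mask = ((1<<24)-1) ^ ((1<<8)-1) = 0x00FFFF00; me is then
        // adjusted from 24 to the ISA's inclusive form, (24-1)&31 = 23.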
  1659  
  1660  // This verifies that the mask is a set of
  1661  // consecutive bits including the least
  1662  // significant bit.
  1663  func isPPC64ValidShiftMask(v int64) bool {
  1664  	if (v != 0) && ((v+1)&v) == 0 {
  1665  		return true
  1666  	}
  1667  	return false
  1668  }
  1669  
  1670  func getPPC64ShiftMaskLength(v int64) int64 {
  1671  	return int64(bits.Len64(uint64(v)))
  1672  }
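
        // Illustrative examples: 0x7F is a valid shift mask ((0x7F+1)&0x7F == 0) with
        // length bits.Len64(0x7F) == 7, while 0xFE is rejected because the run of 1s
        // does not include the least significant bit.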
  1673  
  1674  // Decompose a shift right into an equivalent rotate/mask,
  1675  // and return mask & m.
  1676  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1677  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1678  	return m & int64(smask)
  1679  }
  1680  
  1681  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1682  func mergePPC64AndSrwi(m, s int64) int64 {
  1683  	mask := mergePPC64RShiftMask(m, s, 32)
  1684  	if !isPPC64WordRotateMask(mask) {
  1685  		return 0
  1686  	}
  1687  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1688  }
  1689  
  1690  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1691  func mergePPC64AndSrdi(m, s int64) int64 {
  1692  	mask := mergePPC64RShiftMask(m, s, 64)
  1693  
  1694  	// Verify the rotate and mask result only uses the lower 32 bits.
  1695  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1696  	if rv&uint64(mask) != 0 {
  1697  		return 0
  1698  	}
  1699  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1700  		return 0
  1701  	}
  1702  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1703  }
  1704  
  1705  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1706  func mergePPC64AndSldi(m, s int64) int64 {
  1707  	mask := -1 << s & m
  1708  
  1709  	// Verify the rotate and mask result only uses the lower 32 bits.
  1710  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1711  	if rv&uint64(mask) != 0 {
  1712  		return 0
  1713  	}
  1714  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1715  		return 0
  1716  	}
  1717  	return encodePPC64RotateMask(s&31, mask, 32)
  1718  }
  1719  
  1720  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1721  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1722  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1723  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1724  	// For CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
  1725  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(sld))
  1726  
  1727  	// Rewrite mask to apply after the final left shift.
  1728  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1729  
  1730  	r_1 := 32 - srw
  1731  	r_2 := GetPPC64Shiftsh(sld)
  1732  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1733  
  1734  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1735  		return 0
  1736  	}
  1737  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1738  }
  1739  
  1740  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1741  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1742  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1743  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1744  	// For CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
  1745  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(sld))
  1746  
  1747  	// Rewrite mask to apply after the final left shift.
  1748  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1749  
  1750  	r_1 := 64 - srd
  1751  	r_2 := GetPPC64Shiftsh(sld)
  1752  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1753  
  1754  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1755  		return 0
  1756  	}
  1757  	// This combine only works when selecting and shifting the lower 32 bits.
  1758  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1759  	if v1&mask_3 != 0 {
  1760  		return 0
  1761  	}
  1762  	return encodePPC64RotateMask(r_3&31, int64(mask_3), 32)
  1763  }
  1764  
  1765  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1766  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1767  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1768  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1769  	// For CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
  1770  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1771  
  1772  	// combine the masks, and adjust for the final left shift.
  1773  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1774  	r_2 := GetPPC64Shiftsh(int64(sld))
  1775  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1776  
  1777  	// Verify the result is still a valid bitmask of <= 32 bits.
  1778  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1779  		return 0
  1780  	}
  1781  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1782  }
  1783  
  1784  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1785  // or 0 if they cannot be merged.
  1786  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1787  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1788  	mask_out := (mask_rlw & uint64(mask))
  1789  
  1790  	// Verify the result is still a valid bitmask of <= 32 bits.
  1791  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1792  		return 0
  1793  	}
  1794  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1795  }
  1796  
  1797  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1798  // result. Return rlw if it does, 0 otherwise.
  1799  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1800  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1801  	if mb > me {
  1802  		return 0
  1803  	}
  1804  	return rlw
  1805  }
  1806  
  1807  // Test if an ANDconst feeding into a RLWINM can be merged. Return the encoded RLWINM constant,
  1808  // or 0 if they cannot be merged.
  1809  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1810  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1811  
  1812  	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
  1813  	r_mask := bits.RotateLeft32(mask, int(r))
  1814  
  1815  	mask_out := (mask_rlw & uint64(r_mask))
  1816  
  1817  	// Verify the result is still a valid bitmask of <= 32 bits.
  1818  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1819  		return 0
  1820  	}
  1821  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1822  }
  1823  
  1824  // Test if RLWINM feeding into SLDconst can be merged. Return the encoded RLWINM constant,
  1825  // or 0 if they cannot be merged.
  1826  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1827  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1828  	if mb > me || mb < sldi {
  1829  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1830  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1831  		return 0
  1832  	}
  1833  	// combine the masks, and adjust for the final left shift.
  1834  	mask_3 := mask_1 << sldi
  1835  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1836  
  1837  	// Verify the result is still a valid bitmask of <= 32 bits.
  1838  	if uint64(uint32(mask_3)) != mask_3 {
  1839  		return 0
  1840  	}
  1841  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1842  }
  1843  
  1844  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1845  // or return 0 if they cannot be combined.
  1846  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1847  	if sld > srw || srw >= 32 {
  1848  		return 0
  1849  	}
  1850  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1851  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1852  	mask := (mask_r & mask_l) << uint(sld)
  1853  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1854  }
  1855  
  1856  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1857  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1858  // of op.
  1859  //
  1860  // E.g. consider the case:
  1861  // a = (ADD x y)
  1862  // b = (CMPconst [0] a)
  1863  // c = (OR a z)
  1864  //
  1865  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1866  // would produce:
  1867  // a  = (ADD x y)
  1868  // a' = (ADDCC x y)
  1869  // a” = (Select0 a')
  1870  // b  = (CMPconst [0] a”)
  1871  // c  = (OR a z)
  1872  //
  1873  // which makes it impossible to rewrite the second user. Instead the result
  1874  // of this conversion is:
  1875  // a' = (ADDCC x y)
  1876  // a  = (Select0 a')
  1877  // b  = (CMPconst [0] a)
  1878  // c  = (OR a z)
  1879  //
  1880  // Which makes it trivial to rewrite b using a lowering rule.
  1881  func convertPPC64OpToOpCC(op *Value) *Value {
  1882  	ccOpMap := map[Op]Op{
  1883  		OpPPC64ADD:      OpPPC64ADDCC,
  1884  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1885  		OpPPC64AND:      OpPPC64ANDCC,
  1886  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1887  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1888  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1889  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1890  		OpPPC64NEG:      OpPPC64NEGCC,
  1891  		OpPPC64NOR:      OpPPC64NORCC,
  1892  		OpPPC64OR:       OpPPC64ORCC,
  1893  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1894  		OpPPC64SUB:      OpPPC64SUBCC,
  1895  		OpPPC64XOR:      OpPPC64XORCC,
  1896  	}
  1897  	b := op.Block
  1898  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1899  	opCC.AddArgs(op.Args...)
  1900  	op.reset(OpSelect0)
  1901  	op.AddArgs(opCC)
  1902  	return op
  1903  }
  1904  
  1905  // Try converting a RLDICL to ANDCC. If successful, return the mask; otherwise 0.
  1906  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1907  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1908  	if r != 0 || mask&0xFFFF != mask {
  1909  		return 0
  1910  	}
  1911  	return int64(mask)
  1912  }
  1913  
  1914  // Convenience function to rotate a 32 bit constant value by another constant.
  1915  func rotateLeft32(v, rotate int64) int64 {
  1916  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1917  }
  1918  
  1919  func rotateRight64(v, rotate int64) int64 {
  1920  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1921  }
  1922  
  1923  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1924  func armBFAuxInt(lsb, width int64) arm64BitField {
  1925  	if lsb < 0 || lsb > 63 {
  1926  		panic("ARM(64) bit field lsb constant out of range")
  1927  	}
  1928  	if width < 1 || lsb+width > 64 {
  1929  		panic("ARM(64) bit field width constant out of range")
  1930  	}
  1931  	return arm64BitField(width | lsb<<8)
  1932  }
  1933  
  1934  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1935  func (bfc arm64BitField) lsb() int64 {
  1936  	return int64(uint64(bfc) >> 8)
  1937  }
  1938  
  1939  // returns the width part of the auxInt field of arm64 bitfield ops.
  1940  func (bfc arm64BitField) width() int64 {
  1941  	return int64(bfc) & 0xff
  1942  }
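
        // A small worked example of the bitfield encoding (illustrative only):
        //
        //	bfc := armBFAuxInt(8, 16) // 16 | 8<<8 == 0x810
        //	bfc.lsb()                 // 0x810 >> 8   == 8
        //	bfc.width()               // 0x810 & 0xff == 16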
  1943  
  1944  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1945  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1946  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1947  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1948  }
  1949  
  1950  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1951  func arm64BFWidth(mask, rshift int64) int64 {
  1952  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1953  	if shiftedMask == 0 {
  1954  		panic("ARM64 BF mask is zero")
  1955  	}
  1956  	return nto(shiftedMask)
  1957  }
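
        // For example (illustrative, assuming nto counts trailing 1 bits and
        // isPowerOfTwo is the usual power-of-two test): with mask = 0xFF0 and
        // rshift = 4 the shifted mask is 0xFF, so isARM64BFMask(16, 0xFF0, 4) is
        // true (0xFF+1 is a power of two and width 8 + lsb 16 < 64), and
        // arm64BFWidth(0xFF0, 4) == 8.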
  1958  
  1959  // encodes condition code and NZCV flags into auxint.
  1960  func arm64ConditionalParamsAuxInt(cond Op, nzcv uint8) arm64ConditionalParams {
  1961  	if cond < OpARM64Equal || cond > OpARM64GreaterEqualU {
  1962  		panic("Wrong conditional operation")
  1963  	}
  1964  	if nzcv&0x0f != nzcv {
  1965  		panic("Wrong value of NZCV flag")
  1966  	}
  1967  	return arm64ConditionalParams{cond, nzcv, 0, false}
  1968  }
  1969  
  1970  // encodes condition code, NZCV flags and constant value into auxint.
  1971  func arm64ConditionalParamsAuxIntWithValue(cond Op, nzcv uint8, value uint8) arm64ConditionalParams {
  1972  	if value&0x1f != value {
  1973  		panic("Wrong value of constant")
  1974  	}
  1975  	params := arm64ConditionalParamsAuxInt(cond, nzcv)
  1976  	params.constValue = value
  1977  	params.ind = true
  1978  	return params
  1979  }
  1980  
  1981  // extracts condition code from auxint.
  1982  func (condParams arm64ConditionalParams) Cond() Op {
  1983  	return condParams.cond
  1984  }
  1985  
  1986  // extracts NZCV flags from auxint.
  1987  func (condParams arm64ConditionalParams) Nzcv() int64 {
  1988  	return int64(condParams.nzcv)
  1989  }
  1990  
  1991  // extracts constant value from auxint if present.
  1992  func (condParams arm64ConditionalParams) ConstValue() (int64, bool) {
  1993  	return int64(condParams.constValue), condParams.ind
  1994  }
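
        // A small illustrative example of the conditional-params encoding:
        //
        //	p := arm64ConditionalParamsAuxIntWithValue(OpARM64Equal, 0x4, 7)
        //	p.Cond()       // OpARM64Equal
        //	p.Nzcv()       // 4
        //	p.ConstValue() // (7, true); the variant without a value reports (0, false)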
  1995  
  1996  // registerizable reports whether t is a primitive type that fits in
  1997  // a register. It assumes float64 values will always fit into registers
  1998  // even if that isn't strictly true.
  1999  func registerizable(b *Block, typ *types.Type) bool {
  2000  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  2001  		return true
  2002  	}
  2003  	if typ.IsInteger() {
  2004  		return typ.Size() <= b.Func.Config.RegSize
  2005  	}
  2006  	return false
  2007  }
  2008  
  2009  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  2010  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  2011  	f := v.Block.Func
  2012  	if !f.Config.Race {
  2013  		return false
  2014  	}
  2015  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  2016  		return false
  2017  	}
  2018  	for _, b := range f.Blocks {
  2019  		for _, v := range b.Values {
  2020  			switch v.Op {
  2021  			case OpStaticCall, OpStaticLECall:
  2022  				// Check for racefuncenter will encounter racefuncexit and vice versa.
  2023  				// Allow calls to panic*
  2024  				s := v.Aux.(*AuxCall).Fn.String()
  2025  				switch s {
  2026  				case "runtime.racefuncenter", "runtime.racefuncexit",
  2027  					"runtime.panicdivide", "runtime.panicwrap",
  2028  					"runtime.panicshift":
  2029  					continue
  2030  				}
  2031  				// If we encountered any call, we need to keep racefunc*,
  2032  				// for accurate stacktraces.
  2033  				return false
  2034  			case OpPanicBounds, OpPanicExtend:
  2035  				// Note: these are panic generators that are ok (like the static calls above).
  2036  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  2037  				// We must keep the race functions if there are any other call types.
  2038  				return false
  2039  			}
  2040  		}
  2041  	}
  2042  	if isSameCall(sym, "runtime.racefuncenter") {
  2043  		// TODO REGISTER ABI this needs to be cleaned up.
  2044  		// If we're removing racefuncenter, remove its argument as well.
  2045  		if v.Args[0].Op != OpStore {
  2046  			if v.Op == OpStaticLECall {
  2047  				// there is no store, yet.
  2048  				return true
  2049  			}
  2050  			return false
  2051  		}
  2052  		mem := v.Args[0].Args[2]
  2053  		v.Args[0].reset(OpCopy)
  2054  		v.Args[0].AddArg(mem)
  2055  	}
  2056  	return true
  2057  }
  2058  
  2059  // symIsRO reports whether sym is a read-only global.
  2060  func symIsRO(sym Sym) bool {
  2061  	lsym := sym.(*obj.LSym)
  2062  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  2063  }
  2064  
  2065  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  2066  func symIsROZero(sym Sym) bool {
  2067  	lsym := sym.(*obj.LSym)
  2068  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  2069  		return false
  2070  	}
  2071  	for _, b := range lsym.P {
  2072  		if b != 0 {
  2073  			return false
  2074  		}
  2075  	}
  2076  	return true
  2077  }
  2078  
  2079  // isFixedLoad returns true if the load can be resolved to a fixed address or constant,
  2080  // and can be rewritten by rewriteFixedLoad.
  2081  func isFixedLoad(v *Value, sym Sym, off int64) bool {
  2082  	lsym := sym.(*obj.LSym)
  2083  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2084  		for _, r := range lsym.R {
  2085  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2086  				return true
  2087  			}
  2088  		}
  2089  		return false
  2090  	}
  2091  
  2092  	if ti := lsym.TypeInfo(); ti != nil {
  2093  		// Type symbols do not contain information about their fields, unlike the cases above.
  2094  		// Hand-implement field accesses.
  2095  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2096  
  2097  		t := ti.Type.(*types.Type)
  2098  
  2099  		for _, f := range rttype.Type.Fields() {
  2100  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2101  				switch f.Sym.Name {
  2102  				case "Size_", "PtrBytes", "Hash", "Kind_", "GCData":
  2103  					return true
  2104  				default:
  2105  					// fmt.Println("unknown field", f.Sym.Name)
  2106  					return false
  2107  				}
  2108  			}
  2109  		}
  2110  
  2111  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2112  			return true
  2113  		}
  2114  
  2115  		return false
  2116  	}
  2117  
  2118  	return false
  2119  }
  2120  
  2121  // rewriteFixedLoad rewrites a load to a fixed address or constant, if isFixedLoad returns true.
  2122  func rewriteFixedLoad(v *Value, sym Sym, sb *Value, off int64) *Value {
  2123  	b := v.Block
  2124  	f := b.Func
  2125  
  2126  	lsym := sym.(*obj.LSym)
  2127  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2128  		for _, r := range lsym.R {
  2129  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2130  				if strings.HasPrefix(r.Sym.Name, "type:") {
  2131  					// In case we're loading a type out of a dictionary, we need to record
  2132  					// that the containing function might put that type in an interface.
  2133  					// That information is currently recorded in relocations in the dictionary,
  2134  					// but if we perform this load at compile time then the dictionary
  2135  					// might be dead.
  2136  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2137  				} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2138  					// Same, but if we're using an itab we need to record that the
  2139  					// itab._type might be put in an interface.
  2140  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2141  				}
  2142  				v.reset(OpAddr)
  2143  				v.Aux = symToAux(r.Sym)
  2144  				v.AddArg(sb)
  2145  				return v
  2146  			}
  2147  		}
  2148  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2149  	}
  2150  
  2151  	if ti := lsym.TypeInfo(); ti != nil {
  2152  		// Type symbols do not contain information about their fields, unlike the cases above.
  2153  		// Hand-implement field accesses.
  2154  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2155  
  2156  		t := ti.Type.(*types.Type)
  2157  
  2158  		ptrSizedOpConst := OpConst64
  2159  		if f.Config.PtrSize == 4 {
  2160  			ptrSizedOpConst = OpConst32
  2161  		}
  2162  
  2163  		for _, f := range rttype.Type.Fields() {
  2164  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2165  				switch f.Sym.Name {
  2166  				case "Size_":
  2167  					v.reset(ptrSizedOpConst)
  2168  					v.AuxInt = t.Size()
  2169  					return v
  2170  				case "PtrBytes":
  2171  					v.reset(ptrSizedOpConst)
  2172  					v.AuxInt = types.PtrDataSize(t)
  2173  					return v
  2174  				case "Hash":
  2175  					v.reset(OpConst32)
  2176  					v.AuxInt = int64(types.TypeHash(t))
  2177  					return v
  2178  				case "Kind_":
  2179  					v.reset(OpConst8)
  2180  					v.AuxInt = int64(reflectdata.ABIKindOfType(t))
  2181  					return v
  2182  				case "GCData":
  2183  					gcdata, _ := reflectdata.GCSym(t, true)
  2184  					v.reset(OpAddr)
  2185  					v.Aux = symToAux(gcdata)
  2186  					v.AddArg(sb)
  2187  					return v
  2188  				default:
  2189  					base.Fatalf("unknown field %s for fixedLoad of %s at offset %d", f.Sym.Name, lsym.Name, off)
  2190  				}
  2191  			}
  2192  		}
  2193  
  2194  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2195  			elemSym := reflectdata.TypeLinksym(t.Elem())
  2196  			reflectdata.MarkTypeSymUsedInInterface(elemSym, f.fe.Func().Linksym())
  2197  			v.reset(OpAddr)
  2198  			v.Aux = symToAux(elemSym)
  2199  			v.AddArg(sb)
  2200  			return v
  2201  		}
  2202  
  2203  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2204  	}
  2205  
  2206  	base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2207  	return nil
  2208  }
  2209  
  2210  // read8 reads one byte from the read-only global sym at offset off.
  2211  func read8(sym Sym, off int64) uint8 {
  2212  	lsym := sym.(*obj.LSym)
  2213  	if off >= int64(len(lsym.P)) || off < 0 {
  2214  		// Invalid index into the global sym.
  2215  		// This can happen in dead code, so we don't want to panic.
  2216  		// Just return any value, it will eventually get ignored.
  2217  		// See issue 29215.
  2218  		return 0
  2219  	}
  2220  	return lsym.P[off]
  2221  }
  2222  
  2223  // read16 reads two bytes from the read-only global sym at offset off.
  2224  func read16(sym Sym, off int64, byteorder binary.ByteOrder) uint16 {
  2225  	lsym := sym.(*obj.LSym)
  2226  	// lsym.P is written lazily.
  2227  	// Bytes requested after the end of lsym.P are 0.
  2228  	var src []byte
  2229  	if 0 <= off && off < int64(len(lsym.P)) {
  2230  		src = lsym.P[off:]
  2231  	}
  2232  	buf := make([]byte, 2)
  2233  	copy(buf, src)
  2234  	return byteorder.Uint16(buf)
  2235  }
  2236  
  2237  // read32 reads four bytes from the read-only global sym at offset off.
  2238  func read32(sym Sym, off int64, byteorder binary.ByteOrder) uint32 {
  2239  	lsym := sym.(*obj.LSym)
  2240  	var src []byte
  2241  	if 0 <= off && off < int64(len(lsym.P)) {
  2242  		src = lsym.P[off:]
  2243  	}
  2244  	buf := make([]byte, 4)
  2245  	copy(buf, src)
  2246  	return byteorder.Uint32(buf)
  2247  }
  2248  
  2249  // read64 reads eight bytes from the read-only global sym at offset off.
  2250  func read64(sym Sym, off int64, byteorder binary.ByteOrder) uint64 {
  2251  	lsym := sym.(*obj.LSym)
  2252  	var src []byte
  2253  	if 0 <= off && off < int64(len(lsym.P)) {
  2254  		src = lsym.P[off:]
  2255  	}
  2256  	buf := make([]byte, 8)
  2257  	copy(buf, src)
  2258  	return byteorder.Uint64(buf)
  2259  }
  2260  
  2261  // sequentialAddresses reports whether it can prove that x + n == y.
  2262  func sequentialAddresses(x, y *Value, n int64) bool {
  2263  	if x == y && n == 0 {
  2264  		return true
  2265  	}
  2266  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2267  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2268  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2269  		return true
  2270  	}
  2271  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2272  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2273  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2274  		return true
  2275  	}
  2276  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2277  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2278  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2279  		return true
  2280  	}
  2281  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2282  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2283  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2284  		return true
  2285  	}
  2286  	return false
  2287  }
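
        // For example (illustrative only): if x is (ADDQ a b) and y is
        // (LEAQ1 [8] a b) with a nil Aux, then y computes a+b+8 == x+8, so
        // sequentialAddresses(x, y, 8) reports true; the symmetric argument
        // order (b, a) is matched as well.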
  2288  
  2289  // flagConstant represents the result of a compile-time comparison.
  2290  // The sense of these flags does not necessarily represent the hardware's notion
  2291  // of a flags register - these are just a compile-time construct.
  2292  // We happen to match the semantics to those of arm/arm64.
  2293  // Note that these semantics differ from x86: the carry flag has the opposite
  2294  // sense on a subtraction!
  2295  //
  2296  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2297  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2298  //	 (because it does x + ^y + C).
  2299  //
  2300  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2301  type flagConstant uint8
  2302  
  2303  // N reports whether the result of an operation is negative (high bit set).
  2304  func (fc flagConstant) N() bool {
  2305  	return fc&1 != 0
  2306  }
  2307  
  2308  // Z reports whether the result of an operation is 0.
  2309  func (fc flagConstant) Z() bool {
  2310  	return fc&2 != 0
  2311  }
  2312  
  2313  // C reports whether an unsigned add overflowed (carry), or an
  2314  // unsigned subtract did not underflow (borrow).
  2315  func (fc flagConstant) C() bool {
  2316  	return fc&4 != 0
  2317  }
  2318  
  2319  // V reports whether a signed operation overflowed or underflowed.
  2320  func (fc flagConstant) V() bool {
  2321  	return fc&8 != 0
  2322  }
  2323  
  2324  func (fc flagConstant) eq() bool {
  2325  	return fc.Z()
  2326  }
  2327  func (fc flagConstant) ne() bool {
  2328  	return !fc.Z()
  2329  }
  2330  func (fc flagConstant) lt() bool {
  2331  	return fc.N() != fc.V()
  2332  }
  2333  func (fc flagConstant) le() bool {
  2334  	return fc.Z() || fc.lt()
  2335  }
  2336  func (fc flagConstant) gt() bool {
  2337  	return !fc.Z() && fc.ge()
  2338  }
  2339  func (fc flagConstant) ge() bool {
  2340  	return fc.N() == fc.V()
  2341  }
  2342  func (fc flagConstant) ult() bool {
  2343  	return !fc.C()
  2344  }
  2345  func (fc flagConstant) ule() bool {
  2346  	return fc.Z() || fc.ult()
  2347  }
  2348  func (fc flagConstant) ugt() bool {
  2349  	return !fc.Z() && fc.uge()
  2350  }
  2351  func (fc flagConstant) uge() bool {
  2352  	return fc.C()
  2353  }
  2354  
  2355  func (fc flagConstant) ltNoov() bool {
  2356  	return fc.lt() && !fc.V()
  2357  }
  2358  func (fc flagConstant) leNoov() bool {
  2359  	return fc.le() && !fc.V()
  2360  }
  2361  func (fc flagConstant) gtNoov() bool {
  2362  	return fc.gt() && !fc.V()
  2363  }
  2364  func (fc flagConstant) geNoov() bool {
  2365  	return fc.ge() && !fc.V()
  2366  }
  2367  
  2368  func (fc flagConstant) String() string {
  2369  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2370  }
  2371  
  2372  type flagConstantBuilder struct {
  2373  	N bool
  2374  	Z bool
  2375  	C bool
  2376  	V bool
  2377  }
  2378  
  2379  func (fcs flagConstantBuilder) encode() flagConstant {
  2380  	var fc flagConstant
  2381  	if fcs.N {
  2382  		fc |= 1
  2383  	}
  2384  	if fcs.Z {
  2385  		fc |= 2
  2386  	}
  2387  	if fcs.C {
  2388  		fc |= 4
  2389  	}
  2390  	if fcs.V {
  2391  		fc |= 8
  2392  	}
  2393  	return fc
  2394  }
  2395  
  2396  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2397  //  - the results of the C flag are different
  2398  //  - the results of the V flag when y==minint are different
  2399  
  2400  // addFlags64 returns the flags that would be set from computing x+y.
  2401  func addFlags64(x, y int64) flagConstant {
  2402  	var fcb flagConstantBuilder
  2403  	fcb.Z = x+y == 0
  2404  	fcb.N = x+y < 0
  2405  	fcb.C = uint64(x+y) < uint64(x)
  2406  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2407  	return fcb.encode()
  2408  }
  2409  
  2410  // subFlags64 returns the flags that would be set from computing x-y.
  2411  func subFlags64(x, y int64) flagConstant {
  2412  	var fcb flagConstantBuilder
  2413  	fcb.Z = x-y == 0
  2414  	fcb.N = x-y < 0
  2415  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2416  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2417  	return fcb.encode()
  2418  }
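
        // Worked examples of the arm-style carry model above (illustrative only):
        // subFlags64(1, 2) computes -1, so N is set, Z is clear, C is clear
        // (uint64(2) <= uint64(1) is false, i.e. a borrow occurred), and V is clear;
        // hence ult() and lt() both report 1 < 2. addFlags64(math.MaxInt64, 1) wraps
        // to math.MinInt64, so N and V are set while Z and C are clear.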
  2419  
  2420  // addFlags32 returns the flags that would be set from computing x+y.
  2421  func addFlags32(x, y int32) flagConstant {
  2422  	var fcb flagConstantBuilder
  2423  	fcb.Z = x+y == 0
  2424  	fcb.N = x+y < 0
  2425  	fcb.C = uint32(x+y) < uint32(x)
  2426  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2427  	return fcb.encode()
  2428  }
  2429  
  2430  // subFlags32 returns the flags that would be set from computing x-y.
  2431  func subFlags32(x, y int32) flagConstant {
  2432  	var fcb flagConstantBuilder
  2433  	fcb.Z = x-y == 0
  2434  	fcb.N = x-y < 0
  2435  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2436  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2437  	return fcb.encode()
  2438  }
  2439  
  2440  // logicFlags64 returns flags set to the sign/zeroness of x.
  2441  // C and V are set to false.
  2442  func logicFlags64(x int64) flagConstant {
  2443  	var fcb flagConstantBuilder
  2444  	fcb.Z = x == 0
  2445  	fcb.N = x < 0
  2446  	return fcb.encode()
  2447  }
  2448  
  2449  // logicFlags32 returns flags set to the sign/zeroness of x.
  2450  // C and V are set to false.
  2451  func logicFlags32(x int32) flagConstant {
  2452  	var fcb flagConstantBuilder
  2453  	fcb.Z = x == 0
  2454  	fcb.N = x < 0
  2455  	return fcb.encode()
  2456  }
  2457  
  2458  func makeJumpTableSym(b *Block) *obj.LSym {
  2459  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2460  	// The jump table symbol is accessed only from the function symbol.
  2461  	s.Set(obj.AttrStatic, true)
  2462  	return s
  2463  }
  2464  
  2465  // canRotate reports whether the architecture supports
  2466  // rotates of integer registers with the given number of bits.
  2467  func canRotate(c *Config, bits int64) bool {
  2468  	if bits > c.PtrSize*8 {
  2469  		// Don't rewrite to rotates bigger than the machine word.
  2470  		return false
  2471  	}
  2472  	switch c.arch {
  2473  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2474  		return true
  2475  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2476  		return bits >= 32
  2477  	default:
  2478  		return false
  2479  	}
  2480  }
  2481  
  2482  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2483  func isARM64bitcon(x uint64) bool {
  2484  	if x == 1<<64-1 || x == 0 {
  2485  		return false
  2486  	}
  2487  	// determine the period and sign-extend a unit to 64 bits
  2488  	switch {
  2489  	case x != x>>32|x<<32:
  2490  		// period is 64
  2491  		// nothing to do
  2492  	case x != x>>16|x<<48:
  2493  		// period is 32
  2494  		x = uint64(int64(int32(x)))
  2495  	case x != x>>8|x<<56:
  2496  		// period is 16
  2497  		x = uint64(int64(int16(x)))
  2498  	case x != x>>4|x<<60:
  2499  		// period is 8
  2500  		x = uint64(int64(int8(x)))
  2501  	default:
  2502  		// period is 4 or 2, always true
  2503  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2504  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2505  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2506  		// 0101, 1010             -- 01   rotate, repeat
  2507  		return true
  2508  	}
  2509  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2510  }
  2511  
  2512  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2513  func sequenceOfOnes(x uint64) bool {
  2514  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2515  	y += x
  2516  	return (y-1)&y == 0
  2517  }
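
        // Illustrative examples: isARM64bitcon(0x00FF00FF00FF00FF) is true, since the
        // pattern repeats with period 16 and each 16-bit unit (0x00FF) is a single
        // run of 1s; isARM64bitcon(0x5) is false, since 0b101 is neither a contiguous
        // run (possibly wrapped) nor a shorter repeating pattern.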
  2518  
  2519  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2520  func isARM64addcon(v int64) bool {
  2521  	/* uimm12 or uimm24? */
  2522  	if v < 0 {
  2523  		return false
  2524  	}
  2525  	if (v & 0xFFF) == 0 {
  2526  		v >>= 12
  2527  	}
  2528  	return v <= 0xFFF
  2529  }
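
        // Illustrative examples: 0xFFF (the largest plain uimm12) and 0xFFF000 (0xFFF
        // shifted left by 12) are both encodable, while 0x1001 and 0x1000000 are not.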
  2530  
  2531  // setPos sets the position of v to pos, then returns true.
  2532  // Useful for setting the position of a rewrite's result to
  2533  // something other than the default.
  2534  func setPos(v *Value, pos src.XPos) bool {
  2535  	v.Pos = pos
  2536  	return true
  2537  }
  2538  
  2539  // isNonNegative reports whether v is known to be greater or equal to zero.
  2540  // Note that this is pretty simplistic. The prove pass generates more detailed
  2541  // nonnegative information about values.
  2542  func isNonNegative(v *Value) bool {
  2543  	if !v.Type.IsInteger() {
  2544  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2545  	}
  2546  	// TODO: return true if !v.Type.IsSigned()
  2547  	// SSA isn't type-safe enough to do that now (issue 37753).
  2548  	// The checks below depend only on the pattern of bits.
  2549  
  2550  	switch v.Op {
  2551  	case OpConst64:
  2552  		return v.AuxInt >= 0
  2553  
  2554  	case OpConst32:
  2555  		return int32(v.AuxInt) >= 0
  2556  
  2557  	case OpConst16:
  2558  		return int16(v.AuxInt) >= 0
  2559  
  2560  	case OpConst8:
  2561  		return int8(v.AuxInt) >= 0
  2562  
  2563  	case OpStringLen, OpSliceLen, OpSliceCap,
  2564  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2565  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2566  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2567  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2568  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2569  		return true
  2570  
  2571  	case OpRsh64Ux64, OpRsh32Ux64:
  2572  		by := v.Args[1]
  2573  		return by.Op == OpConst64 && by.AuxInt > 0
  2574  
  2575  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2576  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2577  		return isNonNegative(v.Args[0])
  2578  
  2579  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2580  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2581  
  2582  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2583  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2584  		OpOr64, OpOr32, OpOr16, OpOr8,
  2585  		OpXor64, OpXor32, OpXor16, OpXor8:
  2586  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2587  
  2588  		// We could handle OpPhi here, but the improvements from doing
  2589  		// so are very minor, and it is neither simple nor cheap.
  2590  	}
  2591  	return false
  2592  }
  2593  
  2594  func rewriteStructLoad(v *Value) *Value {
  2595  	b := v.Block
  2596  	ptr := v.Args[0]
  2597  	mem := v.Args[1]
  2598  
  2599  	t := v.Type
  2600  	args := make([]*Value, t.NumFields())
  2601  	for i := range args {
  2602  		ft := t.FieldType(i)
  2603  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2604  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2605  	}
  2606  
  2607  	v.reset(OpStructMake)
  2608  	v.AddArgs(args...)
  2609  	return v
  2610  }
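
        // For example (illustrative only): loading a struct { a int32; b int64 }
        // decomposes into
        //
        //	(StructMake (Load <int32> (OffPtr <*int32> [0] ptr) mem)
        //	            (Load <int64> (OffPtr <*int64> [8] ptr) mem))
        //
        // using each field's type and offset (offsets shown for a 64-bit target).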
  2611  
  2612  func rewriteStructStore(v *Value) *Value {
  2613  	b := v.Block
  2614  	dst := v.Args[0]
  2615  	x := v.Args[1]
  2616  	if x.Op != OpStructMake {
  2617  		base.Fatalf("invalid struct store: %v", x)
  2618  	}
  2619  	mem := v.Args[2]
  2620  
  2621  	t := x.Type
  2622  	for i, arg := range x.Args {
  2623  		ft := t.FieldType(i)
  2624  
  2625  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2626  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2627  	}
  2628  
  2629  	return mem
  2630  }
  2631  
  2632  // isDirectAndComparableType reports whether v represents a type
  2633  // (a *runtime._type) whose value is stored directly in an
  2634  // interface (i.e., is pointer or pointer-like) and is comparable.
  2635  func isDirectAndComparableType(v *Value) bool {
  2636  	return isDirectAndComparableType1(v)
  2637  }
  2638  
  2639  // v is a type
  2640  func isDirectAndComparableType1(v *Value) bool {
  2641  	switch v.Op {
  2642  	case OpITab:
  2643  		return isDirectAndComparableType2(v.Args[0])
  2644  	case OpAddr:
  2645  		lsym := v.Aux.(*obj.LSym)
  2646  		if ti := lsym.TypeInfo(); ti != nil {
  2647  			t := ti.Type.(*types.Type)
  2648  			return types.IsDirectIface(t) && types.IsComparable(t)
  2649  		}
  2650  	}
  2651  	return false
  2652  }
  2653  
  2654  // v is an empty interface
  2655  func isDirectAndComparableType2(v *Value) bool {
  2656  	switch v.Op {
  2657  	case OpIMake:
  2658  		return isDirectAndComparableType1(v.Args[0])
  2659  	}
  2660  	return false
  2661  }
  2662  
  2663  // isDirectAndComparableIface reports whether v represents an itab
  2664  // (a *runtime._itab) for a type whose value is stored directly
  2665  // in an interface (i.e., is pointer or pointer-like) and is comparable.
  2666  func isDirectAndComparableIface(v *Value) bool {
  2667  	return isDirectAndComparableIface1(v, 9)
  2668  }
  2669  
  2670  // v is an itab
  2671  func isDirectAndComparableIface1(v *Value, depth int) bool {
  2672  	if depth == 0 {
  2673  		return false
  2674  	}
  2675  	switch v.Op {
  2676  	case OpITab:
  2677  		return isDirectAndComparableIface2(v.Args[0], depth-1)
  2678  	case OpAddr:
  2679  		lsym := v.Aux.(*obj.LSym)
  2680  		if ii := lsym.ItabInfo(); ii != nil {
  2681  			t := ii.Type.(*types.Type)
  2682  			return types.IsDirectIface(t) && types.IsComparable(t)
  2683  		}
  2684  	case OpConstNil:
  2685  		// We can treat this as direct, because if the itab is
  2686  		// nil, the data field must be nil also.
  2687  		return true
  2688  	}
  2689  	return false
  2690  }
  2691  
  2692  // v is an interface
  2693  func isDirectAndComparableIface2(v *Value, depth int) bool {
  2694  	if depth == 0 {
  2695  		return false
  2696  	}
  2697  	switch v.Op {
  2698  	case OpIMake:
  2699  		return isDirectAndComparableIface1(v.Args[0], depth-1)
  2700  	case OpPhi:
  2701  		for _, a := range v.Args {
  2702  			if !isDirectAndComparableIface2(a, depth-1) {
  2703  				return false
  2704  			}
  2705  		}
  2706  		return true
  2707  	}
  2708  	return false
  2709  }
  2710  
  2711  func bitsAdd64(x, y, carry int64) (r struct{ sum, carry int64 }) {
  2712  	s, c := bits.Add64(uint64(x), uint64(y), uint64(carry))
  2713  	r.sum, r.carry = int64(s), int64(c)
  2714  	return
  2715  }
  2716  
  2717  func bitsMulU64(x, y int64) (r struct{ hi, lo int64 }) {
  2718  	hi, lo := bits.Mul64(uint64(x), uint64(y))
  2719  	r.hi, r.lo = int64(hi), int64(lo)
  2720  	return
  2721  }
  2722  func bitsMulU32(x, y int32) (r struct{ hi, lo int32 }) {
  2723  	hi, lo := bits.Mul32(uint32(x), uint32(y))
  2724  	r.hi, r.lo = int32(hi), int32(lo)
  2725  	return
  2726  }
  2727  
  2728  // flagify rewrites v which is (X ...) to (Select0 (Xflags ...)).
  2729  func flagify(v *Value) bool {
  2730  	var flagVersion Op
  2731  	switch v.Op {
  2732  	case OpAMD64ADDQconst:
  2733  		flagVersion = OpAMD64ADDQconstflags
  2734  	case OpAMD64ADDLconst:
  2735  		flagVersion = OpAMD64ADDLconstflags
  2736  	default:
  2737  		base.Fatalf("can't flagify op %s", v.Op)
  2738  	}
  2739  	inner := v.copyInto(v.Block)
  2740  	inner.Op = flagVersion
  2741  	inner.Type = types.NewTuple(v.Type, types.TypeFlags)
  2742  	v.reset(OpSelect0)
  2743  	v.AddArg(inner)
  2744  	return true
  2745  }
  2746  
  2747  // PanicBoundsC contains a constant for a bounds failure.
  2748  type PanicBoundsC struct {
  2749  	C int64
  2750  }
  2751  
  2752  // PanicBoundsCC contains 2 constants for a bounds failure.
  2753  type PanicBoundsCC struct {
  2754  	Cx int64
  2755  	Cy int64
  2756  }
  2757  
  2758  func (p PanicBoundsC) CanBeAnSSAAux() {
  2759  }
  2760  func (p PanicBoundsCC) CanBeAnSSAAux() {
  2761  }
  2762  
  2763  func auxToPanicBoundsC(i Aux) PanicBoundsC {
  2764  	return i.(PanicBoundsC)
  2765  }
  2766  func auxToPanicBoundsCC(i Aux) PanicBoundsCC {
  2767  	return i.(PanicBoundsCC)
  2768  }
  2769  func panicBoundsCToAux(p PanicBoundsC) Aux {
  2770  	return p
  2771  }
  2772  func panicBoundsCCToAux(p PanicBoundsCC) Aux {
  2773  	return p
  2774  }
  2775  
  2776  func isDictArgSym(sym Sym) bool {
  2777  	return sym.(*ir.Name).Sym().Name == typecheck.LocalDictName
  2778  }
  2779  
