Source file src/cmd/compile/internal/ssa/prove.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/types"
     9  	"cmd/internal/src"
    10  	"cmp"
    11  	"fmt"
    12  	"math"
    13  	"math/bits"
    14  	"slices"
    15  	"strings"
    16  )
    17  
    18  type branch int
    19  
    20  const (
    21  	unknown branch = iota
    22  	positive
    23  	negative
    24  	// The outedges from a jump table are jumpTable0,
    25  	// jumpTable0+1, jumpTable0+2, etc. There could be an
    26  	// arbitrary number so we can't list them all here.
    27  	jumpTable0
    28  )
    29  
    30  func (b branch) String() string {
    31  	switch b {
    32  	case unknown:
    33  		return "unk"
    34  	case positive:
    35  		return "pos"
    36  	case negative:
    37  		return "neg"
    38  	default:
    39  		return fmt.Sprintf("jmp%d", b-jumpTable0)
    40  	}
    41  }
    42  
    43  // relation represents the set of possible relations between
    44  // pairs of variables (v, w). Without a priori knowledge the
    45  // mask is lt | eq | gt meaning v can be less than, equal to or
    46  // greater than w. When the execution path branches on the condition
    47  // `v op w` the set of relations is updated to exclude any
    48  // relation not possible due to `v op w` being true (or false).
    49  //
    50  // E.g.
    51  //
    52  //	r := relation(...)
    53  //
    54  //	if v < w {
    55  //	  newR := r & lt
    56  //	}
    57  //	if v >= w {
    58  //	  newR := r & (eq|gt)
    59  //	}
    60  //	if v != w {
    61  //	  newR := r & (lt|gt)
    62  //	}
    63  type relation uint
    64  
    65  const (
    66  	lt relation = 1 << iota
    67  	eq
    68  	gt
    69  )
    70  
    71  var relationStrings = [...]string{
    72  	0: "none", lt: "<", eq: "==", lt | eq: "<=",
    73  	gt: ">", gt | lt: "!=", gt | eq: ">=", gt | eq | lt: "any",
    74  }
    75  
    76  func (r relation) String() string {
    77  	if r < relation(len(relationStrings)) {
    78  		return relationStrings[r]
    79  	}
    80  	return fmt.Sprintf("relation(%d)", uint(r))
    81  }
    82  
    83  // domain represents the domain of a variable pair in which a set
    84  // of relations is known. For example, relations learned for unsigned
    85  // pairs cannot be transferred to signed pairs because the same bit
    86  // representation can mean something else.
    87  type domain uint
    88  
    89  const (
    90  	signed domain = 1 << iota
    91  	unsigned
    92  	pointer
    93  	boolean
    94  )
    95  
    96  var domainStrings = [...]string{
    97  	"signed", "unsigned", "pointer", "boolean",
    98  }
    99  
   100  func (d domain) String() string {
   101  	s := ""
   102  	for i, ds := range domainStrings {
   103  		if d&(1<<uint(i)) != 0 {
   104  			if len(s) != 0 {
   105  				s += "|"
   106  			}
   107  			s += ds
   108  			d &^= 1 << uint(i)
   109  		}
   110  	}
   111  	if d != 0 {
   112  		if len(s) != 0 {
   113  			s += "|"
   114  		}
   115  		s += fmt.Sprintf("0x%x", uint(d))
   116  	}
   117  	return s
   118  }
   119  
   120  // a limit records known upper and lower bounds for a value.
   121  //
   122  // If we have min>max or umin>umax, then this limit is
   123  // called "unsatisfiable". When we encounter such a limit, we
   124  // know that any code for which that limit applies is unreachable.
   125  // We don't particularly care how unsatisfiable limits propagate,
   126  // including becoming satisfiable, because any optimization
   127  // decisions based on those limits only apply to unreachable code.
   128  type limit struct {
   129  	min, max   int64  // min <= value <= max, signed
   130  	umin, umax uint64 // umin <= value <= umax, unsigned
   131  	// For booleans, we use 0==false, 1==true for both ranges
   132  	// For pointers, we use 0,0,0,0 for nil and minInt64,maxInt64,1,maxUint64 for nonnil
   133  }
   134  
   135  func (l limit) String() string {
   136  	return fmt.Sprintf("sm,SM=%d,%d um,UM=%d,%d", l.min, l.max, l.umin, l.umax)
   137  }
   138  
   139  func (l limit) intersect(l2 limit) limit {
   140  	l.min = max(l.min, l2.min)
   141  	l.umin = max(l.umin, l2.umin)
   142  	l.max = min(l.max, l2.max)
   143  	l.umax = min(l.umax, l2.umax)
   144  	return l
   145  }
   146  
   147  func (l limit) signedMin(m int64) limit {
   148  	l.min = max(l.min, m)
   149  	return l
   150  }
   151  
   152  func (l limit) signedMinMax(minimum, maximum int64) limit {
   153  	l.min = max(l.min, minimum)
   154  	l.max = min(l.max, maximum)
   155  	return l
   156  }
   157  
   158  func (l limit) unsignedMin(m uint64) limit {
   159  	l.umin = max(l.umin, m)
   160  	return l
   161  }
   162  func (l limit) unsignedMax(m uint64) limit {
   163  	l.umax = min(l.umax, m)
   164  	return l
   165  }
   166  func (l limit) unsignedMinMax(minimum, maximum uint64) limit {
   167  	l.umin = max(l.umin, minimum)
   168  	l.umax = min(l.umax, maximum)
   169  	return l
   170  }
   171  
   172  func (l limit) nonzero() bool {
   173  	return l.min > 0 || l.umin > 0 || l.max < 0
   174  }
   175  func (l limit) maybeZero() bool {
   176  	return !l.nonzero()
   177  }
   178  func (l limit) nonnegative() bool {
   179  	return l.min >= 0
   180  }
   181  func (l limit) unsat() bool {
   182  	return l.min > l.max || l.umin > l.umax
   183  }
   184  
   185  // If x and y can add without overflow or underflow
   186  // (using b bits), safeAdd returns x+y, true.
   187  // Otherwise, returns 0, false.
   188  func safeAdd(x, y int64, b uint) (int64, bool) {
   189  	s := x + y
   190  	if x >= 0 && y >= 0 && s < 0 {
   191  		return 0, false // 64-bit overflow
   192  	}
   193  	if x < 0 && y < 0 && s >= 0 {
   194  		return 0, false // 64-bit underflow
   195  	}
   196  	if !fitsInBits(s, b) {
   197  		return 0, false
   198  	}
   199  	return s, true
   200  }
   201  
   202  // same as safeAdd for unsigned arithmetic.
   203  func safeAddU(x, y uint64, b uint) (uint64, bool) {
   204  	s := x + y
   205  	if s < x || s < y {
   206  		return 0, false // 64-bit overflow
   207  	}
   208  	if !fitsInBitsU(s, b) {
   209  		return 0, false
   210  	}
   211  	return s, true
   212  }
   213  
   214  // same as safeAdd but for subtraction.
   215  func safeSub(x, y int64, b uint) (int64, bool) {
   216  	if y == math.MinInt64 {
   217  		if x == math.MaxInt64 {
   218  			return 0, false // 64-bit overflow
   219  		}
   220  		x++
   221  		y++
   222  	}
   223  	return safeAdd(x, -y, b)
   224  }
   225  
   226  // same as safeAddU but for subtraction.
   227  func safeSubU(x, y uint64, b uint) (uint64, bool) {
   228  	if x < y {
   229  		return 0, false // 64-bit underflow
   230  	}
   231  	s := x - y
   232  	if !fitsInBitsU(s, b) {
   233  		return 0, false
   234  	}
   235  	return s, true
   236  }
   237  
   238  // fitsInBits reports whether x fits in b bits (signed).
   239  func fitsInBits(x int64, b uint) bool {
   240  	if b == 64 {
   241  		return true
   242  	}
   243  	m := int64(-1) << (b - 1)
   244  	M := -m - 1
   245  	return x >= m && x <= M
   246  }
   247  
   248  // fitsInBitsU reports whether x fits in b bits (unsigned).
   249  func fitsInBitsU(x uint64, b uint) bool {
   250  	return x>>b == 0
   251  }
   252  
   253  // add returns the limit obtained by adding a value with limit l
   254  // to a value with limit l2. The result must fit in b bits.
   255  func (l limit) add(l2 limit, b uint) limit {
   256  	r := noLimit
   257  	min, minOk := safeAdd(l.min, l2.min, b)
   258  	max, maxOk := safeAdd(l.max, l2.max, b)
   259  	if minOk && maxOk {
   260  		r.min = min
   261  		r.max = max
   262  	}
   263  	umin, uminOk := safeAddU(l.umin, l2.umin, b)
   264  	umax, umaxOk := safeAddU(l.umax, l2.umax, b)
   265  	if uminOk && umaxOk {
   266  		r.umin = umin
   267  		r.umax = umax
   268  	}
   269  	return r
   270  }
   271  
   272  // same as add but for subtraction.
   273  func (l limit) sub(l2 limit, b uint) limit {
   274  	r := noLimit
   275  	min, minOk := safeSub(l.min, l2.max, b)
   276  	max, maxOk := safeSub(l.max, l2.min, b)
   277  	if minOk && maxOk {
   278  		r.min = min
   279  		r.max = max
   280  	}
   281  	umin, uminOk := safeSubU(l.umin, l2.umax, b)
   282  	umax, umaxOk := safeSubU(l.umax, l2.umin, b)
   283  	if uminOk && umaxOk {
   284  		r.umin = umin
   285  		r.umax = umax
   286  	}
   287  	return r
   288  }
   289  
   290  // same as add but for multiplication.
   291  func (l limit) mul(l2 limit, b uint) limit {
   292  	r := noLimit
   293  	umaxhi, umaxlo := bits.Mul64(l.umax, l2.umax)
   294  	if umaxhi == 0 && fitsInBitsU(umaxlo, b) {
   295  		r.umax = umaxlo
   296  		r.umin = l.umin * l2.umin
   297  		// Note: if the code containing this multiply is
   298  		// unreachable, then we may have umin>umax, and this
   299  		// multiply may overflow.  But that's ok for
   300  		// unreachable code. If this code is reachable, we
   301  		// know umin<=umax, so this multiply will not overflow
   302  		// because the max multiply didn't.
   303  	}
   304  	// Signed is harder, so don't bother. The only useful
   305  	// case is when we know both multiplicands are nonnegative,
   306  	// but that case is handled above because we would have then
   307  	// previously propagated signed info to the unsigned domain,
   308  	// and will propagate it back after the multiply.
   309  	return r
   310  }
   311  
   312  // Similar to add, but compute 1 << l if it fits without overflow in b bits.
   313  func (l limit) exp2(b uint) limit {
   314  	r := noLimit
   315  	if l.umax < uint64(b) {
   316  		r.umin = 1 << l.umin
   317  		r.umax = 1 << l.umax
   318  		// Same as above in mul, signed<->unsigned propagation
   319  		// will handle the signed case for us.
   320  	}
   321  	return r
   322  }
   323  
   324  // Similar to add, but computes the complement of the limit for bitsize b.
   325  func (l limit) com(b uint) limit {
   326  	switch b {
   327  	case 64:
   328  		return limit{
   329  			min:  ^l.max,
   330  			max:  ^l.min,
   331  			umin: ^l.umax,
   332  			umax: ^l.umin,
   333  		}
   334  	case 32:
   335  		return limit{
   336  			min:  int64(^int32(l.max)),
   337  			max:  int64(^int32(l.min)),
   338  			umin: uint64(^uint32(l.umax)),
   339  			umax: uint64(^uint32(l.umin)),
   340  		}
   341  	case 16:
   342  		return limit{
   343  			min:  int64(^int16(l.max)),
   344  			max:  int64(^int16(l.min)),
   345  			umin: uint64(^uint16(l.umax)),
   346  			umax: uint64(^uint16(l.umin)),
   347  		}
   348  	case 8:
   349  		return limit{
   350  			min:  int64(^int8(l.max)),
   351  			max:  int64(^int8(l.min)),
   352  			umin: uint64(^uint8(l.umax)),
   353  			umax: uint64(^uint8(l.umin)),
   354  		}
   355  	default:
   356  		panic("unreachable")
   357  	}
   358  }
   359  
   360  var noLimit = limit{math.MinInt64, math.MaxInt64, 0, math.MaxUint64}
   361  
   362  // a limitFact is a limit known for a particular value.
   363  type limitFact struct {
   364  	vid   ID
   365  	limit limit
   366  }
   367  
   368  // An ordering encodes facts like v < w.
   369  type ordering struct {
   370  	next *ordering // linked list of all known orderings for v.
   371  	// Note: v is implicit here, determined by which linked list it is in.
   372  	w *Value
   373  	d domain
   374  	r relation // one of ==,!=,<,<=,>,>=
   375  	// if d is boolean or pointer, r can only be ==, !=
   376  }
   377  
   378  // factsTable keeps track of relations between pairs of values.
   379  //
   380  // The fact table logic is sound, but incomplete. Outside of a few
   381  // special cases, it performs no deduction or arithmetic. While there
   382  // are known decision procedures for this, the ad hoc approach taken
   383  // by the facts table is effective for real code while remaining very
   384  // efficient.
   385  type factsTable struct {
   386  	// unsat is true if facts contains a contradiction.
   387  	//
   388  	// Note that the factsTable logic is incomplete, so if unsat
   389  	// is false, the assertions in factsTable could be satisfiable
   390  	// *or* unsatisfiable.
   391  	unsat      bool // true if facts contains a contradiction
   392  	unsatDepth int  // number of unsat checkpoints
   393  
   394  	// order* is a couple of partial order sets that record information
   395  	// about relations between SSA values in the signed and unsigned
   396  	// domain.
   397  	orderS *poset
   398  	orderU *poset
   399  
   400  	// orderings contains a list of known orderings between values.
   401  	// These lists are indexed by v.ID.
   402  	// We do not record transitive orderings. Only explicitly learned
   403  	// orderings are recorded. Transitive orderings can be obtained
   404  	// by walking along the individual orderings.
   405  	orderings map[ID]*ordering
   406  	// stack of IDs which have had an entry added in orderings.
   407  	// In addition, ID==0 are checkpoint markers.
   408  	orderingsStack []ID
   409  	orderingCache  *ordering // unused ordering records
   410  
   411  	// known lower and upper constant bounds on individual values.
   412  	limits       []limit     // indexed by value ID
   413  	limitStack   []limitFact // previous entries
   414  	recurseCheck []bool      // recursion detector for limit propagation
   415  
   416  	// For each slice s, a map from s to a len(s)/cap(s) value (if any)
   417  	// TODO: check if there are cases that matter where we have
   418  	// more than one len(s) for a slice. We could keep a list if necessary.
   419  	lens map[ID]*Value
   420  	caps map[ID]*Value
   421  
   422  	// reusedTopoSortScoresTable recycle allocations for topo-sort
   423  	reusedTopoSortScoresTable []uint
   424  }
   425  
   426  // checkpointBound is an invalid value used for checkpointing
   427  // and restoring factsTable.
   428  var checkpointBound = limitFact{}
   429  
   430  func newFactsTable(f *Func) *factsTable {
   431  	ft := &factsTable{}
   432  	ft.orderS = f.newPoset()
   433  	ft.orderU = f.newPoset()
   434  	ft.orderS.SetUnsigned(false)
   435  	ft.orderU.SetUnsigned(true)
   436  	ft.orderings = make(map[ID]*ordering)
   437  	ft.limits = f.Cache.allocLimitSlice(f.NumValues())
   438  	for _, b := range f.Blocks {
   439  		for _, v := range b.Values {
   440  			ft.limits[v.ID] = initLimit(v)
   441  		}
   442  	}
   443  	ft.limitStack = make([]limitFact, 4)
   444  	ft.recurseCheck = f.Cache.allocBoolSlice(f.NumValues())
   445  	return ft
   446  }
   447  
   448  // initLimitForNewValue initializes the limits for newly created values,
   449  // possibly needing to expand the limits slice. Currently used by
   450  // simplifyBlock when certain provably constant results are folded.
   451  func (ft *factsTable) initLimitForNewValue(v *Value) {
   452  	if int(v.ID) >= len(ft.limits) {
   453  		f := v.Block.Func
   454  		n := f.NumValues()
   455  		if cap(ft.limits) >= n {
   456  			ft.limits = ft.limits[:n]
   457  		} else {
   458  			old := ft.limits
   459  			ft.limits = f.Cache.allocLimitSlice(n)
   460  			copy(ft.limits, old)
   461  			f.Cache.freeLimitSlice(old)
   462  		}
   463  	}
   464  	ft.limits[v.ID] = initLimit(v)
   465  }
   466  
   467  // signedMin records the fact that we know v is at least
   468  // min in the signed domain.
   469  func (ft *factsTable) signedMin(v *Value, min int64) {
   470  	ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64})
   471  }
   472  
   473  // signedMax records the fact that we know v is at most
   474  // max in the signed domain.
   475  func (ft *factsTable) signedMax(v *Value, max int64) {
   476  	ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64})
   477  }
   478  func (ft *factsTable) signedMinMax(v *Value, min, max int64) {
   479  	ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64})
   480  }
   481  
   482  // setNonNegative records the fact that v is known to be non-negative.
   483  func (ft *factsTable) setNonNegative(v *Value) {
   484  	ft.signedMin(v, 0)
   485  }
   486  
   487  // unsignedMin records the fact that we know v is at least
   488  // min in the unsigned domain.
   489  func (ft *factsTable) unsignedMin(v *Value, min uint64) {
   490  	ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64})
   491  }
   492  
   493  // unsignedMax records the fact that we know v is at most
   494  // max in the unsigned domain.
   495  func (ft *factsTable) unsignedMax(v *Value, max uint64) {
   496  	ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max})
   497  }
   498  func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) {
   499  	ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max})
   500  }
   501  
   502  func (ft *factsTable) booleanFalse(v *Value) {
   503  	ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
   504  }
   505  func (ft *factsTable) booleanTrue(v *Value) {
   506  	ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1})
   507  }
   508  func (ft *factsTable) pointerNil(v *Value) {
   509  	ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
   510  }
   511  func (ft *factsTable) pointerNonNil(v *Value) {
   512  	l := noLimit
   513  	l.umin = 1
   514  	ft.newLimit(v, l)
   515  }
   516  
   517  // newLimit adds new limiting information for v.
   518  func (ft *factsTable) newLimit(v *Value, newLim limit) {
   519  	oldLim := ft.limits[v.ID]
   520  
   521  	// Merge old and new information.
   522  	lim := oldLim.intersect(newLim)
   523  
   524  	// signed <-> unsigned propagation
   525  	if lim.min >= 0 {
   526  		lim = lim.unsignedMinMax(uint64(lim.min), uint64(lim.max))
   527  	}
   528  	if fitsInBitsU(lim.umax, uint(8*v.Type.Size()-1)) {
   529  		lim = lim.signedMinMax(int64(lim.umin), int64(lim.umax))
   530  	}
   531  
   532  	if lim == oldLim {
   533  		return // nothing new to record
   534  	}
   535  
   536  	if lim.unsat() {
   537  		ft.unsat = true
   538  		return
   539  	}
   540  
   541  	// Check for recursion. This normally happens because in unsatisfiable
   542  	// cases we have a < b < a, and every update to a's limits returns
   543  	// here again with the limit increased by 2.
   544  	// Normally this is caught early by the orderS/orderU posets, but in
   545  	// cases where the comparisons jump between signed and unsigned domains,
   546  	// the posets will not notice.
   547  	if ft.recurseCheck[v.ID] {
   548  		// This should only happen for unsatisfiable cases. TODO: check
   549  		return
   550  	}
   551  	ft.recurseCheck[v.ID] = true
   552  	defer func() {
   553  		ft.recurseCheck[v.ID] = false
   554  	}()
   555  
   556  	// Record undo information.
   557  	ft.limitStack = append(ft.limitStack, limitFact{v.ID, oldLim})
   558  	// Record new information.
   559  	ft.limits[v.ID] = lim
   560  	if v.Block.Func.pass.debug > 2 {
   561  		// TODO: pos is probably wrong. This is the position where v is defined,
   562  		// not the position where we learned the fact about it (which was
   563  		// probably some subsequent compare+branch).
   564  		v.Block.Func.Warnl(v.Pos, "new limit %s %s unsat=%v", v, lim.String(), ft.unsat)
   565  	}
   566  
   567  	// Propagate this new constant range to other values
   568  	// that we know are ordered with respect to this one.
   569  	// Note overflow/underflow in the arithmetic below is ok,
   570  	// it will just lead to imprecision (undetected unsatisfiability).
   571  	for o := ft.orderings[v.ID]; o != nil; o = o.next {
   572  		switch o.d {
   573  		case signed:
   574  			switch o.r {
   575  			case eq: // v == w
   576  				ft.signedMinMax(o.w, lim.min, lim.max)
   577  			case lt | eq: // v <= w
   578  				ft.signedMin(o.w, lim.min)
   579  			case lt: // v < w
   580  				ft.signedMin(o.w, lim.min+1)
   581  			case gt | eq: // v >= w
   582  				ft.signedMax(o.w, lim.max)
   583  			case gt: // v > w
   584  				ft.signedMax(o.w, lim.max-1)
   585  			case lt | gt: // v != w
   586  				if lim.min == lim.max { // v is a constant
   587  					c := lim.min
   588  					if ft.limits[o.w.ID].min == c {
   589  						ft.signedMin(o.w, c+1)
   590  					}
   591  					if ft.limits[o.w.ID].max == c {
   592  						ft.signedMax(o.w, c-1)
   593  					}
   594  				}
   595  			}
   596  		case unsigned:
   597  			switch o.r {
   598  			case eq: // v == w
   599  				ft.unsignedMinMax(o.w, lim.umin, lim.umax)
   600  			case lt | eq: // v <= w
   601  				ft.unsignedMin(o.w, lim.umin)
   602  			case lt: // v < w
   603  				ft.unsignedMin(o.w, lim.umin+1)
   604  			case gt | eq: // v >= w
   605  				ft.unsignedMax(o.w, lim.umax)
   606  			case gt: // v > w
   607  				ft.unsignedMax(o.w, lim.umax-1)
   608  			case lt | gt: // v != w
   609  				if lim.umin == lim.umax { // v is a constant
   610  					c := lim.umin
   611  					if ft.limits[o.w.ID].umin == c {
   612  						ft.unsignedMin(o.w, c+1)
   613  					}
   614  					if ft.limits[o.w.ID].umax == c {
   615  						ft.unsignedMax(o.w, c-1)
   616  					}
   617  				}
   618  			}
   619  		case boolean:
   620  			switch o.r {
   621  			case eq:
   622  				if lim.min == 0 && lim.max == 0 { // constant false
   623  					ft.booleanFalse(o.w)
   624  				}
   625  				if lim.min == 1 && lim.max == 1 { // constant true
   626  					ft.booleanTrue(o.w)
   627  				}
   628  			case lt | gt:
   629  				if lim.min == 0 && lim.max == 0 { // constant false
   630  					ft.booleanTrue(o.w)
   631  				}
   632  				if lim.min == 1 && lim.max == 1 { // constant true
   633  					ft.booleanFalse(o.w)
   634  				}
   635  			}
   636  		case pointer:
   637  			switch o.r {
   638  			case eq:
   639  				if lim.umax == 0 { // nil
   640  					ft.pointerNil(o.w)
   641  				}
   642  				if lim.umin > 0 { // non-nil
   643  					ft.pointerNonNil(o.w)
   644  				}
   645  			case lt | gt:
   646  				if lim.umax == 0 { // nil
   647  					ft.pointerNonNil(o.w)
   648  				}
   649  				// note: not equal to non-nil doesn't tell us anything.
   650  			}
   651  		}
   652  	}
   653  
   654  	// If this is new known constant for a boolean value,
   655  	// extract relation between its args. For example, if
   656  	// We learn v is false, and v is defined as a<b, then we learn a>=b.
   657  	if v.Type.IsBoolean() {
   658  		// If we reach here, it is because we have a more restrictive
   659  		// value for v than the default. The only two such values
   660  		// are constant true or constant false.
   661  		if lim.min != lim.max {
   662  			v.Block.Func.Fatalf("boolean not constant %v", v)
   663  		}
   664  		isTrue := lim.min == 1
   665  		if dr, ok := domainRelationTable[v.Op]; ok && v.Op != OpIsInBounds && v.Op != OpIsSliceInBounds {
   666  			d := dr.d
   667  			r := dr.r
   668  			if d == signed && ft.isNonNegative(v.Args[0]) && ft.isNonNegative(v.Args[1]) {
   669  				d |= unsigned
   670  			}
   671  			if !isTrue {
   672  				r ^= lt | gt | eq
   673  			}
   674  			// TODO: v.Block is wrong?
   675  			addRestrictions(v.Block, ft, d, v.Args[0], v.Args[1], r)
   676  		}
   677  		switch v.Op {
   678  		case OpIsNonNil:
   679  			if isTrue {
   680  				ft.pointerNonNil(v.Args[0])
   681  			} else {
   682  				ft.pointerNil(v.Args[0])
   683  			}
   684  		case OpIsInBounds, OpIsSliceInBounds:
   685  			// 0 <= a0 < a1 (or 0 <= a0 <= a1)
   686  			r := lt
   687  			if v.Op == OpIsSliceInBounds {
   688  				r |= eq
   689  			}
   690  			if isTrue {
   691  				// On the positive branch, we learn:
   692  				//   signed: 0 <= a0 < a1 (or 0 <= a0 <= a1)
   693  				//   unsigned:    a0 < a1 (or a0 <= a1)
   694  				ft.setNonNegative(v.Args[0])
   695  				ft.update(v.Block, v.Args[0], v.Args[1], signed, r)
   696  				ft.update(v.Block, v.Args[0], v.Args[1], unsigned, r)
   697  			} else {
   698  				// On the negative branch, we learn (0 > a0 ||
   699  				// a0 >= a1). In the unsigned domain, this is
   700  				// simply a0 >= a1 (which is the reverse of the
   701  				// positive branch, so nothing surprising).
   702  				// But in the signed domain, we can't express the ||
   703  				// condition, so check if a0 is non-negative instead,
   704  				// to be able to learn something.
   705  				r ^= lt | gt | eq // >= (index) or > (slice)
   706  				if ft.isNonNegative(v.Args[0]) {
   707  					ft.update(v.Block, v.Args[0], v.Args[1], signed, r)
   708  				}
   709  				ft.update(v.Block, v.Args[0], v.Args[1], unsigned, r)
   710  				// TODO: v.Block is wrong here
   711  			}
   712  		}
   713  	}
   714  }
   715  
   716  func (ft *factsTable) addOrdering(v, w *Value, d domain, r relation) {
   717  	o := ft.orderingCache
   718  	if o == nil {
   719  		o = &ordering{}
   720  	} else {
   721  		ft.orderingCache = o.next
   722  	}
   723  	o.w = w
   724  	o.d = d
   725  	o.r = r
   726  	o.next = ft.orderings[v.ID]
   727  	ft.orderings[v.ID] = o
   728  	ft.orderingsStack = append(ft.orderingsStack, v.ID)
   729  }
   730  
   731  // update updates the set of relations between v and w in domain d
   732  // restricting it to r.
   733  func (ft *factsTable) update(parent *Block, v, w *Value, d domain, r relation) {
   734  	if parent.Func.pass.debug > 2 {
   735  		parent.Func.Warnl(parent.Pos, "parent=%s, update %s %s %s", parent, v, w, r)
   736  	}
   737  	// No need to do anything else if we already found unsat.
   738  	if ft.unsat {
   739  		return
   740  	}
   741  
   742  	// Self-fact. It's wasteful to register it into the facts
   743  	// table, so just note whether it's satisfiable
   744  	if v == w {
   745  		if r&eq == 0 {
   746  			ft.unsat = true
   747  		}
   748  		return
   749  	}
   750  
   751  	if d == signed || d == unsigned {
   752  		var ok bool
   753  		order := ft.orderS
   754  		if d == unsigned {
   755  			order = ft.orderU
   756  		}
   757  		switch r {
   758  		case lt:
   759  			ok = order.SetOrder(v, w)
   760  		case gt:
   761  			ok = order.SetOrder(w, v)
   762  		case lt | eq:
   763  			ok = order.SetOrderOrEqual(v, w)
   764  		case gt | eq:
   765  			ok = order.SetOrderOrEqual(w, v)
   766  		case eq:
   767  			ok = order.SetEqual(v, w)
   768  		case lt | gt:
   769  			ok = order.SetNonEqual(v, w)
   770  		default:
   771  			panic("unknown relation")
   772  		}
   773  		ft.addOrdering(v, w, d, r)
   774  		ft.addOrdering(w, v, d, reverseBits[r])
   775  
   776  		if !ok {
   777  			if parent.Func.pass.debug > 2 {
   778  				parent.Func.Warnl(parent.Pos, "unsat %s %s %s", v, w, r)
   779  			}
   780  			ft.unsat = true
   781  			return
   782  		}
   783  	}
   784  	if d == boolean || d == pointer {
   785  		for o := ft.orderings[v.ID]; o != nil; o = o.next {
   786  			if o.d == d && o.w == w {
   787  				// We already know a relationship between v and w.
   788  				// Either it is a duplicate, or it is a contradiction,
   789  				// as we only allow eq and lt|gt for these domains,
   790  				if o.r != r {
   791  					ft.unsat = true
   792  				}
   793  				return
   794  			}
   795  		}
   796  		// TODO: this does not do transitive equality.
   797  		// We could use a poset like above, but somewhat degenerate (==,!= only).
   798  		ft.addOrdering(v, w, d, r)
   799  		ft.addOrdering(w, v, d, r) // note: reverseBits unnecessary for eq and lt|gt.
   800  	}
   801  
   802  	// Extract new constant limits based on the comparison.
   803  	vLimit := ft.limits[v.ID]
   804  	wLimit := ft.limits[w.ID]
   805  	// Note: all the +1/-1 below could overflow/underflow. Either will
   806  	// still generate correct results, it will just lead to imprecision.
   807  	// In fact if there is overflow/underflow, the corresponding
   808  	// code is unreachable because the known range is outside the range
   809  	// of the value's type.
   810  	switch d {
   811  	case signed:
   812  		switch r {
   813  		case eq: // v == w
   814  			ft.signedMinMax(v, wLimit.min, wLimit.max)
   815  			ft.signedMinMax(w, vLimit.min, vLimit.max)
   816  		case lt: // v < w
   817  			ft.signedMax(v, wLimit.max-1)
   818  			ft.signedMin(w, vLimit.min+1)
   819  		case lt | eq: // v <= w
   820  			ft.signedMax(v, wLimit.max)
   821  			ft.signedMin(w, vLimit.min)
   822  		case gt: // v > w
   823  			ft.signedMin(v, wLimit.min+1)
   824  			ft.signedMax(w, vLimit.max-1)
   825  		case gt | eq: // v >= w
   826  			ft.signedMin(v, wLimit.min)
   827  			ft.signedMax(w, vLimit.max)
   828  		case lt | gt: // v != w
   829  			if vLimit.min == vLimit.max { // v is a constant
   830  				c := vLimit.min
   831  				if wLimit.min == c {
   832  					ft.signedMin(w, c+1)
   833  				}
   834  				if wLimit.max == c {
   835  					ft.signedMax(w, c-1)
   836  				}
   837  			}
   838  			if wLimit.min == wLimit.max { // w is a constant
   839  				c := wLimit.min
   840  				if vLimit.min == c {
   841  					ft.signedMin(v, c+1)
   842  				}
   843  				if vLimit.max == c {
   844  					ft.signedMax(v, c-1)
   845  				}
   846  			}
   847  		}
   848  	case unsigned:
   849  		switch r {
   850  		case eq: // v == w
   851  			ft.unsignedMinMax(v, wLimit.umin, wLimit.umax)
   852  			ft.unsignedMinMax(w, vLimit.umin, vLimit.umax)
   853  		case lt: // v < w
   854  			ft.unsignedMax(v, wLimit.umax-1)
   855  			ft.unsignedMin(w, vLimit.umin+1)
   856  		case lt | eq: // v <= w
   857  			ft.unsignedMax(v, wLimit.umax)
   858  			ft.unsignedMin(w, vLimit.umin)
   859  		case gt: // v > w
   860  			ft.unsignedMin(v, wLimit.umin+1)
   861  			ft.unsignedMax(w, vLimit.umax-1)
   862  		case gt | eq: // v >= w
   863  			ft.unsignedMin(v, wLimit.umin)
   864  			ft.unsignedMax(w, vLimit.umax)
   865  		case lt | gt: // v != w
   866  			if vLimit.umin == vLimit.umax { // v is a constant
   867  				c := vLimit.umin
   868  				if wLimit.umin == c {
   869  					ft.unsignedMin(w, c+1)
   870  				}
   871  				if wLimit.umax == c {
   872  					ft.unsignedMax(w, c-1)
   873  				}
   874  			}
   875  			if wLimit.umin == wLimit.umax { // w is a constant
   876  				c := wLimit.umin
   877  				if vLimit.umin == c {
   878  					ft.unsignedMin(v, c+1)
   879  				}
   880  				if vLimit.umax == c {
   881  					ft.unsignedMax(v, c-1)
   882  				}
   883  			}
   884  		}
   885  	case boolean:
   886  		switch r {
   887  		case eq: // v == w
   888  			if vLimit.min == 1 { // v is true
   889  				ft.booleanTrue(w)
   890  			}
   891  			if vLimit.max == 0 { // v is false
   892  				ft.booleanFalse(w)
   893  			}
   894  			if wLimit.min == 1 { // w is true
   895  				ft.booleanTrue(v)
   896  			}
   897  			if wLimit.max == 0 { // w is false
   898  				ft.booleanFalse(v)
   899  			}
   900  		case lt | gt: // v != w
   901  			if vLimit.min == 1 { // v is true
   902  				ft.booleanFalse(w)
   903  			}
   904  			if vLimit.max == 0 { // v is false
   905  				ft.booleanTrue(w)
   906  			}
   907  			if wLimit.min == 1 { // w is true
   908  				ft.booleanFalse(v)
   909  			}
   910  			if wLimit.max == 0 { // w is false
   911  				ft.booleanTrue(v)
   912  			}
   913  		}
   914  	case pointer:
   915  		switch r {
   916  		case eq: // v == w
   917  			if vLimit.umax == 0 { // v is nil
   918  				ft.pointerNil(w)
   919  			}
   920  			if vLimit.umin > 0 { // v is non-nil
   921  				ft.pointerNonNil(w)
   922  			}
   923  			if wLimit.umax == 0 { // w is nil
   924  				ft.pointerNil(v)
   925  			}
   926  			if wLimit.umin > 0 { // w is non-nil
   927  				ft.pointerNonNil(v)
   928  			}
   929  		case lt | gt: // v != w
   930  			if vLimit.umax == 0 { // v is nil
   931  				ft.pointerNonNil(w)
   932  			}
   933  			if wLimit.umax == 0 { // w is nil
   934  				ft.pointerNonNil(v)
   935  			}
   936  			// Note: the other direction doesn't work.
   937  			// Being not equal to a non-nil pointer doesn't
   938  			// make you (necessarily) a nil pointer.
   939  		}
   940  	}
   941  
   942  	// Derived facts below here are only about numbers.
   943  	if d != signed && d != unsigned {
   944  		return
   945  	}
   946  
   947  	// Additional facts we know given the relationship between len and cap.
   948  	//
   949  	// TODO: Since prove now derives transitive relations, it
   950  	// should be sufficient to learn that len(w) <= cap(w) at the
   951  	// beginning of prove where we look for all len/cap ops.
   952  	if v.Op == OpSliceLen && r&lt == 0 && ft.caps[v.Args[0].ID] != nil {
   953  		// len(s) > w implies cap(s) > w
   954  		// len(s) >= w implies cap(s) >= w
   955  		// len(s) == w implies cap(s) >= w
   956  		ft.update(parent, ft.caps[v.Args[0].ID], w, d, r|gt)
   957  	}
   958  	if w.Op == OpSliceLen && r&gt == 0 && ft.caps[w.Args[0].ID] != nil {
   959  		// same, length on the RHS.
   960  		ft.update(parent, v, ft.caps[w.Args[0].ID], d, r|lt)
   961  	}
   962  	if v.Op == OpSliceCap && r&gt == 0 && ft.lens[v.Args[0].ID] != nil {
   963  		// cap(s) < w implies len(s) < w
   964  		// cap(s) <= w implies len(s) <= w
   965  		// cap(s) == w implies len(s) <= w
   966  		ft.update(parent, ft.lens[v.Args[0].ID], w, d, r|lt)
   967  	}
   968  	if w.Op == OpSliceCap && r&lt == 0 && ft.lens[w.Args[0].ID] != nil {
   969  		// same, capacity on the RHS.
   970  		ft.update(parent, v, ft.lens[w.Args[0].ID], d, r|gt)
   971  	}
   972  
   973  	// Process fence-post implications.
   974  	//
   975  	// First, make the condition > or >=.
   976  	if r == lt || r == lt|eq {
   977  		v, w = w, v
   978  		r = reverseBits[r]
   979  	}
   980  	switch r {
   981  	case gt:
   982  		if x, delta := isConstDelta(v); x != nil && delta == 1 {
   983  			// x+1 > w  ⇒  x >= w
   984  			//
   985  			// This is useful for eliminating the
   986  			// growslice branch of append.
   987  			ft.update(parent, x, w, d, gt|eq)
   988  		} else if x, delta := isConstDelta(w); x != nil && delta == -1 {
   989  			// v > x-1  ⇒  v >= x
   990  			ft.update(parent, v, x, d, gt|eq)
   991  		}
   992  	case gt | eq:
   993  		if x, delta := isConstDelta(v); x != nil && delta == -1 {
   994  			// x-1 >= w && x > min  ⇒  x > w
   995  			//
   996  			// Useful for i > 0; s[i-1].
   997  			lim := ft.limits[x.ID]
   998  			if (d == signed && lim.min > opMin[v.Op]) || (d == unsigned && lim.umin > 0) {
   999  				ft.update(parent, x, w, d, gt)
  1000  			}
  1001  		} else if x, delta := isConstDelta(w); x != nil && delta == 1 {
  1002  			// v >= x+1 && x < max  ⇒  v > x
  1003  			lim := ft.limits[x.ID]
  1004  			if (d == signed && lim.max < opMax[w.Op]) || (d == unsigned && lim.umax < opUMax[w.Op]) {
  1005  				ft.update(parent, v, x, d, gt)
  1006  			}
  1007  		}
  1008  	}
  1009  
  1010  	// Process: x+delta > w (with delta constant)
  1011  	// Only signed domain for now (useful for accesses to slices in loops).
  1012  	if r == gt || r == gt|eq {
  1013  		if x, delta := isConstDelta(v); x != nil && d == signed {
  1014  			if parent.Func.pass.debug > 1 {
  1015  				parent.Func.Warnl(parent.Pos, "x+d %s w; x:%v %v delta:%v w:%v d:%v", r, x, parent.String(), delta, w.AuxInt, d)
  1016  			}
  1017  			underflow := true
  1018  			if delta < 0 {
  1019  				l := ft.limits[x.ID]
  1020  				if (x.Type.Size() == 8 && l.min >= math.MinInt64-delta) ||
  1021  					(x.Type.Size() == 4 && l.min >= math.MinInt32-delta) {
  1022  					underflow = false
  1023  				}
  1024  			}
  1025  			if delta < 0 && !underflow {
  1026  				// If delta < 0 and x+delta cannot underflow then x > x+delta (that is, x > v)
  1027  				ft.update(parent, x, v, signed, gt)
  1028  			}
  1029  			if !w.isGenericIntConst() {
  1030  				// If we know that x+delta > w but w is not constant, we can derive:
  1031  				//    if delta < 0 and x+delta cannot underflow, then x > w
  1032  				// This is useful for loops with bounds "len(slice)-K" (delta = -K)
  1033  				if delta < 0 && !underflow {
  1034  					ft.update(parent, x, w, signed, r)
  1035  				}
  1036  			} else {
  1037  				// With w,delta constants, we want to derive: x+delta > w  ⇒  x > w-delta
  1038  				//
  1039  				// We compute (using integers of the correct size):
  1040  				//    min = w - delta
  1041  				//    max = MaxInt - delta
  1042  				//
  1043  				// And we prove that:
  1044  				//    if min<max: min < x AND x <= max
  1045  				//    if min>max: min < x OR  x <= max
  1046  				//
  1047  				// This is always correct, even in case of overflow.
  1048  				//
  1049  				// If the initial fact is x+delta >= w instead, the derived conditions are:
  1050  				//    if min<max: min <= x AND x <= max
  1051  				//    if min>max: min <= x OR  x <= max
  1052  				//
  1053  				// Notice the conditions for max are still <=, as they handle overflows.
  1054  				var min, max int64
  1055  				switch x.Type.Size() {
  1056  				case 8:
  1057  					min = w.AuxInt - delta
  1058  					max = int64(^uint64(0)>>1) - delta
  1059  				case 4:
  1060  					min = int64(int32(w.AuxInt) - int32(delta))
  1061  					max = int64(int32(^uint32(0)>>1) - int32(delta))
  1062  				case 2:
  1063  					min = int64(int16(w.AuxInt) - int16(delta))
  1064  					max = int64(int16(^uint16(0)>>1) - int16(delta))
  1065  				case 1:
  1066  					min = int64(int8(w.AuxInt) - int8(delta))
  1067  					max = int64(int8(^uint8(0)>>1) - int8(delta))
  1068  				default:
  1069  					panic("unimplemented")
  1070  				}
  1071  
  1072  				if min < max {
  1073  					// Record that x > min and max >= x
  1074  					if r == gt {
  1075  						min++
  1076  					}
  1077  					ft.signedMinMax(x, min, max)
  1078  				} else {
  1079  					// We know that either x>min OR x<=max. factsTable cannot record OR conditions,
  1080  					// so let's see if we can already prove that one of them is false, in which case
  1081  					// the other must be true
  1082  					l := ft.limits[x.ID]
  1083  					if l.max <= min {
  1084  						if r&eq == 0 || l.max < min {
  1085  							// x>min (x>=min) is impossible, so it must be x<=max
  1086  							ft.signedMax(x, max)
  1087  						}
  1088  					} else if l.min > max {
  1089  						// x<=max is impossible, so it must be x>min
  1090  						if r == gt {
  1091  							min++
  1092  						}
  1093  						ft.signedMin(x, min)
  1094  					}
  1095  				}
  1096  			}
  1097  		}
  1098  	}
  1099  
  1100  	// Look through value-preserving extensions.
  1101  	// If the domain is appropriate for the pre-extension Type,
  1102  	// repeat the update with the pre-extension Value.
  1103  	if isCleanExt(v) {
  1104  		switch {
  1105  		case d == signed && v.Args[0].Type.IsSigned():
  1106  			fallthrough
  1107  		case d == unsigned && !v.Args[0].Type.IsSigned():
  1108  			ft.update(parent, v.Args[0], w, d, r)
  1109  		}
  1110  	}
  1111  	if isCleanExt(w) {
  1112  		switch {
  1113  		case d == signed && w.Args[0].Type.IsSigned():
  1114  			fallthrough
  1115  		case d == unsigned && !w.Args[0].Type.IsSigned():
  1116  			ft.update(parent, v, w.Args[0], d, r)
  1117  		}
  1118  	}
  1119  }
  1120  
  1121  var opMin = map[Op]int64{
  1122  	OpAdd64: math.MinInt64, OpSub64: math.MinInt64,
  1123  	OpAdd32: math.MinInt32, OpSub32: math.MinInt32,
  1124  }
  1125  
  1126  var opMax = map[Op]int64{
  1127  	OpAdd64: math.MaxInt64, OpSub64: math.MaxInt64,
  1128  	OpAdd32: math.MaxInt32, OpSub32: math.MaxInt32,
  1129  }
  1130  
  1131  var opUMax = map[Op]uint64{
  1132  	OpAdd64: math.MaxUint64, OpSub64: math.MaxUint64,
  1133  	OpAdd32: math.MaxUint32, OpSub32: math.MaxUint32,
  1134  }
  1135  
  1136  // isNonNegative reports whether v is known to be non-negative.
  1137  func (ft *factsTable) isNonNegative(v *Value) bool {
  1138  	return ft.limits[v.ID].min >= 0
  1139  }
  1140  
  1141  // checkpoint saves the current state of known relations.
  1142  // Called when descending on a branch.
  1143  func (ft *factsTable) checkpoint() {
  1144  	if ft.unsat {
  1145  		ft.unsatDepth++
  1146  	}
  1147  	ft.limitStack = append(ft.limitStack, checkpointBound)
  1148  	ft.orderS.Checkpoint()
  1149  	ft.orderU.Checkpoint()
  1150  	ft.orderingsStack = append(ft.orderingsStack, 0)
  1151  }
  1152  
  1153  // restore restores known relation to the state just
  1154  // before the previous checkpoint.
  1155  // Called when backing up on a branch.
  1156  func (ft *factsTable) restore() {
  1157  	if ft.unsatDepth > 0 {
  1158  		ft.unsatDepth--
  1159  	} else {
  1160  		ft.unsat = false
  1161  	}
  1162  	for {
  1163  		old := ft.limitStack[len(ft.limitStack)-1]
  1164  		ft.limitStack = ft.limitStack[:len(ft.limitStack)-1]
  1165  		if old.vid == 0 { // checkpointBound
  1166  			break
  1167  		}
  1168  		ft.limits[old.vid] = old.limit
  1169  	}
  1170  	ft.orderS.Undo()
  1171  	ft.orderU.Undo()
  1172  	for {
  1173  		id := ft.orderingsStack[len(ft.orderingsStack)-1]
  1174  		ft.orderingsStack = ft.orderingsStack[:len(ft.orderingsStack)-1]
  1175  		if id == 0 { // checkpoint marker
  1176  			break
  1177  		}
  1178  		o := ft.orderings[id]
  1179  		ft.orderings[id] = o.next
  1180  		o.next = ft.orderingCache
  1181  		ft.orderingCache = o
  1182  	}
  1183  }
  1184  
  1185  var (
  1186  	reverseBits = [...]relation{0, 4, 2, 6, 1, 5, 3, 7}
  1187  
  1188  	// maps what we learn when the positive branch is taken.
  1189  	// For example:
  1190  	//      OpLess8:   {signed, lt},
  1191  	//	v1 = (OpLess8 v2 v3).
  1192  	// If we learn that v1 is true, then we can deduce that v2<v3
  1193  	// in the signed domain.
  1194  	domainRelationTable = map[Op]struct {
  1195  		d domain
  1196  		r relation
  1197  	}{
  1198  		OpEq8:   {signed | unsigned, eq},
  1199  		OpEq16:  {signed | unsigned, eq},
  1200  		OpEq32:  {signed | unsigned, eq},
  1201  		OpEq64:  {signed | unsigned, eq},
  1202  		OpEqPtr: {pointer, eq},
  1203  		OpEqB:   {boolean, eq},
  1204  
  1205  		OpNeq8:   {signed | unsigned, lt | gt},
  1206  		OpNeq16:  {signed | unsigned, lt | gt},
  1207  		OpNeq32:  {signed | unsigned, lt | gt},
  1208  		OpNeq64:  {signed | unsigned, lt | gt},
  1209  		OpNeqPtr: {pointer, lt | gt},
  1210  		OpNeqB:   {boolean, lt | gt},
  1211  
  1212  		OpLess8:   {signed, lt},
  1213  		OpLess8U:  {unsigned, lt},
  1214  		OpLess16:  {signed, lt},
  1215  		OpLess16U: {unsigned, lt},
  1216  		OpLess32:  {signed, lt},
  1217  		OpLess32U: {unsigned, lt},
  1218  		OpLess64:  {signed, lt},
  1219  		OpLess64U: {unsigned, lt},
  1220  
  1221  		OpLeq8:   {signed, lt | eq},
  1222  		OpLeq8U:  {unsigned, lt | eq},
  1223  		OpLeq16:  {signed, lt | eq},
  1224  		OpLeq16U: {unsigned, lt | eq},
  1225  		OpLeq32:  {signed, lt | eq},
  1226  		OpLeq32U: {unsigned, lt | eq},
  1227  		OpLeq64:  {signed, lt | eq},
  1228  		OpLeq64U: {unsigned, lt | eq},
  1229  	}
  1230  )
  1231  
  1232  // cleanup returns the posets to the free list
  1233  func (ft *factsTable) cleanup(f *Func) {
  1234  	for _, po := range []*poset{ft.orderS, ft.orderU} {
  1235  		// Make sure it's empty as it should be. A non-empty poset
  1236  		// might cause errors and miscompilations if reused.
  1237  		if checkEnabled {
  1238  			if err := po.CheckEmpty(); err != nil {
  1239  				f.Fatalf("poset not empty after function %s: %v", f.Name, err)
  1240  			}
  1241  		}
  1242  		f.retPoset(po)
  1243  	}
  1244  	f.Cache.freeLimitSlice(ft.limits)
  1245  	f.Cache.freeBoolSlice(ft.recurseCheck)
  1246  	if cap(ft.reusedTopoSortScoresTable) > 0 {
  1247  		f.Cache.freeUintSlice(ft.reusedTopoSortScoresTable)
  1248  	}
  1249  }
  1250  
  1251  // addSlicesOfSameLen finds the slices that are in the same block and whose Op
  1252  // is OpPhi and always have the same length, then add the equality relationship
  1253  // between them to ft. If two slices start out with the same length and decrease
  1254  // in length by the same amount on each round of the loop (or in the if block),
  1255  // then we think their lengths are always equal.
  1256  //
  1257  // See https://go.dev/issues/75144
  1258  //
  1259  // In fact, we are just propagating the equality
  1260  //
  1261  //	if len(a) == len(b) { // from here
  1262  //		for len(a) > 4 {
  1263  //			a = a[4:]
  1264  //			b = b[4:]
  1265  //		}
  1266  //		if len(a) == len(b) { // to here
  1267  //			return true
  1268  //		}
  1269  //	}
  1270  //
  1271  // or change the for to if:
  1272  //
  1273  //	if len(a) == len(b) { // from here
  1274  //		if len(a) > 4 {
  1275  //			a = a[4:]
  1276  //			b = b[4:]
  1277  //		}
  1278  //		if len(a) == len(b) { // to here
  1279  //			return true
  1280  //		}
  1281  //	}
  1282  func addSlicesOfSameLen(ft *factsTable, b *Block) {
  1283  	// Let w points to the first value we're interested in, and then we
  1284  	// only process those values ​​that appear to be the same length as w,
  1285  	// looping only once. This should be enough in most cases. And u is
  1286  	// similar to w, see comment for predIndex.
  1287  	var u, w *Value
  1288  	var i, j, k sliceInfo
  1289  	isInterested := func(v *Value) bool {
  1290  		j = getSliceInfo(v)
  1291  		return j.sliceWhere != sliceUnknown
  1292  	}
  1293  	for _, v := range b.Values {
  1294  		if v.Uses == 0 {
  1295  			continue
  1296  		}
  1297  		if v.Op == OpPhi && len(v.Args) == 2 && ft.lens[v.ID] != nil && isInterested(v) {
  1298  			if j.predIndex == 1 && ft.lens[v.Args[0].ID] != nil {
  1299  				// found v = (Phi x (SliceMake _ (Add64 (Const64 [n]) (SliceLen x)) _))) or
  1300  				// v = (Phi x (SliceMake _ (Add64 (Const64 [n]) (SliceLen v)) _)))
  1301  				if w == nil {
  1302  					k = j
  1303  					w = v
  1304  					continue
  1305  				}
  1306  				// propagate the equality
  1307  				if j == k && ft.orderS.Equal(ft.lens[v.Args[0].ID], ft.lens[w.Args[0].ID]) {
  1308  					ft.update(b, ft.lens[v.ID], ft.lens[w.ID], signed, eq)
  1309  				}
  1310  			} else if j.predIndex == 0 && ft.lens[v.Args[1].ID] != nil {
  1311  				// found v = (Phi (SliceMake _ (Add64 (Const64 [n]) (SliceLen x)) _)) x) or
  1312  				// v = (Phi (SliceMake _ (Add64 (Const64 [n]) (SliceLen v)) _)) x)
  1313  				if u == nil {
  1314  					i = j
  1315  					u = v
  1316  					continue
  1317  				}
  1318  				// propagate the equality
  1319  				if j == i && ft.orderS.Equal(ft.lens[v.Args[1].ID], ft.lens[u.Args[1].ID]) {
  1320  					ft.update(b, ft.lens[v.ID], ft.lens[u.ID], signed, eq)
  1321  				}
  1322  			}
  1323  		}
  1324  	}
  1325  }
  1326  
  1327  type sliceWhere int
  1328  
  1329  const (
  1330  	sliceUnknown sliceWhere = iota
  1331  	sliceInFor
  1332  	sliceInIf
  1333  )
  1334  
  1335  // predIndex is used to indicate the branch represented by the predecessor
  1336  // block in which the slicing operation occurs.
  1337  type predIndex int
  1338  
  1339  type sliceInfo struct {
  1340  	lengthDiff int64
  1341  	sliceWhere
  1342  	predIndex
  1343  }
  1344  
  1345  // getSliceInfo returns the negative increment of the slice length in a slice
  1346  // operation by examine the Phi node at the merge block. So, we only interest
  1347  // in the slice operation if it is inside a for block or an if block.
  1348  // Otherwise it returns sliceInfo{0, sliceUnknown, 0}.
  1349  //
  1350  // For the following for block:
  1351  //
  1352  //	for len(a) > 4 {
  1353  //	    a = a[4:]
  1354  //	}
  1355  //
  1356  // vp = (Phi v3 v9)
  1357  // v5 = (SliceLen vp)
  1358  // v7 = (Add64 (Const64 [-4]) v5)
  1359  // v9 = (SliceMake _ v7 _)
  1360  //
  1361  // returns sliceInfo{-4, sliceInFor, 1}
  1362  //
  1363  // For a subsequent merge block after an if block:
  1364  //
  1365  //	if len(a) > 4 {
  1366  //	    a = a[4:]
  1367  //	}
  1368  //	a // here
  1369  //
  1370  // vp = (Phi v3 v9)
  1371  // v5 = (SliceLen v3)
  1372  // v7 = (Add64 (Const64 [-4]) v5)
  1373  // v9 = (SliceMake _ v7 _)
  1374  //
  1375  // returns sliceInfo{-4, sliceInIf, 1}
  1376  //
  1377  // Returns sliceInfo{0, sliceUnknown, 0} if it is not the slice
  1378  // operation we are interested in.
  1379  func getSliceInfo(vp *Value) (inf sliceInfo) {
  1380  	if vp.Op != OpPhi || len(vp.Args) != 2 {
  1381  		return
  1382  	}
  1383  	var i predIndex
  1384  	var l *Value // length for OpSliceMake
  1385  	if vp.Args[0].Op != OpSliceMake && vp.Args[1].Op == OpSliceMake {
  1386  		l = vp.Args[1].Args[1]
  1387  		i = 1
  1388  	} else if vp.Args[0].Op == OpSliceMake && vp.Args[1].Op != OpSliceMake {
  1389  		l = vp.Args[0].Args[1]
  1390  		i = 0
  1391  	} else {
  1392  		return
  1393  	}
  1394  	var op Op
  1395  	switch l.Op {
  1396  	case OpAdd64:
  1397  		op = OpConst64
  1398  	case OpAdd32:
  1399  		op = OpConst32
  1400  	default:
  1401  		return
  1402  	}
  1403  	if l.Args[0].Op == op && l.Args[1].Op == OpSliceLen && l.Args[1].Args[0] == vp {
  1404  		return sliceInfo{l.Args[0].AuxInt, sliceInFor, i}
  1405  	}
  1406  	if l.Args[1].Op == op && l.Args[0].Op == OpSliceLen && l.Args[0].Args[0] == vp {
  1407  		return sliceInfo{l.Args[1].AuxInt, sliceInFor, i}
  1408  	}
  1409  	if l.Args[0].Op == op && l.Args[1].Op == OpSliceLen && l.Args[1].Args[0] == vp.Args[1-i] {
  1410  		return sliceInfo{l.Args[0].AuxInt, sliceInIf, i}
  1411  	}
  1412  	if l.Args[1].Op == op && l.Args[0].Op == OpSliceLen && l.Args[0].Args[0] == vp.Args[1-i] {
  1413  		return sliceInfo{l.Args[1].AuxInt, sliceInIf, i}
  1414  	}
  1415  	return
  1416  }
  1417  
  1418  // prove removes redundant BlockIf branches that can be inferred
  1419  // from previous dominating comparisons.
  1420  //
  1421  // By far, the most common redundant pair are generated by bounds checking.
  1422  // For example for the code:
  1423  //
  1424  //	a[i] = 4
  1425  //	foo(a[i])
  1426  //
  1427  // The compiler will generate the following code:
  1428  //
  1429  //	if i >= len(a) {
  1430  //	    panic("not in bounds")
  1431  //	}
  1432  //	a[i] = 4
  1433  //	if i >= len(a) {
  1434  //	    panic("not in bounds")
  1435  //	}
  1436  //	foo(a[i])
  1437  //
  1438  // The second comparison i >= len(a) is clearly redundant because if the
  1439  // else branch of the first comparison is executed, we already know that i < len(a).
  1440  // The code for the second panic can be removed.
  1441  //
  1442  // prove works by finding contradictions and trimming branches whose
  1443  // conditions are unsatisfiable given the branches leading up to them.
  1444  // It tracks a "fact table" of branch conditions. For each branching
  1445  // block, it asserts the branch conditions that uniquely dominate that
  1446  // block, and then separately asserts the block's branch condition and
  1447  // its negation. If either leads to a contradiction, it can trim that
  1448  // successor.
  1449  func prove(f *Func) {
  1450  	// Find induction variables. Currently, findIndVars
  1451  	// is limited to one induction variable per block.
  1452  	var indVars map[*Block]indVar
  1453  	for _, v := range findIndVar(f) {
  1454  		ind := v.ind
  1455  		if len(ind.Args) != 2 {
  1456  			// the rewrite code assumes there is only ever two parents to loops
  1457  			panic("unexpected induction with too many parents")
  1458  		}
  1459  
  1460  		nxt := v.nxt
  1461  		if !(ind.Uses == 2 && // 2 used by comparison and next
  1462  			nxt.Uses == 1) { // 1 used by induction
  1463  			// ind or nxt is used inside the loop, add it for the facts table
  1464  			if indVars == nil {
  1465  				indVars = make(map[*Block]indVar)
  1466  			}
  1467  			indVars[v.entry] = v
  1468  			continue
  1469  		} else {
  1470  			// Since this induction variable is not used for anything but counting the iterations,
  1471  			// no point in putting it into the facts table.
  1472  		}
  1473  
  1474  		// try to rewrite to a downward counting loop checking against start if the
  1475  		// loop body does not depend on ind or nxt and end is known before the loop.
  1476  		// This reduces pressure on the register allocator because this does not need
  1477  		// to use end on each iteration anymore. We compare against the start constant instead.
  1478  		// That means this code:
  1479  		//
  1480  		//	loop:
  1481  		//		ind = (Phi (Const [x]) nxt),
  1482  		//		if ind < end
  1483  		//		then goto enter_loop
  1484  		//		else goto exit_loop
  1485  		//
  1486  		//	enter_loop:
  1487  		//		do something without using ind nor nxt
  1488  		//		nxt = inc + ind
  1489  		//		goto loop
  1490  		//
  1491  		//	exit_loop:
  1492  		//
  1493  		// is rewritten to:
  1494  		//
  1495  		//	loop:
  1496  		//		ind = (Phi end nxt)
  1497  		//		if (Const [x]) < ind
  1498  		//		then goto enter_loop
  1499  		//		else goto exit_loop
  1500  		//
  1501  		//	enter_loop:
  1502  		//		do something without using ind nor nxt
  1503  		//		nxt = ind - inc
  1504  		//		goto loop
  1505  		//
  1506  		//	exit_loop:
  1507  		//
  1508  		// this is better because it only requires to keep ind then nxt alive while looping,
  1509  		// while the original form keeps ind then nxt and end alive
  1510  		start, end := v.min, v.max
  1511  		if v.flags&indVarCountDown != 0 {
  1512  			start, end = end, start
  1513  		}
  1514  
  1515  		if !start.isGenericIntConst() {
  1516  			// if start is not a constant we would be winning nothing from inverting the loop
  1517  			continue
  1518  		}
  1519  		if end.isGenericIntConst() {
  1520  			// TODO: if both start and end are constants we should rewrite such that the comparison
  1521  			// is against zero and nxt is ++ or -- operation
  1522  			// That means:
  1523  			//	for i := 2; i < 11; i += 2 {
  1524  			// should be rewritten to:
  1525  			//	for i := 5; 0 < i; i-- {
  1526  			continue
  1527  		}
  1528  
  1529  		if end.Block == ind.Block {
  1530  			// we can't rewrite loops where the condition depends on the loop body
  1531  			// this simple check is forced to work because if this is true a Phi in ind.Block must exist
  1532  			continue
  1533  		}
  1534  
  1535  		check := ind.Block.Controls[0]
  1536  		// invert the check
  1537  		check.Args[0], check.Args[1] = check.Args[1], check.Args[0]
  1538  
  1539  		// swap start and end in the loop
  1540  		for i, v := range check.Args {
  1541  			if v != end {
  1542  				continue
  1543  			}
  1544  
  1545  			check.SetArg(i, start)
  1546  			goto replacedEnd
  1547  		}
  1548  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
  1549  	replacedEnd:
  1550  
  1551  		for i, v := range ind.Args {
  1552  			if v != start {
  1553  				continue
  1554  			}
  1555  
  1556  			ind.SetArg(i, end)
  1557  			goto replacedStart
  1558  		}
  1559  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
  1560  	replacedStart:
  1561  
  1562  		if nxt.Args[0] != ind {
  1563  			// unlike additions subtractions are not commutative so be sure we get it right
  1564  			nxt.Args[0], nxt.Args[1] = nxt.Args[1], nxt.Args[0]
  1565  		}
  1566  
  1567  		switch nxt.Op {
  1568  		case OpAdd8:
  1569  			nxt.Op = OpSub8
  1570  		case OpAdd16:
  1571  			nxt.Op = OpSub16
  1572  		case OpAdd32:
  1573  			nxt.Op = OpSub32
  1574  		case OpAdd64:
  1575  			nxt.Op = OpSub64
  1576  		case OpSub8:
  1577  			nxt.Op = OpAdd8
  1578  		case OpSub16:
  1579  			nxt.Op = OpAdd16
  1580  		case OpSub32:
  1581  			nxt.Op = OpAdd32
  1582  		case OpSub64:
  1583  			nxt.Op = OpAdd64
  1584  		default:
  1585  			panic("unreachable")
  1586  		}
  1587  
  1588  		if f.pass.debug > 0 {
  1589  			f.Warnl(ind.Pos, "Inverted loop iteration")
  1590  		}
  1591  	}
  1592  
  1593  	ft := newFactsTable(f)
  1594  	ft.checkpoint()
  1595  
  1596  	// Find length and capacity ops.
  1597  	for _, b := range f.Blocks {
  1598  		for _, v := range b.Values {
  1599  			if v.Uses == 0 {
  1600  				// We don't care about dead values.
  1601  				// (There can be some that are CSEd but not removed yet.)
  1602  				continue
  1603  			}
  1604  			switch v.Op {
  1605  			case OpSliceLen:
  1606  				if ft.lens == nil {
  1607  					ft.lens = map[ID]*Value{}
  1608  				}
  1609  				// Set all len Values for the same slice as equal in the poset.
  1610  				// The poset handles transitive relations, so Values related to
  1611  				// any OpSliceLen for this slice will be correctly related to others.
  1612  				if l, ok := ft.lens[v.Args[0].ID]; ok {
  1613  					ft.update(b, v, l, signed, eq)
  1614  				} else {
  1615  					ft.lens[v.Args[0].ID] = v
  1616  				}
  1617  			case OpSliceCap:
  1618  				if ft.caps == nil {
  1619  					ft.caps = map[ID]*Value{}
  1620  				}
  1621  				// Same as case OpSliceLen above, but for slice cap.
  1622  				if c, ok := ft.caps[v.Args[0].ID]; ok {
  1623  					ft.update(b, v, c, signed, eq)
  1624  				} else {
  1625  					ft.caps[v.Args[0].ID] = v
  1626  				}
  1627  			}
  1628  		}
  1629  	}
  1630  
  1631  	// current node state
  1632  	type walkState int
  1633  	const (
  1634  		descend walkState = iota
  1635  		simplify
  1636  	)
  1637  	// work maintains the DFS stack.
  1638  	type bp struct {
  1639  		block *Block    // current handled block
  1640  		state walkState // what's to do
  1641  	}
  1642  	work := make([]bp, 0, 256)
  1643  	work = append(work, bp{
  1644  		block: f.Entry,
  1645  		state: descend,
  1646  	})
  1647  
  1648  	idom := f.Idom()
  1649  	sdom := f.Sdom()
  1650  
  1651  	// DFS on the dominator tree.
  1652  	//
  1653  	// For efficiency, we consider only the dominator tree rather
  1654  	// than the entire flow graph. On the way down, we consider
  1655  	// incoming branches and accumulate conditions that uniquely
  1656  	// dominate the current block. If we discover a contradiction,
  1657  	// we can eliminate the entire block and all of its children.
  1658  	// On the way back up, we consider outgoing branches that
  1659  	// haven't already been considered. This way we consider each
  1660  	// branch condition only once.
  1661  	for len(work) > 0 {
  1662  		node := work[len(work)-1]
  1663  		work = work[:len(work)-1]
  1664  		parent := idom[node.block.ID]
  1665  		branch := getBranch(sdom, parent, node.block)
  1666  
  1667  		switch node.state {
  1668  		case descend:
  1669  			ft.checkpoint()
  1670  
  1671  			// Entering the block, add facts about the induction variable
  1672  			// that is bound to this block.
  1673  			if iv, ok := indVars[node.block]; ok {
  1674  				addIndVarRestrictions(ft, parent, iv)
  1675  			}
  1676  
  1677  			// Add results of reaching this block via a branch from
  1678  			// its immediate dominator (if any).
  1679  			if branch != unknown {
  1680  				addBranchRestrictions(ft, parent, branch)
  1681  			}
  1682  
  1683  			// Add slices of the same length start from current block.
  1684  			addSlicesOfSameLen(ft, node.block)
  1685  
  1686  			if ft.unsat {
  1687  				// node.block is unreachable.
  1688  				// Remove it and don't visit
  1689  				// its children.
  1690  				removeBranch(parent, branch)
  1691  				ft.restore()
  1692  				break
  1693  			}
  1694  			// Otherwise, we can now commit to
  1695  			// taking this branch. We'll restore
  1696  			// ft when we unwind.
  1697  
  1698  			// Add facts about the values in the current block.
  1699  			addLocalFacts(ft, node.block)
  1700  
  1701  			work = append(work, bp{
  1702  				block: node.block,
  1703  				state: simplify,
  1704  			})
  1705  			for s := sdom.Child(node.block); s != nil; s = sdom.Sibling(s) {
  1706  				work = append(work, bp{
  1707  					block: s,
  1708  					state: descend,
  1709  				})
  1710  			}
  1711  
  1712  		case simplify:
  1713  			simplifyBlock(sdom, ft, node.block)
  1714  			ft.restore()
  1715  		}
  1716  	}
  1717  
  1718  	ft.restore()
  1719  
  1720  	ft.cleanup(f)
  1721  }
  1722  
  1723  // initLimit sets initial constant limit for v.  This limit is based
  1724  // only on the operation itself, not any of its input arguments. This
  1725  // method is only used in two places, once when the prove pass startup
  1726  // and the other when a new ssa value is created, both for init. (unlike
  1727  // flowLimit, below, which computes additional constraints based on
  1728  // ranges of opcode arguments).
  1729  func initLimit(v *Value) limit {
  1730  	if v.Type.IsBoolean() {
  1731  		switch v.Op {
  1732  		case OpConstBool:
  1733  			b := v.AuxInt
  1734  			return limit{min: b, max: b, umin: uint64(b), umax: uint64(b)}
  1735  		default:
  1736  			return limit{min: 0, max: 1, umin: 0, umax: 1}
  1737  		}
  1738  	}
  1739  	if v.Type.IsPtrShaped() { // These are the types that EqPtr/NeqPtr operate on, except uintptr.
  1740  		switch v.Op {
  1741  		case OpConstNil:
  1742  			return limit{min: 0, max: 0, umin: 0, umax: 0}
  1743  		case OpAddr, OpLocalAddr: // TODO: others?
  1744  			l := noLimit
  1745  			l.umin = 1
  1746  			return l
  1747  		default:
  1748  			return noLimit
  1749  		}
  1750  	}
  1751  	if !v.Type.IsInteger() {
  1752  		return noLimit
  1753  	}
  1754  
  1755  	// Default limits based on type.
  1756  	bitsize := v.Type.Size() * 8
  1757  	lim := limit{min: -(1 << (bitsize - 1)), max: 1<<(bitsize-1) - 1, umin: 0, umax: 1<<bitsize - 1}
  1758  
  1759  	// Tighter limits on some opcodes.
  1760  	switch v.Op {
  1761  	// constants
  1762  	case OpConst64:
  1763  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(v.AuxInt), umax: uint64(v.AuxInt)}
  1764  	case OpConst32:
  1765  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint32(v.AuxInt)), umax: uint64(uint32(v.AuxInt))}
  1766  	case OpConst16:
  1767  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint16(v.AuxInt)), umax: uint64(uint16(v.AuxInt))}
  1768  	case OpConst8:
  1769  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint8(v.AuxInt)), umax: uint64(uint8(v.AuxInt))}
  1770  
  1771  	// extensions
  1772  	case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16:
  1773  		lim = lim.signedMinMax(0, 1<<8-1)
  1774  		lim = lim.unsignedMax(1<<8 - 1)
  1775  	case OpZeroExt16to64, OpZeroExt16to32:
  1776  		lim = lim.signedMinMax(0, 1<<16-1)
  1777  		lim = lim.unsignedMax(1<<16 - 1)
  1778  	case OpZeroExt32to64:
  1779  		lim = lim.signedMinMax(0, 1<<32-1)
  1780  		lim = lim.unsignedMax(1<<32 - 1)
  1781  	case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16:
  1782  		lim = lim.signedMinMax(math.MinInt8, math.MaxInt8)
  1783  	case OpSignExt16to64, OpSignExt16to32:
  1784  		lim = lim.signedMinMax(math.MinInt16, math.MaxInt16)
  1785  	case OpSignExt32to64:
  1786  		lim = lim.signedMinMax(math.MinInt32, math.MaxInt32)
  1787  
  1788  	// math/bits intrinsics
  1789  	case OpCtz64, OpBitLen64, OpPopCount64,
  1790  		OpCtz32, OpBitLen32, OpPopCount32,
  1791  		OpCtz16, OpBitLen16, OpPopCount16,
  1792  		OpCtz8, OpBitLen8, OpPopCount8:
  1793  		lim = lim.unsignedMax(uint64(v.Args[0].Type.Size() * 8))
  1794  
  1795  	// bool to uint8 conversion
  1796  	case OpCvtBoolToUint8:
  1797  		lim = lim.unsignedMax(1)
  1798  
  1799  	// length operations
  1800  	case OpSliceLen, OpSliceCap:
  1801  		f := v.Block.Func
  1802  		elemSize := uint64(v.Args[0].Type.Elem().Size())
  1803  		if elemSize > 0 {
  1804  			heapSize := uint64(1)<<(uint64(f.Config.PtrSize)*8) - 1
  1805  			maximumElementsFittingInHeap := heapSize / elemSize
  1806  			lim = lim.unsignedMax(maximumElementsFittingInHeap)
  1807  		}
  1808  		fallthrough
  1809  	case OpStringLen:
  1810  		lim = lim.signedMin(0)
  1811  	}
  1812  
  1813  	// signed <-> unsigned propagation
  1814  	if lim.min >= 0 {
  1815  		lim = lim.unsignedMinMax(uint64(lim.min), uint64(lim.max))
  1816  	}
  1817  	if fitsInBitsU(lim.umax, uint(8*v.Type.Size()-1)) {
  1818  		lim = lim.signedMinMax(int64(lim.umin), int64(lim.umax))
  1819  	}
  1820  
  1821  	return lim
  1822  }
  1823  
  1824  // flowLimit updates the known limits of v in ft.
  1825  // flowLimit can use the ranges of input arguments.
  1826  //
  1827  // Note: this calculation only happens at the point the value is defined. We do not reevaluate
  1828  // it later. So for example:
  1829  //
  1830  //	v := x + y
  1831  //	if 0 <= x && x < 5 && 0 <= y && y < 5 { ... use v ... }
  1832  //
  1833  // we don't discover that the range of v is bounded in the conditioned
  1834  // block. We could recompute the range of v once we enter the block so
  1835  // we know that it is 0 <= v <= 8, but we don't have a mechanism to do
  1836  // that right now.
  1837  func (ft *factsTable) flowLimit(v *Value) {
  1838  	if !v.Type.IsInteger() {
  1839  		// TODO: boolean?
  1840  		return
  1841  	}
  1842  
  1843  	// Additional limits based on opcode and argument.
  1844  	// No need to repeat things here already done in initLimit.
  1845  	switch v.Op {
  1846  
  1847  	// extensions
  1848  	case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16, OpZeroExt16to64, OpZeroExt16to32, OpZeroExt32to64:
  1849  		a := ft.limits[v.Args[0].ID]
  1850  		ft.unsignedMinMax(v, a.umin, a.umax)
  1851  	case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16, OpSignExt16to64, OpSignExt16to32, OpSignExt32to64:
  1852  		a := ft.limits[v.Args[0].ID]
  1853  		ft.signedMinMax(v, a.min, a.max)
  1854  	case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
  1855  		a := ft.limits[v.Args[0].ID]
  1856  		if a.umax <= 1<<(uint64(v.Type.Size())*8)-1 {
  1857  			ft.unsignedMinMax(v, a.umin, a.umax)
  1858  		}
  1859  
  1860  	// math/bits
  1861  	case OpCtz64:
  1862  		a := ft.limits[v.Args[0].ID]
  1863  		if a.nonzero() {
  1864  			ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1))
  1865  		}
  1866  	case OpCtz32:
  1867  		a := ft.limits[v.Args[0].ID]
  1868  		if a.nonzero() {
  1869  			ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1))
  1870  		}
  1871  	case OpCtz16:
  1872  		a := ft.limits[v.Args[0].ID]
  1873  		if a.nonzero() {
  1874  			ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1))
  1875  		}
  1876  	case OpCtz8:
  1877  		a := ft.limits[v.Args[0].ID]
  1878  		if a.nonzero() {
  1879  			ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1))
  1880  		}
  1881  
  1882  	case OpPopCount64, OpPopCount32, OpPopCount16, OpPopCount8:
  1883  		a := ft.limits[v.Args[0].ID]
  1884  		changingBitsCount := uint64(bits.Len64(a.umax ^ a.umin))
  1885  		sharedLeadingMask := ^(uint64(1)<<changingBitsCount - 1)
  1886  		fixedBits := a.umax & sharedLeadingMask
  1887  		min := uint64(bits.OnesCount64(fixedBits))
  1888  		ft.unsignedMinMax(v, min, min+changingBitsCount)
  1889  
  1890  	case OpBitLen64:
  1891  		a := ft.limits[v.Args[0].ID]
  1892  		ft.unsignedMinMax(v,
  1893  			uint64(bits.Len64(a.umin)),
  1894  			uint64(bits.Len64(a.umax)))
  1895  	case OpBitLen32:
  1896  		a := ft.limits[v.Args[0].ID]
  1897  		ft.unsignedMinMax(v,
  1898  			uint64(bits.Len32(uint32(a.umin))),
  1899  			uint64(bits.Len32(uint32(a.umax))))
  1900  	case OpBitLen16:
  1901  		a := ft.limits[v.Args[0].ID]
  1902  		ft.unsignedMinMax(v,
  1903  			uint64(bits.Len16(uint16(a.umin))),
  1904  			uint64(bits.Len16(uint16(a.umax))))
  1905  	case OpBitLen8:
  1906  		a := ft.limits[v.Args[0].ID]
  1907  		ft.unsignedMinMax(v,
  1908  			uint64(bits.Len8(uint8(a.umin))),
  1909  			uint64(bits.Len8(uint8(a.umax))))
  1910  
  1911  	// Masks.
  1912  
  1913  	// TODO: if y.umax and y.umin share a leading bit pattern, y also has that leading bit pattern.
  1914  	// we could compare the patterns of always set bits in a and b and learn more about minimum and maximum.
  1915  	// But I doubt this help any real world code.
  1916  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  1917  		// AND can only make the value smaller.
  1918  		a := ft.limits[v.Args[0].ID]
  1919  		b := ft.limits[v.Args[1].ID]
  1920  		ft.unsignedMax(v, min(a.umax, b.umax))
  1921  	case OpOr64, OpOr32, OpOr16, OpOr8:
  1922  		// OR can only make the value bigger and can't flip bits proved to be zero in both inputs.
  1923  		a := ft.limits[v.Args[0].ID]
  1924  		b := ft.limits[v.Args[1].ID]
  1925  		ft.unsignedMinMax(v,
  1926  			max(a.umin, b.umin),
  1927  			1<<bits.Len64(a.umax|b.umax)-1)
  1928  	case OpXor64, OpXor32, OpXor16, OpXor8:
  1929  		// XOR can't flip bits that are proved to be zero in both inputs.
  1930  		a := ft.limits[v.Args[0].ID]
  1931  		b := ft.limits[v.Args[1].ID]
  1932  		ft.unsignedMax(v, 1<<bits.Len64(a.umax|b.umax)-1)
  1933  	case OpCom64, OpCom32, OpCom16, OpCom8:
  1934  		a := ft.limits[v.Args[0].ID]
  1935  		ft.newLimit(v, a.com(uint(v.Type.Size())*8))
  1936  
  1937  	// Arithmetic.
  1938  	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  1939  		a := ft.limits[v.Args[0].ID]
  1940  		b := ft.limits[v.Args[1].ID]
  1941  		ft.newLimit(v, a.add(b, uint(v.Type.Size())*8))
  1942  	case OpSub64, OpSub32, OpSub16, OpSub8:
  1943  		a := ft.limits[v.Args[0].ID]
  1944  		b := ft.limits[v.Args[1].ID]
  1945  		ft.newLimit(v, a.sub(b, uint(v.Type.Size())*8))
  1946  		ft.detectMod(v)
  1947  		ft.detectSliceLenRelation(v)
  1948  		ft.detectSubRelations(v)
  1949  	case OpNeg64, OpNeg32, OpNeg16, OpNeg8:
  1950  		a := ft.limits[v.Args[0].ID]
  1951  		bitsize := uint(v.Type.Size()) * 8
  1952  		ft.newLimit(v, a.com(bitsize).add(limit{min: 1, max: 1, umin: 1, umax: 1}, bitsize))
  1953  	case OpMul64, OpMul32, OpMul16, OpMul8:
  1954  		a := ft.limits[v.Args[0].ID]
  1955  		b := ft.limits[v.Args[1].ID]
  1956  		ft.newLimit(v, a.mul(b, uint(v.Type.Size())*8))
  1957  	case OpLsh64x64, OpLsh64x32, OpLsh64x16, OpLsh64x8,
  1958  		OpLsh32x64, OpLsh32x32, OpLsh32x16, OpLsh32x8,
  1959  		OpLsh16x64, OpLsh16x32, OpLsh16x16, OpLsh16x8,
  1960  		OpLsh8x64, OpLsh8x32, OpLsh8x16, OpLsh8x8:
  1961  		a := ft.limits[v.Args[0].ID]
  1962  		b := ft.limits[v.Args[1].ID]
  1963  		bitsize := uint(v.Type.Size()) * 8
  1964  		ft.newLimit(v, a.mul(b.exp2(bitsize), bitsize))
  1965  	case OpRsh64x64, OpRsh64x32, OpRsh64x16, OpRsh64x8,
  1966  		OpRsh32x64, OpRsh32x32, OpRsh32x16, OpRsh32x8,
  1967  		OpRsh16x64, OpRsh16x32, OpRsh16x16, OpRsh16x8,
  1968  		OpRsh8x64, OpRsh8x32, OpRsh8x16, OpRsh8x8:
  1969  		a := ft.limits[v.Args[0].ID]
  1970  		b := ft.limits[v.Args[1].ID]
  1971  		if b.min >= 0 {
  1972  			// Shift of negative makes a value closer to 0 (greater),
  1973  			// so if a.min is negative, v.min is a.min>>b.min instead of a.min>>b.max,
  1974  			// and similarly if a.max is negative, v.max is a.max>>b.max.
  1975  			// Easier to compute min and max of both than to write sign logic.
  1976  			vmin := min(a.min>>b.min, a.min>>b.max)
  1977  			vmax := max(a.max>>b.min, a.max>>b.max)
  1978  			ft.signedMinMax(v, vmin, vmax)
  1979  		}
  1980  	case OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8,
  1981  		OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
  1982  		OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8,
  1983  		OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8:
  1984  		a := ft.limits[v.Args[0].ID]
  1985  		b := ft.limits[v.Args[1].ID]
  1986  		if b.min >= 0 {
  1987  			ft.unsignedMinMax(v, a.umin>>b.max, a.umax>>b.min)
  1988  		}
  1989  	case OpDiv64, OpDiv32, OpDiv16, OpDiv8:
  1990  		a := ft.limits[v.Args[0].ID]
  1991  		b := ft.limits[v.Args[1].ID]
  1992  		if !(a.nonnegative() && b.nonnegative()) {
  1993  			// TODO: we could handle signed limits but I didn't bother.
  1994  			break
  1995  		}
  1996  		fallthrough
  1997  	case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u:
  1998  		a := ft.limits[v.Args[0].ID]
  1999  		b := ft.limits[v.Args[1].ID]
  2000  		lim := noLimit
  2001  		if b.umax > 0 {
  2002  			lim = lim.unsignedMin(a.umin / b.umax)
  2003  		}
  2004  		if b.umin > 0 {
  2005  			lim = lim.unsignedMax(a.umax / b.umin)
  2006  		}
  2007  		ft.newLimit(v, lim)
  2008  	case OpMod64, OpMod32, OpMod16, OpMod8:
  2009  		ft.modLimit(true, v, v.Args[0], v.Args[1])
  2010  	case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
  2011  		ft.modLimit(false, v, v.Args[0], v.Args[1])
  2012  
  2013  	case OpPhi:
  2014  		// Compute the union of all the input phis.
  2015  		// Often this will convey no information, because the block
  2016  		// is not dominated by its predecessors and hence the
  2017  		// phi arguments might not have been processed yet. But if
  2018  		// the values are declared earlier, it may help. e.g., for
  2019  		//    v = phi(c3, c5)
  2020  		// where c3 = OpConst [3] and c5 = OpConst [5] are
  2021  		// defined in the entry block, we can derive [3,5]
  2022  		// as the limit for v.
  2023  		l := ft.limits[v.Args[0].ID]
  2024  		for _, a := range v.Args[1:] {
  2025  			l2 := ft.limits[a.ID]
  2026  			l.min = min(l.min, l2.min)
  2027  			l.max = max(l.max, l2.max)
  2028  			l.umin = min(l.umin, l2.umin)
  2029  			l.umax = max(l.umax, l2.umax)
  2030  		}
  2031  		ft.newLimit(v, l)
  2032  	}
  2033  }
  2034  
  2035  // detectSliceLenRelation matches the pattern where
  2036  //  1. v := slicelen - index, OR v := slicecap - index
  2037  //     AND
  2038  //  2. index <= slicelen - K
  2039  //     THEN
  2040  //
  2041  // slicecap - index >= slicelen - index >= K
  2042  //
  2043  // Note that "index" is not useed for indexing in this pattern, but
  2044  // in the motivating example (chunked slice iteration) it is.
  2045  func (ft *factsTable) detectSliceLenRelation(v *Value) {
  2046  	if v.Op != OpSub64 {
  2047  		return
  2048  	}
  2049  
  2050  	if !(v.Args[0].Op == OpSliceLen || v.Args[0].Op == OpSliceCap) {
  2051  		return
  2052  	}
  2053  
  2054  	index := v.Args[1]
  2055  	if !ft.isNonNegative(index) {
  2056  		return
  2057  	}
  2058  	slice := v.Args[0].Args[0]
  2059  
  2060  	for o := ft.orderings[index.ID]; o != nil; o = o.next {
  2061  		if o.d != signed {
  2062  			continue
  2063  		}
  2064  		or := o.r
  2065  		if or != lt && or != lt|eq {
  2066  			continue
  2067  		}
  2068  		ow := o.w
  2069  		if ow.Op != OpAdd64 && ow.Op != OpSub64 {
  2070  			continue
  2071  		}
  2072  		var lenOffset *Value
  2073  		if bound := ow.Args[0]; bound.Op == OpSliceLen && bound.Args[0] == slice {
  2074  			lenOffset = ow.Args[1]
  2075  		} else if bound := ow.Args[1]; bound.Op == OpSliceLen && bound.Args[0] == slice {
  2076  			lenOffset = ow.Args[0]
  2077  		}
  2078  		if lenOffset == nil || lenOffset.Op != OpConst64 {
  2079  			continue
  2080  		}
  2081  		K := lenOffset.AuxInt
  2082  		if ow.Op == OpAdd64 {
  2083  			K = -K
  2084  		}
  2085  		if K < 0 {
  2086  			continue
  2087  		}
  2088  		if or == lt {
  2089  			K++
  2090  		}
  2091  		if K < 0 { // We hate thinking about overflow
  2092  			continue
  2093  		}
  2094  		ft.signedMin(v, K)
  2095  	}
  2096  }
  2097  
  2098  // v must be Sub{64,32,16,8}.
  2099  func (ft *factsTable) detectSubRelations(v *Value) {
  2100  	// v = x-y
  2101  	x := v.Args[0]
  2102  	y := v.Args[1]
  2103  	if x == y {
  2104  		ft.signedMinMax(v, 0, 0)
  2105  		return
  2106  	}
  2107  	xLim := ft.limits[x.ID]
  2108  	yLim := ft.limits[y.ID]
  2109  
  2110  	// Check if we might wrap around. If so, give up.
  2111  	width := uint(v.Type.Size()) * 8
  2112  	if _, ok := safeSub(xLim.min, yLim.max, width); !ok {
  2113  		return // x-y might underflow
  2114  	}
  2115  	if _, ok := safeSub(xLim.max, yLim.min, width); !ok {
  2116  		return // x-y might overflow
  2117  	}
  2118  
  2119  	// Subtracting a positive number only makes
  2120  	// things smaller.
  2121  	if yLim.min >= 0 {
  2122  		ft.update(v.Block, v, x, signed, lt|eq)
  2123  		// TODO: is this worth it?
  2124  		//if yLim.min > 0 {
  2125  		//	ft.update(v.Block, v, x, signed, lt)
  2126  		//}
  2127  	}
  2128  
  2129  	// Subtracting a number from a bigger one
  2130  	// can't go below 0.
  2131  	if ft.orderS.OrderedOrEqual(y, x) {
  2132  		ft.setNonNegative(v)
  2133  		// TODO: is this worth it?
  2134  		//if ft.orderS.Ordered(y, x) {
  2135  		//	ft.signedMin(v, 1)
  2136  		//}
  2137  	}
  2138  }
  2139  
  2140  // x%d has been rewritten to x - (x/d)*d.
  2141  func (ft *factsTable) detectMod(v *Value) {
  2142  	var opDiv, opDivU, opMul, opConst Op
  2143  	switch v.Op {
  2144  	case OpSub64:
  2145  		opDiv = OpDiv64
  2146  		opDivU = OpDiv64u
  2147  		opMul = OpMul64
  2148  		opConst = OpConst64
  2149  	case OpSub32:
  2150  		opDiv = OpDiv32
  2151  		opDivU = OpDiv32u
  2152  		opMul = OpMul32
  2153  		opConst = OpConst32
  2154  	case OpSub16:
  2155  		opDiv = OpDiv16
  2156  		opDivU = OpDiv16u
  2157  		opMul = OpMul16
  2158  		opConst = OpConst16
  2159  	case OpSub8:
  2160  		opDiv = OpDiv8
  2161  		opDivU = OpDiv8u
  2162  		opMul = OpMul8
  2163  		opConst = OpConst8
  2164  	}
  2165  
  2166  	mul := v.Args[1]
  2167  	if mul.Op != opMul {
  2168  		return
  2169  	}
  2170  	div, con := mul.Args[0], mul.Args[1]
  2171  	if div.Op == opConst {
  2172  		div, con = con, div
  2173  	}
  2174  	if con.Op != opConst || (div.Op != opDiv && div.Op != opDivU) || div.Args[0] != v.Args[0] || div.Args[1].Op != opConst || div.Args[1].AuxInt != con.AuxInt {
  2175  		return
  2176  	}
  2177  	ft.modLimit(div.Op == opDiv, v, v.Args[0], con)
  2178  }
  2179  
  2180  // modLimit sets v with facts derived from v = p % q.
  2181  func (ft *factsTable) modLimit(signed bool, v, p, q *Value) {
  2182  	a := ft.limits[p.ID]
  2183  	b := ft.limits[q.ID]
  2184  	if signed {
  2185  		if a.min < 0 && b.min > 0 {
  2186  			ft.signedMinMax(v, -(b.max - 1), b.max-1)
  2187  			return
  2188  		}
  2189  		if !(a.nonnegative() && b.nonnegative()) {
  2190  			// TODO: we could handle signed limits but I didn't bother.
  2191  			return
  2192  		}
  2193  		if a.min >= 0 && b.min > 0 {
  2194  			ft.setNonNegative(v)
  2195  		}
  2196  	}
  2197  	// Underflow in the arithmetic below is ok, it gives to MaxUint64 which does nothing to the limit.
  2198  	ft.unsignedMax(v, min(a.umax, b.umax-1))
  2199  }
  2200  
  2201  // getBranch returns the range restrictions added by p
  2202  // when reaching b. p is the immediate dominator of b.
  2203  func getBranch(sdom SparseTree, p *Block, b *Block) branch {
  2204  	if p == nil {
  2205  		return unknown
  2206  	}
  2207  	switch p.Kind {
  2208  	case BlockIf:
  2209  		// If p and p.Succs[0] are dominators it means that every path
  2210  		// from entry to b passes through p and p.Succs[0]. We care that
  2211  		// no path from entry to b passes through p.Succs[1]. If p.Succs[0]
  2212  		// has one predecessor then (apart from the degenerate case),
  2213  		// there is no path from entry that can reach b through p.Succs[1].
  2214  		// TODO: how about p->yes->b->yes, i.e. a loop in yes.
  2215  		if sdom.IsAncestorEq(p.Succs[0].b, b) && len(p.Succs[0].b.Preds) == 1 {
  2216  			return positive
  2217  		}
  2218  		if sdom.IsAncestorEq(p.Succs[1].b, b) && len(p.Succs[1].b.Preds) == 1 {
  2219  			return negative
  2220  		}
  2221  	case BlockJumpTable:
  2222  		// TODO: this loop can lead to quadratic behavior, as
  2223  		// getBranch can be called len(p.Succs) times.
  2224  		for i, e := range p.Succs {
  2225  			if sdom.IsAncestorEq(e.b, b) && len(e.b.Preds) == 1 {
  2226  				return jumpTable0 + branch(i)
  2227  			}
  2228  		}
  2229  	}
  2230  	return unknown
  2231  }
  2232  
  2233  // addIndVarRestrictions updates the factsTables ft with the facts
  2234  // learned from the induction variable indVar which drives the loop
  2235  // starting in Block b.
  2236  func addIndVarRestrictions(ft *factsTable, b *Block, iv indVar) {
  2237  	d := signed
  2238  	if ft.isNonNegative(iv.min) && ft.isNonNegative(iv.max) {
  2239  		d |= unsigned
  2240  	}
  2241  
  2242  	if iv.flags&indVarMinExc == 0 {
  2243  		addRestrictions(b, ft, d, iv.min, iv.ind, lt|eq)
  2244  	} else {
  2245  		addRestrictions(b, ft, d, iv.min, iv.ind, lt)
  2246  	}
  2247  
  2248  	if iv.flags&indVarMaxInc == 0 {
  2249  		addRestrictions(b, ft, d, iv.ind, iv.max, lt)
  2250  	} else {
  2251  		addRestrictions(b, ft, d, iv.ind, iv.max, lt|eq)
  2252  	}
  2253  }
  2254  
  2255  // addBranchRestrictions updates the factsTables ft with the facts learned when
  2256  // branching from Block b in direction br.
  2257  func addBranchRestrictions(ft *factsTable, b *Block, br branch) {
  2258  	c := b.Controls[0]
  2259  	switch {
  2260  	case br == negative:
  2261  		ft.booleanFalse(c)
  2262  	case br == positive:
  2263  		ft.booleanTrue(c)
  2264  	case br >= jumpTable0:
  2265  		idx := br - jumpTable0
  2266  		val := int64(idx)
  2267  		if v, off := isConstDelta(c); v != nil {
  2268  			// Establish the bound on the underlying value we're switching on,
  2269  			// not on the offset-ed value used as the jump table index.
  2270  			c = v
  2271  			val -= off
  2272  		}
  2273  		ft.newLimit(c, limit{min: val, max: val, umin: uint64(val), umax: uint64(val)})
  2274  	default:
  2275  		panic("unknown branch")
  2276  	}
  2277  }
  2278  
  2279  // addRestrictions updates restrictions from the immediate
  2280  // dominating block (p) using r.
  2281  func addRestrictions(parent *Block, ft *factsTable, t domain, v, w *Value, r relation) {
  2282  	if t == 0 {
  2283  		// Trivial case: nothing to do.
  2284  		// Should not happen, but just in case.
  2285  		return
  2286  	}
  2287  	for i := domain(1); i <= t; i <<= 1 {
  2288  		if t&i == 0 {
  2289  			continue
  2290  		}
  2291  		ft.update(parent, v, w, i, r)
  2292  	}
  2293  }
  2294  
  2295  func unsignedAddOverflows(a, b uint64, t *types.Type) bool {
  2296  	switch t.Size() {
  2297  	case 8:
  2298  		return a+b < a
  2299  	case 4:
  2300  		return a+b > math.MaxUint32
  2301  	case 2:
  2302  		return a+b > math.MaxUint16
  2303  	case 1:
  2304  		return a+b > math.MaxUint8
  2305  	default:
  2306  		panic("unreachable")
  2307  	}
  2308  }
  2309  
  2310  func signedAddOverflowsOrUnderflows(a, b int64, t *types.Type) bool {
  2311  	r := a + b
  2312  	switch t.Size() {
  2313  	case 8:
  2314  		return (a >= 0 && b >= 0 && r < 0) || (a < 0 && b < 0 && r >= 0)
  2315  	case 4:
  2316  		return r < math.MinInt32 || math.MaxInt32 < r
  2317  	case 2:
  2318  		return r < math.MinInt16 || math.MaxInt16 < r
  2319  	case 1:
  2320  		return r < math.MinInt8 || math.MaxInt8 < r
  2321  	default:
  2322  		panic("unreachable")
  2323  	}
  2324  }
  2325  
  2326  func unsignedSubUnderflows(a, b uint64) bool {
  2327  	return a < b
  2328  }
  2329  
  2330  // checkForChunkedIndexBounds looks for index expressions of the form
  2331  // A[i+delta] where delta < K and i <= len(A)-K.  That is, this is a chunked
  2332  // iteration where the index is not directly compared to the length.
  2333  // if isReslice, then delta can be equal to K.
  2334  func checkForChunkedIndexBounds(ft *factsTable, b *Block, index, bound *Value, isReslice bool) bool {
  2335  	if bound.Op != OpSliceLen && bound.Op != OpSliceCap {
  2336  		return false
  2337  	}
  2338  
  2339  	// this is a slice bounds check against len or capacity,
  2340  	// and refers back to a prior check against length, which
  2341  	// will also work for the cap since that is not smaller
  2342  	// than the length.
  2343  
  2344  	slice := bound.Args[0]
  2345  	lim := ft.limits[index.ID]
  2346  	if lim.min < 0 {
  2347  		return false
  2348  	}
  2349  	i, delta := isConstDelta(index)
  2350  	if i == nil {
  2351  		return false
  2352  	}
  2353  	if delta < 0 {
  2354  		return false
  2355  	}
  2356  	// special case for blocked iteration over a slice.
  2357  	// slicelen > i + delta && <==== if clauses above
  2358  	// && index >= 0           <==== if clause above
  2359  	// delta >= 0 &&           <==== if clause above
  2360  	// slicelen-K >/>= x       <==== checked below
  2361  	// && K >=/> delta         <==== checked below
  2362  	// then v > w
  2363  	// example: i <=/< len - 4/3 means i+{0,1,2,3} are legal indices
  2364  	for o := ft.orderings[i.ID]; o != nil; o = o.next {
  2365  		if o.d != signed {
  2366  			continue
  2367  		}
  2368  		if ow := o.w; ow.Op == OpAdd64 {
  2369  			var lenOffset *Value
  2370  			if bound := ow.Args[0]; bound.Op == OpSliceLen && bound.Args[0] == slice {
  2371  				lenOffset = ow.Args[1]
  2372  			} else if bound := ow.Args[1]; bound.Op == OpSliceLen && bound.Args[0] == slice {
  2373  				lenOffset = ow.Args[0]
  2374  			}
  2375  			if lenOffset == nil || lenOffset.Op != OpConst64 {
  2376  				continue
  2377  			}
  2378  			if K := -lenOffset.AuxInt; K >= 0 {
  2379  				or := o.r
  2380  				if isReslice {
  2381  					K++
  2382  				}
  2383  				if or == lt {
  2384  					or = lt | eq
  2385  					K++
  2386  				}
  2387  				if K < 0 { // We hate thinking about overflow
  2388  					continue
  2389  				}
  2390  
  2391  				if delta < K && or == lt|eq {
  2392  					return true
  2393  				}
  2394  			}
  2395  		}
  2396  	}
  2397  	return false
  2398  }
  2399  
  2400  func addLocalFacts(ft *factsTable, b *Block) {
  2401  	ft.topoSortValuesInBlock(b)
  2402  
  2403  	for _, v := range b.Values {
  2404  		// Propagate constant ranges before relative relations to get
  2405  		// the most up-to-date constant bounds for isNonNegative calls.
  2406  		ft.flowLimit(v)
  2407  
  2408  		switch v.Op {
  2409  		case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2410  			x := ft.limits[v.Args[0].ID]
  2411  			y := ft.limits[v.Args[1].ID]
  2412  			if !unsignedAddOverflows(x.umax, y.umax, v.Type) {
  2413  				r := gt
  2414  				if x.maybeZero() {
  2415  					r |= eq
  2416  				}
  2417  				ft.update(b, v, v.Args[1], unsigned, r)
  2418  				r = gt
  2419  				if y.maybeZero() {
  2420  					r |= eq
  2421  				}
  2422  				ft.update(b, v, v.Args[0], unsigned, r)
  2423  			}
  2424  			if x.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
  2425  				r := gt
  2426  				if x.maybeZero() {
  2427  					r |= eq
  2428  				}
  2429  				ft.update(b, v, v.Args[1], signed, r)
  2430  			}
  2431  			if y.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
  2432  				r := gt
  2433  				if y.maybeZero() {
  2434  					r |= eq
  2435  				}
  2436  				ft.update(b, v, v.Args[0], signed, r)
  2437  			}
  2438  			if x.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
  2439  				r := lt
  2440  				if x.maybeZero() {
  2441  					r |= eq
  2442  				}
  2443  				ft.update(b, v, v.Args[1], signed, r)
  2444  			}
  2445  			if y.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
  2446  				r := lt
  2447  				if y.maybeZero() {
  2448  					r |= eq
  2449  				}
  2450  				ft.update(b, v, v.Args[0], signed, r)
  2451  			}
  2452  		case OpSub64, OpSub32, OpSub16, OpSub8:
  2453  			x := ft.limits[v.Args[0].ID]
  2454  			y := ft.limits[v.Args[1].ID]
  2455  			if !unsignedSubUnderflows(x.umin, y.umax) {
  2456  				r := lt
  2457  				if y.maybeZero() {
  2458  					r |= eq
  2459  				}
  2460  				ft.update(b, v, v.Args[0], unsigned, r)
  2461  			}
  2462  			// FIXME: we could also do signed facts but the overflow checks are much trickier and I don't need it yet.
  2463  		case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2464  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2465  			ft.update(b, v, v.Args[1], unsigned, lt|eq)
  2466  			if ft.isNonNegative(v.Args[0]) {
  2467  				ft.update(b, v, v.Args[0], signed, lt|eq)
  2468  			}
  2469  			if ft.isNonNegative(v.Args[1]) {
  2470  				ft.update(b, v, v.Args[1], signed, lt|eq)
  2471  			}
  2472  		case OpOr64, OpOr32, OpOr16, OpOr8:
  2473  			// TODO: investigate how to always add facts without much slowdown, see issue #57959
  2474  			//ft.update(b, v, v.Args[0], unsigned, gt|eq)
  2475  			//ft.update(b, v, v.Args[1], unsigned, gt|eq)
  2476  		case OpDiv64, OpDiv32, OpDiv16, OpDiv8:
  2477  			if !ft.isNonNegative(v.Args[1]) {
  2478  				break
  2479  			}
  2480  			fallthrough
  2481  		case OpRsh8x64, OpRsh8x32, OpRsh8x16, OpRsh8x8,
  2482  			OpRsh16x64, OpRsh16x32, OpRsh16x16, OpRsh16x8,
  2483  			OpRsh32x64, OpRsh32x32, OpRsh32x16, OpRsh32x8,
  2484  			OpRsh64x64, OpRsh64x32, OpRsh64x16, OpRsh64x8:
  2485  			if !ft.isNonNegative(v.Args[0]) {
  2486  				break
  2487  			}
  2488  			fallthrough
  2489  		case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u,
  2490  			OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8,
  2491  			OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8,
  2492  			OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
  2493  			OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8:
  2494  			switch add := v.Args[0]; add.Op {
  2495  			// round-up division pattern; given:
  2496  			// v = (x + y) / z
  2497  			// if y < z then v <= x
  2498  			case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2499  				z := v.Args[1]
  2500  				zl := ft.limits[z.ID]
  2501  				var uminDivisor uint64
  2502  				switch v.Op {
  2503  				case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u,
  2504  					OpDiv64, OpDiv32, OpDiv16, OpDiv8:
  2505  					uminDivisor = zl.umin
  2506  				case OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8,
  2507  					OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8,
  2508  					OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
  2509  					OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8,
  2510  					OpRsh8x64, OpRsh8x32, OpRsh8x16, OpRsh8x8,
  2511  					OpRsh16x64, OpRsh16x32, OpRsh16x16, OpRsh16x8,
  2512  					OpRsh32x64, OpRsh32x32, OpRsh32x16, OpRsh32x8,
  2513  					OpRsh64x64, OpRsh64x32, OpRsh64x16, OpRsh64x8:
  2514  					uminDivisor = 1 << zl.umin
  2515  				default:
  2516  					panic("unreachable")
  2517  				}
  2518  
  2519  				x := add.Args[0]
  2520  				xl := ft.limits[x.ID]
  2521  				y := add.Args[1]
  2522  				yl := ft.limits[y.ID]
  2523  				if !unsignedAddOverflows(xl.umax, yl.umax, add.Type) {
  2524  					if xl.umax < uminDivisor {
  2525  						ft.update(b, v, y, unsigned, lt|eq)
  2526  					}
  2527  					if yl.umax < uminDivisor {
  2528  						ft.update(b, v, x, unsigned, lt|eq)
  2529  					}
  2530  				}
  2531  			}
  2532  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2533  		case OpMod64, OpMod32, OpMod16, OpMod8:
  2534  			if !ft.isNonNegative(v.Args[0]) || !ft.isNonNegative(v.Args[1]) {
  2535  				break
  2536  			}
  2537  			fallthrough
  2538  		case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
  2539  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2540  			// Note: we have to be careful that this doesn't imply
  2541  			// that the modulus is >0, which isn't true until *after*
  2542  			// the mod instruction executes (and thus panics if the
  2543  			// modulus is 0). See issue 67625.
  2544  			ft.update(b, v, v.Args[1], unsigned, lt)
  2545  		case OpStringLen:
  2546  			if v.Args[0].Op == OpStringMake {
  2547  				ft.update(b, v, v.Args[0].Args[1], signed, eq)
  2548  			}
  2549  		case OpSliceLen:
  2550  			if v.Args[0].Op == OpSliceMake {
  2551  				ft.update(b, v, v.Args[0].Args[1], signed, eq)
  2552  			}
  2553  		case OpSliceCap:
  2554  			if v.Args[0].Op == OpSliceMake {
  2555  				ft.update(b, v, v.Args[0].Args[2], signed, eq)
  2556  			}
  2557  		case OpIsInBounds:
  2558  			if checkForChunkedIndexBounds(ft, b, v.Args[0], v.Args[1], false) {
  2559  				if b.Func.pass.debug > 0 {
  2560  					b.Func.Warnl(v.Pos, "Proved %s for blocked indexing", v.Op)
  2561  				}
  2562  				ft.booleanTrue(v)
  2563  			}
  2564  		case OpIsSliceInBounds:
  2565  			if checkForChunkedIndexBounds(ft, b, v.Args[0], v.Args[1], true) {
  2566  				if b.Func.pass.debug > 0 {
  2567  					b.Func.Warnl(v.Pos, "Proved %s for blocked reslicing", v.Op)
  2568  				}
  2569  				ft.booleanTrue(v)
  2570  			}
  2571  		case OpPhi:
  2572  			addLocalFactsPhi(ft, v)
  2573  		}
  2574  	}
  2575  }
  2576  
  2577  func addLocalFactsPhi(ft *factsTable, v *Value) {
  2578  	// Look for phis that implement min/max.
  2579  	//   z:
  2580  	//      c = Less64 x y (or other Less/Leq operation)
  2581  	//      If c -> bx by
  2582  	//   bx: <- z
  2583  	//       -> b ...
  2584  	//   by: <- z
  2585  	//      -> b ...
  2586  	//   b: <- bx by
  2587  	//      v = Phi x y
  2588  	// Then v is either min or max of x,y.
  2589  	// If it is the min, then we deduce v <= x && v <= y.
  2590  	// If it is the max, then we deduce v >= x && v >= y.
  2591  	// The min case is useful for the copy builtin, see issue 16833.
  2592  	if len(v.Args) != 2 {
  2593  		return
  2594  	}
  2595  	b := v.Block
  2596  	x := v.Args[0]
  2597  	y := v.Args[1]
  2598  	bx := b.Preds[0].b
  2599  	by := b.Preds[1].b
  2600  	var z *Block // branch point
  2601  	switch {
  2602  	case bx == by: // bx == by == z case
  2603  		z = bx
  2604  	case by.uniquePred() == bx: // bx == z case
  2605  		z = bx
  2606  	case bx.uniquePred() == by: // by == z case
  2607  		z = by
  2608  	case bx.uniquePred() == by.uniquePred():
  2609  		z = bx.uniquePred()
  2610  	}
  2611  	if z == nil || z.Kind != BlockIf {
  2612  		return
  2613  	}
  2614  	c := z.Controls[0]
  2615  	if len(c.Args) != 2 {
  2616  		return
  2617  	}
  2618  	var isMin bool // if c, a less-than comparison, is true, phi chooses x.
  2619  	if bx == z {
  2620  		isMin = b.Preds[0].i == 0
  2621  	} else {
  2622  		isMin = bx.Preds[0].i == 0
  2623  	}
  2624  	if c.Args[0] == x && c.Args[1] == y {
  2625  		// ok
  2626  	} else if c.Args[0] == y && c.Args[1] == x {
  2627  		// Comparison is reversed from how the values are listed in the Phi.
  2628  		isMin = !isMin
  2629  	} else {
  2630  		// Not comparing x and y.
  2631  		return
  2632  	}
  2633  	var dom domain
  2634  	switch c.Op {
  2635  	case OpLess64, OpLess32, OpLess16, OpLess8, OpLeq64, OpLeq32, OpLeq16, OpLeq8:
  2636  		dom = signed
  2637  	case OpLess64U, OpLess32U, OpLess16U, OpLess8U, OpLeq64U, OpLeq32U, OpLeq16U, OpLeq8U:
  2638  		dom = unsigned
  2639  	default:
  2640  		return
  2641  	}
  2642  	var rel relation
  2643  	if isMin {
  2644  		rel = lt | eq
  2645  	} else {
  2646  		rel = gt | eq
  2647  	}
  2648  	ft.update(b, v, x, dom, rel)
  2649  	ft.update(b, v, y, dom, rel)
  2650  }
  2651  
  2652  var ctzNonZeroOp = map[Op]Op{
  2653  	OpCtz8:  OpCtz8NonZero,
  2654  	OpCtz16: OpCtz16NonZero,
  2655  	OpCtz32: OpCtz32NonZero,
  2656  	OpCtz64: OpCtz64NonZero,
  2657  }
  2658  var mostNegativeDividend = map[Op]int64{
  2659  	OpDiv16: -1 << 15,
  2660  	OpMod16: -1 << 15,
  2661  	OpDiv32: -1 << 31,
  2662  	OpMod32: -1 << 31,
  2663  	OpDiv64: -1 << 63,
  2664  	OpMod64: -1 << 63,
  2665  }
  2666  var unsignedOp = map[Op]Op{
  2667  	OpDiv8:     OpDiv8u,
  2668  	OpDiv16:    OpDiv16u,
  2669  	OpDiv32:    OpDiv32u,
  2670  	OpDiv64:    OpDiv64u,
  2671  	OpMod8:     OpMod8u,
  2672  	OpMod16:    OpMod16u,
  2673  	OpMod32:    OpMod32u,
  2674  	OpMod64:    OpMod64u,
  2675  	OpRsh8x8:   OpRsh8Ux8,
  2676  	OpRsh8x16:  OpRsh8Ux16,
  2677  	OpRsh8x32:  OpRsh8Ux32,
  2678  	OpRsh8x64:  OpRsh8Ux64,
  2679  	OpRsh16x8:  OpRsh16Ux8,
  2680  	OpRsh16x16: OpRsh16Ux16,
  2681  	OpRsh16x32: OpRsh16Ux32,
  2682  	OpRsh16x64: OpRsh16Ux64,
  2683  	OpRsh32x8:  OpRsh32Ux8,
  2684  	OpRsh32x16: OpRsh32Ux16,
  2685  	OpRsh32x32: OpRsh32Ux32,
  2686  	OpRsh32x64: OpRsh32Ux64,
  2687  	OpRsh64x8:  OpRsh64Ux8,
  2688  	OpRsh64x16: OpRsh64Ux16,
  2689  	OpRsh64x32: OpRsh64Ux32,
  2690  	OpRsh64x64: OpRsh64Ux64,
  2691  }
  2692  
  2693  var bytesizeToConst = [...]Op{
  2694  	8 / 8:  OpConst8,
  2695  	16 / 8: OpConst16,
  2696  	32 / 8: OpConst32,
  2697  	64 / 8: OpConst64,
  2698  }
  2699  var bytesizeToNeq = [...]Op{
  2700  	8 / 8:  OpNeq8,
  2701  	16 / 8: OpNeq16,
  2702  	32 / 8: OpNeq32,
  2703  	64 / 8: OpNeq64,
  2704  }
  2705  var bytesizeToAnd = [...]Op{
  2706  	8 / 8:  OpAnd8,
  2707  	16 / 8: OpAnd16,
  2708  	32 / 8: OpAnd32,
  2709  	64 / 8: OpAnd64,
  2710  }
  2711  
  2712  // simplifyBlock simplifies some constant values in b and evaluates
  2713  // branches to non-uniquely dominated successors of b.
  2714  func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) {
  2715  	for iv, v := range b.Values {
  2716  		switch v.Op {
  2717  		case OpStaticLECall:
  2718  			if b.Func.pass.debug > 0 && len(v.Args) == 2 {
  2719  				fn := auxToCall(v.Aux).Fn
  2720  				if fn != nil && strings.Contains(fn.String(), "prove") {
  2721  					// Print bounds of any argument to single-arg function with "prove" in name,
  2722  					// for debugging and especially for test/prove.go.
  2723  					// (v.Args[1] is mem).
  2724  					x := v.Args[0]
  2725  					b.Func.Warnl(v.Pos, "Proved %v (%v)", ft.limits[x.ID], x)
  2726  				}
  2727  			}
  2728  		case OpSlicemask:
  2729  			// Replace OpSlicemask operations in b with constants where possible.
  2730  			cap := v.Args[0]
  2731  			x, delta := isConstDelta(cap)
  2732  			if x != nil {
  2733  				// slicemask(x + y)
  2734  				// if x is larger than -y (y is negative), then slicemask is -1.
  2735  				lim := ft.limits[x.ID]
  2736  				if lim.umin > uint64(-delta) {
  2737  					if cap.Op == OpAdd64 {
  2738  						v.reset(OpConst64)
  2739  					} else {
  2740  						v.reset(OpConst32)
  2741  					}
  2742  					if b.Func.pass.debug > 0 {
  2743  						b.Func.Warnl(v.Pos, "Proved slicemask not needed")
  2744  					}
  2745  					v.AuxInt = -1
  2746  				}
  2747  				break
  2748  			}
  2749  			lim := ft.limits[cap.ID]
  2750  			if lim.umin > 0 {
  2751  				if cap.Type.Size() == 8 {
  2752  					v.reset(OpConst64)
  2753  				} else {
  2754  					v.reset(OpConst32)
  2755  				}
  2756  				if b.Func.pass.debug > 0 {
  2757  					b.Func.Warnl(v.Pos, "Proved slicemask not needed (by limit)")
  2758  				}
  2759  				v.AuxInt = -1
  2760  			}
  2761  
  2762  		case OpCtz8, OpCtz16, OpCtz32, OpCtz64:
  2763  			// On some architectures, notably amd64, we can generate much better
  2764  			// code for CtzNN if we know that the argument is non-zero.
  2765  			// Capture that information here for use in arch-specific optimizations.
  2766  			x := v.Args[0]
  2767  			lim := ft.limits[x.ID]
  2768  			if lim.umin > 0 || lim.min > 0 || lim.max < 0 {
  2769  				if b.Func.pass.debug > 0 {
  2770  					b.Func.Warnl(v.Pos, "Proved %v non-zero", v.Op)
  2771  				}
  2772  				v.Op = ctzNonZeroOp[v.Op]
  2773  			}
  2774  		case OpRsh8x8, OpRsh8x16, OpRsh8x32, OpRsh8x64,
  2775  			OpRsh16x8, OpRsh16x16, OpRsh16x32, OpRsh16x64,
  2776  			OpRsh32x8, OpRsh32x16, OpRsh32x32, OpRsh32x64,
  2777  			OpRsh64x8, OpRsh64x16, OpRsh64x32, OpRsh64x64:
  2778  			if ft.isNonNegative(v.Args[0]) {
  2779  				if b.Func.pass.debug > 0 {
  2780  					b.Func.Warnl(v.Pos, "Proved %v is unsigned", v.Op)
  2781  				}
  2782  				v.Op = unsignedOp[v.Op]
  2783  			}
  2784  			fallthrough
  2785  		case OpLsh8x8, OpLsh8x16, OpLsh8x32, OpLsh8x64,
  2786  			OpLsh16x8, OpLsh16x16, OpLsh16x32, OpLsh16x64,
  2787  			OpLsh32x8, OpLsh32x16, OpLsh32x32, OpLsh32x64,
  2788  			OpLsh64x8, OpLsh64x16, OpLsh64x32, OpLsh64x64,
  2789  			OpRsh8Ux8, OpRsh8Ux16, OpRsh8Ux32, OpRsh8Ux64,
  2790  			OpRsh16Ux8, OpRsh16Ux16, OpRsh16Ux32, OpRsh16Ux64,
  2791  			OpRsh32Ux8, OpRsh32Ux16, OpRsh32Ux32, OpRsh32Ux64,
  2792  			OpRsh64Ux8, OpRsh64Ux16, OpRsh64Ux32, OpRsh64Ux64:
  2793  			// Check whether, for a << b, we know that b
  2794  			// is strictly less than the number of bits in a.
  2795  			by := v.Args[1]
  2796  			lim := ft.limits[by.ID]
  2797  			bits := 8 * v.Args[0].Type.Size()
  2798  			if lim.umax < uint64(bits) || (lim.max < bits && ft.isNonNegative(by)) {
  2799  				v.AuxInt = 1 // see shiftIsBounded
  2800  				if b.Func.pass.debug > 0 && !by.isGenericIntConst() {
  2801  					b.Func.Warnl(v.Pos, "Proved %v bounded", v.Op)
  2802  				}
  2803  			}
  2804  		case OpDiv8, OpDiv16, OpDiv32, OpDiv64, OpMod8, OpMod16, OpMod32, OpMod64:
  2805  			p, q := ft.limits[v.Args[0].ID], ft.limits[v.Args[1].ID] // p/q
  2806  			if p.nonnegative() && q.nonnegative() {
  2807  				if b.Func.pass.debug > 0 {
  2808  					b.Func.Warnl(v.Pos, "Proved %v is unsigned", v.Op)
  2809  				}
  2810  				v.Op = unsignedOp[v.Op]
  2811  				v.AuxInt = 0
  2812  				break
  2813  			}
  2814  			// Fixup code can be avoided on x86 if we know
  2815  			//  the divisor is not -1 or the dividend > MinIntNN.
  2816  			if v.Op != OpDiv8 && v.Op != OpMod8 && (q.max < -1 || q.min > -1 || p.min > mostNegativeDividend[v.Op]) {
  2817  				// See DivisionNeedsFixUp in rewrite.go.
  2818  				// v.AuxInt = 1 means we have proved that the divisor is not -1
  2819  				// or that the dividend is not the most negative integer,
  2820  				// so we do not need to add fix-up code.
  2821  				if b.Func.pass.debug > 0 {
  2822  					b.Func.Warnl(v.Pos, "Proved %v does not need fix-up", v.Op)
  2823  				}
  2824  				// Only usable on amd64 and 386, and only for ≥ 16-bit ops.
  2825  				// Don't modify AuxInt on other architectures, as that can interfere with CSE.
  2826  				// (Print the debug info above always, so that test/prove.go can be
  2827  				// checked on non-x86 systems.)
  2828  				// TODO: add other architectures?
  2829  				if b.Func.Config.arch == "386" || b.Func.Config.arch == "amd64" {
  2830  					v.AuxInt = 1
  2831  				}
  2832  			}
  2833  		case OpMul64, OpMul32, OpMul16, OpMul8:
  2834  			if vl := ft.limits[v.ID]; vl.min == vl.max || vl.umin == vl.umax {
  2835  				// v is going to be constant folded away; don't "optimize" it.
  2836  				break
  2837  			}
  2838  			x := v.Args[0]
  2839  			xl := ft.limits[x.ID]
  2840  			y := v.Args[1]
  2841  			yl := ft.limits[y.ID]
  2842  			if xl.umin == xl.umax && isPowerOfTwo(int64(xl.umin)) ||
  2843  				xl.min == xl.max && isPowerOfTwo(xl.min) ||
  2844  				yl.umin == yl.umax && isPowerOfTwo(int64(yl.umin)) ||
  2845  				yl.min == yl.max && isPowerOfTwo(yl.min) {
  2846  				// 0,1 * a power of two is better done as a shift
  2847  				break
  2848  			}
  2849  			switch xOne, yOne := xl.umax <= 1, yl.umax <= 1; {
  2850  			case xOne && yOne:
  2851  				v.Op = bytesizeToAnd[v.Type.Size()]
  2852  				if b.Func.pass.debug > 0 {
  2853  					b.Func.Warnl(v.Pos, "Rewrote Mul %v into And", v)
  2854  				}
  2855  			case yOne && b.Func.Config.haveCondSelect:
  2856  				x, y = y, x
  2857  				fallthrough
  2858  			case xOne && b.Func.Config.haveCondSelect:
  2859  				if !canCondSelect(v, b.Func.Config.arch, nil) {
  2860  					break
  2861  				}
  2862  				zero := b.Func.constVal(bytesizeToConst[v.Type.Size()], v.Type, 0, true)
  2863  				ft.initLimitForNewValue(zero)
  2864  				check := b.NewValue2(v.Pos, bytesizeToNeq[v.Type.Size()], types.Types[types.TBOOL], zero, x)
  2865  				ft.initLimitForNewValue(check)
  2866  				v.reset(OpCondSelect)
  2867  				v.AddArg3(y, zero, check)
  2868  
  2869  				// FIXME: workaround for go.dev/issues/76060
  2870  				// we need to schedule the Neq before the CondSelect even tho
  2871  				// scheduling is meaningless until we reach the schedule pass.
  2872  				if b.Values[len(b.Values)-1] != check {
  2873  					panic("unreachable; failed sanity check, new value isn't at the end of the block")
  2874  				}
  2875  				b.Values[iv], b.Values[len(b.Values)-1] = b.Values[len(b.Values)-1], b.Values[iv]
  2876  
  2877  				if b.Func.pass.debug > 0 {
  2878  					b.Func.Warnl(v.Pos, "Rewrote Mul %v into CondSelect; %v is bool", v, x)
  2879  				}
  2880  			}
  2881  		}
  2882  
  2883  		// Fold provable constant results.
  2884  		// Helps in cases where we reuse a value after branching on its equality.
  2885  		for i, arg := range v.Args {
  2886  			lim := ft.limits[arg.ID]
  2887  			var constValue int64
  2888  			switch {
  2889  			case lim.min == lim.max:
  2890  				constValue = lim.min
  2891  			case lim.umin == lim.umax:
  2892  				constValue = int64(lim.umin)
  2893  			default:
  2894  				continue
  2895  			}
  2896  			switch arg.Op {
  2897  			case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConstNil:
  2898  				continue
  2899  			}
  2900  			typ := arg.Type
  2901  			f := b.Func
  2902  			var c *Value
  2903  			switch {
  2904  			case typ.IsBoolean():
  2905  				c = f.ConstBool(typ, constValue != 0)
  2906  			case typ.IsInteger() && typ.Size() == 1:
  2907  				c = f.ConstInt8(typ, int8(constValue))
  2908  			case typ.IsInteger() && typ.Size() == 2:
  2909  				c = f.ConstInt16(typ, int16(constValue))
  2910  			case typ.IsInteger() && typ.Size() == 4:
  2911  				c = f.ConstInt32(typ, int32(constValue))
  2912  			case typ.IsInteger() && typ.Size() == 8:
  2913  				c = f.ConstInt64(typ, constValue)
  2914  			case typ.IsPtrShaped():
  2915  				if constValue == 0 {
  2916  					c = f.ConstNil(typ)
  2917  				} else {
  2918  					// Not sure how this might happen, but if it
  2919  					// does, just skip it.
  2920  					continue
  2921  				}
  2922  			default:
  2923  				// Not sure how this might happen, but if it
  2924  				// does, just skip it.
  2925  				continue
  2926  			}
  2927  			v.SetArg(i, c)
  2928  			ft.initLimitForNewValue(c)
  2929  			if b.Func.pass.debug > 1 {
  2930  				b.Func.Warnl(v.Pos, "Proved %v's arg %d (%v) is constant %d", v, i, arg, constValue)
  2931  			}
  2932  		}
  2933  	}
  2934  
  2935  	if b.Kind != BlockIf {
  2936  		return
  2937  	}
  2938  
  2939  	// Consider outgoing edges from this block.
  2940  	parent := b
  2941  	for i, branch := range [...]branch{positive, negative} {
  2942  		child := parent.Succs[i].b
  2943  		if getBranch(sdom, parent, child) != unknown {
  2944  			// For edges to uniquely dominated blocks, we
  2945  			// already did this when we visited the child.
  2946  			continue
  2947  		}
  2948  		// For edges to other blocks, this can trim a branch
  2949  		// even if we couldn't get rid of the child itself.
  2950  		ft.checkpoint()
  2951  		addBranchRestrictions(ft, parent, branch)
  2952  		unsat := ft.unsat
  2953  		ft.restore()
  2954  		if unsat {
  2955  			// This branch is impossible, so remove it
  2956  			// from the block.
  2957  			removeBranch(parent, branch)
  2958  			// No point in considering the other branch.
  2959  			// (It *is* possible for both to be
  2960  			// unsatisfiable since the fact table is
  2961  			// incomplete. We could turn this into a
  2962  			// BlockExit, but it doesn't seem worth it.)
  2963  			break
  2964  		}
  2965  	}
  2966  }
  2967  
  2968  func removeBranch(b *Block, branch branch) {
  2969  	c := b.Controls[0]
  2970  	if b.Func.pass.debug > 0 {
  2971  		verb := "Proved"
  2972  		if branch == positive {
  2973  			verb = "Disproved"
  2974  		}
  2975  		if b.Func.pass.debug > 1 {
  2976  			b.Func.Warnl(b.Pos, "%s %s (%s)", verb, c.Op, c)
  2977  		} else {
  2978  			b.Func.Warnl(b.Pos, "%s %s", verb, c.Op)
  2979  		}
  2980  	}
  2981  	if c != nil && c.Pos.IsStmt() == src.PosIsStmt && c.Pos.SameFileAndLine(b.Pos) {
  2982  		// attempt to preserve statement marker.
  2983  		b.Pos = b.Pos.WithIsStmt()
  2984  	}
  2985  	if branch == positive || branch == negative {
  2986  		b.Kind = BlockFirst
  2987  		b.ResetControls()
  2988  		if branch == positive {
  2989  			b.swapSuccessors()
  2990  		}
  2991  	} else {
  2992  		// TODO: figure out how to remove an entry from a jump table
  2993  	}
  2994  }
  2995  
  2996  // isConstDelta returns non-nil if v is equivalent to w+delta (signed).
  2997  func isConstDelta(v *Value) (w *Value, delta int64) {
  2998  	cop := OpConst64
  2999  	switch v.Op {
  3000  	case OpAdd32, OpSub32:
  3001  		cop = OpConst32
  3002  	case OpAdd16, OpSub16:
  3003  		cop = OpConst16
  3004  	case OpAdd8, OpSub8:
  3005  		cop = OpConst8
  3006  	}
  3007  	switch v.Op {
  3008  	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  3009  		if v.Args[0].Op == cop {
  3010  			return v.Args[1], v.Args[0].AuxInt
  3011  		}
  3012  		if v.Args[1].Op == cop {
  3013  			return v.Args[0], v.Args[1].AuxInt
  3014  		}
  3015  	case OpSub64, OpSub32, OpSub16, OpSub8:
  3016  		if v.Args[1].Op == cop {
  3017  			aux := v.Args[1].AuxInt
  3018  			if aux != -aux { // Overflow; too bad
  3019  				return v.Args[0], -aux
  3020  			}
  3021  		}
  3022  	}
  3023  	return nil, 0
  3024  }
  3025  
  3026  // isCleanExt reports whether v is the result of a value-preserving
  3027  // sign or zero extension.
  3028  func isCleanExt(v *Value) bool {
  3029  	switch v.Op {
  3030  	case OpSignExt8to16, OpSignExt8to32, OpSignExt8to64,
  3031  		OpSignExt16to32, OpSignExt16to64, OpSignExt32to64:
  3032  		// signed -> signed is the only value-preserving sign extension
  3033  		return v.Args[0].Type.IsSigned() && v.Type.IsSigned()
  3034  
  3035  	case OpZeroExt8to16, OpZeroExt8to32, OpZeroExt8to64,
  3036  		OpZeroExt16to32, OpZeroExt16to64, OpZeroExt32to64:
  3037  		// unsigned -> signed/unsigned are value-preserving zero extensions
  3038  		return !v.Args[0].Type.IsSigned()
  3039  	}
  3040  	return false
  3041  }
  3042  
  3043  func getDependencyScore(scores []uint, v *Value) (score uint) {
  3044  	if score = scores[v.ID]; score != 0 {
  3045  		return score
  3046  	}
  3047  	defer func() {
  3048  		scores[v.ID] = score
  3049  	}()
  3050  	if v.Op == OpPhi {
  3051  		return 1
  3052  	}
  3053  	score = 2 // NIT(@Jorropo): always order phis first to make GOSSAFUNC pretty.
  3054  	for _, a := range v.Args {
  3055  		if a.Block != v.Block {
  3056  			continue
  3057  		}
  3058  		score = max(score, getDependencyScore(scores, a)+1)
  3059  	}
  3060  	return score
  3061  }
  3062  
  3063  // topoSortValuesInBlock ensure ranging over b.Values visit values before they are being used.
  3064  // It does not consider dependencies with other blocks; thus Phi nodes are considered to not have any dependecies.
  3065  // The result is always determistic and does not depend on the previous slice ordering.
  3066  func (ft *factsTable) topoSortValuesInBlock(b *Block) {
  3067  	f := b.Func
  3068  	want := f.NumValues()
  3069  
  3070  	scores := ft.reusedTopoSortScoresTable
  3071  	if want <= cap(scores) {
  3072  		scores = scores[:want]
  3073  	} else {
  3074  		if cap(scores) > 0 {
  3075  			f.Cache.freeUintSlice(scores)
  3076  		}
  3077  		scores = f.Cache.allocUintSlice(want)
  3078  		ft.reusedTopoSortScoresTable = scores
  3079  	}
  3080  
  3081  	for _, v := range b.Values {
  3082  		scores[v.ID] = 0 // sentinel
  3083  	}
  3084  
  3085  	slices.SortFunc(b.Values, func(a, b *Value) int {
  3086  		dependencyScoreA := getDependencyScore(scores, a)
  3087  		dependencyScoreB := getDependencyScore(scores, b)
  3088  		if dependencyScoreA != dependencyScoreB {
  3089  			return cmp.Compare(dependencyScoreA, dependencyScoreB)
  3090  		}
  3091  		return cmp.Compare(a.ID, b.ID)
  3092  	})
  3093  }
  3094  

View as plain text