Source file src/cmd/compile/internal/ssa/pair.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/ir"
     9  	"cmd/compile/internal/types"
    10  	"slices"
    11  )
    12  
    13  // The pair pass finds memory operations that can be paired up
    14  // into single 2-register memory instructions.
    15  func pair(f *Func) {
    16  	// Only arm64 for now. This pass is fairly arch-specific.
    17  	switch f.Config.arch {
    18  	case "arm64":
    19  	default:
    20  		return
    21  	}
    22  	pairLoads(f)
    23  	pairStores(f)
    24  }
    25  
    26  type pairableLoadInfo struct {
    27  	width int64 // width of one element in the pair, in bytes
    28  	pair  Op
    29  }
    30  
    31  // All pairableLoad ops must take 2 arguments, a pointer and a memory.
    32  // They must also take an offset in Aux/AuxInt.
    33  var pairableLoads = map[Op]pairableLoadInfo{
    34  	OpARM64MOVDload:  {8, OpARM64LDP},
    35  	OpARM64MOVWUload: {4, OpARM64LDPW},
    36  	OpARM64MOVWload:  {4, OpARM64LDPSW},
    37  	// TODO: conceivably we could pair a signed and unsigned load
    38  	// if we knew the upper bits of one of them weren't being used.
    39  	OpARM64FMOVDload: {8, OpARM64FLDPD},
    40  	OpARM64FMOVSload: {4, OpARM64FLDPS},
    41  }
    42  
    43  type pairableStoreInfo struct {
    44  	width int64 // width of one element in the pair, in bytes
    45  	pair  Op
    46  }
    47  
    48  // All pairableStore keys must take 3 arguments, a pointer, a value, and a memory.
    49  // All pairableStore values must take 4 arguments, a pointer, 2 values, and a memory.
    50  // They must also take an offset in Aux/AuxInt.
    51  var pairableStores = map[Op]pairableStoreInfo{
    52  	OpARM64MOVDstore:  {8, OpARM64STP},
    53  	OpARM64MOVWstore:  {4, OpARM64STPW},
    54  	OpARM64FMOVDstore: {8, OpARM64FSTPD},
    55  	OpARM64FMOVSstore: {4, OpARM64FSTPS},
    56  	// TODO: storezero variants.
    57  }
    58  
    59  // offsetOk returns true if a pair instruction should be used
    60  // for the offset Aux+off, when the data width (of the
    61  // unpaired instructions) is width.
    62  // This function is best-effort. The compiled function must
    63  // still work if offsetOk always returns true.
    64  // TODO: this is currently arm64-specific.
    65  func offsetOk(aux Aux, off, width int64) bool {
    66  	if true {
    67  		// Seems to generate slightly smaller code if we just
    68  		// always allow this rewrite.
    69  		//
    70  		// Without pairing, we have 2 load instructions, like:
    71  		//   LDR 88(R0), R1
    72  		//   LDR 96(R0), R2
    73  		// with pairing we have, best case:
    74  		//   LDP 88(R0), R1, R2
    75  		// but maybe we need an adjuster if out of range or unaligned:
    76  		//   ADD R0, $88, R27
    77  		//   LDP (R27), R1, R2
    78  		// Even with the adjuster, it is at least no worse.
    79  		//
    80  		// A similar situation occurs when accessing globals.
    81  		// Two loads from globals requires 4 instructions,
    82  		// two ADRP and two LDR. With pairing, we need
    83  		// ADRP+ADD+LDP, three instructions.
    84  		//
    85  		// With pairing, it looks like the critical path might
    86  		// be a little bit longer. But it should never be more
    87  		// instructions.
    88  		// TODO: see if that longer critical path causes any
    89  		// regressions.
    90  		return true
    91  	}
    92  	if aux != nil {
    93  		if _, ok := aux.(*ir.Name); !ok {
    94  			// Offset is probably too big (globals).
    95  			return false
    96  		}
    97  		// We let *ir.Names pass here, as
    98  		// they are probably small offsets from SP.
    99  		// There's no guarantee that we're in range
   100  		// in that case though (we don't know the
   101  		// stack frame size yet), so the assembler
   102  		// might need to issue fixup instructions.
   103  		// Assume some small frame size.
   104  		if off >= 0 {
   105  			off += 120
   106  		}
   107  		// TODO: figure out how often this helps vs. hurts.
   108  	}
   109  	switch width {
   110  	case 4:
   111  		if off >= -256 && off <= 252 && off%4 == 0 {
   112  			return true
   113  		}
   114  	case 8:
   115  		if off >= -512 && off <= 504 && off%8 == 0 {
   116  			return true
   117  		}
   118  	}
   119  	return false
   120  }
   121  
   122  func pairLoads(f *Func) {
   123  	var loads []*Value
   124  
   125  	// Registry of aux values for sorting.
   126  	auxIDs := map[Aux]int{}
   127  	auxID := func(aux Aux) int {
   128  		id, ok := auxIDs[aux]
   129  		if !ok {
   130  			id = len(auxIDs)
   131  			auxIDs[aux] = id
   132  		}
   133  		return id
   134  	}
   135  
   136  	for _, b := range f.Blocks {
   137  		// Find loads.
   138  		loads = loads[:0]
   139  		clear(auxIDs)
   140  		for _, v := range b.Values {
   141  			info := pairableLoads[v.Op]
   142  			if info.width == 0 {
   143  				continue // not pairable
   144  			}
   145  			if !offsetOk(v.Aux, v.AuxInt, info.width) {
   146  				continue // not advisable
   147  			}
   148  			loads = append(loads, v)
   149  		}
   150  		if len(loads) < 2 {
   151  			continue
   152  		}
   153  
   154  		// Sort to put pairable loads together.
   155  		slices.SortFunc(loads, func(x, y *Value) int {
   156  			// First sort by op, ptr, and memory arg.
   157  			if x.Op != y.Op {
   158  				return int(x.Op - y.Op)
   159  			}
   160  			if x.Args[0].ID != y.Args[0].ID {
   161  				return int(x.Args[0].ID - y.Args[0].ID)
   162  			}
   163  			if x.Args[1].ID != y.Args[1].ID {
   164  				return int(x.Args[1].ID - y.Args[1].ID)
   165  			}
   166  			// Then sort by aux. (nil first, then by aux ID)
   167  			if x.Aux != nil {
   168  				if y.Aux == nil {
   169  					return 1
   170  				}
   171  				a, b := auxID(x.Aux), auxID(y.Aux)
   172  				if a != b {
   173  					return a - b
   174  				}
   175  			} else if y.Aux != nil {
   176  				return -1
   177  			}
   178  			// Then sort by offset, low to high.
   179  			return int(x.AuxInt - y.AuxInt)
   180  		})
   181  
   182  		// Look for pairable loads.
   183  		for i := 0; i < len(loads)-1; i++ {
   184  			x := loads[i]
   185  			y := loads[i+1]
   186  			if x.Op != y.Op || x.Args[0] != y.Args[0] || x.Args[1] != y.Args[1] {
   187  				continue
   188  			}
   189  			if x.Aux != y.Aux {
   190  				continue
   191  			}
   192  			if x.AuxInt+pairableLoads[x.Op].width != y.AuxInt {
   193  				continue
   194  			}
   195  
   196  			// Commit point.
   197  
   198  			// Make the 2-register load.
   199  			load := b.NewValue2IA(x.Pos, pairableLoads[x.Op].pair, types.NewTuple(x.Type, y.Type), x.AuxInt, x.Aux, x.Args[0], x.Args[1])
   200  
   201  			// Modify x to be (Select0 load). Similar for y.
   202  			x.reset(OpSelect0)
   203  			x.SetArgs1(load)
   204  			y.reset(OpSelect1)
   205  			y.SetArgs1(load)
   206  
   207  			i++ // Skip y next time around the loop.
   208  		}
   209  	}
   210  }
   211  
   212  func pairStores(f *Func) {
   213  	last := f.Cache.allocBoolSlice(f.NumValues())
   214  	defer f.Cache.freeBoolSlice(last)
   215  
   216  	// prevStore returns the previous store in the
   217  	// same block, or nil if there are none.
   218  	prevStore := func(v *Value) *Value {
   219  		if v.Op == OpInitMem || v.Op == OpPhi {
   220  			return nil
   221  		}
   222  		m := v.MemoryArg()
   223  		if m.Block != v.Block {
   224  			return nil
   225  		}
   226  		return m
   227  	}
   228  
   229  	for _, b := range f.Blocks {
   230  		// Find last store in block, so we can
   231  		// walk the stores last to first.
   232  		// Last to first helps ensure that the rewrites we
   233  		// perform do not get in the way of subsequent rewrites.
   234  		for _, v := range b.Values {
   235  			if v.Type.IsMemory() {
   236  				last[v.ID] = true
   237  			}
   238  		}
   239  		for _, v := range b.Values {
   240  			if v.Type.IsMemory() {
   241  				if m := prevStore(v); m != nil {
   242  					last[m.ID] = false
   243  				}
   244  			}
   245  		}
   246  		var lastMem *Value
   247  		for _, v := range b.Values {
   248  			if last[v.ID] {
   249  				lastMem = v
   250  				break
   251  			}
   252  		}
   253  
   254  		// Check all stores, from last to first.
   255  	memCheck:
   256  		for v := lastMem; v != nil; v = prevStore(v) {
   257  			info := pairableStores[v.Op]
   258  			if info.width == 0 {
   259  				continue // Not pairable.
   260  			}
   261  			if !offsetOk(v.Aux, v.AuxInt, info.width) {
   262  				continue // Not advisable to pair.
   263  			}
   264  			ptr := v.Args[0]
   265  			val := v.Args[1]
   266  			mem := v.Args[2]
   267  			off := v.AuxInt
   268  			aux := v.Aux
   269  
   270  			// Look for earlier store we can combine with.
   271  			lowerOk := true
   272  			higherOk := true
   273  			count := 10 // max lookback distance
   274  			for w := prevStore(v); w != nil; w = prevStore(w) {
   275  				if w.Uses != 1 {
   276  					// We can't combine stores if the earlier
   277  					// store has any use besides the next one
   278  					// in the store chain.
   279  					// (Unless we could check the aliasing of
   280  					// all those other uses.)
   281  					continue memCheck
   282  				}
   283  				if w.Op == v.Op &&
   284  					w.Args[0] == ptr &&
   285  					w.Aux == aux &&
   286  					(lowerOk && w.AuxInt == off-info.width || higherOk && w.AuxInt == off+info.width) {
   287  					// This op is mergeable with v.
   288  
   289  					// Commit point.
   290  
   291  					// ptr val1 val2 mem
   292  					args := []*Value{ptr, val, w.Args[1], mem}
   293  					if w.AuxInt == off-info.width {
   294  						args[1], args[2] = args[2], args[1]
   295  						off -= info.width
   296  					}
   297  					v.reset(info.pair)
   298  					v.AddArgs(args...)
   299  					v.Aux = aux
   300  					v.AuxInt = off
   301  					v.Pos = w.Pos // take position of earlier of the two stores (TODO: not really working?)
   302  
   303  					// Make w just a memory copy.
   304  					wmem := w.MemoryArg()
   305  					w.reset(OpCopy)
   306  					w.SetArgs1(wmem)
   307  					continue memCheck
   308  				}
   309  				if count--; count == 0 {
   310  					// Only look back so far.
   311  					// This keeps us in O(n) territory, and it
   312  					// also prevents us from keeping values
   313  					// in registers for too long (and thus
   314  					// needing to spill them).
   315  					continue memCheck
   316  				}
   317  				// We're now looking at a store w which is currently
   318  				// between the store v that we're intending to merge into,
   319  				// and the store we'll eventually find to merge with it.
   320  				// Make sure this store doesn't alias with the one
   321  				// we'll be moving.
   322  				var width int64
   323  				switch w.Op {
   324  				case OpARM64MOVDstore, OpARM64MOVDstorezero, OpARM64FMOVDstore:
   325  					width = 8
   326  				case OpARM64MOVWstore, OpARM64MOVWstorezero, OpARM64FMOVSstore:
   327  					width = 4
   328  				case OpARM64MOVHstore, OpARM64MOVHstorezero:
   329  					width = 2
   330  				case OpARM64MOVBstore, OpARM64MOVBstorezero:
   331  					width = 1
   332  				case OpCopy:
   333  					continue // this was a store we merged earlier
   334  				default:
   335  					// Can't reorder with any other memory operations.
   336  					// (atomics, calls, ...)
   337  					continue memCheck
   338  				}
   339  
   340  				// We only allow reordering with respect to other
   341  				// writes to the same pointer and aux, so we can
   342  				// compute the exact the aliasing relationship.
   343  				if w.Args[0] != ptr || w.Aux != aux {
   344  					continue memCheck
   345  				}
   346  				if overlap(w.AuxInt, width, off-info.width, info.width) {
   347  					// Aliases with slot before v's location.
   348  					lowerOk = false
   349  				}
   350  				if overlap(w.AuxInt, width, off+info.width, info.width) {
   351  					// Aliases with slot after v's location.
   352  					higherOk = false
   353  				}
   354  				if !higherOk && !lowerOk {
   355  					continue memCheck
   356  				}
   357  			}
   358  		}
   359  	}
   360  }
   361  

View as plain text