Source file src/cmd/compile/internal/ssagen/pgen.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssagen

import (
	"fmt"
	"internal/buildcfg"
	"os"
	"sort"
	"sync"

	"cmd/compile/internal/base"
	"cmd/compile/internal/inline"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/liveness"
	"cmd/compile/internal/objw"
	"cmd/compile/internal/pgoir"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"cmd/internal/src"
)

// cmpstackvarlt reports whether the stack variable a sorts before b.
func cmpstackvarlt(a, b *ir.Name, mls *liveness.MergeLocalsState) bool {
	// Sort non-autos before autos.
	if needAlloc(a) != needAlloc(b) {
		return needAlloc(b)
	}

	// If both are non-auto (e.g., parameters, results), then sort by
	// frame offset (defined by ABI).
	if !needAlloc(a) {
		return a.FrameOffset() < b.FrameOffset()
	}

	// From here on, a and b are both autos (i.e., local variables).

	// Sort followers after leaders, if mls != nil.
	if mls != nil {
		aFollow := mls.Subsumed(a)
		bFollow := mls.Subsumed(b)
		if aFollow != bFollow {
			return bFollow
		}
	}

	// Sort used before unused (so AllocFrame can truncate unused
	// variables).
	if a.Used() != b.Used() {
		return a.Used()
	}

	// Sort pointer-typed variables before non-pointer-typed ones.
	// This keeps the stack's GC bitmap compact.
	ap := a.Type().HasPointers()
	bp := b.Type().HasPointers()
	if ap != bp {
		return ap
	}

	// Group variables that need zeroing, so we can zero them all
	// together efficiently.
	ap = a.Needzero()
	bp = b.Needzero()
	if ap != bp {
		return ap
	}

	// Sort variables in descending alignment order, so we can optimally
	// pack variables into the frame.
	if a.Type().Alignment() != b.Type().Alignment() {
		return a.Type().Alignment() > b.Type().Alignment()
	}

	// Sort normal variables before open-coded-defer slots, so that the
	// latter are grouped together and near the top of the frame (to
	// minimize varint encoding of their varp offset).
	if a.OpenDeferSlot() != b.OpenDeferSlot() {
		return a.OpenDeferSlot()
	}

	// If a and b are both open-coded defer slots, then sort them by
	// index in descending order, so they'll be laid out in the frame in
	// ascending order.
	//
	// Their index was saved in FrameOffset in state.openDeferSave.
	if a.OpenDeferSlot() {
		return a.FrameOffset() > b.FrameOffset()
	}

	// Tie breaker for stable results.
	return a.Sym().Name < b.Sym().Name
}
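
// A worked example (illustrative only; the variable names are
// hypothetical, not taken from this file): given three autos on a
// 64-bit target,
//
//	var p *int       // used, has pointers, size 8, align 8
//	var buf [16]byte // used, no pointers,  size 16, align 1
//	var dead int     // never used
//
// cmpstackvarlt sorts p before buf (pointer-typed variables first,
// keeping the GC bitmap compact) and buf before dead (used before
// unused, so AllocFrame can truncate dead off the end of fn.Dcl).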

// needAlloc reports whether n is within the current frame, for which we need to
// allocate space. In particular, it excludes arguments and results, which are in
// the caller's frame.
func needAlloc(n *ir.Name) bool {
	if n.Op() != ir.ONAME {
		base.FatalfAt(n.Pos(), "%v has unexpected Op %v", n, n.Op())
	}

	switch n.Class {
	case ir.PAUTO:
		return true
	case ir.PPARAM:
		return false
	case ir.PPARAMOUT:
		return n.IsOutputParamInRegisters()

	default:
		base.FatalfAt(n.Pos(), "%v has unexpected Class %v", n, n.Class)
		return false
	}
}
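
// For example (hypothetical signature, for illustration): in
//
//	func f(x int) (r int)
//
// x (PPARAM) lives in the caller's frame, so needAlloc reports false.
// For r (PPARAMOUT) it depends on the ABI: if r is returned in a
// register, it has no caller-frame slot and needs a spill slot in the
// current frame, so needAlloc reports true; otherwise r is in the
// caller's frame too and needAlloc reports false.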

func (s *ssafn) AllocFrame(f *ssa.Func) {
	s.stksize = 0
	s.stkptrsize = 0
	s.stkalign = int64(types.RegSize)
	fn := s.curfn

	// Mark the PAUTOs unused.
	for _, ln := range fn.Dcl {
		if ln.OpenDeferSlot() {
			// Open-coded defer slots have indices that were assigned
			// upfront during SSA construction, but the defer statement can
			// later get removed during deadcode elimination (#61895). To
			// keep their relative offsets correct, treat them all as used.
			continue
		}

		if needAlloc(ln) {
			ln.SetUsed(false)
		}
	}

	for _, l := range f.RegAlloc {
		if ls, ok := l.(ssa.LocalSlot); ok {
			ls.N.SetUsed(true)
		}
	}

	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if n, ok := v.Aux.(*ir.Name); ok {
				switch n.Class {
				case ir.PPARAMOUT:
					if n.IsOutputParamInRegisters() && v.Op == ssa.OpVarDef {
						// Ignore VarDef; look for "real" uses.
						// TODO: maybe do this for PAUTO as well?
						continue
					}
					fallthrough
				case ir.PPARAM, ir.PAUTO:
					n.SetUsed(true)
				}
			}
		}
	}

	var mls *liveness.MergeLocalsState
	var leaders map[*ir.Name]int64
	if base.Debug.MergeLocals != 0 {
		mls = liveness.MergeLocals(fn, f)
		if base.Debug.MergeLocalsTrace > 0 && mls != nil {
			savedNP, savedP := mls.EstSavings()
			fmt.Fprintf(os.Stderr, "%s: %d bytes of stack space saved via stack slot merging (%d nonpointer %d pointer)\n", ir.FuncName(fn), savedNP+savedP, savedNP, savedP)
			if base.Debug.MergeLocalsTrace > 1 {
				fmt.Fprintf(os.Stderr, "=-= merge locals state for %v:\n%v",
					fn, mls)
			}
		}
		leaders = make(map[*ir.Name]int64)
	}

	// Use sort.SliceStable instead of sort.Slice so stack layout (and thus
	// compiler output) is less sensitive to frontend changes that
	// introduce or remove unused variables.
	sort.SliceStable(fn.Dcl, func(i, j int) bool {
		return cmpstackvarlt(fn.Dcl[i], fn.Dcl[j], mls)
	})

	if mls != nil {
		// Rewrite fn.Dcl to reposition followers (subsumed vars) to
		// be immediately after the leader var in their partition.
		followers := []*ir.Name{}
		newdcl := make([]*ir.Name, 0, len(fn.Dcl))
		for i := 0; i < len(fn.Dcl); i++ {
			n := fn.Dcl[i]
			if mls.Subsumed(n) {
				continue
			}
			newdcl = append(newdcl, n)
			if mls.IsLeader(n) {
				followers = mls.Followers(n, followers)
				// Position followers immediately after their leader.
				newdcl = append(newdcl, followers...)
			}
		}
		fn.Dcl = newdcl
	}

	if base.Debug.MergeLocalsTrace > 1 && mls != nil {
		fmt.Fprintf(os.Stderr, "=-= sorted DCL for %v:\n", fn)
		for i, v := range fn.Dcl {
			if !ssa.IsMergeCandidate(v) {
				continue
			}
			fmt.Fprintf(os.Stderr, " %d: %q isleader=%v subsumed=%v used=%v sz=%d align=%d t=%s\n", i, v.Sym().Name, mls.IsLeader(v), mls.Subsumed(v), v.Used(), v.Type().Size(), v.Type().Alignment(), v.Type().String())
		}
	}

	// Reassign stack offsets of the locals that are used.
	lastHasPtr := false
	for i, n := range fn.Dcl {
		if n.Op() != ir.ONAME || n.Class != ir.PAUTO && !(n.Class == ir.PPARAMOUT && n.IsOutputParamInRegisters()) {
			// I.e., stack-assign only if AUTO, or if PARAMOUT in
			// registers (which have no predefined spill locations).
			continue
		}
		if mls != nil && mls.Subsumed(n) {
			continue
		}
		if !n.Used() {
			fn.DebugInfo.(*ssa.FuncDebug).OptDcl = fn.Dcl[i:]
			fn.Dcl = fn.Dcl[:i]
			break
		}
		types.CalcSize(n.Type())
		w := n.Type().Size()
		if w >= types.MaxWidth || w < 0 {
			base.Fatalf("bad width")
		}
		if w == 0 && lastHasPtr {
			// Pad between a pointer-containing object and a zero-sized object.
			// This prevents a pointer to the zero-sized object from being interpreted
			// as a pointer to the pointer-containing object (and causing it
			// to be scanned when it shouldn't be). See issue 24993.
			w = 1
		}
		s.stksize += w
		s.stksize = types.RoundUp(s.stksize, n.Type().Alignment())
		if n.Type().Alignment() > int64(types.RegSize) {
			s.stkalign = n.Type().Alignment()
		}
		if n.Type().HasPointers() {
			s.stkptrsize = s.stksize
			lastHasPtr = true
		} else {
			lastHasPtr = false
		}
		n.SetFrameOffset(-s.stksize)
		if mls != nil && mls.IsLeader(n) {
			leaders[n] = -s.stksize
		}
	}

	if mls != nil {
		// Update offsets of followers (subsumed vars) to be the
		// same as the leader var in their partition.
		for i := 0; i < len(fn.Dcl); i++ {
			n := fn.Dcl[i]
			if !mls.Subsumed(n) {
				continue
			}
			leader := mls.Leader(n)
			off, ok := leaders[leader]
			if !ok {
				panic("internal error missing leader")
			}
			// Set the stack offset of this subsumed (followed) var
			// to be the same as that of its leader.
			n.SetFrameOffset(off)
		}

		if base.Debug.MergeLocalsTrace > 1 {
			fmt.Fprintf(os.Stderr, "=-= stack layout for %v:\n", fn)
			for i, v := range fn.Dcl {
				if v.Op() != ir.ONAME || (v.Class != ir.PAUTO && !(v.Class == ir.PPARAMOUT && v.IsOutputParamInRegisters())) {
					continue
				}
				fmt.Fprintf(os.Stderr, " %d: %q frameoff %d isleader=%v subsumed=%v sz=%d align=%d t=%s\n", i, v.Sym().Name, v.FrameOffset(), mls.IsLeader(v), mls.Subsumed(v), v.Type().Size(), v.Type().Alignment(), v.Type().String())
			}
		}
	}

	s.stksize = types.RoundUp(s.stksize, s.stkalign)
	s.stkptrsize = types.RoundUp(s.stkptrsize, s.stkalign)
}
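
// To trace the offset arithmetic above with a hypothetical example
// (assuming a 64-bit target, so RegSize = 8): laying out a used *int
// (size 8, align 8, pointers) followed by a used [4]byte (size 4,
// align 1, no pointers):
//
//	stksize = 0 + 8, rounded up to 8  -> *int at FrameOffset -8, stkptrsize = 8
//	stksize = 8 + 4, rounded up to 12 -> [4]byte at FrameOffset -12
//
// The final RoundUp calls then give stksize = 16 and stkptrsize = 8:
// only the first 8 bytes of the local area need GC scanning.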

const maxStackSize = 1 << 30

// Compile builds an SSA backend function,
// uses it to generate a plist,
// and flushes that plist to machine code.
// worker indicates which of the backend workers is doing the processing.
func Compile(fn *ir.Func, worker int, profile *pgoir.Profile) {
	f := buildssa(fn, worker, inline.IsPgoHotFunc(fn, profile) || inline.HasPgoHotInline(fn))
	// Note: check arg size to fix issue 25507.
	if f.Frontend().(*ssafn).stksize >= maxStackSize || f.OwnAux.ArgWidth() >= maxStackSize {
		largeStackFramesMu.Lock()
		largeStackFrames = append(largeStackFrames, largeStack{locals: f.Frontend().(*ssafn).stksize, args: f.OwnAux.ArgWidth(), pos: fn.Pos()})
		largeStackFramesMu.Unlock()
		return
	}
	pp := objw.NewProgs(fn, worker)
	defer pp.Free()
	genssa(f, pp)
	// Check frame size again.
	// The check above included only the space needed for local variables.
	// After genssa, the space needed includes local variables and the callee arg region.
	// We must do this check prior to calling pp.Flush.
	// If there are any oversized stack frames,
	// the assembler may emit inscrutable complaints about invalid instructions.
	if pp.Text.To.Offset >= maxStackSize {
		largeStackFramesMu.Lock()
		locals := f.Frontend().(*ssafn).stksize
		largeStackFrames = append(largeStackFrames, largeStack{locals: locals, args: f.OwnAux.ArgWidth(), callee: pp.Text.To.Offset - locals, pos: fn.Pos()})
		largeStackFramesMu.Unlock()
		return
	}

	pp.Flush() // assemble, fill in boilerplate, etc.

	// If we're compiling the package init function, search for any
	// relocations that target global map init outline functions and
	// turn them into weak relocs.
	if fn.IsPackageInit() && base.Debug.WrapGlobalMapCtl != 1 {
		weakenGlobalMapInitRelocs(fn)
	}

	// fieldtrack must be called after pp.Flush. See issue 20014.
	fieldtrack(pp.Text.From.Sym, fn.FieldTrack)
}
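
// For scale (illustrative arithmetic, not part of the original source):
// maxStackSize = 1 << 30 bytes = 1 GiB, which is why the diagnostics in
// CheckLargeStacks below report ">1GB" and shift byte counts right by
// 20 (large.locals >> 20) to print sizes in megabytes.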

// globalMapInitLsyms records the LSym of each map.init.NNN outlined
// map initializer function created by the compiler.
var globalMapInitLsyms map[*obj.LSym]struct{}

// RegisterMapInitLsym records "s" in the set of outlined map initializer
// functions.
func RegisterMapInitLsym(s *obj.LSym) {
	if globalMapInitLsyms == nil {
		globalMapInitLsyms = make(map[*obj.LSym]struct{})
	}
	globalMapInitLsyms[s] = struct{}{}
}

// weakenGlobalMapInitRelocs walks through all of the relocations on a
// given package init function "fn" and looks for relocs that target
// outlined global map initializer functions; if it finds any such
// relocs, it flags them as R_WEAK.
func weakenGlobalMapInitRelocs(fn *ir.Func) {
	if globalMapInitLsyms == nil {
		return
	}
	for i := range fn.LSym.R {
		tgt := fn.LSym.R[i].Sym
		if tgt == nil {
			continue
		}
		if _, ok := globalMapInitLsyms[tgt]; !ok {
			continue
		}
		if base.Debug.WrapGlobalMapDbg > 1 {
			fmt.Fprintf(os.Stderr, "=-= weakify fn %v reloc %d %+v\n", fn, i,
				fn.LSym.R[i])
		}
		// Set the R_WEAK bit; leave the rest of the reloc type intact.
		fn.LSym.R[i].Type |= objabi.R_WEAK
	}
}
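
// To illustrate the bit manipulation above (a sketch; the concrete
// reloc kind is an assumption, not taken from this file): R_WEAK is a
// flag bit OR'ed into an existing relocation type, so weakening keeps
// the reloc's kind while telling the linker that this reference alone
// should not keep its target alive:
//
//	r.Type = objabi.R_CALL  // ordinary call reloc: binds the target
//	r.Type |= objabi.R_WEAK // weak call: deadcode may drop the target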

// StackOffset returns the stack location of a LocalSlot relative to the
// stack pointer, suitable for use in a DWARF location entry. This has nothing
// to do with its offset in the user variable.
func StackOffset(slot ssa.LocalSlot) int32 {
	n := slot.N
	var off int64
	switch n.Class {
	case ir.PPARAM, ir.PPARAMOUT:
		if !n.IsOutputParamInRegisters() {
			off = n.FrameOffset() + base.Ctxt.Arch.FixedFrameSize
			break
		}
		fallthrough // PPARAMOUT in registers allocates like an AUTO
	case ir.PAUTO:
		off = n.FrameOffset()
		if base.Ctxt.Arch.FixedFrameSize == 0 {
			off -= int64(types.PtrSize)
		}
		if buildcfg.FramePointerEnabled {
			off -= int64(types.PtrSize)
		}
	}
	return int32(off + slot.Off)
}
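
// Tracing the arithmetic above with hypothetical values (chosen for
// illustration; an amd64-like target with FixedFrameSize == 0, the
// frame pointer enabled, and PtrSize = 8): for an auto with
// FrameOffset() == -16 and slot.Off == 4,
//
//	off = -16 - 8 (no fixed frame) - 8 (saved frame pointer) = -32
//
// so StackOffset returns -32 + 4 = -28.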

// fieldtrack adds R_USEFIELD relocations to fnsym to record any
// struct fields that it used.
func fieldtrack(fnsym *obj.LSym, tracked map[*obj.LSym]struct{}) {
	if fnsym == nil {
		return
	}
	if !buildcfg.Experiment.FieldTrack || len(tracked) == 0 {
		return
	}

	trackSyms := make([]*obj.LSym, 0, len(tracked))
	for sym := range tracked {
		trackSyms = append(trackSyms, sym)
	}
	sort.Slice(trackSyms, func(i, j int) bool { return trackSyms[i].Name < trackSyms[j].Name })
	for _, sym := range trackSyms {
		r := obj.Addrel(fnsym)
		r.Sym = sym
		r.Type = objabi.R_USEFIELD
	}
}

// largeStack is info about a function whose stack frame is too large (rare).
type largeStack struct {
	locals int64
	args   int64
	callee int64
	pos    src.XPos
}

var (
	largeStackFramesMu sync.Mutex // protects largeStackFrames
	largeStackFrames   []largeStack
)

func CheckLargeStacks() {
	// Check whether any of the functions we have compiled have gigantic stack frames.
	sort.Slice(largeStackFrames, func(i, j int) bool {
		return largeStackFrames[i].pos.Before(largeStackFrames[j].pos)
	})
	for _, large := range largeStackFrames {
		if large.callee != 0 {
			base.ErrorfAt(large.pos, 0, "stack frame too large (>1GB): %d MB locals + %d MB args + %d MB callee", large.locals>>20, large.args>>20, large.callee>>20)
		} else {
			base.ErrorfAt(large.pos, 0, "stack frame too large (>1GB): %d MB locals + %d MB args", large.locals>>20, large.args>>20)
		}
	}
}