Source file src/cmd/compile/internal/ppc64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ppc64
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/ir"
    10  	"cmd/compile/internal/logopt"
    11  	"cmd/compile/internal/objw"
    12  	"cmd/compile/internal/ssa"
    13  	"cmd/compile/internal/ssagen"
    14  	"cmd/compile/internal/types"
    15  	"cmd/internal/obj"
    16  	"cmd/internal/obj/ppc64"
    17  	"internal/buildcfg"
    18  	"math"
    19  	"strings"
    20  )
    21  
    22  // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
    23  func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
    24  	//	flive := b.FlagsLiveAtEnd
    25  	//	if b.Control != nil && b.Control.Type.IsFlags() {
    26  	//		flive = true
    27  	//	}
    28  	//	for i := len(b.Values) - 1; i >= 0; i-- {
    29  	//		v := b.Values[i]
    30  	//		if flive && (v.Op == ssa.OpPPC64MOVDconst) {
    31  	//			// The "mark" is any non-nil Aux value.
    32  	//			v.Aux = v
    33  	//		}
    34  	//		if v.Type.IsFlags() {
    35  	//			flive = false
    36  	//		}
    37  	//		for _, a := range v.Args {
    38  	//			if a.Type.IsFlags() {
    39  	//				flive = true
    40  	//			}
    41  	//		}
    42  	//	}
    43  }
    44  
    45  // loadByType returns the load instruction of the given type.
    46  func loadByType(t *types.Type) obj.As {
    47  	if t.IsFloat() {
    48  		switch t.Size() {
    49  		case 4:
    50  			return ppc64.AFMOVS
    51  		case 8:
    52  			return ppc64.AFMOVD
    53  		}
    54  	} else {
    55  		switch t.Size() {
    56  		case 1:
    57  			if t.IsSigned() {
    58  				return ppc64.AMOVB
    59  			} else {
    60  				return ppc64.AMOVBZ
    61  			}
    62  		case 2:
    63  			if t.IsSigned() {
    64  				return ppc64.AMOVH
    65  			} else {
    66  				return ppc64.AMOVHZ
    67  			}
    68  		case 4:
    69  			if t.IsSigned() {
    70  				return ppc64.AMOVW
    71  			} else {
    72  				return ppc64.AMOVWZ
    73  			}
    74  		case 8:
    75  			return ppc64.AMOVD
    76  		}
    77  	}
    78  	panic("bad load type")
    79  }
    80  
    81  // storeByType returns the store instruction of the given type.
    82  func storeByType(t *types.Type) obj.As {
    83  	if t.IsFloat() {
    84  		switch t.Size() {
    85  		case 4:
    86  			return ppc64.AFMOVS
    87  		case 8:
    88  			return ppc64.AFMOVD
    89  		}
    90  	} else {
    91  		switch t.Size() {
    92  		case 1:
    93  			return ppc64.AMOVB
    94  		case 2:
    95  			return ppc64.AMOVH
    96  		case 4:
    97  			return ppc64.AMOVW
    98  		case 8:
    99  			return ppc64.AMOVD
   100  		}
   101  	}
   102  	panic("bad store type")
   103  }
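
        // For example (illustrative): loadByType on a signed 4-byte integer
        // returns ppc64.AMOVW (sign-extending) while an unsigned one returns
        // ppc64.AMOVWZ (zero-extending); storeByType needs no such distinction
        // because a store ignores the upper bits of the source register.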
   104  
   105  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   106  	switch v.Op {
   107  	case ssa.OpCopy:
   108  		t := v.Type
   109  		if t.IsMemory() {
   110  			return
   111  		}
   112  		x := v.Args[0].Reg()
   113  		y := v.Reg()
   114  		if x != y {
   115  			rt := obj.TYPE_REG
   116  			op := ppc64.AMOVD
   117  
   118  			if t.IsFloat() {
   119  				op = ppc64.AFMOVD
   120  			}
   121  			p := s.Prog(op)
   122  			p.From.Type = rt
   123  			p.From.Reg = x
   124  			p.To.Type = rt
   125  			p.To.Reg = y
   126  		}
   127  
   128  	case ssa.OpPPC64LoweredAtomicAnd8,
   129  		ssa.OpPPC64LoweredAtomicAnd32,
   130  		ssa.OpPPC64LoweredAtomicOr8,
   131  		ssa.OpPPC64LoweredAtomicOr32:
   132  		// LWSYNC
   133  		// LBAR/LWAR	(Rarg0), Rtmp
   134  		// AND/OR	Rarg1, Rtmp
   135  		// STBCCC/STWCCC Rtmp, (Rarg0)
   136  		// BNE		-3(PC)
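        		// This is a load-reserve/store-conditional retry loop: the BNE
        		// branches back to the LBAR/LWAR until the conditional store
        		// succeeds.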
   137  		ld := ppc64.ALBAR
   138  		st := ppc64.ASTBCCC
   139  		if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
   140  			ld = ppc64.ALWAR
   141  			st = ppc64.ASTWCCC
   142  		}
   143  		r0 := v.Args[0].Reg()
   144  		r1 := v.Args[1].Reg()
   145  		// LWSYNC - Assuming shared data not write-through-required nor
   146  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   147  		plwsync := s.Prog(ppc64.ALWSYNC)
   148  		plwsync.To.Type = obj.TYPE_NONE
   149  		// LBAR or LWAR
   150  		p := s.Prog(ld)
   151  		p.From.Type = obj.TYPE_MEM
   152  		p.From.Reg = r0
   153  		p.To.Type = obj.TYPE_REG
   154  		p.To.Reg = ppc64.REGTMP
   155  		// AND/OR reg1,out
   156  		p1 := s.Prog(v.Op.Asm())
   157  		p1.From.Type = obj.TYPE_REG
   158  		p1.From.Reg = r1
   159  		p1.To.Type = obj.TYPE_REG
   160  		p1.To.Reg = ppc64.REGTMP
   161  		// STBCCC or STWCCC
   162  		p2 := s.Prog(st)
   163  		p2.From.Type = obj.TYPE_REG
   164  		p2.From.Reg = ppc64.REGTMP
   165  		p2.To.Type = obj.TYPE_MEM
   166  		p2.To.Reg = r0
   167  		p2.RegTo2 = ppc64.REGTMP
   168  		// BNE retry
   169  		p3 := s.Prog(ppc64.ABNE)
   170  		p3.To.Type = obj.TYPE_BRANCH
   171  		p3.To.SetTarget(p)
   172  
   173  	case ssa.OpPPC64LoweredAtomicAdd32,
   174  		ssa.OpPPC64LoweredAtomicAdd64:
   175  		// LWSYNC
   176  		// LDAR/LWAR    (Rarg0), Rout
   177  		// ADD		Rarg1, Rout
   178  		// STDCCC/STWCCC Rout, (Rarg0)
   179  		// BNE         -3(PC)
   180  		// MOVW		Rout,Rout (if Add32)
   181  		ld := ppc64.ALDAR
   182  		st := ppc64.ASTDCCC
   183  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   184  			ld = ppc64.ALWAR
   185  			st = ppc64.ASTWCCC
   186  		}
   187  		r0 := v.Args[0].Reg()
   188  		r1 := v.Args[1].Reg()
   189  		out := v.Reg0()
   190  		// LWSYNC - Assuming shared data not write-through-required nor
   191  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   192  		plwsync := s.Prog(ppc64.ALWSYNC)
   193  		plwsync.To.Type = obj.TYPE_NONE
   194  		// LDAR or LWAR
   195  		p := s.Prog(ld)
   196  		p.From.Type = obj.TYPE_MEM
   197  		p.From.Reg = r0
   198  		p.To.Type = obj.TYPE_REG
   199  		p.To.Reg = out
   200  		// ADD reg1,out
   201  		p1 := s.Prog(ppc64.AADD)
   202  		p1.From.Type = obj.TYPE_REG
   203  		p1.From.Reg = r1
   204  		p1.To.Reg = out
   205  		p1.To.Type = obj.TYPE_REG
   206  		// STDCCC or STWCCC
   207  		p3 := s.Prog(st)
   208  		p3.From.Type = obj.TYPE_REG
   209  		p3.From.Reg = out
   210  		p3.To.Type = obj.TYPE_MEM
   211  		p3.To.Reg = r0
   212  		// BNE retry
   213  		p4 := s.Prog(ppc64.ABNE)
   214  		p4.To.Type = obj.TYPE_BRANCH
   215  		p4.To.SetTarget(p)
   216  
   217  		// Ensure a 32 bit result
   218  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   219  			p5 := s.Prog(ppc64.AMOVWZ)
   220  			p5.To.Type = obj.TYPE_REG
   221  			p5.To.Reg = out
   222  			p5.From.Type = obj.TYPE_REG
   223  			p5.From.Reg = out
   224  		}
   225  
   226  	case ssa.OpPPC64LoweredAtomicExchange32,
   227  		ssa.OpPPC64LoweredAtomicExchange64:
   228  		// LWSYNC
   229  		// LDAR/LWAR    (Rarg0), Rout
   230  		// STDCCC/STWCCC Rout, (Rarg0)
   231  		// BNE         -2(PC)
   232  		// ISYNC
   233  		ld := ppc64.ALDAR
   234  		st := ppc64.ASTDCCC
   235  		if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
   236  			ld = ppc64.ALWAR
   237  			st = ppc64.ASTWCCC
   238  		}
   239  		r0 := v.Args[0].Reg()
   240  		r1 := v.Args[1].Reg()
   241  		out := v.Reg0()
   242  		// LWSYNC - Assuming shared data not write-through-required nor
   243  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   244  		plwsync := s.Prog(ppc64.ALWSYNC)
   245  		plwsync.To.Type = obj.TYPE_NONE
   246  		// LDAR or LWAR
   247  		p := s.Prog(ld)
   248  		p.From.Type = obj.TYPE_MEM
   249  		p.From.Reg = r0
   250  		p.To.Type = obj.TYPE_REG
   251  		p.To.Reg = out
   252  		// STDCCC or STWCCC
   253  		p1 := s.Prog(st)
   254  		p1.From.Type = obj.TYPE_REG
   255  		p1.From.Reg = r1
   256  		p1.To.Type = obj.TYPE_MEM
   257  		p1.To.Reg = r0
   258  		// BNE retry
   259  		p2 := s.Prog(ppc64.ABNE)
   260  		p2.To.Type = obj.TYPE_BRANCH
   261  		p2.To.SetTarget(p)
   262  		// ISYNC
   263  		pisync := s.Prog(ppc64.AISYNC)
   264  		pisync.To.Type = obj.TYPE_NONE
   265  
   266  	case ssa.OpPPC64LoweredAtomicLoad8,
   267  		ssa.OpPPC64LoweredAtomicLoad32,
   268  		ssa.OpPPC64LoweredAtomicLoad64,
   269  		ssa.OpPPC64LoweredAtomicLoadPtr:
   270  		// SYNC
   271  		// MOVB/MOVD/MOVW (Rarg0), Rout
   272  		// CMP Rout,Rout
   273  		// BNE 1(PC)
   274  		// ISYNC
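        		// The CMP/BNE pair compares the loaded value with itself and
        		// never branches; it exists only to create a control dependency
        		// on the load so that the ISYNC orders subsequent loads after
        		// it. This is the standard Power load-acquire idiom.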
   275  		ld := ppc64.AMOVD
   276  		cmp := ppc64.ACMP
   277  		switch v.Op {
   278  		case ssa.OpPPC64LoweredAtomicLoad8:
   279  			ld = ppc64.AMOVBZ
   280  		case ssa.OpPPC64LoweredAtomicLoad32:
   281  			ld = ppc64.AMOVWZ
   282  			cmp = ppc64.ACMPW
   283  		}
   284  		arg0 := v.Args[0].Reg()
   285  		out := v.Reg0()
   286  		// SYNC when AuxInt == 1; otherwise, load-acquire
   287  		if v.AuxInt == 1 {
   288  			psync := s.Prog(ppc64.ASYNC)
   289  			psync.To.Type = obj.TYPE_NONE
   290  		}
   291  		// Load
   292  		p := s.Prog(ld)
   293  		p.From.Type = obj.TYPE_MEM
   294  		p.From.Reg = arg0
   295  		p.To.Type = obj.TYPE_REG
   296  		p.To.Reg = out
   297  		// CMP
   298  		p1 := s.Prog(cmp)
   299  		p1.From.Type = obj.TYPE_REG
   300  		p1.From.Reg = out
   301  		p1.To.Type = obj.TYPE_REG
   302  		p1.To.Reg = out
   303  		// BNE
   304  		p2 := s.Prog(ppc64.ABNE)
   305  		p2.To.Type = obj.TYPE_BRANCH
   306  		// ISYNC
   307  		pisync := s.Prog(ppc64.AISYNC)
   308  		pisync.To.Type = obj.TYPE_NONE
   309  		p2.To.SetTarget(pisync)
   310  
   311  	case ssa.OpPPC64LoweredAtomicStore8,
   312  		ssa.OpPPC64LoweredAtomicStore32,
   313  		ssa.OpPPC64LoweredAtomicStore64:
   314  		// SYNC or LWSYNC
   315  		// MOVB/MOVW/MOVD arg1,(arg0)
   316  		st := ppc64.AMOVD
   317  		switch v.Op {
   318  		case ssa.OpPPC64LoweredAtomicStore8:
   319  			st = ppc64.AMOVB
   320  		case ssa.OpPPC64LoweredAtomicStore32:
   321  			st = ppc64.AMOVW
   322  		}
   323  		arg0 := v.Args[0].Reg()
   324  		arg1 := v.Args[1].Reg()
   325  		// If AuxInt == 0, emit LWSYNC (Store-Release);
   326  		// otherwise emit a full SYNC.
   327  		syncOp := ppc64.ASYNC
   328  		if v.AuxInt == 0 {
   329  			syncOp = ppc64.ALWSYNC
   330  		}
   331  		psync := s.Prog(syncOp)
   332  		psync.To.Type = obj.TYPE_NONE
   333  		// Store
   334  		p := s.Prog(st)
   335  		p.To.Type = obj.TYPE_MEM
   336  		p.To.Reg = arg0
   337  		p.From.Type = obj.TYPE_REG
   338  		p.From.Reg = arg1
   339  
   340  	case ssa.OpPPC64LoweredAtomicCas64,
   341  		ssa.OpPPC64LoweredAtomicCas32:
   342  		// MOVD        $0, Rout
   343  		// LWSYNC
   344  		// loop:
   345  		// LDAR        (Rarg0), MutexHint, Rtmp
   346  		// CMP         Rarg1, Rtmp
   347  		// BNE         end
   348  		// STDCCC      Rarg2, (Rarg0)
   349  		// BNE         loop
   350  		// MOVD        $1, Rout
   351  		// end:
   352  		// LWSYNC      // Only for sequential consistency; not required in CasRel.
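        		// Rout is the boolean result: it stays 0 when the compare
        		// fails and becomes 1 after a successful store-conditional.
        		// The MutexHint (EH) field on the LDAR/LWAR is a performance
        		// hint to the reservation logic and does not change semantics.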
   353  		ld := ppc64.ALDAR
   354  		st := ppc64.ASTDCCC
   355  		cmp := ppc64.ACMP
   356  		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
   357  			ld = ppc64.ALWAR
   358  			st = ppc64.ASTWCCC
   359  			cmp = ppc64.ACMPW
   360  		}
   361  		r0 := v.Args[0].Reg()
   362  		r1 := v.Args[1].Reg()
   363  		r2 := v.Args[2].Reg()
   364  		out := v.Reg0()
   365  		// Initialize return value to false
   366  		p := s.Prog(ppc64.AMOVD)
   367  		p.From.Type = obj.TYPE_CONST
   368  		p.From.Offset = 0
   369  		p.To.Type = obj.TYPE_REG
   370  		p.To.Reg = out
   371  		// LWSYNC - Assuming shared data not write-through-required nor
   372  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   373  		plwsync1 := s.Prog(ppc64.ALWSYNC)
   374  		plwsync1.To.Type = obj.TYPE_NONE
   375  		// LDAR or LWAR
   376  		p0 := s.Prog(ld)
   377  		p0.From.Type = obj.TYPE_MEM
   378  		p0.From.Reg = r0
   379  		p0.To.Type = obj.TYPE_REG
   380  		p0.To.Reg = ppc64.REGTMP
   381  		// If it is a Compare-and-Swap-Release operation, set the EH field with
   382  		// the release hint.
   383  		if v.AuxInt == 0 {
   384  			p0.AddRestSourceConst(0)
   385  		}
   386  		// CMP reg1,reg2
   387  		p1 := s.Prog(cmp)
   388  		p1.From.Type = obj.TYPE_REG
   389  		p1.From.Reg = r1
   390  		p1.To.Reg = ppc64.REGTMP
   391  		p1.To.Type = obj.TYPE_REG
   392  		// BNE done with return value = false
   393  		p2 := s.Prog(ppc64.ABNE)
   394  		p2.To.Type = obj.TYPE_BRANCH
   395  		// STDCCC or STWCCC
   396  		p3 := s.Prog(st)
   397  		p3.From.Type = obj.TYPE_REG
   398  		p3.From.Reg = r2
   399  		p3.To.Type = obj.TYPE_MEM
   400  		p3.To.Reg = r0
   401  		// BNE retry
   402  		p4 := s.Prog(ppc64.ABNE)
   403  		p4.To.Type = obj.TYPE_BRANCH
   404  		p4.To.SetTarget(p0)
   405  		// return value true
   406  		p5 := s.Prog(ppc64.AMOVD)
   407  		p5.From.Type = obj.TYPE_CONST
   408  		p5.From.Offset = 1
   409  		p5.To.Type = obj.TYPE_REG
   410  		p5.To.Reg = out
   411  		// LWSYNC - Assuming shared data not write-through-required nor
   412  		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
   413  		// If the operation is a CAS-Release, then synchronization is not necessary.
   414  		if v.AuxInt != 0 {
   415  			plwsync2 := s.Prog(ppc64.ALWSYNC)
   416  			plwsync2.To.Type = obj.TYPE_NONE
   417  			p2.To.SetTarget(plwsync2)
   418  		} else {
   419  			// done (label)
   420  			p6 := s.Prog(obj.ANOP)
   421  			p2.To.SetTarget(p6)
   422  		}
   423  
   424  	case ssa.OpPPC64LoweredPubBarrier:
   425  		// LWSYNC
   426  		s.Prog(v.Op.Asm())
   427  
   428  	case ssa.OpPPC64LoweredGetClosurePtr:
   429  		// Closure pointer is R11 (already)
   430  		ssagen.CheckLoweredGetClosurePtr(v)
   431  
   432  	case ssa.OpPPC64LoweredGetCallerSP:
   433  		// caller's SP is FixedFrameSize below the address of the first arg
   434  		p := s.Prog(ppc64.AMOVD)
   435  		p.From.Type = obj.TYPE_ADDR
   436  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
   437  		p.From.Name = obj.NAME_PARAM
   438  		p.To.Type = obj.TYPE_REG
   439  		p.To.Reg = v.Reg()
   440  
   441  	case ssa.OpPPC64LoweredGetCallerPC:
   442  		p := s.Prog(obj.AGETCALLERPC)
   443  		p.To.Type = obj.TYPE_REG
   444  		p.To.Reg = v.Reg()
   445  
   446  	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
   447  		// input is already rounded
   448  
   449  	case ssa.OpLoadReg:
   450  		loadOp := loadByType(v.Type)
   451  		p := s.Prog(loadOp)
   452  		ssagen.AddrAuto(&p.From, v.Args[0])
   453  		p.To.Type = obj.TYPE_REG
   454  		p.To.Reg = v.Reg()
   455  
   456  	case ssa.OpStoreReg:
   457  		storeOp := storeByType(v.Type)
   458  		p := s.Prog(storeOp)
   459  		p.From.Type = obj.TYPE_REG
   460  		p.From.Reg = v.Args[0].Reg()
   461  		ssagen.AddrAuto(&p.To, v)
   462  
   463  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   464  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
   465  		// The loop only runs once.
   466  		for _, a := range v.Block.Func.RegArgs {
   467  			// Pass the spill/unspill information along to the assembler, offset by size of
   468  			// the saved LR slot.
   469  			addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   470  			s.FuncInfo().AddSpill(
   471  				obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
   472  		}
   473  		v.Block.Func.RegArgs = nil
   474  
   475  		ssagen.CheckArgReg(v)
   476  
   477  	case ssa.OpPPC64DIVD:
   478  		// For now,
   479  		//
   480  		// cmp arg1, -1
   481  		// be  ahead
   482  		// v = arg0 / arg1
   483  		// b over
   484  		// ahead: v = - arg0
   485  		// over: nop
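        		// The -1 check guards the one signed divide that overflows:
        		// MinInt64 / -1, for which the ISA leaves the quotient
        		// undefined. Go requires x / -1 == -x (wrapping back to x for
        		// MinInt64), which is exactly what NEG computes.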
   486  		r := v.Reg()
   487  		r0 := v.Args[0].Reg()
   488  		r1 := v.Args[1].Reg()
   489  
   490  		p := s.Prog(ppc64.ACMP)
   491  		p.From.Type = obj.TYPE_REG
   492  		p.From.Reg = r1
   493  		p.To.Type = obj.TYPE_CONST
   494  		p.To.Offset = -1
   495  
   496  		pbahead := s.Prog(ppc64.ABEQ)
   497  		pbahead.To.Type = obj.TYPE_BRANCH
   498  
   499  		p = s.Prog(v.Op.Asm())
   500  		p.From.Type = obj.TYPE_REG
   501  		p.From.Reg = r1
   502  		p.Reg = r0
   503  		p.To.Type = obj.TYPE_REG
   504  		p.To.Reg = r
   505  
   506  		pbover := s.Prog(obj.AJMP)
   507  		pbover.To.Type = obj.TYPE_BRANCH
   508  
   509  		p = s.Prog(ppc64.ANEG)
   510  		p.To.Type = obj.TYPE_REG
   511  		p.To.Reg = r
   512  		p.From.Type = obj.TYPE_REG
   513  		p.From.Reg = r0
   514  		pbahead.To.SetTarget(p)
   515  
   516  		p = s.Prog(obj.ANOP)
   517  		pbover.To.SetTarget(p)
   518  
   519  	case ssa.OpPPC64DIVW:
   520  		// word-width version of above
   521  		r := v.Reg()
   522  		r0 := v.Args[0].Reg()
   523  		r1 := v.Args[1].Reg()
   524  
   525  		p := s.Prog(ppc64.ACMPW)
   526  		p.From.Type = obj.TYPE_REG
   527  		p.From.Reg = r1
   528  		p.To.Type = obj.TYPE_CONST
   529  		p.To.Offset = -1
   530  
   531  		pbahead := s.Prog(ppc64.ABEQ)
   532  		pbahead.To.Type = obj.TYPE_BRANCH
   533  
   534  		p = s.Prog(v.Op.Asm())
   535  		p.From.Type = obj.TYPE_REG
   536  		p.From.Reg = r1
   537  		p.Reg = r0
   538  		p.To.Type = obj.TYPE_REG
   539  		p.To.Reg = r
   540  
   541  		pbover := s.Prog(obj.AJMP)
   542  		pbover.To.Type = obj.TYPE_BRANCH
   543  
   544  		p = s.Prog(ppc64.ANEG)
   545  		p.To.Type = obj.TYPE_REG
   546  		p.To.Reg = r
   547  		p.From.Type = obj.TYPE_REG
   548  		p.From.Reg = r0
   549  		pbahead.To.SetTarget(p)
   550  
   551  		p = s.Prog(obj.ANOP)
   552  		pbover.To.SetTarget(p)
   553  
   554  	case ssa.OpPPC64CLRLSLWI:
   555  		r := v.Reg()
   556  		r1 := v.Args[0].Reg()
   557  		shifts := v.AuxInt
   558  		p := s.Prog(v.Op.Asm())
   559  		// clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
   560  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
   561  		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
   562  		p.Reg = r1
   563  		p.To.Type = obj.TYPE_REG
   564  		p.To.Reg = r
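        		// For example (illustrative): with sh=4 and mb=16 encoded in
        		// AuxInt, CLRLSLWI clears the upper 16 bits and then shifts
        		// left by 4, assembling as rlwinm ra,rs,4,12,27 per the rule
        		// above.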
   565  
   566  	case ssa.OpPPC64CLRLSLDI:
   567  		r := v.Reg()
   568  		r1 := v.Args[0].Reg()
   569  		shifts := v.AuxInt
   570  		p := s.Prog(v.Op.Asm())
   571  		// clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
   572  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
   573  		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
   574  		p.Reg = r1
   575  		p.To.Type = obj.TYPE_REG
   576  		p.To.Reg = r
   577  
   578  	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
   579  		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
   580  		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
   581  		ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
   582  		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
   583  		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
   584  		ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
   585  		ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW, ssa.OpPPC64XSMINJDP, ssa.OpPPC64XSMAXJDP:
   586  		r := v.Reg()
   587  		r1 := v.Args[0].Reg()
   588  		r2 := v.Args[1].Reg()
   589  		p := s.Prog(v.Op.Asm())
   590  		p.From.Type = obj.TYPE_REG
   591  		p.From.Reg = r2
   592  		p.Reg = r1
   593  		p.To.Type = obj.TYPE_REG
   594  		p.To.Reg = r
   595  
   596  	case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
   597  		ssa.OpPPC64ANDNCC:
   598  		r1 := v.Args[0].Reg()
   599  		r2 := v.Args[1].Reg()
   600  		p := s.Prog(v.Op.Asm())
   601  		p.From.Type = obj.TYPE_REG
   602  		p.From.Reg = r2
   603  		p.Reg = r1
   604  		p.To.Type = obj.TYPE_REG
   605  		p.To.Reg = v.Reg0()
   606  
   607  	case ssa.OpPPC64NEGCC, ssa.OpPPC64CNTLZDCC:
   608  		p := s.Prog(v.Op.Asm())
   609  		p.To.Type = obj.TYPE_REG
   610  		p.To.Reg = v.Reg0()
   611  		p.From.Type = obj.TYPE_REG
   612  		p.From.Reg = v.Args[0].Reg()
   613  
   614  	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
   615  		p := s.Prog(v.Op.Asm())
   616  		p.From.Type = obj.TYPE_CONST
   617  		p.From.Offset = v.AuxInt
   618  		p.Reg = v.Args[0].Reg()
   619  		p.To.Type = obj.TYPE_REG
   620  		p.To.Reg = v.Reg()
   621  
   622  		// Auxint holds encoded rotate + mask
   623  	case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
   624  		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   625  		p := s.Prog(v.Op.Asm())
   626  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   627  		p.Reg = v.Args[0].Reg()
   628  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(sh)}
   629  		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
   630  		// Auxint holds mask
   631  
   632  	case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICR:
   633  		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   634  		p := s.Prog(v.Op.Asm())
   635  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: sh}
   636  		switch v.Op {
   637  		case ssa.OpPPC64RLDICL:
   638  			p.AddRestSourceConst(mb)
   639  		case ssa.OpPPC64RLDICR:
   640  			p.AddRestSourceConst(me)
   641  		}
   642  		p.Reg = v.Args[0].Reg()
   643  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   644  
   645  	case ssa.OpPPC64RLWNM:
   646  		_, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   647  		p := s.Prog(v.Op.Asm())
   648  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   649  		p.Reg = v.Args[0].Reg()
   650  		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
   651  		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
   652  
   653  	case ssa.OpPPC64MADDLD:
   654  		r := v.Reg()
   655  		r1 := v.Args[0].Reg()
   656  		r2 := v.Args[1].Reg()
   657  		r3 := v.Args[2].Reg()
   658  		// r = r1*r2 + r3
   659  		p := s.Prog(v.Op.Asm())
   660  		p.From.Type = obj.TYPE_REG
   661  		p.From.Reg = r1
   662  		p.Reg = r2
   663  		p.AddRestSourceReg(r3)
   664  		p.To.Type = obj.TYPE_REG
   665  		p.To.Reg = r
   666  
   667  	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
   668  		r := v.Reg()
   669  		r1 := v.Args[0].Reg()
   670  		r2 := v.Args[1].Reg()
   671  		r3 := v.Args[2].Reg()
   672  		// r = r1*r2 ± r3
   673  		p := s.Prog(v.Op.Asm())
   674  		p.From.Type = obj.TYPE_REG
   675  		p.From.Reg = r1
   676  		p.Reg = r3
   677  		p.AddRestSourceReg(r2)
   678  		p.To.Type = obj.TYPE_REG
   679  		p.To.Reg = r
   680  
   681  	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
   682  		ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
   683  		ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
   684  		ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD:
   685  		r := v.Reg()
   686  		p := s.Prog(v.Op.Asm())
   687  		p.To.Type = obj.TYPE_REG
   688  		p.To.Reg = r
   689  		p.From.Type = obj.TYPE_REG
   690  		p.From.Reg = v.Args[0].Reg()
   691  
   692  	case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
   693  		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
   694  		ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
   695  		p := s.Prog(v.Op.Asm())
   696  		p.Reg = v.Args[0].Reg()
   697  		p.From.Type = obj.TYPE_CONST
   698  		p.From.Offset = v.AuxInt
   699  		p.To.Type = obj.TYPE_REG
   700  		p.To.Reg = v.Reg()
   701  
   702  	case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
   703  		r := v.Reg0() // CA is the first, implied argument.
   704  		r1 := v.Args[0].Reg()
   705  		r2 := v.Args[1].Reg()
   706  		p := s.Prog(v.Op.Asm())
   707  		p.From.Type = obj.TYPE_REG
   708  		p.From.Reg = r2
   709  		p.Reg = r1
   710  		p.To.Type = obj.TYPE_REG
   711  		p.To.Reg = r
   712  
   713  	case ssa.OpPPC64ADDZE:
   714  		p := s.Prog(v.Op.Asm())
   715  		p.From.Type = obj.TYPE_REG
   716  		p.From.Reg = v.Args[0].Reg()
   717  		p.To.Type = obj.TYPE_REG
   718  		p.To.Reg = v.Reg0()
   719  
   720  	case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
   721  		p := s.Prog(v.Op.Asm())
   722  		p.From.Type = obj.TYPE_REG
   723  		p.From.Reg = ppc64.REG_R0
   724  		p.To.Type = obj.TYPE_REG
   725  		p.To.Reg = v.Reg()
   726  
   727  	case ssa.OpPPC64ADDCconst:
   728  		p := s.Prog(v.Op.Asm())
   729  		p.Reg = v.Args[0].Reg()
   730  		p.From.Type = obj.TYPE_CONST
   731  		p.From.Offset = v.AuxInt
   732  		p.To.Type = obj.TYPE_REG
   733  		// Output is a pair, the second is the CA, which is implied.
   734  		p.To.Reg = v.Reg0()
   735  
   736  	case ssa.OpPPC64SUBCconst:
   737  		p := s.Prog(v.Op.Asm())
   738  		p.AddRestSourceConst(v.AuxInt)
   739  		p.From.Type = obj.TYPE_REG
   740  		p.From.Reg = v.Args[0].Reg()
   741  		p.To.Type = obj.TYPE_REG
   742  		p.To.Reg = v.Reg0()
   743  
   744  	case ssa.OpPPC64SUBFCconst:
   745  		p := s.Prog(v.Op.Asm())
   746  		p.AddRestSourceConst(v.AuxInt)
   747  		p.From.Type = obj.TYPE_REG
   748  		p.From.Reg = v.Args[0].Reg()
   749  		p.To.Type = obj.TYPE_REG
   750  		p.To.Reg = v.Reg()
   751  
   752  	case ssa.OpPPC64ADDCCconst, ssa.OpPPC64ANDCCconst:
   753  		p := s.Prog(v.Op.Asm())
   754  		p.Reg = v.Args[0].Reg()
   755  		p.From.Type = obj.TYPE_CONST
   756  		p.From.Offset = v.AuxInt
   757  		p.To.Type = obj.TYPE_REG
   758  		p.To.Reg = v.Reg0()
   759  
   760  	case ssa.OpPPC64MOVDaddr:
   761  		switch v.Aux.(type) {
   762  		default:
   763  			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
   764  		case nil:
   765  			// If aux offset and aux int are both 0, and the same
   766  			// input and output regs are used, no instruction
   767  			// needs to be generated, since it would just be
   768  			// addi rx, rx, 0.
   769  			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
   770  				p := s.Prog(ppc64.AMOVD)
   771  				p.From.Type = obj.TYPE_ADDR
   772  				p.From.Reg = v.Args[0].Reg()
   773  				p.From.Offset = v.AuxInt
   774  				p.To.Type = obj.TYPE_REG
   775  				p.To.Reg = v.Reg()
   776  			}
   777  
   778  		case *obj.LSym, ir.Node:
   779  			p := s.Prog(ppc64.AMOVD)
   780  			p.From.Type = obj.TYPE_ADDR
   781  			p.From.Reg = v.Args[0].Reg()
   782  			p.To.Type = obj.TYPE_REG
   783  			p.To.Reg = v.Reg()
   784  			ssagen.AddAux(&p.From, v)
   785  
   786  		}
   787  
   788  	case ssa.OpPPC64MOVDconst:
   789  		p := s.Prog(v.Op.Asm())
   790  		p.From.Type = obj.TYPE_CONST
   791  		p.From.Offset = v.AuxInt
   792  		p.To.Type = obj.TYPE_REG
   793  		p.To.Reg = v.Reg()
   794  
   795  	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
   796  		p := s.Prog(v.Op.Asm())
   797  		p.From.Type = obj.TYPE_FCONST
   798  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   799  		p.To.Type = obj.TYPE_REG
   800  		p.To.Reg = v.Reg()
   801  
   802  	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
   803  		p := s.Prog(v.Op.Asm())
   804  		p.From.Type = obj.TYPE_REG
   805  		p.From.Reg = v.Args[0].Reg()
   806  		p.To.Type = obj.TYPE_REG
   807  		p.To.Reg = v.Args[1].Reg()
   808  
   809  	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
   810  		p := s.Prog(v.Op.Asm())
   811  		p.From.Type = obj.TYPE_REG
   812  		p.From.Reg = v.Args[0].Reg()
   813  		p.To.Type = obj.TYPE_CONST
   814  		p.To.Offset = v.AuxInt
   815  
   816  	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
   817  		// Shift in register to required size
   818  		p := s.Prog(v.Op.Asm())
   819  		p.From.Type = obj.TYPE_REG
   820  		p.From.Reg = v.Args[0].Reg()
   821  		p.To.Reg = v.Reg()
   822  		p.To.Type = obj.TYPE_REG
   823  
   824  	case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
   825  
   826  		// MOVDload and MOVWload are DS form instructions that are restricted to
   827  		// offsets that are a multiple of 4. If the offset is not a multiple of 4,
   828  		// then the address of the symbol to be loaded is computed (base + offset)
   829  		// and used as the new base register and the offset field in the instruction
   830  		// can be set to zero.
   831  
   832  		// This same problem can happen with gostrings since the final offset is not
   833  		// known yet, but could be unaligned after the relocation is resolved.
   834  		// So gostrings are handled the same way.
   835  
   836  		// This allows the MOVDload and MOVWload to be generated in more cases and
   837  		// eliminates some offset and alignment checking in the rules file.
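        		// For example (illustrative): MOVD 6(R3), R4 has an offset
        		// that is not a multiple of 4, so it is emitted instead as
        		//	MOVD $6(R3), R31	// full address into REGTMP
        		//	MOVD 0(R31), R4	// DS-form load with offset 0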
   838  
   839  		fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
   840  		ssagen.AddAux(&fromAddr, v)
   841  
   842  		genAddr := false
   843  
   844  		switch fromAddr.Name {
   845  		case obj.NAME_EXTERN, obj.NAME_STATIC:
   846  			// Special case for a rule that combines the bytes of a gostring.
   847  			// The v alignment might seem OK, but we don't want to load it
   848  			// using an offset because relocation comes later.
   849  			genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
   850  		default:
   851  			genAddr = fromAddr.Offset%4 != 0
   852  		}
   853  		if genAddr {
   854  			// Load full address into the temp register.
   855  			p := s.Prog(ppc64.AMOVD)
   856  			p.From.Type = obj.TYPE_ADDR
   857  			p.From.Reg = v.Args[0].Reg()
   858  			ssagen.AddAux(&p.From, v)
   859  			// Load target using temp as base register
   860  			// and offset zero. Setting NAME_NONE
   861  			// prevents any extra offsets from being
   862  			// added.
   863  			p.To.Type = obj.TYPE_REG
   864  			p.To.Reg = ppc64.REGTMP
   865  			fromAddr.Reg = ppc64.REGTMP
   866  			// Clear the offset field and other
   867  			// information that might be used
   868  			// by the assembler to add to the
   869  			// final offset value.
   870  			fromAddr.Offset = 0
   871  			fromAddr.Name = obj.NAME_NONE
   872  			fromAddr.Sym = nil
   873  		}
   874  		p := s.Prog(v.Op.Asm())
   875  		p.From = fromAddr
   876  		p.To.Type = obj.TYPE_REG
   877  		p.To.Reg = v.Reg()
   878  
   879  	case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
   880  		p := s.Prog(v.Op.Asm())
   881  		p.From.Type = obj.TYPE_MEM
   882  		p.From.Reg = v.Args[0].Reg()
   883  		ssagen.AddAux(&p.From, v)
   884  		p.To.Type = obj.TYPE_REG
   885  		p.To.Reg = v.Reg()
   886  
   887  	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
   888  		p := s.Prog(v.Op.Asm())
   889  		p.From.Type = obj.TYPE_MEM
   890  		p.From.Reg = v.Args[0].Reg()
   891  		p.To.Type = obj.TYPE_REG
   892  		p.To.Reg = v.Reg()
   893  
   894  	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
   895  		p := s.Prog(v.Op.Asm())
   896  		p.To.Type = obj.TYPE_MEM
   897  		p.To.Reg = v.Args[0].Reg()
   898  		p.From.Type = obj.TYPE_REG
   899  		p.From.Reg = v.Args[1].Reg()
   900  
   901  	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
   902  		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
   903  		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
   904  		p := s.Prog(v.Op.Asm())
   905  		p.From.Type = obj.TYPE_MEM
   906  		p.From.Reg = v.Args[0].Reg()
   907  		p.From.Index = v.Args[1].Reg()
   908  		p.To.Type = obj.TYPE_REG
   909  		p.To.Reg = v.Reg()
   910  
   911  	case ssa.OpPPC64DCBT:
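        		// DCBT is a data-cache prefetch hint; AuxInt supplies the TH
        		// (touch hint) field selecting the prefetch variant.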
   912  		p := s.Prog(v.Op.Asm())
   913  		p.From.Type = obj.TYPE_MEM
   914  		p.From.Reg = v.Args[0].Reg()
   915  		p.To.Type = obj.TYPE_CONST
   916  		p.To.Offset = v.AuxInt
   917  
   918  	case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
   919  		p := s.Prog(v.Op.Asm())
   920  		p.From.Type = obj.TYPE_REG
   921  		p.From.Reg = ppc64.REGZERO
   922  		p.To.Type = obj.TYPE_MEM
   923  		p.To.Reg = v.Args[0].Reg()
   924  		ssagen.AddAux(&p.To, v)
   925  
   926  	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
   927  
   928  		// MOVDstore and MOVDstorezero become DS form instructions that are restricted
   929  		// to offset values that are a multiple of 4. If the offset field is not a
   930  		// multiple of 4, then the full address of the store target is computed (base +
   931  		// offset) and used as the new base register and the offset in the instruction
   932  		// is set to 0.
   933  
   934  		// This allows the MOVDstore and MOVDstorezero to be generated in more cases,
   935  		// and prevents checking of the offset value and alignment in the rules.
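        		// The fix mirrors the load case above (illustrative):
        		//	MOVD $off(Rbase), R31	// full address into REGTMP
        		//	MOVD Rval, 0(R31)	// DS-form store with offset 0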
   936  
   937  		toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
   938  		ssagen.AddAux(&toAddr, v)
   939  
   940  		if toAddr.Offset%4 != 0 {
   941  			p := s.Prog(ppc64.AMOVD)
   942  			p.From.Type = obj.TYPE_ADDR
   943  			p.From.Reg = v.Args[0].Reg()
   944  			ssagen.AddAux(&p.From, v)
   945  			p.To.Type = obj.TYPE_REG
   946  			p.To.Reg = ppc64.REGTMP
   947  			toAddr.Reg = ppc64.REGTMP
   948  			// Clear the offset field and other
   949  			// information that might be used
   950  			// by the assembler to add to the
   951  			// final offset value.
   952  			toAddr.Offset = 0
   953  			toAddr.Name = obj.NAME_NONE
   954  			toAddr.Sym = nil
   955  		}
   956  		p := s.Prog(v.Op.Asm())
   957  		p.To = toAddr
   958  		p.From.Type = obj.TYPE_REG
   959  		if v.Op == ssa.OpPPC64MOVDstorezero {
   960  			p.From.Reg = ppc64.REGZERO
   961  		} else {
   962  			p.From.Reg = v.Args[1].Reg()
   963  		}
   964  
   965  	case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
   966  		p := s.Prog(v.Op.Asm())
   967  		p.From.Type = obj.TYPE_REG
   968  		p.From.Reg = v.Args[1].Reg()
   969  		p.To.Type = obj.TYPE_MEM
   970  		p.To.Reg = v.Args[0].Reg()
   971  		ssagen.AddAux(&p.To, v)
   972  
   973  	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
   974  		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
   975  		ssa.OpPPC64MOVHBRstoreidx:
   976  		p := s.Prog(v.Op.Asm())
   977  		p.From.Type = obj.TYPE_REG
   978  		p.From.Reg = v.Args[2].Reg()
   979  		p.To.Index = v.Args[1].Reg()
   980  		p.To.Type = obj.TYPE_MEM
   981  		p.To.Reg = v.Args[0].Reg()
   982  
   983  	case ssa.OpPPC64ISEL, ssa.OpPPC64ISELZ:
   984  		// ISEL  AuxInt ? arg0 : arg1
   985  		// ISELZ is a special case of ISEL where arg1 is implicitly $0.
   986  		//
   987  		// AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO.
   988  		// ISEL accepts a CR bit argument, not a condition as expressed by AuxInt.
   989  		// Convert the condition to a CR bit argument by the following conversion:
   990  		//
   991  		// AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO
   992  		// AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO
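        		// Worked example (illustrative): a GE select (AuxInt=4) is
        		// emitted via its inverse: AuxInt&3 = 0 selects on the CR LT
        		// bit with the operands swapped, yielding arg1 when LT is set
        		// and arg0 otherwise, i.e. arg0 exactly when GE holds.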
   993  		p := s.Prog(v.Op.Asm())
   994  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   995  		p.Reg = v.Args[0].Reg()
   996  		if v.Op == ssa.OpPPC64ISEL {
   997  			p.AddRestSourceReg(v.Args[1].Reg())
   998  		} else {
   999  			p.AddRestSourceReg(ppc64.REG_R0)
  1000  		}
  1001  		// AuxInt values 4,5,6,7 are implemented with reversed operand order from 0,1,2,3
  1002  		if v.AuxInt > 3 {
  1003  			p.Reg, p.GetFrom3().Reg = p.GetFrom3().Reg, p.Reg
  1004  		}
  1005  		p.From.SetConst(v.AuxInt & 3)
  1006  
  1007  	case ssa.OpPPC64SETBC, ssa.OpPPC64SETBCR:
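        		// SETBC materializes a CR bit as 0/1 in a GPR and SETBCR
        		// inverts the sense; both are ISA 3.1 (power10) instructions.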
  1008  		p := s.Prog(v.Op.Asm())
  1009  		p.To.Type = obj.TYPE_REG
  1010  		p.To.Reg = v.Reg()
  1011  		p.From.Type = obj.TYPE_REG
  1012  		p.From.Reg = int16(ppc64.REG_CR0LT + v.AuxInt)
  1013  
  1014  	case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
  1015  		// The LoweredQuadZero code generates
  1016  		// STXV instructions on power9. The
  1017  		// Short variation is used if no loop
  1018  		// is generated.
  1019  
  1020  		// sizes >= 128 generate a loop as follows:
  1021  
  1022  		// Set up loop counter in CTR, used by BC
  1023  		// XXLXOR clears VS32
  1024  		//       XXLXOR VS32,VS32,VS32
  1025  		//       MOVD len/64,REG_TMP
  1026  		//       MOVD REG_TMP,CTR
  1027  		//       loop:
  1028  		//       STXV VS32,0(R20)
  1029  		//       STXV VS32,16(R20)
  1030  		//       STXV VS32,32(R20)
  1031  		//       STXV VS32,48(R20)
  1032  		//       ADD  $64,R20
  1033  		//       BC   16, 0, loop
  1034  
  1035  		// Loop count; each iteration zeroes 64 bytes
  1036  		ctr := v.AuxInt / 64
  1037  
  1038  		// Remainder bytes
  1039  		rem := v.AuxInt % 64
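        		// E.g. (illustrative) AuxInt = 140: ctr = 2 loop iterations
        		// zero 128 bytes, and rem = 12 is cleared with a MOVD plus a
        		// MOVW.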
  1040  
  1041  		// Only generate a loop if there is more
  1042  		// than 1 iteration.
  1043  		if ctr > 1 {
  1044  			// Set up VS32 (V0) to hold 0s
  1045  			p := s.Prog(ppc64.AXXLXOR)
  1046  			p.From.Type = obj.TYPE_REG
  1047  			p.From.Reg = ppc64.REG_VS32
  1048  			p.To.Type = obj.TYPE_REG
  1049  			p.To.Reg = ppc64.REG_VS32
  1050  			p.Reg = ppc64.REG_VS32
  1051  
  1052  			// Set up CTR loop counter
  1053  			p = s.Prog(ppc64.AMOVD)
  1054  			p.From.Type = obj.TYPE_CONST
  1055  			p.From.Offset = ctr
  1056  			p.To.Type = obj.TYPE_REG
  1057  			p.To.Reg = ppc64.REGTMP
  1058  
  1059  			p = s.Prog(ppc64.AMOVD)
  1060  			p.From.Type = obj.TYPE_REG
  1061  			p.From.Reg = ppc64.REGTMP
  1062  			p.To.Type = obj.TYPE_REG
  1063  			p.To.Reg = ppc64.REG_CTR
  1064  
  1065  			// Don't generate padding for
  1066  			// loops with few iterations.
  1067  			if ctr > 3 {
  1068  				p = s.Prog(obj.APCALIGN)
  1069  				p.From.Type = obj.TYPE_CONST
  1070  				p.From.Offset = 16
  1071  			}
  1072  
  1073  			// generate 4 STXVs to zero 64 bytes
  1074  			var top *obj.Prog
  1075  
  1076  			p = s.Prog(ppc64.ASTXV)
  1077  			p.From.Type = obj.TYPE_REG
  1078  			p.From.Reg = ppc64.REG_VS32
  1079  			p.To.Type = obj.TYPE_MEM
  1080  			p.To.Reg = v.Args[0].Reg()
  1081  
  1082  			// Save the top of the loop
  1083  			if top == nil {
  1084  				top = p
  1085  			}
  1086  			p = s.Prog(ppc64.ASTXV)
  1087  			p.From.Type = obj.TYPE_REG
  1088  			p.From.Reg = ppc64.REG_VS32
  1089  			p.To.Type = obj.TYPE_MEM
  1090  			p.To.Reg = v.Args[0].Reg()
  1091  			p.To.Offset = 16
  1092  
  1093  			p = s.Prog(ppc64.ASTXV)
  1094  			p.From.Type = obj.TYPE_REG
  1095  			p.From.Reg = ppc64.REG_VS32
  1096  			p.To.Type = obj.TYPE_MEM
  1097  			p.To.Reg = v.Args[0].Reg()
  1098  			p.To.Offset = 32
  1099  
  1100  			p = s.Prog(ppc64.ASTXV)
  1101  			p.From.Type = obj.TYPE_REG
  1102  			p.From.Reg = ppc64.REG_VS32
  1103  			p.To.Type = obj.TYPE_MEM
  1104  			p.To.Reg = v.Args[0].Reg()
  1105  			p.To.Offset = 48
  1106  
  1107  			// Increment address for the
  1108  			// 64 bytes just zeroed.
  1109  			p = s.Prog(ppc64.AADD)
  1110  			p.Reg = v.Args[0].Reg()
  1111  			p.From.Type = obj.TYPE_CONST
  1112  			p.From.Offset = 64
  1113  			p.To.Type = obj.TYPE_REG
  1114  			p.To.Reg = v.Args[0].Reg()
  1115  
  1116  			// Branch back to top of loop
  1117  			// based on CTR
  1118  			// BC with BO_BCTR generates bdnz
  1119  			p = s.Prog(ppc64.ABC)
  1120  			p.From.Type = obj.TYPE_CONST
  1121  			p.From.Offset = ppc64.BO_BCTR
  1122  			p.Reg = ppc64.REG_CR0LT
  1123  			p.To.Type = obj.TYPE_BRANCH
  1124  			p.To.SetTarget(top)
  1125  		}
  1126  		// When ctr == 1 the loop was not generated but
  1127  		// there are at least 64 bytes to clear, so add
  1128  		// that to the remainder to generate the code
  1129  		// to clear those doublewords.
  1130  		if ctr == 1 {
  1131  			rem += 64
  1132  		}
  1133  
  1134  		// Clear the remainder starting at offset zero
  1135  		offset := int64(0)
  1136  
  1137  		if rem >= 16 && ctr <= 1 {
  1138  			// If the XXLXOR hasn't already been
  1139  			// generated, do it here to initialize
  1140  			// VS32 (V0) to 0.
  1141  			p := s.Prog(ppc64.AXXLXOR)
  1142  			p.From.Type = obj.TYPE_REG
  1143  			p.From.Reg = ppc64.REG_VS32
  1144  			p.To.Type = obj.TYPE_REG
  1145  			p.To.Reg = ppc64.REG_VS32
  1146  			p.Reg = ppc64.REG_VS32
  1147  		}
  1148  		// Generate STXV for 32 or 64
  1149  		// bytes.
  1150  		for rem >= 32 {
  1151  			p := s.Prog(ppc64.ASTXV)
  1152  			p.From.Type = obj.TYPE_REG
  1153  			p.From.Reg = ppc64.REG_VS32
  1154  			p.To.Type = obj.TYPE_MEM
  1155  			p.To.Reg = v.Args[0].Reg()
  1156  			p.To.Offset = offset
  1157  
  1158  			p = s.Prog(ppc64.ASTXV)
  1159  			p.From.Type = obj.TYPE_REG
  1160  			p.From.Reg = ppc64.REG_VS32
  1161  			p.To.Type = obj.TYPE_MEM
  1162  			p.To.Reg = v.Args[0].Reg()
  1163  			p.To.Offset = offset + 16
  1164  			offset += 32
  1165  			rem -= 32
  1166  		}
  1167  		// Generate 16 bytes
  1168  		if rem >= 16 {
  1169  			p := s.Prog(ppc64.ASTXV)
  1170  			p.From.Type = obj.TYPE_REG
  1171  			p.From.Reg = ppc64.REG_VS32
  1172  			p.To.Type = obj.TYPE_MEM
  1173  			p.To.Reg = v.Args[0].Reg()
  1174  			p.To.Offset = offset
  1175  			offset += 16
  1176  			rem -= 16
  1177  		}
  1178  
  1179  		// first clear as many doublewords as possible
  1180  		// then clear remaining sizes as available
  1181  		for rem > 0 {
  1182  			op, size := ppc64.AMOVB, int64(1)
  1183  			switch {
  1184  			case rem >= 8:
  1185  				op, size = ppc64.AMOVD, 8
  1186  			case rem >= 4:
  1187  				op, size = ppc64.AMOVW, 4
  1188  			case rem >= 2:
  1189  				op, size = ppc64.AMOVH, 2
  1190  			}
  1191  			p := s.Prog(op)
  1192  			p.From.Type = obj.TYPE_REG
  1193  			p.From.Reg = ppc64.REG_R0
  1194  			p.To.Type = obj.TYPE_MEM
  1195  			p.To.Reg = v.Args[0].Reg()
  1196  			p.To.Offset = offset
  1197  			rem -= size
  1198  			offset += size
  1199  		}
  1200  
  1201  	case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
  1202  
  1203  		// Unaligned data doesn't hurt performance
  1204  		// for these instructions on power8.
  1205  
  1206  		// For sizes >= 64 generate a loop as follows:
  1207  
  1208  		// Set up loop counter in CTR, used by BC
  1209  		//       XXLXOR VS32,VS32,VS32
  1210  		//	 MOVD len/32,REG_TMP
  1211  		//	 MOVD REG_TMP,CTR
  1212  		//       MOVD $16,REG_TMP
  1213  		//	 loop:
  1214  		//	 STXVD2X VS32,(R0)(R20)
  1215  		//	 STXVD2X VS32,(R31)(R20)
  1216  		//	 ADD  $32,R20
  1217  		//	 BC   16, 0, loop
  1218  		//
  1219  		// any remainder is done as described below
  1220  
  1221  		// for sizes < 64 bytes, first clear as many doublewords as possible,
  1222  		// then handle the remainder
  1223  		//	MOVD R0,(R20)
  1224  		//	MOVD R0,8(R20)
  1225  		// .... etc.
  1226  		//
  1227  		// the remainder bytes are cleared using one or more
  1228  		// of the following instructions with the appropriate
  1229  		// offsets depending which instructions are needed
  1230  		//
  1231  		//	MOVW R0,n1(R20)	4 bytes
  1232  		//	MOVH R0,n2(R20)	2 bytes
  1233  		//	MOVB R0,n3(R20)	1 byte
  1234  		//
  1235  		// 7 bytes: MOVW, MOVH, MOVB
  1236  		// 6 bytes: MOVW, MOVH
  1237  		// 5 bytes: MOVW, MOVB
  1238  		// 3 bytes: MOVH, MOVB
  1239  
  1240  		// each loop iteration does 32 bytes
  1241  		ctr := v.AuxInt / 32
  1242  
  1243  		// remainder bytes
  1244  		rem := v.AuxInt % 32
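        		// E.g. (illustrative) AuxInt = 100: ctr = 3 loop iterations
        		// zero 96 bytes, and rem = 4 is cleared with a single MOVW.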
  1245  
  1246  		// only generate a loop if there is more
  1247  		// than 1 iteration.
  1248  		if ctr > 1 {
  1249  			// Set up VS32 (V0) to hold 0s
  1250  			p := s.Prog(ppc64.AXXLXOR)
  1251  			p.From.Type = obj.TYPE_REG
  1252  			p.From.Reg = ppc64.REG_VS32
  1253  			p.To.Type = obj.TYPE_REG
  1254  			p.To.Reg = ppc64.REG_VS32
  1255  			p.Reg = ppc64.REG_VS32
  1256  
  1257  			// Set up CTR loop counter
  1258  			p = s.Prog(ppc64.AMOVD)
  1259  			p.From.Type = obj.TYPE_CONST
  1260  			p.From.Offset = ctr
  1261  			p.To.Type = obj.TYPE_REG
  1262  			p.To.Reg = ppc64.REGTMP
  1263  
  1264  			p = s.Prog(ppc64.AMOVD)
  1265  			p.From.Type = obj.TYPE_REG
  1266  			p.From.Reg = ppc64.REGTMP
  1267  			p.To.Type = obj.TYPE_REG
  1268  			p.To.Reg = ppc64.REG_CTR
  1269  
  1270  			// Set up R31 to hold index value 16
  1271  			p = s.Prog(ppc64.AMOVD)
  1272  			p.From.Type = obj.TYPE_CONST
  1273  			p.From.Offset = 16
  1274  			p.To.Type = obj.TYPE_REG
  1275  			p.To.Reg = ppc64.REGTMP
  1276  
  1277  			// Don't add padding for alignment
  1278  			// with few loop iterations.
  1279  			if ctr > 3 {
  1280  				p = s.Prog(obj.APCALIGN)
  1281  				p.From.Type = obj.TYPE_CONST
  1282  				p.From.Offset = 16
  1283  			}
  1284  
  1285  			// Generate 2 STXVD2Xs, each storing 16 bytes
  1286  			// (32 bytes per iteration); the loop top must be saved.
  1287  			var top *obj.Prog
  1288  			// This is the top of loop
  1289  
  1290  			p = s.Prog(ppc64.ASTXVD2X)
  1291  			p.From.Type = obj.TYPE_REG
  1292  			p.From.Reg = ppc64.REG_VS32
  1293  			p.To.Type = obj.TYPE_MEM
  1294  			p.To.Reg = v.Args[0].Reg()
  1295  			p.To.Index = ppc64.REGZERO
  1296  			// Save the top of the loop
  1297  			if top == nil {
  1298  				top = p
  1299  			}
  1300  			p = s.Prog(ppc64.ASTXVD2X)
  1301  			p.From.Type = obj.TYPE_REG
  1302  			p.From.Reg = ppc64.REG_VS32
  1303  			p.To.Type = obj.TYPE_MEM
  1304  			p.To.Reg = v.Args[0].Reg()
  1305  			p.To.Index = ppc64.REGTMP
  1306  
  1307  			// Increment address for the
  1308  			// 4 doublewords just zeroed.
  1309  			p = s.Prog(ppc64.AADD)
  1310  			p.Reg = v.Args[0].Reg()
  1311  			p.From.Type = obj.TYPE_CONST
  1312  			p.From.Offset = 32
  1313  			p.To.Type = obj.TYPE_REG
  1314  			p.To.Reg = v.Args[0].Reg()
  1315  
  1316  			// Branch back to top of loop
  1317  			// based on CTR
  1318  			// BC with BO_BCTR generates bdnz
  1319  			p = s.Prog(ppc64.ABC)
  1320  			p.From.Type = obj.TYPE_CONST
  1321  			p.From.Offset = ppc64.BO_BCTR
  1322  			p.Reg = ppc64.REG_CR0LT
  1323  			p.To.Type = obj.TYPE_BRANCH
  1324  			p.To.SetTarget(top)
  1325  		}
  1326  
  1327  		// when ctr == 1 the loop was not generated but
  1328  		// there are at least 32 bytes to clear, so add
  1329  		// that to the remainder to generate the code
  1330  		// to clear those doublewords.
  1331  		if ctr == 1 {
  1332  			rem += 32
  1333  		}
  1334  
  1335  		// clear the remainder starting at offset zero
  1336  		offset := int64(0)
  1337  
  1338  		// first clear as many doublewords as possible
  1339  		// then clear remaining sizes as available
  1340  		for rem > 0 {
  1341  			op, size := ppc64.AMOVB, int64(1)
  1342  			switch {
  1343  			case rem >= 8:
  1344  				op, size = ppc64.AMOVD, 8
  1345  			case rem >= 4:
  1346  				op, size = ppc64.AMOVW, 4
  1347  			case rem >= 2:
  1348  				op, size = ppc64.AMOVH, 2
  1349  			}
  1350  			p := s.Prog(op)
  1351  			p.From.Type = obj.TYPE_REG
  1352  			p.From.Reg = ppc64.REG_R0
  1353  			p.To.Type = obj.TYPE_MEM
  1354  			p.To.Reg = v.Args[0].Reg()
  1355  			p.To.Offset = offset
  1356  			rem -= size
  1357  			offset += size
  1358  		}
  1359  
  1360  	case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
  1361  
  1362  		bytesPerLoop := int64(32)
  1363  		// This will be used when moving more
  1364  		// than 8 bytes.  Moves start with
  1365  		// as many 8 byte moves as possible, then
  1366  		// 4, 2, or 1 byte(s) as remaining.  This will
  1367  		// work and be efficient for power8 or later.
  1368  		// If there are 64 or more bytes, then a
  1369  		// loop is generated to move 32 bytes and
  1370  		// update the src and dst addresses on each
  1371  		// iteration. When < 64 bytes, the appropriate
  1372  		// number of moves are generated based on the
  1373  		// size.
  1374  		// When moving >= 64 bytes a loop is used
  1375  		//	MOVD len/32,REG_TMP
  1376  		//	MOVD REG_TMP,CTR
  1377  		//	MOVD $16,REG_TMP
  1378  		// top:
  1379  		//	LXVD2X (R0)(R21),VS32
  1380  		//	LXVD2X (R31)(R21),VS33
  1381  		//	ADD $32,R21
  1382  		//	STXVD2X VS32,(R0)(R20)
  1383  		//	STXVD2X VS33,(R31)(R20)
  1384  		//	ADD $32,R20
  1385  		//	BC 16,0,top
  1386  		// Bytes not moved by this loop are moved
  1387  		// with a combination of the following instructions,
  1388  		// starting with the largest sizes and generating as
  1389  		// many as needed, using the appropriate offset value.
  1390  		//	MOVD  n(R21),R31
  1391  		//	MOVD  R31,n(R20)
  1392  		//	MOVW  n1(R21),R31
  1393  		//	MOVW  R31,n1(R20)
  1394  		//	MOVH  n2(R21),R31
  1395  		//	MOVH  R31,n2(R20)
  1396  		//	MOVB  n3(R21),R31
  1397  		//	MOVB  R31,n3(R20)
  1398  
  1399  		// Each loop iteration moves 32 bytes
  1400  		ctr := v.AuxInt / bytesPerLoop
  1401  
  1402  		// Remainder after the loop
  1403  		rem := v.AuxInt % bytesPerLoop
  1404  
  1405  		dstReg := v.Args[0].Reg()
  1406  		srcReg := v.Args[1].Reg()
  1407  
  1408  		// The set of registers used here must match the clobbered reg list
  1409  		// in PPC64Ops.go.
  1410  		offset := int64(0)
  1411  
  1412  		// top of the loop
  1413  		var top *obj.Prog
  1414  		// Only generate looping code when the loop counter is > 1 (>= 64 bytes)
  1415  		if ctr > 1 {
  1416  			// Set up the CTR
  1417  			p := s.Prog(ppc64.AMOVD)
  1418  			p.From.Type = obj.TYPE_CONST
  1419  			p.From.Offset = ctr
  1420  			p.To.Type = obj.TYPE_REG
  1421  			p.To.Reg = ppc64.REGTMP
  1422  
  1423  			p = s.Prog(ppc64.AMOVD)
  1424  			p.From.Type = obj.TYPE_REG
  1425  			p.From.Reg = ppc64.REGTMP
  1426  			p.To.Type = obj.TYPE_REG
  1427  			p.To.Reg = ppc64.REG_CTR
  1428  
  1429  			// Use REGTMP as index reg
  1430  			p = s.Prog(ppc64.AMOVD)
  1431  			p.From.Type = obj.TYPE_CONST
  1432  			p.From.Offset = 16
  1433  			p.To.Type = obj.TYPE_REG
  1434  			p.To.Reg = ppc64.REGTMP
  1435  
  1436  			// Don't add padding for
  1437  			// alignment with small iteration
  1438  			// counts.
  1439  			if ctr > 3 {
  1440  				p = s.Prog(obj.APCALIGN)
  1441  				p.From.Type = obj.TYPE_CONST
  1442  				p.From.Offset = 16
  1443  			}
  1444  
  1445  			// Generate 16 byte loads and stores.
  1446  			// Use temp register for index (16)
  1447  			// on the second one.
  1448  
  1449  			p = s.Prog(ppc64.ALXVD2X)
  1450  			p.From.Type = obj.TYPE_MEM
  1451  			p.From.Reg = srcReg
  1452  			p.From.Index = ppc64.REGZERO
  1453  			p.To.Type = obj.TYPE_REG
  1454  			p.To.Reg = ppc64.REG_VS32
  1455  			if top == nil {
  1456  				top = p
  1457  			}
  1458  			p = s.Prog(ppc64.ALXVD2X)
  1459  			p.From.Type = obj.TYPE_MEM
  1460  			p.From.Reg = srcReg
  1461  			p.From.Index = ppc64.REGTMP
  1462  			p.To.Type = obj.TYPE_REG
  1463  			p.To.Reg = ppc64.REG_VS33
  1464  
  1465  			// increment the src reg for next iteration
  1466  			p = s.Prog(ppc64.AADD)
  1467  			p.Reg = srcReg
  1468  			p.From.Type = obj.TYPE_CONST
  1469  			p.From.Offset = bytesPerLoop
  1470  			p.To.Type = obj.TYPE_REG
  1471  			p.To.Reg = srcReg
  1472  
  1473  			// generate 16 byte stores
  1474  			p = s.Prog(ppc64.ASTXVD2X)
  1475  			p.From.Type = obj.TYPE_REG
  1476  			p.From.Reg = ppc64.REG_VS32
  1477  			p.To.Type = obj.TYPE_MEM
  1478  			p.To.Reg = dstReg
  1479  			p.To.Index = ppc64.REGZERO
  1480  
  1481  			p = s.Prog(ppc64.ASTXVD2X)
  1482  			p.From.Type = obj.TYPE_REG
  1483  			p.From.Reg = ppc64.REG_VS33
  1484  			p.To.Type = obj.TYPE_MEM
  1485  			p.To.Reg = dstReg
  1486  			p.To.Index = ppc64.REGTMP
  1487  
  1488  			// increment the dst reg for next iteration
  1489  			p = s.Prog(ppc64.AADD)
  1490  			p.Reg = dstReg
  1491  			p.From.Type = obj.TYPE_CONST
  1492  			p.From.Offset = bytesPerLoop
  1493  			p.To.Type = obj.TYPE_REG
  1494  			p.To.Reg = dstReg
  1495  
  1496  			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
  1497  			// to loop top.
  1498  			p = s.Prog(ppc64.ABC)
  1499  			p.From.Type = obj.TYPE_CONST
  1500  			p.From.Offset = ppc64.BO_BCTR
  1501  			p.Reg = ppc64.REG_CR0LT
  1502  			p.To.Type = obj.TYPE_BRANCH
  1503  			p.To.SetTarget(top)
  1504  
  1505  			// srcReg and dstReg were incremented in the loop, so
  1506  			// later instructions start with offset 0.
  1507  			offset = int64(0)
  1508  		}
  1509  
  1510  		// No loop was generated for one iteration, so
  1511  		// add 32 bytes to the remainder to move those bytes.
  1512  		if ctr == 1 {
  1513  			rem += bytesPerLoop
  1514  		}
  1515  
  1516  		if rem >= 16 {
  1517  			// Generate 16 byte loads and stores.
  1518  			// Use temp register for index (value 16)
  1519  			// on the second one.
  1520  			p := s.Prog(ppc64.ALXVD2X)
  1521  			p.From.Type = obj.TYPE_MEM
  1522  			p.From.Reg = srcReg
  1523  			p.From.Index = ppc64.REGZERO
  1524  			p.To.Type = obj.TYPE_REG
  1525  			p.To.Reg = ppc64.REG_VS32
  1526  
  1527  			p = s.Prog(ppc64.ASTXVD2X)
  1528  			p.From.Type = obj.TYPE_REG
  1529  			p.From.Reg = ppc64.REG_VS32
  1530  			p.To.Type = obj.TYPE_MEM
  1531  			p.To.Reg = dstReg
  1532  			p.To.Index = ppc64.REGZERO
  1533  
  1534  			offset = 16
  1535  			rem -= 16
  1536  
  1537  			if rem >= 16 {
  1538  				// Use REGTMP as index reg
  1539  				p := s.Prog(ppc64.AMOVD)
  1540  				p.From.Type = obj.TYPE_CONST
  1541  				p.From.Offset = 16
  1542  				p.To.Type = obj.TYPE_REG
  1543  				p.To.Reg = ppc64.REGTMP
  1544  
  1545  				p = s.Prog(ppc64.ALXVD2X)
  1546  				p.From.Type = obj.TYPE_MEM
  1547  				p.From.Reg = srcReg
  1548  				p.From.Index = ppc64.REGTMP
  1549  				p.To.Type = obj.TYPE_REG
  1550  				p.To.Reg = ppc64.REG_VS32
  1551  
  1552  				p = s.Prog(ppc64.ASTXVD2X)
  1553  				p.From.Type = obj.TYPE_REG
  1554  				p.From.Reg = ppc64.REG_VS32
  1555  				p.To.Type = obj.TYPE_MEM
  1556  				p.To.Reg = dstReg
  1557  				p.To.Index = ppc64.REGTMP
  1558  
  1559  				offset = 32
  1560  				rem -= 16
  1561  			}
  1562  		}
  1563  
  1564  		// Generate all the remaining load and store pairs, starting with
  1565  		// as many 8 byte moves as possible, then 4, 2, 1.
  1566  		for rem > 0 {
  1567  			op, size := ppc64.AMOVB, int64(1)
  1568  			switch {
  1569  			case rem >= 8:
  1570  				op, size = ppc64.AMOVD, 8
  1571  			case rem >= 4:
  1572  				op, size = ppc64.AMOVWZ, 4
  1573  			case rem >= 2:
  1574  				op, size = ppc64.AMOVH, 2
  1575  			}
  1576  			// Load
  1577  			p := s.Prog(op)
  1578  			p.To.Type = obj.TYPE_REG
  1579  			p.To.Reg = ppc64.REGTMP
  1580  			p.From.Type = obj.TYPE_MEM
  1581  			p.From.Reg = srcReg
  1582  			p.From.Offset = offset
  1583  
  1584  			// Store
  1585  			p = s.Prog(op)
  1586  			p.From.Type = obj.TYPE_REG
  1587  			p.From.Reg = ppc64.REGTMP
  1588  			p.To.Type = obj.TYPE_MEM
  1589  			p.To.Reg = dstReg
  1590  			p.To.Offset = offset
  1591  			rem -= size
  1592  			offset += size
  1593  		}
  1594  
  1595  	case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
  1596  		bytesPerLoop := int64(64)
  1597  		// This is used when moving more than
  1598  		// 8 bytes on power9 using the LXV/STXV
  1599  		// instructions. Moves start with as many
  1600  		// 8 byte moves as possible, then 4, 2, or
  1601  		// 1 byte(s) for the remainder. If there are
  1602  		// 128 or more bytes, a loop moving 64 bytes
  1603  		// per iteration is generated; otherwise the
  1604  		// appropriate moves are generated by size:
  1605  		//      MOVD len/64,REG_TMP
  1606  		//      MOVD REG_TMP,CTR
  1607  		// top:
  1608  		//      LXV 0(R21),VS32
  1609  		//      LXV 16(R21),VS33
  1610  		//      STXV VS32,0(R20)
  1611  		//      STXV VS33,16(R20)
  1612  		//      LXV 32(R21),VS32
  1613  		//      LXV 48(R21),VS33
  1614  		//      STXV VS32,32(R20)
  1615  		//      STXV VS33,48(R20)
  1616  		//      ADD $64,R21
  1617  		//      ADD $64,R20
  1618  		//      BC 16,0,top
  1619  		// Bytes not moved by this loop are moved
  1620  		// with a combination of the following instructions,
  1621  		// starting with the largest sizes and generating as
  1622  		// many as needed, using the appropriate offset value.
  1623  		//      MOVD  n(R21),R31
  1624  		//      MOVD  R31,n(R20)
  1625  		//      MOVW  n1(R21),R31
  1626  		//      MOVW  R31,n1(R20)
  1627  		//      MOVH  n2(R21),R31
  1628  		//      MOVH  R31,n2(R20)
  1629  		//      MOVB  n3(R21),R31
  1630  		//      MOVB  R31,n3(R20)
  1631  
  1632  		// Each loop iteration moves 64 bytes
  1633  		ctr := v.AuxInt / bytesPerLoop
  1634  
  1635  		// Remainder after the loop
  1636  		rem := v.AuxInt % bytesPerLoop
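        		// e.g. a 150-byte move gives ctr == 2 (two 64-byte loop
        		// iterations) and rem == 22, copied below as 16+4+2 bytes.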
  1637  
  1638  		dstReg := v.Args[0].Reg()
  1639  		srcReg := v.Args[1].Reg()
  1640  
  1641  		offset := int64(0)
  1642  
  1643  		// top of the loop
  1644  		var top *obj.Prog
  1645  
  1646  		// Only generate looping code when the loop counter is > 1 (at least 2*bytesPerLoop bytes)
  1647  		if ctr > 1 {
  1648  			// Set up the CTR
  1649  			p := s.Prog(ppc64.AMOVD)
  1650  			p.From.Type = obj.TYPE_CONST
  1651  			p.From.Offset = ctr
  1652  			p.To.Type = obj.TYPE_REG
  1653  			p.To.Reg = ppc64.REGTMP
  1654  
  1655  			p = s.Prog(ppc64.AMOVD)
  1656  			p.From.Type = obj.TYPE_REG
  1657  			p.From.Reg = ppc64.REGTMP
  1658  			p.To.Type = obj.TYPE_REG
  1659  			p.To.Reg = ppc64.REG_CTR
  1660  
  1661  			p = s.Prog(obj.APCALIGN)
  1662  			p.From.Type = obj.TYPE_CONST
  1663  			p.From.Offset = 16
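        			// Pad so the loop top is aligned on a 16 byte boundary
        			// (presumably to improve instruction fetch of the hot loop).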
  1664  
  1665  			// Generate 16 byte loads and stores.
  1666  			p = s.Prog(ppc64.ALXV)
  1667  			p.From.Type = obj.TYPE_MEM
  1668  			p.From.Reg = srcReg
  1669  			p.From.Offset = offset
  1670  			p.To.Type = obj.TYPE_REG
  1671  			p.To.Reg = ppc64.REG_VS32
  1672  			if top == nil {
  1673  				top = p
  1674  			}
  1675  			p = s.Prog(ppc64.ALXV)
  1676  			p.From.Type = obj.TYPE_MEM
  1677  			p.From.Reg = srcReg
  1678  			p.From.Offset = offset + 16
  1679  			p.To.Type = obj.TYPE_REG
  1680  			p.To.Reg = ppc64.REG_VS33
  1681  
  1682  			// generate 16 byte stores
  1683  			p = s.Prog(ppc64.ASTXV)
  1684  			p.From.Type = obj.TYPE_REG
  1685  			p.From.Reg = ppc64.REG_VS32
  1686  			p.To.Type = obj.TYPE_MEM
  1687  			p.To.Reg = dstReg
  1688  			p.To.Offset = offset
  1689  
  1690  			p = s.Prog(ppc64.ASTXV)
  1691  			p.From.Type = obj.TYPE_REG
  1692  			p.From.Reg = ppc64.REG_VS33
  1693  			p.To.Type = obj.TYPE_MEM
  1694  			p.To.Reg = dstReg
  1695  			p.To.Offset = offset + 16
  1696  
  1697  			// Generate 16 byte loads and stores.
  1698  			p = s.Prog(ppc64.ALXV)
  1699  			p.From.Type = obj.TYPE_MEM
  1700  			p.From.Reg = srcReg
  1701  			p.From.Offset = offset + 32
  1702  			p.To.Type = obj.TYPE_REG
  1703  			p.To.Reg = ppc64.REG_VS32
  1704  
  1705  			p = s.Prog(ppc64.ALXV)
  1706  			p.From.Type = obj.TYPE_MEM
  1707  			p.From.Reg = srcReg
  1708  			p.From.Offset = offset + 48
  1709  			p.To.Type = obj.TYPE_REG
  1710  			p.To.Reg = ppc64.REG_VS33
  1711  
  1712  			// generate 16 byte stores
  1713  			p = s.Prog(ppc64.ASTXV)
  1714  			p.From.Type = obj.TYPE_REG
  1715  			p.From.Reg = ppc64.REG_VS32
  1716  			p.To.Type = obj.TYPE_MEM
  1717  			p.To.Reg = dstReg
  1718  			p.To.Offset = offset + 32
  1719  
  1720  			p = s.Prog(ppc64.ASTXV)
  1721  			p.From.Type = obj.TYPE_REG
  1722  			p.From.Reg = ppc64.REG_VS33
  1723  			p.To.Type = obj.TYPE_MEM
  1724  			p.To.Reg = dstReg
  1725  			p.To.Offset = offset + 48
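        			// The four LXV/STXV pairs above copy bytesPerLoop (64)
        			// bytes per iteration.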
  1726  
  1727  			// increment the src reg for next iteration
  1728  			p = s.Prog(ppc64.AADD)
  1729  			p.Reg = srcReg
  1730  			p.From.Type = obj.TYPE_CONST
  1731  			p.From.Offset = bytesPerLoop
  1732  			p.To.Type = obj.TYPE_REG
  1733  			p.To.Reg = srcReg
  1734  
  1735  			// increment the dst reg for next iteration
  1736  			p = s.Prog(ppc64.AADD)
  1737  			p.Reg = dstReg
  1738  			p.From.Type = obj.TYPE_CONST
  1739  			p.From.Offset = bytesPerLoop
  1740  			p.To.Type = obj.TYPE_REG
  1741  			p.To.Reg = dstReg
  1742  
  1743  			// BC with BO_BCTR generates a bdnz: decrement CTR and
  1744  			// branch back to the loop top while CTR is nonzero.
  1745  			p = s.Prog(ppc64.ABC)
  1746  			p.From.Type = obj.TYPE_CONST
  1747  			p.From.Offset = ppc64.BO_BCTR
  1748  			p.Reg = ppc64.REG_CR0LT
  1749  			p.To.Type = obj.TYPE_BRANCH
  1750  			p.To.SetTarget(top)
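        			// With BO_BCTR the BI field is ignored; CR0LT above is
        			// just the preferred value for an ignored BI.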
  1751  
  1752  			// srcReg and dstReg were incremented in the loop, so
  1753  			// later instructions start with offset 0.
  1754  			offset = int64(0)
  1755  		}
  1756  
  1757  		// No loop was generated for a single iteration, so
  1758  		// add bytesPerLoop (64) bytes to the remainder to move them here.
  1759  		if ctr == 1 {
  1760  			rem += bytesPerLoop
  1761  		}
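        		// e.g. AuxInt == 64: ctr == 1, so no loop is emitted; rem
        		// becomes 64 and is copied below as 32+16+16 bytes.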
  1762  		if rem >= 32 {
  1763  			p := s.Prog(ppc64.ALXV)
  1764  			p.From.Type = obj.TYPE_MEM
  1765  			p.From.Reg = srcReg
  1766  			p.To.Type = obj.TYPE_REG
  1767  			p.To.Reg = ppc64.REG_VS32
  1768  
  1769  			p = s.Prog(ppc64.ALXV)
  1770  			p.From.Type = obj.TYPE_MEM
  1771  			p.From.Reg = srcReg
  1772  			p.From.Offset = 16
  1773  			p.To.Type = obj.TYPE_REG
  1774  			p.To.Reg = ppc64.REG_VS33
  1775  
  1776  			p = s.Prog(ppc64.ASTXV)
  1777  			p.From.Type = obj.TYPE_REG
  1778  			p.From.Reg = ppc64.REG_VS32
  1779  			p.To.Type = obj.TYPE_MEM
  1780  			p.To.Reg = dstReg
  1781  
  1782  			p = s.Prog(ppc64.ASTXV)
  1783  			p.From.Type = obj.TYPE_REG
  1784  			p.From.Reg = ppc64.REG_VS33
  1785  			p.To.Type = obj.TYPE_MEM
  1786  			p.To.Reg = dstReg
  1787  			p.To.Offset = 16
  1788  
  1789  			offset = 32
  1790  			rem -= 32
  1791  		}
  1792  
  1793  		if rem >= 16 {
  1794  			// Generate 16 byte loads and stores.
  1795  			p := s.Prog(ppc64.ALXV)
  1796  			p.From.Type = obj.TYPE_MEM
  1797  			p.From.Reg = srcReg
  1798  			p.From.Offset = offset
  1799  			p.To.Type = obj.TYPE_REG
  1800  			p.To.Reg = ppc64.REG_VS32
  1801  
  1802  			p = s.Prog(ppc64.ASTXV)
  1803  			p.From.Type = obj.TYPE_REG
  1804  			p.From.Reg = ppc64.REG_VS32
  1805  			p.To.Type = obj.TYPE_MEM
  1806  			p.To.Reg = dstReg
  1807  			p.To.Offset = offset
  1808  
  1809  			offset += 16
  1810  			rem -= 16
  1811  
  1812  			if rem >= 16 {
  1813  				p := s.Prog(ppc64.ALXV)
  1814  				p.From.Type = obj.TYPE_MEM
  1815  				p.From.Reg = srcReg
  1816  				p.From.Offset = offset
  1817  				p.To.Type = obj.TYPE_REG
  1818  				p.To.Reg = ppc64.REG_VS32
  1819  
  1820  				p = s.Prog(ppc64.ASTXV)
  1821  				p.From.Type = obj.TYPE_REG
  1822  				p.From.Reg = ppc64.REG_VS32
  1823  				p.To.Type = obj.TYPE_MEM
  1824  				p.To.Reg = dstReg
  1825  				p.To.Offset = offset
  1826  
  1827  				offset += 16
  1828  				rem -= 16
  1829  			}
  1830  		}
  1831  		// Generate all the remaining load and store pairs, starting with
  1832  		// as many 8 byte moves as possible, then 4, 2, 1.
  1833  		for rem > 0 {
  1834  			op, size := ppc64.AMOVB, int64(1)
  1835  			switch {
  1836  			case rem >= 8:
  1837  				op, size = ppc64.AMOVD, 8
  1838  			case rem >= 4:
  1839  				op, size = ppc64.AMOVWZ, 4
  1840  			case rem >= 2:
  1841  				op, size = ppc64.AMOVH, 2
  1842  			}
  1843  			// Load
  1844  			p := s.Prog(op)
  1845  			p.To.Type = obj.TYPE_REG
  1846  			p.To.Reg = ppc64.REGTMP
  1847  			p.From.Type = obj.TYPE_MEM
  1848  			p.From.Reg = srcReg
  1849  			p.From.Offset = offset
  1850  
  1851  			// Store
  1852  			p = s.Prog(op)
  1853  			p.From.Type = obj.TYPE_REG
  1854  			p.From.Reg = ppc64.REGTMP
  1855  			p.To.Type = obj.TYPE_MEM
  1856  			p.To.Reg = dstReg
  1857  			p.To.Offset = offset
  1858  			rem -= size
  1859  			offset += size
  1860  		}
  1861  
  1862  	case ssa.OpPPC64CALLstatic:
  1863  		s.Call(v)
  1864  
  1865  	case ssa.OpPPC64CALLtail:
  1866  		s.TailCall(v)
  1867  
  1868  	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
  1869  		p := s.Prog(ppc64.AMOVD)
  1870  		p.From.Type = obj.TYPE_REG
  1871  		p.From.Reg = v.Args[0].Reg()
  1872  		p.To.Type = obj.TYPE_REG
  1873  		p.To.Reg = ppc64.REG_LR
  1874  
  1875  		if v.Args[0].Reg() != ppc64.REG_R12 {
  1876  			v.Fatalf("Function address for %v should be in R12 (%d) but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
  1877  		}
  1878  
  1879  		pp := s.Call(v)
  1880  
  1881  		// Convert the call into a blrl, with a hint that this is not a subroutine return.
  1882  		// The full bclrl opcode must be used when a hint is passed.
  1883  		pp.As = ppc64.ABCL
  1884  		pp.From.Type = obj.TYPE_CONST
  1885  		pp.From.Offset = ppc64.BO_ALWAYS
  1886  		pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
  1887  		pp.To.Reg = ppc64.REG_LR
  1888  		pp.AddRestSourceConst(1)
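        		// The trailing constant is the BH hint operand; 1 marks the
        		// branch as not being a subroutine return, per the comment above.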
  1889  
  1890  		if ppc64.NeedTOCpointer(base.Ctxt) {
  1891  			// When compiling Go as PIC, the function we just
  1892  			// called via pointer may live in a separate module
  1893  			// and may have overwritten the TOC pointer in R2;
  1894  			// reload it.
  1895  			q := s.Prog(ppc64.AMOVD)
  1896  			q.From.Type = obj.TYPE_MEM
  1897  			q.From.Offset = 24
  1898  			q.From.Reg = ppc64.REGSP
  1899  			q.To.Type = obj.TYPE_REG
  1900  			q.To.Reg = ppc64.REG_R2
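        			// R2 is reloaded from 24(R1), the TOC save doubleword in
        			// the fixed frame area (ELFv2 convention), where the
        			// prologue is expected to have saved it.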
  1901  		}
  1902  
  1903  	case ssa.OpPPC64LoweredWB:
  1904  		p := s.Prog(obj.ACALL)
  1905  		p.To.Type = obj.TYPE_MEM
  1906  		p.To.Name = obj.NAME_EXTERN
  1907  		// AuxInt encodes how many buffer entries we need.
  1908  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
  1909  
  1910  	case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
  1911  		p := s.Prog(obj.ACALL)
  1912  		p.To.Type = obj.TYPE_MEM
  1913  		p.To.Name = obj.NAME_EXTERN
  1914  		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
  1915  		s.UseArgs(16) // space used in callee args area by assembly stubs
  1916  
  1917  	case ssa.OpPPC64LoweredNilCheck:
  1918  		if buildcfg.GOOS == "aix" {
  1919  			// CMP Rarg0, R0
  1920  			// BNE 2(PC)
  1921  			// STW R0, 0(R0)
  1922  			// NOP (so the BNE has somewhere to land)
  1923  
  1924  			// CMP Rarg0, R0
  1925  			p := s.Prog(ppc64.ACMP)
  1926  			p.From.Type = obj.TYPE_REG
  1927  			p.From.Reg = v.Args[0].Reg()
  1928  			p.To.Type = obj.TYPE_REG
  1929  			p.To.Reg = ppc64.REG_R0
  1930  
  1931  			// BNE 2(PC)
  1932  			p2 := s.Prog(ppc64.ABNE)
  1933  			p2.To.Type = obj.TYPE_BRANCH
  1934  
  1935  			// STW R0, 0(R0)
  1936  			// A write to address 0 is forbidden and will trigger a SIGSEGV
  1937  			p = s.Prog(ppc64.AMOVW)
  1938  			p.From.Type = obj.TYPE_REG
  1939  			p.From.Reg = ppc64.REG_R0
  1940  			p.To.Type = obj.TYPE_MEM
  1941  			p.To.Reg = ppc64.REG_R0
  1942  
  1943  			// NOP (so the BNE has somewhere to land)
  1944  			nop := s.Prog(obj.ANOP)
  1945  			p2.To.SetTarget(nop)
  1946  
  1947  		} else {
  1948  			// Issue a load which will fault if arg is nil.
  1949  			p := s.Prog(ppc64.AMOVBZ)
  1950  			p.From.Type = obj.TYPE_MEM
  1951  			p.From.Reg = v.Args[0].Reg()
  1952  			ssagen.AddAux(&p.From, v)
  1953  			p.To.Type = obj.TYPE_REG
  1954  			p.To.Reg = ppc64.REGTMP
  1955  		}
  1956  		if logopt.Enabled() {
  1957  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1958  		}
  1959  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1960  			base.WarnfAt(v.Pos, "generated nil check")
  1961  		}
  1962  
  1963  	// These should be resolved by rules and not make it here.
  1964  	case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
  1965  		ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
  1966  		ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
  1967  		v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
  1968  	case ssa.OpPPC64InvertFlags:
  1969  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1970  	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
  1971  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
  1972  	case ssa.OpClobber, ssa.OpClobberReg:
  1973  		// TODO: implement for clobberdead experiment. Nop is ok for now.
  1974  	default:
  1975  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1976  	}
  1977  }
  1978  
  1979  var blockJump = [...]struct {
  1980  	asm, invasm     obj.As
  1981  	asmeq, invasmun bool
  1982  }{
  1983  	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
  1984  	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
  1985  
  1986  	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
  1987  	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
  1988  	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
  1989  	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
  1990  
  1991  	// TODO: need to work FP comparisons into block jumps
  1992  	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
  1993  	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
  1994  	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
  1995  	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
  1996  }
  1997  
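        // ssaGenBlock emits the control flow for block b; next is the block laid
        // out after b. For the FP block kinds a single conditional branch cannot
        // express "or equal"/"or unordered", so blockJump's asmeq and invasmun
        // flags request a trailing BEQ or BVS as a second branch.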
  1998  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  1999  	switch b.Kind {
  2000  	case ssa.BlockDefer:
  2001  		// defer returns in R3:
  2002  		// 0 if we should continue executing
  2003  		// 1 if we should jump to deferreturn call
  2004  		p := s.Prog(ppc64.ACMP)
  2005  		p.From.Type = obj.TYPE_REG
  2006  		p.From.Reg = ppc64.REG_R3
  2007  		p.To.Type = obj.TYPE_CONST
  2008  		p.To.Offset = 0
  2009  
  2010  		p = s.Prog(ppc64.ABNE)
  2011  		p.To.Type = obj.TYPE_BRANCH
  2012  		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
  2013  		if b.Succs[0].Block() != next {
  2014  			p := s.Prog(obj.AJMP)
  2015  			p.To.Type = obj.TYPE_BRANCH
  2016  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  2017  		}
  2018  
  2019  	case ssa.BlockPlain:
  2020  		if b.Succs[0].Block() != next {
  2021  			p := s.Prog(obj.AJMP)
  2022  			p.To.Type = obj.TYPE_BRANCH
  2023  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  2024  		}
  2025  	case ssa.BlockExit, ssa.BlockRetJmp:
  2026  	case ssa.BlockRet:
  2027  		s.Prog(obj.ARET)
  2028  
  2029  	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
  2030  		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
  2031  		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
  2032  		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
  2033  		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
  2034  		jmp := blockJump[b.Kind]
  2035  		switch next {
  2036  		case b.Succs[0].Block():
  2037  			s.Br(jmp.invasm, b.Succs[1].Block())
  2038  			if jmp.invasmun {
  2039  				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  2040  				s.Br(ppc64.ABVS, b.Succs[1].Block())
  2041  			}
  2042  		case b.Succs[1].Block():
  2043  			s.Br(jmp.asm, b.Succs[0].Block())
  2044  			if jmp.asmeq {
  2045  				s.Br(ppc64.ABEQ, b.Succs[0].Block())
  2046  			}
  2047  		default:
  2048  			if b.Likely != ssa.BranchUnlikely {
  2049  				s.Br(jmp.asm, b.Succs[0].Block())
  2050  				if jmp.asmeq {
  2051  					s.Br(ppc64.ABEQ, b.Succs[0].Block())
  2052  				}
  2053  				s.Br(obj.AJMP, b.Succs[1].Block())
  2054  			} else {
  2055  				s.Br(jmp.invasm, b.Succs[1].Block())
  2056  				if jmp.invasmun {
  2057  					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  2058  					s.Br(ppc64.ABVS, b.Succs[1].Block())
  2059  				}
  2060  				s.Br(obj.AJMP, b.Succs[0].Block())
  2061  			}
  2062  		}
  2063  	default:
  2064  		b.Fatalf("branch not implemented: %s", b.LongString())
  2065  	}
  2066  }
  2067  
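        // loadRegResult generates a load of result value n (at offset off within
        // its stack slot) into reg, using the load instruction matching type t.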
  2068  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  2069  	p := s.Prog(loadByType(t))
  2070  	p.From.Type = obj.TYPE_MEM
  2071  	p.From.Name = obj.NAME_AUTO
  2072  	p.From.Sym = n.Linksym()
  2073  	p.From.Offset = n.FrameOffset() + off
  2074  	p.To.Type = obj.TYPE_REG
  2075  	p.To.Reg = reg
  2076  	return p
  2077  }
  2078  
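        // spillArgReg appends a store after p that spills argument register reg
        // into the parameter slot for n at offset off; the spill is marked as
        // not a statement position.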
  2079  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  2080  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  2081  	p.To.Name = obj.NAME_PARAM
  2082  	p.To.Sym = n.Linksym()
  2083  	p.Pos = p.Pos.WithNotStmt()
  2084  	return p
  2085  }
  2086  
