Source file src/cmd/compile/internal/ssa/_gen/PPC64Ops.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import "strings"
     8  
     9  // Notes:
    10  //  - Less-than-64-bit integer types live in the low portion of registers.
    11  //    The upper portion is junk.
    12  //  - Boolean types are zero or 1; stored in a byte, with upper bytes of the register containing junk.
    13  //  - *const instructions may use a constant larger than the instruction can encode.
    14  //    In this case the assembler expands to multiple instructions and uses tmp
    15  //    register (R31).
    16  
    17  var regNamesPPC64 = []string{
    18  	"R0", // REGZERO, not used, but simplifies counting in regalloc
    19  	"SP", // REGSP
    20  	"SB", // REGSB
    21  	"R3",
    22  	"R4",
    23  	"R5",
    24  	"R6",
    25  	"R7",
    26  	"R8",
    27  	"R9",
    28  	"R10",
    29  	"R11", // REGCTXT for closures
    30  	"R12",
    31  	"R13", // REGTLS
    32  	"R14",
    33  	"R15",
    34  	"R16",
    35  	"R17",
    36  	"R18",
    37  	"R19",
    38  	"R20",
    39  	"R21",
    40  	"R22",
    41  	"R23",
    42  	"R24",
    43  	"R25",
    44  	"R26",
    45  	"R27",
    46  	"R28",
    47  	"R29",
    48  	"g",   // REGG.  Using name "g" and setting Config.hasGReg makes it "just happen".
    49  	"R31", // REGTMP
    50  
    51  	"F0",
    52  	"F1",
    53  	"F2",
    54  	"F3",
    55  	"F4",
    56  	"F5",
    57  	"F6",
    58  	"F7",
    59  	"F8",
    60  	"F9",
    61  	"F10",
    62  	"F11",
    63  	"F12",
    64  	"F13",
    65  	"F14",
    66  	"F15",
    67  	"F16",
    68  	"F17",
    69  	"F18",
    70  	"F19",
    71  	"F20",
    72  	"F21",
    73  	"F22",
    74  	"F23",
    75  	"F24",
    76  	"F25",
    77  	"F26",
    78  	"F27",
    79  	"F28",
    80  	"F29",
    81  	"F30",
    82  	// "F31", the allocator is limited to 64 entries. We sacrifice this FPR to support XER.
    83  
    84  	"XER",
    85  
    86  	// If you add registers, update asyncPreempt in runtime.
    87  
    88  	// "CR0",
    89  	// "CR1",
    90  	// "CR2",
    91  	// "CR3",
    92  	// "CR4",
    93  	// "CR5",
    94  	// "CR6",
    95  	// "CR7",
    96  
    97  	// "CR",
    98  	// "LR",
    99  	// "CTR",
   100  }
   101  
   102  func init() {
   103  	// Make map from reg names to reg integers.
   104  	if len(regNamesPPC64) > 64 {
   105  		panic("too many registers")
   106  	}
   107  	num := map[string]int{}
   108  	for i, name := range regNamesPPC64 {
   109  		num[name] = i
   110  	}
   111  	buildReg := func(s string) regMask {
   112  		m := regMask(0)
   113  		for _, r := range strings.Split(s, " ") {
   114  			if n, ok := num[r]; ok {
   115  				m |= regMask(1) << uint(n)
   116  				continue
   117  			}
   118  			panic("register " + r + " not found")
   119  		}
   120  		return m
   121  	}
   122  
   123  	var (
   124  		gp  = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29")
   125  		fp  = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30")
   126  		sp  = buildReg("SP")
   127  		sb  = buildReg("SB")
   128  		gr  = buildReg("g")
   129  		xer = buildReg("XER")
   130  		// cr  = buildReg("CR")
   131  		// ctr = buildReg("CTR")
   132  		// lr  = buildReg("LR")
   133  		tmp     = buildReg("R31")
   134  		ctxt    = buildReg("R11")
   135  		callptr = buildReg("R12")
   136  		// tls = buildReg("R13")
   137  		gp01        = regInfo{inputs: nil, outputs: []regMask{gp}}
   138  		gp11        = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   139  		xergp       = regInfo{inputs: []regMask{xer}, outputs: []regMask{gp}, clobbers: xer}
   140  		gp11cxer    = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer}
   141  		gp11xer     = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp, xer}}
   142  		gp1xer1xer  = regInfo{inputs: []regMask{gp | sp | sb, xer}, outputs: []regMask{gp, xer}, clobbers: xer}
   143  		gp21        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   144  		gp21a0      = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}}
   145  		gp21cxer    = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer}
   146  		gp21xer     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, xer}, clobbers: xer}
   147  		gp2xer1xer  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, xer}, outputs: []regMask{gp, xer}, clobbers: xer}
   148  		gp31        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   149  		gp1cr       = regInfo{inputs: []regMask{gp | sp | sb}}
   150  		gp2cr       = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   151  		crgp        = regInfo{inputs: nil, outputs: []regMask{gp}}
   152  		crgp11      = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
   153  		crgp21      = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
   154  		gpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   155  		gploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   156  		prefreg     = regInfo{inputs: []regMask{gp | sp | sb}}
   157  		gpstore     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   158  		gpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}}
   159  		gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value
   160  		gpxchg      = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   161  		gpcas       = regInfo{inputs: []regMask{gp | sp | sb, gp, gp}, outputs: []regMask{gp}}
   162  		fp01        = regInfo{inputs: nil, outputs: []regMask{fp}}
   163  		fp11        = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
   164  		fpgp        = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
   165  		gpfp        = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
   166  		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
   167  		fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
   168  		fp2cr       = regInfo{inputs: []regMask{fp, fp}}
   169  		fpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{fp}}
   170  		fploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{fp}}
   171  		fpstore     = regInfo{inputs: []regMask{gp | sp | sb, fp}}
   172  		fpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, fp}}
   173  		callerSave  = regMask(gp | fp | gr | xer)
   174  		first7      = buildReg("R3 R4 R5 R6 R7 R8 R9")
   175  	)
   176  	ops := []opData{
   177  		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},                              // arg0 + arg1
   178  		{name: "ADDCC", argLength: 2, reg: gp21, asm: "ADDCC", commutative: true, typ: "(Int,Flags)"},      // arg0 + arg1
   179  		{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"},                              // arg0 + auxInt
   180  		{name: "ADDCCconst", argLength: 1, reg: gp11cxer, asm: "ADDCCC", aux: "Int64", typ: "(Int,Flags)"}, // arg0 + auxInt sets CC, clobbers XER
   181  		{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},                            // arg0+arg1
   182  		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true},                          // arg0+arg1
   183  		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                                                 // arg0-arg1
   184  		{name: "SUBCC", argLength: 2, reg: gp21, asm: "SUBCC", typ: "(Int,Flags)"},                         // arg0-arg1 sets CC
   185  		{name: "SUBFCconst", argLength: 1, reg: gp11cxer, asm: "SUBC", aux: "Int64"},                       // auxInt - arg0 (carry is ignored)
   186  		{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                                               // arg0-arg1
   187  		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                                             // arg0-arg1
   188  
   189  		// Note, the FPU works with float64 in register.
   190  		{name: "XSMINJDP", argLength: 2, reg: fp21, asm: "XSMINJDP"}, // fmin(arg0,arg1)
   191  		{name: "XSMAXJDP", argLength: 2, reg: fp21, asm: "XSMAXJDP"}, // fmax(arg0,arg1)
   192  
   193  		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
   194  		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
   195  		{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
   196  		{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
   197  		{name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"},                  // (arg0*arg1)+arg2 (signed 64-bit)
   198  
   199  		{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},                             // (arg0 * arg1) >> 64, signed
   200  		{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},                             // (arg0 * arg1) >> 32, signed
   201  		{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true},                           // (arg0 * arg1) >> 64, unsigned
   202  		{name: "MULHDUCC", argLength: 2, reg: gp21, asm: "MULHDUCC", commutative: true, typ: "(Int64,Flags)"}, // (arg0 * arg1) >> 64, unsigned, sets CC
   203  		{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true},                           // (arg0 * arg1) >> 32, unsigned
   204  
   205  		{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true},   // arg0*arg1
   206  		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
   207  
   208  		{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD"},   // arg0*arg1 + arg2
   209  		{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // arg0*arg1 + arg2
   210  		{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB"},   // arg0*arg1 - arg2
   211  		{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // arg0*arg1 - arg2
   212  
   213  		{name: "SRAD", argLength: 2, reg: gp21cxer, asm: "SRAD"}, // signed arg0 >> (arg1&127), 64 bit width (note: 127, not 63!)
   214  		{name: "SRAW", argLength: 2, reg: gp21cxer, asm: "SRAW"}, // signed arg0 >> (arg1&63), 32 bit width
   215  		{name: "SRD", argLength: 2, reg: gp21, asm: "SRD"},       // unsigned arg0 >> (arg1&127), 64 bit width
   216  		{name: "SRW", argLength: 2, reg: gp21, asm: "SRW"},       // unsigned arg0 >> (arg1&63), 32 bit width
   217  		{name: "SLD", argLength: 2, reg: gp21, asm: "SLD"},       // arg0 << (arg1&127), 64 bit width
   218  		{name: "SLW", argLength: 2, reg: gp21, asm: "SLW"},       // arg0 << (arg1&63), 32 bit width
   219  
   220  		{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"},   // arg0 rotate left by arg1 mod 64
   221  		{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
   222  		// The following are ops to implement the extended mnemonics for shifts as described in section C.8 of the ISA.
   223  		// The constant shift values are packed into the aux int32.
   224  		{name: "CLRLSLWI", argLength: 1, reg: gp11, asm: "CLRLSLWI", aux: "Int32"}, //
   225  		{name: "CLRLSLDI", argLength: 1, reg: gp11, asm: "CLRLSLDI", aux: "Int32"}, //
   226  
   227  		// Operations which consume or generate the CA (xer)
   228  		{name: "ADDC", argLength: 2, reg: gp21xer, asm: "ADDC", commutative: true, typ: "(UInt64, UInt64)"},    // arg0 + arg1 -> out, CA
   229  		{name: "SUBC", argLength: 2, reg: gp21xer, asm: "SUBC", typ: "(UInt64, UInt64)"},                       // arg0 - arg1 -> out, CA
   230  		{name: "ADDCconst", argLength: 1, reg: gp11xer, asm: "ADDC", typ: "(UInt64, UInt64)", aux: "Int64"},    // arg0 + imm16 -> out, CA
   231  		{name: "SUBCconst", argLength: 1, reg: gp11xer, asm: "SUBC", typ: "(UInt64, UInt64)", aux: "Int64"},    // imm16 - arg0 -> out, CA
   232  		{name: "ADDE", argLength: 3, reg: gp2xer1xer, asm: "ADDE", typ: "(UInt64, UInt64)", commutative: true}, // arg0 + arg1 + CA (arg2) -> out, CA
   233  		{name: "ADDZE", argLength: 2, reg: gp1xer1xer, asm: "ADDZE", typ: "(UInt64, UInt64)"},                  // arg0 + CA (arg1) -> out, CA
   234  		{name: "SUBE", argLength: 3, reg: gp2xer1xer, asm: "SUBE", typ: "(UInt64, UInt64)"},                    // arg0 - arg1 - CA (arg2) -> out, CA
   235  		{name: "ADDZEzero", argLength: 1, reg: xergp, asm: "ADDZE", typ: "UInt64"},                             // CA (arg0) + $0 -> out
   236  		{name: "SUBZEzero", argLength: 1, reg: xergp, asm: "SUBZE", typ: "UInt64"},                             // $0 - CA (arg0) -> out
   237  
   238  		{name: "SRADconst", argLength: 1, reg: gp11cxer, asm: "SRAD", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
   239  		{name: "SRAWconst", argLength: 1, reg: gp11cxer, asm: "SRAW", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
   240  		{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"},       // unsigned arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
   241  		{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"},       // unsigned arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
   242  		{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"},       // arg0 << auxInt, 0 <= auxInt < 64, 64 bit width
   243  		{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"},       // arg0 << auxInt, 0 <= auxInt < 32, 32 bit width
   244  
   245  		{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"},   // arg0 rotate left by auxInt bits
   246  		{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
   247  		{name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"},
   248  
   249  		{name: "RLWINM", argLength: 1, reg: gp11, asm: "RLWNM", aux: "Int64"},                           // Rotate and mask by immediate "rlwinm". encodePPC64RotateMask describes aux
   250  		{name: "RLWNM", argLength: 2, reg: gp21, asm: "RLWNM", aux: "Int64"},                            // Rotate and mask by "rlwnm". encodePPC64RotateMask describes aux
   251  		{name: "RLWMI", argLength: 2, reg: gp21a0, asm: "RLWMI", aux: "Int64", resultInArg0: true},      // "rlwimi" similar aux encoding as above
   252  		{name: "RLDICL", argLength: 1, reg: gp11, asm: "RLDICL", aux: "Int64"},                          // Auxint is encoded similarly to RLWINM, but only MB and SH are valid. ME is always 63.
   253  		{name: "RLDICLCC", argLength: 1, reg: gp11, asm: "RLDICLCC", aux: "Int64", typ: "(Int, Flags)"}, // Auxint is encoded similarly to RLWINM, but only MB and SH are valid. ME is always 63. Sets CC.
   254  		{name: "RLDICR", argLength: 1, reg: gp11, asm: "RLDICR", aux: "Int64"},                          // Likewise, but only ME and SH are valid. MB is always 0.
   255  
   256  		{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD"},                          // count leading zeros
   257  		{name: "CNTLZDCC", argLength: 1, reg: gp11, asm: "CNTLZDCC", typ: "(Int, Flags)"}, // count leading zeros, sets CC
   258  		{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW"},                          // count leading zeros (32 bit)
   259  
   260  		{name: "CNTTZD", argLength: 1, reg: gp11, asm: "CNTTZD"}, // count trailing zeros
   261  		{name: "CNTTZW", argLength: 1, reg: gp11, asm: "CNTTZW"}, // count trailing zeros (32 bit)
   262  
   263  		{name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
   264  		{name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word
   265  		{name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresponding byte
   266  
   267  		{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
   268  		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
   269  
   270  		{name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", typ: "Int64"},   // arg0/arg1 (signed 64-bit)
   271  		{name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},   // arg0/arg1 (signed 32-bit)
   272  		{name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit)
   273  		{name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit)
   274  
   275  		{name: "MODUD", argLength: 2, reg: gp21, asm: "MODUD", typ: "UInt64"}, // arg0 % arg1 (unsigned 64-bit)
   276  		{name: "MODSD", argLength: 2, reg: gp21, asm: "MODSD", typ: "Int64"},  // arg0 % arg1 (signed 64-bit)
   277  		{name: "MODUW", argLength: 2, reg: gp21, asm: "MODUW", typ: "UInt32"}, // arg0 % arg1 (unsigned 32-bit)
   278  		{name: "MODSW", argLength: 2, reg: gp21, asm: "MODSW", typ: "Int32"},  // arg0 % arg1 (signed 32-bit)
   279  		// MOD is implemented as rem := arg0 - (arg0/arg1) * arg1
   280  
   281  		// Conversions are all float-to-float register operations.  "Integer" refers to encoding in the FP register.
   282  		{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
   283  		{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
   284  		{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"},   // convert 64-bit integer to float
   285  		{name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 32-bit integer to float
   286  		{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"},     // round float to 32-bit value
   287  
   288  		// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
   289  		// Because the 32-bit load-literal-bits instructions have impoverished addressability, always widen the
   290  		// data instead and use FMOVDload and FMOVDstore instead (this will also dodge endianness issues).
   291  		// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
   292  		// the word-load instructions.  (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
   293  
   294  		{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"},   // move 64 bits of F register into G register
   295  		{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
   296  
   297  		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},                           // arg0&arg1
   298  		{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                            // arg0&^arg1
   299  		{name: "ANDNCC", argLength: 2, reg: gp21, asm: "ANDNCC", typ: "(Int64,Flags)"},                  // arg0&^arg1 sets CC
   300  		{name: "ANDCC", argLength: 2, reg: gp21, asm: "ANDCC", commutative: true, typ: "(Int64,Flags)"}, // arg0&arg1 sets CC
   301  		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                             // arg0|arg1
   302  		{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                              // arg0|^arg1
   303  		{name: "ORCC", argLength: 2, reg: gp21, asm: "ORCC", commutative: true, typ: "(Int,Flags)"},     // arg0|arg1 sets CC
   304  		{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},                           // ^(arg0|arg1)
   305  		{name: "NORCC", argLength: 2, reg: gp21, asm: "NORCC", commutative: true, typ: "(Int,Flags)"},   // ^(arg0|arg1) sets CC
   306  		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true},             // arg0^arg1
   307  		{name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, typ: "(Int,Flags)"},   // arg0^arg1 sets CC
   308  		{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true},             // arg0^^arg1
   309  		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                              // -arg0 (integer)
   310  		{name: "NEGCC", argLength: 1, reg: gp11, asm: "NEGCC", typ: "(Int,Flags)"},                      // -arg0 (integer) sets CC
   311  		{name: "BRD", argLength: 1, reg: gp11, asm: "BRD"},                                              // reversebytes64(arg0)
   312  		{name: "BRW", argLength: 1, reg: gp11, asm: "BRW"},                                              // reversebytes32(arg0)
   313  		{name: "BRH", argLength: 1, reg: gp11, asm: "BRH"},                                              // reversebytes16(arg0)
   314  		{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                            // -arg0 (floating point)
   315  		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                                          // sqrt(arg0) (floating point)
   316  		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                                        // sqrt(arg0) (floating point, single precision)
   317  		{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"},                                          // floor(arg0), float64
   318  		{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"},                                           // ceil(arg0), float64
   319  		{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"},                                          // trunc(arg0), float64
   320  		{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"},                                          // round(arg0), float64
   321  		{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"},                                            // abs(arg0), float64
   322  		{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"},                                          // -abs(arg0), float64
   323  		{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"},                                        // copysign arg0 -> arg1, float64
   324  
   325  		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},                                                                                                 // arg0|aux
   326  		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"},                                                                                               // arg0^aux
   327  		{name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", typ: "(Int,Flags)"},           // arg0&aux == 0 // and-immediate sets CC on PPC, always.
   328  		{name: "ANDconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, clobberFlags: true, asm: "ANDCC", aux: "Int64", typ: "Int"}, // arg0&aux == 0 // and-immediate sets CC on PPC, always.
   329  
   330  		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"},   // sign extend int8 to int64
   331  		{name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"}, // zero extend uint8 to uint64
   332  		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH", typ: "Int64"},   // sign extend int16 to int64
   333  		{name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"}, // zero extend uint16 to uint64
   334  		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"},   // sign extend int32 to int64
   335  		{name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"}, // zero extend uint32 to uint64
   336  
   337  		// Load bytes in the endian order of the arch from arg0+aux+auxint into a 64 bit register.
   338  		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // load byte zero extend
   339  		{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"},    // load 2 bytes sign extend
   340  		{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend
   341  		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"},    // load 4 bytes sign extend
   342  		{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend
   343  		{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},    // load 8 bytes
   344  
   345  		// Load bytes in reverse endian order of the arch from arg0 into a 64 bit register, all zero extend.
   346  		// The generated instructions are indexed loads with no offset field in the instruction so the aux fields are not used.
   347  		// In these cases the index register field is set to 0 and the full address is in the base register.
   348  		{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", typ: "UInt64", faultOnNilArg0: true}, // load 8 bytes reverse order
   349  		{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", typ: "UInt32", faultOnNilArg0: true}, // load 4 bytes zero extend reverse order
   350  		{name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", typ: "UInt16", faultOnNilArg0: true}, // load 2 bytes zero extend reverse order
   351  
   352  		// In these cases an index register is used in addition to a base register
   353  		// Loads from memory location arg[0] + arg[1].
   354  		{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", typ: "UInt8"},  // zero extend uint8 to uint64
   355  		{name: "MOVHloadidx", argLength: 3, reg: gploadidx, asm: "MOVH", typ: "Int16"},    // sign extend int16 to int64
   356  		{name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", typ: "UInt16"}, // zero extend uint16 to uint64
   357  		{name: "MOVWloadidx", argLength: 3, reg: gploadidx, asm: "MOVW", typ: "Int32"},    // sign extend int32 to int64
   358  		{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", typ: "UInt32"}, // zero extend uint32 to uint64
   359  		{name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", typ: "Int64"},
   360  		{name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", typ: "Int16"}, // sign extend int16 to int64
   361  		{name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", typ: "Int32"}, // sign extend int32 to int64
   362  		{name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", typ: "Int64"},
   363  		{name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", typ: "Float64"},
   364  		{name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", typ: "Float32"},
   365  
   366  		// Prefetch instruction
   367  		// Do prefetch of address generated with arg0 and arg1 with option aux. arg0=addr,arg1=memory, aux=option.
   368  		{name: "DCBT", argLength: 2, aux: "Int64", reg: prefreg, asm: "DCBT", hasSideEffects: true},
   369  
   370  		// Store bytes in the reverse endian order of the arch into arg0.
   371  		// These are indexed stores with no offset field in the instruction so the auxint fields are not used.
   372  		{name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes reverse order
   373  		{name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes reverse order
   374  		{name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes reverse order
   375  
   376  		// Floating point loads from arg0+aux+auxint
   377  		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load double float
   378  		{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load single float
   379  
   380  		// Store bytes in the endian order of the arch into arg0+aux+auxint
   381  		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte
   382  		{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes
   383  		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes
   384  		{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes
   385  
   386  		// Store floating point value into arg0+aux+auxint
   387  		{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store double flot
   388  		{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store single float
   389  
   390  		// Stores using index and base registers
   391  		// Stores to arg[0] + arg[1]
   392  		{name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", typ: "Mem"},     // store bye
   393  		{name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", typ: "Mem"},     // store half word
   394  		{name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", typ: "Mem"},     // store word
   395  		{name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", typ: "Mem"},     // store double word
   396  		{name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", typ: "Mem"},   // store double float
   397  		{name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", typ: "Mem"},   // store single float
   398  		{name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVHBR", typ: "Mem"}, // store half word reversed byte using index reg
   399  		{name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVWBR", typ: "Mem"}, // store word reversed byte using index reg
   400  		{name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVDBR", typ: "Mem"}, // store double word reversed byte using index reg
   401  
   402  		// The following ops store 0 into arg0+aux+auxint arg1=mem
   403  		{name: "MOVBstorezero", argLength: 2, reg: gpstorezero, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 1 byte
   404  		{name: "MOVHstorezero", argLength: 2, reg: gpstorezero, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 2 bytes
   405  		{name: "MOVWstorezero", argLength: 2, reg: gpstorezero, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 4 bytes
   406  		{name: "MOVDstorezero", argLength: 2, reg: gpstorezero, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 8 bytes
   407  
   408  		{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb | gp}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB/GP
   409  
   410  		{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "Int64", rematerializeable: true}, //
   411  		{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true},           //
   412  		{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true},           //
   413  		{name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},
   414  
   415  		{name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"},     // arg0 compare to arg1
   416  		{name: "CMPU", argLength: 2, reg: gp2cr, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
   417  		{name: "CMPW", argLength: 2, reg: gp2cr, asm: "CMPW", typ: "Flags"},   // arg0 compare to arg1
   418  		{name: "CMPWU", argLength: 2, reg: gp2cr, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
   419  		{name: "CMPconst", argLength: 1, reg: gp1cr, asm: "CMP", aux: "Int64", typ: "Flags"},
   420  		{name: "CMPUconst", argLength: 1, reg: gp1cr, asm: "CMPU", aux: "Int64", typ: "Flags"},
   421  		{name: "CMPWconst", argLength: 1, reg: gp1cr, asm: "CMPW", aux: "Int32", typ: "Flags"},
   422  		{name: "CMPWUconst", argLength: 1, reg: gp1cr, asm: "CMPWU", aux: "Int32", typ: "Flags"},
   423  
   424  		// ISEL  arg2 ? arg0 : arg1
   425  		// ISELZ arg1 ? arg0 : $0
   426  		// auxInt values 0=LT 1=GT 2=EQ 3=SO (summary overflow/unordered) 4=GE 5=LE 6=NE 7=NSO (not summary overflow/not unordered)
   427  		// Note, auxInt^4 inverts the comparison condition. For example, LT^4 becomes GE, and "ISEL [a] x y z" is equivalent to ISEL [a^4] y x z".
   428  		{name: "ISEL", argLength: 3, reg: crgp21, asm: "ISEL", aux: "Int32", typ: "Int32"},
   429  		{name: "ISELZ", argLength: 2, reg: crgp11, asm: "ISEL", aux: "Int32"},
   430  
   431  		// SETBC auxInt values 0=LT 1=GT 2=EQ     (CRbit=1)? 1 : 0
   432  		{name: "SETBC", argLength: 1, reg: crgp, asm: "SETBC", aux: "Int32", typ: "Int32"},
   433  		// SETBCR auxInt values 0=LT 1=GT 2=EQ     (CRbit=1)? 0 : 1
   434  		{name: "SETBCR", argLength: 1, reg: crgp, asm: "SETBCR", aux: "Int32", typ: "Int32"},
   435  
   436  		// pseudo-ops
   437  		{name: "Equal", argLength: 1, reg: crgp},         // bool, true flags encode x==y false otherwise.
   438  		{name: "NotEqual", argLength: 1, reg: crgp},      // bool, true flags encode x!=y false otherwise.
   439  		{name: "LessThan", argLength: 1, reg: crgp},      // bool, true flags encode  x<y false otherwise.
   440  		{name: "FLessThan", argLength: 1, reg: crgp},     // bool, true flags encode  x<y false otherwise.
   441  		{name: "LessEqual", argLength: 1, reg: crgp},     // bool, true flags encode  x<=y false otherwise.
   442  		{name: "FLessEqual", argLength: 1, reg: crgp},    // bool, true flags encode  x<=y false otherwise; PPC <= === !> which is wrong for NaN
   443  		{name: "GreaterThan", argLength: 1, reg: crgp},   // bool, true flags encode  x>y false otherwise.
   444  		{name: "FGreaterThan", argLength: 1, reg: crgp},  // bool, true flags encode  x>y false otherwise.
   445  		{name: "GreaterEqual", argLength: 1, reg: crgp},  // bool, true flags encode  x>=y false otherwise.
   446  		{name: "FGreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode  x>=y false otherwise.; PPC >= === !< which is wrong for NaN
   447  
   448  		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
   449  		// and sorts it to the very beginning of the block to prevent other
   450  		// use of the closure pointer.
   451  		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{ctxt}}, zeroWidth: true},
   452  
   453  		// LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem.
   454  		{name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true},
   455  
   456  		// LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
   457  		// I.e., if f calls g "calls" sys.GetCallerPC,
   458  		// the result should be the PC within f that g will return to.
   459  		// See runtime/stubs.go for a more detailed discussion.
   460  		{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
   461  
   462  		//arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
   463  		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
   464  		// Round ops to block fused-multiply-add extraction.
   465  		{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   466  		{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   467  
   468  		{name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                                                // call static function aux.(*obj.LSym).  last arg=mem, auxint=argsize, returns mem
   469  		{name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true},                                  // tail call static function aux.(*obj.LSym).  last arg=mem, auxint=argsize, returns mem
   470  		{name: "CALLtailinter", argLength: -1, reg: regInfo{inputs: []regMask{callptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call fn by pointer.  arg0=codeptr, last arg=mem, auxint=argsize, returns mem
   471  		{name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{callptr, ctxt, 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},          // call function via closure.  arg0=codeptr, arg1=closure, last arg=mem, auxint=argsize, returns mem
   472  		{name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{callptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                     // call fn by pointer.  arg0=codeptr, last arg=mem, auxint=argsize, returns mem
   473  
   474  		// large or unaligned zeroing
   475  		// arg0 = address of memory to zero (in R3, changed as side effect)
   476  		// returns mem
   477  		//
   478  		// a loop is generated when there is more than one iteration
   479  		// needed to clear 4 doublewords
   480  		//
   481  		//	XXLXOR	VS32,VS32,VS32
   482  		// 	MOVD	$len/32,R31
   483  		//	MOVD	R31,CTR
   484  		//	MOVD	$16,R31
   485  		//	loop:
   486  		//	STXVD2X VS32,(R0)(R3)
   487  		//	STXVD2X	VS32,(R31)(R3)
   488  		//	ADD	R3,32
   489  		//	BC	loop
   490  
   491  		// remaining doubleword clears generated as needed
   492  		//	MOVD	R0,(R3)
   493  		//	MOVD	R0,8(R3)
   494  		//	MOVD	R0,16(R3)
   495  		//	MOVD	R0,24(R3)
   496  
   497  		// one or more of these to clear remainder < 8 bytes
   498  		//	MOVW	R0,n1(R3)
   499  		//	MOVH	R0,n2(R3)
   500  		//	MOVB	R0,n3(R3)
   501  		{
   502  			name:      "LoweredZero",
   503  			aux:       "Int64",
   504  			argLength: 2,
   505  			reg: regInfo{
   506  				inputs:   []regMask{buildReg("R20")},
   507  				clobbers: buildReg("R20"),
   508  			},
   509  			clobberFlags:   true,
   510  			typ:            "Mem",
   511  			faultOnNilArg0: true,
   512  			unsafePoint:    true,
   513  		},
   514  		{
   515  			name:      "LoweredZeroShort",
   516  			aux:       "Int64",
   517  			argLength: 2,
   518  			reg: regInfo{
   519  				inputs: []regMask{gp}},
   520  			typ:            "Mem",
   521  			faultOnNilArg0: true,
   522  			unsafePoint:    true,
   523  		},
   524  		{
   525  			name:      "LoweredQuadZeroShort",
   526  			aux:       "Int64",
   527  			argLength: 2,
   528  			reg: regInfo{
   529  				inputs: []regMask{gp},
   530  			},
   531  			typ:            "Mem",
   532  			faultOnNilArg0: true,
   533  			unsafePoint:    true,
   534  		},
   535  		{
   536  			name:      "LoweredQuadZero",
   537  			aux:       "Int64",
   538  			argLength: 2,
   539  			reg: regInfo{
   540  				inputs:   []regMask{buildReg("R20")},
   541  				clobbers: buildReg("R20"),
   542  			},
   543  			clobberFlags:   true,
   544  			typ:            "Mem",
   545  			faultOnNilArg0: true,
   546  			unsafePoint:    true,
   547  		},
   548  
   549  		// R31 is temp register
   550  		// Loop code:
   551  		//	MOVD len/32,R31		set up loop ctr
   552  		//	MOVD R31,CTR
   553  		//	MOVD $16,R31		index register
   554  		// loop:
   555  		//	LXVD2X (R0)(R4),VS32
   556  		//	LXVD2X (R31)(R4),VS33
   557  		//	ADD  R4,$32          increment src
   558  		//	STXVD2X VS32,(R0)(R3)
   559  		//	STXVD2X VS33,(R31)(R3)
   560  		//	ADD  R3,$32          increment dst
   561  		//	BC 16,0,loop         branch ctr
   562  		// For this purpose, VS32 and VS33 are treated as
   563  		// scratch registers. Since regalloc does not
   564  		// track vector registers, even if it could be marked
   565  		// as clobbered it would have no effect.
   566  		// TODO: If vector registers are managed by regalloc
   567  		// mark these as clobbered.
   568  		//
   569  		// Bytes not moved by this loop are moved
   570  		// with a combination of the following instructions,
   571  		// starting with the largest sizes and generating as
   572  		// many as needed, using the appropriate offset value.
   573  		//	MOVD  n(R4),R14
   574  		//	MOVD  R14,n(R3)
   575  		//	MOVW  n1(R4),R14
   576  		//	MOVW  R14,n1(R3)
   577  		//	MOVH  n2(R4),R14
   578  		//	MOVH  R14,n2(R3)
   579  		//	MOVB  n3(R4),R14
   580  		//	MOVB  R14,n3(R3)
   581  
   582  		{
   583  			name:      "LoweredMove",
   584  			aux:       "Int64",
   585  			argLength: 3,
   586  			reg: regInfo{
   587  				inputs:   []regMask{buildReg("R20"), buildReg("R21")},
   588  				clobbers: buildReg("R20 R21"),
   589  			},
   590  			clobberFlags:   true,
   591  			typ:            "Mem",
   592  			faultOnNilArg0: true,
   593  			faultOnNilArg1: true,
   594  			unsafePoint:    true,
   595  		},
   596  		{
   597  			name:      "LoweredMoveShort",
   598  			aux:       "Int64",
   599  			argLength: 3,
   600  			reg: regInfo{
   601  				inputs: []regMask{gp, gp},
   602  			},
   603  			typ:            "Mem",
   604  			faultOnNilArg0: true,
   605  			faultOnNilArg1: true,
   606  			unsafePoint:    true,
   607  		},
   608  
   609  		// The following is similar to the LoweredMove, but uses
   610  		// LXV instead of LXVD2X, which does not require an index
   611  		// register and will do 4 in a loop instead of only.
   612  		{
   613  			name:      "LoweredQuadMove",
   614  			aux:       "Int64",
   615  			argLength: 3,
   616  			reg: regInfo{
   617  				inputs:   []regMask{buildReg("R20"), buildReg("R21")},
   618  				clobbers: buildReg("R20 R21"),
   619  			},
   620  			clobberFlags:   true,
   621  			typ:            "Mem",
   622  			faultOnNilArg0: true,
   623  			faultOnNilArg1: true,
   624  			unsafePoint:    true,
   625  		},
   626  
   627  		{
   628  			name:      "LoweredQuadMoveShort",
   629  			aux:       "Int64",
   630  			argLength: 3,
   631  			reg: regInfo{
   632  				inputs: []regMask{gp, gp},
   633  			},
   634  			typ:            "Mem",
   635  			faultOnNilArg0: true,
   636  			faultOnNilArg1: true,
   637  			unsafePoint:    true,
   638  		},
   639  
   640  		{name: "LoweredAtomicStore8", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   641  		{name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   642  		{name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   643  
   644  		{name: "LoweredAtomicLoad8", argLength: 2, reg: gpload, typ: "UInt8", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   645  		{name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, typ: "UInt32", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   646  		{name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   647  		{name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   648  
   649  		// atomic add32, 64
   650  		// LWSYNC
   651  		// LDAR         (Rarg0), Rout
   652  		// ADD		Rarg1, Rout
   653  		// STDCCC       Rout, (Rarg0)
   654  		// BNE          -3(PC)
   655  		// return new sum
   656  		{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   657  		{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   658  
   659  		// atomic exchange8, 32, 64
   660  		// LWSYNC
   661  		// LDAR         (Rarg0), Rout
   662  		// STDCCC       Rarg1, (Rarg0)
   663  		// BNE          -2(PC)
   664  		// ISYNC
   665  		// return old val
   666  		{name: "LoweredAtomicExchange8", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   667  		{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   668  		{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   669  
   670  		// atomic compare and swap.
   671  		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
   672  		// if *arg0 == arg1 {
   673  		//   *arg0 = arg2
   674  		//   return (true, memory)
   675  		// } else {
   676  		//   return (false, memory)
   677  		// }
   678  		// SYNC
   679  		// LDAR		(Rarg0), Rtmp
   680  		// CMP		Rarg1, Rtmp
   681  		// BNE		3(PC)
   682  		// STDCCC	Rarg2, (Rarg0)
   683  		// BNE		-4(PC)
   684  		// CBNZ         Rtmp, -4(PC)
   685  		// CSET         EQ, Rout
   686  		{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   687  		{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   688  
   689  		// atomic 8/32 and/or.
   690  		// *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
   691  		// LBAR/LWAT	(Rarg0), Rtmp
   692  		// AND/OR	Rarg1, Rtmp
   693  		// STBCCC/STWCCC Rtmp, (Rarg0), Rtmp
   694  		// BNE		Rtmp, -3(PC)
   695  		{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   696  		{name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   697  		{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   698  		{name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   699  
   700  		// LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed
   701  		// It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and R20 and R21,
   702  		// but may clobber anything else, including R31 (REGTMP).
   703  		// Returns a pointer to a write barrier buffer in R29.
   704  		{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17 R20 R21 g")) | buildReg("R31"), outputs: []regMask{buildReg("R29")}}, clobberFlags: true, aux: "Int64"},
   705  
   706  		{name: "LoweredPubBarrier", argLength: 1, asm: "LWSYNC", hasSideEffects: true}, // Do data barrier. arg0=memory
   707  
   708  		// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
   709  		// the RC and CR versions are used when one of the arguments is a constant. CC is used
   710  		// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
   711  		// failure means the length must have also been 0).
   712  		// AuxInt contains a report code (see PanicBounds in genericOps.go).
   713  		{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{first7, first7}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
   714  		{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first7}}, typ: "Mem", call: true},  // arg0=x, arg1=mem, returns memory.
   715  		{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first7}}, typ: "Mem", call: true},  // arg0=y, arg1=mem, returns memory.
   716  		{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true},                          // arg0=mem, returns memory.
   717  
   718  		// (InvertFlags (CMP a b)) == (CMP b a)
   719  		// So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
   720  		// then we do (LessThan (InvertFlags (CMP b a))) instead.
   721  		// Rewrites will convert this to (GreaterThan (CMP b a)).
   722  		// InvertFlags is a pseudo-op which can't appear in assembly output.
   723  		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
   724  
   725  		// Constant flag values. For any comparison, there are 3 possible
   726  		// outcomes: either the three from the signed total order (<,==,>)
   727  		// or the three from the unsigned total order, depending on which
   728  		// comparison operation was used (CMP or CMPU -- PPC is different from
   729  		// the other architectures, which have a single comparison producing
   730  		// both signed and unsigned comparison results.)
   731  
   732  		// These ops are for temporary use by rewrite rules. They
   733  		// cannot appear in the generated assembly.
   734  		{name: "FlagEQ"}, // equal
   735  		{name: "FlagLT"}, // signed < or unsigned <
   736  		{name: "FlagGT"}, // signed > or unsigned >
   737  	}
   738  
   739  	blocks := []blockData{
   740  		{name: "EQ", controls: 1},
   741  		{name: "NE", controls: 1},
   742  		{name: "LT", controls: 1},
   743  		{name: "LE", controls: 1},
   744  		{name: "GT", controls: 1},
   745  		{name: "GE", controls: 1},
   746  		{name: "FLT", controls: 1},
   747  		{name: "FLE", controls: 1},
   748  		{name: "FGT", controls: 1},
   749  		{name: "FGE", controls: 1},
   750  	}
   751  
   752  	archs = append(archs, arch{
   753  		name:               "PPC64",
   754  		pkg:                "cmd/internal/obj/ppc64",
   755  		genfile:            "../../ppc64/ssa.go",
   756  		ops:                ops,
   757  		blocks:             blocks,
   758  		regnames:           regNamesPPC64,
   759  		ParamIntRegNames:   "R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17",
   760  		ParamFloatRegNames: "F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12",
   761  		gpregmask:          gp,
   762  		fpregmask:          fp,
   763  		specialregmask:     xer,
   764  		framepointerreg:    -1,
   765  		linkreg:            -1, // not used
   766  	})
   767  }
   768  

View as plain text