Text file src/crypto/internal/bigmod/nat_ppc64x.s

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego && (ppc64 || ppc64le)
     6  
     7  #include "textflag.h"
     8  
     9  // func addMulVVW1024(z, x *uint, y uint) (c uint)
    10  TEXT ·addMulVVW1024(SB), $0-32
    11  	MOVD	$(1024/64/4), R6	// loop count: 16 words of 64 bits, 4 words per iteration
    12  	BR	addMulVVWx<>(SB)	// tail-jump; the helper reads z, x, y and stores c via FP
    13  
    14  // func addMulVVW1536(z, x *uint, y uint) (c uint)
    15  TEXT ·addMulVVW1536(SB), $0-32
    16  	MOVD	$(1536/64/4), R6	// loop count: 24 words of 64 bits, 4 words per iteration
    17  	BR	addMulVVWx<>(SB)	// tail-jump; the helper reads z, x, y and stores c via FP
    18  
    19  // func addMulVVW2048(z, x *uint, y uint) (c uint)
    20  TEXT ·addMulVVW2048(SB), $0-32
    21  	MOVD	$(2048/64/4), R6	// loop count: 32 words of 64 bits, 4 words per iteration
    22  	BR	addMulVVWx<>(SB)	// tail-jump; the helper reads z, x, y and stores c via FP
    23  
    24  // addMulVVWx is the shared loop behind the fixed-size entry points above, which
    25  // reach it by JMP with R6 = z length/4 (4 values are processed for each loop
    26  // iteration); R6 is guaranteed to be > 0. z, x and y are loaded through FP, each
    27  // word becomes z[i] = low 64 bits of x[i]*y + z[i] + c, the overflow is carried
    28  // forward in c (R9), and the final c is stored to c+24(FP).
    29  // If other callers are added this function might need to change.
    30  TEXT addMulVVWx<>(SB), NOSPLIT, $0
    31  	MOVD	z+0(FP), R3	// R3 = z pointer, advanced 32 bytes per iteration
    32  	MOVD	x+8(FP), R4	// R4 = x pointer, advanced 32 bytes per iteration
    33  	MOVD	y+16(FP), R5	// R5 = y, the multiplier used for every word
    34  
    35  	MOVD	$0, R9		// R9 = c = 0
    36  	MOVD	R6, CTR		// CTR = number of 4-word iterations, counted down by BDNZ
    37  	PCALIGN	$16		// align the loop entry to a 16-byte boundary
    38  
    39  loop:
    40  	MOVD	0(R4), R14	// x[i]
    41  	MOVD	8(R4), R16	// x[i+1]
    42  	MOVD	16(R4), R18	// x[i+2]
    43  	MOVD	24(R4), R20	// x[i+3]
    44  	MOVD	0(R3), R15	// z[i]
    45  	MOVD	8(R3), R17	// z[i+1]
    46  	MOVD	16(R3), R19	// z[i+2]
    47  	MOVD	24(R3), R21	// z[i+3]
    48  	MULLD	R5, R14, R10	// low x[i]*y
    49  	MULHDU	R5, R14, R11	// high x[i]*y
    50  	ADDC	R15, R10	// low += z[i], CA = carry out
    51  	ADDZE	R11		// high += CA
    52  	ADDC	R9, R10		// low += c, CA = carry out
    53  	ADDZE	R11, R9		// c = high + CA
    54  	MULLD	R5, R16, R14	// low x[i+1]*y (R14 is free: x[i] already consumed)
    55  	MULHDU	R5, R16, R15	// high x[i+1]*y (R15 is free: z[i] already consumed)
    56  	ADDC	R17, R14	// low += z[i+1], CA = carry out
    57  	ADDZE	R15		// high += CA
    58  	ADDC	R9, R14		// low += c, CA = carry out
    59  	ADDZE	R15, R9		// c = high + CA
    60  	MULLD	R5, R18, R16	// low x[i+2]*y (R16 is free: x[i+1] already consumed)
    61  	MULHDU	R5, R18, R17	// high x[i+2]*y (R17 is free: z[i+1] already consumed)
    62  	ADDC	R19, R16	// low += z[i+2], CA = carry out
    63  	ADDZE	R17		// high += CA
    64  	ADDC	R9, R16		// low += c, CA = carry out
    65  	ADDZE	R17, R9		// c = high + CA
    66  	MULLD	R5, R20, R18	// low x[i+3]*y (R18 is free: x[i+2] already consumed)
    67  	MULHDU	R5, R20, R19	// high x[i+3]*y (R19 is free: z[i+2] already consumed)
    68  	ADDC	R21, R18	// low += z[i+3], CA = carry out
    69  	ADDZE	R19		// high += CA
    70  	ADDC	R9, R18		// low += c, CA = carry out
    71  	ADDZE	R19, R9		// c = high + CA, carried into the next iteration
    72  	MOVD	R10, 0(R3)	// z[i]
    73  	MOVD	R14, 8(R3)	// z[i+1]
    74  	MOVD	R16, 16(R3)	// z[i+2]
    75  	MOVD	R18, 24(R3)	// z[i+3]
    76  	ADD	$32, R3		// step z past the 4 words just written
    77  	ADD	$32, R4		// step x past the 4 words just read
    78  	BDNZ	loop		// decrement CTR and repeat while it is non-zero
    79  
    80  done:
    81  	MOVD	R9, c+24(FP)	// return the final carry word as c
    82  	RET
    83  

View as plain text