Text file src/crypto/aes/asm_amd64.s

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  #include "textflag.h"
     8  
     9  // func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
    10  TEXT ·encryptBlockAsm(SB),NOSPLIT,$0 // encrypts the 16 bytes at src into dst with the round keys at xk
    11  	MOVQ src+24(FP), DI
    12  	MOVQ dst+16(FP), DX
    13  	MOVQ xk+8(FP), SI
    14  	MOVQ nr+0(FP), CX
    15  	MOVUPS (DI), X0 // X0 = input block; it carries the running state
    16  	MOVUPS (SI), X2 // X2 = round key 0
    17  	PXOR X2, X0 // state ^= round key 0
    18  	ADDQ $16, SI // SI -> round key 1
    19  	CMPQ CX, $12 // nr < 12: 10 rounds, nr == 12: 12 rounds, nr > 12: 14 rounds
    20  	JB enc_tail10
    21  	JE enc_tail12
    22  enc_lead14:
    23  	MOVUPS (SI), X2 // two extra rounds, only on the 14-round path
    24  	AESENC X2, X0
    25  	MOVUPS 16(SI), X2
    26  	AESENC X2, X0
    27  	ADDQ $32, SI
    28  enc_tail12:
    29  	MOVUPS (SI), X2 // two extra rounds, on the 12- and 14-round paths
    30  	AESENC X2, X0
    31  	MOVUPS 16(SI), X2
    32  	AESENC X2, X0
    33  	ADDQ $32, SI
    34  enc_tail10:
    35  	MOVUPS (SI), X2 // common tail: nine AESENC rounds, then AESENCLAST
    36  	AESENC X2, X0
    37  	MOVUPS 16(SI), X2
    38  	AESENC X2, X0
    39  	MOVUPS 32(SI), X2
    40  	AESENC X2, X0
    41  	MOVUPS 48(SI), X2
    42  	AESENC X2, X0
    43  	MOVUPS 64(SI), X2
    44  	AESENC X2, X0
    45  	MOVUPS 80(SI), X2
    46  	AESENC X2, X0
    47  	MOVUPS 96(SI), X2
    48  	AESENC X2, X0
    49  	MOVUPS 112(SI), X2
    50  	AESENC X2, X0
    51  	MOVUPS 128(SI), X2
    52  	AESENC X2, X0
    53  	MOVUPS 144(SI), X2 // last round key
    54  	AESENCLAST X2, X0
    55  	MOVUPS X0, (DX) // write the 16-byte result to dst
    56  	RET
    57  
    58  // func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
    59  TEXT ·decryptBlockAsm(SB),NOSPLIT,$0 // decrypts the 16 bytes at src into dst with the round keys at xk
    60  	MOVQ src+24(FP), DI
    61  	MOVQ dst+16(FP), DX
    62  	MOVQ xk+8(FP), SI
    63  	MOVQ nr+0(FP), CX
    64  	MOVUPS (DI), X0 // X0 = input block; it carries the running state
    65  	MOVUPS (SI), X2 // X2 = first round key at xk
    66  	PXOR X2, X0 // state ^= first round key
    67  	ADDQ $16, SI // SI -> second round key; keys are read in ascending address order
    68  	CMPQ CX, $12 // nr < 12: 10 rounds, nr == 12: 12 rounds, nr > 12: 14 rounds
    69  	JB dec_tail10
    70  	JE dec_tail12
    71  dec_lead14:
    72  	MOVUPS (SI), X2 // two extra rounds, only on the 14-round path
    73  	AESDEC X2, X0
    74  	MOVUPS 16(SI), X2
    75  	AESDEC X2, X0
    76  	ADDQ $32, SI
    77  dec_tail12:
    78  	MOVUPS (SI), X2 // two extra rounds, on the 12- and 14-round paths
    79  	AESDEC X2, X0
    80  	MOVUPS 16(SI), X2
    81  	AESDEC X2, X0
    82  	ADDQ $32, SI
    83  dec_tail10:
    84  	MOVUPS (SI), X2 // common tail: nine AESDEC rounds, then AESDECLAST
    85  	AESDEC X2, X0
    86  	MOVUPS 16(SI), X2
    87  	AESDEC X2, X0
    88  	MOVUPS 32(SI), X2
    89  	AESDEC X2, X0
    90  	MOVUPS 48(SI), X2
    91  	AESDEC X2, X0
    92  	MOVUPS 64(SI), X2
    93  	AESDEC X2, X0
    94  	MOVUPS 80(SI), X2
    95  	AESDEC X2, X0
    96  	MOVUPS 96(SI), X2
    97  	AESDEC X2, X0
    98  	MOVUPS 112(SI), X2
    99  	AESDEC X2, X0
   100  	MOVUPS 128(SI), X2
   101  	AESDEC X2, X0
   102  	MOVUPS 144(SI), X2 // last round key
   103  	AESDECLAST X2, X0
   104  	MOVUPS X0, (DX) // write the 16-byte result to dst
   105  	RET
   106  
   107  // func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
   108  // Writes the encryption round keys to enc, then fills dec with nr+1 round keys taken from enc in reverse order. Note that round keys are stored in uint128 format, not uint32
   109  TEXT ·expandKeyAsm(SB),NOSPLIT,$0
   110  	MOVQ nr+0(FP), CX
   111  	MOVQ key+8(FP), AX
   112  	MOVQ enc+16(FP), BX // BX = enc write cursor; the _expand_key_* helpers store through it and advance it
   113  	MOVQ dec+24(FP), DX
   114  	MOVUPS (AX), X0 // X0 = key bytes 0..15
   115  	// enc: build the encryption schedule
   116  	MOVUPS X0, (BX) // the first round key is key bytes 0..15 unchanged
   117  	ADDQ $16, BX
   118  	PXOR X4, X4 // _expand_key_* expect X4 to be zero
   119  	CMPL CX, $12 // 32-bit compare of nr with 12: equal -> 192 path, below -> 128 path, otherwise fall through to 256
   120  	JE Lexp_enc192
   121  	JB Lexp_enc128
   122  Lexp_enc256:
   123  	MOVUPS 16(AX), X2 // X2 = key bytes 16..31
   124  	MOVUPS X2, (BX) // stored unchanged as the second round key
   125  	ADDQ $16, BX
   126  	AESKEYGENASSIST $0x01, X2, X1 // the immediate is the round constant; it doubles every a/b pair below
   127  	CALL _expand_key_256a<>(SB) // updates X0 from X1 and stores it at (BX)
   128  	AESKEYGENASSIST $0x01, X0, X1
   129  	CALL _expand_key_256b<>(SB) // updates X2 from X1 and stores it at (BX)
   130  	AESKEYGENASSIST $0x02, X2, X1
   131  	CALL _expand_key_256a<>(SB)
   132  	AESKEYGENASSIST $0x02, X0, X1
   133  	CALL _expand_key_256b<>(SB)
   134  	AESKEYGENASSIST $0x04, X2, X1
   135  	CALL _expand_key_256a<>(SB)
   136  	AESKEYGENASSIST $0x04, X0, X1
   137  	CALL _expand_key_256b<>(SB)
   138  	AESKEYGENASSIST $0x08, X2, X1
   139  	CALL _expand_key_256a<>(SB)
   140  	AESKEYGENASSIST $0x08, X0, X1
   141  	CALL _expand_key_256b<>(SB)
   142  	AESKEYGENASSIST $0x10, X2, X1
   143  	CALL _expand_key_256a<>(SB)
   144  	AESKEYGENASSIST $0x10, X0, X1
   145  	CALL _expand_key_256b<>(SB)
   146  	AESKEYGENASSIST $0x20, X2, X1
   147  	CALL _expand_key_256a<>(SB)
   148  	AESKEYGENASSIST $0x20, X0, X1
   149  	CALL _expand_key_256b<>(SB)
   150  	AESKEYGENASSIST $0x40, X2, X1
   151  	CALL _expand_key_256a<>(SB) // last key on this path: 2 direct stores + 13 helper stores = 15 round keys
   152  	JMP Lexp_dec
   153  Lexp_enc192:
   154  	MOVQ 16(AX), X2 // 8-byte load: low half of X2 = key bytes 16..23
   155  	AESKEYGENASSIST $0x01, X2, X1
   156  	CALL _expand_key_192a<>(SB) // "a" helper stores 32 bytes at (BX)
   157  	AESKEYGENASSIST $0x02, X2, X1
   158  	CALL _expand_key_192b<>(SB) // "b" helper stores 16 bytes at (BX)
   159  	AESKEYGENASSIST $0x04, X2, X1
   160  	CALL _expand_key_192a<>(SB)
   161  	AESKEYGENASSIST $0x08, X2, X1
   162  	CALL _expand_key_192b<>(SB)
   163  	AESKEYGENASSIST $0x10, X2, X1
   164  	CALL _expand_key_192a<>(SB)
   165  	AESKEYGENASSIST $0x20, X2, X1
   166  	CALL _expand_key_192b<>(SB)
   167  	AESKEYGENASSIST $0x40, X2, X1
   168  	CALL _expand_key_192a<>(SB)
   169  	AESKEYGENASSIST $0x80, X2, X1
   170  	CALL _expand_key_192b<>(SB) // 16 + 4*(32+16) bytes written in total = 13 round keys
   171  	JMP Lexp_dec
   172  Lexp_enc128:
   173  	AESKEYGENASSIST $0x01, X0, X1 // each step derives the next round key from the previous one in X0
   174  	CALL _expand_key_128<>(SB)
   175  	AESKEYGENASSIST $0x02, X0, X1
   176  	CALL _expand_key_128<>(SB)
   177  	AESKEYGENASSIST $0x04, X0, X1
   178  	CALL _expand_key_128<>(SB)
   179  	AESKEYGENASSIST $0x08, X0, X1
   180  	CALL _expand_key_128<>(SB)
   181  	AESKEYGENASSIST $0x10, X0, X1
   182  	CALL _expand_key_128<>(SB)
   183  	AESKEYGENASSIST $0x20, X0, X1
   184  	CALL _expand_key_128<>(SB)
   185  	AESKEYGENASSIST $0x40, X0, X1
   186  	CALL _expand_key_128<>(SB)
   187  	AESKEYGENASSIST $0x80, X0, X1
   188  	CALL _expand_key_128<>(SB)
   189  	AESKEYGENASSIST $0x1b, X0, X1
   190  	CALL _expand_key_128<>(SB)
   191  	AESKEYGENASSIST $0x36, X0, X1
   192  	CALL _expand_key_128<>(SB) // 1 direct store + 10 helper stores = 11 round keys
   193  Lexp_dec:
   194  	// dec: walk the enc schedule backwards (BX) while filling dec forwards (DX)
   195  	SUBQ $16, BX // BX was one past the end; now it addresses the last enc round key
   196  	MOVUPS (BX), X1
   197  	MOVUPS X1, (DX) // dec[0] = last enc round key, unchanged
   198  	DECQ CX // CX = nr-1 = number of keys the loop transforms
   199  Lexp_dec_loop:
   200  	MOVUPS -16(BX), X1 // next enc round key, moving towards the start
   201  	AESIMC X1, X0 // apply InvMixColumns to the round key
   202  	MOVUPS X0, 16(DX)
   203  	SUBQ $16, BX
   204  	ADDQ $16, DX
   205  	DECQ CX
   206  	JNZ Lexp_dec_loop
   207  	MOVUPS -16(BX), X0 // BX now addresses the second enc key, so this reads the first one
   208  	MOVUPS X0, 16(DX) // final dec round key = first enc round key, unchanged
   209  	RET
   210  
   211  TEXT _expand_key_128<>(SB),NOSPLIT,$0 // in: X0 = previous round key, X1 = AESKEYGENASSIST result, X4 with dword 0 zero, BX = store address; out: X0 = new round key, BX += 16
   212  	PSHUFD $0xff, X1, X1 // broadcast dword 3 of X1 to all four lanes
   213  	SHUFPS $0x10, X0, X4 // X4 = {X4[0], X4[0], X0[1], X0[0]}
   214  	PXOR X4, X0
   215  	SHUFPS $0x8c, X0, X4 // X4 = {X4[0], X4[3], X0[0], X0[2]}; X4[0] is still zero afterwards
   216  	PXOR X4, X0 // with X4[0] == 0, each dword of X0 is now the XOR of itself and all lower dwords of the old X0
   217  	PXOR X1, X0 // fold the broadcast key-assist word into every dword
   218  	MOVUPS X0, (BX) // emit the new round key
   219  	ADDQ $16, BX
   220  	RET
   221  
   222  TEXT _expand_key_192a<>(SB),NOSPLIT,$0 // in: X0 and low half of X2 = key state, X1 = AESKEYGENASSIST result, X4 with dword 0 zero, BX = store address; stores 32 bytes, BX += 32
   223  	PSHUFD $0x55, X1, X1 // broadcast dword 1 of X1 to all four lanes
   224  	SHUFPS $0x10, X0, X4 // X4 = {X4[0], X4[0], X0[1], X0[0]}
   225  	PXOR X4, X0
   226  	SHUFPS $0x8c, X0, X4 // X4 = {X4[0], X4[3], X0[0], X0[2]}
   227  	PXOR X4, X0 // with X4[0] == 0, each dword of X0 is now the XOR of itself and all lower dwords of the old X0
   228  	PXOR X1, X0 // X0 = updated first four words of the key state
   229  
   230  	MOVAPS X2, X5
   231  	MOVAPS X2, X6 // X6 keeps the X2 value from before this update; it is stored below
   232  	PSLLDQ $0x4, X5 // X5 = old X2 shifted up by one dword (dword 0 becomes zero)
   233  	PSHUFD $0xff, X0, X3 // X3 = broadcast of the new X0 dword 3
   234  	PXOR X3, X2
   235  	PXOR X5, X2 // X2[0] = old X2[0] ^ X0[3]; X2[1] = old X2[1] ^ old X2[0] ^ X0[3]
   236  
   237  	MOVAPS X0, X1
   238  	SHUFPS $0x44, X0, X6 // X6 = {old X2[0], old X2[1], X0[0], X0[1]}
   239  	MOVUPS X6, (BX)
   240  	SHUFPS $0x4e, X2, X1 // X1 = {X0[2], X0[3], new X2[0], new X2[1]}
   241  	MOVUPS X1, 16(BX)
   242  	ADDQ $32, BX
   243  	RET
   244  
   245  TEXT _expand_key_192b<>(SB),NOSPLIT,$0 // in: X0 and low half of X2 = key state, X1 = AESKEYGENASSIST result, X4 with dword 0 zero, BX = store address; stores 16 bytes, BX += 16
   246  	PSHUFD $0x55, X1, X1 // broadcast dword 1 of X1 to all four lanes
   247  	SHUFPS $0x10, X0, X4 // X4 = {X4[0], X4[0], X0[1], X0[0]}
   248  	PXOR X4, X0
   249  	SHUFPS $0x8c, X0, X4 // X4 = {X4[0], X4[3], X0[0], X0[2]}
   250  	PXOR X4, X0 // with X4[0] == 0, each dword of X0 is now the XOR of itself and all lower dwords of the old X0
   251  	PXOR X1, X0 // X0 = updated first four words of the key state
   252  
   253  	MOVAPS X2, X5
   254  	PSLLDQ $0x4, X5 // X5 = old X2 shifted up by one dword (dword 0 becomes zero)
   255  	PSHUFD $0xff, X0, X3 // X3 = broadcast of the new X0 dword 3
   256  	PXOR X3, X2
   257  	PXOR X5, X2 // X2[0] = old X2[0] ^ X0[3]; X2[1] = old X2[1] ^ old X2[0] ^ X0[3]
   258  
   259  	MOVUPS X0, (BX) // only X0 is stored here; the updated X2 stays in the register
   260  	ADDQ $16, BX
   261  	RET
   262  
   263  TEXT _expand_key_256a<>(SB),NOSPLIT,$0 // same register contract as _expand_key_128: updates X0 from X1 and stores it at (BX)
   264  	JMP _expand_key_128<>(SB) // tail jump: _expand_key_128's RET returns directly to this routine's caller
   265  
   266  TEXT _expand_key_256b<>(SB),NOSPLIT,$0 // in: X2 = round key to update, X1 = AESKEYGENASSIST result, X4 with dword 0 zero, BX = store address; out: X2 = new round key, BX += 16
   267  	PSHUFD $0xaa, X1, X1 // broadcast dword 2 of X1, which AESKEYGENASSIST fills with the substituted word only (no rotate, no round constant)
   268  	SHUFPS $0x10, X2, X4 // X4 = {X4[0], X4[0], X2[1], X2[0]}
   269  	PXOR X4, X2
   270  	SHUFPS $0x8c, X2, X4 // X4 = {X4[0], X4[3], X2[0], X2[2]}
   271  	PXOR X4, X2 // with X4[0] == 0, each dword of X2 is now the XOR of itself and all lower dwords of the old X2
   272  	PXOR X1, X2 // fold the broadcast key-assist word into every dword
   273  
   274  	MOVUPS X2, (BX) // emit the new round key
   275  	ADDQ $16, BX
   276  	RET
   277  

View as plain text