Text file src/crypto/md5/md5block_arm.s

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  //
     5  // ARM version of md5block.go
     6  
     7  //go:build !purego
     8  
     9  #include "textflag.h"
    10  
    11  // Register definitions
        //
        // Symbolic names for the ARM registers used by ·block. Several of them
        // have secondary uses inside ·block: Rtable, Rdata and Rc0 carry the
        // memmove arguments, Rc0 and Rt0 hold the dig pointer, and Rc0-Rc3 hold
        // the previous digest state while it is added back at the end of a chunk.
    12  #define Rtable	R0	// Pointer to MD5 constants table (also memmove "to" scratch)
    13  #define Rdata	R1	// Pointer to data to hash (the chunk currently being read)
    14  #define Ra	R2	// MD5 accumulator (first state word loaded from *dig)
    15  #define Rb	R3	// MD5 accumulator (second state word)
    16  #define Rc	R4	// MD5 accumulator (third state word)
    17  #define Rd	R5	// MD5 accumulator (fourth state word)
    18  #define Rc0	R6	// MD5 constant (also memmove "n" and dig pointer scratch)
    19  #define Rc1	R7	// MD5 constant
    20  #define Rc2	R8	// MD5 constant
    21  // r9, r10 are forbidden
        // NOTE(review): presumably because the Go toolchain/runtime reserves them
        // on ARM - confirm against the Go assembler guide.
    22  // r11 is OK provided you check the assembler that no synthetic instructions use it
    23  #define Rc3	R11	// MD5 constant
    24  #define Rt0	R12	// temporary (round-function value; also p_end and dig pointer scratch)
    25  #define Rt1	R14	// temporary (message word X[index] loaded by the ROUND macros)
    26  
    27  // func block(dig *digest, p []byte)
        //
        // Updates the four state words stored at *dig with the data in p,
        // consuming p one 64-byte chunk per loop iteration. A chunk whose address
        // is not 4-byte aligned is first copied into the on-stack buffer buf.
        //
        // Arguments:
    28  // 0(FP) is *digest
    29  // 4(FP) is p.array (struct Slice)
    30  // 8(FP) is p.len
    31  //12(FP) is p.cap
    32  //
    33  // Stack frame
        // Locals occupy 8 + 4*16 = 72 bytes; together with the 12 bytes of
        // outgoing memmove arguments this gives the 84-byte frame declared on
        // the TEXT line.
    34  #define p_end	end-4(SP)	// pointer to the end of data
    35  #define p_data	data-8(SP)	// current data pointer (start of the chunk being processed)
    36  #define buf	buffer-(8+4*16)(SP)	// 16-word temporary buffer for an unaligned chunk
    37  		// 3 words at 4..12(R13) for called routine parameters
    38  
    39  TEXT	·block(SB), NOSPLIT, $84-16	// 84-byte frame, 16 bytes of arguments
        	// Compute the end-of-data pointer once and keep it in the frame; the
        	// registers used here are all reused inside the loop.
    40  	MOVW	p+4(FP), Rdata	// pointer to the data
    41  	MOVW	p_len+8(FP), Rt0	// number of bytes
    42  	ADD	Rdata, Rt0	// Rt0 = data pointer + number of bytes
    43  	MOVW	Rt0, p_end	// pointer to end of data
    44  
        // One iteration per 64-byte chunk. The bounds test is at the bottom of the
        // loop, so the body always runs at least once.
        // NOTE(review): presumably callers only pass a non-empty multiple of 64
        // bytes - confirm against the Go caller.
    45  loop:
    46  	MOVW	Rdata, p_data	// Save Rdata; reloaded at the bottom of the loop to advance through p
    47  	AND.S	$3, Rdata, Rt0	// TST $3, Rdata not working see issue 5921
    48  	BEQ	aligned			// aligned detected - skip copy
    49  
    50  	// Copy the unaligned source data into the aligned temporary buffer
    51  	// memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
    52  	MOVW	$buf, Rtable	// to
    53  	MOVW	$64, Rc0		// n
    54  	MOVM.IB	[Rtable,Rdata,Rc0], (R13)	// store to, from, n into the outgoing argument words
    55  	BL	runtime·memmove(SB)
    56  
    57  	// Point to the local aligned copy of the data
        	// (p_data still holds the original source pointer for this chunk)
    58  	MOVW	$buf, Rdata
    59  
    60  aligned:
    61  	// Point to the table of constants
    62  	// A PC relative add would be cheaper than this
        	// Rtable is reset on every chunk because the rounds below advance it.
    63  	MOVW	$·table(SB), Rtable
    64  
    65  	// Load up initial MD5 accumulator
    66  	MOVW	dig+0(FP), Rc0
    67  	MOVM.IA (Rc0), [Ra,Rb,Rc,Rd]	// Ra..Rd = the four state words stored at *dig
    68  
    69  // a += (((c^d)&b)^d) + X[index] + const
    70  // a = a<<shift | a>>(32-shift) + b
        //
        // Round 1 step. X[index] is the 32-bit word at byte offset index*4 from
        // Rdata, and Rconst is the register holding this step's table constant.
        // The rotate-left by shift is expressed as a rotate-right by 32-shift
        // folded into the final ADD. Clobbers Rt0 and Rt1.
    71  #define ROUND1(Ra, Rb, Rc, Rd, index, shift, Rconst) \
    72  	EOR	Rc, Rd, Rt0		; \
    73  	AND	Rb, Rt0			; \
    74  	EOR	Rd, Rt0			; \
    75  	MOVW	(index<<2)(Rdata), Rt1	; \
    76  	ADD	Rt1, Rt0			; \
    77  	ADD	Rconst, Rt0			; \
    78  	ADD	Rt0, Ra			; \
    79  	ADD	Ra@>(32-shift), Rb, Ra	;
    80  
        	// Each group below loads the next four constants (the .W suffix writes
        	// the advanced address back to Rtable) and then runs four steps with the
        	// accumulator roles rotated, so Ra, Rd, Rc and Rb are each updated once.
    81  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    82  	ROUND1(Ra, Rb, Rc, Rd,  0,	7, Rc0)
    83  	ROUND1(Rd, Ra, Rb, Rc,  1, 12, Rc1)
    84  	ROUND1(Rc, Rd, Ra, Rb,  2, 17, Rc2)
    85  	ROUND1(Rb, Rc, Rd, Ra,  3, 22, Rc3)
    86  
    87  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    88  	ROUND1(Ra, Rb, Rc, Rd,  4,	7, Rc0)
    89  	ROUND1(Rd, Ra, Rb, Rc,  5, 12, Rc1)
    90  	ROUND1(Rc, Rd, Ra, Rb,  6, 17, Rc2)
    91  	ROUND1(Rb, Rc, Rd, Ra,  7, 22, Rc3)
    92  
    93  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    94  	ROUND1(Ra, Rb, Rc, Rd,  8,	7, Rc0)
    95  	ROUND1(Rd, Ra, Rb, Rc,  9, 12, Rc1)
    96  	ROUND1(Rc, Rd, Ra, Rb, 10, 17, Rc2)
    97  	ROUND1(Rb, Rc, Rd, Ra, 11, 22, Rc3)
    98  
    99  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   100  	ROUND1(Ra, Rb, Rc, Rd, 12,	7, Rc0)
   101  	ROUND1(Rd, Ra, Rb, Rc, 13, 12, Rc1)
   102  	ROUND1(Rc, Rd, Ra, Rb, 14, 17, Rc2)
   103  	ROUND1(Rb, Rc, Rd, Ra, 15, 22, Rc3)
   104  
   105  // a += (((b^c)&d)^c) + X[index] + const
   106  // a = a<<shift | a>>(32-shift) + b
        //
        // Round 2 step. Same shape as ROUND1 with a different mixing function;
        // clobbers Rt0 and Rt1.
   107  #define ROUND2(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   108  	EOR	Rb, Rc, Rt0		; \
   109  	AND	Rd, Rt0			; \
   110  	EOR	Rc, Rt0			; \
   111  	MOVW	(index<<2)(Rdata), Rt1	; \
   112  	ADD	Rt1, Rt0			; \
   113  	ADD	Rconst, Rt0			; \
   114  	ADD	Rt0, Ra			; \
   115  	ADD	Ra@>(32-shift), Rb, Ra	;
   116  
   117  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   118  	ROUND2(Ra, Rb, Rc, Rd,  1,	5, Rc0)
   119  	ROUND2(Rd, Ra, Rb, Rc,  6,	9, Rc1)
   120  	ROUND2(Rc, Rd, Ra, Rb, 11, 14, Rc2)
   121  	ROUND2(Rb, Rc, Rd, Ra,  0, 20, Rc3)
   122  
   123  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   124  	ROUND2(Ra, Rb, Rc, Rd,  5,	5, Rc0)
   125  	ROUND2(Rd, Ra, Rb, Rc, 10,	9, Rc1)
   126  	ROUND2(Rc, Rd, Ra, Rb, 15, 14, Rc2)
   127  	ROUND2(Rb, Rc, Rd, Ra,  4, 20, Rc3)
   128  
   129  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   130  	ROUND2(Ra, Rb, Rc, Rd,  9,	5, Rc0)
   131  	ROUND2(Rd, Ra, Rb, Rc, 14,	9, Rc1)
   132  	ROUND2(Rc, Rd, Ra, Rb,  3, 14, Rc2)
   133  	ROUND2(Rb, Rc, Rd, Ra,  8, 20, Rc3)
   134  
   135  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   136  	ROUND2(Ra, Rb, Rc, Rd, 13,	5, Rc0)
   137  	ROUND2(Rd, Ra, Rb, Rc,  2,	9, Rc1)
   138  	ROUND2(Rc, Rd, Ra, Rb,  7, 14, Rc2)
   139  	ROUND2(Rb, Rc, Rd, Ra, 12, 20, Rc3)
   140  
   141  // a += (b^c^d) + X[index] + const
   142  // a = a<<shift | a>>(32-shift) + b
        //
        // Round 3 step. Same shape as ROUND1 with a different mixing function;
        // clobbers Rt0 and Rt1.
   143  #define ROUND3(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   144  	EOR	Rb, Rc, Rt0		; \
   145  	EOR	Rd, Rt0			; \
   146  	MOVW	(index<<2)(Rdata), Rt1	; \
   147  	ADD	Rt1, Rt0			; \
   148  	ADD	Rconst, Rt0			; \
   149  	ADD	Rt0, Ra			; \
   150  	ADD	Ra@>(32-shift), Rb, Ra	;
   151  
   152  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   153  	ROUND3(Ra, Rb, Rc, Rd,  5,	4, Rc0)
   154  	ROUND3(Rd, Ra, Rb, Rc,  8, 11, Rc1)
   155  	ROUND3(Rc, Rd, Ra, Rb, 11, 16, Rc2)
   156  	ROUND3(Rb, Rc, Rd, Ra, 14, 23, Rc3)
   157  
   158  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   159  	ROUND3(Ra, Rb, Rc, Rd,  1,	4, Rc0)
   160  	ROUND3(Rd, Ra, Rb, Rc,  4, 11, Rc1)
   161  	ROUND3(Rc, Rd, Ra, Rb,  7, 16, Rc2)
   162  	ROUND3(Rb, Rc, Rd, Ra, 10, 23, Rc3)
   163  
   164  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   165  	ROUND3(Ra, Rb, Rc, Rd, 13,	4, Rc0)
   166  	ROUND3(Rd, Ra, Rb, Rc,  0, 11, Rc1)
   167  	ROUND3(Rc, Rd, Ra, Rb,  3, 16, Rc2)
   168  	ROUND3(Rb, Rc, Rd, Ra,  6, 23, Rc3)
   169  
   170  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   171  	ROUND3(Ra, Rb, Rc, Rd,  9,	4, Rc0)
   172  	ROUND3(Rd, Ra, Rb, Rc, 12, 11, Rc1)
   173  	ROUND3(Rc, Rd, Ra, Rb, 15, 16, Rc2)
   174  	ROUND3(Rb, Rc, Rd, Ra,  2, 23, Rc3)
   175  
   176  // a += (c^(b|^d)) + X[index] + const
   177  // a = a<<shift | a>>(32-shift) + b
        //
        // Round 4 step. Same shape as ROUND1 with a different mixing function
        // (MVN supplies the bitwise complement of d); clobbers Rt0 and Rt1.
   178  #define ROUND4(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   179  	MVN	Rd, Rt0			; \
   180  	ORR	Rb, Rt0			; \
   181  	EOR	Rc, Rt0			; \
   182  	MOVW	(index<<2)(Rdata), Rt1	; \
   183  	ADD	Rt1, Rt0			; \
   184  	ADD	Rconst, Rt0			; \
   185  	ADD	Rt0, Ra			; \
   186  	ADD	Ra@>(32-shift), Rb, Ra	;
   187  
   188  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   189  	ROUND4(Ra, Rb, Rc, Rd,  0,	6, Rc0)
   190  	ROUND4(Rd, Ra, Rb, Rc,  7, 10, Rc1)
   191  	ROUND4(Rc, Rd, Ra, Rb, 14, 15, Rc2)
   192  	ROUND4(Rb, Rc, Rd, Ra,  5, 21, Rc3)
   193  
   194  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   195  	ROUND4(Ra, Rb, Rc, Rd, 12,	6, Rc0)
   196  	ROUND4(Rd, Ra, Rb, Rc,  3, 10, Rc1)
   197  	ROUND4(Rc, Rd, Ra, Rb, 10, 15, Rc2)
   198  	ROUND4(Rb, Rc, Rd, Ra,  1, 21, Rc3)
   199  
   200  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   201  	ROUND4(Ra, Rb, Rc, Rd,  8,	6, Rc0)
   202  	ROUND4(Rd, Ra, Rb, Rc, 15, 10, Rc1)
   203  	ROUND4(Rc, Rd, Ra, Rb,  6, 15, Rc2)
   204  	ROUND4(Rb, Rc, Rd, Ra, 13, 21, Rc3)
   205  
   206  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   207  	ROUND4(Ra, Rb, Rc, Rd,  4,	6, Rc0)
   208  	ROUND4(Rd, Ra, Rb, Rc, 11, 10, Rc1)
   209  	ROUND4(Rc, Rd, Ra, Rb,  2, 15, Rc2)
   210  	ROUND4(Rb, Rc, Rd, Ra,  9, 21, Rc3)
   211  
        	// Fold this chunk into the digest: reload the previous state words into
        	// Rc0..Rc3 (the constants are no longer needed), add them to the new
        	// accumulators, and store the sums back to *dig.
   212  	MOVW	dig+0(FP), Rt0
   213  	MOVM.IA (Rt0), [Rc0,Rc1,Rc2,Rc3]
   214  
   215  	ADD	Rc0, Ra
   216  	ADD	Rc1, Rb
   217  	ADD	Rc2, Rc
   218  	ADD	Rc3, Rd
   219  
   220  	MOVM.IA [Ra,Rb,Rc,Rd], (Rt0)
   221  
        	// Advance the saved source pointer by one chunk and loop while it is
        	// still below p_end (BLO is an unsigned "lower than" test).
   222  	MOVW	p_data, Rdata
   223  	MOVW	p_end, Rt0
   224  	ADD	$64, Rdata
   225  	CMP	Rt0, Rdata
   226  	BLO	loop
   227  
   228  	RET
   229  
   230  // MD5 constants table
        //
        // 64 32-bit words (256 bytes) in four groups of 16, one group per round.
        // ·block reads them strictly in order, four at a time, so the layout here
        // must match the order of the ROUND1..ROUND4 invocations.
   231  
   232  	// Round 1
   233  	DATA	·table+0x00(SB)/4, $0xd76aa478
   234  	DATA	·table+0x04(SB)/4, $0xe8c7b756
   235  	DATA	·table+0x08(SB)/4, $0x242070db
   236  	DATA	·table+0x0c(SB)/4, $0xc1bdceee
   237  	DATA	·table+0x10(SB)/4, $0xf57c0faf
   238  	DATA	·table+0x14(SB)/4, $0x4787c62a
   239  	DATA	·table+0x18(SB)/4, $0xa8304613
   240  	DATA	·table+0x1c(SB)/4, $0xfd469501
   241  	DATA	·table+0x20(SB)/4, $0x698098d8
   242  	DATA	·table+0x24(SB)/4, $0x8b44f7af
   243  	DATA	·table+0x28(SB)/4, $0xffff5bb1
   244  	DATA	·table+0x2c(SB)/4, $0x895cd7be
   245  	DATA	·table+0x30(SB)/4, $0x6b901122
   246  	DATA	·table+0x34(SB)/4, $0xfd987193
   247  	DATA	·table+0x38(SB)/4, $0xa679438e
   248  	DATA	·table+0x3c(SB)/4, $0x49b40821
   249  	// Round 2
   250  	DATA	·table+0x40(SB)/4, $0xf61e2562
   251  	DATA	·table+0x44(SB)/4, $0xc040b340
   252  	DATA	·table+0x48(SB)/4, $0x265e5a51
   253  	DATA	·table+0x4c(SB)/4, $0xe9b6c7aa
   254  	DATA	·table+0x50(SB)/4, $0xd62f105d
   255  	DATA	·table+0x54(SB)/4, $0x02441453
   256  	DATA	·table+0x58(SB)/4, $0xd8a1e681
   257  	DATA	·table+0x5c(SB)/4, $0xe7d3fbc8
   258  	DATA	·table+0x60(SB)/4, $0x21e1cde6
   259  	DATA	·table+0x64(SB)/4, $0xc33707d6
   260  	DATA	·table+0x68(SB)/4, $0xf4d50d87
   261  	DATA	·table+0x6c(SB)/4, $0x455a14ed
   262  	DATA	·table+0x70(SB)/4, $0xa9e3e905
   263  	DATA	·table+0x74(SB)/4, $0xfcefa3f8
   264  	DATA	·table+0x78(SB)/4, $0x676f02d9
   265  	DATA	·table+0x7c(SB)/4, $0x8d2a4c8a
   266  	// Round 3
   267  	DATA	·table+0x80(SB)/4, $0xfffa3942
   268  	DATA	·table+0x84(SB)/4, $0x8771f681
   269  	DATA	·table+0x88(SB)/4, $0x6d9d6122
   270  	DATA	·table+0x8c(SB)/4, $0xfde5380c
   271  	DATA	·table+0x90(SB)/4, $0xa4beea44
   272  	DATA	·table+0x94(SB)/4, $0x4bdecfa9
   273  	DATA	·table+0x98(SB)/4, $0xf6bb4b60
   274  	DATA	·table+0x9c(SB)/4, $0xbebfbc70
   275  	DATA	·table+0xa0(SB)/4, $0x289b7ec6
   276  	DATA	·table+0xa4(SB)/4, $0xeaa127fa
   277  	DATA	·table+0xa8(SB)/4, $0xd4ef3085
   278  	DATA	·table+0xac(SB)/4, $0x04881d05
   279  	DATA	·table+0xb0(SB)/4, $0xd9d4d039
   280  	DATA	·table+0xb4(SB)/4, $0xe6db99e5
   281  	DATA	·table+0xb8(SB)/4, $0x1fa27cf8
   282  	DATA	·table+0xbc(SB)/4, $0xc4ac5665
   283  	// Round 4
   284  	DATA	·table+0xc0(SB)/4, $0xf4292244
   285  	DATA	·table+0xc4(SB)/4, $0x432aff97
   286  	DATA	·table+0xc8(SB)/4, $0xab9423a7
   287  	DATA	·table+0xcc(SB)/4, $0xfc93a039
   288  	DATA	·table+0xd0(SB)/4, $0x655b59c3
   289  	DATA	·table+0xd4(SB)/4, $0x8f0ccc92
   290  	DATA	·table+0xd8(SB)/4, $0xffeff47d
   291  	DATA	·table+0xdc(SB)/4, $0x85845dd1
   292  	DATA	·table+0xe0(SB)/4, $0x6fa87e4f
   293  	DATA	·table+0xe4(SB)/4, $0xfe2ce6e0
   294  	DATA	·table+0xe8(SB)/4, $0xa3014314
   295  	DATA	·table+0xec(SB)/4, $0x4e0811a1
   296  	DATA	·table+0xf0(SB)/4, $0xf7537e82
   297  	DATA	·table+0xf4(SB)/4, $0xbd3af235
   298  	DATA	·table+0xf8(SB)/4, $0x2ad7d2bb
   299  	DATA	·table+0xfc(SB)/4, $0xeb86d391
   300  	// Global definition
        	// RODATA comes from textflag.h (included at the top of this file) and has
        	// the value 8, so this is the same flag the literal 8 used to express.
   301  	GLOBL	·table(SB),RODATA,$256
   302  

View as plain text