// Code generated by command: go run ctr_amd64_asm.go -out ../../ctr_amd64.s. DO NOT EDIT.

//go:build !purego

#include "textflag.h"

// func ctrBlocks1Asm(nr int, xk *[60]uint32, dst *[16]byte, src *[16]byte, ivlo uint64, ivhi uint64)
// Requires: AES, SSE, SSE2, SSE4.1, SSSE3
TEXT ·ctrBlocks1Asm(SB), $0-48
	MOVQ nr+0(FP), AX
	MOVQ xk+8(FP), CX
	MOVQ dst+16(FP), DX
	MOVQ src+24(FP), BX
	MOVQ ivlo+32(FP), SI
	MOVQ ivhi+40(FP), DI
	MOVOU bswapMask<>+0(SB), X0
	MOVQ SI, X1
	PINSRQ $0x01, DI, X1
	PSHUFB X0, X1
	MOVUPS (CX), X0
	PXOR X0, X1
	ADDQ $0x10, CX
	SUBQ $0x0c, AX
	JE enc192
	JB enc128
	MOVUPS (CX), X0
	AESENC X0, X1
	MOVUPS 16(CX), X0
	AESENC X0, X1
	ADDQ $0x20, CX

enc192:
	MOVUPS (CX), X0
	AESENC X0, X1
	MOVUPS 16(CX), X0
	AESENC X0, X1
	ADDQ $0x20, CX

enc128:
	MOVUPS (CX), X0
	AESENC X0, X1
	MOVUPS 16(CX), X0
	AESENC X0, X1
	MOVUPS 32(CX), X0
	AESENC X0, X1
	MOVUPS 48(CX), X0
	AESENC X0, X1
	MOVUPS 64(CX), X0
	AESENC X0, X1
	MOVUPS 80(CX), X0
	AESENC X0, X1
	MOVUPS 96(CX), X0
	AESENC X0, X1
	MOVUPS 112(CX), X0
	AESENC X0, X1
	MOVUPS 128(CX), X0
	AESENC X0, X1
	MOVUPS 144(CX), X0
	AESENCLAST X0, X1
	MOVUPS (BX), X0
	PXOR X1, X0
	MOVUPS X0, (DX)
	RET

DATA bswapMask<>+0(SB)/8, $0x08090a0b0c0d0e0f
DATA bswapMask<>+8(SB)/8, $0x0001020304050607
GLOBL bswapMask<>(SB), RODATA|NOPTR, $16

// func ctrBlocks2Asm(nr int, xk *[60]uint32, dst *[32]byte, src *[32]byte, ivlo uint64, ivhi uint64)
// Requires: AES, SSE, SSE2, SSE4.1, SSSE3
TEXT ·ctrBlocks2Asm(SB), $0-48
	MOVQ nr+0(FP), AX
	MOVQ xk+8(FP), CX
	MOVQ dst+16(FP), DX
	MOVQ src+24(FP), BX
	MOVQ ivlo+32(FP), SI
	MOVQ ivhi+40(FP), DI
	MOVOU bswapMask<>+0(SB), X0
	MOVQ SI, X1
	PINSRQ $0x01, DI, X1
	PSHUFB X0, X1
	ADDQ $0x01, SI
	ADCQ $0x00, DI
	MOVQ SI, X2
	PINSRQ $0x01, DI, X2
	PSHUFB X0, X2
	MOVUPS (CX), X0
	PXOR X0, X1
	PXOR X0, X2
	ADDQ $0x10, CX
	SUBQ $0x0c, AX
	JE enc192
	JB enc128
	MOVUPS (CX), X0
	AESENC X0, X1
	AESENC X0, X2
	MOVUPS 16(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	ADDQ $0x20, CX

enc192:
	MOVUPS (CX), X0
	AESENC X0, X1
	AESENC X0, X2
	MOVUPS 16(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	ADDQ $0x20, CX

enc128:
	MOVUPS (CX), X0
	AESENC X0, X1
	AESENC X0, X2
	MOVUPS 16(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	MOVUPS 32(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	MOVUPS 48(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	MOVUPS 64(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	MOVUPS 80(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	MOVUPS 96(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	MOVUPS 112(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	MOVUPS 128(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	MOVUPS 144(CX), X0
	AESENCLAST X0, X1
	AESENCLAST X0, X2
	MOVUPS (BX), X0
	PXOR X1, X0
	MOVUPS X0, (DX)
	MOVUPS 16(BX), X0
	PXOR X2, X0
	MOVUPS X0, 16(DX)
	RET
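
// All four ctrBlocksNAsm routines share the same round-count dispatch:
// after XORing in round key 0, AX holds nr-12, so AES-256 (nr=14) falls
// through two extra rounds into enc192, AES-192 (nr=12) jumps to enc192,
// and AES-128 (nr=10) jumps straight to enc128. A rough Go sketch of the
// control flow (round and lastRound are hypothetical helpers, not code in
// this package):
//
//	k := 1 // round key 0 was already XORed in
//	if nr > 12 { // AES-256 only
//		round(k); round(k + 1); k += 2
//	}
//	if nr >= 12 { // AES-192 and AES-256 (label enc192)
//		round(k); round(k + 1); k += 2
//	}
//	for i := 0; i < 9; i++ { // label enc128, shared by all key sizes
//		round(k + i)
//	}
//	lastRound(k + 9) // AESENCLAST with the final round key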
// func ctrBlocks4Asm(nr int, xk *[60]uint32, dst *[64]byte, src *[64]byte, ivlo uint64, ivhi uint64)
// Requires: AES, SSE, SSE2, SSE4.1, SSSE3
TEXT ·ctrBlocks4Asm(SB), $0-48
	MOVQ nr+0(FP), AX
	MOVQ xk+8(FP), CX
	MOVQ dst+16(FP), DX
	MOVQ src+24(FP), BX
	MOVQ ivlo+32(FP), SI
	MOVQ ivhi+40(FP), DI
	MOVOU bswapMask<>+0(SB), X0
	MOVQ SI, X1
	PINSRQ $0x01, DI, X1
	PSHUFB X0, X1
	ADDQ $0x01, SI
	ADCQ $0x00, DI
	MOVQ SI, X2
	PINSRQ $0x01, DI, X2
	PSHUFB X0, X2
	ADDQ $0x01, SI
	ADCQ $0x00, DI
	MOVQ SI, X3
	PINSRQ $0x01, DI, X3
	PSHUFB X0, X3
	ADDQ $0x01, SI
	ADCQ $0x00, DI
	MOVQ SI, X4
	PINSRQ $0x01, DI, X4
	PSHUFB X0, X4
	MOVUPS (CX), X0
	PXOR X0, X1
	PXOR X0, X2
	PXOR X0, X3
	PXOR X0, X4
	ADDQ $0x10, CX
	SUBQ $0x0c, AX
	JE enc192
	JB enc128
	MOVUPS (CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	MOVUPS 16(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	ADDQ $0x20, CX

enc192:
	MOVUPS (CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	MOVUPS 16(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	ADDQ $0x20, CX

enc128:
	MOVUPS (CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	MOVUPS 16(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	MOVUPS 32(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	MOVUPS 48(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	MOVUPS 64(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	MOVUPS 80(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	MOVUPS 96(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	MOVUPS 112(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	MOVUPS 128(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	MOVUPS 144(CX), X0
	AESENCLAST X0, X1
	AESENCLAST X0, X2
	AESENCLAST X0, X3
	AESENCLAST X0, X4
	MOVUPS (BX), X0
	PXOR X1, X0
	MOVUPS X0, (DX)
	MOVUPS 16(BX), X0
	PXOR X2, X0
	MOVUPS X0, 16(DX)
	MOVUPS 32(BX), X0
	PXOR X3, X0
	MOVUPS X0, 32(DX)
	MOVUPS 48(BX), X0
	PXOR X4, X0
	MOVUPS X0, 48(DX)
	RET
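
// Each routine materializes its counter blocks up front: the 128-bit
// counter lives in SI (low half) and DI (high half), PINSRQ packs the
// pair into an XMM register, and PSHUFB with bswapMask produces the
// big-endian block that CTR mode encrypts. The ADDQ/ADCQ pair is a
// 128-bit increment with carry. An equivalent Go sketch (illustrative
// only, not code in this package):
//
//	var ctr [16]byte
//	binary.BigEndian.PutUint64(ctr[0:8], ivhi) // PINSRQ + PSHUFB
//	binary.BigEndian.PutUint64(ctr[8:16], ivlo)
//	var carry uint64
//	ivlo, carry = bits.Add64(ivlo, 1, 0) // ADDQ $0x01, SI
//	ivhi, _ = bits.Add64(ivhi, 0, carry) // ADCQ $0x00, DI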
// func ctrBlocks8Asm(nr int, xk *[60]uint32, dst *[128]byte, src *[128]byte, ivlo uint64, ivhi uint64)
// Requires: AES, SSE, SSE2, SSE4.1, SSSE3
TEXT ·ctrBlocks8Asm(SB), $0-48
	MOVQ nr+0(FP), AX
	MOVQ xk+8(FP), CX
	MOVQ dst+16(FP), DX
	MOVQ src+24(FP), BX
	MOVQ ivlo+32(FP), SI
	MOVQ ivhi+40(FP), DI
	MOVOU bswapMask<>+0(SB), X0
	MOVQ SI, X1
	PINSRQ $0x01, DI, X1
	PSHUFB X0, X1
	ADDQ $0x01, SI
	ADCQ $0x00, DI
	MOVQ SI, X2
	PINSRQ $0x01, DI, X2
	PSHUFB X0, X2
	ADDQ $0x01, SI
	ADCQ $0x00, DI
	MOVQ SI, X3
	PINSRQ $0x01, DI, X3
	PSHUFB X0, X3
	ADDQ $0x01, SI
	ADCQ $0x00, DI
	MOVQ SI, X4
	PINSRQ $0x01, DI, X4
	PSHUFB X0, X4
	ADDQ $0x01, SI
	ADCQ $0x00, DI
	MOVQ SI, X5
	PINSRQ $0x01, DI, X5
	PSHUFB X0, X5
	ADDQ $0x01, SI
	ADCQ $0x00, DI
	MOVQ SI, X6
	PINSRQ $0x01, DI, X6
	PSHUFB X0, X6
	ADDQ $0x01, SI
	ADCQ $0x00, DI
	MOVQ SI, X7
	PINSRQ $0x01, DI, X7
	PSHUFB X0, X7
	ADDQ $0x01, SI
	ADCQ $0x00, DI
	MOVQ SI, X8
	PINSRQ $0x01, DI, X8
	PSHUFB X0, X8
	MOVUPS (CX), X0
	PXOR X0, X1
	PXOR X0, X2
	PXOR X0, X3
	PXOR X0, X4
	PXOR X0, X5
	PXOR X0, X6
	PXOR X0, X7
	PXOR X0, X8
	ADDQ $0x10, CX
	SUBQ $0x0c, AX
	JE enc192
	JB enc128
	MOVUPS (CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	AESENC X0, X5
	AESENC X0, X6
	AESENC X0, X7
	AESENC X0, X8
	MOVUPS 16(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	AESENC X0, X5
	AESENC X0, X6
	AESENC X0, X7
	AESENC X0, X8
	ADDQ $0x20, CX

enc192:
	MOVUPS (CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	AESENC X0, X5
	AESENC X0, X6
	AESENC X0, X7
	AESENC X0, X8
	MOVUPS 16(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	AESENC X0, X5
	AESENC X0, X6
	AESENC X0, X7
	AESENC X0, X8
	ADDQ $0x20, CX

enc128:
	MOVUPS (CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	AESENC X0, X5
	AESENC X0, X6
	AESENC X0, X7
	AESENC X0, X8
	MOVUPS 16(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	AESENC X0, X5
	AESENC X0, X6
	AESENC X0, X7
	AESENC X0, X8
	MOVUPS 32(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	AESENC X0, X5
	AESENC X0, X6
	AESENC X0, X7
	AESENC X0, X8
	MOVUPS 48(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	AESENC X0, X5
	AESENC X0, X6
	AESENC X0, X7
	AESENC X0, X8
	MOVUPS 64(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	AESENC X0, X5
	AESENC X0, X6
	AESENC X0, X7
	AESENC X0, X8
	MOVUPS 80(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	AESENC X0, X5
	AESENC X0, X6
	AESENC X0, X7
	AESENC X0, X8
	MOVUPS 96(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	AESENC X0, X5
	AESENC X0, X6
	AESENC X0, X7
	AESENC X0, X8
	MOVUPS 112(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	AESENC X0, X5
	AESENC X0, X6
	AESENC X0, X7
	AESENC X0, X8
	MOVUPS 128(CX), X0
	AESENC X0, X1
	AESENC X0, X2
	AESENC X0, X3
	AESENC X0, X4
	AESENC X0, X5
	AESENC X0, X6
	AESENC X0, X7
	AESENC X0, X8
	MOVUPS 144(CX), X0
	AESENCLAST X0, X1
	AESENCLAST X0, X2
	AESENCLAST X0, X3
	AESENCLAST X0, X4
	AESENCLAST X0, X5
	AESENCLAST X0, X6
	AESENCLAST X0, X7
	AESENCLAST X0, X8
	MOVUPS (BX), X0
	PXOR X1, X0
	MOVUPS X0, (DX)
	MOVUPS 16(BX), X0
	PXOR X2, X0
	MOVUPS X0, 16(DX)
	MOVUPS 32(BX), X0
	PXOR X3, X0
	MOVUPS X0, 32(DX)
	MOVUPS 48(BX), X0
	PXOR X4, X0
	MOVUPS X0, 48(DX)
	MOVUPS 64(BX), X0
	PXOR X5, X0
	MOVUPS X0, 64(DX)
	MOVUPS 80(BX), X0
	PXOR X6, X0
	MOVUPS X0, 80(DX)
	MOVUPS 96(BX), X0
	PXOR X7, X0
	MOVUPS X0, 96(DX)
	MOVUPS 112(BX), X0
	PXOR X8, X0
	MOVUPS X0, 112(DX)
	RET
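
// For reference, each ctrBlocksNAsm routine computes N consecutive CTR
// keystream blocks and XORs them into src, writing the result to dst. A
// hedged Go equivalent of the single-block case, using crypto/aes in
// place of the pre-expanded key schedule xk (illustrative only;
// ctrBlock1Ref is a hypothetical name, not part of this package):
//
//	func ctrBlock1Ref(b cipher.Block, dst, src *[16]byte, ivlo, ivhi uint64) {
//		var ctr, ks [16]byte
//		binary.BigEndian.PutUint64(ctr[0:8], ivhi)  // high half of the counter
//		binary.BigEndian.PutUint64(ctr[8:16], ivlo) // low half of the counter
//		b.Encrypt(ks[:], ctr[:])                    // AES on the counter block
//		for i := range dst {
//			dst[i] = src[i] ^ ks[i] // XOR keystream into the input
//		}
//	}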