Text file src/internal/runtime/gc/scan/expand_amd64.s

     1  // Code generated by mkasm.go. DO NOT EDIT.
     2  
     3  #include "go_asm.h"
     4  #include "textflag.h"
     5  
     6  GLOBL ·gcExpandersAVX512(SB), RODATA, $0x220 // read-only table: 0x220 bytes = 68 entries of 8 bytes each
     7  DATA  ·gcExpandersAVX512+0x00(SB)/8, $0 // entry 0: zero (no routine)
     8  DATA  ·gcExpandersAVX512+0x08(SB)/8, $expandAVX512_1<>(SB) // entries 1..26: addresses of the file-local expandAVX512_N routines
     9  DATA  ·gcExpandersAVX512+0x10(SB)/8, $expandAVX512_2<>(SB)
    10  DATA  ·gcExpandersAVX512+0x18(SB)/8, $expandAVX512_3<>(SB)
    11  DATA  ·gcExpandersAVX512+0x20(SB)/8, $expandAVX512_4<>(SB)
    12  DATA  ·gcExpandersAVX512+0x28(SB)/8, $expandAVX512_6<>(SB)
    13  DATA  ·gcExpandersAVX512+0x30(SB)/8, $expandAVX512_8<>(SB)
    14  DATA  ·gcExpandersAVX512+0x38(SB)/8, $expandAVX512_10<>(SB)
    15  DATA  ·gcExpandersAVX512+0x40(SB)/8, $expandAVX512_12<>(SB)
    16  DATA  ·gcExpandersAVX512+0x48(SB)/8, $expandAVX512_14<>(SB)
    17  DATA  ·gcExpandersAVX512+0x50(SB)/8, $expandAVX512_16<>(SB)
    18  DATA  ·gcExpandersAVX512+0x58(SB)/8, $expandAVX512_18<>(SB)
    19  DATA  ·gcExpandersAVX512+0x60(SB)/8, $expandAVX512_20<>(SB)
    20  DATA  ·gcExpandersAVX512+0x68(SB)/8, $expandAVX512_22<>(SB)
    21  DATA  ·gcExpandersAVX512+0x70(SB)/8, $expandAVX512_24<>(SB)
    22  DATA  ·gcExpandersAVX512+0x78(SB)/8, $expandAVX512_26<>(SB)
    23  DATA  ·gcExpandersAVX512+0x80(SB)/8, $expandAVX512_28<>(SB)
    24  DATA  ·gcExpandersAVX512+0x88(SB)/8, $expandAVX512_30<>(SB)
    25  DATA  ·gcExpandersAVX512+0x90(SB)/8, $expandAVX512_32<>(SB)
    26  DATA  ·gcExpandersAVX512+0x98(SB)/8, $expandAVX512_36<>(SB)
    27  DATA  ·gcExpandersAVX512+0xa0(SB)/8, $expandAVX512_40<>(SB)
    28  DATA  ·gcExpandersAVX512+0xa8(SB)/8, $expandAVX512_44<>(SB)
    29  DATA  ·gcExpandersAVX512+0xb0(SB)/8, $expandAVX512_48<>(SB)
    30  DATA  ·gcExpandersAVX512+0xb8(SB)/8, $expandAVX512_52<>(SB)
    31  DATA  ·gcExpandersAVX512+0xc0(SB)/8, $expandAVX512_56<>(SB)
    32  DATA  ·gcExpandersAVX512+0xc8(SB)/8, $expandAVX512_60<>(SB)
    33  DATA  ·gcExpandersAVX512+0xd0(SB)/8, $expandAVX512_64<>(SB)
    34  DATA  ·gcExpandersAVX512+0xd8(SB)/8, $0 // entries 27..67: zero (no routine); NOTE(review): presumably never dispatched to - confirm at the call site
    35  DATA  ·gcExpandersAVX512+0xe0(SB)/8, $0
    36  DATA  ·gcExpandersAVX512+0xe8(SB)/8, $0
    37  DATA  ·gcExpandersAVX512+0xf0(SB)/8, $0
    38  DATA  ·gcExpandersAVX512+0xf8(SB)/8, $0
    39  DATA  ·gcExpandersAVX512+0x100(SB)/8, $0
    40  DATA  ·gcExpandersAVX512+0x108(SB)/8, $0
    41  DATA  ·gcExpandersAVX512+0x110(SB)/8, $0
    42  DATA  ·gcExpandersAVX512+0x118(SB)/8, $0
    43  DATA  ·gcExpandersAVX512+0x120(SB)/8, $0
    44  DATA  ·gcExpandersAVX512+0x128(SB)/8, $0
    45  DATA  ·gcExpandersAVX512+0x130(SB)/8, $0
    46  DATA  ·gcExpandersAVX512+0x138(SB)/8, $0
    47  DATA  ·gcExpandersAVX512+0x140(SB)/8, $0
    48  DATA  ·gcExpandersAVX512+0x148(SB)/8, $0
    49  DATA  ·gcExpandersAVX512+0x150(SB)/8, $0
    50  DATA  ·gcExpandersAVX512+0x158(SB)/8, $0
    51  DATA  ·gcExpandersAVX512+0x160(SB)/8, $0
    52  DATA  ·gcExpandersAVX512+0x168(SB)/8, $0
    53  DATA  ·gcExpandersAVX512+0x170(SB)/8, $0
    54  DATA  ·gcExpandersAVX512+0x178(SB)/8, $0
    55  DATA  ·gcExpandersAVX512+0x180(SB)/8, $0
    56  DATA  ·gcExpandersAVX512+0x188(SB)/8, $0
    57  DATA  ·gcExpandersAVX512+0x190(SB)/8, $0
    58  DATA  ·gcExpandersAVX512+0x198(SB)/8, $0
    59  DATA  ·gcExpandersAVX512+0x1a0(SB)/8, $0
    60  DATA  ·gcExpandersAVX512+0x1a8(SB)/8, $0
    61  DATA  ·gcExpandersAVX512+0x1b0(SB)/8, $0
    62  DATA  ·gcExpandersAVX512+0x1b8(SB)/8, $0
    63  DATA  ·gcExpandersAVX512+0x1c0(SB)/8, $0
    64  DATA  ·gcExpandersAVX512+0x1c8(SB)/8, $0
    65  DATA  ·gcExpandersAVX512+0x1d0(SB)/8, $0
    66  DATA  ·gcExpandersAVX512+0x1d8(SB)/8, $0
    67  DATA  ·gcExpandersAVX512+0x1e0(SB)/8, $0
    68  DATA  ·gcExpandersAVX512+0x1e8(SB)/8, $0
    69  DATA  ·gcExpandersAVX512+0x1f0(SB)/8, $0
    70  DATA  ·gcExpandersAVX512+0x1f8(SB)/8, $0
    71  DATA  ·gcExpandersAVX512+0x200(SB)/8, $0
    72  DATA  ·gcExpandersAVX512+0x208(SB)/8, $0
    73  DATA  ·gcExpandersAVX512+0x210(SB)/8, $0
    74  DATA  ·gcExpandersAVX512+0x218(SB)/8, $0
    75  
    76  TEXT expandAVX512_1<>(SB), NOSPLIT, $0-0 // factor 1: no transformation. In: AX = pointer to 128 readable bytes. Out: Z1, Z2. No frame, no stack args.
    77  	VMOVDQU64 (AX), Z1 // Z1 = bytes 0..63 at AX, unchanged
    78  	VMOVDQU64 64(AX), Z2 // Z2 = bytes 64..127 at AX, unchanged
    79  	RET
    80  
    81  GLOBL expandAVX512_2_inShuf0<>(SB), RODATA, $0x40 // 64 byte indices used by expandAVX512_2 as the VPERMB index vector for its first result
    82  DATA  expandAVX512_2_inShuf0<>+0x00(SB)/8, $0x0706050403020100 // each 8-byte group of input bytes 0x00..0x1f is listed twice in a row
    83  DATA  expandAVX512_2_inShuf0<>+0x08(SB)/8, $0x0706050403020100
    84  DATA  expandAVX512_2_inShuf0<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
    85  DATA  expandAVX512_2_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
    86  DATA  expandAVX512_2_inShuf0<>+0x20(SB)/8, $0x1716151413121110
    87  DATA  expandAVX512_2_inShuf0<>+0x28(SB)/8, $0x1716151413121110
    88  DATA  expandAVX512_2_inShuf0<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
    89  DATA  expandAVX512_2_inShuf0<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918
    90  
    91  GLOBL expandAVX512_2_mat0<>(SB), RODATA, $0x40 // eight 8x8 bit matrices (one per qword), the matrix operand of VGF2P8AFFINEQB in expandAVX512_2
    92  DATA  expandAVX512_2_mat0<>+0x00(SB)/8, $0x0101020204040808 // every row byte has one bit set, so each output bit copies one input bit; each of bits 0..3 fills 2 rows
    93  DATA  expandAVX512_2_mat0<>+0x08(SB)/8, $0x1010202040408080 // same for bits 4..7; the two-qword pattern then repeats
    94  DATA  expandAVX512_2_mat0<>+0x10(SB)/8, $0x0101020204040808
    95  DATA  expandAVX512_2_mat0<>+0x18(SB)/8, $0x1010202040408080
    96  DATA  expandAVX512_2_mat0<>+0x20(SB)/8, $0x0101020204040808
    97  DATA  expandAVX512_2_mat0<>+0x28(SB)/8, $0x1010202040408080
    98  DATA  expandAVX512_2_mat0<>+0x30(SB)/8, $0x0101020204040808
    99  DATA  expandAVX512_2_mat0<>+0x38(SB)/8, $0x1010202040408080
   100  
   101  GLOBL expandAVX512_2_inShuf1<>(SB), RODATA, $0x40 // 64 byte indices used by expandAVX512_2 as the VPERMB index vector for its second result
   102  DATA  expandAVX512_2_inShuf1<>+0x00(SB)/8, $0x2726252423222120 // each 8-byte group of input bytes 0x20..0x3f is listed twice in a row
   103  DATA  expandAVX512_2_inShuf1<>+0x08(SB)/8, $0x2726252423222120
   104  DATA  expandAVX512_2_inShuf1<>+0x10(SB)/8, $0x2f2e2d2c2b2a2928
   105  DATA  expandAVX512_2_inShuf1<>+0x18(SB)/8, $0x2f2e2d2c2b2a2928
   106  DATA  expandAVX512_2_inShuf1<>+0x20(SB)/8, $0x3736353433323130
   107  DATA  expandAVX512_2_inShuf1<>+0x28(SB)/8, $0x3736353433323130
   108  DATA  expandAVX512_2_inShuf1<>+0x30(SB)/8, $0x3f3e3d3c3b3a3938
   109  DATA  expandAVX512_2_inShuf1<>+0x38(SB)/8, $0x3f3e3d3c3b3a3938
   110  
   111  GLOBL expandAVX512_2_outShufLo(SB), RODATA, $0x40 // VPERMB indices giving the final byte order; expandAVX512_2 uses it for both Z1 and Z2. NOTE(review): no <> suffix, so unlike the inShuf/mat tables this symbol is not file-local - confirm intended (any fix belongs in mkasm.go)
   112  DATA  expandAVX512_2_outShufLo+0x00(SB)/8, $0x0b030a0209010800 // alternates byte k of one 8-byte lane with byte k of the next lane
   113  DATA  expandAVX512_2_outShufLo+0x08(SB)/8, $0x0f070e060d050c04
   114  DATA  expandAVX512_2_outShufLo+0x10(SB)/8, $0x1b131a1219111810
   115  DATA  expandAVX512_2_outShufLo+0x18(SB)/8, $0x1f171e161d151c14
   116  DATA  expandAVX512_2_outShufLo+0x20(SB)/8, $0x2b232a2229212820
   117  DATA  expandAVX512_2_outShufLo+0x28(SB)/8, $0x2f272e262d252c24
   118  DATA  expandAVX512_2_outShufLo+0x30(SB)/8, $0x3b333a3239313830
   119  DATA  expandAVX512_2_outShufLo+0x38(SB)/8, $0x3f373e363d353c34
   120  
   121  TEXT expandAVX512_2<>(SB), NOSPLIT, $0-0 // In: AX = pointer to 64 input bytes. Out: Z1, Z2. Clobbers Z0, Z3, Z4. Per the tables each input bit appears to be repeated 2x in the output - TODO confirm against mkasm.go
   122  	VMOVDQU64 expandAVX512_2_inShuf0<>(SB), Z0 // Z0 = gather indices for the first result
   123  	VMOVDQU64 expandAVX512_2_mat0<>(SB), Z1 // Z1 = bit matrices, shared by both halves
   124  	VMOVDQU64 expandAVX512_2_inShuf1<>(SB), Z2 // Z2 = gather indices for the second result
   125  	VMOVDQU64 expandAVX512_2_outShufLo(SB), Z3 // Z3 = final byte order, shared by both results
   126  	VMOVDQU64 (AX), Z4 // Z4 = the 64 input bytes
   127  	VPERMB Z4, Z0, Z0 // Z0[i] = Z4[Z0[i]]: gather input bytes per inShuf0
   128  	VGF2P8AFFINEQB $0, Z1, Z0, Z0 // transform each byte of Z0 by the matrix in the matching qword of Z1 (constant term 0)
   129  	VPERMB Z4, Z2, Z2 // Z2[i] = Z4[Z2[i]]: gather input bytes per inShuf1
   130  	VGF2P8AFFINEQB $0, Z1, Z2, Z2 // same matrices applied to the second half
   131  	VPERMB Z0, Z3, Z1 // Z1 = Z0 reordered by Z3; overwrites the matrices, which are no longer needed
   132  	VPERMB Z2, Z3, Z2 // Z2 = Z2 reordered by Z3
   133  	RET
   134  
   135  GLOBL expandAVX512_3_inShuf0<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_3 for its first intermediate (Z0)
   136  DATA  expandAVX512_3_inShuf0<>+0x00(SB)/8, $0x0706050403020100 // input bytes 0x00..0x07, listed three times
   137  DATA  expandAVX512_3_inShuf0<>+0x08(SB)/8, $0x0706050403020100
   138  DATA  expandAVX512_3_inShuf0<>+0x10(SB)/8, $0x0706050403020100
   139  DATA  expandAVX512_3_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908 // input bytes 0x08..0x0f, listed three times
   140  DATA  expandAVX512_3_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
   141  DATA  expandAVX512_3_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
   142  DATA  expandAVX512_3_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff // 0xff lanes pair with the all-zero matrices in mat0, so these result bytes are zero
   143  DATA  expandAVX512_3_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff
   144  
   145  GLOBL expandAVX512_3_mat0<>(SB), RODATA, $0x40 // eight 8x8 bit matrices (one per qword), the VGF2P8AFFINEQB matrix operand for all three gathers in expandAVX512_3
   146  DATA  expandAVX512_3_mat0<>+0x00(SB)/8, $0x0101010202020404 // every nonzero row byte has one bit set; across qwords 0..2 each of the 8 bit values fills 3 consecutive rows
   147  DATA  expandAVX512_3_mat0<>+0x08(SB)/8, $0x0408080810101020
   148  DATA  expandAVX512_3_mat0<>+0x10(SB)/8, $0x2020404040808080
   149  DATA  expandAVX512_3_mat0<>+0x18(SB)/8, $0x0101010202020404 // qwords 3..5 repeat qwords 0..2
   150  DATA  expandAVX512_3_mat0<>+0x20(SB)/8, $0x0408080810101020
   151  DATA  expandAVX512_3_mat0<>+0x28(SB)/8, $0x2020404040808080
   152  DATA  expandAVX512_3_mat0<>+0x30(SB)/8, $0x0000000000000000 // all-zero matrices: the last two lanes produce zero bytes
   153  DATA  expandAVX512_3_mat0<>+0x38(SB)/8, $0x0000000000000000
   154  
   155  GLOBL expandAVX512_3_inShuf1<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_3 for its second intermediate (Z4)
   156  DATA  expandAVX512_3_inShuf1<>+0x00(SB)/8, $0x1716151413121110 // input bytes 0x10..0x17, listed three times
   157  DATA  expandAVX512_3_inShuf1<>+0x08(SB)/8, $0x1716151413121110
   158  DATA  expandAVX512_3_inShuf1<>+0x10(SB)/8, $0x1716151413121110
   159  DATA  expandAVX512_3_inShuf1<>+0x18(SB)/8, $0x1f1e1d1c1b1a1918 // input bytes 0x18..0x1f, listed three times
   160  DATA  expandAVX512_3_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918
   161  DATA  expandAVX512_3_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
   162  DATA  expandAVX512_3_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff // 0xff lanes pair with the all-zero matrices in mat0, so these result bytes are zero
   163  DATA  expandAVX512_3_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff
   164  
   165  GLOBL expandAVX512_3_inShuf2<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_3 for its third intermediate (Z5, result in Z3)
   166  DATA  expandAVX512_3_inShuf2<>+0x00(SB)/8, $0x2726252423222120 // input bytes 0x20..0x27, listed three times
   167  DATA  expandAVX512_3_inShuf2<>+0x08(SB)/8, $0x2726252423222120
   168  DATA  expandAVX512_3_inShuf2<>+0x10(SB)/8, $0x2726252423222120
   169  DATA  expandAVX512_3_inShuf2<>+0x18(SB)/8, $0xffffffffff2a2928 // only input bytes 0x28..0x2a are gathered here; 0xff fills the other lanes (presumably never selected by outShufHi - confirm)
   170  DATA  expandAVX512_3_inShuf2<>+0x20(SB)/8, $0xffffffffff2a2928
   171  DATA  expandAVX512_3_inShuf2<>+0x28(SB)/8, $0xffffffffffff2928 // only input bytes 0x28..0x29
   172  DATA  expandAVX512_3_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff // pairs with the all-zero matrices in mat0
   173  DATA  expandAVX512_3_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
   174  
   175  GLOBL expandAVX512_3_outShufLo(SB), RODATA, $0x40 // VPERMI2B indices producing Z1 in expandAVX512_3: 0x00..0x3f pick from Z0, 0x40..0x7f pick from Z4. NOTE(review): no <> suffix, so this symbol is not file-local - confirm intended
   176  DATA  expandAVX512_3_outShufLo+0x00(SB)/8, $0x0a02110901100800 // takes byte k from three consecutive 8-byte lanes (k, k+8, k+0x10), then moves on to k+1
   177  DATA  expandAVX512_3_outShufLo+0x08(SB)/8, $0x05140c04130b0312
   178  DATA  expandAVX512_3_outShufLo+0x10(SB)/8, $0x170f07160e06150d
   179  DATA  expandAVX512_3_outShufLo+0x18(SB)/8, $0x221a292119282018
   180  DATA  expandAVX512_3_outShufLo+0x20(SB)/8, $0x1d2c241c2b231b2a
   181  DATA  expandAVX512_3_outShufLo+0x28(SB)/8, $0x2f271f2e261e2d25
   182  DATA  expandAVX512_3_outShufLo+0x30(SB)/8, $0x4a42514941504840 // from here on the indices select from the second source
   183  DATA  expandAVX512_3_outShufLo+0x38(SB)/8, $0x45544c44534b4352
   184  
   185  GLOBL expandAVX512_3_outShufHi(SB), RODATA, $0x40 // VPERMI2B indices producing Z2 in expandAVX512_3: 0x00..0x3f pick from Z4, 0x40..0x7f pick from Z3. NOTE(review): no <> suffix, so this symbol is not file-local - confirm intended
   186  DATA  expandAVX512_3_outShufHi+0x00(SB)/8, $0x170f07160e06150d // same values as outShufLo rows 2..7, i.e. the sequence continues where outShufLo stopped
   187  DATA  expandAVX512_3_outShufHi+0x08(SB)/8, $0x221a292119282018
   188  DATA  expandAVX512_3_outShufHi+0x10(SB)/8, $0x1d2c241c2b231b2a
   189  DATA  expandAVX512_3_outShufHi+0x18(SB)/8, $0x2f271f2e261e2d25
   190  DATA  expandAVX512_3_outShufHi+0x20(SB)/8, $0x4a42514941504840 // from here on the indices select from the second source
   191  DATA  expandAVX512_3_outShufHi+0x28(SB)/8, $0x45544c44534b4352
   192  DATA  expandAVX512_3_outShufHi+0x30(SB)/8, $0x574f47564e46554d
   193  DATA  expandAVX512_3_outShufHi+0x38(SB)/8, $0x625a696159686058
   194  
   195  TEXT expandAVX512_3<>(SB), NOSPLIT, $0-0 // In: AX = pointer to 64 input bytes. Out: Z1, Z2. Clobbers Z0, Z3, Z4, Z5, Z6. Per the tables each input bit appears to be repeated 3x in the output - TODO confirm against mkasm.go
   196  	VMOVDQU64 expandAVX512_3_inShuf0<>(SB), Z0 // Z0 = gather indices, part 0
   197  	VMOVDQU64 expandAVX512_3_mat0<>(SB), Z3 // Z3 = bit matrices, shared by all three parts
   198  	VMOVDQU64 expandAVX512_3_inShuf1<>(SB), Z4 // Z4 = gather indices, part 1
   199  	VMOVDQU64 expandAVX512_3_inShuf2<>(SB), Z5 // Z5 = gather indices, part 2
   200  	VMOVDQU64 expandAVX512_3_outShufLo(SB), Z1 // Z1 = two-source output indices for the first result
   201  	VMOVDQU64 expandAVX512_3_outShufHi(SB), Z2 // Z2 = two-source output indices for the second result
   202  	VMOVDQU64 (AX), Z6 // Z6 = the 64 input bytes
   203  	VPERMB Z6, Z0, Z0 // Z0[i] = Z6[Z0[i]]
   204  	VGF2P8AFFINEQB $0, Z3, Z0, Z0 // transform each byte of Z0 by the matrix in the matching qword of Z3
   205  	VPERMB Z6, Z4, Z4 // Z4[i] = Z6[Z4[i]]
   206  	VGF2P8AFFINEQB $0, Z3, Z4, Z4
   207  	VPERMB Z6, Z5, Z5 // Z5[i] = Z6[Z5[i]]
   208  	VGF2P8AFFINEQB $0, Z3, Z5, Z3 // result written to Z3, replacing the matrices after their last use
   209  	VPERMI2B Z4, Z0, Z1 // Z1 (indices) is overwritten with bytes picked from Z0 (index bit 6 clear) or Z4 (bit 6 set)
   210  	VPERMI2B Z3, Z4, Z2 // Z2 (indices) is overwritten with bytes picked from Z4 (bit 6 clear) or Z3 (bit 6 set)
   211  	RET
   212  
   213  GLOBL expandAVX512_4_inShuf0<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_4 for its first result
   214  DATA  expandAVX512_4_inShuf0<>+0x00(SB)/8, $0x0706050403020100 // input bytes 0x00..0x07, listed four times
   215  DATA  expandAVX512_4_inShuf0<>+0x08(SB)/8, $0x0706050403020100
   216  DATA  expandAVX512_4_inShuf0<>+0x10(SB)/8, $0x0706050403020100
   217  DATA  expandAVX512_4_inShuf0<>+0x18(SB)/8, $0x0706050403020100
   218  DATA  expandAVX512_4_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908 // input bytes 0x08..0x0f, listed four times
   219  DATA  expandAVX512_4_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
   220  DATA  expandAVX512_4_inShuf0<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
   221  DATA  expandAVX512_4_inShuf0<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908
   222  
   223  GLOBL expandAVX512_4_mat0<>(SB), RODATA, $0x40 // eight 8x8 bit matrices (one per qword), the VGF2P8AFFINEQB matrix operand in expandAVX512_4
   224  DATA  expandAVX512_4_mat0<>+0x00(SB)/8, $0x0101010102020202 // every row byte has one bit set; each of the 8 bit values fills 4 consecutive rows across qwords 0..3
   225  DATA  expandAVX512_4_mat0<>+0x08(SB)/8, $0x0404040408080808
   226  DATA  expandAVX512_4_mat0<>+0x10(SB)/8, $0x1010101020202020
   227  DATA  expandAVX512_4_mat0<>+0x18(SB)/8, $0x4040404080808080
   228  DATA  expandAVX512_4_mat0<>+0x20(SB)/8, $0x0101010102020202 // qwords 4..7 repeat qwords 0..3
   229  DATA  expandAVX512_4_mat0<>+0x28(SB)/8, $0x0404040408080808
   230  DATA  expandAVX512_4_mat0<>+0x30(SB)/8, $0x1010101020202020
   231  DATA  expandAVX512_4_mat0<>+0x38(SB)/8, $0x4040404080808080
   232  
   233  GLOBL expandAVX512_4_inShuf1<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_4 for its second result
   234  DATA  expandAVX512_4_inShuf1<>+0x00(SB)/8, $0x1716151413121110 // input bytes 0x10..0x17, listed four times
   235  DATA  expandAVX512_4_inShuf1<>+0x08(SB)/8, $0x1716151413121110
   236  DATA  expandAVX512_4_inShuf1<>+0x10(SB)/8, $0x1716151413121110
   237  DATA  expandAVX512_4_inShuf1<>+0x18(SB)/8, $0x1716151413121110
   238  DATA  expandAVX512_4_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918 // input bytes 0x18..0x1f, listed four times
   239  DATA  expandAVX512_4_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
   240  DATA  expandAVX512_4_inShuf1<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
   241  DATA  expandAVX512_4_inShuf1<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918
   242  
   243  GLOBL expandAVX512_4_outShufLo(SB), RODATA, $0x40 // VPERMB indices giving the final byte order; expandAVX512_4 uses it for both Z1 and Z2. NOTE(review): no <> suffix, so this symbol is not file-local - confirm intended
   244  DATA  expandAVX512_4_outShufLo+0x00(SB)/8, $0x1911090118100800 // takes byte k from four consecutive 8-byte lanes (k, k+8, k+0x10, k+0x18), then k+1
   245  DATA  expandAVX512_4_outShufLo+0x08(SB)/8, $0x1b130b031a120a02
   246  DATA  expandAVX512_4_outShufLo+0x10(SB)/8, $0x1d150d051c140c04
   247  DATA  expandAVX512_4_outShufLo+0x18(SB)/8, $0x1f170f071e160e06
   248  DATA  expandAVX512_4_outShufLo+0x20(SB)/8, $0x3931292138302820 // same pattern over lanes 4..7
   249  DATA  expandAVX512_4_outShufLo+0x28(SB)/8, $0x3b332b233a322a22
   250  DATA  expandAVX512_4_outShufLo+0x30(SB)/8, $0x3d352d253c342c24
   251  DATA  expandAVX512_4_outShufLo+0x38(SB)/8, $0x3f372f273e362e26
   252  
   253  TEXT expandAVX512_4<>(SB), NOSPLIT, $0-0 // In: AX = pointer to 64 input bytes. Out: Z1, Z2. Clobbers Z0, Z3, Z4. Per the tables each input bit appears to be repeated 4x in the output - TODO confirm against mkasm.go
   254  	VMOVDQU64 expandAVX512_4_inShuf0<>(SB), Z0 // Z0 = gather indices for the first result
   255  	VMOVDQU64 expandAVX512_4_mat0<>(SB), Z1 // Z1 = bit matrices, shared by both halves
   256  	VMOVDQU64 expandAVX512_4_inShuf1<>(SB), Z2 // Z2 = gather indices for the second result
   257  	VMOVDQU64 expandAVX512_4_outShufLo(SB), Z3 // Z3 = final byte order, shared by both results
   258  	VMOVDQU64 (AX), Z4 // Z4 = the 64 input bytes
   259  	VPERMB Z4, Z0, Z0 // Z0[i] = Z4[Z0[i]]
   260  	VGF2P8AFFINEQB $0, Z1, Z0, Z0 // transform each byte of Z0 by the matrix in the matching qword of Z1
   261  	VPERMB Z4, Z2, Z2 // Z2[i] = Z4[Z2[i]]
   262  	VGF2P8AFFINEQB $0, Z1, Z2, Z2
   263  	VPERMB Z0, Z3, Z1 // Z1 = Z0 reordered by Z3; overwrites the matrices, which are no longer needed
   264  	VPERMB Z2, Z3, Z2 // Z2 = Z2 reordered by Z3
   265  	RET
   266  
   267  GLOBL expandAVX512_6_inShuf0<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_6 for its first intermediate (Z0)
   268  DATA  expandAVX512_6_inShuf0<>+0x00(SB)/8, $0x0706050403020100 // input bytes 0x00..0x07, listed six times
   269  DATA  expandAVX512_6_inShuf0<>+0x08(SB)/8, $0x0706050403020100
   270  DATA  expandAVX512_6_inShuf0<>+0x10(SB)/8, $0x0706050403020100
   271  DATA  expandAVX512_6_inShuf0<>+0x18(SB)/8, $0x0706050403020100
   272  DATA  expandAVX512_6_inShuf0<>+0x20(SB)/8, $0x0706050403020100
   273  DATA  expandAVX512_6_inShuf0<>+0x28(SB)/8, $0x0706050403020100
   274  DATA  expandAVX512_6_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff // 0xff lanes pair with the all-zero matrices in mat0, so these result bytes are zero
   275  DATA  expandAVX512_6_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff
   276  
   277  GLOBL expandAVX512_6_mat0<>(SB), RODATA, $0x40 // eight 8x8 bit matrices (one per qword), the VGF2P8AFFINEQB matrix operand for all three gathers in expandAVX512_6
   278  DATA  expandAVX512_6_mat0<>+0x00(SB)/8, $0x0101010101010202 // every nonzero row byte has one bit set; each of the 8 bit values fills 6 consecutive rows across qwords 0..5
   279  DATA  expandAVX512_6_mat0<>+0x08(SB)/8, $0x0202020204040404
   280  DATA  expandAVX512_6_mat0<>+0x10(SB)/8, $0x0404080808080808
   281  DATA  expandAVX512_6_mat0<>+0x18(SB)/8, $0x1010101010102020
   282  DATA  expandAVX512_6_mat0<>+0x20(SB)/8, $0x2020202040404040
   283  DATA  expandAVX512_6_mat0<>+0x28(SB)/8, $0x4040808080808080
   284  DATA  expandAVX512_6_mat0<>+0x30(SB)/8, $0x0000000000000000 // all-zero matrices: the last two lanes produce zero bytes
   285  DATA  expandAVX512_6_mat0<>+0x38(SB)/8, $0x0000000000000000
   286  
   287  GLOBL expandAVX512_6_inShuf1<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_6 for its second intermediate (Z4)
   288  DATA  expandAVX512_6_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // input bytes 0x08..0x0f, listed six times
   289  DATA  expandAVX512_6_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
   290  DATA  expandAVX512_6_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
   291  DATA  expandAVX512_6_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
   292  DATA  expandAVX512_6_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
   293  DATA  expandAVX512_6_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
   294  DATA  expandAVX512_6_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff // 0xff lanes pair with the all-zero matrices in mat0, so these result bytes are zero
   295  DATA  expandAVX512_6_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff
   296  
   297  GLOBL expandAVX512_6_inShuf2<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_6 for its third intermediate (Z5, result in Z3)
   298  DATA  expandAVX512_6_inShuf2<>+0x00(SB)/8, $0xffff151413121110 // input bytes 0x10..0x15; 0xff fills the other lanes (presumably never selected by outShufHi - confirm)
   299  DATA  expandAVX512_6_inShuf2<>+0x08(SB)/8, $0xffff151413121110
   300  DATA  expandAVX512_6_inShuf2<>+0x10(SB)/8, $0xffffff1413121110 // input bytes 0x10..0x14 only
   301  DATA  expandAVX512_6_inShuf2<>+0x18(SB)/8, $0xffffff1413121110
   302  DATA  expandAVX512_6_inShuf2<>+0x20(SB)/8, $0xffffff1413121110
   303  DATA  expandAVX512_6_inShuf2<>+0x28(SB)/8, $0xffffff1413121110
   304  DATA  expandAVX512_6_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff // pairs with the all-zero matrices in mat0
   305  DATA  expandAVX512_6_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
   306  
   307  GLOBL expandAVX512_6_outShufLo(SB), RODATA, $0x40 // VPERMI2B indices producing Z1 in expandAVX512_6: 0x00..0x3f pick from Z0, 0x40..0x7f pick from Z4. NOTE(review): no <> suffix, so this symbol is not file-local - confirm intended
   308  DATA  expandAVX512_6_outShufLo+0x00(SB)/8, $0x0901282018100800 // takes byte k from six consecutive 8-byte lanes (k, k+8, ..., k+0x28), then moves on to k+1
   309  DATA  expandAVX512_6_outShufLo+0x08(SB)/8, $0x1a120a0229211911
   310  DATA  expandAVX512_6_outShufLo+0x10(SB)/8, $0x2b231b130b032a22
   311  DATA  expandAVX512_6_outShufLo+0x18(SB)/8, $0x0d052c241c140c04
   312  DATA  expandAVX512_6_outShufLo+0x20(SB)/8, $0x1e160e062d251d15
   313  DATA  expandAVX512_6_outShufLo+0x28(SB)/8, $0x2f271f170f072e26
   314  DATA  expandAVX512_6_outShufLo+0x30(SB)/8, $0x4941686058504840 // from here on the indices select from the second source
   315  DATA  expandAVX512_6_outShufLo+0x38(SB)/8, $0x5a524a4269615951
   316  
   317  GLOBL expandAVX512_6_outShufHi(SB), RODATA, $0x40 // VPERMI2B indices producing Z2 in expandAVX512_6: 0x00..0x3f pick from Z4, 0x40..0x7f pick from Z3. NOTE(review): no <> suffix, so this symbol is not file-local - confirm intended
   318  DATA  expandAVX512_6_outShufHi+0x00(SB)/8, $0x2b231b130b032a22 // same values as outShufLo rows 2..7, i.e. the sequence continues where outShufLo stopped
   319  DATA  expandAVX512_6_outShufHi+0x08(SB)/8, $0x0d052c241c140c04
   320  DATA  expandAVX512_6_outShufHi+0x10(SB)/8, $0x1e160e062d251d15
   321  DATA  expandAVX512_6_outShufHi+0x18(SB)/8, $0x2f271f170f072e26
   322  DATA  expandAVX512_6_outShufHi+0x20(SB)/8, $0x4941686058504840 // from here on the indices select from the second source
   323  DATA  expandAVX512_6_outShufHi+0x28(SB)/8, $0x5a524a4269615951
   324  DATA  expandAVX512_6_outShufHi+0x30(SB)/8, $0x6b635b534b436a62
   325  DATA  expandAVX512_6_outShufHi+0x38(SB)/8, $0x4d456c645c544c44
   326  
   327  TEXT expandAVX512_6<>(SB), NOSPLIT, $0-0 // In: AX = pointer to 64 input bytes. Out: Z1, Z2. Clobbers Z0, Z3, Z4, Z5, Z6. Per the tables each input bit appears to be repeated 6x in the output - TODO confirm against mkasm.go
   328  	VMOVDQU64 expandAVX512_6_inShuf0<>(SB), Z0 // Z0 = gather indices, part 0
   329  	VMOVDQU64 expandAVX512_6_mat0<>(SB), Z3 // Z3 = bit matrices, shared by all three parts
   330  	VMOVDQU64 expandAVX512_6_inShuf1<>(SB), Z4 // Z4 = gather indices, part 1
   331  	VMOVDQU64 expandAVX512_6_inShuf2<>(SB), Z5 // Z5 = gather indices, part 2
   332  	VMOVDQU64 expandAVX512_6_outShufLo(SB), Z1 // Z1 = two-source output indices for the first result
   333  	VMOVDQU64 expandAVX512_6_outShufHi(SB), Z2 // Z2 = two-source output indices for the second result
   334  	VMOVDQU64 (AX), Z6 // Z6 = the 64 input bytes
   335  	VPERMB Z6, Z0, Z0 // Z0[i] = Z6[Z0[i]]
   336  	VGF2P8AFFINEQB $0, Z3, Z0, Z0 // transform each byte of Z0 by the matrix in the matching qword of Z3
   337  	VPERMB Z6, Z4, Z4 // Z4[i] = Z6[Z4[i]]
   338  	VGF2P8AFFINEQB $0, Z3, Z4, Z4
   339  	VPERMB Z6, Z5, Z5 // Z5[i] = Z6[Z5[i]]
   340  	VGF2P8AFFINEQB $0, Z3, Z5, Z3 // result written to Z3, replacing the matrices after their last use
   341  	VPERMI2B Z4, Z0, Z1 // Z1 (indices) is overwritten with bytes picked from Z0 (index bit 6 clear) or Z4 (bit 6 set)
   342  	VPERMI2B Z3, Z4, Z2 // Z2 (indices) is overwritten with bytes picked from Z4 (bit 6 clear) or Z3 (bit 6 set)
   343  	RET
   344  
   345  GLOBL expandAVX512_8_inShuf0<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_8 for its first result
   346  DATA  expandAVX512_8_inShuf0<>+0x00(SB)/8, $0x0706050403020100 // input bytes 0x00..0x07, listed eight times
   347  DATA  expandAVX512_8_inShuf0<>+0x08(SB)/8, $0x0706050403020100
   348  DATA  expandAVX512_8_inShuf0<>+0x10(SB)/8, $0x0706050403020100
   349  DATA  expandAVX512_8_inShuf0<>+0x18(SB)/8, $0x0706050403020100
   350  DATA  expandAVX512_8_inShuf0<>+0x20(SB)/8, $0x0706050403020100
   351  DATA  expandAVX512_8_inShuf0<>+0x28(SB)/8, $0x0706050403020100
   352  DATA  expandAVX512_8_inShuf0<>+0x30(SB)/8, $0x0706050403020100
   353  DATA  expandAVX512_8_inShuf0<>+0x38(SB)/8, $0x0706050403020100
   354  
   355  GLOBL expandAVX512_8_mat0<>(SB), RODATA, $0x40 // eight 8x8 bit matrices (one per qword), the VGF2P8AFFINEQB matrix operand in expandAVX512_8
   356  DATA  expandAVX512_8_mat0<>+0x00(SB)/8, $0x0101010101010101 // matrix k has all 8 rows equal to 1<<k, so every bit of each output byte copies input bit k
   357  DATA  expandAVX512_8_mat0<>+0x08(SB)/8, $0x0202020202020202
   358  DATA  expandAVX512_8_mat0<>+0x10(SB)/8, $0x0404040404040404
   359  DATA  expandAVX512_8_mat0<>+0x18(SB)/8, $0x0808080808080808
   360  DATA  expandAVX512_8_mat0<>+0x20(SB)/8, $0x1010101010101010
   361  DATA  expandAVX512_8_mat0<>+0x28(SB)/8, $0x2020202020202020
   362  DATA  expandAVX512_8_mat0<>+0x30(SB)/8, $0x4040404040404040
   363  DATA  expandAVX512_8_mat0<>+0x38(SB)/8, $0x8080808080808080
   364  
   365  GLOBL expandAVX512_8_inShuf1<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_8 for its second result
   366  DATA  expandAVX512_8_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // input bytes 0x08..0x0f, listed eight times
   367  DATA  expandAVX512_8_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
   368  DATA  expandAVX512_8_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
   369  DATA  expandAVX512_8_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
   370  DATA  expandAVX512_8_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
   371  DATA  expandAVX512_8_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
   372  DATA  expandAVX512_8_inShuf1<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
   373  DATA  expandAVX512_8_inShuf1<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908
   374  
   375  GLOBL expandAVX512_8_outShufLo(SB), RODATA, $0x40 // VPERMB indices giving the final byte order; expandAVX512_8 uses it for both Z1 and Z2. NOTE(review): no <> suffix, so this symbol is not file-local - confirm intended
   376  DATA  expandAVX512_8_outShufLo+0x00(SB)/8, $0x3830282018100800 // an 8x8 byte transpose: output qword k collects byte k of each of the eight 8-byte lanes
   377  DATA  expandAVX512_8_outShufLo+0x08(SB)/8, $0x3931292119110901
   378  DATA  expandAVX512_8_outShufLo+0x10(SB)/8, $0x3a322a221a120a02
   379  DATA  expandAVX512_8_outShufLo+0x18(SB)/8, $0x3b332b231b130b03
   380  DATA  expandAVX512_8_outShufLo+0x20(SB)/8, $0x3c342c241c140c04
   381  DATA  expandAVX512_8_outShufLo+0x28(SB)/8, $0x3d352d251d150d05
   382  DATA  expandAVX512_8_outShufLo+0x30(SB)/8, $0x3e362e261e160e06
   383  DATA  expandAVX512_8_outShufLo+0x38(SB)/8, $0x3f372f271f170f07
   384  
   385  TEXT expandAVX512_8<>(SB), NOSPLIT, $0-0 // In: AX = pointer to 64 input bytes. Out: Z1, Z2. Clobbers Z0, Z3, Z4. Per the tables each input bit appears to be repeated 8x (one full output byte per bit) - TODO confirm against mkasm.go
   386  	VMOVDQU64 expandAVX512_8_inShuf0<>(SB), Z0 // Z0 = gather indices for the first result
   387  	VMOVDQU64 expandAVX512_8_mat0<>(SB), Z1 // Z1 = bit matrices, shared by both halves
   388  	VMOVDQU64 expandAVX512_8_inShuf1<>(SB), Z2 // Z2 = gather indices for the second result
   389  	VMOVDQU64 expandAVX512_8_outShufLo(SB), Z3 // Z3 = final byte order, shared by both results
   390  	VMOVDQU64 (AX), Z4 // Z4 = the 64 input bytes
   391  	VPERMB Z4, Z0, Z0 // Z0[i] = Z4[Z0[i]]
   392  	VGF2P8AFFINEQB $0, Z1, Z0, Z0 // transform each byte of Z0 by the matrix in the matching qword of Z1
   393  	VPERMB Z4, Z2, Z2 // Z2[i] = Z4[Z2[i]]
   394  	VGF2P8AFFINEQB $0, Z1, Z2, Z2
   395  	VPERMB Z0, Z3, Z1 // Z1 = Z0 reordered by Z3; overwrites the matrices, which are no longer needed
   396  	VPERMB Z2, Z3, Z2 // Z2 = Z2 reordered by Z3
   397  	RET
   398  
   399  GLOBL expandAVX512_10_inShuf0<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_10 for its first intermediate (Z0)
   400  DATA  expandAVX512_10_inShuf0<>+0x00(SB)/8, $0xff06050403020100 // rows 0..3: input bytes 0x00..0x06; 0xff fills the unused lane (presumably never selected by the output shuffles - confirm)
   401  DATA  expandAVX512_10_inShuf0<>+0x08(SB)/8, $0xff06050403020100
   402  DATA  expandAVX512_10_inShuf0<>+0x10(SB)/8, $0xff06050403020100
   403  DATA  expandAVX512_10_inShuf0<>+0x18(SB)/8, $0xff06050403020100
   404  DATA  expandAVX512_10_inShuf0<>+0x20(SB)/8, $0xffff050403020100 // rows 4..7: input bytes 0x00..0x05
   405  DATA  expandAVX512_10_inShuf0<>+0x28(SB)/8, $0xffff050403020100
   406  DATA  expandAVX512_10_inShuf0<>+0x30(SB)/8, $0xffff050403020100
   407  DATA  expandAVX512_10_inShuf0<>+0x38(SB)/8, $0xffff050403020100
   408  
   409  GLOBL expandAVX512_10_mat0<>(SB), RODATA, $0x40 // eight 8x8 bit matrices (one per qword); memory operand of the first VGF2P8AFFINEQB in expandAVX512_10
   410  DATA  expandAVX512_10_mat0<>+0x00(SB)/8, $0x0101010101010101 // every row byte has one bit set; each bit value fills 10 consecutive rows, running across qword boundaries
   411  DATA  expandAVX512_10_mat0<>+0x08(SB)/8, $0x0101020202020202
   412  DATA  expandAVX512_10_mat0<>+0x10(SB)/8, $0x0202020204040404
   413  DATA  expandAVX512_10_mat0<>+0x18(SB)/8, $0x0404040404040808
   414  DATA  expandAVX512_10_mat0<>+0x20(SB)/8, $0x0808080808080808
   415  DATA  expandAVX512_10_mat0<>+0x28(SB)/8, $0x1010101010101010
   416  DATA  expandAVX512_10_mat0<>+0x30(SB)/8, $0x1010202020202020
   417  DATA  expandAVX512_10_mat0<>+0x38(SB)/8, $0x2020202040404040 // the run for bit 6 is cut off here and is completed by mat1 rows 0..1
   418  
   419  GLOBL expandAVX512_10_inShuf1<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_10 for its second intermediate (Z3)
   420  DATA  expandAVX512_10_inShuf1<>+0x00(SB)/8, $0xffff050403020100 // rows 0..1: input bytes 0x00..0x05 again (paired with the tail of the bit pattern in mat1)
   421  DATA  expandAVX512_10_inShuf1<>+0x08(SB)/8, $0xffff050403020100
   422  DATA  expandAVX512_10_inShuf1<>+0x10(SB)/8, $0xff0c0b0a09080706 // rows 2..5: input bytes 0x06..0x0c
   423  DATA  expandAVX512_10_inShuf1<>+0x18(SB)/8, $0xff0c0b0a09080706
   424  DATA  expandAVX512_10_inShuf1<>+0x20(SB)/8, $0xff0c0b0a09080706
   425  DATA  expandAVX512_10_inShuf1<>+0x28(SB)/8, $0xff0c0b0a09080706
   426  DATA  expandAVX512_10_inShuf1<>+0x30(SB)/8, $0xffff0b0a09080706 // rows 6..7: input bytes 0x06..0x0b; 0xff lanes are presumably never selected by the output shuffles - confirm
   427  DATA  expandAVX512_10_inShuf1<>+0x38(SB)/8, $0xffff0b0a09080706
   428  
   429  GLOBL expandAVX512_10_mat1<>(SB), RODATA, $0x40 // eight 8x8 bit matrices (one per qword); memory operand of the second VGF2P8AFFINEQB in expandAVX512_10
   430  DATA  expandAVX512_10_mat1<>+0x00(SB)/8, $0x4040404040408080 // rows 0..1 continue the mat0 pattern (rest of bit 6, then bit 7)
   431  DATA  expandAVX512_10_mat1<>+0x08(SB)/8, $0x8080808080808080
   432  DATA  expandAVX512_10_mat1<>+0x10(SB)/8, $0x0808080808080808 // rows 2..5 have the same values as mat0 rows 4..7
   433  DATA  expandAVX512_10_mat1<>+0x18(SB)/8, $0x1010101010101010
   434  DATA  expandAVX512_10_mat1<>+0x20(SB)/8, $0x1010202020202020
   435  DATA  expandAVX512_10_mat1<>+0x28(SB)/8, $0x2020202040404040
   436  DATA  expandAVX512_10_mat1<>+0x30(SB)/8, $0x4040404040408080 // rows 6..7 repeat rows 0..1
   437  DATA  expandAVX512_10_mat1<>+0x38(SB)/8, $0x8080808080808080
   438  
   439  GLOBL expandAVX512_10_inShuf2<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_10 for its third intermediate (Z4)
   440  DATA  expandAVX512_10_inShuf2<>+0x00(SB)/8, $0xffff0c0b0a090807 // rows 0..3: input bytes 0x07..0x0c
   441  DATA  expandAVX512_10_inShuf2<>+0x08(SB)/8, $0xffff0c0b0a090807
   442  DATA  expandAVX512_10_inShuf2<>+0x10(SB)/8, $0xffff0c0b0a090807
   443  DATA  expandAVX512_10_inShuf2<>+0x18(SB)/8, $0xffff0c0b0a090807
   444  DATA  expandAVX512_10_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff // rows 4..7 pair with the all-zero matrices in mat2, so these result bytes are zero
   445  DATA  expandAVX512_10_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
   446  DATA  expandAVX512_10_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
   447  DATA  expandAVX512_10_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
   448  
   449  GLOBL expandAVX512_10_mat2<>(SB), RODATA, $0x40 // eight 8x8 bit matrices (one per qword); memory operand of the third VGF2P8AFFINEQB in expandAVX512_10
   450  DATA  expandAVX512_10_mat2<>+0x00(SB)/8, $0x0101010101010101 // rows 0..3 have the same values as mat0 rows 0..3
   451  DATA  expandAVX512_10_mat2<>+0x08(SB)/8, $0x0101020202020202
   452  DATA  expandAVX512_10_mat2<>+0x10(SB)/8, $0x0202020204040404
   453  DATA  expandAVX512_10_mat2<>+0x18(SB)/8, $0x0404040404040808
   454  DATA  expandAVX512_10_mat2<>+0x20(SB)/8, $0x0000000000000000 // all-zero matrices: the last four lanes produce zero bytes
   455  DATA  expandAVX512_10_mat2<>+0x28(SB)/8, $0x0000000000000000
   456  DATA  expandAVX512_10_mat2<>+0x30(SB)/8, $0x0000000000000000
   457  DATA  expandAVX512_10_mat2<>+0x38(SB)/8, $0x0000000000000000
   458  
   459  GLOBL expandAVX512_10_outShufLo(SB), RODATA, $0x40 // VPERMI2B indices producing Z1 in expandAVX512_10: 0x00..0x3f pick from Z0, 0x40..0x7f pick from Z3. NOTE(review): no <> suffix, so this symbol is not file-local - confirm intended
   460  DATA  expandAVX512_10_outShufLo+0x00(SB)/8, $0x3830282018100800 // byte 0 of all eight lanes of the first source ...
   461  DATA  expandAVX512_10_outShufLo+0x08(SB)/8, $0x2921191109014840 // ... then byte 0 of lanes 0..1 of the second source (10 bytes in total), then on to byte 1
   462  DATA  expandAVX512_10_outShufLo+0x10(SB)/8, $0x1a120a0249413931
   463  DATA  expandAVX512_10_outShufLo+0x18(SB)/8, $0x0b034a423a322a22
   464  DATA  expandAVX512_10_outShufLo+0x20(SB)/8, $0x4b433b332b231b13
   465  DATA  expandAVX512_10_outShufLo+0x28(SB)/8, $0x3c342c241c140c04
   466  DATA  expandAVX512_10_outShufLo+0x30(SB)/8, $0x2d251d150d054c44
   467  DATA  expandAVX512_10_outShufLo+0x38(SB)/8, $0x1e160e064d453d35
   468  
   469  GLOBL expandAVX512_10_outShufHi(SB), RODATA, $0x40 // VPERMI2B indices producing Z2 in expandAVX512_10: 0x00..0x3f pick from Z3, 0x40..0x7f pick from Z4. NOTE(review): no <> suffix, so this symbol is not file-local - confirm intended
   470  DATA  expandAVX512_10_outShufHi+0x00(SB)/8, $0x4840383028201810 // starts with byte 0 of lanes 2..7 of the first source (Z3)
   471  DATA  expandAVX512_10_outShufHi+0x08(SB)/8, $0x3931292119115850
   472  DATA  expandAVX512_10_outShufHi+0x10(SB)/8, $0x2a221a1259514941
   473  DATA  expandAVX512_10_outShufHi+0x18(SB)/8, $0x1b135a524a423a32
   474  DATA  expandAVX512_10_outShufHi+0x20(SB)/8, $0x5b534b433b332b23
   475  DATA  expandAVX512_10_outShufHi+0x28(SB)/8, $0x4c443c342c241c14
   476  DATA  expandAVX512_10_outShufHi+0x30(SB)/8, $0x3d352d251d155c54
   477  DATA  expandAVX512_10_outShufHi+0x38(SB)/8, $0x2e261e165d554d45
   478  
   479  TEXT expandAVX512_10<>(SB), NOSPLIT, $0-0 // In: AX = pointer to 64 input bytes. Out: Z1, Z2. Clobbers Z0, Z3, Z4, Z5. Per the tables each input bit appears to be repeated 10x in the output - TODO confirm against mkasm.go
   480  	VMOVDQU64 expandAVX512_10_inShuf0<>(SB), Z0 // Z0 = gather indices, part 0
   481  	VMOVDQU64 expandAVX512_10_inShuf1<>(SB), Z3 // Z3 = gather indices, part 1
   482  	VMOVDQU64 expandAVX512_10_inShuf2<>(SB), Z4 // Z4 = gather indices, part 2
   483  	VMOVDQU64 expandAVX512_10_outShufLo(SB), Z1 // Z1 = two-source output indices for the first result
   484  	VMOVDQU64 expandAVX512_10_outShufHi(SB), Z2 // Z2 = two-source output indices for the second result
   485  	VMOVDQU64 (AX), Z5 // Z5 = the 64 input bytes
   486  	VPERMB Z5, Z0, Z0 // Z0[i] = Z5[Z0[i]]
   487  	VGF2P8AFFINEQB $0, expandAVX512_10_mat0<>(SB), Z0, Z0 // each part has its own matrices, read directly from memory
   488  	VPERMB Z5, Z3, Z3 // Z3[i] = Z5[Z3[i]]
   489  	VGF2P8AFFINEQB $0, expandAVX512_10_mat1<>(SB), Z3, Z3
   490  	VPERMB Z5, Z4, Z4 // Z4[i] = Z5[Z4[i]]
   491  	VGF2P8AFFINEQB $0, expandAVX512_10_mat2<>(SB), Z4, Z4
   492  	VPERMI2B Z3, Z0, Z1 // Z1 (indices) is overwritten with bytes picked from Z0 (index bit 6 clear) or Z3 (bit 6 set)
   493  	VPERMI2B Z4, Z3, Z2 // Z2 (indices) is overwritten with bytes picked from Z3 (bit 6 clear) or Z4 (bit 6 set)
   494  	RET
   495  
   496  GLOBL expandAVX512_12_inShuf0<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_12 for its first intermediate (Z0)
   497  DATA  expandAVX512_12_inShuf0<>+0x00(SB)/8, $0xffff050403020100 // rows 0..3: input bytes 0x00..0x05; 0xff fills unused lanes (presumably never selected by the output shuffles - confirm)
   498  DATA  expandAVX512_12_inShuf0<>+0x08(SB)/8, $0xffff050403020100
   499  DATA  expandAVX512_12_inShuf0<>+0x10(SB)/8, $0xffff050403020100
   500  DATA  expandAVX512_12_inShuf0<>+0x18(SB)/8, $0xffff050403020100
   501  DATA  expandAVX512_12_inShuf0<>+0x20(SB)/8, $0xffffff0403020100 // rows 4..7: input bytes 0x00..0x04
   502  DATA  expandAVX512_12_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
   503  DATA  expandAVX512_12_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
   504  DATA  expandAVX512_12_inShuf0<>+0x38(SB)/8, $0xffffff0403020100
   505  
   506  GLOBL expandAVX512_12_mat0<>(SB), RODATA, $0x40 // eight 8x8 bit matrices (one per qword); memory operand of the first VGF2P8AFFINEQB in expandAVX512_12
   507  DATA  expandAVX512_12_mat0<>+0x00(SB)/8, $0x0101010101010101 // every row byte has one bit set; each bit value fills 12 consecutive rows, running across qword boundaries
   508  DATA  expandAVX512_12_mat0<>+0x08(SB)/8, $0x0101010102020202
   509  DATA  expandAVX512_12_mat0<>+0x10(SB)/8, $0x0202020202020202
   510  DATA  expandAVX512_12_mat0<>+0x18(SB)/8, $0x0404040404040404
   511  DATA  expandAVX512_12_mat0<>+0x20(SB)/8, $0x0404040408080808
   512  DATA  expandAVX512_12_mat0<>+0x28(SB)/8, $0x0808080808080808
   513  DATA  expandAVX512_12_mat0<>+0x30(SB)/8, $0x1010101010101010
   514  DATA  expandAVX512_12_mat0<>+0x38(SB)/8, $0x1010101020202020 // the run for bit 5 is cut off here and is completed by mat1 row 0
   515  
   516  GLOBL expandAVX512_12_inShuf1<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_12 for its second intermediate (Z3)
   517  DATA  expandAVX512_12_inShuf1<>+0x00(SB)/8, $0xffffff0403020100 // rows 0..3: input bytes 0x00..0x04 again (paired with the tail of the bit pattern in mat1)
   518  DATA  expandAVX512_12_inShuf1<>+0x08(SB)/8, $0xffffff0403020100
   519  DATA  expandAVX512_12_inShuf1<>+0x10(SB)/8, $0xffffff0403020100
   520  DATA  expandAVX512_12_inShuf1<>+0x18(SB)/8, $0xffffff0403020100
   521  DATA  expandAVX512_12_inShuf1<>+0x20(SB)/8, $0xffff0a0908070605 // rows 4..7: input bytes 0x05..0x0a; 0xff lanes are presumably never selected by the output shuffles - confirm
   522  DATA  expandAVX512_12_inShuf1<>+0x28(SB)/8, $0xffff0a0908070605
   523  DATA  expandAVX512_12_inShuf1<>+0x30(SB)/8, $0xffff0a0908070605
   524  DATA  expandAVX512_12_inShuf1<>+0x38(SB)/8, $0xffff0a0908070605
   525  
   526  GLOBL expandAVX512_12_mat1<>(SB), RODATA, $0x40 // eight 8x8 bit matrices (one per qword); memory operand of the second VGF2P8AFFINEQB in expandAVX512_12
   527  DATA  expandAVX512_12_mat1<>+0x00(SB)/8, $0x2020202020202020 // rows 0..3 continue the mat0 pattern (rest of bit 5, then bits 6 and 7)
   528  DATA  expandAVX512_12_mat1<>+0x08(SB)/8, $0x4040404040404040
   529  DATA  expandAVX512_12_mat1<>+0x10(SB)/8, $0x4040404080808080
   530  DATA  expandAVX512_12_mat1<>+0x18(SB)/8, $0x8080808080808080
   531  DATA  expandAVX512_12_mat1<>+0x20(SB)/8, $0x0404040408080808 // rows 4..7 have the same values as mat0 rows 4..7
   532  DATA  expandAVX512_12_mat1<>+0x28(SB)/8, $0x0808080808080808
   533  DATA  expandAVX512_12_mat1<>+0x30(SB)/8, $0x1010101010101010
   534  DATA  expandAVX512_12_mat1<>+0x38(SB)/8, $0x1010101020202020
   535  
   536  GLOBL expandAVX512_12_inShuf2<>(SB), RODATA, $0x40 // 64 VPERMB byte indices used by expandAVX512_12 for its third intermediate (Z4)
   537  DATA  expandAVX512_12_inShuf2<>+0x00(SB)/8, $0xffffff0908070605 // rows 0..3: input bytes 0x05..0x09; 0xff lanes are presumably never selected by outShufHi - confirm
   538  DATA  expandAVX512_12_inShuf2<>+0x08(SB)/8, $0xffffff0908070605
   539  DATA  expandAVX512_12_inShuf2<>+0x10(SB)/8, $0xffffff0908070605
   540  DATA  expandAVX512_12_inShuf2<>+0x18(SB)/8, $0xffffff0908070605
   541  DATA  expandAVX512_12_inShuf2<>+0x20(SB)/8, $0xffffff0a09080706 // rows 4..7: input bytes 0x06..0x0a
   542  DATA  expandAVX512_12_inShuf2<>+0x28(SB)/8, $0xffffff0a09080706
   543  DATA  expandAVX512_12_inShuf2<>+0x30(SB)/8, $0xffffff0a09080706
   544  DATA  expandAVX512_12_inShuf2<>+0x38(SB)/8, $0xffffff0a09080706
   545  
   546  GLOBL expandAVX512_12_mat2<>(SB), RODATA, $0x40 // eight 8x8 bit matrices (one per qword); memory operand of the third VGF2P8AFFINEQB in expandAVX512_12
   547  DATA  expandAVX512_12_mat2<>+0x00(SB)/8, $0x2020202020202020 // rows 0..3 have the same values as mat1 rows 0..3
   548  DATA  expandAVX512_12_mat2<>+0x08(SB)/8, $0x4040404040404040
   549  DATA  expandAVX512_12_mat2<>+0x10(SB)/8, $0x4040404080808080
   550  DATA  expandAVX512_12_mat2<>+0x18(SB)/8, $0x8080808080808080
   551  DATA  expandAVX512_12_mat2<>+0x20(SB)/8, $0x0101010101010101 // rows 4..7 have the same values as mat0 rows 0..3
   552  DATA  expandAVX512_12_mat2<>+0x28(SB)/8, $0x0101010102020202
   553  DATA  expandAVX512_12_mat2<>+0x30(SB)/8, $0x0202020202020202
   554  DATA  expandAVX512_12_mat2<>+0x38(SB)/8, $0x0404040404040404
   555  
   556  GLOBL expandAVX512_12_outShufLo(SB), RODATA, $0x40 // VPERMI2B indices producing Z1 in expandAVX512_12: 0x00..0x3f pick from Z0, 0x40..0x7f pick from Z3. NOTE(review): no <> suffix, so this symbol is not file-local - confirm intended
   557  DATA  expandAVX512_12_outShufLo+0x00(SB)/8, $0x3830282018100800 // byte 0 of all eight lanes of the first source ...
   558  DATA  expandAVX512_12_outShufLo+0x08(SB)/8, $0x1911090158504840 // ... then byte 0 of lanes 0..3 of the second source (12 bytes in total), then on to byte 1
   559  DATA  expandAVX512_12_outShufLo+0x10(SB)/8, $0x5951494139312921
   560  DATA  expandAVX512_12_outShufLo+0x18(SB)/8, $0x3a322a221a120a02
   561  DATA  expandAVX512_12_outShufLo+0x20(SB)/8, $0x1b130b035a524a42
   562  DATA  expandAVX512_12_outShufLo+0x28(SB)/8, $0x5b534b433b332b23
   563  DATA  expandAVX512_12_outShufLo+0x30(SB)/8, $0x3c342c241c140c04
   564  DATA  expandAVX512_12_outShufLo+0x38(SB)/8, $0x1d150d055c544c44
   565  
   566  GLOBL expandAVX512_12_outShufHi(SB), RODATA, $0x40
        // 64-byte index table for the second VPERMI2B in expandAVX512_12: it selects
        // output bytes from the transformed Z3/Z4 pair and the result replaces the
        // indices in Z2.
        // NOTE(review): declared without the <> file-local suffix that the inShuf/mat
        // tables use — confirm this is intended by the generator (mkasm.go).
   567  DATA  expandAVX512_12_outShufHi+0x00(SB)/8, $0x5850484038302820
   568  DATA  expandAVX512_12_outShufHi+0x08(SB)/8, $0x3931292178706860
   569  DATA  expandAVX512_12_outShufHi+0x10(SB)/8, $0x7971696159514941
   570  DATA  expandAVX512_12_outShufHi+0x18(SB)/8, $0x5a524a423a322a22
   571  DATA  expandAVX512_12_outShufHi+0x20(SB)/8, $0x3b332b237a726a62
   572  DATA  expandAVX512_12_outShufHi+0x28(SB)/8, $0x7b736b635b534b43
   573  DATA  expandAVX512_12_outShufHi+0x30(SB)/8, $0x5c544c443c342c24
   574  DATA  expandAVX512_12_outShufHi+0x38(SB)/8, $0x3d352d257c746c64
   575  
   576  TEXT expandAVX512_12<>(SB), NOSPLIT, $0-0
        	// Reads 64 bytes from the address in AX, gathers three byte groups with
        	// VPERMB (inShuf0..2), runs each group through VGF2P8AFFINEQB with the
        	// matching mat table, and assembles two result vectors with VPERMI2B.
        	// Frame is $0-0 (no stack frame, no stack arguments): the input pointer is
        	// taken from AX and the values left in Z1 and Z2 are the outputs
        	// (presumably the low and high parts of the expanded bitmap — confirm
        	// with the caller). Clobbers Z0-Z5.
        	//
        	// Load the gather index tables, the output shuffle tables and the input.
   577  	VMOVDQU64 expandAVX512_12_inShuf0<>(SB), Z0
   578  	VMOVDQU64 expandAVX512_12_inShuf1<>(SB), Z3
   579  	VMOVDQU64 expandAVX512_12_inShuf2<>(SB), Z4
   580  	VMOVDQU64 expandAVX512_12_outShufLo(SB), Z1
   581  	VMOVDQU64 expandAVX512_12_outShufHi(SB), Z2
   582  	VMOVDQU64 (AX), Z5
        	// For each group: gather input bytes from Z5 by index, then apply the
        	// per-lane 8x8 bit matrices (constant term $0) in place.
   583  	VPERMB Z5, Z0, Z0
   584  	VGF2P8AFFINEQB $0, expandAVX512_12_mat0<>(SB), Z0, Z0
   585  	VPERMB Z5, Z3, Z3
   586  	VGF2P8AFFINEQB $0, expandAVX512_12_mat1<>(SB), Z3, Z3
   587  	VPERMB Z5, Z4, Z4
   588  	VGF2P8AFFINEQB $0, expandAVX512_12_mat2<>(SB), Z4, Z4
        	// Two-table byte shuffles; the index register is overwritten with the
        	// result. Z1 is built from Z0/Z3 and Z2 from Z3/Z4.
   589  	VPERMI2B Z3, Z0, Z1
   590  	VPERMI2B Z4, Z3, Z2
   591  	RET
   592  
   593  GLOBL expandAVX512_14_inShuf0<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_14 loads it into Z0 and uses
        // it as the VPERMB index vector to gather bytes from the input read at (AX).
   594  DATA  expandAVX512_14_inShuf0<>+0x00(SB)/8, $0xffffff0403020100
   595  DATA  expandAVX512_14_inShuf0<>+0x08(SB)/8, $0xffffff0403020100
   596  DATA  expandAVX512_14_inShuf0<>+0x10(SB)/8, $0xffffff0403020100
   597  DATA  expandAVX512_14_inShuf0<>+0x18(SB)/8, $0xffffff0403020100
   598  DATA  expandAVX512_14_inShuf0<>+0x20(SB)/8, $0xffffff0403020100
   599  DATA  expandAVX512_14_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
   600  DATA  expandAVX512_14_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
   601  DATA  expandAVX512_14_inShuf0<>+0x38(SB)/8, $0xffffff0403020100
   602  
   603  GLOBL expandAVX512_14_mat0<>(SB), RODATA, $0x40
        // Eight 8x8 bit matrices (one per 64-bit lane): memory operand of the
        // VGF2P8AFFINEQB that expandAVX512_14 applies to the bytes gathered via
        // expandAVX512_14_inShuf0. Every matrix byte has exactly one bit set, so each
        // output bit is a copy of a single input bit.
   604  DATA  expandAVX512_14_mat0<>+0x00(SB)/8, $0x0101010101010101
   605  DATA  expandAVX512_14_mat0<>+0x08(SB)/8, $0x0101010101010202
   606  DATA  expandAVX512_14_mat0<>+0x10(SB)/8, $0x0202020202020202
   607  DATA  expandAVX512_14_mat0<>+0x18(SB)/8, $0x0202020204040404
   608  DATA  expandAVX512_14_mat0<>+0x20(SB)/8, $0x0404040404040404
   609  DATA  expandAVX512_14_mat0<>+0x28(SB)/8, $0x0404080808080808
   610  DATA  expandAVX512_14_mat0<>+0x30(SB)/8, $0x0808080808080808
   611  DATA  expandAVX512_14_mat0<>+0x38(SB)/8, $0x1010101010101010
   612  
   613  GLOBL expandAVX512_14_inShuf1<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_14 loads it into Z2 and uses
        // it as the VPERMB index vector to gather bytes from the input read at (AX).
   614  DATA  expandAVX512_14_inShuf1<>+0x00(SB)/8, $0xffffffff03020100
   615  DATA  expandAVX512_14_inShuf1<>+0x08(SB)/8, $0xffffffff03020100
   616  DATA  expandAVX512_14_inShuf1<>+0x10(SB)/8, $0xffffffff03020100
   617  DATA  expandAVX512_14_inShuf1<>+0x18(SB)/8, $0xffffffff03020100
   618  DATA  expandAVX512_14_inShuf1<>+0x20(SB)/8, $0xffffffff03020100
   619  DATA  expandAVX512_14_inShuf1<>+0x28(SB)/8, $0xffffffff03020100
   620  DATA  expandAVX512_14_inShuf1<>+0x30(SB)/8, $0xffffff0807060504
   621  DATA  expandAVX512_14_inShuf1<>+0x38(SB)/8, $0xffffff0807060504
   622  
   623  GLOBL expandAVX512_14_mat1<>(SB), RODATA, $0x40
        // Eight 8x8 bit matrices (one per 64-bit lane): memory operand of the
        // VGF2P8AFFINEQB that expandAVX512_14 applies to the bytes gathered via
        // expandAVX512_14_inShuf1. Every matrix byte has exactly one bit set, so each
        // output bit is a copy of a single input bit.
   624  DATA  expandAVX512_14_mat1<>+0x00(SB)/8, $0x1010101010102020
   625  DATA  expandAVX512_14_mat1<>+0x08(SB)/8, $0x2020202020202020
   626  DATA  expandAVX512_14_mat1<>+0x10(SB)/8, $0x2020202040404040
   627  DATA  expandAVX512_14_mat1<>+0x18(SB)/8, $0x4040404040404040
   628  DATA  expandAVX512_14_mat1<>+0x20(SB)/8, $0x4040808080808080
   629  DATA  expandAVX512_14_mat1<>+0x28(SB)/8, $0x8080808080808080
   630  DATA  expandAVX512_14_mat1<>+0x30(SB)/8, $0x1010101010102020
   631  DATA  expandAVX512_14_mat1<>+0x38(SB)/8, $0x2020202020202020
   632  
   633  GLOBL expandAVX512_14_inShuf2<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_14 loads it into Z3 and uses
        // it as the VPERMB index vector to gather bytes from the input read at (AX).
   634  DATA  expandAVX512_14_inShuf2<>+0x00(SB)/8, $0xffffff0807060504
   635  DATA  expandAVX512_14_inShuf2<>+0x08(SB)/8, $0xffffff0807060504
   636  DATA  expandAVX512_14_inShuf2<>+0x10(SB)/8, $0xffffff0807060504
   637  DATA  expandAVX512_14_inShuf2<>+0x18(SB)/8, $0xffffff0807060504
   638  DATA  expandAVX512_14_inShuf2<>+0x20(SB)/8, $0xffffff0908070605
   639  DATA  expandAVX512_14_inShuf2<>+0x28(SB)/8, $0xffffff0908070605
   640  DATA  expandAVX512_14_inShuf2<>+0x30(SB)/8, $0xffffffff08070605
   641  DATA  expandAVX512_14_inShuf2<>+0x38(SB)/8, $0xffffffff08070605
   642  
   643  GLOBL expandAVX512_14_mat2<>(SB), RODATA, $0x40
        // Eight 8x8 bit matrices (one per 64-bit lane): memory operand of the
        // VGF2P8AFFINEQB that expandAVX512_14 applies to the bytes gathered via
        // expandAVX512_14_inShuf2. Every matrix byte has exactly one bit set, so each
        // output bit is a copy of a single input bit.
   644  DATA  expandAVX512_14_mat2<>+0x00(SB)/8, $0x2020202040404040
   645  DATA  expandAVX512_14_mat2<>+0x08(SB)/8, $0x4040404040404040
   646  DATA  expandAVX512_14_mat2<>+0x10(SB)/8, $0x4040808080808080
   647  DATA  expandAVX512_14_mat2<>+0x18(SB)/8, $0x8080808080808080
   648  DATA  expandAVX512_14_mat2<>+0x20(SB)/8, $0x0101010101010101
   649  DATA  expandAVX512_14_mat2<>+0x28(SB)/8, $0x0101010101010202
   650  DATA  expandAVX512_14_mat2<>+0x30(SB)/8, $0x0202020202020202
   651  DATA  expandAVX512_14_mat2<>+0x38(SB)/8, $0x0202020204040404
   652  
   653  GLOBL expandAVX512_14_inShuf3<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_14 loads it into Z4 and uses
        // it as the VPERMB index vector to gather bytes from the input read at (AX).
        // The upper four lanes are all 0xff; the matching lanes of
        // expandAVX512_14_mat3 are zero matrices.
   654  DATA  expandAVX512_14_inShuf3<>+0x00(SB)/8, $0xffffffff08070605
   655  DATA  expandAVX512_14_inShuf3<>+0x08(SB)/8, $0xffffffff08070605
   656  DATA  expandAVX512_14_inShuf3<>+0x10(SB)/8, $0xffffffff08070605
   657  DATA  expandAVX512_14_inShuf3<>+0x18(SB)/8, $0xffffffff08070605
   658  DATA  expandAVX512_14_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
   659  DATA  expandAVX512_14_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
   660  DATA  expandAVX512_14_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
   661  DATA  expandAVX512_14_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
   662  
   663  GLOBL expandAVX512_14_mat3<>(SB), RODATA, $0x40
        // Eight 8x8 bit matrices (one per 64-bit lane): memory operand of the
        // VGF2P8AFFINEQB that expandAVX512_14 applies to the bytes gathered via
        // expandAVX512_14_inShuf3. The lower four lanes have exactly one bit set per
        // matrix byte; the upper four lanes are zero matrices, so with the $0 constant
        // those lanes of the result are zero.
   664  DATA  expandAVX512_14_mat3<>+0x00(SB)/8, $0x0404040404040404
   665  DATA  expandAVX512_14_mat3<>+0x08(SB)/8, $0x0404080808080808
   666  DATA  expandAVX512_14_mat3<>+0x10(SB)/8, $0x0808080808080808
   667  DATA  expandAVX512_14_mat3<>+0x18(SB)/8, $0x1010101010101010
   668  DATA  expandAVX512_14_mat3<>+0x20(SB)/8, $0x0000000000000000
   669  DATA  expandAVX512_14_mat3<>+0x28(SB)/8, $0x0000000000000000
   670  DATA  expandAVX512_14_mat3<>+0x30(SB)/8, $0x0000000000000000
   671  DATA  expandAVX512_14_mat3<>+0x38(SB)/8, $0x0000000000000000
   672  
   673  GLOBL expandAVX512_14_outShufLo(SB), RODATA, $0x40
        // 64-byte index table for the unmasked VPERMI2B in expandAVX512_14: it selects
        // output bytes from the transformed Z0/Z2 pair and the result replaces the
        // indices in Z1.
        // NOTE(review): declared without the <> file-local suffix that the inShuf/mat
        // tables use — confirm this is intended by the generator (mkasm.go).
   674  DATA  expandAVX512_14_outShufLo+0x00(SB)/8, $0x3830282018100800
   675  DATA  expandAVX512_14_outShufLo+0x08(SB)/8, $0x0901686058504840
   676  DATA  expandAVX512_14_outShufLo+0x10(SB)/8, $0x4941393129211911
   677  DATA  expandAVX512_14_outShufLo+0x18(SB)/8, $0x1a120a0269615951
   678  DATA  expandAVX512_14_outShufLo+0x20(SB)/8, $0x5a524a423a322a22
   679  DATA  expandAVX512_14_outShufLo+0x28(SB)/8, $0x2b231b130b036a62
   680  DATA  expandAVX512_14_outShufLo+0x30(SB)/8, $0x6b635b534b433b33
   681  DATA  expandAVX512_14_outShufLo+0x38(SB)/8, $0x3c342c241c140c04
   682  
   683  GLOBL expandAVX512_14_outShufHi0(SB), RODATA, $0x40
        // 64-byte index table for the zero-masked VPERMI2B in expandAVX512_14 (tables
        // Z2/Z3, mask 0xff0ffc3ff0ffc3ff, result in Z5). The 0xff entries sit at the
        // byte positions that mask zeroes; those bytes come from the
        // expandAVX512_14_outShufHi1 permute instead.
        // NOTE(review): declared without the <> file-local suffix that the inShuf/mat
        // tables use — confirm this is intended by the generator (mkasm.go).
   684  DATA  expandAVX512_14_outShufHi0+0x00(SB)/8, $0x6860585048403830
   685  DATA  expandAVX512_14_outShufHi0+0x08(SB)/8, $0x3931ffffffff7870
   686  DATA  expandAVX512_14_outShufHi0+0x10(SB)/8, $0x7971696159514941
   687  DATA  expandAVX512_14_outShufHi0+0x18(SB)/8, $0x4a423a32ffffffff
   688  DATA  expandAVX512_14_outShufHi0+0x20(SB)/8, $0xffff7a726a625a52
   689  DATA  expandAVX512_14_outShufHi0+0x28(SB)/8, $0x5b534b433b33ffff
   690  DATA  expandAVX512_14_outShufHi0+0x30(SB)/8, $0xffffffff7b736b63
   691  DATA  expandAVX512_14_outShufHi0+0x38(SB)/8, $0x6c645c544c443c34
   692  
   693  GLOBL expandAVX512_14_outShufHi1(SB), RODATA, $0x40
        // 64-byte index table for the zero-masked VPERMB in expandAVX512_14 (source Z4,
        // mask 0xf003c00f003c00, result in Z0). Only the positions enabled by that
        // mask hold real indices; every other entry is 0xff.
        // NOTE(review): declared without the <> file-local suffix that the inShuf/mat
        // tables use — confirm this is intended by the generator (mkasm.go).
   694  DATA  expandAVX512_14_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
   695  DATA  expandAVX512_14_outShufHi1+0x08(SB)/8, $0xffff18100800ffff
   696  DATA  expandAVX512_14_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
   697  DATA  expandAVX512_14_outShufHi1+0x18(SB)/8, $0xffffffff19110901
   698  DATA  expandAVX512_14_outShufHi1+0x20(SB)/8, $0x0a02ffffffffffff
   699  DATA  expandAVX512_14_outShufHi1+0x28(SB)/8, $0xffffffffffff1a12
   700  DATA  expandAVX512_14_outShufHi1+0x30(SB)/8, $0x1b130b03ffffffff
   701  DATA  expandAVX512_14_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
   702  
   703  TEXT expandAVX512_14<>(SB), NOSPLIT, $0-0
        	// Reads 64 bytes from the address in AX, gathers four byte groups with
        	// VPERMB (inShuf0..3), runs each group through VGF2P8AFFINEQB with the
        	// matching mat table, and assembles two result vectors.
        	// Frame is $0-0 (no stack frame, no stack arguments): the input pointer is
        	// taken from AX and the values left in Z1 and Z2 are the outputs
        	// (presumably the low and high parts of the expanded bitmap — confirm
        	// with the caller). Clobbers Z0-Z7, K1 and AX.
        	//
        	// Load the gather index tables, the output shuffle tables and the input.
   704  	VMOVDQU64 expandAVX512_14_inShuf0<>(SB), Z0
   705  	VMOVDQU64 expandAVX512_14_inShuf1<>(SB), Z2
   706  	VMOVDQU64 expandAVX512_14_inShuf2<>(SB), Z3
   707  	VMOVDQU64 expandAVX512_14_inShuf3<>(SB), Z4
   708  	VMOVDQU64 expandAVX512_14_outShufLo(SB), Z1
   709  	VMOVDQU64 expandAVX512_14_outShufHi0(SB), Z5
   710  	VMOVDQU64 expandAVX512_14_outShufHi1(SB), Z6
   711  	VMOVDQU64 (AX), Z7
        	// For each group: gather input bytes from Z7 by index, then apply the
        	// per-lane 8x8 bit matrices (constant term $0) in place.
   712  	VPERMB Z7, Z0, Z0
   713  	VGF2P8AFFINEQB $0, expandAVX512_14_mat0<>(SB), Z0, Z0
   714  	VPERMB Z7, Z2, Z2
   715  	VGF2P8AFFINEQB $0, expandAVX512_14_mat1<>(SB), Z2, Z2
   716  	VPERMB Z7, Z3, Z3
   717  	VGF2P8AFFINEQB $0, expandAVX512_14_mat2<>(SB), Z3, Z3
   718  	VPERMB Z7, Z4, Z4
   719  	VGF2P8AFFINEQB $0, expandAVX512_14_mat3<>(SB), Z4, Z4
        	// First result: two-table shuffle of Z0/Z2, overwriting the indices in Z1.
   720  	VPERMI2B Z2, Z0, Z1
        	// Second result needs bytes from three vectors (Z2, Z3, Z4), so it is
        	// built from two zero-masked permutes. AX is reused as scratch for the
        	// mask constants; the input was already loaded into Z7.
   721  	MOVQ $0xff0ffc3ff0ffc3ff, AX
   722  	KMOVQ AX, K1
        	// Bytes enabled by K1 are picked from Z2/Z3; the rest of Z5 is zeroed.
   723  	VPERMI2B.Z Z3, Z2, K1, Z5
        	// Complementary mask: exactly the bytes that were zeroed above.
   724  	MOVQ $0xf003c00f003c00, AX
   725  	KMOVQ AX, K1
        	// Those bytes are picked from Z4; Z0 is free to reuse at this point.
   726  	VPERMB.Z Z4, Z6, K1, Z0
        	// The masks are disjoint, so OR merges the two partial results into Z2.
   727  	VPORQ Z0, Z5, Z2
   728  	RET
   729  
   730  GLOBL expandAVX512_16_inShuf0<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_16 loads it into Z0 and uses
        // it as the VPERMB index vector over the input read at (AX). Every lane holds
        // the same indices: input bytes 0..3, each listed twice.
   731  DATA  expandAVX512_16_inShuf0<>+0x00(SB)/8, $0x0303020201010000
   732  DATA  expandAVX512_16_inShuf0<>+0x08(SB)/8, $0x0303020201010000
   733  DATA  expandAVX512_16_inShuf0<>+0x10(SB)/8, $0x0303020201010000
   734  DATA  expandAVX512_16_inShuf0<>+0x18(SB)/8, $0x0303020201010000
   735  DATA  expandAVX512_16_inShuf0<>+0x20(SB)/8, $0x0303020201010000
   736  DATA  expandAVX512_16_inShuf0<>+0x28(SB)/8, $0x0303020201010000
   737  DATA  expandAVX512_16_inShuf0<>+0x30(SB)/8, $0x0303020201010000
   738  DATA  expandAVX512_16_inShuf0<>+0x38(SB)/8, $0x0303020201010000
   739  
   740  GLOBL expandAVX512_16_mat0<>(SB), RODATA, $0x40
        // Eight 8x8 bit matrices (one per 64-bit lane). expandAVX512_16 loads the table
        // into Z1 and uses it as the matrix operand of both of its VGF2P8AFFINEQB
        // instructions. In lane k every matrix byte is 1<<k, so every output bit in
        // that lane is a copy of input bit k.
   741  DATA  expandAVX512_16_mat0<>+0x00(SB)/8, $0x0101010101010101
   742  DATA  expandAVX512_16_mat0<>+0x08(SB)/8, $0x0202020202020202
   743  DATA  expandAVX512_16_mat0<>+0x10(SB)/8, $0x0404040404040404
   744  DATA  expandAVX512_16_mat0<>+0x18(SB)/8, $0x0808080808080808
   745  DATA  expandAVX512_16_mat0<>+0x20(SB)/8, $0x1010101010101010
   746  DATA  expandAVX512_16_mat0<>+0x28(SB)/8, $0x2020202020202020
   747  DATA  expandAVX512_16_mat0<>+0x30(SB)/8, $0x4040404040404040
   748  DATA  expandAVX512_16_mat0<>+0x38(SB)/8, $0x8080808080808080
   749  
   750  GLOBL expandAVX512_16_inShuf1<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_16 loads it into Z2 and uses
        // it as the VPERMB index vector over the input read at (AX). Every lane holds
        // the same indices: input bytes 4..7, each listed twice.
   751  DATA  expandAVX512_16_inShuf1<>+0x00(SB)/8, $0x0707060605050404
   752  DATA  expandAVX512_16_inShuf1<>+0x08(SB)/8, $0x0707060605050404
   753  DATA  expandAVX512_16_inShuf1<>+0x10(SB)/8, $0x0707060605050404
   754  DATA  expandAVX512_16_inShuf1<>+0x18(SB)/8, $0x0707060605050404
   755  DATA  expandAVX512_16_inShuf1<>+0x20(SB)/8, $0x0707060605050404
   756  DATA  expandAVX512_16_inShuf1<>+0x28(SB)/8, $0x0707060605050404
   757  DATA  expandAVX512_16_inShuf1<>+0x30(SB)/8, $0x0707060605050404
   758  DATA  expandAVX512_16_inShuf1<>+0x38(SB)/8, $0x0707060605050404
   759  
   760  GLOBL expandAVX512_16_outShufLo(SB), RODATA, $0x40
        // 64-byte index table. expandAVX512_16 loads it into Z3 and uses it as the
        // index vector of both final single-source VPERMB shuffles (of Z0 into Z1 and
        // of Z2 into Z2).
        // NOTE(review): declared without the <> file-local suffix that the inShuf/mat
        // tables use — confirm this is intended by the generator (mkasm.go).
   761  DATA  expandAVX512_16_outShufLo+0x00(SB)/8, $0x1918111009080100
   762  DATA  expandAVX512_16_outShufLo+0x08(SB)/8, $0x3938313029282120
   763  DATA  expandAVX512_16_outShufLo+0x10(SB)/8, $0x1b1a13120b0a0302
   764  DATA  expandAVX512_16_outShufLo+0x18(SB)/8, $0x3b3a33322b2a2322
   765  DATA  expandAVX512_16_outShufLo+0x20(SB)/8, $0x1d1c15140d0c0504
   766  DATA  expandAVX512_16_outShufLo+0x28(SB)/8, $0x3d3c35342d2c2524
   767  DATA  expandAVX512_16_outShufLo+0x30(SB)/8, $0x1f1e17160f0e0706
   768  DATA  expandAVX512_16_outShufLo+0x38(SB)/8, $0x3f3e37362f2e2726
   769  
   770  TEXT expandAVX512_16<>(SB), NOSPLIT, $0-0
        	// Reads 64 bytes from the address in AX, gathers two byte groups with
        	// VPERMB (inShuf0, inShuf1), runs both through VGF2P8AFFINEQB with the
        	// shared mat0 matrices, and reorders each group with the same outShufLo
        	// index table.
        	// Frame is $0-0 (no stack frame, no stack arguments): the input pointer is
        	// taken from AX and the values left in Z1 and Z2 are the outputs
        	// (presumably the low and high parts of the expanded bitmap — confirm
        	// with the caller). Clobbers Z0-Z4.
        	//
        	// Load the index tables, the shared bit matrices and the input.
   771  	VMOVDQU64 expandAVX512_16_inShuf0<>(SB), Z0
   772  	VMOVDQU64 expandAVX512_16_mat0<>(SB), Z1
   773  	VMOVDQU64 expandAVX512_16_inShuf1<>(SB), Z2
   774  	VMOVDQU64 expandAVX512_16_outShufLo(SB), Z3
   775  	VMOVDQU64 (AX), Z4
        	// Gather input bytes from Z4 by index, then apply the per-lane 8x8 bit
        	// matrices held in Z1 (constant term $0) in place.
   776  	VPERMB Z4, Z0, Z0
   777  	VGF2P8AFFINEQB $0, Z1, Z0, Z0
   778  	VPERMB Z4, Z2, Z2
   779  	VGF2P8AFFINEQB $0, Z1, Z2, Z2
        	// Z1 is no longer needed as a matrix after this point, so it is reused as
        	// the first output. Both outputs use the same byte order (Z3).
   780  	VPERMB Z0, Z3, Z1
   781  	VPERMB Z2, Z3, Z2
   782  	RET
   783  
   784  GLOBL expandAVX512_18_inShuf0<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_18 loads it into Z0 and uses
        // it as the VPERMB index vector to gather bytes from the input read at (AX).
   785  DATA  expandAVX512_18_inShuf0<>+0x00(SB)/8, $0x0303020201010000
   786  DATA  expandAVX512_18_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
   787  DATA  expandAVX512_18_inShuf0<>+0x10(SB)/8, $0xffffffff03020100
   788  DATA  expandAVX512_18_inShuf0<>+0x18(SB)/8, $0xffffffff03020100
   789  DATA  expandAVX512_18_inShuf0<>+0x20(SB)/8, $0xffffffff03020100
   790  DATA  expandAVX512_18_inShuf0<>+0x28(SB)/8, $0xffffffff03020100
   791  DATA  expandAVX512_18_inShuf0<>+0x30(SB)/8, $0x0303020201010000
   792  DATA  expandAVX512_18_inShuf0<>+0x38(SB)/8, $0xff03020201010000
   793  
   794  GLOBL expandAVX512_18_mat0<>(SB), RODATA, $0x40
        // Eight 8x8 bit matrices (one per 64-bit lane): memory operand of the
        // VGF2P8AFFINEQB that expandAVX512_18 applies to the bytes gathered via
        // expandAVX512_18_inShuf0. Every matrix byte has exactly one bit set, so each
        // output bit is a copy of a single input bit.
   795  DATA  expandAVX512_18_mat0<>+0x00(SB)/8, $0x0101010101010101
   796  DATA  expandAVX512_18_mat0<>+0x08(SB)/8, $0x0101020202020202
   797  DATA  expandAVX512_18_mat0<>+0x10(SB)/8, $0x0202020202020202
   798  DATA  expandAVX512_18_mat0<>+0x18(SB)/8, $0x0202020204040404
   799  DATA  expandAVX512_18_mat0<>+0x20(SB)/8, $0x0404040404040404
   800  DATA  expandAVX512_18_mat0<>+0x28(SB)/8, $0x0404040404040808
   801  DATA  expandAVX512_18_mat0<>+0x30(SB)/8, $0x0808080808080808
   802  DATA  expandAVX512_18_mat0<>+0x38(SB)/8, $0x1010101010101010
   803  
   804  GLOBL expandAVX512_18_inShuf1<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_18 loads it into Z2 and uses
        // it as the VPERMB index vector to gather bytes from the input read at (AX).
   805  DATA  expandAVX512_18_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
   806  DATA  expandAVX512_18_inShuf1<>+0x08(SB)/8, $0xffffffffff020100
   807  DATA  expandAVX512_18_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
   808  DATA  expandAVX512_18_inShuf1<>+0x18(SB)/8, $0xffffffffff020100
   809  DATA  expandAVX512_18_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
   810  DATA  expandAVX512_18_inShuf1<>+0x28(SB)/8, $0xffff020201010000
   811  DATA  expandAVX512_18_inShuf1<>+0x30(SB)/8, $0xff06060505040403
   812  DATA  expandAVX512_18_inShuf1<>+0x38(SB)/8, $0xffffffff06050403
   813  
   814  GLOBL expandAVX512_18_mat1<>(SB), RODATA, $0x40
        // Eight 8x8 bit matrices (one per 64-bit lane): memory operand of the
        // VGF2P8AFFINEQB that expandAVX512_18 applies to the bytes gathered via
        // expandAVX512_18_inShuf1. Every matrix byte has exactly one bit set, so each
        // output bit is a copy of a single input bit.
   815  DATA  expandAVX512_18_mat1<>+0x00(SB)/8, $0x1010202020202020
   816  DATA  expandAVX512_18_mat1<>+0x08(SB)/8, $0x2020202020202020
   817  DATA  expandAVX512_18_mat1<>+0x10(SB)/8, $0x2020202040404040
   818  DATA  expandAVX512_18_mat1<>+0x18(SB)/8, $0x4040404040404040
   819  DATA  expandAVX512_18_mat1<>+0x20(SB)/8, $0x4040404040408080
   820  DATA  expandAVX512_18_mat1<>+0x28(SB)/8, $0x8080808080808080
   821  DATA  expandAVX512_18_mat1<>+0x30(SB)/8, $0x1010101010101010
   822  DATA  expandAVX512_18_mat1<>+0x38(SB)/8, $0x1010202020202020
   823  
   824  GLOBL expandAVX512_18_inShuf2<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_18 loads it into Z3 and uses
        // it as the VPERMB index vector to gather bytes from the input read at (AX).
   825  DATA  expandAVX512_18_inShuf2<>+0x00(SB)/8, $0xffffffff06050403
   826  DATA  expandAVX512_18_inShuf2<>+0x08(SB)/8, $0xffffffff06050403
   827  DATA  expandAVX512_18_inShuf2<>+0x10(SB)/8, $0xffffffff06050403
   828  DATA  expandAVX512_18_inShuf2<>+0x18(SB)/8, $0xffffffff06050403
   829  DATA  expandAVX512_18_inShuf2<>+0x20(SB)/8, $0x0606050504040303
   830  DATA  expandAVX512_18_inShuf2<>+0x28(SB)/8, $0x0707060605050404
   831  DATA  expandAVX512_18_inShuf2<>+0x30(SB)/8, $0xffffffffff060504
   832  DATA  expandAVX512_18_inShuf2<>+0x38(SB)/8, $0xffffffffff060504
   833  
   834  GLOBL expandAVX512_18_mat2<>(SB), RODATA, $0x40
        // Eight 8x8 bit matrices (one per 64-bit lane): memory operand of the
        // VGF2P8AFFINEQB that expandAVX512_18 applies to the bytes gathered via
        // expandAVX512_18_inShuf2. Every matrix byte has exactly one bit set, so each
        // output bit is a copy of a single input bit.
   835  DATA  expandAVX512_18_mat2<>+0x00(SB)/8, $0x2020202020202020
   836  DATA  expandAVX512_18_mat2<>+0x08(SB)/8, $0x2020202040404040
   837  DATA  expandAVX512_18_mat2<>+0x10(SB)/8, $0x4040404040404040
   838  DATA  expandAVX512_18_mat2<>+0x18(SB)/8, $0x4040404040408080
   839  DATA  expandAVX512_18_mat2<>+0x20(SB)/8, $0x8080808080808080
   840  DATA  expandAVX512_18_mat2<>+0x28(SB)/8, $0x0101010101010101
   841  DATA  expandAVX512_18_mat2<>+0x30(SB)/8, $0x0101020202020202
   842  DATA  expandAVX512_18_mat2<>+0x38(SB)/8, $0x0202020202020202
   843  
   844  GLOBL expandAVX512_18_inShuf3<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_18 loads it into Z4 and uses
        // it as the VPERMB index vector to gather bytes from the input read at (AX).
        // The upper four lanes are all 0xff; the matching lanes of
        // expandAVX512_18_mat3 are zero matrices.
   845  DATA  expandAVX512_18_inShuf3<>+0x00(SB)/8, $0xffffffffff060504
   846  DATA  expandAVX512_18_inShuf3<>+0x08(SB)/8, $0xffffffffff060504
   847  DATA  expandAVX512_18_inShuf3<>+0x10(SB)/8, $0xffffffffff060504
   848  DATA  expandAVX512_18_inShuf3<>+0x18(SB)/8, $0xffff060605050404
   849  DATA  expandAVX512_18_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
   850  DATA  expandAVX512_18_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
   851  DATA  expandAVX512_18_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
   852  DATA  expandAVX512_18_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
   853  
   854  GLOBL expandAVX512_18_mat3<>(SB), RODATA, $0x40
        // Eight 8x8 bit matrices (one per 64-bit lane): memory operand of the
        // VGF2P8AFFINEQB that expandAVX512_18 applies to the bytes gathered via
        // expandAVX512_18_inShuf3. The lower four lanes have exactly one bit set per
        // matrix byte; the upper four lanes are zero matrices, so with the $0 constant
        // those lanes of the result are zero.
   855  DATA  expandAVX512_18_mat3<>+0x00(SB)/8, $0x0202020204040404
   856  DATA  expandAVX512_18_mat3<>+0x08(SB)/8, $0x0404040404040404
   857  DATA  expandAVX512_18_mat3<>+0x10(SB)/8, $0x0404040404040808
   858  DATA  expandAVX512_18_mat3<>+0x18(SB)/8, $0x0808080808080808
   859  DATA  expandAVX512_18_mat3<>+0x20(SB)/8, $0x0000000000000000
   860  DATA  expandAVX512_18_mat3<>+0x28(SB)/8, $0x0000000000000000
   861  DATA  expandAVX512_18_mat3<>+0x30(SB)/8, $0x0000000000000000
   862  DATA  expandAVX512_18_mat3<>+0x38(SB)/8, $0x0000000000000000
   863  
   864  GLOBL expandAVX512_18_outShufLo(SB), RODATA, $0x40
        // 64-byte index table for the unmasked VPERMI2B in expandAVX512_18: it selects
        // output bytes from the transformed Z0/Z2 pair and the result replaces the
        // indices in Z1.
        // NOTE(review): declared without the <> file-local suffix that the inShuf/mat
        // tables use — confirm this is intended by the generator (mkasm.go).
   865  DATA  expandAVX512_18_outShufLo+0x00(SB)/8, $0x3028201810080100
   866  DATA  expandAVX512_18_outShufLo+0x08(SB)/8, $0x6058504840393831
   867  DATA  expandAVX512_18_outShufLo+0x10(SB)/8, $0x2119110903026968
   868  DATA  expandAVX512_18_outShufLo+0x18(SB)/8, $0x5149413b3a333229
   869  DATA  expandAVX512_18_outShufLo+0x20(SB)/8, $0x120a05046b6a6159
   870  DATA  expandAVX512_18_outShufLo+0x28(SB)/8, $0x423d3c35342a221a
   871  DATA  expandAVX512_18_outShufLo+0x30(SB)/8, $0x07066d6c625a524a
   872  DATA  expandAVX512_18_outShufLo+0x38(SB)/8, $0x3e37362b231b130b
   873  
   874  GLOBL expandAVX512_18_outShufHi0(SB), RODATA, $0x40
        // 64-byte index table for the zero-masked VPERMI2B in expandAVX512_18 (tables
        // Z2/Z3, mask 0xffe0fff83ffe0fff, result in Z5). The 0xff entries sit at the
        // byte positions that mask zeroes; those bytes come from the
        // expandAVX512_18_outShufHi1 permute instead.
        // NOTE(review): declared without the <> file-local suffix that the inShuf/mat
        // tables use — confirm this is intended by the generator (mkasm.go).
   875  DATA  expandAVX512_18_outShufHi0+0x00(SB)/8, $0x6160585048403830
   876  DATA  expandAVX512_18_outShufHi0+0x08(SB)/8, $0xffffffff78706968
   877  DATA  expandAVX512_18_outShufHi0+0x10(SB)/8, $0x59514941393231ff
   878  DATA  expandAVX512_18_outShufHi0+0x18(SB)/8, $0xffff79716b6a6362
   879  DATA  expandAVX512_18_outShufHi0+0x20(SB)/8, $0x4a423a3433ffffff
   880  DATA  expandAVX512_18_outShufHi0+0x28(SB)/8, $0x7a726d6c65645a52
   881  DATA  expandAVX512_18_outShufHi0+0x30(SB)/8, $0x3b3635ffffffffff
   882  DATA  expandAVX512_18_outShufHi0+0x38(SB)/8, $0x6f6e67665b534b43
   883  
   884  GLOBL expandAVX512_18_outShufHi1(SB), RODATA, $0x40
        // 64-byte index table for the zero-masked VPERMB in expandAVX512_18 (source Z4,
        // mask 0x1f0007c001f000, result in Z0). Only the positions enabled by that
        // mask hold real indices; every other entry is 0xff.
        // NOTE(review): declared without the <> file-local suffix that the inShuf/mat
        // tables use — confirm this is intended by the generator (mkasm.go).
   885  DATA  expandAVX512_18_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
   886  DATA  expandAVX512_18_outShufHi1+0x08(SB)/8, $0x18100800ffffffff
   887  DATA  expandAVX512_18_outShufHi1+0x10(SB)/8, $0xffffffffffffff19
   888  DATA  expandAVX512_18_outShufHi1+0x18(SB)/8, $0x0901ffffffffffff
   889  DATA  expandAVX512_18_outShufHi1+0x20(SB)/8, $0xffffffffff1b1a11
   890  DATA  expandAVX512_18_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
   891  DATA  expandAVX512_18_outShufHi1+0x30(SB)/8, $0xffffff1d1c120a02
   892  DATA  expandAVX512_18_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
   893  
   894  TEXT expandAVX512_18<>(SB), NOSPLIT, $0-0
        	// Reads 64 bytes from the address in AX, gathers four byte groups with
        	// VPERMB (inShuf0..3), runs each group through VGF2P8AFFINEQB with the
        	// matching mat table, and assembles two result vectors.
        	// Frame is $0-0 (no stack frame, no stack arguments): the input pointer is
        	// taken from AX and the values left in Z1 and Z2 are the outputs
        	// (presumably the low and high parts of the expanded bitmap — confirm
        	// with the caller). Clobbers Z0-Z7, K1 and AX.
        	//
        	// Load the gather index tables, the output shuffle tables and the input.
   895  	VMOVDQU64 expandAVX512_18_inShuf0<>(SB), Z0
   896  	VMOVDQU64 expandAVX512_18_inShuf1<>(SB), Z2
   897  	VMOVDQU64 expandAVX512_18_inShuf2<>(SB), Z3
   898  	VMOVDQU64 expandAVX512_18_inShuf3<>(SB), Z4
   899  	VMOVDQU64 expandAVX512_18_outShufLo(SB), Z1
   900  	VMOVDQU64 expandAVX512_18_outShufHi0(SB), Z5
   901  	VMOVDQU64 expandAVX512_18_outShufHi1(SB), Z6
   902  	VMOVDQU64 (AX), Z7
        	// For each group: gather input bytes from Z7 by index, then apply the
        	// per-lane 8x8 bit matrices (constant term $0) in place.
   903  	VPERMB Z7, Z0, Z0
   904  	VGF2P8AFFINEQB $0, expandAVX512_18_mat0<>(SB), Z0, Z0
   905  	VPERMB Z7, Z2, Z2
   906  	VGF2P8AFFINEQB $0, expandAVX512_18_mat1<>(SB), Z2, Z2
   907  	VPERMB Z7, Z3, Z3
   908  	VGF2P8AFFINEQB $0, expandAVX512_18_mat2<>(SB), Z3, Z3
   909  	VPERMB Z7, Z4, Z4
   910  	VGF2P8AFFINEQB $0, expandAVX512_18_mat3<>(SB), Z4, Z4
        	// First result: two-table shuffle of Z0/Z2, overwriting the indices in Z1.
   911  	VPERMI2B Z2, Z0, Z1
        	// Second result needs bytes from three vectors (Z2, Z3, Z4), so it is
        	// built from two zero-masked permutes. AX is reused as scratch for the
        	// mask constants; the input was already loaded into Z7.
   912  	MOVQ $0xffe0fff83ffe0fff, AX
   913  	KMOVQ AX, K1
        	// Bytes enabled by K1 are picked from Z2/Z3; the rest of Z5 is zeroed.
   914  	VPERMI2B.Z Z3, Z2, K1, Z5
        	// Complementary mask: exactly the bytes that were zeroed above.
   915  	MOVQ $0x1f0007c001f000, AX
   916  	KMOVQ AX, K1
        	// Those bytes are picked from Z4; Z0 is free to reuse at this point.
   917  	VPERMB.Z Z4, Z6, K1, Z0
        	// The masks are disjoint, so OR merges the two partial results into Z2.
   918  	VPORQ Z0, Z5, Z2
   919  	RET
   920  
   921  GLOBL expandAVX512_20_inShuf0<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_20 loads it into Z0 and uses
        // it as the VPERMB index vector to gather bytes from the input read at (AX).
   922  DATA  expandAVX512_20_inShuf0<>+0x00(SB)/8, $0x0303020201010000
   923  DATA  expandAVX512_20_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
   924  DATA  expandAVX512_20_inShuf0<>+0x10(SB)/8, $0xff03020201010000
   925  DATA  expandAVX512_20_inShuf0<>+0x18(SB)/8, $0xffff020201010000
   926  DATA  expandAVX512_20_inShuf0<>+0x20(SB)/8, $0xffffffffff020100
   927  DATA  expandAVX512_20_inShuf0<>+0x28(SB)/8, $0xffff020201010000
   928  DATA  expandAVX512_20_inShuf0<>+0x30(SB)/8, $0xffff020201010000
   929  DATA  expandAVX512_20_inShuf0<>+0x38(SB)/8, $0xffffffffff020100
   930  
   931  GLOBL expandAVX512_20_mat0<>(SB), RODATA, $0x40
        // Eight 8x8 bit matrices (one per 64-bit lane): memory operand of the
        // VGF2P8AFFINEQB that expandAVX512_20 applies to the bytes gathered via
        // expandAVX512_20_inShuf0. Every matrix byte has exactly one bit set, so each
        // output bit is a copy of a single input bit.
   932  DATA  expandAVX512_20_mat0<>+0x00(SB)/8, $0x0101010101010101
   933  DATA  expandAVX512_20_mat0<>+0x08(SB)/8, $0x0101010102020202
   934  DATA  expandAVX512_20_mat0<>+0x10(SB)/8, $0x0202020202020202
   935  DATA  expandAVX512_20_mat0<>+0x18(SB)/8, $0x0404040404040404
   936  DATA  expandAVX512_20_mat0<>+0x20(SB)/8, $0x0404040408080808
   937  DATA  expandAVX512_20_mat0<>+0x28(SB)/8, $0x0808080808080808
   938  DATA  expandAVX512_20_mat0<>+0x30(SB)/8, $0x1010101010101010
   939  DATA  expandAVX512_20_mat0<>+0x38(SB)/8, $0x1010101020202020
   940  
   941  GLOBL expandAVX512_20_inShuf1<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_20 loads it into Z3 and uses
        // it as the VPERMB index vector to gather bytes from the input read at (AX).
   942  DATA  expandAVX512_20_inShuf1<>+0x00(SB)/8, $0xffff020201010000
   943  DATA  expandAVX512_20_inShuf1<>+0x08(SB)/8, $0xffff020201010000
   944  DATA  expandAVX512_20_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
   945  DATA  expandAVX512_20_inShuf1<>+0x18(SB)/8, $0xffff020201010000
   946  DATA  expandAVX512_20_inShuf1<>+0x20(SB)/8, $0xff06060505040403
   947  DATA  expandAVX512_20_inShuf1<>+0x28(SB)/8, $0x0606050504040303
   948  DATA  expandAVX512_20_inShuf1<>+0x30(SB)/8, $0xffffffff06050403
   949  DATA  expandAVX512_20_inShuf1<>+0x38(SB)/8, $0xffff050504040303
   950  
   951  GLOBL expandAVX512_20_mat1<>(SB), RODATA, $0x40
        // Eight 8x8 bit matrices (one per 64-bit lane): memory operand of the
        // VGF2P8AFFINEQB that expandAVX512_20 applies to the bytes gathered via
        // expandAVX512_20_inShuf1. Every matrix byte has exactly one bit set, so each
        // output bit is a copy of a single input bit.
   952  DATA  expandAVX512_20_mat1<>+0x00(SB)/8, $0x2020202020202020
   953  DATA  expandAVX512_20_mat1<>+0x08(SB)/8, $0x4040404040404040
   954  DATA  expandAVX512_20_mat1<>+0x10(SB)/8, $0x4040404080808080
   955  DATA  expandAVX512_20_mat1<>+0x18(SB)/8, $0x8080808080808080
   956  DATA  expandAVX512_20_mat1<>+0x20(SB)/8, $0x0202020202020202
   957  DATA  expandAVX512_20_mat1<>+0x28(SB)/8, $0x0404040404040404
   958  DATA  expandAVX512_20_mat1<>+0x30(SB)/8, $0x0404040408080808
   959  DATA  expandAVX512_20_mat1<>+0x38(SB)/8, $0x0808080808080808
   960  
   961  GLOBL expandAVX512_20_inShuf2<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_20 loads it into Z4 and uses
        // it as the VPERMB index vector to gather bytes from the input read at (AX).
   962  DATA  expandAVX512_20_inShuf2<>+0x00(SB)/8, $0xffff050504040303
   963  DATA  expandAVX512_20_inShuf2<>+0x08(SB)/8, $0xffffffffff050403
   964  DATA  expandAVX512_20_inShuf2<>+0x10(SB)/8, $0xffff050504040303
   965  DATA  expandAVX512_20_inShuf2<>+0x18(SB)/8, $0xffff050504040303
   966  DATA  expandAVX512_20_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
   967  DATA  expandAVX512_20_inShuf2<>+0x28(SB)/8, $0xffff050504040303
   968  DATA  expandAVX512_20_inShuf2<>+0x30(SB)/8, $0xffff060605050404
   969  DATA  expandAVX512_20_inShuf2<>+0x38(SB)/8, $0xffffffffff060504
   970  
   971  GLOBL expandAVX512_20_mat2<>(SB), RODATA, $0x40
        // Eight 8x8 bit matrices (one per 64-bit lane): memory operand of the
        // VGF2P8AFFINEQB that expandAVX512_20 applies to the bytes gathered via
        // expandAVX512_20_inShuf2. Every matrix byte has exactly one bit set, so each
        // output bit is a copy of a single input bit.
   972  DATA  expandAVX512_20_mat2<>+0x00(SB)/8, $0x1010101010101010
   973  DATA  expandAVX512_20_mat2<>+0x08(SB)/8, $0x1010101020202020
   974  DATA  expandAVX512_20_mat2<>+0x10(SB)/8, $0x2020202020202020
   975  DATA  expandAVX512_20_mat2<>+0x18(SB)/8, $0x4040404040404040
   976  DATA  expandAVX512_20_mat2<>+0x20(SB)/8, $0x4040404080808080
   977  DATA  expandAVX512_20_mat2<>+0x28(SB)/8, $0x8080808080808080
   978  DATA  expandAVX512_20_mat2<>+0x30(SB)/8, $0x0101010101010101
   979  DATA  expandAVX512_20_mat2<>+0x38(SB)/8, $0x0101010102020202
   980  
   981  GLOBL expandAVX512_20_outShufLo(SB), RODATA, $0x40
        // 64-byte index table for the first VPERMI2B in expandAVX512_20: it selects
        // output bytes from the transformed Z0/Z3 pair and the result replaces the
        // indices in Z1.
        // NOTE(review): declared without the <> file-local suffix that the inShuf/mat
        // tables use — confirm this is intended by the generator (mkasm.go).
   982  DATA  expandAVX512_20_outShufLo+0x00(SB)/8, $0x2019181110080100
   983  DATA  expandAVX512_20_outShufLo+0x08(SB)/8, $0x4841403831302928
   984  DATA  expandAVX512_20_outShufLo+0x10(SB)/8, $0x1209030259585049
   985  DATA  expandAVX512_20_outShufLo+0x18(SB)/8, $0x33322b2a211b1a13
   986  DATA  expandAVX512_20_outShufLo+0x20(SB)/8, $0x5b5a514b4a434239
   987  DATA  expandAVX512_20_outShufLo+0x28(SB)/8, $0x221d1c15140a0504
   988  DATA  expandAVX512_20_outShufLo+0x30(SB)/8, $0x4c45443a35342d2c
   989  DATA  expandAVX512_20_outShufLo+0x38(SB)/8, $0x160b07065d5c524d
   990  
   991  GLOBL expandAVX512_20_outShufHi(SB), RODATA, $0x40
        // 64-byte index table for the second VPERMI2B in expandAVX512_20: it selects
        // output bytes from the transformed Z3/Z4 pair and the result replaces the
        // indices in Z2.
        // NOTE(review): declared without the <> file-local suffix that the inShuf/mat
        // tables use — confirm this is intended by the generator (mkasm.go).
   992  DATA  expandAVX512_20_outShufHi+0x00(SB)/8, $0x4140393830292820
   993  DATA  expandAVX512_20_outShufHi+0x08(SB)/8, $0x6968605958515048
   994  DATA  expandAVX512_20_outShufHi+0x10(SB)/8, $0x312b2a2221787170
   995  DATA  expandAVX512_20_outShufHi+0x18(SB)/8, $0x5a53524943423b3a
   996  DATA  expandAVX512_20_outShufHi+0x20(SB)/8, $0x237973726b6a615b
   997  DATA  expandAVX512_20_outShufHi+0x28(SB)/8, $0x45443d3c322d2c24
   998  DATA  expandAVX512_20_outShufHi+0x30(SB)/8, $0x6d6c625d5c55544a
   999  DATA  expandAVX512_20_outShufHi+0x38(SB)/8, $0x332f2e26257a7574
  1000  
  1001  TEXT expandAVX512_20<>(SB), NOSPLIT, $0-0
        	// Reads 64 bytes from the address in AX, gathers three byte groups with
        	// VPERMB (inShuf0..2), runs each group through VGF2P8AFFINEQB with the
        	// matching mat table, and assembles two result vectors with VPERMI2B.
        	// Frame is $0-0 (no stack frame, no stack arguments): the input pointer is
        	// taken from AX and the values left in Z1 and Z2 are the outputs
        	// (presumably the low and high parts of the expanded bitmap — confirm
        	// with the caller). Clobbers Z0-Z5.
        	//
        	// Load the gather index tables, the output shuffle tables and the input.
  1002  	VMOVDQU64 expandAVX512_20_inShuf0<>(SB), Z0
  1003  	VMOVDQU64 expandAVX512_20_inShuf1<>(SB), Z3
  1004  	VMOVDQU64 expandAVX512_20_inShuf2<>(SB), Z4
  1005  	VMOVDQU64 expandAVX512_20_outShufLo(SB), Z1
  1006  	VMOVDQU64 expandAVX512_20_outShufHi(SB), Z2
  1007  	VMOVDQU64 (AX), Z5
        	// For each group: gather input bytes from Z5 by index, then apply the
        	// per-lane 8x8 bit matrices (constant term $0) in place.
  1008  	VPERMB Z5, Z0, Z0
  1009  	VGF2P8AFFINEQB $0, expandAVX512_20_mat0<>(SB), Z0, Z0
  1010  	VPERMB Z5, Z3, Z3
  1011  	VGF2P8AFFINEQB $0, expandAVX512_20_mat1<>(SB), Z3, Z3
  1012  	VPERMB Z5, Z4, Z4
  1013  	VGF2P8AFFINEQB $0, expandAVX512_20_mat2<>(SB), Z4, Z4
        	// Two-table byte shuffles; the index register is overwritten with the
        	// result. Z1 is built from Z0/Z3 and Z2 from Z3/Z4.
  1014  	VPERMI2B Z3, Z0, Z1
  1015  	VPERMI2B Z4, Z3, Z2
  1016  	RET
  1017  
  1018  GLOBL expandAVX512_22_inShuf0<>(SB), RODATA, $0x40
        // 64-byte read-only byte-index table. expandAVX512_22 loads it into Z0 and uses
        // it as the VPERMB index vector to gather bytes from the input read at (AX).
  1019  DATA  expandAVX512_22_inShuf0<>+0x00(SB)/8, $0xffff020201010000
  1020  DATA  expandAVX512_22_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
  1021  DATA  expandAVX512_22_inShuf0<>+0x10(SB)/8, $0xffff020201010000
  1022  DATA  expandAVX512_22_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
  1023  DATA  expandAVX512_22_inShuf0<>+0x20(SB)/8, $0xffff020201010000
  1024  DATA  expandAVX512_22_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
  1025  DATA  expandAVX512_22_inShuf0<>+0x30(SB)/8, $0xffff020201010000
  1026  DATA  expandAVX512_22_inShuf0<>+0x38(SB)/8, $0xffff020201010000
  1027  
  1028  GLOBL expandAVX512_22_mat0<>(SB), RODATA, $0x40
        // Eight 8x8 bit matrices (one per 64-bit lane): memory operand of the
        // VGF2P8AFFINEQB that expandAVX512_22 applies to the bytes gathered via
        // expandAVX512_22_inShuf0. Every matrix byte has exactly one bit set, so each
        // output bit is a copy of a single input bit.
  1029  DATA  expandAVX512_22_mat0<>+0x00(SB)/8, $0x0101010101010101
  1030  DATA  expandAVX512_22_mat0<>+0x08(SB)/8, $0x0101010101010202
  1031  DATA  expandAVX512_22_mat0<>+0x10(SB)/8, $0x0202020202020202
  1032  DATA  expandAVX512_22_mat0<>+0x18(SB)/8, $0x0202020204040404
  1033  DATA  expandAVX512_22_mat0<>+0x20(SB)/8, $0x0404040404040404
  1034  DATA  expandAVX512_22_mat0<>+0x28(SB)/8, $0x0404080808080808
  1035  DATA  expandAVX512_22_mat0<>+0x30(SB)/8, $0x0808080808080808
  1036  DATA  expandAVX512_22_mat0<>+0x38(SB)/8, $0x1010101010101010
  1037  
  1038  GLOBL expandAVX512_22_inShuf1<>(SB), RODATA, $0x40
        // 64-byte read-only table that expandAVX512_22 loads into Z2. Presumably used
        // as a VPERMB index vector over the input, like expandAVX512_22_inShuf0 —
        // TODO confirm against the TEXT body.
  1039  DATA  expandAVX512_22_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
  1040  DATA  expandAVX512_22_inShuf1<>+0x08(SB)/8, $0xffff020201010000
  1041  DATA  expandAVX512_22_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
  1042  DATA  expandAVX512_22_inShuf1<>+0x18(SB)/8, $0xffff020201010000
  1043  DATA  expandAVX512_22_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
  1044  DATA  expandAVX512_22_inShuf1<>+0x28(SB)/8, $0xffffffff01010000
  1045  DATA  expandAVX512_22_inShuf1<>+0x30(SB)/8, $0xffff040403030202
  1046  DATA  expandAVX512_22_inShuf1<>+0x38(SB)/8, $0xffff050504040303
  1047  
  1048  GLOBL expandAVX512_22_mat1<>(SB), RODATA, $0x40
        // 64-byte read-only table in which every byte has exactly one bit set.
        // Presumably the eight 8x8 VGF2P8AFFINEQB bit matrices paired with
        // expandAVX512_22_inShuf1 — TODO confirm against the TEXT body.
  1049  DATA  expandAVX512_22_mat1<>+0x00(SB)/8, $0x1010101010102020
  1050  DATA  expandAVX512_22_mat1<>+0x08(SB)/8, $0x2020202020202020
  1051  DATA  expandAVX512_22_mat1<>+0x10(SB)/8, $0x2020202040404040
  1052  DATA  expandAVX512_22_mat1<>+0x18(SB)/8, $0x4040404040404040
  1053  DATA  expandAVX512_22_mat1<>+0x20(SB)/8, $0x4040808080808080
  1054  DATA  expandAVX512_22_mat1<>+0x28(SB)/8, $0x8080808080808080
  1055  DATA  expandAVX512_22_mat1<>+0x30(SB)/8, $0x8080808080808080
  1056  DATA  expandAVX512_22_mat1<>+0x38(SB)/8, $0x0101010101010101
  1057  
  1058  GLOBL expandAVX512_22_inShuf2<>(SB), RODATA, $0x40
        // 64-byte read-only table that expandAVX512_22 loads into Z3. Presumably used
        // as a VPERMB index vector over the input, like expandAVX512_22_inShuf0 —
        // TODO confirm against the TEXT body.
  1059  DATA  expandAVX512_22_inShuf2<>+0x00(SB)/8, $0xffffffffff050403
  1060  DATA  expandAVX512_22_inShuf2<>+0x08(SB)/8, $0xffff050504040303
  1061  DATA  expandAVX512_22_inShuf2<>+0x10(SB)/8, $0xffffffffff050403
  1062  DATA  expandAVX512_22_inShuf2<>+0x18(SB)/8, $0xffff050504040303
  1063  DATA  expandAVX512_22_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
  1064  DATA  expandAVX512_22_inShuf2<>+0x28(SB)/8, $0xffff050504040303
  1065  DATA  expandAVX512_22_inShuf2<>+0x30(SB)/8, $0xffff050504040303
  1066  DATA  expandAVX512_22_inShuf2<>+0x38(SB)/8, $0xffffffffff050403
  1067  
  1068  GLOBL expandAVX512_22_mat2<>(SB), RODATA, $0x40
        // 64-byte read-only table in which every byte has exactly one bit set.
        // Presumably the eight 8x8 VGF2P8AFFINEQB bit matrices paired with
        // expandAVX512_22_inShuf2 — TODO confirm against the TEXT body.
  1069  DATA  expandAVX512_22_mat2<>+0x00(SB)/8, $0x0101010101010202
  1070  DATA  expandAVX512_22_mat2<>+0x08(SB)/8, $0x0202020202020202
  1071  DATA  expandAVX512_22_mat2<>+0x10(SB)/8, $0x0202020204040404
  1072  DATA  expandAVX512_22_mat2<>+0x18(SB)/8, $0x0404040404040404
  1073  DATA  expandAVX512_22_mat2<>+0x20(SB)/8, $0x0404080808080808
  1074  DATA  expandAVX512_22_mat2<>+0x28(SB)/8, $0x0808080808080808
  1075  DATA  expandAVX512_22_mat2<>+0x30(SB)/8, $0x1010101010101010
  1076  DATA  expandAVX512_22_mat2<>+0x38(SB)/8, $0x1010101010102020
  1077  
  1078  GLOBL expandAVX512_22_inShuf3<>(SB), RODATA, $0x40
        // 64-byte read-only table that expandAVX512_22 loads into Z4. Presumably used
        // as a VPERMB index vector over the input, like expandAVX512_22_inShuf0 —
        // TODO confirm against the TEXT body. The upper four lanes are all 0xff.
  1079  DATA  expandAVX512_22_inShuf3<>+0x00(SB)/8, $0xffff050504040303
  1080  DATA  expandAVX512_22_inShuf3<>+0x08(SB)/8, $0xffffffffff050403
  1081  DATA  expandAVX512_22_inShuf3<>+0x10(SB)/8, $0xffffff0504040303
  1082  DATA  expandAVX512_22_inShuf3<>+0x18(SB)/8, $0xffffffffffff0403
  1083  DATA  expandAVX512_22_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
  1084  DATA  expandAVX512_22_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
  1085  DATA  expandAVX512_22_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  1086  DATA  expandAVX512_22_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  1087  
  1088  GLOBL expandAVX512_22_mat3<>(SB), RODATA, $0x40
        // 64-byte read-only table: the lower four qwords have exactly one bit set per
        // byte, the upper four are zero. Presumably the eight 8x8 VGF2P8AFFINEQB bit
        // matrices paired with expandAVX512_22_inShuf3 — TODO confirm against the
        // TEXT body.
  1089  DATA  expandAVX512_22_mat3<>+0x00(SB)/8, $0x2020202020202020
  1090  DATA  expandAVX512_22_mat3<>+0x08(SB)/8, $0x2020202040404040
  1091  DATA  expandAVX512_22_mat3<>+0x10(SB)/8, $0x4040404040404040
  1092  DATA  expandAVX512_22_mat3<>+0x18(SB)/8, $0x4040808080808080
  1093  DATA  expandAVX512_22_mat3<>+0x20(SB)/8, $0x0000000000000000
  1094  DATA  expandAVX512_22_mat3<>+0x28(SB)/8, $0x0000000000000000
  1095  DATA  expandAVX512_22_mat3<>+0x30(SB)/8, $0x0000000000000000
  1096  DATA  expandAVX512_22_mat3<>+0x38(SB)/8, $0x0000000000000000
  1097  
  1098  GLOBL expandAVX512_22_outShufLo(SB), RODATA, $0x40
        // 64-byte read-only table that expandAVX512_22 loads into Z1. Presumably the
        // index vector of the output byte shuffle that produces the first result —
        // TODO confirm against the TEXT body.
        // NOTE(review): declared without the <> file-local suffix that the inShuf/mat
        // tables use — confirm this is intended by the generator (mkasm.go).
  1099  DATA  expandAVX512_22_outShufLo+0x00(SB)/8, $0x2120181110080100
  1100  DATA  expandAVX512_22_outShufLo+0x08(SB)/8, $0x4948403938313028
  1101  DATA  expandAVX512_22_outShufLo+0x10(SB)/8, $0x0302696860595850
  1102  DATA  expandAVX512_22_outShufLo+0x18(SB)/8, $0x3229232219131209
  1103  DATA  expandAVX512_22_outShufLo+0x20(SB)/8, $0x5a514b4a413b3a33
  1104  DATA  expandAVX512_22_outShufLo+0x28(SB)/8, $0x140a05046b6a615b
  1105  DATA  expandAVX512_22_outShufLo+0x30(SB)/8, $0x3c35342a25241a15
  1106  DATA  expandAVX512_22_outShufLo+0x38(SB)/8, $0x625d5c524d4c423d
  1107  
  1108  GLOBL expandAVX512_22_outShufHi0(SB), RODATA, $0x40
  1109  DATA  expandAVX512_22_outShufHi0+0x00(SB)/8, $0x5049484039383130
  1110  DATA  expandAVX512_22_outShufHi0+0x08(SB)/8, $0x7871706968605958
  1111  DATA  expandAVX512_22_outShufHi0+0x10(SB)/8, $0x3332ffffffffffff
  1112  DATA  expandAVX512_22_outShufHi0+0x18(SB)/8, $0x5b5a514b4a413b3a
  1113  DATA  expandAVX512_22_outShufHi0+0x20(SB)/8, $0xffff7973726b6a61
  1114  DATA  expandAVX512_22_outShufHi0+0x28(SB)/8, $0x3d3c3534ffffffff
  1115  DATA  expandAVX512_22_outShufHi0+0x30(SB)/8, $0x6c625d5c524d4c42
  1116  DATA  expandAVX512_22_outShufHi0+0x38(SB)/8, $0xffffffff7a75746d
  1117  
  1118  GLOBL expandAVX512_22_outShufHi1(SB), RODATA, $0x40
  1119  DATA  expandAVX512_22_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  1120  DATA  expandAVX512_22_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  1121  DATA  expandAVX512_22_outShufHi1+0x10(SB)/8, $0xffff181110080100
  1122  DATA  expandAVX512_22_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
  1123  DATA  expandAVX512_22_outShufHi1+0x20(SB)/8, $0x0302ffffffffffff
  1124  DATA  expandAVX512_22_outShufHi1+0x28(SB)/8, $0xffffffff19131209
  1125  DATA  expandAVX512_22_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
  1126  DATA  expandAVX512_22_outShufHi1+0x38(SB)/8, $0x140a0504ffffffff
  1127  
  1128  TEXT expandAVX512_22<>(SB), NOSPLIT, $0-0 // In: AX = address of 64 input bytes. Out: Z1 and Z2 (presumably the low/high halves of the expansion, per the outShufLo/outShufHi table names; confirm with caller). Clobbers AX, K1, Z0-Z7.
  1129  	VMOVDQU64 expandAVX512_22_inShuf0<>(SB), Z0 // Z0, Z2, Z3, Z4: byte-index vectors for the four input gathers
  1130  	VMOVDQU64 expandAVX512_22_inShuf1<>(SB), Z2
  1131  	VMOVDQU64 expandAVX512_22_inShuf2<>(SB), Z3
  1132  	VMOVDQU64 expandAVX512_22_inShuf3<>(SB), Z4
  1133  	VMOVDQU64 expandAVX512_22_outShufLo(SB), Z1 // Z1, Z5, Z6: byte-index vectors for the output shuffles
  1134  	VMOVDQU64 expandAVX512_22_outShufHi0(SB), Z5
  1135  	VMOVDQU64 expandAVX512_22_outShufHi1(SB), Z6
  1136  	VMOVDQU64 (AX), Z7 // Z7 = 64 bytes of input; AX is reused as scratch below
  1137  	VPERMB Z7, Z0, Z0 // Z0 = bytes of Z7 picked by inShuf0
  1138  	VGF2P8AFFINEQB $0, expandAVX512_22_mat0<>(SB), Z0, Z0 // apply the per-qword 8x8 bit matrices of mat0 to Z0
  1139  	VPERMB Z7, Z2, Z2 // Z2 = bytes of Z7 picked by inShuf1
  1140  	VGF2P8AFFINEQB $0, expandAVX512_22_mat1<>(SB), Z2, Z2 // apply mat1 to Z2
  1141  	VPERMB Z7, Z3, Z3 // Z3 = bytes of Z7 picked by inShuf2
  1142  	VGF2P8AFFINEQB $0, expandAVX512_22_mat2<>(SB), Z3, Z3 // apply mat2 to Z3
  1143  	VPERMB Z7, Z4, Z4 // Z4 = bytes of Z7 picked by inShuf3
  1144  	VGF2P8AFFINEQB $0, expandAVX512_22_mat3<>(SB), Z4, Z4 // apply mat3 to Z4
  1145  	VPERMI2B Z2, Z0, Z1 // Z1 = bytes picked from the Z0/Z2 pair by outShufLo (first output register, final)
  1146  	MOVQ $0xffff03fffc0ffff, AX // byte mask for the outShufHi0 shuffle; exact bitwise complement of the mask loaded at the next MOVQ
  1147  	KMOVQ AX, K1
  1148  	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = bytes picked from the Z2/Z3 pair by outShufHi0; bytes whose K1 bit is clear are zeroed
  1149  	MOVQ $0xf0000fc0003f0000, AX // selects exactly the byte lanes the previous mask zeroed
  1150  	KMOVQ AX, K1
  1151  	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = bytes of Z4 picked by outShufHi1, zeroed outside K1
  1152  	VPORQ Z0, Z5, Z2 // Z2 = Z5 | Z0: merge the two disjoint partial results (second output register)
  1153  	RET
  1154  
  1155  GLOBL expandAVX512_24_inShuf0<>(SB), RODATA, $0x40
  1156  DATA  expandAVX512_24_inShuf0<>+0x00(SB)/8, $0x0202010101000000
  1157  DATA  expandAVX512_24_inShuf0<>+0x08(SB)/8, $0x0202010101000000
  1158  DATA  expandAVX512_24_inShuf0<>+0x10(SB)/8, $0x0202010101000000
  1159  DATA  expandAVX512_24_inShuf0<>+0x18(SB)/8, $0x0202010101000000
  1160  DATA  expandAVX512_24_inShuf0<>+0x20(SB)/8, $0x0202010101000000
  1161  DATA  expandAVX512_24_inShuf0<>+0x28(SB)/8, $0xff02010101000000
  1162  DATA  expandAVX512_24_inShuf0<>+0x30(SB)/8, $0xffff010101000000
  1163  DATA  expandAVX512_24_inShuf0<>+0x38(SB)/8, $0xffff010101000000
  1164  
  1165  GLOBL expandAVX512_24_mat0<>(SB), RODATA, $0x40
  1166  DATA  expandAVX512_24_mat0<>+0x00(SB)/8, $0x0101010101010101
  1167  DATA  expandAVX512_24_mat0<>+0x08(SB)/8, $0x0202020202020202
  1168  DATA  expandAVX512_24_mat0<>+0x10(SB)/8, $0x0404040404040404
  1169  DATA  expandAVX512_24_mat0<>+0x18(SB)/8, $0x0808080808080808
  1170  DATA  expandAVX512_24_mat0<>+0x20(SB)/8, $0x1010101010101010
  1171  DATA  expandAVX512_24_mat0<>+0x28(SB)/8, $0x2020202020202020
  1172  DATA  expandAVX512_24_mat0<>+0x30(SB)/8, $0x4040404040404040
  1173  DATA  expandAVX512_24_mat0<>+0x38(SB)/8, $0x8080808080808080
  1174  
  1175  GLOBL expandAVX512_24_inShuf1<>(SB), RODATA, $0x40
  1176  DATA  expandAVX512_24_inShuf1<>+0x00(SB)/8, $0xffffffffffffff02
  1177  DATA  expandAVX512_24_inShuf1<>+0x08(SB)/8, $0xffffffffffffff02
  1178  DATA  expandAVX512_24_inShuf1<>+0x10(SB)/8, $0xffffffffffffff02
  1179  DATA  expandAVX512_24_inShuf1<>+0x18(SB)/8, $0xffffffffffffff02
  1180  DATA  expandAVX512_24_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
  1181  DATA  expandAVX512_24_inShuf1<>+0x28(SB)/8, $0x0404040303030202
  1182  DATA  expandAVX512_24_inShuf1<>+0x30(SB)/8, $0x0404030303020202
  1183  DATA  expandAVX512_24_inShuf1<>+0x38(SB)/8, $0x0404030303020202
  1184  
  1185  GLOBL expandAVX512_24_inShuf2<>(SB), RODATA, $0x40
  1186  DATA  expandAVX512_24_inShuf2<>+0x00(SB)/8, $0x0505040404030303
  1187  DATA  expandAVX512_24_inShuf2<>+0x08(SB)/8, $0x0505040404030303
  1188  DATA  expandAVX512_24_inShuf2<>+0x10(SB)/8, $0x0505040404030303
  1189  DATA  expandAVX512_24_inShuf2<>+0x18(SB)/8, $0xffff040404030303
  1190  DATA  expandAVX512_24_inShuf2<>+0x20(SB)/8, $0xffff040404030303
  1191  DATA  expandAVX512_24_inShuf2<>+0x28(SB)/8, $0xffffffffffffff04
  1192  DATA  expandAVX512_24_inShuf2<>+0x30(SB)/8, $0xffffffffffffff04
  1193  DATA  expandAVX512_24_inShuf2<>+0x38(SB)/8, $0xffffffffffffff05
  1194  
  1195  GLOBL expandAVX512_24_mat2<>(SB), RODATA, $0x40 // Matrix operand of the third VGF2P8AFFINEQB in expandAVX512_24 (applied to the inShuf2 gather); every matrix byte has a single bit set.
  1196  DATA  expandAVX512_24_mat2<>+0x00(SB)/8, $0x0101010101010101
  1197  DATA  expandAVX512_24_mat2<>+0x08(SB)/8, $0x0202020202020202
  1198  DATA  expandAVX512_24_mat2<>+0x10(SB)/8, $0x0404040404040404
  1199  DATA  expandAVX512_24_mat2<>+0x18(SB)/8, $0x0808080808080808
  1200  DATA  expandAVX512_24_mat2<>+0x20(SB)/8, $0x1010101010101010
  1201  DATA  expandAVX512_24_mat2<>+0x28(SB)/8, $0x4040404040404040 // NOTE(review): the sequence goes 0x10 -> 0x40 with no 0x20 row. This looks intentional: inShuf1+0x28 gathers input bytes 2,3,4 and is paired with mat0+0x28 (0x20...), which appears to cover that bit instead.
  1202  DATA  expandAVX512_24_mat2<>+0x30(SB)/8, $0x8080808080808080
  1203  DATA  expandAVX512_24_mat2<>+0x38(SB)/8, $0x0101010101010101 // wraps back to 0x01; the matching inShuf2+0x38 lane gathers input byte 5
  1204  
  1205  GLOBL expandAVX512_24_inShuf3<>(SB), RODATA, $0x40
  1206  DATA  expandAVX512_24_inShuf3<>+0x00(SB)/8, $0xffffffffffffff05
  1207  DATA  expandAVX512_24_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
  1208  DATA  expandAVX512_24_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
  1209  DATA  expandAVX512_24_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
  1210  DATA  expandAVX512_24_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
  1211  DATA  expandAVX512_24_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
  1212  DATA  expandAVX512_24_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  1213  DATA  expandAVX512_24_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  1214  
  1215  GLOBL expandAVX512_24_mat3<>(SB), RODATA, $0x40
  1216  DATA  expandAVX512_24_mat3<>+0x00(SB)/8, $0x0202020202020202
  1217  DATA  expandAVX512_24_mat3<>+0x08(SB)/8, $0x0000000000000000
  1218  DATA  expandAVX512_24_mat3<>+0x10(SB)/8, $0x0000000000000000
  1219  DATA  expandAVX512_24_mat3<>+0x18(SB)/8, $0x0000000000000000
  1220  DATA  expandAVX512_24_mat3<>+0x20(SB)/8, $0x0000000000000000
  1221  DATA  expandAVX512_24_mat3<>+0x28(SB)/8, $0x0000000000000000
  1222  DATA  expandAVX512_24_mat3<>+0x30(SB)/8, $0x0000000000000000
  1223  DATA  expandAVX512_24_mat3<>+0x38(SB)/8, $0x0000000000000000
  1224  
  1225  GLOBL expandAVX512_24_outShufLo(SB), RODATA, $0x40
  1226  DATA  expandAVX512_24_outShufLo+0x00(SB)/8, $0x11100a0908020100
  1227  DATA  expandAVX512_24_outShufLo+0x08(SB)/8, $0x282221201a191812
  1228  DATA  expandAVX512_24_outShufLo+0x10(SB)/8, $0x3a39383231302a29
  1229  DATA  expandAVX512_24_outShufLo+0x18(SB)/8, $0x14130d0c0b050403
  1230  DATA  expandAVX512_24_outShufLo+0x20(SB)/8, $0x2b2524231d1c1b15
  1231  DATA  expandAVX512_24_outShufLo+0x28(SB)/8, $0x3d3c3b3534332d2c
  1232  DATA  expandAVX512_24_outShufLo+0x30(SB)/8, $0x1716480f0e400706
  1233  DATA  expandAVX512_24_outShufLo+0x38(SB)/8, $0x2e602726581f1e50
  1234  
  1235  GLOBL expandAVX512_24_outShufHi0(SB), RODATA, $0x40
  1236  DATA  expandAVX512_24_outShufHi0+0x00(SB)/8, $0x3a39383231302928
  1237  DATA  expandAVX512_24_outShufHi0+0x08(SB)/8, $0x51504a4948424140
  1238  DATA  expandAVX512_24_outShufHi0+0x10(SB)/8, $0x2a6261605a595852
  1239  DATA  expandAVX512_24_outShufHi0+0x18(SB)/8, $0x3d3c3b3534332c2b
  1240  DATA  expandAVX512_24_outShufHi0+0x20(SB)/8, $0x54534d4c4b454443
  1241  DATA  expandAVX512_24_outShufHi0+0x28(SB)/8, $0x2d6564635d5c5b55
  1242  DATA  expandAVX512_24_outShufHi0+0x30(SB)/8, $0x703f3e6837362f2e
  1243  DATA  expandAVX512_24_outShufHi0+0x38(SB)/8, $0x5756ff4f4e784746
  1244  
  1245  GLOBL expandAVX512_24_outShufHi1(SB), RODATA, $0x40
  1246  DATA  expandAVX512_24_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  1247  DATA  expandAVX512_24_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  1248  DATA  expandAVX512_24_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
  1249  DATA  expandAVX512_24_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
  1250  DATA  expandAVX512_24_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  1251  DATA  expandAVX512_24_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
  1252  DATA  expandAVX512_24_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
  1253  DATA  expandAVX512_24_outShufHi1+0x38(SB)/8, $0xffff00ffffffffff
  1254  
  1255  TEXT expandAVX512_24<>(SB), NOSPLIT, $0-0 // In: AX = address of 64 input bytes. Out: Z1 and Z2 (presumably the low/high halves of the expansion, per the outShufLo/outShufHi table names; confirm with caller). Clobbers AX, K1, Z0-Z8.
  1256  	VMOVDQU64 expandAVX512_24_inShuf0<>(SB), Z0 // Z0, Z3, Z4, Z5: byte-index vectors for the four input gathers
  1257  	VMOVDQU64 expandAVX512_24_mat0<>(SB), Z2 // mat0 is kept in a register because the first two affine steps both use it
  1258  	VMOVDQU64 expandAVX512_24_inShuf1<>(SB), Z3
  1259  	VMOVDQU64 expandAVX512_24_inShuf2<>(SB), Z4
  1260  	VMOVDQU64 expandAVX512_24_inShuf3<>(SB), Z5
  1261  	VMOVDQU64 expandAVX512_24_outShufLo(SB), Z1 // Z1, Z6, Z7: byte-index vectors for the output shuffles
  1262  	VMOVDQU64 expandAVX512_24_outShufHi0(SB), Z6
  1263  	VMOVDQU64 expandAVX512_24_outShufHi1(SB), Z7
  1264  	VMOVDQU64 (AX), Z8 // Z8 = 64 bytes of input; AX is reused as scratch below
  1265  	VPERMB Z8, Z0, Z0 // Z0 = bytes of Z8 picked by inShuf0
  1266  	VGF2P8AFFINEQB $0, Z2, Z0, Z0 // apply the per-qword 8x8 bit matrices of mat0 (in Z2) to Z0
  1267  	VPERMB Z8, Z3, Z3 // Z3 = bytes of Z8 picked by inShuf1
  1268  	VGF2P8AFFINEQB $0, Z2, Z3, Z2 // apply mat0 again; the result overwrites Z2, so mat0 is no longer available after this
  1269  	VPERMB Z8, Z4, Z3 // Z3 = bytes of Z8 picked by inShuf2 (Z3 reused)
  1270  	VGF2P8AFFINEQB $0, expandAVX512_24_mat2<>(SB), Z3, Z3 // apply mat2 to Z3
  1271  	VPERMB Z8, Z5, Z4 // Z4 = bytes of Z8 picked by inShuf3 (Z4 reused)
  1272  	VGF2P8AFFINEQB $0, expandAVX512_24_mat3<>(SB), Z4, Z4 // apply mat3 to Z4
  1273  	VPERMI2B Z2, Z0, Z1 // Z1 = bytes picked from the Z0/Z2 pair by outShufLo (first output register, final)
  1274  	MOVQ $0xdfffffffffffffff, AX // every byte lane except lane 61, which is 0xff in outShufHi0
  1275  	KMOVQ AX, K1
  1276  	VPERMI2B.Z Z3, Z2, K1, Z6 // Z6 = bytes picked from the Z2/Z3 pair by outShufHi0; lane 61 is zeroed
  1277  	MOVQ $0x2000000000000000, AX // only byte lane 61 (bit 61); complement of the previous mask
  1278  	KMOVQ AX, K1
  1279  	VPERMB.Z Z4, Z7, K1, Z0 // Z0 = byte of Z4 picked by outShufHi1 in lane 61, zero elsewhere
  1280  	VPORQ Z0, Z6, Z2 // Z2 = Z6 | Z0: merge the two disjoint partial results (second output register)
  1281  	RET
  1282  
  1283  GLOBL expandAVX512_26_inShuf0<>(SB), RODATA, $0x40
  1284  DATA  expandAVX512_26_inShuf0<>+0x00(SB)/8, $0x0202010101000000
  1285  DATA  expandAVX512_26_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
  1286  DATA  expandAVX512_26_inShuf0<>+0x10(SB)/8, $0xffff020201010000
  1287  DATA  expandAVX512_26_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
  1288  DATA  expandAVX512_26_inShuf0<>+0x20(SB)/8, $0xffff020201010000
  1289  DATA  expandAVX512_26_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
  1290  DATA  expandAVX512_26_inShuf0<>+0x30(SB)/8, $0x0202010101000000
  1291  DATA  expandAVX512_26_inShuf0<>+0x38(SB)/8, $0xffff010101000000
  1292  
  1293  GLOBL expandAVX512_26_mat0<>(SB), RODATA, $0x40
  1294  DATA  expandAVX512_26_mat0<>+0x00(SB)/8, $0x0101010101010101
  1295  DATA  expandAVX512_26_mat0<>+0x08(SB)/8, $0x0101020202020202
  1296  DATA  expandAVX512_26_mat0<>+0x10(SB)/8, $0x0202020202020202
  1297  DATA  expandAVX512_26_mat0<>+0x18(SB)/8, $0x0202020204040404
  1298  DATA  expandAVX512_26_mat0<>+0x20(SB)/8, $0x0404040404040404
  1299  DATA  expandAVX512_26_mat0<>+0x28(SB)/8, $0x0404040404040808
  1300  DATA  expandAVX512_26_mat0<>+0x30(SB)/8, $0x0808080808080808
  1301  DATA  expandAVX512_26_mat0<>+0x38(SB)/8, $0x1010101010101010
  1302  
  1303  GLOBL expandAVX512_26_inShuf1<>(SB), RODATA, $0x40
  1304  DATA  expandAVX512_26_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
  1305  DATA  expandAVX512_26_inShuf1<>+0x08(SB)/8, $0xffffffff01010000
  1306  DATA  expandAVX512_26_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
  1307  DATA  expandAVX512_26_inShuf1<>+0x18(SB)/8, $0xffffffff01010000
  1308  DATA  expandAVX512_26_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
  1309  DATA  expandAVX512_26_inShuf1<>+0x28(SB)/8, $0xffff010101000000
  1310  DATA  expandAVX512_26_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
  1311  DATA  expandAVX512_26_inShuf1<>+0x38(SB)/8, $0xff04040403030302
  1312  
  1313  GLOBL expandAVX512_26_mat1<>(SB), RODATA, $0x40
  1314  DATA  expandAVX512_26_mat1<>+0x00(SB)/8, $0x1010202020202020
  1315  DATA  expandAVX512_26_mat1<>+0x08(SB)/8, $0x2020202020202020
  1316  DATA  expandAVX512_26_mat1<>+0x10(SB)/8, $0x2020202040404040
  1317  DATA  expandAVX512_26_mat1<>+0x18(SB)/8, $0x4040404040404040
  1318  DATA  expandAVX512_26_mat1<>+0x20(SB)/8, $0x4040404040408080
  1319  DATA  expandAVX512_26_mat1<>+0x28(SB)/8, $0x8080808080808080
  1320  DATA  expandAVX512_26_mat1<>+0x30(SB)/8, $0x0101010101010101
  1321  DATA  expandAVX512_26_mat1<>+0x38(SB)/8, $0x0808080808080808
  1322  
  1323  GLOBL expandAVX512_26_inShuf2<>(SB), RODATA, $0x40
  1324  DATA  expandAVX512_26_inShuf2<>+0x00(SB)/8, $0x0404030303020202
  1325  DATA  expandAVX512_26_inShuf2<>+0x08(SB)/8, $0xffffffffff040302
  1326  DATA  expandAVX512_26_inShuf2<>+0x10(SB)/8, $0xffff040403030202
  1327  DATA  expandAVX512_26_inShuf2<>+0x18(SB)/8, $0xffffffffff040302
  1328  DATA  expandAVX512_26_inShuf2<>+0x20(SB)/8, $0xffff040403030202
  1329  DATA  expandAVX512_26_inShuf2<>+0x28(SB)/8, $0xffffffffff040302
  1330  DATA  expandAVX512_26_inShuf2<>+0x30(SB)/8, $0xff04030303020202
  1331  DATA  expandAVX512_26_inShuf2<>+0x38(SB)/8, $0xffff040404030303
  1332  
  1333  GLOBL expandAVX512_26_mat2<>(SB), RODATA, $0x40
  1334  DATA  expandAVX512_26_mat2<>+0x00(SB)/8, $0x1010101010101010
  1335  DATA  expandAVX512_26_mat2<>+0x08(SB)/8, $0x1010202020202020
  1336  DATA  expandAVX512_26_mat2<>+0x10(SB)/8, $0x2020202020202020
  1337  DATA  expandAVX512_26_mat2<>+0x18(SB)/8, $0x2020202040404040
  1338  DATA  expandAVX512_26_mat2<>+0x20(SB)/8, $0x4040404040404040
  1339  DATA  expandAVX512_26_mat2<>+0x28(SB)/8, $0x4040404040408080
  1340  DATA  expandAVX512_26_mat2<>+0x30(SB)/8, $0x8080808080808080
  1341  DATA  expandAVX512_26_mat2<>+0x38(SB)/8, $0x0101010101010101
  1342  
  1343  GLOBL expandAVX512_26_inShuf3<>(SB), RODATA, $0x40
  1344  DATA  expandAVX512_26_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
  1345  DATA  expandAVX512_26_inShuf3<>+0x08(SB)/8, $0xffffffff04040303
  1346  DATA  expandAVX512_26_inShuf3<>+0x10(SB)/8, $0xffffffffffff0403
  1347  DATA  expandAVX512_26_inShuf3<>+0x18(SB)/8, $0xffffffff04040303
  1348  DATA  expandAVX512_26_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
  1349  DATA  expandAVX512_26_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
  1350  DATA  expandAVX512_26_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  1351  DATA  expandAVX512_26_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  1352  
  1353  GLOBL expandAVX512_26_mat3<>(SB), RODATA, $0x40
  1354  DATA  expandAVX512_26_mat3<>+0x00(SB)/8, $0x0101020202020202
  1355  DATA  expandAVX512_26_mat3<>+0x08(SB)/8, $0x0202020202020202
  1356  DATA  expandAVX512_26_mat3<>+0x10(SB)/8, $0x0202020204040404
  1357  DATA  expandAVX512_26_mat3<>+0x18(SB)/8, $0x0404040404040404
  1358  DATA  expandAVX512_26_mat3<>+0x20(SB)/8, $0x0404040404040808
  1359  DATA  expandAVX512_26_mat3<>+0x28(SB)/8, $0x1010101010101010
  1360  DATA  expandAVX512_26_mat3<>+0x30(SB)/8, $0x0000000000000000
  1361  DATA  expandAVX512_26_mat3<>+0x38(SB)/8, $0x0000000000000000
  1362  
  1363  GLOBL expandAVX512_26_outShufLo(SB), RODATA, $0x40
  1364  DATA  expandAVX512_26_outShufLo+0x00(SB)/8, $0x2018111008020100
  1365  DATA  expandAVX512_26_outShufLo+0x08(SB)/8, $0x3a39383231302821
  1366  DATA  expandAVX512_26_outShufLo+0x10(SB)/8, $0x6860595850494840
  1367  DATA  expandAVX512_26_outShufLo+0x18(SB)/8, $0x1312090504036a69
  1368  DATA  expandAVX512_26_outShufLo+0x20(SB)/8, $0x3b35343329232219
  1369  DATA  expandAVX512_26_outShufLo+0x28(SB)/8, $0x5b5a514b4a413d3c
  1370  DATA  expandAVX512_26_outShufLo+0x30(SB)/8, $0x0a7007066d6c6b61
  1371  DATA  expandAVX512_26_outShufLo+0x38(SB)/8, $0x37362a25241a1514
  1372  
  1373  GLOBL expandAVX512_26_outShufHi0(SB), RODATA, $0x40
  1374  DATA  expandAVX512_26_outShufHi0+0x00(SB)/8, $0x5851504842414038
  1375  DATA  expandAVX512_26_outShufHi0+0x08(SB)/8, $0x7978727170686160
  1376  DATA  expandAVX512_26_outShufHi0+0x10(SB)/8, $0xffffffffffffff7a
  1377  DATA  expandAVX512_26_outShufHi0+0x18(SB)/8, $0x52494544433b3a39
  1378  DATA  expandAVX512_26_outShufHi0+0x20(SB)/8, $0x7574736963625953
  1379  DATA  expandAVX512_26_outShufHi0+0x28(SB)/8, $0xffffffffff7d7c7b
  1380  DATA  expandAVX512_26_outShufHi0+0x30(SB)/8, $0xff47463e3d3cffff
  1381  DATA  expandAVX512_26_outShufHi0+0x38(SB)/8, $0x766a65645a55544a
  1382  
  1383  GLOBL expandAVX512_26_outShufHi1(SB), RODATA, $0x40
  1384  DATA  expandAVX512_26_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  1385  DATA  expandAVX512_26_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  1386  DATA  expandAVX512_26_outShufHi1+0x10(SB)/8, $0x20191810090800ff
  1387  DATA  expandAVX512_26_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
  1388  DATA  expandAVX512_26_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  1389  DATA  expandAVX512_26_outShufHi1+0x28(SB)/8, $0x1a110b0a01ffffff
  1390  DATA  expandAVX512_26_outShufHi1+0x30(SB)/8, $0x28ffffffffff211b
  1391  DATA  expandAVX512_26_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
  1392  
  1393  TEXT expandAVX512_26<>(SB), NOSPLIT, $0-0 // In: AX = address of 64 input bytes. Out: Z1 and Z2 (presumably the low/high halves of the expansion, per the outShufLo/outShufHi table names; confirm with caller). Clobbers AX, K1, Z0-Z7.
  1394  	VMOVDQU64 expandAVX512_26_inShuf0<>(SB), Z0 // Z0, Z2, Z3, Z4: byte-index vectors for the four input gathers
  1395  	VMOVDQU64 expandAVX512_26_inShuf1<>(SB), Z2
  1396  	VMOVDQU64 expandAVX512_26_inShuf2<>(SB), Z3
  1397  	VMOVDQU64 expandAVX512_26_inShuf3<>(SB), Z4
  1398  	VMOVDQU64 expandAVX512_26_outShufLo(SB), Z1 // Z1, Z5, Z6: byte-index vectors for the output shuffles
  1399  	VMOVDQU64 expandAVX512_26_outShufHi0(SB), Z5
  1400  	VMOVDQU64 expandAVX512_26_outShufHi1(SB), Z6
  1401  	VMOVDQU64 (AX), Z7 // Z7 = 64 bytes of input; AX is reused as scratch below
  1402  	VPERMB Z7, Z0, Z0 // Z0 = bytes of Z7 picked by inShuf0
  1403  	VGF2P8AFFINEQB $0, expandAVX512_26_mat0<>(SB), Z0, Z0 // apply the per-qword 8x8 bit matrices of mat0 to Z0
  1404  	VPERMB Z7, Z2, Z2 // Z2 = bytes of Z7 picked by inShuf1
  1405  	VGF2P8AFFINEQB $0, expandAVX512_26_mat1<>(SB), Z2, Z2 // apply mat1 to Z2
  1406  	VPERMB Z7, Z3, Z3 // Z3 = bytes of Z7 picked by inShuf2
  1407  	VGF2P8AFFINEQB $0, expandAVX512_26_mat2<>(SB), Z3, Z3 // apply mat2 to Z3
  1408  	VPERMB Z7, Z4, Z4 // Z4 = bytes of Z7 picked by inShuf3
  1409  	VGF2P8AFFINEQB $0, expandAVX512_26_mat3<>(SB), Z4, Z4 // apply mat3 to Z4
  1410  	VPERMI2B Z2, Z0, Z1 // Z1 = bytes picked from the Z0/Z2 pair by outShufLo (first output register, final)
  1411  	MOVQ $0xff7c07ffff01ffff, AX // byte mask for the outShufHi0 shuffle; clear bits match the 0xff lanes of outShufHi0
  1412  	KMOVQ AX, K1
  1413  	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = bytes picked from the Z2/Z3 pair by outShufHi0; bytes whose K1 bit is clear are zeroed
  1414  	MOVQ $0x83f80000fe0000, AX // exact bitwise complement of the previous mask (upper byte is zero)
  1415  	KMOVQ AX, K1
  1416  	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = bytes of Z4 picked by outShufHi1, zeroed outside K1
  1417  	VPORQ Z0, Z5, Z2 // Z2 = Z5 | Z0: merge the two disjoint partial results (second output register)
  1418  	RET
  1419  
  1420  GLOBL expandAVX512_28_inShuf0<>(SB), RODATA, $0x40
  1421  DATA  expandAVX512_28_inShuf0<>+0x00(SB)/8, $0x0202010101000000
  1422  DATA  expandAVX512_28_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
  1423  DATA  expandAVX512_28_inShuf0<>+0x10(SB)/8, $0x0202010101000000
  1424  DATA  expandAVX512_28_inShuf0<>+0x18(SB)/8, $0xff02010101000000
  1425  DATA  expandAVX512_28_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
  1426  DATA  expandAVX512_28_inShuf0<>+0x28(SB)/8, $0xffff010101000000
  1427  DATA  expandAVX512_28_inShuf0<>+0x30(SB)/8, $0xffff010101000000
  1428  DATA  expandAVX512_28_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100
  1429  
  1430  GLOBL expandAVX512_28_mat0<>(SB), RODATA, $0x40
  1431  DATA  expandAVX512_28_mat0<>+0x00(SB)/8, $0x0101010101010101
  1432  DATA  expandAVX512_28_mat0<>+0x08(SB)/8, $0x0101010102020202
  1433  DATA  expandAVX512_28_mat0<>+0x10(SB)/8, $0x0202020202020202
  1434  DATA  expandAVX512_28_mat0<>+0x18(SB)/8, $0x0404040404040404
  1435  DATA  expandAVX512_28_mat0<>+0x20(SB)/8, $0x0404040408080808
  1436  DATA  expandAVX512_28_mat0<>+0x28(SB)/8, $0x0808080808080808
  1437  DATA  expandAVX512_28_mat0<>+0x30(SB)/8, $0x1010101010101010
  1438  DATA  expandAVX512_28_mat0<>+0x38(SB)/8, $0x1010101020202020
  1439  
  1440  GLOBL expandAVX512_28_inShuf1<>(SB), RODATA, $0x40
  1441  DATA  expandAVX512_28_inShuf1<>+0x00(SB)/8, $0xffff010101000000
  1442  DATA  expandAVX512_28_inShuf1<>+0x08(SB)/8, $0xffff010101000000
  1443  DATA  expandAVX512_28_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
  1444  DATA  expandAVX512_28_inShuf1<>+0x18(SB)/8, $0xffff010101000000
  1445  DATA  expandAVX512_28_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
  1446  DATA  expandAVX512_28_inShuf1<>+0x28(SB)/8, $0xffffffffffffff02
  1447  DATA  expandAVX512_28_inShuf1<>+0x30(SB)/8, $0x0404040303030202
  1448  DATA  expandAVX512_28_inShuf1<>+0x38(SB)/8, $0xffffffffff040302
  1449  
  1450  GLOBL expandAVX512_28_mat1<>(SB), RODATA, $0x40
  1451  DATA  expandAVX512_28_mat1<>+0x00(SB)/8, $0x2020202020202020
  1452  DATA  expandAVX512_28_mat1<>+0x08(SB)/8, $0x4040404040404040
  1453  DATA  expandAVX512_28_mat1<>+0x10(SB)/8, $0x4040404080808080
  1454  DATA  expandAVX512_28_mat1<>+0x18(SB)/8, $0x8080808080808080
  1455  DATA  expandAVX512_28_mat1<>+0x20(SB)/8, $0x0101010101010101
  1456  DATA  expandAVX512_28_mat1<>+0x28(SB)/8, $0x0202020202020202
  1457  DATA  expandAVX512_28_mat1<>+0x30(SB)/8, $0x0404040404040404
  1458  DATA  expandAVX512_28_mat1<>+0x38(SB)/8, $0x0404040408080808
  1459  
  1460  GLOBL expandAVX512_28_inShuf2<>(SB), RODATA, $0x40
  1461  DATA  expandAVX512_28_inShuf2<>+0x00(SB)/8, $0x0404030303020202
  1462  DATA  expandAVX512_28_inShuf2<>+0x08(SB)/8, $0x0404030303020202
  1463  DATA  expandAVX512_28_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
  1464  DATA  expandAVX512_28_inShuf2<>+0x18(SB)/8, $0xffff030303020202
  1465  DATA  expandAVX512_28_inShuf2<>+0x20(SB)/8, $0xffff030303020202
  1466  DATA  expandAVX512_28_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
  1467  DATA  expandAVX512_28_inShuf2<>+0x30(SB)/8, $0xffff030303020202
  1468  DATA  expandAVX512_28_inShuf2<>+0x38(SB)/8, $0xffff040404030303
  1469  
  1470  GLOBL expandAVX512_28_mat2<>(SB), RODATA, $0x40
  1471  DATA  expandAVX512_28_mat2<>+0x00(SB)/8, $0x0808080808080808
  1472  DATA  expandAVX512_28_mat2<>+0x08(SB)/8, $0x1010101010101010
  1473  DATA  expandAVX512_28_mat2<>+0x10(SB)/8, $0x1010101020202020
  1474  DATA  expandAVX512_28_mat2<>+0x18(SB)/8, $0x2020202020202020
  1475  DATA  expandAVX512_28_mat2<>+0x20(SB)/8, $0x4040404040404040
  1476  DATA  expandAVX512_28_mat2<>+0x28(SB)/8, $0x4040404080808080
  1477  DATA  expandAVX512_28_mat2<>+0x30(SB)/8, $0x8080808080808080
  1478  DATA  expandAVX512_28_mat2<>+0x38(SB)/8, $0x0101010101010101
  1479  
  1480  GLOBL expandAVX512_28_inShuf3<>(SB), RODATA, $0x40
  1481  DATA  expandAVX512_28_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
  1482  DATA  expandAVX512_28_inShuf3<>+0x08(SB)/8, $0xffff040404030303
  1483  DATA  expandAVX512_28_inShuf3<>+0x10(SB)/8, $0xffffffffffffff04
  1484  DATA  expandAVX512_28_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
  1485  DATA  expandAVX512_28_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
  1486  DATA  expandAVX512_28_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
  1487  DATA  expandAVX512_28_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  1488  DATA  expandAVX512_28_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  1489  
  1490  GLOBL expandAVX512_28_mat3<>(SB), RODATA, $0x40
  1491  DATA  expandAVX512_28_mat3<>+0x00(SB)/8, $0x0101010102020202
  1492  DATA  expandAVX512_28_mat3<>+0x08(SB)/8, $0x0202020202020202
  1493  DATA  expandAVX512_28_mat3<>+0x10(SB)/8, $0x0808080808080808
  1494  DATA  expandAVX512_28_mat3<>+0x18(SB)/8, $0x0000000000000000
  1495  DATA  expandAVX512_28_mat3<>+0x20(SB)/8, $0x0000000000000000
  1496  DATA  expandAVX512_28_mat3<>+0x28(SB)/8, $0x0000000000000000
  1497  DATA  expandAVX512_28_mat3<>+0x30(SB)/8, $0x0000000000000000
  1498  DATA  expandAVX512_28_mat3<>+0x38(SB)/8, $0x0000000000000000
  1499  
  1500  GLOBL expandAVX512_28_outShufLo(SB), RODATA, $0x40
  1501  DATA  expandAVX512_28_outShufLo+0x00(SB)/8, $0x1812111008020100
  1502  DATA  expandAVX512_28_outShufLo+0x08(SB)/8, $0x31302a2928201a19
  1503  DATA  expandAVX512_28_outShufLo+0x10(SB)/8, $0x4a49484241403832
  1504  DATA  expandAVX512_28_outShufLo+0x18(SB)/8, $0x090504035a595850
  1505  DATA  expandAVX512_28_outShufLo+0x20(SB)/8, $0x2b211d1c1b151413
  1506  DATA  expandAVX512_28_outShufLo+0x28(SB)/8, $0x4443393534332d2c
  1507  DATA  expandAVX512_28_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b45
  1508  DATA  expandAVX512_28_outShufLo+0x38(SB)/8, $0x1e6817160a600706
  1509  
  1510  GLOBL expandAVX512_28_outShufHi0(SB), RODATA, $0x40
  1511  DATA  expandAVX512_28_outShufHi0+0x00(SB)/8, $0x4948424140383130
  1512  DATA  expandAVX512_28_outShufHi0+0x08(SB)/8, $0x6261605a5958504a
  1513  DATA  expandAVX512_28_outShufHi0+0x10(SB)/8, $0xff7a797872717068
  1514  DATA  expandAVX512_28_outShufHi0+0x18(SB)/8, $0x4339343332ffffff
  1515  DATA  expandAVX512_28_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b4544
  1516  DATA  expandAVX512_28_outShufHi0+0x28(SB)/8, $0x757473696564635d
  1517  DATA  expandAVX512_28_outShufHi0+0x30(SB)/8, $0x35ffffffff7d7c7b
  1518  DATA  expandAVX512_28_outShufHi0+0x38(SB)/8, $0x4f4eff47463a3736
  1519  
  1520  GLOBL expandAVX512_28_outShufHi1(SB), RODATA, $0x40
  1521  DATA  expandAVX512_28_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  1522  DATA  expandAVX512_28_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  1523  DATA  expandAVX512_28_outShufHi1+0x10(SB)/8, $0x00ffffffffffffff
  1524  DATA  expandAVX512_28_outShufHi1+0x18(SB)/8, $0xffffffffff0a0908
  1525  DATA  expandAVX512_28_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  1526  DATA  expandAVX512_28_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
  1527  DATA  expandAVX512_28_outShufHi1+0x30(SB)/8, $0xff0d0c0b01ffffff
  1528  DATA  expandAVX512_28_outShufHi1+0x38(SB)/8, $0xffff10ffffffffff
  1529  
  1530  TEXT expandAVX512_28<>(SB), NOSPLIT, $0-0 // In: AX = address of 64 input bytes. Out: Z1 and Z2 (presumably the low/high halves of the expansion, per the outShufLo/outShufHi table names; confirm with caller). Clobbers AX, K1, Z0-Z7.
  1531  	VMOVDQU64 expandAVX512_28_inShuf0<>(SB), Z0 // Z0, Z2, Z3, Z4: byte-index vectors for the four input gathers
  1532  	VMOVDQU64 expandAVX512_28_inShuf1<>(SB), Z2
  1533  	VMOVDQU64 expandAVX512_28_inShuf2<>(SB), Z3
  1534  	VMOVDQU64 expandAVX512_28_inShuf3<>(SB), Z4
  1535  	VMOVDQU64 expandAVX512_28_outShufLo(SB), Z1 // Z1, Z5, Z6: byte-index vectors for the output shuffles
  1536  	VMOVDQU64 expandAVX512_28_outShufHi0(SB), Z5
  1537  	VMOVDQU64 expandAVX512_28_outShufHi1(SB), Z6
  1538  	VMOVDQU64 (AX), Z7 // Z7 = 64 bytes of input; AX is reused as scratch below
  1539  	VPERMB Z7, Z0, Z0 // Z0 = bytes of Z7 picked by inShuf0
  1540  	VGF2P8AFFINEQB $0, expandAVX512_28_mat0<>(SB), Z0, Z0 // apply the per-qword 8x8 bit matrices of mat0 to Z0
  1541  	VPERMB Z7, Z2, Z2 // Z2 = bytes of Z7 picked by inShuf1
  1542  	VGF2P8AFFINEQB $0, expandAVX512_28_mat1<>(SB), Z2, Z2 // apply mat1 to Z2
  1543  	VPERMB Z7, Z3, Z3 // Z3 = bytes of Z7 picked by inShuf2
  1544  	VGF2P8AFFINEQB $0, expandAVX512_28_mat2<>(SB), Z3, Z3 // apply mat2 to Z3
  1545  	VPERMB Z7, Z4, Z4 // Z4 = bytes of Z7 picked by inShuf3
  1546  	VGF2P8AFFINEQB $0, expandAVX512_28_mat3<>(SB), Z4, Z4 // apply mat3 to Z4
  1547  	VPERMI2B Z2, Z0, Z1 // Z1 = bytes picked from the Z0/Z2 pair by outShufLo (first output register, final)
  1548  	MOVQ $0xdf87fffff87fffff, AX // byte mask for the outShufHi0 shuffle; clear bits match the 0xff lanes of outShufHi0
  1549  	KMOVQ AX, K1
  1550  	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = bytes picked from the Z2/Z3 pair by outShufHi0; bytes whose K1 bit is clear are zeroed
  1551  	MOVQ $0x2078000007800000, AX // exact bitwise complement of the previous mask
  1552  	KMOVQ AX, K1
  1553  	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = bytes of Z4 picked by outShufHi1, zeroed outside K1
  1554  	VPORQ Z0, Z5, Z2 // Z2 = Z5 | Z0: merge the two disjoint partial results (second output register)
  1555  	RET
  1556  
  1557  GLOBL expandAVX512_30_inShuf0<>(SB), RODATA, $0x40
  1558  DATA  expandAVX512_30_inShuf0<>+0x00(SB)/8, $0x0202010101000000
  1559  DATA  expandAVX512_30_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
  1560  DATA  expandAVX512_30_inShuf0<>+0x10(SB)/8, $0xffff010101000000
  1561  DATA  expandAVX512_30_inShuf0<>+0x18(SB)/8, $0xffffffffffff0100
  1562  DATA  expandAVX512_30_inShuf0<>+0x20(SB)/8, $0xffff010101000000
  1563  DATA  expandAVX512_30_inShuf0<>+0x28(SB)/8, $0xffffffffffff0100
  1564  DATA  expandAVX512_30_inShuf0<>+0x30(SB)/8, $0xffff010101000000
  1565  DATA  expandAVX512_30_inShuf0<>+0x38(SB)/8, $0xffff010101000000
  1566  
  1567  GLOBL expandAVX512_30_mat0<>(SB), RODATA, $0x40
  1568  DATA  expandAVX512_30_mat0<>+0x00(SB)/8, $0x0101010101010101
  1569  DATA  expandAVX512_30_mat0<>+0x08(SB)/8, $0x0101010101010202
  1570  DATA  expandAVX512_30_mat0<>+0x10(SB)/8, $0x0202020202020202
  1571  DATA  expandAVX512_30_mat0<>+0x18(SB)/8, $0x0202020204040404
  1572  DATA  expandAVX512_30_mat0<>+0x20(SB)/8, $0x0404040404040404
  1573  DATA  expandAVX512_30_mat0<>+0x28(SB)/8, $0x0404080808080808
  1574  DATA  expandAVX512_30_mat0<>+0x30(SB)/8, $0x0808080808080808
  1575  DATA  expandAVX512_30_mat0<>+0x38(SB)/8, $0x1010101010101010
  1576  
  1577  GLOBL expandAVX512_30_inShuf1<>(SB), RODATA, $0x40
  1578  DATA  expandAVX512_30_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
  1579  DATA  expandAVX512_30_inShuf1<>+0x08(SB)/8, $0xffff010101000000
  1580  DATA  expandAVX512_30_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
  1581  DATA  expandAVX512_30_inShuf1<>+0x18(SB)/8, $0xffff010101000000
  1582  DATA  expandAVX512_30_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
  1583  DATA  expandAVX512_30_inShuf1<>+0x28(SB)/8, $0xffff010101000000
  1584  DATA  expandAVX512_30_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
  1585  DATA  expandAVX512_30_inShuf1<>+0x38(SB)/8, $0x0404030303020202
  1586  
  1587  GLOBL expandAVX512_30_mat1<>(SB), RODATA, $0x40
  1588  DATA  expandAVX512_30_mat1<>+0x00(SB)/8, $0x1010101010102020
  1589  DATA  expandAVX512_30_mat1<>+0x08(SB)/8, $0x2020202020202020
  1590  DATA  expandAVX512_30_mat1<>+0x10(SB)/8, $0x2020202040404040
  1591  DATA  expandAVX512_30_mat1<>+0x18(SB)/8, $0x4040404040404040
  1592  DATA  expandAVX512_30_mat1<>+0x20(SB)/8, $0x4040808080808080
  1593  DATA  expandAVX512_30_mat1<>+0x28(SB)/8, $0x8080808080808080
  1594  DATA  expandAVX512_30_mat1<>+0x30(SB)/8, $0x0101010101010101
  1595  DATA  expandAVX512_30_mat1<>+0x38(SB)/8, $0x0202020202020202
  1596  
  1597  GLOBL expandAVX512_30_inShuf2<>(SB), RODATA, $0x40
  1598  DATA  expandAVX512_30_inShuf2<>+0x00(SB)/8, $0xffffffffff040302
  1599  DATA  expandAVX512_30_inShuf2<>+0x08(SB)/8, $0xffff030303020202
  1600  DATA  expandAVX512_30_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
  1601  DATA  expandAVX512_30_inShuf2<>+0x18(SB)/8, $0xffff030303020202
  1602  DATA  expandAVX512_30_inShuf2<>+0x20(SB)/8, $0xffff030303020202
  1603  DATA  expandAVX512_30_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
  1604  DATA  expandAVX512_30_inShuf2<>+0x30(SB)/8, $0xffff030303020202
  1605  DATA  expandAVX512_30_inShuf2<>+0x38(SB)/8, $0xffffffffffff0302
  1606  
  1607  GLOBL expandAVX512_30_mat2<>(SB), RODATA, $0x40
  1608  DATA  expandAVX512_30_mat2<>+0x00(SB)/8, $0x0202020204040404
  1609  DATA  expandAVX512_30_mat2<>+0x08(SB)/8, $0x0404040404040404
  1610  DATA  expandAVX512_30_mat2<>+0x10(SB)/8, $0x0404080808080808
  1611  DATA  expandAVX512_30_mat2<>+0x18(SB)/8, $0x0808080808080808
  1612  DATA  expandAVX512_30_mat2<>+0x20(SB)/8, $0x1010101010101010
  1613  DATA  expandAVX512_30_mat2<>+0x28(SB)/8, $0x1010101010102020
  1614  DATA  expandAVX512_30_mat2<>+0x30(SB)/8, $0x2020202020202020
  1615  DATA  expandAVX512_30_mat2<>+0x38(SB)/8, $0x2020202040404040
  1616  
  1617  GLOBL expandAVX512_30_inShuf3<>(SB), RODATA, $0x40
  1618  DATA  expandAVX512_30_inShuf3<>+0x00(SB)/8, $0xffff030303020202
  1619  DATA  expandAVX512_30_inShuf3<>+0x08(SB)/8, $0xffffffffffff0302
  1620  DATA  expandAVX512_30_inShuf3<>+0x10(SB)/8, $0xffff030303020202
  1621  DATA  expandAVX512_30_inShuf3<>+0x18(SB)/8, $0xffff040404030303
  1622  DATA  expandAVX512_30_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
  1623  DATA  expandAVX512_30_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
  1624  DATA  expandAVX512_30_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  1625  DATA  expandAVX512_30_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  1626  
  1627  GLOBL expandAVX512_30_mat3<>(SB), RODATA, $0x40
  1628  DATA  expandAVX512_30_mat3<>+0x00(SB)/8, $0x4040404040404040
  1629  DATA  expandAVX512_30_mat3<>+0x08(SB)/8, $0x4040808080808080
  1630  DATA  expandAVX512_30_mat3<>+0x10(SB)/8, $0x8080808080808080
  1631  DATA  expandAVX512_30_mat3<>+0x18(SB)/8, $0x0101010101010101
  1632  DATA  expandAVX512_30_mat3<>+0x20(SB)/8, $0x0101010101010202
  1633  DATA  expandAVX512_30_mat3<>+0x28(SB)/8, $0x0202020202020202
  1634  DATA  expandAVX512_30_mat3<>+0x30(SB)/8, $0x0000000000000000
  1635  DATA  expandAVX512_30_mat3<>+0x38(SB)/8, $0x0000000000000000
  1636  
  1637  GLOBL expandAVX512_30_outShufLo(SB), RODATA, $0x40
  1638  DATA  expandAVX512_30_outShufLo+0x00(SB)/8, $0x1812111008020100
  1639  DATA  expandAVX512_30_outShufLo+0x08(SB)/8, $0x3832313028222120
  1640  DATA  expandAVX512_30_outShufLo+0x10(SB)/8, $0x58504a4948403a39
  1641  DATA  expandAVX512_30_outShufLo+0x18(SB)/8, $0x04036a6968605a59
  1642  DATA  expandAVX512_30_outShufLo+0x20(SB)/8, $0x2423191514130905
  1643  DATA  expandAVX512_30_outShufLo+0x28(SB)/8, $0x3d3c3b3534332925
  1644  DATA  expandAVX512_30_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b41
  1645  DATA  expandAVX512_30_outShufLo+0x38(SB)/8, $0x0a7007066d6c6b61
  1646  
  1647  GLOBL expandAVX512_30_outShufHi0(SB), RODATA, $0x40 // 64 byte indices for the zero-masked VPERMI2B in expandAVX512_30 (loaded into Z5, selects from Z2/Z3); the 0xff slots sit at byte positions that the K1 mask zeroes.
  1648  DATA  expandAVX512_30_outShufHi0+0x00(SB)/8, $0x504a4948403a3938
  1649  DATA  expandAVX512_30_outShufHi0+0x08(SB)/8, $0x70686261605a5958
  1650  DATA  expandAVX512_30_outShufHi0+0x10(SB)/8, $0xffffffffff787271
  1651  DATA  expandAVX512_30_outShufHi0+0x18(SB)/8, $0x3c3bffffffffffff
  1652  DATA  expandAVX512_30_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b413d
  1653  DATA  expandAVX512_30_outShufHi0+0x28(SB)/8, $0x757473696564635d
  1654  DATA  expandAVX512_30_outShufHi0+0x30(SB)/8, $0xffffffffffffff79
  1655  DATA  expandAVX512_30_outShufHi0+0x38(SB)/8, $0x42ff3f3effffffff
  1656  
  1657  GLOBL expandAVX512_30_outShufHi1(SB), RODATA, $0x40 // 64 byte indices for the zero-masked VPERMB over Z4 in expandAVX512_30 (loaded into Z6); the 0xff slots sit at byte positions that the K1 mask zeroes.
  1658  DATA  expandAVX512_30_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  1659  DATA  expandAVX512_30_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  1660  DATA  expandAVX512_30_outShufHi1+0x10(SB)/8, $0x1008020100ffffff
  1661  DATA  expandAVX512_30_outShufHi1+0x18(SB)/8, $0xffff201a19181211
  1662  DATA  expandAVX512_30_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  1663  DATA  expandAVX512_30_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
  1664  DATA  expandAVX512_30_outShufHi1+0x30(SB)/8, $0x15141309050403ff
  1665  DATA  expandAVX512_30_outShufHi1+0x38(SB)/8, $0xff28ffff211d1c1b
  1666  
  1667  TEXT expandAVX512_30<>(SB), NOSPLIT, $0-0
        	// Entry +0x88 of ·gcExpandersAVX512 points at this routine.
        	// In:  AX = address of the 64 input bytes.
        	// Out: results are left in Z1 and Z2 (presumably the low and high
        	//      64 output bytes, going by the outShufLo/outShufHi table
        	//      names -- confirm against the caller).
        	// Writes Z0-Z7, K1 and AX; declares no stack frame ($0-0).
  1668  	VMOVDQU64 expandAVX512_30_inShuf0<>(SB), Z0 // gather indices, stage 0
  1669  	VMOVDQU64 expandAVX512_30_inShuf1<>(SB), Z2 // gather indices, stage 1
  1670  	VMOVDQU64 expandAVX512_30_inShuf2<>(SB), Z3 // gather indices, stage 2
  1671  	VMOVDQU64 expandAVX512_30_inShuf3<>(SB), Z4 // gather indices, stage 3
  1672  	VMOVDQU64 expandAVX512_30_outShufLo(SB), Z1 // output shuffle indices (Z0/Z2)
  1673  	VMOVDQU64 expandAVX512_30_outShufHi0(SB), Z5 // output shuffle indices (Z2/Z3)
  1674  	VMOVDQU64 expandAVX512_30_outShufHi1(SB), Z6 // output shuffle indices (Z4)
  1675  	VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes
  1676  	VPERMB Z7, Z0, Z0 // Z0 = input bytes picked by the inShuf0 indices
  1677  	VGF2P8AFFINEQB $0, expandAVX512_30_mat0<>(SB), Z0, Z0 // per-byte bit transform with mat0
  1678  	VPERMB Z7, Z2, Z2 // same gather + transform for stage 1
  1679  	VGF2P8AFFINEQB $0, expandAVX512_30_mat1<>(SB), Z2, Z2
  1680  	VPERMB Z7, Z3, Z3 // stage 2
  1681  	VGF2P8AFFINEQB $0, expandAVX512_30_mat2<>(SB), Z3, Z3
  1682  	VPERMB Z7, Z4, Z4 // stage 3
  1683  	VGF2P8AFFINEQB $0, expandAVX512_30_mat3<>(SB), Z4, Z4
  1684  	VPERMI2B Z2, Z0, Z1 // Z1 = bytes of Z0/Z2 selected by the outShufLo indices
  1685  	MOVQ $0xb001ffffc007ffff, AX // AX becomes scratch; the input was already loaded into Z7
  1686  	KMOVQ AX, K1 // K1 = output bytes that come from Z2/Z3
  1687  	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = selected bytes of Z2/Z3; bytes outside K1 are zeroed
  1688  	MOVQ $0x4ffe00003ff80000, AX // bitwise complement of the previous mask
  1689  	KMOVQ AX, K1 // K1 = output bytes that come from Z4
  1690  	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = selected bytes of Z4; bytes outside K1 are zeroed
  1691  	VPORQ Z0, Z5, Z2 // Z2 = Z5 | Z0 (the two masks are disjoint)
  1692  	RET
  1693  
  1694  GLOBL expandAVX512_32_inShuf0<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_32 loads them into Z0 to gather from the input (bytes 0 and 1 only).
  1695  DATA  expandAVX512_32_inShuf0<>+0x00(SB)/8, $0x0101010100000000
  1696  DATA  expandAVX512_32_inShuf0<>+0x08(SB)/8, $0x0101010100000000
  1697  DATA  expandAVX512_32_inShuf0<>+0x10(SB)/8, $0x0101010100000000
  1698  DATA  expandAVX512_32_inShuf0<>+0x18(SB)/8, $0x0101010100000000
  1699  DATA  expandAVX512_32_inShuf0<>+0x20(SB)/8, $0x0101010100000000
  1700  DATA  expandAVX512_32_inShuf0<>+0x28(SB)/8, $0x0101010100000000
  1701  DATA  expandAVX512_32_inShuf0<>+0x30(SB)/8, $0x0101010100000000
  1702  DATA  expandAVX512_32_inShuf0<>+0x38(SB)/8, $0x0101010100000000
  1703  
  1704  GLOBL expandAVX512_32_mat0<>(SB), RODATA, $0x40 // 64-byte matrix operand; expandAVX512_32 loads it into Z1 and reuses it for both VGF2P8AFFINEQB steps.
  1705  DATA  expandAVX512_32_mat0<>+0x00(SB)/8, $0x0101010101010101
  1706  DATA  expandAVX512_32_mat0<>+0x08(SB)/8, $0x0202020202020202
  1707  DATA  expandAVX512_32_mat0<>+0x10(SB)/8, $0x0404040404040404
  1708  DATA  expandAVX512_32_mat0<>+0x18(SB)/8, $0x0808080808080808
  1709  DATA  expandAVX512_32_mat0<>+0x20(SB)/8, $0x1010101010101010
  1710  DATA  expandAVX512_32_mat0<>+0x28(SB)/8, $0x2020202020202020
  1711  DATA  expandAVX512_32_mat0<>+0x30(SB)/8, $0x4040404040404040
  1712  DATA  expandAVX512_32_mat0<>+0x38(SB)/8, $0x8080808080808080
  1713  
  1714  GLOBL expandAVX512_32_inShuf1<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_32 loads them into Z2 to gather from the input (bytes 2 and 3 only).
  1715  DATA  expandAVX512_32_inShuf1<>+0x00(SB)/8, $0x0303030302020202
  1716  DATA  expandAVX512_32_inShuf1<>+0x08(SB)/8, $0x0303030302020202
  1717  DATA  expandAVX512_32_inShuf1<>+0x10(SB)/8, $0x0303030302020202
  1718  DATA  expandAVX512_32_inShuf1<>+0x18(SB)/8, $0x0303030302020202
  1719  DATA  expandAVX512_32_inShuf1<>+0x20(SB)/8, $0x0303030302020202
  1720  DATA  expandAVX512_32_inShuf1<>+0x28(SB)/8, $0x0303030302020202
  1721  DATA  expandAVX512_32_inShuf1<>+0x30(SB)/8, $0x0303030302020202
  1722  DATA  expandAVX512_32_inShuf1<>+0x38(SB)/8, $0x0303030302020202
  1723  
  1724  GLOBL expandAVX512_32_outShufLo(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_32 loads them into Z3 and applies the same shuffle to both Z0 and Z2.
  1725  DATA  expandAVX512_32_outShufLo+0x00(SB)/8, $0x0b0a090803020100
  1726  DATA  expandAVX512_32_outShufLo+0x08(SB)/8, $0x1b1a191813121110
  1727  DATA  expandAVX512_32_outShufLo+0x10(SB)/8, $0x2b2a292823222120
  1728  DATA  expandAVX512_32_outShufLo+0x18(SB)/8, $0x3b3a393833323130
  1729  DATA  expandAVX512_32_outShufLo+0x20(SB)/8, $0x0f0e0d0c07060504
  1730  DATA  expandAVX512_32_outShufLo+0x28(SB)/8, $0x1f1e1d1c17161514
  1731  DATA  expandAVX512_32_outShufLo+0x30(SB)/8, $0x2f2e2d2c27262524
  1732  DATA  expandAVX512_32_outShufLo+0x38(SB)/8, $0x3f3e3d3c37363534
  1733  
  1734  TEXT expandAVX512_32<>(SB), NOSPLIT, $0-0
        	// Entry +0x90 of ·gcExpandersAVX512 points at this routine.
        	// In:  AX = address of the 64 input bytes (AX itself is not modified).
        	// Out: results are left in Z1 and Z2 (presumably the low and high
        	//      64 output bytes -- confirm against the caller).
        	// Writes Z0-Z4; declares no stack frame ($0-0).
  1735  	VMOVDQU64 expandAVX512_32_inShuf0<>(SB), Z0 // gather indices, stage 0
  1736  	VMOVDQU64 expandAVX512_32_mat0<>(SB), Z1 // matrix shared by both stages
  1737  	VMOVDQU64 expandAVX512_32_inShuf1<>(SB), Z2 // gather indices, stage 1
  1738  	VMOVDQU64 expandAVX512_32_outShufLo(SB), Z3 // output shuffle indices, used twice
  1739  	VMOVDQU64 (AX), Z4 // Z4 = 64 input bytes
  1740  	VPERMB Z4, Z0, Z0 // Z0 = input bytes picked by the inShuf0 indices
  1741  	VGF2P8AFFINEQB $0, Z1, Z0, Z0 // per-byte bit transform with the matrix in Z1
  1742  	VPERMB Z4, Z2, Z2 // same gather + transform for stage 1
  1743  	VGF2P8AFFINEQB $0, Z1, Z2, Z2
  1744  	VPERMB Z0, Z3, Z1 // Z1 = Z0 shuffled by outShufLo (overwrites the matrix, no longer needed)
  1745  	VPERMB Z2, Z3, Z2 // Z2 = Z2 shuffled by the same indices
  1746  	RET
  1747  
  1748  GLOBL expandAVX512_36_inShuf0<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_36 loads them into Z0 to gather from the input (stage 0).
  1749  DATA  expandAVX512_36_inShuf0<>+0x00(SB)/8, $0x0101010100000000
  1750  DATA  expandAVX512_36_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
  1751  DATA  expandAVX512_36_inShuf0<>+0x10(SB)/8, $0x0101010100000000
  1752  DATA  expandAVX512_36_inShuf0<>+0x18(SB)/8, $0x0101010100000000
  1753  DATA  expandAVX512_36_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
  1754  DATA  expandAVX512_36_inShuf0<>+0x28(SB)/8, $0x0101010100000000
  1755  DATA  expandAVX512_36_inShuf0<>+0x30(SB)/8, $0x0101010100000000
  1756  DATA  expandAVX512_36_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100
  1757  
  1758  GLOBL expandAVX512_36_mat0<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_36 applies to Z0 (the inShuf0 gather).
  1759  DATA  expandAVX512_36_mat0<>+0x00(SB)/8, $0x0101010101010101
  1760  DATA  expandAVX512_36_mat0<>+0x08(SB)/8, $0x0101010102020202
  1761  DATA  expandAVX512_36_mat0<>+0x10(SB)/8, $0x0202020202020202
  1762  DATA  expandAVX512_36_mat0<>+0x18(SB)/8, $0x0404040404040404
  1763  DATA  expandAVX512_36_mat0<>+0x20(SB)/8, $0x0404040408080808
  1764  DATA  expandAVX512_36_mat0<>+0x28(SB)/8, $0x0808080808080808
  1765  DATA  expandAVX512_36_mat0<>+0x30(SB)/8, $0x1010101010101010
  1766  DATA  expandAVX512_36_mat0<>+0x38(SB)/8, $0x1010101020202020
  1767  
  1768  GLOBL expandAVX512_36_inShuf1<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_36 loads them into Z3 to gather from the input (stage 1).
  1769  DATA  expandAVX512_36_inShuf1<>+0x00(SB)/8, $0x0101010100000000
  1770  DATA  expandAVX512_36_inShuf1<>+0x08(SB)/8, $0xffffff0100000000
  1771  DATA  expandAVX512_36_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
  1772  DATA  expandAVX512_36_inShuf1<>+0x18(SB)/8, $0xffffffff00000000
  1773  DATA  expandAVX512_36_inShuf1<>+0x20(SB)/8, $0xff02020202010101
  1774  DATA  expandAVX512_36_inShuf1<>+0x28(SB)/8, $0xffffffffffff0201
  1775  DATA  expandAVX512_36_inShuf1<>+0x30(SB)/8, $0x0202020201010101
  1776  DATA  expandAVX512_36_inShuf1<>+0x38(SB)/8, $0x0303030302020202
  1777  
  1778  GLOBL expandAVX512_36_mat1<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_36 applies to Z3 (the inShuf1 gather).
  1779  DATA  expandAVX512_36_mat1<>+0x00(SB)/8, $0x2020202020202020
  1780  DATA  expandAVX512_36_mat1<>+0x08(SB)/8, $0x4040404040404040
  1781  DATA  expandAVX512_36_mat1<>+0x10(SB)/8, $0x4040404080808080
  1782  DATA  expandAVX512_36_mat1<>+0x18(SB)/8, $0x8080808080808080
  1783  DATA  expandAVX512_36_mat1<>+0x20(SB)/8, $0x4040404040404040
  1784  DATA  expandAVX512_36_mat1<>+0x28(SB)/8, $0x4040404080808080
  1785  DATA  expandAVX512_36_mat1<>+0x30(SB)/8, $0x8080808080808080
  1786  DATA  expandAVX512_36_mat1<>+0x38(SB)/8, $0x0101010101010101
  1787  
  1788  GLOBL expandAVX512_36_inShuf2<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_36 loads them into Z4 to gather from the input (stage 2).
  1789  DATA  expandAVX512_36_inShuf2<>+0x00(SB)/8, $0xffffffffffff0302
  1790  DATA  expandAVX512_36_inShuf2<>+0x08(SB)/8, $0x0303030302020202
  1791  DATA  expandAVX512_36_inShuf2<>+0x10(SB)/8, $0x0303030302020202
  1792  DATA  expandAVX512_36_inShuf2<>+0x18(SB)/8, $0xffffffffffff0302
  1793  DATA  expandAVX512_36_inShuf2<>+0x20(SB)/8, $0x0303030302020202
  1794  DATA  expandAVX512_36_inShuf2<>+0x28(SB)/8, $0xffff030302020202
  1795  DATA  expandAVX512_36_inShuf2<>+0x30(SB)/8, $0xffffffffffffff02
  1796  DATA  expandAVX512_36_inShuf2<>+0x38(SB)/8, $0xffffffff02020202
  1797  
  1798  GLOBL expandAVX512_36_mat2<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_36 applies to Z4 (the inShuf2 gather).
  1799  DATA  expandAVX512_36_mat2<>+0x00(SB)/8, $0x0101010102020202
  1800  DATA  expandAVX512_36_mat2<>+0x08(SB)/8, $0x0202020202020202
  1801  DATA  expandAVX512_36_mat2<>+0x10(SB)/8, $0x0404040404040404
  1802  DATA  expandAVX512_36_mat2<>+0x18(SB)/8, $0x0404040408080808
  1803  DATA  expandAVX512_36_mat2<>+0x20(SB)/8, $0x0808080808080808
  1804  DATA  expandAVX512_36_mat2<>+0x28(SB)/8, $0x1010101010101010
  1805  DATA  expandAVX512_36_mat2<>+0x30(SB)/8, $0x1010101020202020
  1806  DATA  expandAVX512_36_mat2<>+0x38(SB)/8, $0x2020202020202020
  1807  
  1808  GLOBL expandAVX512_36_outShufLo(SB), RODATA, $0x40 // 64 byte indices for the first VPERMI2B in expandAVX512_36; loaded into Z1, which receives bytes picked from Z0/Z3.
  1809  DATA  expandAVX512_36_outShufLo+0x00(SB)/8, $0x1211100803020100
  1810  DATA  expandAVX512_36_outShufLo+0x08(SB)/8, $0x2928201b1a191813
  1811  DATA  expandAVX512_36_outShufLo+0x10(SB)/8, $0x4038333231302b2a
  1812  DATA  expandAVX512_36_outShufLo+0x18(SB)/8, $0x504b4a4948434241
  1813  DATA  expandAVX512_36_outShufLo+0x20(SB)/8, $0x070605045b5a5958
  1814  DATA  expandAVX512_36_outShufLo+0x28(SB)/8, $0x1e1d1c1716151409
  1815  DATA  expandAVX512_36_outShufLo+0x30(SB)/8, $0x35342f2e2d2c211f
  1816  DATA  expandAVX512_36_outShufLo+0x38(SB)/8, $0x4c47464544393736
  1817  
  1818  GLOBL expandAVX512_36_outShufHi(SB), RODATA, $0x40 // 64 byte indices for the second VPERMI2B in expandAVX512_36; loaded into Z2, which receives bytes picked from Z3/Z4.
  1819  DATA  expandAVX512_36_outShufHi+0x00(SB)/8, $0x3332313028222120
  1820  DATA  expandAVX512_36_outShufHi+0x08(SB)/8, $0x4a4948403b3a3938
  1821  DATA  expandAVX512_36_outShufHi+0x10(SB)/8, $0x616058535251504b
  1822  DATA  expandAVX512_36_outShufHi+0x18(SB)/8, $0x78706b6a69686362
  1823  DATA  expandAVX512_36_outShufHi+0x20(SB)/8, $0x29262524237b7a79
  1824  DATA  expandAVX512_36_outShufHi+0x28(SB)/8, $0x3f3e3d3c37363534
  1825  DATA  expandAVX512_36_outShufHi+0x30(SB)/8, $0x5655544f4e4d4c41
  1826  DATA  expandAVX512_36_outShufHi+0x38(SB)/8, $0x6d6c676665645957
  1827  
  1828  TEXT expandAVX512_36<>(SB), NOSPLIT, $0-0
        	// Entry +0x98 of ·gcExpandersAVX512 points at this routine.
        	// In:  AX = address of the 64 input bytes (AX itself is not modified).
        	// Out: results are left in Z1 and Z2 (presumably the low and high
        	//      64 output bytes, going by the outShufLo/outShufHi table
        	//      names -- confirm against the caller).
        	// Writes Z0-Z5; declares no stack frame ($0-0).
  1829  	VMOVDQU64 expandAVX512_36_inShuf0<>(SB), Z0 // gather indices, stage 0
  1830  	VMOVDQU64 expandAVX512_36_inShuf1<>(SB), Z3 // gather indices, stage 1
  1831  	VMOVDQU64 expandAVX512_36_inShuf2<>(SB), Z4 // gather indices, stage 2
  1832  	VMOVDQU64 expandAVX512_36_outShufLo(SB), Z1 // output shuffle indices (Z0/Z3)
  1833  	VMOVDQU64 expandAVX512_36_outShufHi(SB), Z2 // output shuffle indices (Z3/Z4)
  1834  	VMOVDQU64 (AX), Z5 // Z5 = 64 input bytes
  1835  	VPERMB Z5, Z0, Z0 // Z0 = input bytes picked by the inShuf0 indices
  1836  	VGF2P8AFFINEQB $0, expandAVX512_36_mat0<>(SB), Z0, Z0 // per-byte bit transform with mat0
  1837  	VPERMB Z5, Z3, Z3 // same gather + transform for stage 1
  1838  	VGF2P8AFFINEQB $0, expandAVX512_36_mat1<>(SB), Z3, Z3
  1839  	VPERMB Z5, Z4, Z4 // stage 2
  1840  	VGF2P8AFFINEQB $0, expandAVX512_36_mat2<>(SB), Z4, Z4
  1841  	VPERMI2B Z3, Z0, Z1 // Z1 = bytes of Z0/Z3 selected by the outShufLo indices
  1842  	VPERMI2B Z4, Z3, Z2 // Z2 = bytes of Z3/Z4 selected by the outShufHi indices
  1843  	RET
  1844  
  1845  GLOBL expandAVX512_40_inShuf0<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_40 loads them into Z0 to gather from the input (stage 0).
  1846  DATA  expandAVX512_40_inShuf0<>+0x00(SB)/8, $0x0101010000000000
  1847  DATA  expandAVX512_40_inShuf0<>+0x08(SB)/8, $0x0101010000000000
  1848  DATA  expandAVX512_40_inShuf0<>+0x10(SB)/8, $0x0101010000000000
  1849  DATA  expandAVX512_40_inShuf0<>+0x18(SB)/8, $0x0101010000000000
  1850  DATA  expandAVX512_40_inShuf0<>+0x20(SB)/8, $0x0101010000000000
  1851  DATA  expandAVX512_40_inShuf0<>+0x28(SB)/8, $0xffffff0000000000
  1852  DATA  expandAVX512_40_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
  1853  DATA  expandAVX512_40_inShuf0<>+0x38(SB)/8, $0xffffff0000000000
  1854  
  1855  GLOBL expandAVX512_40_mat0<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_40 applies to Z0 (the inShuf0 gather).
  1856  DATA  expandAVX512_40_mat0<>+0x00(SB)/8, $0x0101010101010101
  1857  DATA  expandAVX512_40_mat0<>+0x08(SB)/8, $0x0202020202020202
  1858  DATA  expandAVX512_40_mat0<>+0x10(SB)/8, $0x0404040404040404
  1859  DATA  expandAVX512_40_mat0<>+0x18(SB)/8, $0x0808080808080808
  1860  DATA  expandAVX512_40_mat0<>+0x20(SB)/8, $0x1010101010101010
  1861  DATA  expandAVX512_40_mat0<>+0x28(SB)/8, $0x2020202020202020
  1862  DATA  expandAVX512_40_mat0<>+0x30(SB)/8, $0x4040404040404040
  1863  DATA  expandAVX512_40_mat0<>+0x38(SB)/8, $0x8080808080808080
  1864  
  1865  GLOBL expandAVX512_40_inShuf1<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_40 loads them into Z2 to gather from the input (stage 1).
  1866  DATA  expandAVX512_40_inShuf1<>+0x00(SB)/8, $0xffffffffffff0101
  1867  DATA  expandAVX512_40_inShuf1<>+0x08(SB)/8, $0xffffffffffff0101
  1868  DATA  expandAVX512_40_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
  1869  DATA  expandAVX512_40_inShuf1<>+0x18(SB)/8, $0xffffffffffff0101
  1870  DATA  expandAVX512_40_inShuf1<>+0x20(SB)/8, $0xffffffffffffff01
  1871  DATA  expandAVX512_40_inShuf1<>+0x28(SB)/8, $0xffff020202020201
  1872  DATA  expandAVX512_40_inShuf1<>+0x30(SB)/8, $0x0202020101010101
  1873  DATA  expandAVX512_40_inShuf1<>+0x38(SB)/8, $0x0202020101010101
  1874  
  1875  GLOBL expandAVX512_40_mat1<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_40 applies to Z2 (the inShuf1 gather).
  1876  DATA  expandAVX512_40_mat1<>+0x00(SB)/8, $0x0101010101010101
  1877  DATA  expandAVX512_40_mat1<>+0x08(SB)/8, $0x0202020202020202
  1878  DATA  expandAVX512_40_mat1<>+0x10(SB)/8, $0x0404040404040404
  1879  DATA  expandAVX512_40_mat1<>+0x18(SB)/8, $0x0808080808080808
  1880  DATA  expandAVX512_40_mat1<>+0x20(SB)/8, $0x1010101010101010
  1881  DATA  expandAVX512_40_mat1<>+0x28(SB)/8, $0x1010101010101010
  1882  DATA  expandAVX512_40_mat1<>+0x30(SB)/8, $0x2020202020202020
  1883  DATA  expandAVX512_40_mat1<>+0x38(SB)/8, $0x4040404040404040
  1884  
  1885  GLOBL expandAVX512_40_inShuf2<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_40 loads them into Z3 to gather from the input (stage 2).
  1886  DATA  expandAVX512_40_inShuf2<>+0x00(SB)/8, $0x0202020101010101
  1887  DATA  expandAVX512_40_inShuf2<>+0x08(SB)/8, $0x0303030202020202
  1888  DATA  expandAVX512_40_inShuf2<>+0x10(SB)/8, $0x0303030202020202
  1889  DATA  expandAVX512_40_inShuf2<>+0x18(SB)/8, $0xffffff0202020202
  1890  DATA  expandAVX512_40_inShuf2<>+0x20(SB)/8, $0xffffff0202020202
  1891  DATA  expandAVX512_40_inShuf2<>+0x28(SB)/8, $0xffffffffffff0202
  1892  DATA  expandAVX512_40_inShuf2<>+0x30(SB)/8, $0xffffffffffff0202
  1893  DATA  expandAVX512_40_inShuf2<>+0x38(SB)/8, $0xffffffffffff0202
  1894  
  1895  GLOBL expandAVX512_40_mat2<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_40 applies to Z3 (the inShuf2 gather).
  1896  DATA  expandAVX512_40_mat2<>+0x00(SB)/8, $0x8080808080808080
  1897  DATA  expandAVX512_40_mat2<>+0x08(SB)/8, $0x0101010101010101
  1898  DATA  expandAVX512_40_mat2<>+0x10(SB)/8, $0x0202020202020202
  1899  DATA  expandAVX512_40_mat2<>+0x18(SB)/8, $0x0404040404040404
  1900  DATA  expandAVX512_40_mat2<>+0x20(SB)/8, $0x0808080808080808
  1901  DATA  expandAVX512_40_mat2<>+0x28(SB)/8, $0x2020202020202020
  1902  DATA  expandAVX512_40_mat2<>+0x30(SB)/8, $0x4040404040404040
  1903  DATA  expandAVX512_40_mat2<>+0x38(SB)/8, $0x8080808080808080
  1904  
  1905  GLOBL expandAVX512_40_inShuf3<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_40 loads them into Z4 to gather from the input (stage 3).
  1906  DATA  expandAVX512_40_inShuf3<>+0x00(SB)/8, $0xffffffffffff0303
  1907  DATA  expandAVX512_40_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
  1908  DATA  expandAVX512_40_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
  1909  DATA  expandAVX512_40_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
  1910  DATA  expandAVX512_40_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
  1911  DATA  expandAVX512_40_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
  1912  DATA  expandAVX512_40_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  1913  DATA  expandAVX512_40_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  1914  
  1915  GLOBL expandAVX512_40_mat3<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_40 applies to Z4 (the inShuf3 gather); only the first quadword is non-zero.
  1916  DATA  expandAVX512_40_mat3<>+0x00(SB)/8, $0x0101010101010101
  1917  DATA  expandAVX512_40_mat3<>+0x08(SB)/8, $0x0000000000000000
  1918  DATA  expandAVX512_40_mat3<>+0x10(SB)/8, $0x0000000000000000
  1919  DATA  expandAVX512_40_mat3<>+0x18(SB)/8, $0x0000000000000000
  1920  DATA  expandAVX512_40_mat3<>+0x20(SB)/8, $0x0000000000000000
  1921  DATA  expandAVX512_40_mat3<>+0x28(SB)/8, $0x0000000000000000
  1922  DATA  expandAVX512_40_mat3<>+0x30(SB)/8, $0x0000000000000000
  1923  DATA  expandAVX512_40_mat3<>+0x38(SB)/8, $0x0000000000000000
  1924  
  1925  GLOBL expandAVX512_40_outShufLo(SB), RODATA, $0x40 // 64 byte indices for the unmasked VPERMI2B in expandAVX512_40; loaded into Z1, which receives bytes picked from Z0/Z2.
  1926  DATA  expandAVX512_40_outShufLo+0x00(SB)/8, $0x0a09080403020100
  1927  DATA  expandAVX512_40_outShufLo+0x08(SB)/8, $0x1814131211100c0b
  1928  DATA  expandAVX512_40_outShufLo+0x10(SB)/8, $0x232221201c1b1a19
  1929  DATA  expandAVX512_40_outShufLo+0x18(SB)/8, $0x31302c2b2a292824
  1930  DATA  expandAVX512_40_outShufLo+0x20(SB)/8, $0x3c3b3a3938343332
  1931  DATA  expandAVX512_40_outShufLo+0x28(SB)/8, $0x0f0e0d4140070605
  1932  DATA  expandAVX512_40_outShufLo+0x30(SB)/8, $0x1d51501716154948
  1933  DATA  expandAVX512_40_outShufLo+0x38(SB)/8, $0x6027262559581f1e
  1934  
  1935  GLOBL expandAVX512_40_outShufHi0(SB), RODATA, $0x40 // 64 byte indices for the zero-masked VPERMI2B in expandAVX512_40 (loaded into Z5, selects from Z2/Z3); the 0xff slots sit at byte positions that the K1 mask zeroes.
  1936  DATA  expandAVX512_40_outShufHi0+0x00(SB)/8, $0x3938343332313028
  1937  DATA  expandAVX512_40_outShufHi0+0x08(SB)/8, $0x44434241403c3b3a
  1938  DATA  expandAVX512_40_outShufHi0+0x10(SB)/8, $0x5251504c4b4a4948
  1939  DATA  expandAVX512_40_outShufHi0+0x18(SB)/8, $0x605c5b5a59585453
  1940  DATA  expandAVX512_40_outShufHi0+0x20(SB)/8, $0x2c2b2a2964636261
  1941  DATA  expandAVX512_40_outShufHi0+0x28(SB)/8, $0x3e3d69683736352d
  1942  DATA  expandAVX512_40_outShufHi0+0x30(SB)/8, $0x797847464571703f
  1943  DATA  expandAVX512_40_outShufHi0+0x38(SB)/8, $0x575655ffff4f4e4d
  1944  
  1945  GLOBL expandAVX512_40_outShufHi1(SB), RODATA, $0x40 // 64 byte indices for the zero-masked VPERMB over Z4 in expandAVX512_40 (loaded into Z6); only bytes 59 and 60 are not 0xff, matching the two bits set in the K1 mask.
  1946  DATA  expandAVX512_40_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  1947  DATA  expandAVX512_40_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  1948  DATA  expandAVX512_40_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
  1949  DATA  expandAVX512_40_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
  1950  DATA  expandAVX512_40_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  1951  DATA  expandAVX512_40_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
  1952  DATA  expandAVX512_40_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
  1953  DATA  expandAVX512_40_outShufHi1+0x38(SB)/8, $0xffffff0100ffffff
  1954  
  1955  TEXT expandAVX512_40<>(SB), NOSPLIT, $0-0
        	// Entry +0xa0 of ·gcExpandersAVX512 points at this routine.
        	// In:  AX = address of the 64 input bytes.
        	// Out: results are left in Z1 and Z2 (presumably the low and high
        	//      64 output bytes, going by the outShufLo/outShufHi table
        	//      names -- confirm against the caller).
        	// Writes Z0-Z7, K1 and AX; declares no stack frame ($0-0).
  1956  	VMOVDQU64 expandAVX512_40_inShuf0<>(SB), Z0 // gather indices, stage 0
  1957  	VMOVDQU64 expandAVX512_40_inShuf1<>(SB), Z2 // gather indices, stage 1
  1958  	VMOVDQU64 expandAVX512_40_inShuf2<>(SB), Z3 // gather indices, stage 2
  1959  	VMOVDQU64 expandAVX512_40_inShuf3<>(SB), Z4 // gather indices, stage 3
  1960  	VMOVDQU64 expandAVX512_40_outShufLo(SB), Z1 // output shuffle indices (Z0/Z2)
  1961  	VMOVDQU64 expandAVX512_40_outShufHi0(SB), Z5 // output shuffle indices (Z2/Z3)
  1962  	VMOVDQU64 expandAVX512_40_outShufHi1(SB), Z6 // output shuffle indices (Z4)
  1963  	VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes
  1964  	VPERMB Z7, Z0, Z0 // Z0 = input bytes picked by the inShuf0 indices
  1965  	VGF2P8AFFINEQB $0, expandAVX512_40_mat0<>(SB), Z0, Z0 // per-byte bit transform with mat0
  1966  	VPERMB Z7, Z2, Z2 // same gather + transform for stage 1
  1967  	VGF2P8AFFINEQB $0, expandAVX512_40_mat1<>(SB), Z2, Z2
  1968  	VPERMB Z7, Z3, Z3 // stage 2
  1969  	VGF2P8AFFINEQB $0, expandAVX512_40_mat2<>(SB), Z3, Z3
  1970  	VPERMB Z7, Z4, Z4 // stage 3
  1971  	VGF2P8AFFINEQB $0, expandAVX512_40_mat3<>(SB), Z4, Z4
  1972  	VPERMI2B Z2, Z0, Z1 // Z1 = bytes of Z0/Z2 selected by the outShufLo indices
  1973  	MOVQ $0xe7ffffffffffffff, AX // AX becomes scratch; the input was already loaded into Z7
  1974  	KMOVQ AX, K1 // K1 = every output byte except 59 and 60
  1975  	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = selected bytes of Z2/Z3; bytes outside K1 are zeroed
  1976  	MOVQ $0x1800000000000000, AX // bitwise complement of the previous mask
  1977  	KMOVQ AX, K1 // K1 = output bytes 59 and 60 only
  1978  	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = selected bytes of Z4; bytes outside K1 are zeroed
  1979  	VPORQ Z0, Z5, Z2 // Z2 = Z5 | Z0 (the two masks are disjoint)
  1980  	RET
  1981  
  1982  GLOBL expandAVX512_44_inShuf0<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_44 loads them into Z0 to gather from the input (stage 0).
  1983  DATA  expandAVX512_44_inShuf0<>+0x00(SB)/8, $0x0101010000000000
  1984  DATA  expandAVX512_44_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
  1985  DATA  expandAVX512_44_inShuf0<>+0x10(SB)/8, $0x0101010000000000
  1986  DATA  expandAVX512_44_inShuf0<>+0x18(SB)/8, $0x0101010000000000
  1987  DATA  expandAVX512_44_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
  1988  DATA  expandAVX512_44_inShuf0<>+0x28(SB)/8, $0x0101010000000000
  1989  DATA  expandAVX512_44_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
  1990  DATA  expandAVX512_44_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
  1991  
  1992  GLOBL expandAVX512_44_mat0<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_44 applies to Z0 (the inShuf0 gather).
  1993  DATA  expandAVX512_44_mat0<>+0x00(SB)/8, $0x0101010101010101
  1994  DATA  expandAVX512_44_mat0<>+0x08(SB)/8, $0x0101010102020202
  1995  DATA  expandAVX512_44_mat0<>+0x10(SB)/8, $0x0202020202020202
  1996  DATA  expandAVX512_44_mat0<>+0x18(SB)/8, $0x0404040404040404
  1997  DATA  expandAVX512_44_mat0<>+0x20(SB)/8, $0x0404040408080808
  1998  DATA  expandAVX512_44_mat0<>+0x28(SB)/8, $0x0808080808080808
  1999  DATA  expandAVX512_44_mat0<>+0x30(SB)/8, $0x1010101010101010
  2000  DATA  expandAVX512_44_mat0<>+0x38(SB)/8, $0x1010101020202020
  2001  
  2002  GLOBL expandAVX512_44_inShuf1<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_44 loads them into Z2 to gather from the input (stage 1).
  2003  DATA  expandAVX512_44_inShuf1<>+0x00(SB)/8, $0xffffff0000000000
  2004  DATA  expandAVX512_44_inShuf1<>+0x08(SB)/8, $0xffffff0000000000
  2005  DATA  expandAVX512_44_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
  2006  DATA  expandAVX512_44_inShuf1<>+0x18(SB)/8, $0xffffff0000000000
  2007  DATA  expandAVX512_44_inShuf1<>+0x20(SB)/8, $0xffffffffffff0101
  2008  DATA  expandAVX512_44_inShuf1<>+0x28(SB)/8, $0xffffffffffff0101
  2009  DATA  expandAVX512_44_inShuf1<>+0x30(SB)/8, $0xffffffffffff0101
  2010  DATA  expandAVX512_44_inShuf1<>+0x38(SB)/8, $0xff02020202020101
  2011  
  2012  GLOBL expandAVX512_44_mat1<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_44 applies to Z2 (the inShuf1 gather).
  2013  DATA  expandAVX512_44_mat1<>+0x00(SB)/8, $0x2020202020202020
  2014  DATA  expandAVX512_44_mat1<>+0x08(SB)/8, $0x4040404040404040
  2015  DATA  expandAVX512_44_mat1<>+0x10(SB)/8, $0x4040404080808080
  2016  DATA  expandAVX512_44_mat1<>+0x18(SB)/8, $0x8080808080808080
  2017  DATA  expandAVX512_44_mat1<>+0x20(SB)/8, $0x0101010101010101
  2018  DATA  expandAVX512_44_mat1<>+0x28(SB)/8, $0x0202020202020202
  2019  DATA  expandAVX512_44_mat1<>+0x30(SB)/8, $0x0404040404040404
  2020  DATA  expandAVX512_44_mat1<>+0x38(SB)/8, $0x0808080808080808
  2021  
  2022  GLOBL expandAVX512_44_inShuf2<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_44 loads them into Z3 to gather from the input (stage 2).
  2023  DATA  expandAVX512_44_inShuf2<>+0x00(SB)/8, $0x0202020101010101
  2024  DATA  expandAVX512_44_inShuf2<>+0x08(SB)/8, $0xffffffffffff0201
  2025  DATA  expandAVX512_44_inShuf2<>+0x10(SB)/8, $0x0202020101010101
  2026  DATA  expandAVX512_44_inShuf2<>+0x18(SB)/8, $0x0202020101010101
  2027  DATA  expandAVX512_44_inShuf2<>+0x20(SB)/8, $0xffffffffffff0201
  2028  DATA  expandAVX512_44_inShuf2<>+0x28(SB)/8, $0xffff020101010101
  2029  DATA  expandAVX512_44_inShuf2<>+0x30(SB)/8, $0xffffff0202020202
  2030  DATA  expandAVX512_44_inShuf2<>+0x38(SB)/8, $0xffffffffffffff02
  2031  
  2032  GLOBL expandAVX512_44_mat2<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_44 applies to Z3 (the inShuf2 gather).
  2033  DATA  expandAVX512_44_mat2<>+0x00(SB)/8, $0x1010101010101010
  2034  DATA  expandAVX512_44_mat2<>+0x08(SB)/8, $0x1010101020202020
  2035  DATA  expandAVX512_44_mat2<>+0x10(SB)/8, $0x2020202020202020
  2036  DATA  expandAVX512_44_mat2<>+0x18(SB)/8, $0x4040404040404040
  2037  DATA  expandAVX512_44_mat2<>+0x20(SB)/8, $0x4040404080808080
  2038  DATA  expandAVX512_44_mat2<>+0x28(SB)/8, $0x8080808080808080
  2039  DATA  expandAVX512_44_mat2<>+0x30(SB)/8, $0x0101010101010101
  2040  DATA  expandAVX512_44_mat2<>+0x38(SB)/8, $0x0101010102020202
  2041  
  2042  GLOBL expandAVX512_44_inShuf3<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_44 loads them into Z4 to gather from the input (stage 3).
  2043  DATA  expandAVX512_44_inShuf3<>+0x00(SB)/8, $0xffffff0202020202
  2044  DATA  expandAVX512_44_inShuf3<>+0x08(SB)/8, $0xffffff0202020202
  2045  DATA  expandAVX512_44_inShuf3<>+0x10(SB)/8, $0xffffffffffffff02
  2046  DATA  expandAVX512_44_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
  2047  DATA  expandAVX512_44_inShuf3<>+0x20(SB)/8, $0xffffffffffff0202
  2048  DATA  expandAVX512_44_inShuf3<>+0x28(SB)/8, $0xffffffffffff0202
  2049  DATA  expandAVX512_44_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  2050  DATA  expandAVX512_44_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  2051  
  2052  GLOBL expandAVX512_44_mat3<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_44 applies to Z4 (the inShuf3 gather).
  2053  DATA  expandAVX512_44_mat3<>+0x00(SB)/8, $0x0202020202020202
  2054  DATA  expandAVX512_44_mat3<>+0x08(SB)/8, $0x0404040404040404
  2055  DATA  expandAVX512_44_mat3<>+0x10(SB)/8, $0x0404040408080808
  2056  DATA  expandAVX512_44_mat3<>+0x18(SB)/8, $0x1010101010101010
  2057  DATA  expandAVX512_44_mat3<>+0x20(SB)/8, $0x2020202020202020
  2058  DATA  expandAVX512_44_mat3<>+0x28(SB)/8, $0x4040404040404040
  2059  DATA  expandAVX512_44_mat3<>+0x30(SB)/8, $0x0000000000000000
  2060  DATA  expandAVX512_44_mat3<>+0x38(SB)/8, $0x0000000000000000
  2061  
  2062  GLOBL expandAVX512_44_outShufLo(SB), RODATA, $0x40 // 64 byte indices for the unmasked VPERMI2B in expandAVX512_44; loaded into Z1, which receives bytes picked from Z0/Z2.
  2063  DATA  expandAVX512_44_outShufLo+0x00(SB)/8, $0x1110080403020100
  2064  DATA  expandAVX512_44_outShufLo+0x08(SB)/8, $0x1c1b1a1918141312
  2065  DATA  expandAVX512_44_outShufLo+0x10(SB)/8, $0x31302c2b2a292820
  2066  DATA  expandAVX512_44_outShufLo+0x18(SB)/8, $0x4342414038343332
  2067  DATA  expandAVX512_44_outShufLo+0x20(SB)/8, $0x58504c4b4a494844
  2068  DATA  expandAVX512_44_outShufLo+0x28(SB)/8, $0x600706055c5b5a59
  2069  DATA  expandAVX512_44_outShufLo+0x30(SB)/8, $0x1d69681716150961
  2070  DATA  expandAVX512_44_outShufLo+0x38(SB)/8, $0x2f2e2d2171701f1e
  2071  
  2072  GLOBL expandAVX512_44_outShufHi0(SB), RODATA, $0x40 // 64 byte indices for the zero-masked VPERMI2B in expandAVX512_44 (loaded into Z5, selects from Z2/Z3); the 0xff slots sit at byte positions that the K1 mask zeroes.
  2073  DATA  expandAVX512_44_outShufHi0+0x00(SB)/8, $0x4844434241403938
  2074  DATA  expandAVX512_44_outShufHi0+0x08(SB)/8, $0x5a59585453525150
  2075  DATA  expandAVX512_44_outShufHi0+0x10(SB)/8, $0x6c6b6a6968605c5b
  2076  DATA  expandAVX512_44_outShufHi0+0x18(SB)/8, $0xffff787473727170
  2077  DATA  expandAVX512_44_outShufHi0+0x20(SB)/8, $0xffffffffffffffff
  2078  DATA  expandAVX512_44_outShufHi0+0x28(SB)/8, $0x46453e3d3c3b3aff
  2079  DATA  expandAVX512_44_outShufHi0+0x30(SB)/8, $0xff57565549ffff47
  2080  DATA  expandAVX512_44_outShufHi0+0x38(SB)/8, $0x6d61ffff5f5e5dff
  2081  
  2082  GLOBL expandAVX512_44_outShufHi1(SB), RODATA, $0x40 // 64 byte indices for the zero-masked VPERMB over Z4 in expandAVX512_44 (loaded into Z6); the 0xff slots sit at byte positions that the K1 mask zeroes.
  2083  DATA  expandAVX512_44_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
  2084  DATA  expandAVX512_44_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  2085  DATA  expandAVX512_44_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
  2086  DATA  expandAVX512_44_outShufHi1+0x18(SB)/8, $0x0100ffffffffffff
  2087  DATA  expandAVX512_44_outShufHi1+0x20(SB)/8, $0x0c0b0a0908040302
  2088  DATA  expandAVX512_44_outShufHi1+0x28(SB)/8, $0xffffffffffffff10
  2089  DATA  expandAVX512_44_outShufHi1+0x30(SB)/8, $0x20ffffffff1918ff
  2090  DATA  expandAVX512_44_outShufHi1+0x38(SB)/8, $0xffff2928ffffff21
  2091  
  2092  TEXT expandAVX512_44<>(SB), NOSPLIT, $0-0
        	// Entry +0xa8 of ·gcExpandersAVX512 points at this routine.
        	// In:  AX = address of the 64 input bytes.
        	// Out: results are left in Z1 and Z2 (presumably the low and high
        	//      64 output bytes, going by the outShufLo/outShufHi table
        	//      names -- confirm against the caller).
        	// Writes Z0-Z7, K1 and AX; declares no stack frame ($0-0).
  2093  	VMOVDQU64 expandAVX512_44_inShuf0<>(SB), Z0 // gather indices, stage 0
  2094  	VMOVDQU64 expandAVX512_44_inShuf1<>(SB), Z2 // gather indices, stage 1
  2095  	VMOVDQU64 expandAVX512_44_inShuf2<>(SB), Z3 // gather indices, stage 2
  2096  	VMOVDQU64 expandAVX512_44_inShuf3<>(SB), Z4 // gather indices, stage 3
  2097  	VMOVDQU64 expandAVX512_44_outShufLo(SB), Z1 // output shuffle indices (Z0/Z2)
  2098  	VMOVDQU64 expandAVX512_44_outShufHi0(SB), Z5 // output shuffle indices (Z2/Z3)
  2099  	VMOVDQU64 expandAVX512_44_outShufHi1(SB), Z6 // output shuffle indices (Z4)
  2100  	VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes
  2101  	VPERMB Z7, Z0, Z0 // Z0 = input bytes picked by the inShuf0 indices
  2102  	VGF2P8AFFINEQB $0, expandAVX512_44_mat0<>(SB), Z0, Z0 // per-byte bit transform with mat0
  2103  	VPERMB Z7, Z2, Z2 // same gather + transform for stage 1
  2104  	VGF2P8AFFINEQB $0, expandAVX512_44_mat1<>(SB), Z2, Z2
  2105  	VPERMB Z7, Z3, Z3 // stage 2
  2106  	VGF2P8AFFINEQB $0, expandAVX512_44_mat2<>(SB), Z3, Z3
  2107  	VPERMB Z7, Z4, Z4 // stage 3
  2108  	VGF2P8AFFINEQB $0, expandAVX512_44_mat3<>(SB), Z4, Z4
  2109  	VPERMI2B Z2, Z0, Z1 // Z1 = bytes of Z0/Z2 selected by the outShufLo indices
  2110  	MOVQ $0xce79fe003fffffff, AX // AX becomes scratch; the input was already loaded into Z7
  2111  	KMOVQ AX, K1 // K1 = output bytes that come from Z2/Z3
  2112  	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = selected bytes of Z2/Z3; bytes outside K1 are zeroed
  2113  	MOVQ $0x318601ffc0000000, AX // bitwise complement of the previous mask
  2114  	KMOVQ AX, K1 // K1 = output bytes that come from Z4
  2115  	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = selected bytes of Z4; bytes outside K1 are zeroed
  2116  	VPORQ Z0, Z5, Z2 // Z2 = Z5 | Z0 (the two masks are disjoint)
  2117  	RET
  2118  
  2119  GLOBL expandAVX512_48_inShuf0<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_48 loads them into Z0 to gather from the input (stage 0).
  2120  DATA  expandAVX512_48_inShuf0<>+0x00(SB)/8, $0x0101000000000000
  2121  DATA  expandAVX512_48_inShuf0<>+0x08(SB)/8, $0x0101000000000000
  2122  DATA  expandAVX512_48_inShuf0<>+0x10(SB)/8, $0x0101000000000000
  2123  DATA  expandAVX512_48_inShuf0<>+0x18(SB)/8, $0xffff000000000000
  2124  DATA  expandAVX512_48_inShuf0<>+0x20(SB)/8, $0xffff000000000000
  2125  DATA  expandAVX512_48_inShuf0<>+0x28(SB)/8, $0xffff000000000000
  2126  DATA  expandAVX512_48_inShuf0<>+0x30(SB)/8, $0xffff000000000000
  2127  DATA  expandAVX512_48_inShuf0<>+0x38(SB)/8, $0xffff000000000000
  2128  
  2129  GLOBL expandAVX512_48_mat0<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_48 applies to Z0 (the inShuf0 gather).
  2130  DATA  expandAVX512_48_mat0<>+0x00(SB)/8, $0x0101010101010101
  2131  DATA  expandAVX512_48_mat0<>+0x08(SB)/8, $0x0202020202020202
  2132  DATA  expandAVX512_48_mat0<>+0x10(SB)/8, $0x0404040404040404
  2133  DATA  expandAVX512_48_mat0<>+0x18(SB)/8, $0x0808080808080808
  2134  DATA  expandAVX512_48_mat0<>+0x20(SB)/8, $0x1010101010101010
  2135  DATA  expandAVX512_48_mat0<>+0x28(SB)/8, $0x2020202020202020
  2136  DATA  expandAVX512_48_mat0<>+0x30(SB)/8, $0x4040404040404040
  2137  DATA  expandAVX512_48_mat0<>+0x38(SB)/8, $0x8080808080808080
  2138  
  2139  GLOBL expandAVX512_48_inShuf1<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_48 loads them into Z3 to gather from the input (stage 1).
  2140  DATA  expandAVX512_48_inShuf1<>+0x00(SB)/8, $0xffffffff01010101
  2141  DATA  expandAVX512_48_inShuf1<>+0x08(SB)/8, $0xffffffff01010101
  2142  DATA  expandAVX512_48_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
  2143  DATA  expandAVX512_48_inShuf1<>+0x18(SB)/8, $0x0202020202020101
  2144  DATA  expandAVX512_48_inShuf1<>+0x20(SB)/8, $0x0202010101010101
  2145  DATA  expandAVX512_48_inShuf1<>+0x28(SB)/8, $0x0202010101010101
  2146  DATA  expandAVX512_48_inShuf1<>+0x30(SB)/8, $0x0202010101010101
  2147  DATA  expandAVX512_48_inShuf1<>+0x38(SB)/8, $0xffff010101010101
  2148  
  2149  GLOBL expandAVX512_48_mat1<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_48 applies to Z3 (the inShuf1 gather).
  2150  DATA  expandAVX512_48_mat1<>+0x00(SB)/8, $0x0101010101010101
  2151  DATA  expandAVX512_48_mat1<>+0x08(SB)/8, $0x0202020202020202
  2152  DATA  expandAVX512_48_mat1<>+0x10(SB)/8, $0x0404040404040404
  2153  DATA  expandAVX512_48_mat1<>+0x18(SB)/8, $0x0404040404040404
  2154  DATA  expandAVX512_48_mat1<>+0x20(SB)/8, $0x0808080808080808
  2155  DATA  expandAVX512_48_mat1<>+0x28(SB)/8, $0x1010101010101010
  2156  DATA  expandAVX512_48_mat1<>+0x30(SB)/8, $0x2020202020202020
  2157  DATA  expandAVX512_48_mat1<>+0x38(SB)/8, $0x4040404040404040
  2158  
  2159  GLOBL expandAVX512_48_inShuf2<>(SB), RODATA, $0x40 // 64 VPERMB byte indices; expandAVX512_48 loads them into Z4 to gather from the input (stage 2).
  2160  DATA  expandAVX512_48_inShuf2<>+0x00(SB)/8, $0xffff010101010101
  2161  DATA  expandAVX512_48_inShuf2<>+0x08(SB)/8, $0xffff020202020202
  2162  DATA  expandAVX512_48_inShuf2<>+0x10(SB)/8, $0xffff020202020202
  2163  DATA  expandAVX512_48_inShuf2<>+0x18(SB)/8, $0xffffffff02020202
  2164  DATA  expandAVX512_48_inShuf2<>+0x20(SB)/8, $0xffffffff02020202
  2165  DATA  expandAVX512_48_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
  2166  DATA  expandAVX512_48_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
  2167  DATA  expandAVX512_48_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
  2168  
  2169  GLOBL expandAVX512_48_mat2<>(SB), RODATA, $0x40 // 64-byte matrix operand of the VGF2P8AFFINEQB that expandAVX512_48 applies to Z4 (the inShuf2 gather); the last three quadwords are zero.
  2170  DATA  expandAVX512_48_mat2<>+0x00(SB)/8, $0x8080808080808080
  2171  DATA  expandAVX512_48_mat2<>+0x08(SB)/8, $0x0101010101010101
  2172  DATA  expandAVX512_48_mat2<>+0x10(SB)/8, $0x0202020202020202
  2173  DATA  expandAVX512_48_mat2<>+0x18(SB)/8, $0x0808080808080808
  2174  DATA  expandAVX512_48_mat2<>+0x20(SB)/8, $0x1010101010101010
  2175  DATA  expandAVX512_48_mat2<>+0x28(SB)/8, $0x0000000000000000
  2176  DATA  expandAVX512_48_mat2<>+0x30(SB)/8, $0x0000000000000000
  2177  DATA  expandAVX512_48_mat2<>+0x38(SB)/8, $0x0000000000000000
  2178  
  2179  GLOBL expandAVX512_48_outShufLo(SB), RODATA, $0x40 // 64 byte indices for the first VPERMI2B in expandAVX512_48; loaded into Z1, which receives bytes picked from Z0/Z3.
  2180  DATA  expandAVX512_48_outShufLo+0x00(SB)/8, $0x0908050403020100
  2181  DATA  expandAVX512_48_outShufLo+0x08(SB)/8, $0x131211100d0c0b0a
  2182  DATA  expandAVX512_48_outShufLo+0x10(SB)/8, $0x1d1c1b1a19181514
  2183  DATA  expandAVX512_48_outShufLo+0x18(SB)/8, $0x2928252423222120
  2184  DATA  expandAVX512_48_outShufLo+0x20(SB)/8, $0x333231302d2c2b2a
  2185  DATA  expandAVX512_48_outShufLo+0x28(SB)/8, $0x3d3c3b3a39383534
  2186  DATA  expandAVX512_48_outShufLo+0x30(SB)/8, $0x0f0e434241400706
  2187  DATA  expandAVX512_48_outShufLo+0x38(SB)/8, $0x515017164b4a4948
  2188  
  2189  GLOBL expandAVX512_48_outShufHi(SB), RODATA, $0x40 // 64 byte indices for the second VPERMI2B in expandAVX512_48; loaded into Z2, which receives bytes picked from Z3/Z4.
  2190  DATA  expandAVX512_48_outShufHi+0x00(SB)/8, $0x2524232221201918
  2191  DATA  expandAVX512_48_outShufHi+0x08(SB)/8, $0x31302d2c2b2a2928
  2192  DATA  expandAVX512_48_outShufHi+0x10(SB)/8, $0x3b3a393835343332
  2193  DATA  expandAVX512_48_outShufHi+0x18(SB)/8, $0x4544434241403d3c
  2194  DATA  expandAVX512_48_outShufHi+0x20(SB)/8, $0x51504d4c4b4a4948
  2195  DATA  expandAVX512_48_outShufHi+0x28(SB)/8, $0x1d1c1b1a55545352
  2196  DATA  expandAVX512_48_outShufHi+0x30(SB)/8, $0x5b5a595827261f1e
  2197  DATA  expandAVX512_48_outShufHi+0x38(SB)/8, $0x3736636261602f2e
  2198  
  2199  TEXT expandAVX512_48<>(SB), NOSPLIT, $0-0 // File-local, frameless ($0-0) routine: reads 64 bytes at (AX), runs three gather + bit-matrix stages, and leaves the final shuffles in Z1 and Z2 (presumably the outputs; the caller is not visible here).
  2200  	VMOVDQU64 expandAVX512_48_inShuf0<>(SB), Z0 // Z0 = byte indices for the first input gather
  2201  	VMOVDQU64 expandAVX512_48_inShuf1<>(SB), Z3 // Z3 = byte indices for the second input gather
  2202  	VMOVDQU64 expandAVX512_48_inShuf2<>(SB), Z4 // Z4 = byte indices for the third input gather
  2203  	VMOVDQU64 expandAVX512_48_outShufLo(SB), Z1 // Z1 = indices for the first output shuffle
  2204  	VMOVDQU64 expandAVX512_48_outShufHi(SB), Z2 // Z2 = indices for the second output shuffle
  2205  	VMOVDQU64 (AX), Z5 // Z5 = 64 input bytes, unaligned load from the address in AX
  2206  	VPERMB Z5, Z0, Z0 // Z0 = input bytes selected by the inShuf0 indices
  2207  	VGF2P8AFFINEQB $0, expandAVX512_48_mat0<>(SB), Z0, Z0 // transform each byte by the 8x8 bit matrix in the matching qword of mat0; constant term is 0
  2208  	VPERMB Z5, Z3, Z3 // Z3 = input bytes selected by the inShuf1 indices
  2209  	VGF2P8AFFINEQB $0, expandAVX512_48_mat1<>(SB), Z3, Z3 // same transform with mat1
  2210  	VPERMB Z5, Z4, Z4 // Z4 = input bytes selected by the inShuf2 indices
  2211  	VGF2P8AFFINEQB $0, expandAVX512_48_mat2<>(SB), Z4, Z4 // same transform with mat2
  2212  	VPERMI2B Z3, Z0, Z1 // Z1 = bytes picked from the Z0/Z3 pair by the outShufLo indices (the index register is overwritten with the result)
  2213  	VPERMI2B Z4, Z3, Z2 // Z2 = bytes picked from the Z3/Z4 pair by the outShufHi indices
  2214  	RET // Z1 and Z2 hold the final values; Z0 and Z3-Z5 hold intermediates
  2215  
  2216  GLOBL expandAVX512_52_inShuf0<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_52 loads it into Z0 as the VPERMB index vector for the first gather from the input bytes.
  2217  DATA  expandAVX512_52_inShuf0<>+0x00(SB)/8, $0x0101000000000000
  2218  DATA  expandAVX512_52_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100 // NOTE(review): 0xff entries look like filler for lanes no output shuffle reads - confirm against mkasm.go
  2219  DATA  expandAVX512_52_inShuf0<>+0x10(SB)/8, $0x0101000000000000
  2220  DATA  expandAVX512_52_inShuf0<>+0x18(SB)/8, $0xffff000000000000
  2221  DATA  expandAVX512_52_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
  2222  DATA  expandAVX512_52_inShuf0<>+0x28(SB)/8, $0xffff000000000000
  2223  DATA  expandAVX512_52_inShuf0<>+0x30(SB)/8, $0xffff000000000000
  2224  DATA  expandAVX512_52_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
  2225  
  2226  GLOBL expandAVX512_52_mat0<>(SB), RODATA, $0x40 // 64-byte read-only table: memory matrix operand of the first VGF2P8AFFINEQB in expandAVX512_52 (applied to Z0); one 8x8 bit matrix per qword.
  2227  DATA  expandAVX512_52_mat0<>+0x00(SB)/8, $0x0101010101010101
  2228  DATA  expandAVX512_52_mat0<>+0x08(SB)/8, $0x0101010102020202 // mixed qword: its two halves carry different single-bit row masks
  2229  DATA  expandAVX512_52_mat0<>+0x10(SB)/8, $0x0202020202020202
  2230  DATA  expandAVX512_52_mat0<>+0x18(SB)/8, $0x0404040404040404
  2231  DATA  expandAVX512_52_mat0<>+0x20(SB)/8, $0x0404040408080808
  2232  DATA  expandAVX512_52_mat0<>+0x28(SB)/8, $0x0808080808080808
  2233  DATA  expandAVX512_52_mat0<>+0x30(SB)/8, $0x1010101010101010
  2234  DATA  expandAVX512_52_mat0<>+0x38(SB)/8, $0x1010101020202020
  2235  
  2236  GLOBL expandAVX512_52_inShuf1<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_52 loads it into Z2 as the VPERMB index vector for the second gather from the input bytes.
  2237  DATA  expandAVX512_52_inShuf1<>+0x00(SB)/8, $0xffff000000000000
  2238  DATA  expandAVX512_52_inShuf1<>+0x08(SB)/8, $0xffff000000000000
  2239  DATA  expandAVX512_52_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
  2240  DATA  expandAVX512_52_inShuf1<>+0x18(SB)/8, $0xffff000000000000
  2241  DATA  expandAVX512_52_inShuf1<>+0x20(SB)/8, $0xffffffff01010101
  2242  DATA  expandAVX512_52_inShuf1<>+0x28(SB)/8, $0xffffffffff010101
  2243  DATA  expandAVX512_52_inShuf1<>+0x30(SB)/8, $0xff02020202020201
  2244  DATA  expandAVX512_52_inShuf1<>+0x38(SB)/8, $0x0202010101010101
  2245  
  2246  GLOBL expandAVX512_52_mat1<>(SB), RODATA, $0x40 // 64-byte read-only table: memory matrix operand of the second VGF2P8AFFINEQB in expandAVX512_52 (applied to Z2); one 8x8 bit matrix per qword.
  2247  DATA  expandAVX512_52_mat1<>+0x00(SB)/8, $0x2020202020202020
  2248  DATA  expandAVX512_52_mat1<>+0x08(SB)/8, $0x4040404040404040
  2249  DATA  expandAVX512_52_mat1<>+0x10(SB)/8, $0x4040404080808080
  2250  DATA  expandAVX512_52_mat1<>+0x18(SB)/8, $0x8080808080808080
  2251  DATA  expandAVX512_52_mat1<>+0x20(SB)/8, $0x0101010101010101
  2252  DATA  expandAVX512_52_mat1<>+0x28(SB)/8, $0x0202020202020202
  2253  DATA  expandAVX512_52_mat1<>+0x30(SB)/8, $0x0202020202020202
  2254  DATA  expandAVX512_52_mat1<>+0x38(SB)/8, $0x0404040404040404
  2255  
  2256  GLOBL expandAVX512_52_inShuf2<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_52 loads it into Z3 as the VPERMB index vector for the third gather from the input bytes.
  2257  DATA  expandAVX512_52_inShuf2<>+0x00(SB)/8, $0xffffffffffff0201
  2258  DATA  expandAVX512_52_inShuf2<>+0x08(SB)/8, $0x0202010101010101
  2259  DATA  expandAVX512_52_inShuf2<>+0x10(SB)/8, $0xffff010101010101
  2260  DATA  expandAVX512_52_inShuf2<>+0x18(SB)/8, $0xffffffffffffff01
  2261  DATA  expandAVX512_52_inShuf2<>+0x20(SB)/8, $0xffff010101010101
  2262  DATA  expandAVX512_52_inShuf2<>+0x28(SB)/8, $0xffff010101010101
  2263  DATA  expandAVX512_52_inShuf2<>+0x30(SB)/8, $0xffffffffffffff01
  2264  DATA  expandAVX512_52_inShuf2<>+0x38(SB)/8, $0xffff010101010101
  2265  
  2266  GLOBL expandAVX512_52_mat2<>(SB), RODATA, $0x40 // 64-byte read-only table: memory matrix operand of the third VGF2P8AFFINEQB in expandAVX512_52 (applied to Z3); one 8x8 bit matrix per qword.
  2267  DATA  expandAVX512_52_mat2<>+0x00(SB)/8, $0x0404040408080808
  2268  DATA  expandAVX512_52_mat2<>+0x08(SB)/8, $0x0808080808080808
  2269  DATA  expandAVX512_52_mat2<>+0x10(SB)/8, $0x1010101010101010
  2270  DATA  expandAVX512_52_mat2<>+0x18(SB)/8, $0x1010101020202020
  2271  DATA  expandAVX512_52_mat2<>+0x20(SB)/8, $0x2020202020202020
  2272  DATA  expandAVX512_52_mat2<>+0x28(SB)/8, $0x4040404040404040
  2273  DATA  expandAVX512_52_mat2<>+0x30(SB)/8, $0x4040404080808080
  2274  DATA  expandAVX512_52_mat2<>+0x38(SB)/8, $0x8080808080808080
  2275  
  2276  GLOBL expandAVX512_52_inShuf3<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_52 loads it into Z4 as the VPERMB index vector for the fourth gather from the input bytes.
  2277  DATA  expandAVX512_52_inShuf3<>+0x00(SB)/8, $0xffff020202020202
  2278  DATA  expandAVX512_52_inShuf3<>+0x08(SB)/8, $0xffffffffffffff02
  2279  DATA  expandAVX512_52_inShuf3<>+0x10(SB)/8, $0xffffffff02020202
  2280  DATA  expandAVX512_52_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
  2281  DATA  expandAVX512_52_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff // qwords 4-7 are all 0xff; the matching mat3 qwords are zero matrices
  2282  DATA  expandAVX512_52_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
  2283  DATA  expandAVX512_52_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  2284  DATA  expandAVX512_52_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  2285  
  2286  GLOBL expandAVX512_52_mat3<>(SB), RODATA, $0x40 // 64-byte read-only table: memory matrix operand of the fourth VGF2P8AFFINEQB in expandAVX512_52 (applied to Z4); one 8x8 bit matrix per qword.
  2287  DATA  expandAVX512_52_mat3<>+0x00(SB)/8, $0x0101010101010101
  2288  DATA  expandAVX512_52_mat3<>+0x08(SB)/8, $0x0101010102020202
  2289  DATA  expandAVX512_52_mat3<>+0x10(SB)/8, $0x0404040404040404
  2290  DATA  expandAVX512_52_mat3<>+0x18(SB)/8, $0x0808080808080808
  2291  DATA  expandAVX512_52_mat3<>+0x20(SB)/8, $0x0000000000000000 // qwords 4-7 are all-zero matrices; the matching inShuf3 qwords are all 0xff
  2292  DATA  expandAVX512_52_mat3<>+0x28(SB)/8, $0x0000000000000000
  2293  DATA  expandAVX512_52_mat3<>+0x30(SB)/8, $0x0000000000000000
  2294  DATA  expandAVX512_52_mat3<>+0x38(SB)/8, $0x0000000000000000
  2295  
  2296  GLOBL expandAVX512_52_outShufLo(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_52 loads it into Z1 as the VPERMI2B index vector over the Z0/Z2 register pair.
  2297  DATA  expandAVX512_52_outShufLo+0x00(SB)/8, $0x1008050403020100
  2298  DATA  expandAVX512_52_outShufLo+0x08(SB)/8, $0x1a19181514131211
  2299  DATA  expandAVX512_52_outShufLo+0x10(SB)/8, $0x2b2a2928201d1c1b
  2300  DATA  expandAVX512_52_outShufLo+0x18(SB)/8, $0x3534333231302d2c
  2301  DATA  expandAVX512_52_outShufLo+0x20(SB)/8, $0x4845444342414038
  2302  DATA  expandAVX512_52_outShufLo+0x28(SB)/8, $0x5958504d4c4b4a49
  2303  DATA  expandAVX512_52_outShufLo+0x30(SB)/8, $0x616007065d5c5b5a
  2304  DATA  expandAVX512_52_outShufLo+0x38(SB)/8, $0x6a69681716096362
  2305  
  2306  GLOBL expandAVX512_52_outShufHi0(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_52 loads it into Z5 as the index vector of the zero-masked VPERMI2B over the Z2/Z3 register pair.
  2307  DATA  expandAVX512_52_outShufHi0+0x00(SB)/8, $0x403d3c3b3a393830
  2308  DATA  expandAVX512_52_outShufHi0+0x08(SB)/8, $0x51504d4c4b4a4948
  2309  DATA  expandAVX512_52_outShufHi0+0x10(SB)/8, $0x6261605855545352
  2310  DATA  expandAVX512_52_outShufHi0+0x18(SB)/8, $0x6c6b6a6968656463
  2311  DATA  expandAVX512_52_outShufHi0+0x20(SB)/8, $0x7d7c7b7a7978706d
  2312  DATA  expandAVX512_52_outShufHi0+0x28(SB)/8, $0x31ffffffffffffff // 0xff entries sit exactly on the byte lanes whose bit is clear in the 0x387f80ffffffffff mask used by expandAVX512_52
  2313  DATA  expandAVX512_52_outShufHi0+0x30(SB)/8, $0xff3f3e3635343332
  2314  DATA  expandAVX512_52_outShufHi0+0x38(SB)/8, $0xffff4f4e41ffffff
  2315  
  2316  GLOBL expandAVX512_52_outShufHi1(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_52 loads it into Z6 as the index vector of the zero-masked VPERMB over Z4.
  2317  DATA  expandAVX512_52_outShufHi1+0x00(SB)/8, $0xffffffffffffffff // 0xff entries sit exactly on the byte lanes whose bit is clear in the 0xc7807f0000000000 mask used by expandAVX512_52
  2318  DATA  expandAVX512_52_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  2319  DATA  expandAVX512_52_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
  2320  DATA  expandAVX512_52_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
  2321  DATA  expandAVX512_52_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  2322  DATA  expandAVX512_52_outShufHi1+0x28(SB)/8, $0xff08050403020100
  2323  DATA  expandAVX512_52_outShufHi1+0x30(SB)/8, $0x10ffffffffffffff
  2324  DATA  expandAVX512_52_outShufHi1+0x38(SB)/8, $0x1918ffffff131211
  2325  
  2326  TEXT expandAVX512_52<>(SB), NOSPLIT, $0-0 // File-local, frameless ($0-0) routine: reads 64 bytes at (AX), runs four gather + bit-matrix stages, and leaves the final values in Z1 and Z2 (presumably the outputs; the caller is not visible here). Overwrites AX and K1.
  2327  	VMOVDQU64 expandAVX512_52_inShuf0<>(SB), Z0 // Z0 = byte indices for the first input gather
  2328  	VMOVDQU64 expandAVX512_52_inShuf1<>(SB), Z2 // Z2 = byte indices for the second input gather
  2329  	VMOVDQU64 expandAVX512_52_inShuf2<>(SB), Z3 // Z3 = byte indices for the third input gather
  2330  	VMOVDQU64 expandAVX512_52_inShuf3<>(SB), Z4 // Z4 = byte indices for the fourth input gather
  2331  	VMOVDQU64 expandAVX512_52_outShufLo(SB), Z1 // Z1 = indices for the first output shuffle
  2332  	VMOVDQU64 expandAVX512_52_outShufHi0(SB), Z5 // Z5 = indices for the first half of the second output
  2333  	VMOVDQU64 expandAVX512_52_outShufHi1(SB), Z6 // Z6 = indices for the second half of the second output
  2334  	VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes, unaligned load from the address in AX
  2335  	VPERMB Z7, Z0, Z0 // Z0 = input bytes selected by the inShuf0 indices
  2336  	VGF2P8AFFINEQB $0, expandAVX512_52_mat0<>(SB), Z0, Z0 // transform each byte by the 8x8 bit matrix in the matching qword of mat0; constant term is 0
  2337  	VPERMB Z7, Z2, Z2 // Z2 = input bytes selected by the inShuf1 indices
  2338  	VGF2P8AFFINEQB $0, expandAVX512_52_mat1<>(SB), Z2, Z2 // same transform with mat1
  2339  	VPERMB Z7, Z3, Z3 // Z3 = input bytes selected by the inShuf2 indices
  2340  	VGF2P8AFFINEQB $0, expandAVX512_52_mat2<>(SB), Z3, Z3 // same transform with mat2
  2341  	VPERMB Z7, Z4, Z4 // Z4 = input bytes selected by the inShuf3 indices
  2342  	VGF2P8AFFINEQB $0, expandAVX512_52_mat3<>(SB), Z4, Z4 // same transform with mat3
  2343  	VPERMI2B Z2, Z0, Z1 // Z1 = bytes picked from the Z0/Z2 pair by the outShufLo indices
  2344  	MOVQ $0x387f80ffffffffff, AX // AX becomes scratch for the lane mask; the input pointer is overwritten (the input is already in Z7)
  2345  	KMOVQ AX, K1 // K1 = byte lanes that the Z2/Z3 shuffle supplies
  2346  	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = bytes picked from the Z2/Z3 pair by outShufHi0; lanes outside K1 are zeroed
  2347  	MOVQ $0xc7807f0000000000, AX // bitwise complement of the previous mask
  2348  	KMOVQ AX, K1 // K1 = the remaining byte lanes, supplied from Z4
  2349  	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = bytes picked from Z4 by outShufHi1; lanes outside K1 are zeroed (Z0's earlier contents are no longer needed)
  2350  	VPORQ Z0, Z5, Z2 // Z2 = Z5 | Z0; the masks are disjoint, so this merges the two partial results
  2351  	RET // Z1 and Z2 hold the final values
  2352  
  2353  GLOBL expandAVX512_56_inShuf0<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_56 loads it into Z0 as the VPERMB index vector for the first gather from the input bytes.
  2354  DATA  expandAVX512_56_inShuf0<>+0x00(SB)/8, $0x0100000000000000
  2355  DATA  expandAVX512_56_inShuf0<>+0x08(SB)/8, $0x0100000000000000
  2356  DATA  expandAVX512_56_inShuf0<>+0x10(SB)/8, $0xff00000000000000 // NOTE(review): 0xff entries look like filler for lanes no output shuffle reads - confirm against mkasm.go
  2357  DATA  expandAVX512_56_inShuf0<>+0x18(SB)/8, $0xff00000000000000
  2358  DATA  expandAVX512_56_inShuf0<>+0x20(SB)/8, $0xff00000000000000
  2359  DATA  expandAVX512_56_inShuf0<>+0x28(SB)/8, $0xff00000000000000
  2360  DATA  expandAVX512_56_inShuf0<>+0x30(SB)/8, $0xff00000000000000
  2361  DATA  expandAVX512_56_inShuf0<>+0x38(SB)/8, $0xff00000000000000
  2362  
  2363  GLOBL expandAVX512_56_mat0<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_56 loads it into Z3 and uses it as the matrix operand of both its first and second VGF2P8AFFINEQB.
  2364  DATA  expandAVX512_56_mat0<>+0x00(SB)/8, $0x0101010101010101 // qword j repeats the single-bit mask 1<<j in all eight matrix rows
  2365  DATA  expandAVX512_56_mat0<>+0x08(SB)/8, $0x0202020202020202
  2366  DATA  expandAVX512_56_mat0<>+0x10(SB)/8, $0x0404040404040404
  2367  DATA  expandAVX512_56_mat0<>+0x18(SB)/8, $0x0808080808080808
  2368  DATA  expandAVX512_56_mat0<>+0x20(SB)/8, $0x1010101010101010
  2369  DATA  expandAVX512_56_mat0<>+0x28(SB)/8, $0x2020202020202020
  2370  DATA  expandAVX512_56_mat0<>+0x30(SB)/8, $0x4040404040404040
  2371  DATA  expandAVX512_56_mat0<>+0x38(SB)/8, $0x8080808080808080
  2372  
  2373  GLOBL expandAVX512_56_inShuf1<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_56 loads it into Z4 as the VPERMB index vector for the second gather from the input bytes.
  2374  DATA  expandAVX512_56_inShuf1<>+0x00(SB)/8, $0xffff010101010101
  2375  DATA  expandAVX512_56_inShuf1<>+0x08(SB)/8, $0x0202010101010101
  2376  DATA  expandAVX512_56_inShuf1<>+0x10(SB)/8, $0x0201010101010101
  2377  DATA  expandAVX512_56_inShuf1<>+0x18(SB)/8, $0xff01010101010101
  2378  DATA  expandAVX512_56_inShuf1<>+0x20(SB)/8, $0xff01010101010101
  2379  DATA  expandAVX512_56_inShuf1<>+0x28(SB)/8, $0xff01010101010101
  2380  DATA  expandAVX512_56_inShuf1<>+0x30(SB)/8, $0xff01010101010101
  2381  DATA  expandAVX512_56_inShuf1<>+0x38(SB)/8, $0xff01010101010101
  2382  
  2383  GLOBL expandAVX512_56_inShuf2<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_56 loads it into Z5 as the VPERMB index vector for the third gather from the input bytes.
  2384  DATA  expandAVX512_56_inShuf2<>+0x00(SB)/8, $0xff02020202020202
  2385  DATA  expandAVX512_56_inShuf2<>+0x08(SB)/8, $0xffffff0202020202
  2386  DATA  expandAVX512_56_inShuf2<>+0x10(SB)/8, $0xffffffffffffff02
  2387  DATA  expandAVX512_56_inShuf2<>+0x18(SB)/8, $0xffffffffffffffff // qwords 3-7 are all 0xff; the matching mat2 qwords are zero matrices
  2388  DATA  expandAVX512_56_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff
  2389  DATA  expandAVX512_56_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
  2390  DATA  expandAVX512_56_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
  2391  DATA  expandAVX512_56_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
  2392  
  2393  GLOBL expandAVX512_56_mat2<>(SB), RODATA, $0x40 // 64-byte read-only table: memory matrix operand of the third VGF2P8AFFINEQB in expandAVX512_56 (applied to Z4); one 8x8 bit matrix per qword.
  2394  DATA  expandAVX512_56_mat2<>+0x00(SB)/8, $0x0101010101010101
  2395  DATA  expandAVX512_56_mat2<>+0x08(SB)/8, $0x0202020202020202
  2396  DATA  expandAVX512_56_mat2<>+0x10(SB)/8, $0x0404040404040404
  2397  DATA  expandAVX512_56_mat2<>+0x18(SB)/8, $0x0000000000000000 // qwords 3-7 are all-zero matrices; the matching inShuf2 qwords are all 0xff
  2398  DATA  expandAVX512_56_mat2<>+0x20(SB)/8, $0x0000000000000000
  2399  DATA  expandAVX512_56_mat2<>+0x28(SB)/8, $0x0000000000000000
  2400  DATA  expandAVX512_56_mat2<>+0x30(SB)/8, $0x0000000000000000
  2401  DATA  expandAVX512_56_mat2<>+0x38(SB)/8, $0x0000000000000000
  2402  
  2403  GLOBL expandAVX512_56_outShufLo(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_56 loads it into Z1 as the VPERMI2B index vector over the Z0/Z3 register pair.
  2404  DATA  expandAVX512_56_outShufLo+0x00(SB)/8, $0x0806050403020100
  2405  DATA  expandAVX512_56_outShufLo+0x08(SB)/8, $0x11100e0d0c0b0a09
  2406  DATA  expandAVX512_56_outShufLo+0x10(SB)/8, $0x1a19181615141312
  2407  DATA  expandAVX512_56_outShufLo+0x18(SB)/8, $0x232221201e1d1c1b
  2408  DATA  expandAVX512_56_outShufLo+0x20(SB)/8, $0x2c2b2a2928262524
  2409  DATA  expandAVX512_56_outShufLo+0x28(SB)/8, $0x3534333231302e2d
  2410  DATA  expandAVX512_56_outShufLo+0x30(SB)/8, $0x3e3d3c3b3a393836
  2411  DATA  expandAVX512_56_outShufLo+0x38(SB)/8, $0x0f45444342414007
  2412  
  2413  GLOBL expandAVX512_56_outShufHi(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_56 loads it into Z2 as the VPERMI2B index vector over the Z3/Z4 register pair.
  2414  DATA  expandAVX512_56_outShufHi+0x00(SB)/8, $0x11100d0c0b0a0908
  2415  DATA  expandAVX512_56_outShufHi+0x08(SB)/8, $0x1a19181615141312
  2416  DATA  expandAVX512_56_outShufHi+0x10(SB)/8, $0x232221201e1d1c1b
  2417  DATA  expandAVX512_56_outShufHi+0x18(SB)/8, $0x2c2b2a2928262524
  2418  DATA  expandAVX512_56_outShufHi+0x20(SB)/8, $0x3534333231302e2d
  2419  DATA  expandAVX512_56_outShufHi+0x28(SB)/8, $0x3e3d3c3b3a393836
  2420  DATA  expandAVX512_56_outShufHi+0x30(SB)/8, $0x0e46454443424140
  2421  DATA  expandAVX512_56_outShufHi+0x38(SB)/8, $0x50174c4b4a49480f
  2422  
  2423  TEXT expandAVX512_56<>(SB), NOSPLIT, $0-0 // File-local, frameless ($0-0) routine: reads 64 bytes at (AX), runs three gather + bit-matrix stages (the first two share mat0), and leaves the final shuffles in Z1 and Z2 (presumably the outputs; the caller is not visible here).
  2424  	VMOVDQU64 expandAVX512_56_inShuf0<>(SB), Z0 // Z0 = byte indices for the first input gather
  2425  	VMOVDQU64 expandAVX512_56_mat0<>(SB), Z3 // Z3 = bit matrices, kept in a register because two stages use them
  2426  	VMOVDQU64 expandAVX512_56_inShuf1<>(SB), Z4 // Z4 = byte indices for the second input gather
  2427  	VMOVDQU64 expandAVX512_56_inShuf2<>(SB), Z5 // Z5 = byte indices for the third input gather
  2428  	VMOVDQU64 expandAVX512_56_outShufLo(SB), Z1 // Z1 = indices for the first output shuffle
  2429  	VMOVDQU64 expandAVX512_56_outShufHi(SB), Z2 // Z2 = indices for the second output shuffle
  2430  	VMOVDQU64 (AX), Z6 // Z6 = 64 input bytes, unaligned load from the address in AX
  2431  	VPERMB Z6, Z0, Z0 // Z0 = input bytes selected by the inShuf0 indices
  2432  	VGF2P8AFFINEQB $0, Z3, Z0, Z0 // transform each byte by the 8x8 bit matrix in the matching qword of Z3 (mat0); constant term is 0
  2433  	VPERMB Z6, Z4, Z4 // Z4 = input bytes selected by the inShuf1 indices
  2434  	VGF2P8AFFINEQB $0, Z3, Z4, Z3 // same matrices applied to Z4; the result replaces the matrices in Z3, which are not needed again
  2435  	VPERMB Z6, Z5, Z4 // Z4 is reused: input bytes selected by the inShuf2 indices
  2436  	VGF2P8AFFINEQB $0, expandAVX512_56_mat2<>(SB), Z4, Z4 // third stage uses its own matrix table, read from memory
  2437  	VPERMI2B Z3, Z0, Z1 // Z1 = bytes picked from the Z0/Z3 pair by the outShufLo indices
  2438  	VPERMI2B Z4, Z3, Z2 // Z2 = bytes picked from the Z3/Z4 pair by the outShufHi indices
  2439  	RET // Z1 and Z2 hold the final values
  2440  
  2441  GLOBL expandAVX512_60_inShuf0<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_60 loads it into Z0 as the VPERMB index vector for the first gather from the input bytes.
  2442  DATA  expandAVX512_60_inShuf0<>+0x00(SB)/8, $0x0100000000000000
  2443  DATA  expandAVX512_60_inShuf0<>+0x08(SB)/8, $0xffffffffffffff00 // NOTE(review): 0xff entries look like filler for lanes no output shuffle reads - confirm against mkasm.go
  2444  DATA  expandAVX512_60_inShuf0<>+0x10(SB)/8, $0xff00000000000000
  2445  DATA  expandAVX512_60_inShuf0<>+0x18(SB)/8, $0xff00000000000000
  2446  DATA  expandAVX512_60_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
  2447  DATA  expandAVX512_60_inShuf0<>+0x28(SB)/8, $0xff00000000000000
  2448  DATA  expandAVX512_60_inShuf0<>+0x30(SB)/8, $0xff00000000000000
  2449  DATA  expandAVX512_60_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
  2450  
  2451  GLOBL expandAVX512_60_mat0<>(SB), RODATA, $0x40 // 64-byte read-only table: memory matrix operand of the first VGF2P8AFFINEQB in expandAVX512_60 (applied to Z0); one 8x8 bit matrix per qword.
  2452  DATA  expandAVX512_60_mat0<>+0x00(SB)/8, $0x0101010101010101
  2453  DATA  expandAVX512_60_mat0<>+0x08(SB)/8, $0x0101010102020202 // mixed qword: its two halves carry different single-bit row masks
  2454  DATA  expandAVX512_60_mat0<>+0x10(SB)/8, $0x0202020202020202
  2455  DATA  expandAVX512_60_mat0<>+0x18(SB)/8, $0x0404040404040404
  2456  DATA  expandAVX512_60_mat0<>+0x20(SB)/8, $0x0404040408080808
  2457  DATA  expandAVX512_60_mat0<>+0x28(SB)/8, $0x0808080808080808
  2458  DATA  expandAVX512_60_mat0<>+0x30(SB)/8, $0x1010101010101010
  2459  DATA  expandAVX512_60_mat0<>+0x38(SB)/8, $0x1010101020202020
  2460  
  2461  GLOBL expandAVX512_60_inShuf1<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_60 loads it into Z2 as the VPERMB index vector for the second gather from the input bytes.
  2462  DATA  expandAVX512_60_inShuf1<>+0x00(SB)/8, $0xff00000000000000
  2463  DATA  expandAVX512_60_inShuf1<>+0x08(SB)/8, $0xff00000000000000
  2464  DATA  expandAVX512_60_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
  2465  DATA  expandAVX512_60_inShuf1<>+0x18(SB)/8, $0xff00000000000000
  2466  DATA  expandAVX512_60_inShuf1<>+0x20(SB)/8, $0xffffffffff010101
  2467  DATA  expandAVX512_60_inShuf1<>+0x28(SB)/8, $0x0202020202010101
  2468  DATA  expandAVX512_60_inShuf1<>+0x30(SB)/8, $0xffffffffffff0201
  2469  DATA  expandAVX512_60_inShuf1<>+0x38(SB)/8, $0xff01010101010101
  2470  
  2471  GLOBL expandAVX512_60_mat1<>(SB), RODATA, $0x40 // 64-byte read-only table: memory matrix operand of the second VGF2P8AFFINEQB in expandAVX512_60 (applied to Z2); one 8x8 bit matrix per qword.
  2472  DATA  expandAVX512_60_mat1<>+0x00(SB)/8, $0x2020202020202020
  2473  DATA  expandAVX512_60_mat1<>+0x08(SB)/8, $0x4040404040404040
  2474  DATA  expandAVX512_60_mat1<>+0x10(SB)/8, $0x4040404080808080
  2475  DATA  expandAVX512_60_mat1<>+0x18(SB)/8, $0x8080808080808080
  2476  DATA  expandAVX512_60_mat1<>+0x20(SB)/8, $0x0101010101010101
  2477  DATA  expandAVX512_60_mat1<>+0x28(SB)/8, $0x0101010101010101
  2478  DATA  expandAVX512_60_mat1<>+0x30(SB)/8, $0x0101010102020202
  2479  DATA  expandAVX512_60_mat1<>+0x38(SB)/8, $0x0202020202020202
  2480  
  2481  GLOBL expandAVX512_60_inShuf2<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_60 loads it into Z3 as the VPERMB index vector for the third gather from the input bytes.
  2482  DATA  expandAVX512_60_inShuf2<>+0x00(SB)/8, $0xff01010101010101
  2483  DATA  expandAVX512_60_inShuf2<>+0x08(SB)/8, $0xffffffffffffff01
  2484  DATA  expandAVX512_60_inShuf2<>+0x10(SB)/8, $0xff01010101010101
  2485  DATA  expandAVX512_60_inShuf2<>+0x18(SB)/8, $0xff01010101010101
  2486  DATA  expandAVX512_60_inShuf2<>+0x20(SB)/8, $0xffffffffffffff01
  2487  DATA  expandAVX512_60_inShuf2<>+0x28(SB)/8, $0xff01010101010101
  2488  DATA  expandAVX512_60_inShuf2<>+0x30(SB)/8, $0xff01010101010101
  2489  DATA  expandAVX512_60_inShuf2<>+0x38(SB)/8, $0xffffffffffffff01
  2490  
  2491  GLOBL expandAVX512_60_mat2<>(SB), RODATA, $0x40 // 64-byte read-only table: memory matrix operand of the third VGF2P8AFFINEQB in expandAVX512_60 (applied to Z3); one 8x8 bit matrix per qword.
  2492  DATA  expandAVX512_60_mat2<>+0x00(SB)/8, $0x0404040404040404
  2493  DATA  expandAVX512_60_mat2<>+0x08(SB)/8, $0x0404040408080808
  2494  DATA  expandAVX512_60_mat2<>+0x10(SB)/8, $0x0808080808080808
  2495  DATA  expandAVX512_60_mat2<>+0x18(SB)/8, $0x1010101010101010
  2496  DATA  expandAVX512_60_mat2<>+0x20(SB)/8, $0x1010101020202020
  2497  DATA  expandAVX512_60_mat2<>+0x28(SB)/8, $0x2020202020202020
  2498  DATA  expandAVX512_60_mat2<>+0x30(SB)/8, $0x4040404040404040
  2499  DATA  expandAVX512_60_mat2<>+0x38(SB)/8, $0x4040404080808080
  2500  
  2501  GLOBL expandAVX512_60_inShuf3<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_60 loads it into Z4 as the VPERMB index vector for the fourth gather from the input bytes.
  2502  DATA  expandAVX512_60_inShuf3<>+0x00(SB)/8, $0xff01010101010101
  2503  DATA  expandAVX512_60_inShuf3<>+0x08(SB)/8, $0xffffffffffff0202
  2504  DATA  expandAVX512_60_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff // qwords 2-7 are all 0xff; the matching mat3 qwords are zero matrices
  2505  DATA  expandAVX512_60_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
  2506  DATA  expandAVX512_60_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
  2507  DATA  expandAVX512_60_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
  2508  DATA  expandAVX512_60_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
  2509  DATA  expandAVX512_60_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
  2510  
  2511  GLOBL expandAVX512_60_mat3<>(SB), RODATA, $0x40 // 64-byte read-only table: memory matrix operand of the fourth VGF2P8AFFINEQB in expandAVX512_60 (applied to Z4); one 8x8 bit matrix per qword.
  2512  DATA  expandAVX512_60_mat3<>+0x00(SB)/8, $0x8080808080808080
  2513  DATA  expandAVX512_60_mat3<>+0x08(SB)/8, $0x0101010101010101
  2514  DATA  expandAVX512_60_mat3<>+0x10(SB)/8, $0x0000000000000000 // qwords 2-7 are all-zero matrices; the matching inShuf3 qwords are all 0xff
  2515  DATA  expandAVX512_60_mat3<>+0x18(SB)/8, $0x0000000000000000
  2516  DATA  expandAVX512_60_mat3<>+0x20(SB)/8, $0x0000000000000000
  2517  DATA  expandAVX512_60_mat3<>+0x28(SB)/8, $0x0000000000000000
  2518  DATA  expandAVX512_60_mat3<>+0x30(SB)/8, $0x0000000000000000
  2519  DATA  expandAVX512_60_mat3<>+0x38(SB)/8, $0x0000000000000000
  2520  
  2521  GLOBL expandAVX512_60_outShufLo(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_60 loads it into Z1 as the VPERMI2B index vector over the Z0/Z2 register pair.
  2522  DATA  expandAVX512_60_outShufLo+0x00(SB)/8, $0x0806050403020100
  2523  DATA  expandAVX512_60_outShufLo+0x08(SB)/8, $0x1816151413121110
  2524  DATA  expandAVX512_60_outShufLo+0x10(SB)/8, $0x28201e1d1c1b1a19
  2525  DATA  expandAVX512_60_outShufLo+0x18(SB)/8, $0x31302e2d2c2b2a29
  2526  DATA  expandAVX512_60_outShufLo+0x20(SB)/8, $0x4140383635343332
  2527  DATA  expandAVX512_60_outShufLo+0x28(SB)/8, $0x4a49484645444342
  2528  DATA  expandAVX512_60_outShufLo+0x30(SB)/8, $0x5a5958504e4d4c4b
  2529  DATA  expandAVX512_60_outShufLo+0x38(SB)/8, $0x626160075e5d5c5b
  2530  
  2531  GLOBL expandAVX512_60_outShufHi0(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_60 loads it into Z5 as the index vector of the zero-masked VPERMI2B over the Z2/Z3 register pair.
  2532  DATA  expandAVX512_60_outShufHi0+0x00(SB)/8, $0x3b3a3938302a2928
  2533  DATA  expandAVX512_60_outShufHi0+0x08(SB)/8, $0x44434241403e3d3c
  2534  DATA  expandAVX512_60_outShufHi0+0x10(SB)/8, $0x5453525150484645
  2535  DATA  expandAVX512_60_outShufHi0+0x18(SB)/8, $0x5d5c5b5a59585655
  2536  DATA  expandAVX512_60_outShufHi0+0x20(SB)/8, $0x6d6c6b6a6968605e
  2537  DATA  expandAVX512_60_outShufHi0+0x28(SB)/8, $0x767574737271706e
  2538  DATA  expandAVX512_60_outShufHi0+0x30(SB)/8, $0xffffffffffffff78 // 0xff entries sit exactly on the byte lanes whose bit is clear in the 0x9f01ffffffffffff mask used by expandAVX512_60
  2539  DATA  expandAVX512_60_outShufHi0+0x38(SB)/8, $0x31ffff2f2e2d2c2b
  2540  
  2541  GLOBL expandAVX512_60_outShufHi1(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_60 loads it into Z6 as the index vector of the zero-masked VPERMB over Z4.
  2542  DATA  expandAVX512_60_outShufHi1+0x00(SB)/8, $0xffffffffffffffff // 0xff entries sit exactly on the byte lanes whose bit is clear in the 0x60fe000000000000 mask used by expandAVX512_60
  2543  DATA  expandAVX512_60_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
  2544  DATA  expandAVX512_60_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
  2545  DATA  expandAVX512_60_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
  2546  DATA  expandAVX512_60_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
  2547  DATA  expandAVX512_60_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
  2548  DATA  expandAVX512_60_outShufHi1+0x30(SB)/8, $0x06050403020100ff
  2549  DATA  expandAVX512_60_outShufHi1+0x38(SB)/8, $0xff0908ffffffffff
  2550  
  2551  TEXT expandAVX512_60<>(SB), NOSPLIT, $0-0 // File-local, frameless ($0-0) routine: reads 64 bytes at (AX), runs four gather + bit-matrix stages, and leaves the final values in Z1 and Z2 (presumably the outputs; the caller is not visible here). Overwrites AX and K1.
  2552  	VMOVDQU64 expandAVX512_60_inShuf0<>(SB), Z0 // Z0 = byte indices for the first input gather
  2553  	VMOVDQU64 expandAVX512_60_inShuf1<>(SB), Z2 // Z2 = byte indices for the second input gather
  2554  	VMOVDQU64 expandAVX512_60_inShuf2<>(SB), Z3 // Z3 = byte indices for the third input gather
  2555  	VMOVDQU64 expandAVX512_60_inShuf3<>(SB), Z4 // Z4 = byte indices for the fourth input gather
  2556  	VMOVDQU64 expandAVX512_60_outShufLo(SB), Z1 // Z1 = indices for the first output shuffle
  2557  	VMOVDQU64 expandAVX512_60_outShufHi0(SB), Z5 // Z5 = indices for the first half of the second output
  2558  	VMOVDQU64 expandAVX512_60_outShufHi1(SB), Z6 // Z6 = indices for the second half of the second output
  2559  	VMOVDQU64 (AX), Z7 // Z7 = 64 input bytes, unaligned load from the address in AX
  2560  	VPERMB Z7, Z0, Z0 // Z0 = input bytes selected by the inShuf0 indices
  2561  	VGF2P8AFFINEQB $0, expandAVX512_60_mat0<>(SB), Z0, Z0 // transform each byte by the 8x8 bit matrix in the matching qword of mat0; constant term is 0
  2562  	VPERMB Z7, Z2, Z2 // Z2 = input bytes selected by the inShuf1 indices
  2563  	VGF2P8AFFINEQB $0, expandAVX512_60_mat1<>(SB), Z2, Z2 // same transform with mat1
  2564  	VPERMB Z7, Z3, Z3 // Z3 = input bytes selected by the inShuf2 indices
  2565  	VGF2P8AFFINEQB $0, expandAVX512_60_mat2<>(SB), Z3, Z3 // same transform with mat2
  2566  	VPERMB Z7, Z4, Z4 // Z4 = input bytes selected by the inShuf3 indices
  2567  	VGF2P8AFFINEQB $0, expandAVX512_60_mat3<>(SB), Z4, Z4 // same transform with mat3
  2568  	VPERMI2B Z2, Z0, Z1 // Z1 = bytes picked from the Z0/Z2 pair by the outShufLo indices
  2569  	MOVQ $0x9f01ffffffffffff, AX // AX becomes scratch for the lane mask; the input pointer is overwritten (the input is already in Z7)
  2570  	KMOVQ AX, K1 // K1 = byte lanes that the Z2/Z3 shuffle supplies
  2571  	VPERMI2B.Z Z3, Z2, K1, Z5 // Z5 = bytes picked from the Z2/Z3 pair by outShufHi0; lanes outside K1 are zeroed
  2572  	MOVQ $0x60fe000000000000, AX // bitwise complement of the previous mask
  2573  	KMOVQ AX, K1 // K1 = the remaining byte lanes, supplied from Z4
  2574  	VPERMB.Z Z4, Z6, K1, Z0 // Z0 = bytes picked from Z4 by outShufHi1; lanes outside K1 are zeroed (Z0's earlier contents are no longer needed)
  2575  	VPORQ Z0, Z5, Z2 // Z2 = Z5 | Z0; the masks are disjoint, so this merges the two partial results
  2576  	RET // Z1 and Z2 hold the final values
  2577  
  2578  GLOBL expandAVX512_64_inShuf0<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_64 loads it into Z0 as a VPERMB index vector; every index is 0, so all 64 lanes receive input byte 0.
  2579  DATA  expandAVX512_64_inShuf0<>+0x00(SB)/8, $0x0000000000000000
  2580  DATA  expandAVX512_64_inShuf0<>+0x08(SB)/8, $0x0000000000000000
  2581  DATA  expandAVX512_64_inShuf0<>+0x10(SB)/8, $0x0000000000000000
  2582  DATA  expandAVX512_64_inShuf0<>+0x18(SB)/8, $0x0000000000000000
  2583  DATA  expandAVX512_64_inShuf0<>+0x20(SB)/8, $0x0000000000000000
  2584  DATA  expandAVX512_64_inShuf0<>+0x28(SB)/8, $0x0000000000000000
  2585  DATA  expandAVX512_64_inShuf0<>+0x30(SB)/8, $0x0000000000000000
  2586  DATA  expandAVX512_64_inShuf0<>+0x38(SB)/8, $0x0000000000000000
  2587  
  2588  GLOBL expandAVX512_64_mat0<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_64 loads it into Z1 and uses it as the matrix operand of both of its VGF2P8AFFINEQB steps.
  2589  DATA  expandAVX512_64_mat0<>+0x00(SB)/8, $0x0101010101010101 // qword j repeats the single-bit mask 1<<j in all eight matrix rows, so every output bit in that qword's lanes copies input bit j
  2590  DATA  expandAVX512_64_mat0<>+0x08(SB)/8, $0x0202020202020202
  2591  DATA  expandAVX512_64_mat0<>+0x10(SB)/8, $0x0404040404040404
  2592  DATA  expandAVX512_64_mat0<>+0x18(SB)/8, $0x0808080808080808
  2593  DATA  expandAVX512_64_mat0<>+0x20(SB)/8, $0x1010101010101010
  2594  DATA  expandAVX512_64_mat0<>+0x28(SB)/8, $0x2020202020202020
  2595  DATA  expandAVX512_64_mat0<>+0x30(SB)/8, $0x4040404040404040
  2596  DATA  expandAVX512_64_mat0<>+0x38(SB)/8, $0x8080808080808080
  2597  
  2598  GLOBL expandAVX512_64_inShuf1<>(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_64 loads it into Z2 as a VPERMB index vector; every index is 1, so all 64 lanes receive input byte 1.
  2599  DATA  expandAVX512_64_inShuf1<>+0x00(SB)/8, $0x0101010101010101
  2600  DATA  expandAVX512_64_inShuf1<>+0x08(SB)/8, $0x0101010101010101
  2601  DATA  expandAVX512_64_inShuf1<>+0x10(SB)/8, $0x0101010101010101
  2602  DATA  expandAVX512_64_inShuf1<>+0x18(SB)/8, $0x0101010101010101
  2603  DATA  expandAVX512_64_inShuf1<>+0x20(SB)/8, $0x0101010101010101
  2604  DATA  expandAVX512_64_inShuf1<>+0x28(SB)/8, $0x0101010101010101
  2605  DATA  expandAVX512_64_inShuf1<>+0x30(SB)/8, $0x0101010101010101
  2606  DATA  expandAVX512_64_inShuf1<>+0x38(SB)/8, $0x0101010101010101
  2607  
  2608  GLOBL expandAVX512_64_outShufLo(SB), RODATA, $0x40 // 64-byte read-only table: expandAVX512_64 loads it into Z3 and uses it as the index vector of both final VPERMBs; the entries are 0x00..0x3f in order (an identity permutation).
  2609  DATA  expandAVX512_64_outShufLo+0x00(SB)/8, $0x0706050403020100
  2610  DATA  expandAVX512_64_outShufLo+0x08(SB)/8, $0x0f0e0d0c0b0a0908
  2611  DATA  expandAVX512_64_outShufLo+0x10(SB)/8, $0x1716151413121110
  2612  DATA  expandAVX512_64_outShufLo+0x18(SB)/8, $0x1f1e1d1c1b1a1918
  2613  DATA  expandAVX512_64_outShufLo+0x20(SB)/8, $0x2726252423222120
  2614  DATA  expandAVX512_64_outShufLo+0x28(SB)/8, $0x2f2e2d2c2b2a2928
  2615  DATA  expandAVX512_64_outShufLo+0x30(SB)/8, $0x3736353433323130
  2616  DATA  expandAVX512_64_outShufLo+0x38(SB)/8, $0x3f3e3d3c3b3a3938
  2617  
  2618  TEXT expandAVX512_64<>(SB), NOSPLIT, $0-0 // File-local, frameless ($0-0) routine: loads 64 bytes at (AX) but its index tables select only input bytes 0 and 1; leaves the final values in Z1 and Z2 (presumably the outputs; the caller is not visible here).
  2619  	VMOVDQU64 expandAVX512_64_inShuf0<>(SB), Z0 // Z0 = all-zero indices (select input byte 0 everywhere)
  2620  	VMOVDQU64 expandAVX512_64_mat0<>(SB), Z1 // Z1 = bit matrices shared by both affine steps
  2621  	VMOVDQU64 expandAVX512_64_inShuf1<>(SB), Z2 // Z2 = all-one indices (select input byte 1 everywhere)
  2622  	VMOVDQU64 expandAVX512_64_outShufLo(SB), Z3 // Z3 = output indices, used for both results
  2623  	VMOVDQU64 (AX), Z4 // Z4 = 64 input bytes, unaligned load from the address in AX
  2624  	VPERMB Z4, Z0, Z0 // Z0 = input byte 0 copied to all 64 lanes
  2625  	VGF2P8AFFINEQB $0, Z1, Z0, Z0 // transform each byte by the 8x8 bit matrix in the matching qword of Z1; constant term is 0
  2626  	VPERMB Z4, Z2, Z2 // Z2 = input byte 1 copied to all 64 lanes
  2627  	VGF2P8AFFINEQB $0, Z1, Z2, Z2 // same matrices applied to Z2
  2628  	VPERMB Z0, Z3, Z1 // Z1 = Z0 permuted by outShufLo; this overwrites the matrices, which are no longer needed
  2629  	VPERMB Z2, Z3, Z2 // Z2 = Z2 permuted by the same indices
  2630  	RET // Z1 and Z2 hold the final values
  2631  
  2632  

View as plain text