1 // Code generated by mkasm.go. DO NOT EDIT.
2
3 #include "go_asm.h"
4 #include "textflag.h"
5
// gcExpandersAVX512 is a 0x220-byte read-only table of 68 pointer-sized
// (8-byte) slots. Slot 0 is zero. Slots 1 through 26 (offsets 0x08-0xd0) hold
// the addresses of the file-local expandAVX512_N routines for N = 1, 2, 3,
// then 4 through 32 in steps of 2, then 36 through 64 in steps of 4.
// All remaining slots (offsets 0xd8-0x218) are zero.
// NOTE(review): presumably the slot index is a size-class number and a zero
// slot means "no expander available" — confirm against the Go code that
// reads this table.
6 GLOBL ·gcExpandersAVX512(SB), RODATA, $0x220
7 DATA ·gcExpandersAVX512+0x00(SB)/8, $0
8 DATA ·gcExpandersAVX512+0x08(SB)/8, $expandAVX512_1<>(SB)
9 DATA ·gcExpandersAVX512+0x10(SB)/8, $expandAVX512_2<>(SB)
10 DATA ·gcExpandersAVX512+0x18(SB)/8, $expandAVX512_3<>(SB)
11 DATA ·gcExpandersAVX512+0x20(SB)/8, $expandAVX512_4<>(SB)
12 DATA ·gcExpandersAVX512+0x28(SB)/8, $expandAVX512_6<>(SB)
13 DATA ·gcExpandersAVX512+0x30(SB)/8, $expandAVX512_8<>(SB)
14 DATA ·gcExpandersAVX512+0x38(SB)/8, $expandAVX512_10<>(SB)
15 DATA ·gcExpandersAVX512+0x40(SB)/8, $expandAVX512_12<>(SB)
16 DATA ·gcExpandersAVX512+0x48(SB)/8, $expandAVX512_14<>(SB)
17 DATA ·gcExpandersAVX512+0x50(SB)/8, $expandAVX512_16<>(SB)
18 DATA ·gcExpandersAVX512+0x58(SB)/8, $expandAVX512_18<>(SB)
19 DATA ·gcExpandersAVX512+0x60(SB)/8, $expandAVX512_20<>(SB)
20 DATA ·gcExpandersAVX512+0x68(SB)/8, $expandAVX512_22<>(SB)
21 DATA ·gcExpandersAVX512+0x70(SB)/8, $expandAVX512_24<>(SB)
22 DATA ·gcExpandersAVX512+0x78(SB)/8, $expandAVX512_26<>(SB)
23 DATA ·gcExpandersAVX512+0x80(SB)/8, $expandAVX512_28<>(SB)
24 DATA ·gcExpandersAVX512+0x88(SB)/8, $expandAVX512_30<>(SB)
25 DATA ·gcExpandersAVX512+0x90(SB)/8, $expandAVX512_32<>(SB)
26 DATA ·gcExpandersAVX512+0x98(SB)/8, $expandAVX512_36<>(SB)
27 DATA ·gcExpandersAVX512+0xa0(SB)/8, $expandAVX512_40<>(SB)
28 DATA ·gcExpandersAVX512+0xa8(SB)/8, $expandAVX512_44<>(SB)
29 DATA ·gcExpandersAVX512+0xb0(SB)/8, $expandAVX512_48<>(SB)
30 DATA ·gcExpandersAVX512+0xb8(SB)/8, $expandAVX512_52<>(SB)
31 DATA ·gcExpandersAVX512+0xc0(SB)/8, $expandAVX512_56<>(SB)
32 DATA ·gcExpandersAVX512+0xc8(SB)/8, $expandAVX512_60<>(SB)
33 DATA ·gcExpandersAVX512+0xd0(SB)/8, $expandAVX512_64<>(SB)
// Slots 27-67 carry no routine address; each is explicitly zero.
34 DATA ·gcExpandersAVX512+0xd8(SB)/8, $0
35 DATA ·gcExpandersAVX512+0xe0(SB)/8, $0
36 DATA ·gcExpandersAVX512+0xe8(SB)/8, $0
37 DATA ·gcExpandersAVX512+0xf0(SB)/8, $0
38 DATA ·gcExpandersAVX512+0xf8(SB)/8, $0
39 DATA ·gcExpandersAVX512+0x100(SB)/8, $0
40 DATA ·gcExpandersAVX512+0x108(SB)/8, $0
41 DATA ·gcExpandersAVX512+0x110(SB)/8, $0
42 DATA ·gcExpandersAVX512+0x118(SB)/8, $0
43 DATA ·gcExpandersAVX512+0x120(SB)/8, $0
44 DATA ·gcExpandersAVX512+0x128(SB)/8, $0
45 DATA ·gcExpandersAVX512+0x130(SB)/8, $0
46 DATA ·gcExpandersAVX512+0x138(SB)/8, $0
47 DATA ·gcExpandersAVX512+0x140(SB)/8, $0
48 DATA ·gcExpandersAVX512+0x148(SB)/8, $0
49 DATA ·gcExpandersAVX512+0x150(SB)/8, $0
50 DATA ·gcExpandersAVX512+0x158(SB)/8, $0
51 DATA ·gcExpandersAVX512+0x160(SB)/8, $0
52 DATA ·gcExpandersAVX512+0x168(SB)/8, $0
53 DATA ·gcExpandersAVX512+0x170(SB)/8, $0
54 DATA ·gcExpandersAVX512+0x178(SB)/8, $0
55 DATA ·gcExpandersAVX512+0x180(SB)/8, $0
56 DATA ·gcExpandersAVX512+0x188(SB)/8, $0
57 DATA ·gcExpandersAVX512+0x190(SB)/8, $0
58 DATA ·gcExpandersAVX512+0x198(SB)/8, $0
59 DATA ·gcExpandersAVX512+0x1a0(SB)/8, $0
60 DATA ·gcExpandersAVX512+0x1a8(SB)/8, $0
61 DATA ·gcExpandersAVX512+0x1b0(SB)/8, $0
62 DATA ·gcExpandersAVX512+0x1b8(SB)/8, $0
63 DATA ·gcExpandersAVX512+0x1c0(SB)/8, $0
64 DATA ·gcExpandersAVX512+0x1c8(SB)/8, $0
65 DATA ·gcExpandersAVX512+0x1d0(SB)/8, $0
66 DATA ·gcExpandersAVX512+0x1d8(SB)/8, $0
67 DATA ·gcExpandersAVX512+0x1e0(SB)/8, $0
68 DATA ·gcExpandersAVX512+0x1e8(SB)/8, $0
69 DATA ·gcExpandersAVX512+0x1f0(SB)/8, $0
70 DATA ·gcExpandersAVX512+0x1f8(SB)/8, $0
71 DATA ·gcExpandersAVX512+0x200(SB)/8, $0
72 DATA ·gcExpandersAVX512+0x208(SB)/8, $0
73 DATA ·gcExpandersAVX512+0x210(SB)/8, $0
74 DATA ·gcExpandersAVX512+0x218(SB)/8, $0
75
// expandAVX512_1 loads 128 bytes starting at the address in AX, unchanged,
// into Z1 (bytes 0-63) and Z2 (bytes 64-127).
// It is NOSPLIT and declares no stack frame and no stack arguments ($0-0).
// NOTE(review): "pointer in AX, result in Z1/Z2" looks like a private register
// convention shared by the expandAVX512_N routines — confirm against the caller.
76 TEXT expandAVX512_1<>(SB), NOSPLIT, $0-0
77 VMOVDQU64 (AX), Z1 // low 64 bytes of the result
78 VMOVDQU64 64(AX), Z2 // high 64 bytes of the result
79 RET
80
// expandAVX512_2_inShuf0 is a 64-byte read-only table of byte indexes.
// expandAVX512_2 loads it into Z0 and uses it as the VPERMB index vector that
// gathers bytes from the input block read from (AX). Each 8-byte index group
// appears twice in a row.
81 GLOBL expandAVX512_2_inShuf0<>(SB), RODATA, $0x40
82 DATA expandAVX512_2_inShuf0<>+0x00(SB)/8, $0x0706050403020100
83 DATA expandAVX512_2_inShuf0<>+0x08(SB)/8, $0x0706050403020100
84 DATA expandAVX512_2_inShuf0<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
85 DATA expandAVX512_2_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
86 DATA expandAVX512_2_inShuf0<>+0x20(SB)/8, $0x1716151413121110
87 DATA expandAVX512_2_inShuf0<>+0x28(SB)/8, $0x1716151413121110
88 DATA expandAVX512_2_inShuf0<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
89 DATA expandAVX512_2_inShuf0<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918
90
// expandAVX512_2_mat0 is a 64-byte read-only table of eight 8-byte values
// (a two-value pattern repeated four times). expandAVX512_2 loads it into Z1
// and passes it as the matrix operand of both VGF2P8AFFINEQB instructions.
91 GLOBL expandAVX512_2_mat0<>(SB), RODATA, $0x40
92 DATA expandAVX512_2_mat0<>+0x00(SB)/8, $0x0101020204040808
93 DATA expandAVX512_2_mat0<>+0x08(SB)/8, $0x1010202040408080
94 DATA expandAVX512_2_mat0<>+0x10(SB)/8, $0x0101020204040808
95 DATA expandAVX512_2_mat0<>+0x18(SB)/8, $0x1010202040408080
96 DATA expandAVX512_2_mat0<>+0x20(SB)/8, $0x0101020204040808
97 DATA expandAVX512_2_mat0<>+0x28(SB)/8, $0x1010202040408080
98 DATA expandAVX512_2_mat0<>+0x30(SB)/8, $0x0101020204040808
99 DATA expandAVX512_2_mat0<>+0x38(SB)/8, $0x1010202040408080
100
// expandAVX512_2_inShuf1 is a 64-byte read-only table of byte indexes.
// expandAVX512_2 loads it into Z2 and uses it as the second VPERMB index
// vector applied to the input block; it continues the pattern of
// expandAVX512_2_inShuf0 with indexes 0x20-0x3f.
101 GLOBL expandAVX512_2_inShuf1<>(SB), RODATA, $0x40
102 DATA expandAVX512_2_inShuf1<>+0x00(SB)/8, $0x2726252423222120
103 DATA expandAVX512_2_inShuf1<>+0x08(SB)/8, $0x2726252423222120
104 DATA expandAVX512_2_inShuf1<>+0x10(SB)/8, $0x2f2e2d2c2b2a2928
105 DATA expandAVX512_2_inShuf1<>+0x18(SB)/8, $0x2f2e2d2c2b2a2928
106 DATA expandAVX512_2_inShuf1<>+0x20(SB)/8, $0x3736353433323130
107 DATA expandAVX512_2_inShuf1<>+0x28(SB)/8, $0x3736353433323130
108 DATA expandAVX512_2_inShuf1<>+0x30(SB)/8, $0x3f3e3d3c3b3a3938
109 DATA expandAVX512_2_inShuf1<>+0x38(SB)/8, $0x3f3e3d3c3b3a3938
110
// expandAVX512_2_outShufLo is a 64-byte read-only table of byte indexes.
// expandAVX512_2 loads it into Z3 and uses it as the index vector of the two
// final VPERMB instructions that produce Z1 and Z2.
// NOTE(review): unlike the inShuf/mat tables this symbol is declared without
// the `<>` suffix, so it is not file-local — confirm that this is intended.
111 GLOBL expandAVX512_2_outShufLo(SB), RODATA, $0x40
112 DATA expandAVX512_2_outShufLo+0x00(SB)/8, $0x0b030a0209010800
113 DATA expandAVX512_2_outShufLo+0x08(SB)/8, $0x0f070e060d050c04
114 DATA expandAVX512_2_outShufLo+0x10(SB)/8, $0x1b131a1219111810
115 DATA expandAVX512_2_outShufLo+0x18(SB)/8, $0x1f171e161d151c14
116 DATA expandAVX512_2_outShufLo+0x20(SB)/8, $0x2b232a2229212820
117 DATA expandAVX512_2_outShufLo+0x28(SB)/8, $0x2f272e262d252c24
118 DATA expandAVX512_2_outShufLo+0x30(SB)/8, $0x3b333a3239313830
119 DATA expandAVX512_2_outShufLo+0x38(SB)/8, $0x3f373e363d353c34
120
// expandAVX512_2 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
// Two intermediate vectors are built from the input, each by a byte gather
// (VPERMB with an inShuf index table) followed by VGF2P8AFFINEQB with matrix
// mat0 and immediate 0. Each intermediate is then byte-permuted with
// outShufLo to form one output register.
// Z0, Z3 and Z4 are overwritten as scratch. NOSPLIT, no frame, no stack
// arguments ($0-0).
// NOTE(review): going by the name and the mat0 pattern, each input bit is
// presumably replicated 2 times in the output — confirm against mkasm.go.
121 TEXT expandAVX512_2<>(SB), NOSPLIT, $0-0
122 VMOVDQU64 expandAVX512_2_inShuf0<>(SB), Z0
123 VMOVDQU64 expandAVX512_2_mat0<>(SB), Z1
124 VMOVDQU64 expandAVX512_2_inShuf1<>(SB), Z2
125 VMOVDQU64 expandAVX512_2_outShufLo(SB), Z3
126 VMOVDQU64 (AX), Z4 // input block
127 VPERMB Z4, Z0, Z0 // gather input bytes by inShuf0
128 VGF2P8AFFINEQB $0, Z1, Z0, Z0
129 VPERMB Z4, Z2, Z2 // gather input bytes by inShuf1
130 VGF2P8AFFINEQB $0, Z1, Z2, Z2
131 VPERMB Z0, Z3, Z1 // Z1 (matrix no longer needed) = first output
132 VPERMB Z2, Z3, Z2 // second output
133 RET
134
// expandAVX512_3_inShuf0 is a 64-byte read-only table of byte indexes.
// expandAVX512_3 loads it into Z0 and uses it as the first VPERMB index
// vector applied to the input block read from (AX). Each 8-byte index group
// appears three times; the last two 8-byte words are all 0xff.
135 GLOBL expandAVX512_3_inShuf0<>(SB), RODATA, $0x40
136 DATA expandAVX512_3_inShuf0<>+0x00(SB)/8, $0x0706050403020100
137 DATA expandAVX512_3_inShuf0<>+0x08(SB)/8, $0x0706050403020100
138 DATA expandAVX512_3_inShuf0<>+0x10(SB)/8, $0x0706050403020100
139 DATA expandAVX512_3_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
140 DATA expandAVX512_3_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
141 DATA expandAVX512_3_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
142 DATA expandAVX512_3_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff
143 DATA expandAVX512_3_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff
144
// expandAVX512_3_mat0 is a 64-byte read-only table of eight 8-byte values:
// a three-value pattern stored twice, followed by two zero words.
// expandAVX512_3 loads it into Z3 and passes it as the matrix operand of all
// three VGF2P8AFFINEQB instructions.
145 GLOBL expandAVX512_3_mat0<>(SB), RODATA, $0x40
146 DATA expandAVX512_3_mat0<>+0x00(SB)/8, $0x0101010202020404
147 DATA expandAVX512_3_mat0<>+0x08(SB)/8, $0x0408080810101020
148 DATA expandAVX512_3_mat0<>+0x10(SB)/8, $0x2020404040808080
149 DATA expandAVX512_3_mat0<>+0x18(SB)/8, $0x0101010202020404
150 DATA expandAVX512_3_mat0<>+0x20(SB)/8, $0x0408080810101020
151 DATA expandAVX512_3_mat0<>+0x28(SB)/8, $0x2020404040808080
152 DATA expandAVX512_3_mat0<>+0x30(SB)/8, $0x0000000000000000
153 DATA expandAVX512_3_mat0<>+0x38(SB)/8, $0x0000000000000000
154
// expandAVX512_3_inShuf1 is a 64-byte read-only table of byte indexes.
// expandAVX512_3 loads it into Z4 and uses it as the second VPERMB index
// vector applied to the input block. It has the same layout as
// expandAVX512_3_inShuf0 but addresses input bytes 0x10-0x1f.
155 GLOBL expandAVX512_3_inShuf1<>(SB), RODATA, $0x40
156 DATA expandAVX512_3_inShuf1<>+0x00(SB)/8, $0x1716151413121110
157 DATA expandAVX512_3_inShuf1<>+0x08(SB)/8, $0x1716151413121110
158 DATA expandAVX512_3_inShuf1<>+0x10(SB)/8, $0x1716151413121110
159 DATA expandAVX512_3_inShuf1<>+0x18(SB)/8, $0x1f1e1d1c1b1a1918
160 DATA expandAVX512_3_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918
161 DATA expandAVX512_3_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
162 DATA expandAVX512_3_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff
163 DATA expandAVX512_3_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff
164
// expandAVX512_3_inShuf2 is a 64-byte read-only table of byte indexes.
// expandAVX512_3 loads it into Z5 and uses it as the third VPERMB index
// vector applied to the input block. Only indexes 0x20-0x2a appear; every
// other index byte is 0xff.
// NOTE(review): the 0xff bytes look like filler for lanes whose result is not
// consumed — confirm against mkasm.go.
165 GLOBL expandAVX512_3_inShuf2<>(SB), RODATA, $0x40
166 DATA expandAVX512_3_inShuf2<>+0x00(SB)/8, $0x2726252423222120
167 DATA expandAVX512_3_inShuf2<>+0x08(SB)/8, $0x2726252423222120
168 DATA expandAVX512_3_inShuf2<>+0x10(SB)/8, $0x2726252423222120
169 DATA expandAVX512_3_inShuf2<>+0x18(SB)/8, $0xffffffffff2a2928
170 DATA expandAVX512_3_inShuf2<>+0x20(SB)/8, $0xffffffffff2a2928
171 DATA expandAVX512_3_inShuf2<>+0x28(SB)/8, $0xffffffffffff2928
172 DATA expandAVX512_3_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
173 DATA expandAVX512_3_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
174
// expandAVX512_3_outShufLo is a 64-byte read-only table of byte indexes.
// expandAVX512_3 loads it into Z1, where it serves as the index vector of the
// first VPERMI2B and is overwritten by that instruction's result.
// NOTE(review): unlike the inShuf/mat tables this symbol is declared without
// the `<>` suffix, so it is not file-local — confirm that this is intended.
175 GLOBL expandAVX512_3_outShufLo(SB), RODATA, $0x40
176 DATA expandAVX512_3_outShufLo+0x00(SB)/8, $0x0a02110901100800
177 DATA expandAVX512_3_outShufLo+0x08(SB)/8, $0x05140c04130b0312
178 DATA expandAVX512_3_outShufLo+0x10(SB)/8, $0x170f07160e06150d
179 DATA expandAVX512_3_outShufLo+0x18(SB)/8, $0x221a292119282018
180 DATA expandAVX512_3_outShufLo+0x20(SB)/8, $0x1d2c241c2b231b2a
181 DATA expandAVX512_3_outShufLo+0x28(SB)/8, $0x2f271f2e261e2d25
182 DATA expandAVX512_3_outShufLo+0x30(SB)/8, $0x4a42514941504840
183 DATA expandAVX512_3_outShufLo+0x38(SB)/8, $0x45544c44534b4352
184
// expandAVX512_3_outShufHi is a 64-byte read-only table of byte indexes.
// expandAVX512_3 loads it into Z2, where it serves as the index vector of the
// second VPERMI2B and is overwritten by that instruction's result.
// NOTE(review): unlike the inShuf/mat tables this symbol is declared without
// the `<>` suffix, so it is not file-local — confirm that this is intended.
185 GLOBL expandAVX512_3_outShufHi(SB), RODATA, $0x40
186 DATA expandAVX512_3_outShufHi+0x00(SB)/8, $0x170f07160e06150d
187 DATA expandAVX512_3_outShufHi+0x08(SB)/8, $0x221a292119282018
188 DATA expandAVX512_3_outShufHi+0x10(SB)/8, $0x1d2c241c2b231b2a
189 DATA expandAVX512_3_outShufHi+0x18(SB)/8, $0x2f271f2e261e2d25
190 DATA expandAVX512_3_outShufHi+0x20(SB)/8, $0x4a42514941504840
191 DATA expandAVX512_3_outShufHi+0x28(SB)/8, $0x45544c44534b4352
192 DATA expandAVX512_3_outShufHi+0x30(SB)/8, $0x574f47564e46554d
193 DATA expandAVX512_3_outShufHi+0x38(SB)/8, $0x625a696159686058
194
// expandAVX512_3 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
// Three intermediate vectors are built from the input, each by a byte gather
// (VPERMB with inShuf0/1/2) followed by VGF2P8AFFINEQB with matrix mat0 and
// immediate 0. Two VPERMI2B instructions then pick output bytes from pairs of
// intermediates using the outShufLo and outShufHi index tables.
// Z0, Z3, Z4, Z5 and Z6 are overwritten as scratch. NOSPLIT, no frame, no
// stack arguments ($0-0).
// NOTE(review): going by the name and the mat0 pattern, each input bit is
// presumably replicated 3 times in the output — confirm against mkasm.go.
195 TEXT expandAVX512_3<>(SB), NOSPLIT, $0-0
196 VMOVDQU64 expandAVX512_3_inShuf0<>(SB), Z0
197 VMOVDQU64 expandAVX512_3_mat0<>(SB), Z3
198 VMOVDQU64 expandAVX512_3_inShuf1<>(SB), Z4
199 VMOVDQU64 expandAVX512_3_inShuf2<>(SB), Z5
200 VMOVDQU64 expandAVX512_3_outShufLo(SB), Z1
201 VMOVDQU64 expandAVX512_3_outShufHi(SB), Z2
202 VMOVDQU64 (AX), Z6 // input block
203 VPERMB Z6, Z0, Z0 // gather input bytes by inShuf0
204 VGF2P8AFFINEQB $0, Z3, Z0, Z0
205 VPERMB Z6, Z4, Z4 // gather input bytes by inShuf1
206 VGF2P8AFFINEQB $0, Z3, Z4, Z4
207 VPERMB Z6, Z5, Z5 // gather input bytes by inShuf2
208 VGF2P8AFFINEQB $0, Z3, Z5, Z3 // last use of the matrix: Z3 now holds the third intermediate
209 VPERMI2B Z4, Z0, Z1 // Z1 = bytes chosen from Z0/Z4 by outShufLo
210 VPERMI2B Z3, Z4, Z2 // Z2 = bytes chosen from Z4/Z3 by outShufHi
211 RET
212
// expandAVX512_4_inShuf0 is a 64-byte read-only table of byte indexes.
// expandAVX512_4 loads it into Z0 and uses it as the first VPERMB index
// vector applied to the input block read from (AX). Each 8-byte index group
// appears four times in a row.
213 GLOBL expandAVX512_4_inShuf0<>(SB), RODATA, $0x40
214 DATA expandAVX512_4_inShuf0<>+0x00(SB)/8, $0x0706050403020100
215 DATA expandAVX512_4_inShuf0<>+0x08(SB)/8, $0x0706050403020100
216 DATA expandAVX512_4_inShuf0<>+0x10(SB)/8, $0x0706050403020100
217 DATA expandAVX512_4_inShuf0<>+0x18(SB)/8, $0x0706050403020100
218 DATA expandAVX512_4_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
219 DATA expandAVX512_4_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
220 DATA expandAVX512_4_inShuf0<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
221 DATA expandAVX512_4_inShuf0<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908
222
// expandAVX512_4_mat0 is a 64-byte read-only table of eight 8-byte values
// (a four-value pattern stored twice). expandAVX512_4 loads it into Z1 and
// passes it as the matrix operand of both VGF2P8AFFINEQB instructions.
223 GLOBL expandAVX512_4_mat0<>(SB), RODATA, $0x40
224 DATA expandAVX512_4_mat0<>+0x00(SB)/8, $0x0101010102020202
225 DATA expandAVX512_4_mat0<>+0x08(SB)/8, $0x0404040408080808
226 DATA expandAVX512_4_mat0<>+0x10(SB)/8, $0x1010101020202020
227 DATA expandAVX512_4_mat0<>+0x18(SB)/8, $0x4040404080808080
228 DATA expandAVX512_4_mat0<>+0x20(SB)/8, $0x0101010102020202
229 DATA expandAVX512_4_mat0<>+0x28(SB)/8, $0x0404040408080808
230 DATA expandAVX512_4_mat0<>+0x30(SB)/8, $0x1010101020202020
231 DATA expandAVX512_4_mat0<>+0x38(SB)/8, $0x4040404080808080
232
// expandAVX512_4_inShuf1 is a 64-byte read-only table of byte indexes.
// expandAVX512_4 loads it into Z2 and uses it as the second VPERMB index
// vector applied to the input block. It has the same layout as
// expandAVX512_4_inShuf0 but addresses input bytes 0x10-0x1f.
233 GLOBL expandAVX512_4_inShuf1<>(SB), RODATA, $0x40
234 DATA expandAVX512_4_inShuf1<>+0x00(SB)/8, $0x1716151413121110
235 DATA expandAVX512_4_inShuf1<>+0x08(SB)/8, $0x1716151413121110
236 DATA expandAVX512_4_inShuf1<>+0x10(SB)/8, $0x1716151413121110
237 DATA expandAVX512_4_inShuf1<>+0x18(SB)/8, $0x1716151413121110
238 DATA expandAVX512_4_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918
239 DATA expandAVX512_4_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
240 DATA expandAVX512_4_inShuf1<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
241 DATA expandAVX512_4_inShuf1<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918
242
// expandAVX512_4_outShufLo is a 64-byte read-only table of byte indexes.
// expandAVX512_4 loads it into Z3 and uses it as the index vector of the two
// final VPERMB instructions that produce Z1 and Z2.
// NOTE(review): unlike the inShuf/mat tables this symbol is declared without
// the `<>` suffix, so it is not file-local — confirm that this is intended.
243 GLOBL expandAVX512_4_outShufLo(SB), RODATA, $0x40
244 DATA expandAVX512_4_outShufLo+0x00(SB)/8, $0x1911090118100800
245 DATA expandAVX512_4_outShufLo+0x08(SB)/8, $0x1b130b031a120a02
246 DATA expandAVX512_4_outShufLo+0x10(SB)/8, $0x1d150d051c140c04
247 DATA expandAVX512_4_outShufLo+0x18(SB)/8, $0x1f170f071e160e06
248 DATA expandAVX512_4_outShufLo+0x20(SB)/8, $0x3931292138302820
249 DATA expandAVX512_4_outShufLo+0x28(SB)/8, $0x3b332b233a322a22
250 DATA expandAVX512_4_outShufLo+0x30(SB)/8, $0x3d352d253c342c24
251 DATA expandAVX512_4_outShufLo+0x38(SB)/8, $0x3f372f273e362e26
252
// expandAVX512_4 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
// Two intermediate vectors are built from the input, each by a byte gather
// (VPERMB with an inShuf index table) followed by VGF2P8AFFINEQB with matrix
// mat0 and immediate 0. Each intermediate is then byte-permuted with
// outShufLo to form one output register.
// Z0, Z3 and Z4 are overwritten as scratch. NOSPLIT, no frame, no stack
// arguments ($0-0).
// NOTE(review): going by the name and the mat0 pattern, each input bit is
// presumably replicated 4 times in the output — confirm against mkasm.go.
253 TEXT expandAVX512_4<>(SB), NOSPLIT, $0-0
254 VMOVDQU64 expandAVX512_4_inShuf0<>(SB), Z0
255 VMOVDQU64 expandAVX512_4_mat0<>(SB), Z1
256 VMOVDQU64 expandAVX512_4_inShuf1<>(SB), Z2
257 VMOVDQU64 expandAVX512_4_outShufLo(SB), Z3
258 VMOVDQU64 (AX), Z4 // input block
259 VPERMB Z4, Z0, Z0 // gather input bytes by inShuf0
260 VGF2P8AFFINEQB $0, Z1, Z0, Z0
261 VPERMB Z4, Z2, Z2 // gather input bytes by inShuf1
262 VGF2P8AFFINEQB $0, Z1, Z2, Z2
263 VPERMB Z0, Z3, Z1 // Z1 (matrix no longer needed) = first output
264 VPERMB Z2, Z3, Z2 // second output
265 RET
266
// expandAVX512_6_inShuf0 is a 64-byte read-only table of byte indexes.
// expandAVX512_6 loads it into Z0 and uses it as the first VPERMB index
// vector applied to the input block read from (AX). Indexes 0x00-0x07 are
// stored six times; the last two 8-byte words are all 0xff.
267 GLOBL expandAVX512_6_inShuf0<>(SB), RODATA, $0x40
268 DATA expandAVX512_6_inShuf0<>+0x00(SB)/8, $0x0706050403020100
269 DATA expandAVX512_6_inShuf0<>+0x08(SB)/8, $0x0706050403020100
270 DATA expandAVX512_6_inShuf0<>+0x10(SB)/8, $0x0706050403020100
271 DATA expandAVX512_6_inShuf0<>+0x18(SB)/8, $0x0706050403020100
272 DATA expandAVX512_6_inShuf0<>+0x20(SB)/8, $0x0706050403020100
273 DATA expandAVX512_6_inShuf0<>+0x28(SB)/8, $0x0706050403020100
274 DATA expandAVX512_6_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff
275 DATA expandAVX512_6_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff
276
// expandAVX512_6_mat0 is a 64-byte read-only table of eight 8-byte values:
// six non-zero words followed by two zero words. expandAVX512_6 loads it into
// Z3 and passes it as the matrix operand of all three VGF2P8AFFINEQB
// instructions.
277 GLOBL expandAVX512_6_mat0<>(SB), RODATA, $0x40
278 DATA expandAVX512_6_mat0<>+0x00(SB)/8, $0x0101010101010202
279 DATA expandAVX512_6_mat0<>+0x08(SB)/8, $0x0202020204040404
280 DATA expandAVX512_6_mat0<>+0x10(SB)/8, $0x0404080808080808
281 DATA expandAVX512_6_mat0<>+0x18(SB)/8, $0x1010101010102020
282 DATA expandAVX512_6_mat0<>+0x20(SB)/8, $0x2020202040404040
283 DATA expandAVX512_6_mat0<>+0x28(SB)/8, $0x4040808080808080
284 DATA expandAVX512_6_mat0<>+0x30(SB)/8, $0x0000000000000000
285 DATA expandAVX512_6_mat0<>+0x38(SB)/8, $0x0000000000000000
286
// expandAVX512_6_inShuf1 is a 64-byte read-only table of byte indexes.
// expandAVX512_6 loads it into Z4 and uses it as the second VPERMB index
// vector applied to the input block. Indexes 0x08-0x0f are stored six times;
// the last two 8-byte words are all 0xff.
287 GLOBL expandAVX512_6_inShuf1<>(SB), RODATA, $0x40
288 DATA expandAVX512_6_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908
289 DATA expandAVX512_6_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
290 DATA expandAVX512_6_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
291 DATA expandAVX512_6_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
292 DATA expandAVX512_6_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
293 DATA expandAVX512_6_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
294 DATA expandAVX512_6_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff
295 DATA expandAVX512_6_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff
296
// expandAVX512_6_inShuf2 is a 64-byte read-only table of byte indexes.
// expandAVX512_6 loads it into Z5 and uses it as the third VPERMB index
// vector applied to the input block. Only indexes 0x10-0x15 appear; every
// other index byte is 0xff.
// NOTE(review): the 0xff bytes look like filler for lanes whose result is not
// consumed — confirm against mkasm.go.
297 GLOBL expandAVX512_6_inShuf2<>(SB), RODATA, $0x40
298 DATA expandAVX512_6_inShuf2<>+0x00(SB)/8, $0xffff151413121110
299 DATA expandAVX512_6_inShuf2<>+0x08(SB)/8, $0xffff151413121110
300 DATA expandAVX512_6_inShuf2<>+0x10(SB)/8, $0xffffff1413121110
301 DATA expandAVX512_6_inShuf2<>+0x18(SB)/8, $0xffffff1413121110
302 DATA expandAVX512_6_inShuf2<>+0x20(SB)/8, $0xffffff1413121110
303 DATA expandAVX512_6_inShuf2<>+0x28(SB)/8, $0xffffff1413121110
304 DATA expandAVX512_6_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
305 DATA expandAVX512_6_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
306
// expandAVX512_6_outShufLo is a 64-byte read-only table of byte indexes.
// expandAVX512_6 loads it into Z1, where it serves as the index vector of the
// first VPERMI2B and is overwritten by that instruction's result.
// NOTE(review): unlike the inShuf/mat tables this symbol is declared without
// the `<>` suffix, so it is not file-local — confirm that this is intended.
307 GLOBL expandAVX512_6_outShufLo(SB), RODATA, $0x40
308 DATA expandAVX512_6_outShufLo+0x00(SB)/8, $0x0901282018100800
309 DATA expandAVX512_6_outShufLo+0x08(SB)/8, $0x1a120a0229211911
310 DATA expandAVX512_6_outShufLo+0x10(SB)/8, $0x2b231b130b032a22
311 DATA expandAVX512_6_outShufLo+0x18(SB)/8, $0x0d052c241c140c04
312 DATA expandAVX512_6_outShufLo+0x20(SB)/8, $0x1e160e062d251d15
313 DATA expandAVX512_6_outShufLo+0x28(SB)/8, $0x2f271f170f072e26
314 DATA expandAVX512_6_outShufLo+0x30(SB)/8, $0x4941686058504840
315 DATA expandAVX512_6_outShufLo+0x38(SB)/8, $0x5a524a4269615951
316
// expandAVX512_6_outShufHi is a 64-byte read-only table of byte indexes.
// expandAVX512_6 loads it into Z2, where it serves as the index vector of the
// second VPERMI2B and is overwritten by that instruction's result.
// NOTE(review): unlike the inShuf/mat tables this symbol is declared without
// the `<>` suffix, so it is not file-local — confirm that this is intended.
317 GLOBL expandAVX512_6_outShufHi(SB), RODATA, $0x40
318 DATA expandAVX512_6_outShufHi+0x00(SB)/8, $0x2b231b130b032a22
319 DATA expandAVX512_6_outShufHi+0x08(SB)/8, $0x0d052c241c140c04
320 DATA expandAVX512_6_outShufHi+0x10(SB)/8, $0x1e160e062d251d15
321 DATA expandAVX512_6_outShufHi+0x18(SB)/8, $0x2f271f170f072e26
322 DATA expandAVX512_6_outShufHi+0x20(SB)/8, $0x4941686058504840
323 DATA expandAVX512_6_outShufHi+0x28(SB)/8, $0x5a524a4269615951
324 DATA expandAVX512_6_outShufHi+0x30(SB)/8, $0x6b635b534b436a62
325 DATA expandAVX512_6_outShufHi+0x38(SB)/8, $0x4d456c645c544c44
326
// expandAVX512_6 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
// Three intermediate vectors are built from the input, each by a byte gather
// (VPERMB with inShuf0/1/2) followed by VGF2P8AFFINEQB with matrix mat0 and
// immediate 0. Two VPERMI2B instructions then pick output bytes from pairs of
// intermediates using the outShufLo and outShufHi index tables.
// Z0, Z3, Z4, Z5 and Z6 are overwritten as scratch. NOSPLIT, no frame, no
// stack arguments ($0-0).
// NOTE(review): going by the name and the mat0 pattern, each input bit is
// presumably replicated 6 times in the output — confirm against mkasm.go.
327 TEXT expandAVX512_6<>(SB), NOSPLIT, $0-0
328 VMOVDQU64 expandAVX512_6_inShuf0<>(SB), Z0
329 VMOVDQU64 expandAVX512_6_mat0<>(SB), Z3
330 VMOVDQU64 expandAVX512_6_inShuf1<>(SB), Z4
331 VMOVDQU64 expandAVX512_6_inShuf2<>(SB), Z5
332 VMOVDQU64 expandAVX512_6_outShufLo(SB), Z1
333 VMOVDQU64 expandAVX512_6_outShufHi(SB), Z2
334 VMOVDQU64 (AX), Z6 // input block
335 VPERMB Z6, Z0, Z0 // gather input bytes by inShuf0
336 VGF2P8AFFINEQB $0, Z3, Z0, Z0
337 VPERMB Z6, Z4, Z4 // gather input bytes by inShuf1
338 VGF2P8AFFINEQB $0, Z3, Z4, Z4
339 VPERMB Z6, Z5, Z5 // gather input bytes by inShuf2
340 VGF2P8AFFINEQB $0, Z3, Z5, Z3 // last use of the matrix: Z3 now holds the third intermediate
341 VPERMI2B Z4, Z0, Z1 // Z1 = bytes chosen from Z0/Z4 by outShufLo
342 VPERMI2B Z3, Z4, Z2 // Z2 = bytes chosen from Z4/Z3 by outShufHi
343 RET
344
// expandAVX512_8_inShuf0 is a 64-byte read-only table of byte indexes.
// expandAVX512_8 loads it into Z0 and uses it as the first VPERMB index
// vector applied to the input block read from (AX). Indexes 0x00-0x07 are
// stored eight times.
345 GLOBL expandAVX512_8_inShuf0<>(SB), RODATA, $0x40
346 DATA expandAVX512_8_inShuf0<>+0x00(SB)/8, $0x0706050403020100
347 DATA expandAVX512_8_inShuf0<>+0x08(SB)/8, $0x0706050403020100
348 DATA expandAVX512_8_inShuf0<>+0x10(SB)/8, $0x0706050403020100
349 DATA expandAVX512_8_inShuf0<>+0x18(SB)/8, $0x0706050403020100
350 DATA expandAVX512_8_inShuf0<>+0x20(SB)/8, $0x0706050403020100
351 DATA expandAVX512_8_inShuf0<>+0x28(SB)/8, $0x0706050403020100
352 DATA expandAVX512_8_inShuf0<>+0x30(SB)/8, $0x0706050403020100
353 DATA expandAVX512_8_inShuf0<>+0x38(SB)/8, $0x0706050403020100
354
// expandAVX512_8_mat0 is a 64-byte read-only table of eight 8-byte values;
// word k consists of eight copies of the byte (1 << k). expandAVX512_8 loads
// it into Z1 and passes it as the matrix operand of both VGF2P8AFFINEQB
// instructions.
355 GLOBL expandAVX512_8_mat0<>(SB), RODATA, $0x40
356 DATA expandAVX512_8_mat0<>+0x00(SB)/8, $0x0101010101010101
357 DATA expandAVX512_8_mat0<>+0x08(SB)/8, $0x0202020202020202
358 DATA expandAVX512_8_mat0<>+0x10(SB)/8, $0x0404040404040404
359 DATA expandAVX512_8_mat0<>+0x18(SB)/8, $0x0808080808080808
360 DATA expandAVX512_8_mat0<>+0x20(SB)/8, $0x1010101010101010
361 DATA expandAVX512_8_mat0<>+0x28(SB)/8, $0x2020202020202020
362 DATA expandAVX512_8_mat0<>+0x30(SB)/8, $0x4040404040404040
363 DATA expandAVX512_8_mat0<>+0x38(SB)/8, $0x8080808080808080
364
// expandAVX512_8_inShuf1 is a 64-byte read-only table of byte indexes.
// expandAVX512_8 loads it into Z2 and uses it as the second VPERMB index
// vector applied to the input block. Indexes 0x08-0x0f are stored eight
// times.
365 GLOBL expandAVX512_8_inShuf1<>(SB), RODATA, $0x40
366 DATA expandAVX512_8_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908
367 DATA expandAVX512_8_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
368 DATA expandAVX512_8_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
369 DATA expandAVX512_8_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
370 DATA expandAVX512_8_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
371 DATA expandAVX512_8_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
372 DATA expandAVX512_8_inShuf1<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
373 DATA expandAVX512_8_inShuf1<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908
374
// expandAVX512_8_outShufLo is a 64-byte read-only table of byte indexes.
// expandAVX512_8 loads it into Z3 and uses it as the index vector of the two
// final VPERMB instructions that produce Z1 and Z2.
// NOTE(review): unlike the inShuf/mat tables this symbol is declared without
// the `<>` suffix, so it is not file-local — confirm that this is intended.
375 GLOBL expandAVX512_8_outShufLo(SB), RODATA, $0x40
376 DATA expandAVX512_8_outShufLo+0x00(SB)/8, $0x3830282018100800
377 DATA expandAVX512_8_outShufLo+0x08(SB)/8, $0x3931292119110901
378 DATA expandAVX512_8_outShufLo+0x10(SB)/8, $0x3a322a221a120a02
379 DATA expandAVX512_8_outShufLo+0x18(SB)/8, $0x3b332b231b130b03
380 DATA expandAVX512_8_outShufLo+0x20(SB)/8, $0x3c342c241c140c04
381 DATA expandAVX512_8_outShufLo+0x28(SB)/8, $0x3d352d251d150d05
382 DATA expandAVX512_8_outShufLo+0x30(SB)/8, $0x3e362e261e160e06
383 DATA expandAVX512_8_outShufLo+0x38(SB)/8, $0x3f372f271f170f07
384
// expandAVX512_8 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
// Two intermediate vectors are built from the input, each by a byte gather
// (VPERMB with an inShuf index table) followed by VGF2P8AFFINEQB with matrix
// mat0 and immediate 0. Each intermediate is then byte-permuted with
// outShufLo to form one output register.
// Z0, Z3 and Z4 are overwritten as scratch. NOSPLIT, no frame, no stack
// arguments ($0-0).
// NOTE(review): going by the name and the mat0 pattern, each input bit is
// presumably replicated 8 times in the output — confirm against mkasm.go.
385 TEXT expandAVX512_8<>(SB), NOSPLIT, $0-0
386 VMOVDQU64 expandAVX512_8_inShuf0<>(SB), Z0
387 VMOVDQU64 expandAVX512_8_mat0<>(SB), Z1
388 VMOVDQU64 expandAVX512_8_inShuf1<>(SB), Z2
389 VMOVDQU64 expandAVX512_8_outShufLo(SB), Z3
390 VMOVDQU64 (AX), Z4 // input block
391 VPERMB Z4, Z0, Z0 // gather input bytes by inShuf0
392 VGF2P8AFFINEQB $0, Z1, Z0, Z0
393 VPERMB Z4, Z2, Z2 // gather input bytes by inShuf1
394 VGF2P8AFFINEQB $0, Z1, Z2, Z2
395 VPERMB Z0, Z3, Z1 // Z1 (matrix no longer needed) = first output
396 VPERMB Z2, Z3, Z2 // second output
397 RET
398
// expandAVX512_10_inShuf0 is a 64-byte read-only table of byte indexes.
// expandAVX512_10 loads it into Z0 and uses it as the first VPERMB index
// vector applied to the input block read from (AX). Only indexes 0x00-0x06
// appear; the top one or two bytes of each 8-byte word are 0xff.
// NOTE(review): the 0xff bytes look like filler for lanes whose result is not
// consumed — confirm against mkasm.go.
399 GLOBL expandAVX512_10_inShuf0<>(SB), RODATA, $0x40
400 DATA expandAVX512_10_inShuf0<>+0x00(SB)/8, $0xff06050403020100
401 DATA expandAVX512_10_inShuf0<>+0x08(SB)/8, $0xff06050403020100
402 DATA expandAVX512_10_inShuf0<>+0x10(SB)/8, $0xff06050403020100
403 DATA expandAVX512_10_inShuf0<>+0x18(SB)/8, $0xff06050403020100
404 DATA expandAVX512_10_inShuf0<>+0x20(SB)/8, $0xffff050403020100
405 DATA expandAVX512_10_inShuf0<>+0x28(SB)/8, $0xffff050403020100
406 DATA expandAVX512_10_inShuf0<>+0x30(SB)/8, $0xffff050403020100
407 DATA expandAVX512_10_inShuf0<>+0x38(SB)/8, $0xffff050403020100
408
// expandAVX512_10_mat0 is a 64-byte read-only table of eight 8-byte values.
// expandAVX512_10 uses it directly from memory as the matrix operand of its
// first VGF2P8AFFINEQB (the one applied to the inShuf0 gather).
409 GLOBL expandAVX512_10_mat0<>(SB), RODATA, $0x40
410 DATA expandAVX512_10_mat0<>+0x00(SB)/8, $0x0101010101010101
411 DATA expandAVX512_10_mat0<>+0x08(SB)/8, $0x0101020202020202
412 DATA expandAVX512_10_mat0<>+0x10(SB)/8, $0x0202020204040404
413 DATA expandAVX512_10_mat0<>+0x18(SB)/8, $0x0404040404040808
414 DATA expandAVX512_10_mat0<>+0x20(SB)/8, $0x0808080808080808
415 DATA expandAVX512_10_mat0<>+0x28(SB)/8, $0x1010101010101010
416 DATA expandAVX512_10_mat0<>+0x30(SB)/8, $0x1010202020202020
417 DATA expandAVX512_10_mat0<>+0x38(SB)/8, $0x2020202040404040
418
// expandAVX512_10_inShuf1 is a 64-byte read-only table of byte indexes.
// expandAVX512_10 loads it into Z3 and uses it as the second VPERMB index
// vector applied to the input block. The first two words address bytes
// 0x00-0x05 and the remaining six address bytes 0x06-0x0c; unused positions
// are 0xff.
// NOTE(review): the 0xff bytes look like filler for lanes whose result is not
// consumed — confirm against mkasm.go.
419 GLOBL expandAVX512_10_inShuf1<>(SB), RODATA, $0x40
420 DATA expandAVX512_10_inShuf1<>+0x00(SB)/8, $0xffff050403020100
421 DATA expandAVX512_10_inShuf1<>+0x08(SB)/8, $0xffff050403020100
422 DATA expandAVX512_10_inShuf1<>+0x10(SB)/8, $0xff0c0b0a09080706
423 DATA expandAVX512_10_inShuf1<>+0x18(SB)/8, $0xff0c0b0a09080706
424 DATA expandAVX512_10_inShuf1<>+0x20(SB)/8, $0xff0c0b0a09080706
425 DATA expandAVX512_10_inShuf1<>+0x28(SB)/8, $0xff0c0b0a09080706
426 DATA expandAVX512_10_inShuf1<>+0x30(SB)/8, $0xffff0b0a09080706
427 DATA expandAVX512_10_inShuf1<>+0x38(SB)/8, $0xffff0b0a09080706
428
// expandAVX512_10_mat1 is a 64-byte read-only table of eight 8-byte values.
// expandAVX512_10 uses it directly from memory as the matrix operand of its
// second VGF2P8AFFINEQB (the one applied to the inShuf1 gather).
429 GLOBL expandAVX512_10_mat1<>(SB), RODATA, $0x40
430 DATA expandAVX512_10_mat1<>+0x00(SB)/8, $0x4040404040408080
431 DATA expandAVX512_10_mat1<>+0x08(SB)/8, $0x8080808080808080
432 DATA expandAVX512_10_mat1<>+0x10(SB)/8, $0x0808080808080808
433 DATA expandAVX512_10_mat1<>+0x18(SB)/8, $0x1010101010101010
434 DATA expandAVX512_10_mat1<>+0x20(SB)/8, $0x1010202020202020
435 DATA expandAVX512_10_mat1<>+0x28(SB)/8, $0x2020202040404040
436 DATA expandAVX512_10_mat1<>+0x30(SB)/8, $0x4040404040408080
437 DATA expandAVX512_10_mat1<>+0x38(SB)/8, $0x8080808080808080
438
// expandAVX512_10_inShuf2 is a 64-byte read-only table of byte indexes.
// expandAVX512_10 loads it into Z4 and uses it as the third VPERMB index
// vector applied to the input block. The first four words address bytes
// 0x07-0x0c (top two bytes 0xff); the last four words are all 0xff.
// NOTE(review): the 0xff bytes look like filler for lanes whose result is not
// consumed — confirm against mkasm.go.
439 GLOBL expandAVX512_10_inShuf2<>(SB), RODATA, $0x40
440 DATA expandAVX512_10_inShuf2<>+0x00(SB)/8, $0xffff0c0b0a090807
441 DATA expandAVX512_10_inShuf2<>+0x08(SB)/8, $0xffff0c0b0a090807
442 DATA expandAVX512_10_inShuf2<>+0x10(SB)/8, $0xffff0c0b0a090807
443 DATA expandAVX512_10_inShuf2<>+0x18(SB)/8, $0xffff0c0b0a090807
444 DATA expandAVX512_10_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff
445 DATA expandAVX512_10_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
446 DATA expandAVX512_10_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
447 DATA expandAVX512_10_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
448
// expandAVX512_10_mat2 is a 64-byte read-only table of eight 8-byte values:
// four non-zero words (equal to the first four words of expandAVX512_10_mat0)
// followed by four zero words. expandAVX512_10 uses it directly from memory
// as the matrix operand of its third VGF2P8AFFINEQB (applied to the inShuf2
// gather).
449 GLOBL expandAVX512_10_mat2<>(SB), RODATA, $0x40
450 DATA expandAVX512_10_mat2<>+0x00(SB)/8, $0x0101010101010101
451 DATA expandAVX512_10_mat2<>+0x08(SB)/8, $0x0101020202020202
452 DATA expandAVX512_10_mat2<>+0x10(SB)/8, $0x0202020204040404
453 DATA expandAVX512_10_mat2<>+0x18(SB)/8, $0x0404040404040808
454 DATA expandAVX512_10_mat2<>+0x20(SB)/8, $0x0000000000000000
455 DATA expandAVX512_10_mat2<>+0x28(SB)/8, $0x0000000000000000
456 DATA expandAVX512_10_mat2<>+0x30(SB)/8, $0x0000000000000000
457 DATA expandAVX512_10_mat2<>+0x38(SB)/8, $0x0000000000000000
458
// expandAVX512_10_outShufLo is a 64-byte read-only table of byte indexes.
// expandAVX512_10 loads it into Z1, where it serves as the index vector of
// the first VPERMI2B and is overwritten by that instruction's result.
// NOTE(review): unlike the inShuf/mat tables this symbol is declared without
// the `<>` suffix, so it is not file-local — confirm that this is intended.
459 GLOBL expandAVX512_10_outShufLo(SB), RODATA, $0x40
460 DATA expandAVX512_10_outShufLo+0x00(SB)/8, $0x3830282018100800
461 DATA expandAVX512_10_outShufLo+0x08(SB)/8, $0x2921191109014840
462 DATA expandAVX512_10_outShufLo+0x10(SB)/8, $0x1a120a0249413931
463 DATA expandAVX512_10_outShufLo+0x18(SB)/8, $0x0b034a423a322a22
464 DATA expandAVX512_10_outShufLo+0x20(SB)/8, $0x4b433b332b231b13
465 DATA expandAVX512_10_outShufLo+0x28(SB)/8, $0x3c342c241c140c04
466 DATA expandAVX512_10_outShufLo+0x30(SB)/8, $0x2d251d150d054c44
467 DATA expandAVX512_10_outShufLo+0x38(SB)/8, $0x1e160e064d453d35
468
// expandAVX512_10_outShufHi is a 64-byte read-only table of byte indexes.
// expandAVX512_10 loads it into Z2, where it serves as the index vector of
// the second VPERMI2B and is overwritten by that instruction's result.
// NOTE(review): unlike the inShuf/mat tables this symbol is declared without
// the `<>` suffix, so it is not file-local — confirm that this is intended.
469 GLOBL expandAVX512_10_outShufHi(SB), RODATA, $0x40
470 DATA expandAVX512_10_outShufHi+0x00(SB)/8, $0x4840383028201810
471 DATA expandAVX512_10_outShufHi+0x08(SB)/8, $0x3931292119115850
472 DATA expandAVX512_10_outShufHi+0x10(SB)/8, $0x2a221a1259514941
473 DATA expandAVX512_10_outShufHi+0x18(SB)/8, $0x1b135a524a423a32
474 DATA expandAVX512_10_outShufHi+0x20(SB)/8, $0x5b534b433b332b23
475 DATA expandAVX512_10_outShufHi+0x28(SB)/8, $0x4c443c342c241c14
476 DATA expandAVX512_10_outShufHi+0x30(SB)/8, $0x3d352d251d155c54
477 DATA expandAVX512_10_outShufHi+0x38(SB)/8, $0x2e261e165d554d45
478
// expandAVX512_10 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
// Three intermediate vectors are built from the input, each by a byte gather
// (VPERMB with inShuf0/1/2) followed by VGF2P8AFFINEQB with immediate 0 and
// its own matrix (mat0/1/2), which is read straight from memory. Two VPERMI2B
// instructions then pick output bytes from pairs of intermediates using the
// outShufLo and outShufHi index tables.
// Z0, Z3, Z4 and Z5 are overwritten as scratch. NOSPLIT, no frame, no stack
// arguments ($0-0).
// NOTE(review): going by the name and the matrix patterns, each input bit is
// presumably replicated 10 times in the output — confirm against mkasm.go.
479 TEXT expandAVX512_10<>(SB), NOSPLIT, $0-0
480 VMOVDQU64 expandAVX512_10_inShuf0<>(SB), Z0
481 VMOVDQU64 expandAVX512_10_inShuf1<>(SB), Z3
482 VMOVDQU64 expandAVX512_10_inShuf2<>(SB), Z4
483 VMOVDQU64 expandAVX512_10_outShufLo(SB), Z1
484 VMOVDQU64 expandAVX512_10_outShufHi(SB), Z2
485 VMOVDQU64 (AX), Z5 // input block
486 VPERMB Z5, Z0, Z0 // gather input bytes by inShuf0
487 VGF2P8AFFINEQB $0, expandAVX512_10_mat0<>(SB), Z0, Z0
488 VPERMB Z5, Z3, Z3 // gather input bytes by inShuf1
489 VGF2P8AFFINEQB $0, expandAVX512_10_mat1<>(SB), Z3, Z3
490 VPERMB Z5, Z4, Z4 // gather input bytes by inShuf2
491 VGF2P8AFFINEQB $0, expandAVX512_10_mat2<>(SB), Z4, Z4
492 VPERMI2B Z3, Z0, Z1 // Z1 = bytes chosen from Z0/Z3 by outShufLo
493 VPERMI2B Z4, Z3, Z2 // Z2 = bytes chosen from Z3/Z4 by outShufHi
494 RET
495
// expandAVX512_12_inShuf0 is a 64-byte read-only table of byte indexes.
// expandAVX512_12 loads it into Z0 and uses it as the first VPERMB index
// vector applied to the input block read from (AX). Only indexes 0x00-0x05
// appear; the top two or three bytes of each 8-byte word are 0xff.
// NOTE(review): the 0xff bytes look like filler for lanes whose result is not
// consumed — confirm against mkasm.go.
496 GLOBL expandAVX512_12_inShuf0<>(SB), RODATA, $0x40
497 DATA expandAVX512_12_inShuf0<>+0x00(SB)/8, $0xffff050403020100
498 DATA expandAVX512_12_inShuf0<>+0x08(SB)/8, $0xffff050403020100
499 DATA expandAVX512_12_inShuf0<>+0x10(SB)/8, $0xffff050403020100
500 DATA expandAVX512_12_inShuf0<>+0x18(SB)/8, $0xffff050403020100
501 DATA expandAVX512_12_inShuf0<>+0x20(SB)/8, $0xffffff0403020100
502 DATA expandAVX512_12_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
503 DATA expandAVX512_12_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
504 DATA expandAVX512_12_inShuf0<>+0x38(SB)/8, $0xffffff0403020100
505
// expandAVX512_12_mat0 is a 64-byte read-only table of eight 8-byte values.
// expandAVX512_12 uses it directly from memory as the matrix operand of its
// first VGF2P8AFFINEQB (the one applied to the inShuf0 gather).
506 GLOBL expandAVX512_12_mat0<>(SB), RODATA, $0x40
507 DATA expandAVX512_12_mat0<>+0x00(SB)/8, $0x0101010101010101
508 DATA expandAVX512_12_mat0<>+0x08(SB)/8, $0x0101010102020202
509 DATA expandAVX512_12_mat0<>+0x10(SB)/8, $0x0202020202020202
510 DATA expandAVX512_12_mat0<>+0x18(SB)/8, $0x0404040404040404
511 DATA expandAVX512_12_mat0<>+0x20(SB)/8, $0x0404040408080808
512 DATA expandAVX512_12_mat0<>+0x28(SB)/8, $0x0808080808080808
513 DATA expandAVX512_12_mat0<>+0x30(SB)/8, $0x1010101010101010
514 DATA expandAVX512_12_mat0<>+0x38(SB)/8, $0x1010101020202020
515
// expandAVX512_12_inShuf1 is a 64-byte read-only table of byte indexes.
// expandAVX512_12 loads it into Z3 and uses it as the second VPERMB index
// vector applied to the input block. The first four words address bytes
// 0x00-0x04 and the last four address bytes 0x05-0x0a; unused positions are
// 0xff.
// NOTE(review): the 0xff bytes look like filler for lanes whose result is not
// consumed — confirm against mkasm.go.
516 GLOBL expandAVX512_12_inShuf1<>(SB), RODATA, $0x40
517 DATA expandAVX512_12_inShuf1<>+0x00(SB)/8, $0xffffff0403020100
518 DATA expandAVX512_12_inShuf1<>+0x08(SB)/8, $0xffffff0403020100
519 DATA expandAVX512_12_inShuf1<>+0x10(SB)/8, $0xffffff0403020100
520 DATA expandAVX512_12_inShuf1<>+0x18(SB)/8, $0xffffff0403020100
521 DATA expandAVX512_12_inShuf1<>+0x20(SB)/8, $0xffff0a0908070605
522 DATA expandAVX512_12_inShuf1<>+0x28(SB)/8, $0xffff0a0908070605
523 DATA expandAVX512_12_inShuf1<>+0x30(SB)/8, $0xffff0a0908070605
524 DATA expandAVX512_12_inShuf1<>+0x38(SB)/8, $0xffff0a0908070605
525
// expandAVX512_12_mat1 is a 64-byte read-only table of eight 8-byte values.
// expandAVX512_12 uses it directly from memory as the matrix operand of its
// second VGF2P8AFFINEQB (the one applied to the inShuf1 gather).
526 GLOBL expandAVX512_12_mat1<>(SB), RODATA, $0x40
527 DATA expandAVX512_12_mat1<>+0x00(SB)/8, $0x2020202020202020
528 DATA expandAVX512_12_mat1<>+0x08(SB)/8, $0x4040404040404040
529 DATA expandAVX512_12_mat1<>+0x10(SB)/8, $0x4040404080808080
530 DATA expandAVX512_12_mat1<>+0x18(SB)/8, $0x8080808080808080
531 DATA expandAVX512_12_mat1<>+0x20(SB)/8, $0x0404040408080808
532 DATA expandAVX512_12_mat1<>+0x28(SB)/8, $0x0808080808080808
533 DATA expandAVX512_12_mat1<>+0x30(SB)/8, $0x1010101010101010
534 DATA expandAVX512_12_mat1<>+0x38(SB)/8, $0x1010101020202020
535
// expandAVX512_12_inShuf2 is a 64-byte read-only table of byte indexes.
// expandAVX512_12 loads it into Z4 and uses it as the third VPERMB index
// vector applied to the input block. The first four words address bytes
// 0x05-0x09 and the last four address bytes 0x06-0x0a; the top three bytes
// of every word are 0xff.
// NOTE(review): the 0xff bytes look like filler for lanes whose result is not
// consumed — confirm against mkasm.go.
536 GLOBL expandAVX512_12_inShuf2<>(SB), RODATA, $0x40
537 DATA expandAVX512_12_inShuf2<>+0x00(SB)/8, $0xffffff0908070605
538 DATA expandAVX512_12_inShuf2<>+0x08(SB)/8, $0xffffff0908070605
539 DATA expandAVX512_12_inShuf2<>+0x10(SB)/8, $0xffffff0908070605
540 DATA expandAVX512_12_inShuf2<>+0x18(SB)/8, $0xffffff0908070605
541 DATA expandAVX512_12_inShuf2<>+0x20(SB)/8, $0xffffff0a09080706
542 DATA expandAVX512_12_inShuf2<>+0x28(SB)/8, $0xffffff0a09080706
543 DATA expandAVX512_12_inShuf2<>+0x30(SB)/8, $0xffffff0a09080706
544 DATA expandAVX512_12_inShuf2<>+0x38(SB)/8, $0xffffff0a09080706
545
// expandAVX512_12_mat2 is a 64-byte read-only table of eight 8-byte values.
// expandAVX512_12 uses it directly from memory as the matrix operand of its
// third VGF2P8AFFINEQB (the one applied to the inShuf2 gather).
546 GLOBL expandAVX512_12_mat2<>(SB), RODATA, $0x40
547 DATA expandAVX512_12_mat2<>+0x00(SB)/8, $0x2020202020202020
548 DATA expandAVX512_12_mat2<>+0x08(SB)/8, $0x4040404040404040
549 DATA expandAVX512_12_mat2<>+0x10(SB)/8, $0x4040404080808080
550 DATA expandAVX512_12_mat2<>+0x18(SB)/8, $0x8080808080808080
551 DATA expandAVX512_12_mat2<>+0x20(SB)/8, $0x0101010101010101
552 DATA expandAVX512_12_mat2<>+0x28(SB)/8, $0x0101010102020202
553 DATA expandAVX512_12_mat2<>+0x30(SB)/8, $0x0202020202020202
554 DATA expandAVX512_12_mat2<>+0x38(SB)/8, $0x0404040404040404
555
// expandAVX512_12_outShufLo is a 64-byte read-only table of byte indexes.
// expandAVX512_12 loads it into Z1, where it serves as the index vector of
// the first VPERMI2B and is overwritten by that instruction's result.
// NOTE(review): unlike the inShuf/mat tables this symbol is declared without
// the `<>` suffix, so it is not file-local — confirm that this is intended.
556 GLOBL expandAVX512_12_outShufLo(SB), RODATA, $0x40
557 DATA expandAVX512_12_outShufLo+0x00(SB)/8, $0x3830282018100800
558 DATA expandAVX512_12_outShufLo+0x08(SB)/8, $0x1911090158504840
559 DATA expandAVX512_12_outShufLo+0x10(SB)/8, $0x5951494139312921
560 DATA expandAVX512_12_outShufLo+0x18(SB)/8, $0x3a322a221a120a02
561 DATA expandAVX512_12_outShufLo+0x20(SB)/8, $0x1b130b035a524a42
562 DATA expandAVX512_12_outShufLo+0x28(SB)/8, $0x5b534b433b332b23
563 DATA expandAVX512_12_outShufLo+0x30(SB)/8, $0x3c342c241c140c04
564 DATA expandAVX512_12_outShufLo+0x38(SB)/8, $0x1d150d055c544c44
565
// expandAVX512_12_outShufHi is a 64-byte read-only table of byte indexes.
// expandAVX512_12 loads it into Z2, where it serves as the index vector of
// the second VPERMI2B and is overwritten by that instruction's result.
// NOTE(review): unlike the inShuf/mat tables this symbol is declared without
// the `<>` suffix, so it is not file-local — confirm that this is intended.
566 GLOBL expandAVX512_12_outShufHi(SB), RODATA, $0x40
567 DATA expandAVX512_12_outShufHi+0x00(SB)/8, $0x5850484038302820
568 DATA expandAVX512_12_outShufHi+0x08(SB)/8, $0x3931292178706860
569 DATA expandAVX512_12_outShufHi+0x10(SB)/8, $0x7971696159514941
570 DATA expandAVX512_12_outShufHi+0x18(SB)/8, $0x5a524a423a322a22
571 DATA expandAVX512_12_outShufHi+0x20(SB)/8, $0x3b332b237a726a62
572 DATA expandAVX512_12_outShufHi+0x28(SB)/8, $0x7b736b635b534b43
573 DATA expandAVX512_12_outShufHi+0x30(SB)/8, $0x5c544c443c342c24
574 DATA expandAVX512_12_outShufHi+0x38(SB)/8, $0x3d352d257c746c64
575
// expandAVX512_12 reads 64 bytes at (AX) and leaves its result in Z1 and Z2.
// Three intermediate vectors are built from the input, each by a byte gather
// (VPERMB with inShuf0/1/2) followed by VGF2P8AFFINEQB with immediate 0 and
// its own matrix (mat0/1/2), which is read straight from memory. Two VPERMI2B
// instructions then pick output bytes from pairs of intermediates using the
// outShufLo and outShufHi index tables.
// Z0, Z3, Z4 and Z5 are overwritten as scratch. NOSPLIT, no frame, no stack
// arguments ($0-0).
// NOTE(review): going by the name and the matrix patterns, each input bit is
// presumably replicated 12 times in the output — confirm against mkasm.go.
576 TEXT expandAVX512_12<>(SB), NOSPLIT, $0-0
577 VMOVDQU64 expandAVX512_12_inShuf0<>(SB), Z0
578 VMOVDQU64 expandAVX512_12_inShuf1<>(SB), Z3
579 VMOVDQU64 expandAVX512_12_inShuf2<>(SB), Z4
580 VMOVDQU64 expandAVX512_12_outShufLo(SB), Z1
581 VMOVDQU64 expandAVX512_12_outShufHi(SB), Z2
582 VMOVDQU64 (AX), Z5 // input block
583 VPERMB Z5, Z0, Z0 // gather input bytes by inShuf0
584 VGF2P8AFFINEQB $0, expandAVX512_12_mat0<>(SB), Z0, Z0
585 VPERMB Z5, Z3, Z3 // gather input bytes by inShuf1
586 VGF2P8AFFINEQB $0, expandAVX512_12_mat1<>(SB), Z3, Z3
587 VPERMB Z5, Z4, Z4 // gather input bytes by inShuf2
588 VGF2P8AFFINEQB $0, expandAVX512_12_mat2<>(SB), Z4, Z4
589 VPERMI2B Z3, Z0, Z1 // Z1 = bytes chosen from Z0/Z3 by outShufLo
590 VPERMI2B Z4, Z3, Z2 // Z2 = bytes chosen from Z3/Z4 by outShufHi
591 RET
592
// expandAVX512_14_inShuf0 is a 64-byte read-only, file-local table: eight
// identical 8-byte words holding byte indexes 0x00-0x04 with the top three
// bytes set to 0xff.
// NOTE(review): presumably the first VPERMB index vector of expandAVX512_14,
// by analogy with the other expanders; that routine is not visible in this
// part of the file — confirm.
593 GLOBL expandAVX512_14_inShuf0<>(SB), RODATA, $0x40
594 DATA expandAVX512_14_inShuf0<>+0x00(SB)/8, $0xffffff0403020100
595 DATA expandAVX512_14_inShuf0<>+0x08(SB)/8, $0xffffff0403020100
596 DATA expandAVX512_14_inShuf0<>+0x10(SB)/8, $0xffffff0403020100
597 DATA expandAVX512_14_inShuf0<>+0x18(SB)/8, $0xffffff0403020100
598 DATA expandAVX512_14_inShuf0<>+0x20(SB)/8, $0xffffff0403020100
599 DATA expandAVX512_14_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
600 DATA expandAVX512_14_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
601 DATA expandAVX512_14_inShuf0<>+0x38(SB)/8, $0xffffff0403020100
602
// expandAVX512_14_mat0 is a 64-byte read-only, file-local table of eight
// 8-byte values.
// NOTE(review): presumably the VGF2P8AFFINEQB matrix paired with
// expandAVX512_14_inShuf0, by analogy with the other expanders; the consuming
// routine is not visible in this part of the file — confirm.
603 GLOBL expandAVX512_14_mat0<>(SB), RODATA, $0x40
604 DATA expandAVX512_14_mat0<>+0x00(SB)/8, $0x0101010101010101
605 DATA expandAVX512_14_mat0<>+0x08(SB)/8, $0x0101010101010202
606 DATA expandAVX512_14_mat0<>+0x10(SB)/8, $0x0202020202020202
607 DATA expandAVX512_14_mat0<>+0x18(SB)/8, $0x0202020204040404
608 DATA expandAVX512_14_mat0<>+0x20(SB)/8, $0x0404040404040404
609 DATA expandAVX512_14_mat0<>+0x28(SB)/8, $0x0404080808080808
610 DATA expandAVX512_14_mat0<>+0x30(SB)/8, $0x0808080808080808
611 DATA expandAVX512_14_mat0<>+0x38(SB)/8, $0x1010101010101010
612
// expandAVX512_14_inShuf1 is a 64-byte read-only, file-local table of byte
// indexes: six words addressing bytes 0x00-0x03 followed by two words
// addressing bytes 0x04-0x08; unused positions are 0xff.
// NOTE(review): presumably the second VPERMB index vector of expandAVX512_14,
// by analogy with the other expanders; that routine is not visible in this
// part of the file — confirm.
613 GLOBL expandAVX512_14_inShuf1<>(SB), RODATA, $0x40
614 DATA expandAVX512_14_inShuf1<>+0x00(SB)/8, $0xffffffff03020100
615 DATA expandAVX512_14_inShuf1<>+0x08(SB)/8, $0xffffffff03020100
616 DATA expandAVX512_14_inShuf1<>+0x10(SB)/8, $0xffffffff03020100
617 DATA expandAVX512_14_inShuf1<>+0x18(SB)/8, $0xffffffff03020100
618 DATA expandAVX512_14_inShuf1<>+0x20(SB)/8, $0xffffffff03020100
619 DATA expandAVX512_14_inShuf1<>+0x28(SB)/8, $0xffffffff03020100
620 DATA expandAVX512_14_inShuf1<>+0x30(SB)/8, $0xffffff0807060504
621 DATA expandAVX512_14_inShuf1<>+0x38(SB)/8, $0xffffff0807060504
622
623 GLOBL expandAVX512_14_mat1<>(SB), RODATA, $0x40
624 DATA expandAVX512_14_mat1<>+0x00(SB)/8, $0x1010101010102020
625 DATA expandAVX512_14_mat1<>+0x08(SB)/8, $0x2020202020202020
626 DATA expandAVX512_14_mat1<>+0x10(SB)/8, $0x2020202040404040
627 DATA expandAVX512_14_mat1<>+0x18(SB)/8, $0x4040404040404040
628 DATA expandAVX512_14_mat1<>+0x20(SB)/8, $0x4040808080808080
629 DATA expandAVX512_14_mat1<>+0x28(SB)/8, $0x8080808080808080
630 DATA expandAVX512_14_mat1<>+0x30(SB)/8, $0x1010101010102020
631 DATA expandAVX512_14_mat1<>+0x38(SB)/8, $0x2020202020202020
632
633 GLOBL expandAVX512_14_inShuf2<>(SB), RODATA, $0x40
634 DATA expandAVX512_14_inShuf2<>+0x00(SB)/8, $0xffffff0807060504
635 DATA expandAVX512_14_inShuf2<>+0x08(SB)/8, $0xffffff0807060504
636 DATA expandAVX512_14_inShuf2<>+0x10(SB)/8, $0xffffff0807060504
637 DATA expandAVX512_14_inShuf2<>+0x18(SB)/8, $0xffffff0807060504
638 DATA expandAVX512_14_inShuf2<>+0x20(SB)/8, $0xffffff0908070605
639 DATA expandAVX512_14_inShuf2<>+0x28(SB)/8, $0xffffff0908070605
640 DATA expandAVX512_14_inShuf2<>+0x30(SB)/8, $0xffffffff08070605
641 DATA expandAVX512_14_inShuf2<>+0x38(SB)/8, $0xffffffff08070605
642
643 GLOBL expandAVX512_14_mat2<>(SB), RODATA, $0x40
644 DATA expandAVX512_14_mat2<>+0x00(SB)/8, $0x2020202040404040
645 DATA expandAVX512_14_mat2<>+0x08(SB)/8, $0x4040404040404040
646 DATA expandAVX512_14_mat2<>+0x10(SB)/8, $0x4040808080808080
647 DATA expandAVX512_14_mat2<>+0x18(SB)/8, $0x8080808080808080
648 DATA expandAVX512_14_mat2<>+0x20(SB)/8, $0x0101010101010101
649 DATA expandAVX512_14_mat2<>+0x28(SB)/8, $0x0101010101010202
650 DATA expandAVX512_14_mat2<>+0x30(SB)/8, $0x0202020202020202
651 DATA expandAVX512_14_mat2<>+0x38(SB)/8, $0x0202020204040404
652
653 GLOBL expandAVX512_14_inShuf3<>(SB), RODATA, $0x40
654 DATA expandAVX512_14_inShuf3<>+0x00(SB)/8, $0xffffffff08070605
655 DATA expandAVX512_14_inShuf3<>+0x08(SB)/8, $0xffffffff08070605
656 DATA expandAVX512_14_inShuf3<>+0x10(SB)/8, $0xffffffff08070605
657 DATA expandAVX512_14_inShuf3<>+0x18(SB)/8, $0xffffffff08070605
658 DATA expandAVX512_14_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
659 DATA expandAVX512_14_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
660 DATA expandAVX512_14_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
661 DATA expandAVX512_14_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
662
663 GLOBL expandAVX512_14_mat3<>(SB), RODATA, $0x40
664 DATA expandAVX512_14_mat3<>+0x00(SB)/8, $0x0404040404040404
665 DATA expandAVX512_14_mat3<>+0x08(SB)/8, $0x0404080808080808
666 DATA expandAVX512_14_mat3<>+0x10(SB)/8, $0x0808080808080808
667 DATA expandAVX512_14_mat3<>+0x18(SB)/8, $0x1010101010101010
668 DATA expandAVX512_14_mat3<>+0x20(SB)/8, $0x0000000000000000
669 DATA expandAVX512_14_mat3<>+0x28(SB)/8, $0x0000000000000000
670 DATA expandAVX512_14_mat3<>+0x30(SB)/8, $0x0000000000000000
671 DATA expandAVX512_14_mat3<>+0x38(SB)/8, $0x0000000000000000
672
673 GLOBL expandAVX512_14_outShufLo(SB), RODATA, $0x40
674 DATA expandAVX512_14_outShufLo+0x00(SB)/8, $0x3830282018100800
675 DATA expandAVX512_14_outShufLo+0x08(SB)/8, $0x0901686058504840
676 DATA expandAVX512_14_outShufLo+0x10(SB)/8, $0x4941393129211911
677 DATA expandAVX512_14_outShufLo+0x18(SB)/8, $0x1a120a0269615951
678 DATA expandAVX512_14_outShufLo+0x20(SB)/8, $0x5a524a423a322a22
679 DATA expandAVX512_14_outShufLo+0x28(SB)/8, $0x2b231b130b036a62
680 DATA expandAVX512_14_outShufLo+0x30(SB)/8, $0x6b635b534b433b33
681 DATA expandAVX512_14_outShufLo+0x38(SB)/8, $0x3c342c241c140c04
682
683 GLOBL expandAVX512_14_outShufHi0(SB), RODATA, $0x40
684 DATA expandAVX512_14_outShufHi0+0x00(SB)/8, $0x6860585048403830
685 DATA expandAVX512_14_outShufHi0+0x08(SB)/8, $0x3931ffffffff7870
686 DATA expandAVX512_14_outShufHi0+0x10(SB)/8, $0x7971696159514941
687 DATA expandAVX512_14_outShufHi0+0x18(SB)/8, $0x4a423a32ffffffff
688 DATA expandAVX512_14_outShufHi0+0x20(SB)/8, $0xffff7a726a625a52
689 DATA expandAVX512_14_outShufHi0+0x28(SB)/8, $0x5b534b433b33ffff
690 DATA expandAVX512_14_outShufHi0+0x30(SB)/8, $0xffffffff7b736b63
691 DATA expandAVX512_14_outShufHi0+0x38(SB)/8, $0x6c645c544c443c34
692
693 GLOBL expandAVX512_14_outShufHi1(SB), RODATA, $0x40
694 DATA expandAVX512_14_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
695 DATA expandAVX512_14_outShufHi1+0x08(SB)/8, $0xffff18100800ffff
696 DATA expandAVX512_14_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
697 DATA expandAVX512_14_outShufHi1+0x18(SB)/8, $0xffffffff19110901
698 DATA expandAVX512_14_outShufHi1+0x20(SB)/8, $0x0a02ffffffffffff
699 DATA expandAVX512_14_outShufHi1+0x28(SB)/8, $0xffffffffffff1a12
700 DATA expandAVX512_14_outShufHi1+0x30(SB)/8, $0x1b130b03ffffffff
701 DATA expandAVX512_14_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
702
// expandAVX512_14 reads 64 bytes from (AX), runs them through four
// permute + GF(2) affine stages, and assembles two 64-byte results.
// Presumably each input bit ends up replicated 14 times (per the symbol
// name) — confirm against mkasm.go.
//
// In:    AX = address of the 64-byte input.
// Out:   the final values are left in Z1 and Z2; presumably the caller
//        picks them up from there — verify against the call site.
// Clobb: AX (reused as scratch for the mask constants), K1, Z0-Z7.
// Frame: NOSPLIT, $0-0 — no stack frame and no stack arguments.
703 TEXT expandAVX512_14<>(SB), NOSPLIT, $0-0
// Load the four input shuffles, the three output shuffles and the input.
704 VMOVDQU64 expandAVX512_14_inShuf0<>(SB), Z0
705 VMOVDQU64 expandAVX512_14_inShuf1<>(SB), Z2
706 VMOVDQU64 expandAVX512_14_inShuf2<>(SB), Z3
707 VMOVDQU64 expandAVX512_14_inShuf3<>(SB), Z4
708 VMOVDQU64 expandAVX512_14_outShufLo(SB), Z1
709 VMOVDQU64 expandAVX512_14_outShufHi0(SB), Z5
710 VMOVDQU64 expandAVX512_14_outShufHi1(SB), Z6
711 VMOVDQU64 (AX), Z7
// Stage k: gather input bytes by inShufk (in place over the index register),
// then apply the matk bit matrices with a zero additive constant.
712 VPERMB Z7, Z0, Z0
713 VGF2P8AFFINEQB $0, expandAVX512_14_mat0<>(SB), Z0, Z0
714 VPERMB Z7, Z2, Z2
715 VGF2P8AFFINEQB $0, expandAVX512_14_mat1<>(SB), Z2, Z2
716 VPERMB Z7, Z3, Z3
717 VGF2P8AFFINEQB $0, expandAVX512_14_mat2<>(SB), Z3, Z3
718 VPERMB Z7, Z4, Z4
719 VGF2P8AFFINEQB $0, expandAVX512_14_mat3<>(SB), Z4, Z4
// First result: two-table byte select from Z0/Z2 using outShufLo (in Z1).
720 VPERMI2B Z2, Z0, Z1
// Second result draws on three vectors, so it is built in two masked halves.
// This mask has a 1 for every lane where outShufHi0 holds a real index;
// the .Z form zeroes the other lanes. AX no longer holds the input pointer.
721 MOVQ $0xff0ffc3ff0ffc3ff, AX
722 KMOVQ AX, K1
723 VPERMI2B.Z Z3, Z2, K1, Z5
// Complementary mask: the lanes where outShufHi1 holds a real index.
724 MOVQ $0xf003c00f003c00, AX
725 KMOVQ AX, K1
726 VPERMB.Z Z4, Z6, K1, Z0
// The two halves occupy disjoint lanes, so OR merges them into Z2.
727 VPORQ Z0, Z5, Z2
728 RET
729
// Constant tables for expandAVX512_16<> (the TEXT symbol that follows them).
// Every table is 0x40 bytes of RODATA, i.e. exactly one ZMM register load.
//
// inShuf0: VPERMB indices; every qword selects input bytes 0..3, each
// index appearing twice in adjacent lanes.
730 GLOBL expandAVX512_16_inShuf0<>(SB), RODATA, $0x40
731 DATA expandAVX512_16_inShuf0<>+0x00(SB)/8, $0x0303020201010000
732 DATA expandAVX512_16_inShuf0<>+0x08(SB)/8, $0x0303020201010000
733 DATA expandAVX512_16_inShuf0<>+0x10(SB)/8, $0x0303020201010000
734 DATA expandAVX512_16_inShuf0<>+0x18(SB)/8, $0x0303020201010000
735 DATA expandAVX512_16_inShuf0<>+0x20(SB)/8, $0x0303020201010000
736 DATA expandAVX512_16_inShuf0<>+0x28(SB)/8, $0x0303020201010000
737 DATA expandAVX512_16_inShuf0<>+0x30(SB)/8, $0x0303020201010000
738 DATA expandAVX512_16_inShuf0<>+0x38(SB)/8, $0x0303020201010000
739
// mat0: affine matrices; qword i is eight copies of the byte (1 << i).
// The routine uses this one table for both of its affine transforms.
740 GLOBL expandAVX512_16_mat0<>(SB), RODATA, $0x40
741 DATA expandAVX512_16_mat0<>+0x00(SB)/8, $0x0101010101010101
742 DATA expandAVX512_16_mat0<>+0x08(SB)/8, $0x0202020202020202
743 DATA expandAVX512_16_mat0<>+0x10(SB)/8, $0x0404040404040404
744 DATA expandAVX512_16_mat0<>+0x18(SB)/8, $0x0808080808080808
745 DATA expandAVX512_16_mat0<>+0x20(SB)/8, $0x1010101010101010
746 DATA expandAVX512_16_mat0<>+0x28(SB)/8, $0x2020202020202020
747 DATA expandAVX512_16_mat0<>+0x30(SB)/8, $0x4040404040404040
748 DATA expandAVX512_16_mat0<>+0x38(SB)/8, $0x8080808080808080
749
// inShuf1: same layout as inShuf0 but selecting input bytes 4..7.
750 GLOBL expandAVX512_16_inShuf1<>(SB), RODATA, $0x40
751 DATA expandAVX512_16_inShuf1<>+0x00(SB)/8, $0x0707060605050404
752 DATA expandAVX512_16_inShuf1<>+0x08(SB)/8, $0x0707060605050404
753 DATA expandAVX512_16_inShuf1<>+0x10(SB)/8, $0x0707060605050404
754 DATA expandAVX512_16_inShuf1<>+0x18(SB)/8, $0x0707060605050404
755 DATA expandAVX512_16_inShuf1<>+0x20(SB)/8, $0x0707060605050404
756 DATA expandAVX512_16_inShuf1<>+0x28(SB)/8, $0x0707060605050404
757 DATA expandAVX512_16_inShuf1<>+0x30(SB)/8, $0x0707060605050404
758 DATA expandAVX512_16_inShuf1<>+0x38(SB)/8, $0x0707060605050404
759
// outShufLo: single-source VPERMB indices (all values are <= 0x3f). The
// routine applies this same table to both transformed vectors.
// NOTE(review): declared without the <> suffix, so unlike the tables above
// this symbol is not file-local — confirm this is what the generator intends.
760 GLOBL expandAVX512_16_outShufLo(SB), RODATA, $0x40
761 DATA expandAVX512_16_outShufLo+0x00(SB)/8, $0x1918111009080100
762 DATA expandAVX512_16_outShufLo+0x08(SB)/8, $0x3938313029282120
763 DATA expandAVX512_16_outShufLo+0x10(SB)/8, $0x1b1a13120b0a0302
764 DATA expandAVX512_16_outShufLo+0x18(SB)/8, $0x3b3a33322b2a2322
765 DATA expandAVX512_16_outShufLo+0x20(SB)/8, $0x1d1c15140d0c0504
766 DATA expandAVX512_16_outShufLo+0x28(SB)/8, $0x3d3c35342d2c2524
767 DATA expandAVX512_16_outShufLo+0x30(SB)/8, $0x1f1e17160f0e0706
768 DATA expandAVX512_16_outShufLo+0x38(SB)/8, $0x3f3e37362f2e2726
769
// expandAVX512_16 reads 64 bytes from (AX), runs them through two
// permute + GF(2) affine stages that share one matrix table, and reorders
// each stage's bytes into a 64-byte result.
// Presumably each input bit ends up replicated 16 times (per the symbol
// name) — confirm against mkasm.go.
//
// In:    AX = address of the 64-byte input (not modified here).
// Out:   the final values are left in Z1 and Z2; presumably the caller
//        picks them up from there — verify against the call site.
// Clobb: Z0-Z4.
// Frame: NOSPLIT, $0-0 — no stack frame and no stack arguments.
770 TEXT expandAVX512_16<>(SB), NOSPLIT, $0-0
771 VMOVDQU64 expandAVX512_16_inShuf0<>(SB), Z0
// The matrix table is kept in a register because both stages use it.
772 VMOVDQU64 expandAVX512_16_mat0<>(SB), Z1
773 VMOVDQU64 expandAVX512_16_inShuf1<>(SB), Z2
774 VMOVDQU64 expandAVX512_16_outShufLo(SB), Z3
775 VMOVDQU64 (AX), Z4
// Stage 0: gather by inShuf0, then apply the bit matrices (constant 0).
776 VPERMB Z4, Z0, Z0
777 VGF2P8AFFINEQB $0, Z1, Z0, Z0
// Stage 1: same with inShuf1; this is the last read of the matrices in Z1.
778 VPERMB Z4, Z2, Z2
779 VGF2P8AFFINEQB $0, Z1, Z2, Z2
// Reorder each stage independently with the same index table. Z1 is
// overwritten only now that the matrices are no longer needed.
780 VPERMB Z0, Z3, Z1
781 VPERMB Z2, Z3, Z2
782 RET
783
// Constant tables for expandAVX512_18<> (the TEXT symbol that follows them).
// Every table is 0x40 bytes of RODATA, i.e. exactly one ZMM register load.
//
// inShufN tables are VPERMB index vectors applied to the 64 input bytes.
// matN tables are the memory operand of the VGF2P8AFFINEQB that immediately
// follows the matching inShufN permute.
// outShuf* tables are byte indices used to assemble the two results.
//
// inShuf0: a mix of qwords where each index appears twice and qwords where
// each appears once; 0xff entries fill lanes that look unused — TODO confirm
// against mkasm.go.
784 GLOBL expandAVX512_18_inShuf0<>(SB), RODATA, $0x40
785 DATA expandAVX512_18_inShuf0<>+0x00(SB)/8, $0x0303020201010000
786 DATA expandAVX512_18_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
787 DATA expandAVX512_18_inShuf0<>+0x10(SB)/8, $0xffffffff03020100
788 DATA expandAVX512_18_inShuf0<>+0x18(SB)/8, $0xffffffff03020100
789 DATA expandAVX512_18_inShuf0<>+0x20(SB)/8, $0xffffffff03020100
790 DATA expandAVX512_18_inShuf0<>+0x28(SB)/8, $0xffffffff03020100
791 DATA expandAVX512_18_inShuf0<>+0x30(SB)/8, $0x0303020201010000
792 DATA expandAVX512_18_inShuf0<>+0x38(SB)/8, $0xff03020201010000
793
// mat0: affine matrices paired with inShuf0.
794 GLOBL expandAVX512_18_mat0<>(SB), RODATA, $0x40
795 DATA expandAVX512_18_mat0<>+0x00(SB)/8, $0x0101010101010101
796 DATA expandAVX512_18_mat0<>+0x08(SB)/8, $0x0101020202020202
797 DATA expandAVX512_18_mat0<>+0x10(SB)/8, $0x0202020202020202
798 DATA expandAVX512_18_mat0<>+0x18(SB)/8, $0x0202020204040404
799 DATA expandAVX512_18_mat0<>+0x20(SB)/8, $0x0404040404040404
800 DATA expandAVX512_18_mat0<>+0x28(SB)/8, $0x0404040404040808
801 DATA expandAVX512_18_mat0<>+0x30(SB)/8, $0x0808080808080808
802 DATA expandAVX512_18_mat0<>+0x38(SB)/8, $0x1010101010101010
803
// inShuf1: qwords 0-5 select from input bytes 0..2; qwords 6-7 move on to
// input bytes 3..6.
804 GLOBL expandAVX512_18_inShuf1<>(SB), RODATA, $0x40
805 DATA expandAVX512_18_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
806 DATA expandAVX512_18_inShuf1<>+0x08(SB)/8, $0xffffffffff020100
807 DATA expandAVX512_18_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
808 DATA expandAVX512_18_inShuf1<>+0x18(SB)/8, $0xffffffffff020100
809 DATA expandAVX512_18_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
810 DATA expandAVX512_18_inShuf1<>+0x28(SB)/8, $0xffff020201010000
811 DATA expandAVX512_18_inShuf1<>+0x30(SB)/8, $0xff06060505040403
812 DATA expandAVX512_18_inShuf1<>+0x38(SB)/8, $0xffffffff06050403
813
// mat1: affine matrices paired with inShuf1. The last two qwords restart at
// 0x10, in step with inShuf1 switching to input bytes 3..6 there.
814 GLOBL expandAVX512_18_mat1<>(SB), RODATA, $0x40
815 DATA expandAVX512_18_mat1<>+0x00(SB)/8, $0x1010202020202020
816 DATA expandAVX512_18_mat1<>+0x08(SB)/8, $0x2020202020202020
817 DATA expandAVX512_18_mat1<>+0x10(SB)/8, $0x2020202040404040
818 DATA expandAVX512_18_mat1<>+0x18(SB)/8, $0x4040404040404040
819 DATA expandAVX512_18_mat1<>+0x20(SB)/8, $0x4040404040408080
820 DATA expandAVX512_18_mat1<>+0x28(SB)/8, $0x8080808080808080
821 DATA expandAVX512_18_mat1<>+0x30(SB)/8, $0x1010101010101010
822 DATA expandAVX512_18_mat1<>+0x38(SB)/8, $0x1010202020202020
823
// inShuf2: selects from input bytes 3..7.
824 GLOBL expandAVX512_18_inShuf2<>(SB), RODATA, $0x40
825 DATA expandAVX512_18_inShuf2<>+0x00(SB)/8, $0xffffffff06050403
826 DATA expandAVX512_18_inShuf2<>+0x08(SB)/8, $0xffffffff06050403
827 DATA expandAVX512_18_inShuf2<>+0x10(SB)/8, $0xffffffff06050403
828 DATA expandAVX512_18_inShuf2<>+0x18(SB)/8, $0xffffffff06050403
829 DATA expandAVX512_18_inShuf2<>+0x20(SB)/8, $0x0606050504040303
830 DATA expandAVX512_18_inShuf2<>+0x28(SB)/8, $0x0707060605050404
831 DATA expandAVX512_18_inShuf2<>+0x30(SB)/8, $0xffffffffff060504
832 DATA expandAVX512_18_inShuf2<>+0x38(SB)/8, $0xffffffffff060504
833
// mat2: affine matrices paired with inShuf2.
834 GLOBL expandAVX512_18_mat2<>(SB), RODATA, $0x40
835 DATA expandAVX512_18_mat2<>+0x00(SB)/8, $0x2020202020202020
836 DATA expandAVX512_18_mat2<>+0x08(SB)/8, $0x2020202040404040
837 DATA expandAVX512_18_mat2<>+0x10(SB)/8, $0x4040404040404040
838 DATA expandAVX512_18_mat2<>+0x18(SB)/8, $0x4040404040408080
839 DATA expandAVX512_18_mat2<>+0x20(SB)/8, $0x8080808080808080
840 DATA expandAVX512_18_mat2<>+0x28(SB)/8, $0x0101010101010101
841 DATA expandAVX512_18_mat2<>+0x30(SB)/8, $0x0101020202020202
842 DATA expandAVX512_18_mat2<>+0x38(SB)/8, $0x0202020202020202
843
// inShuf3: only the first four qwords carry indices (input bytes 4..6);
// the upper four qwords are entirely 0xff.
844 GLOBL expandAVX512_18_inShuf3<>(SB), RODATA, $0x40
845 DATA expandAVX512_18_inShuf3<>+0x00(SB)/8, $0xffffffffff060504
846 DATA expandAVX512_18_inShuf3<>+0x08(SB)/8, $0xffffffffff060504
847 DATA expandAVX512_18_inShuf3<>+0x10(SB)/8, $0xffffffffff060504
848 DATA expandAVX512_18_inShuf3<>+0x18(SB)/8, $0xffff060605050404
849 DATA expandAVX512_18_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
850 DATA expandAVX512_18_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
851 DATA expandAVX512_18_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
852 DATA expandAVX512_18_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
853
// mat3: affine matrices paired with inShuf3. The upper four qwords are zero,
// matching the all-0xff upper half of inShuf3.
854 GLOBL expandAVX512_18_mat3<>(SB), RODATA, $0x40
855 DATA expandAVX512_18_mat3<>+0x00(SB)/8, $0x0202020204040404
856 DATA expandAVX512_18_mat3<>+0x08(SB)/8, $0x0404040404040404
857 DATA expandAVX512_18_mat3<>+0x10(SB)/8, $0x0404040404040808
858 DATA expandAVX512_18_mat3<>+0x18(SB)/8, $0x0808080808080808
859 DATA expandAVX512_18_mat3<>+0x20(SB)/8, $0x0000000000000000
860 DATA expandAVX512_18_mat3<>+0x28(SB)/8, $0x0000000000000000
861 DATA expandAVX512_18_mat3<>+0x30(SB)/8, $0x0000000000000000
862 DATA expandAVX512_18_mat3<>+0x38(SB)/8, $0x0000000000000000
863
// outShufLo: VPERMI2B indices that build the first result from the
// transformed vectors 0 (indices 0x00-0x3f) and 1 (indices 0x40-0x7f).
// NOTE(review): this symbol and the outShufHi* tables below are declared
// without the <> suffix, so unlike the inShuf/mat tables they are not
// file-local — confirm this is what the generator intends.
864 GLOBL expandAVX512_18_outShufLo(SB), RODATA, $0x40
865 DATA expandAVX512_18_outShufLo+0x00(SB)/8, $0x3028201810080100
866 DATA expandAVX512_18_outShufLo+0x08(SB)/8, $0x6058504840393831
867 DATA expandAVX512_18_outShufLo+0x10(SB)/8, $0x2119110903026968
868 DATA expandAVX512_18_outShufLo+0x18(SB)/8, $0x5149413b3a333229
869 DATA expandAVX512_18_outShufLo+0x20(SB)/8, $0x120a05046b6a6159
870 DATA expandAVX512_18_outShufLo+0x28(SB)/8, $0x423d3c35342a221a
871 DATA expandAVX512_18_outShufLo+0x30(SB)/8, $0x07066d6c625a524a
872 DATA expandAVX512_18_outShufLo+0x38(SB)/8, $0x3e37362b231b130b
873
// outShufHi0: VPERMI2B indices over transformed vectors 1 and 2 for the
// second result. Lanes holding 0xff are the ones cleared by the first K1
// mask in the routine; they are supplied by outShufHi1 instead.
874 GLOBL expandAVX512_18_outShufHi0(SB), RODATA, $0x40
875 DATA expandAVX512_18_outShufHi0+0x00(SB)/8, $0x6160585048403830
876 DATA expandAVX512_18_outShufHi0+0x08(SB)/8, $0xffffffff78706968
877 DATA expandAVX512_18_outShufHi0+0x10(SB)/8, $0x59514941393231ff
878 DATA expandAVX512_18_outShufHi0+0x18(SB)/8, $0xffff79716b6a6362
879 DATA expandAVX512_18_outShufHi0+0x20(SB)/8, $0x4a423a3433ffffff
880 DATA expandAVX512_18_outShufHi0+0x28(SB)/8, $0x7a726d6c65645a52
881 DATA expandAVX512_18_outShufHi0+0x30(SB)/8, $0x3b3635ffffffffff
882 DATA expandAVX512_18_outShufHi0+0x38(SB)/8, $0x6f6e67665b534b43
883
// outShufHi1: VPERMB indices over transformed vector 3. Real indices occupy
// exactly the lanes that are 0xff in outShufHi0.
884 GLOBL expandAVX512_18_outShufHi1(SB), RODATA, $0x40
885 DATA expandAVX512_18_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
886 DATA expandAVX512_18_outShufHi1+0x08(SB)/8, $0x18100800ffffffff
887 DATA expandAVX512_18_outShufHi1+0x10(SB)/8, $0xffffffffffffff19
888 DATA expandAVX512_18_outShufHi1+0x18(SB)/8, $0x0901ffffffffffff
889 DATA expandAVX512_18_outShufHi1+0x20(SB)/8, $0xffffffffff1b1a11
890 DATA expandAVX512_18_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
891 DATA expandAVX512_18_outShufHi1+0x30(SB)/8, $0xffffff1d1c120a02
892 DATA expandAVX512_18_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
893
// expandAVX512_18 reads 64 bytes from (AX), runs them through four
// permute + GF(2) affine stages, and assembles two 64-byte results.
// Presumably each input bit ends up replicated 18 times (per the symbol
// name) — confirm against mkasm.go.
//
// In:    AX = address of the 64-byte input.
// Out:   the final values are left in Z1 and Z2; presumably the caller
//        picks them up from there — verify against the call site.
// Clobb: AX (reused as scratch for the mask constants), K1, Z0-Z7.
// Frame: NOSPLIT, $0-0 — no stack frame and no stack arguments.
894 TEXT expandAVX512_18<>(SB), NOSPLIT, $0-0
// Load the four input shuffles, the three output shuffles and the input.
895 VMOVDQU64 expandAVX512_18_inShuf0<>(SB), Z0
896 VMOVDQU64 expandAVX512_18_inShuf1<>(SB), Z2
897 VMOVDQU64 expandAVX512_18_inShuf2<>(SB), Z3
898 VMOVDQU64 expandAVX512_18_inShuf3<>(SB), Z4
899 VMOVDQU64 expandAVX512_18_outShufLo(SB), Z1
900 VMOVDQU64 expandAVX512_18_outShufHi0(SB), Z5
901 VMOVDQU64 expandAVX512_18_outShufHi1(SB), Z6
902 VMOVDQU64 (AX), Z7
// Stage k: gather input bytes by inShufk (in place over the index register),
// then apply the matk bit matrices with a zero additive constant.
903 VPERMB Z7, Z0, Z0
904 VGF2P8AFFINEQB $0, expandAVX512_18_mat0<>(SB), Z0, Z0
905 VPERMB Z7, Z2, Z2
906 VGF2P8AFFINEQB $0, expandAVX512_18_mat1<>(SB), Z2, Z2
907 VPERMB Z7, Z3, Z3
908 VGF2P8AFFINEQB $0, expandAVX512_18_mat2<>(SB), Z3, Z3
909 VPERMB Z7, Z4, Z4
910 VGF2P8AFFINEQB $0, expandAVX512_18_mat3<>(SB), Z4, Z4
// First result: two-table byte select from Z0/Z2 using outShufLo (in Z1).
911 VPERMI2B Z2, Z0, Z1
// Second result draws on three vectors, so it is built in two masked halves.
// This mask has a 1 for every lane where outShufHi0 holds a real index;
// the .Z form zeroes the other lanes. AX no longer holds the input pointer.
912 MOVQ $0xffe0fff83ffe0fff, AX
913 KMOVQ AX, K1
914 VPERMI2B.Z Z3, Z2, K1, Z5
// Complementary mask: the lanes where outShufHi1 holds a real index.
915 MOVQ $0x1f0007c001f000, AX
916 KMOVQ AX, K1
917 VPERMB.Z Z4, Z6, K1, Z0
// The two halves occupy disjoint lanes, so OR merges them into Z2.
918 VPORQ Z0, Z5, Z2
919 RET
920
// Constant tables for expandAVX512_20<> (the TEXT symbol that follows them).
// Every table is 0x40 bytes of RODATA, i.e. exactly one ZMM register load.
//
// inShufN tables are VPERMB index vectors applied to the 64 input bytes.
// matN tables are the memory operand of the VGF2P8AFFINEQB that immediately
// follows the matching inShufN permute.
// outShufLo/outShufHi are VPERMI2B index vectors for the two results.
//
// inShuf0: selects from input bytes 0..3; 0xff entries fill lanes that look
// unused — TODO confirm against mkasm.go.
921 GLOBL expandAVX512_20_inShuf0<>(SB), RODATA, $0x40
922 DATA expandAVX512_20_inShuf0<>+0x00(SB)/8, $0x0303020201010000
923 DATA expandAVX512_20_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
924 DATA expandAVX512_20_inShuf0<>+0x10(SB)/8, $0xff03020201010000
925 DATA expandAVX512_20_inShuf0<>+0x18(SB)/8, $0xffff020201010000
926 DATA expandAVX512_20_inShuf0<>+0x20(SB)/8, $0xffffffffff020100
927 DATA expandAVX512_20_inShuf0<>+0x28(SB)/8, $0xffff020201010000
928 DATA expandAVX512_20_inShuf0<>+0x30(SB)/8, $0xffff020201010000
929 DATA expandAVX512_20_inShuf0<>+0x38(SB)/8, $0xffffffffff020100
930
// mat0: affine matrices paired with inShuf0.
931 GLOBL expandAVX512_20_mat0<>(SB), RODATA, $0x40
932 DATA expandAVX512_20_mat0<>+0x00(SB)/8, $0x0101010101010101
933 DATA expandAVX512_20_mat0<>+0x08(SB)/8, $0x0101010102020202
934 DATA expandAVX512_20_mat0<>+0x10(SB)/8, $0x0202020202020202
935 DATA expandAVX512_20_mat0<>+0x18(SB)/8, $0x0404040404040404
936 DATA expandAVX512_20_mat0<>+0x20(SB)/8, $0x0404040408080808
937 DATA expandAVX512_20_mat0<>+0x28(SB)/8, $0x0808080808080808
938 DATA expandAVX512_20_mat0<>+0x30(SB)/8, $0x1010101010101010
939 DATA expandAVX512_20_mat0<>+0x38(SB)/8, $0x1010101020202020
940
// inShuf1: qwords 0-3 select from input bytes 0..2; qwords 4-7 move on to
// input bytes 3..6.
941 GLOBL expandAVX512_20_inShuf1<>(SB), RODATA, $0x40
942 DATA expandAVX512_20_inShuf1<>+0x00(SB)/8, $0xffff020201010000
943 DATA expandAVX512_20_inShuf1<>+0x08(SB)/8, $0xffff020201010000
944 DATA expandAVX512_20_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
945 DATA expandAVX512_20_inShuf1<>+0x18(SB)/8, $0xffff020201010000
946 DATA expandAVX512_20_inShuf1<>+0x20(SB)/8, $0xff06060505040403
947 DATA expandAVX512_20_inShuf1<>+0x28(SB)/8, $0x0606050504040303
948 DATA expandAVX512_20_inShuf1<>+0x30(SB)/8, $0xffffffff06050403
949 DATA expandAVX512_20_inShuf1<>+0x38(SB)/8, $0xffff050504040303
950
// mat1: affine matrices paired with inShuf1. Qwords 4-7 restart at 0x02,
// in step with inShuf1 switching to input bytes 3..6 there.
951 GLOBL expandAVX512_20_mat1<>(SB), RODATA, $0x40
952 DATA expandAVX512_20_mat1<>+0x00(SB)/8, $0x2020202020202020
953 DATA expandAVX512_20_mat1<>+0x08(SB)/8, $0x4040404040404040
954 DATA expandAVX512_20_mat1<>+0x10(SB)/8, $0x4040404080808080
955 DATA expandAVX512_20_mat1<>+0x18(SB)/8, $0x8080808080808080
956 DATA expandAVX512_20_mat1<>+0x20(SB)/8, $0x0202020202020202
957 DATA expandAVX512_20_mat1<>+0x28(SB)/8, $0x0404040404040404
958 DATA expandAVX512_20_mat1<>+0x30(SB)/8, $0x0404040408080808
959 DATA expandAVX512_20_mat1<>+0x38(SB)/8, $0x0808080808080808
960
// inShuf2: qwords 0-5 select from input bytes 3..5; qwords 6-7 select from
// input bytes 4..6.
961 GLOBL expandAVX512_20_inShuf2<>(SB), RODATA, $0x40
962 DATA expandAVX512_20_inShuf2<>+0x00(SB)/8, $0xffff050504040303
963 DATA expandAVX512_20_inShuf2<>+0x08(SB)/8, $0xffffffffff050403
964 DATA expandAVX512_20_inShuf2<>+0x10(SB)/8, $0xffff050504040303
965 DATA expandAVX512_20_inShuf2<>+0x18(SB)/8, $0xffff050504040303
966 DATA expandAVX512_20_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
967 DATA expandAVX512_20_inShuf2<>+0x28(SB)/8, $0xffff050504040303
968 DATA expandAVX512_20_inShuf2<>+0x30(SB)/8, $0xffff060605050404
969 DATA expandAVX512_20_inShuf2<>+0x38(SB)/8, $0xffffffffff060504
970
// mat2: affine matrices paired with inShuf2.
971 GLOBL expandAVX512_20_mat2<>(SB), RODATA, $0x40
972 DATA expandAVX512_20_mat2<>+0x00(SB)/8, $0x1010101010101010
973 DATA expandAVX512_20_mat2<>+0x08(SB)/8, $0x1010101020202020
974 DATA expandAVX512_20_mat2<>+0x10(SB)/8, $0x2020202020202020
975 DATA expandAVX512_20_mat2<>+0x18(SB)/8, $0x4040404040404040
976 DATA expandAVX512_20_mat2<>+0x20(SB)/8, $0x4040404080808080
977 DATA expandAVX512_20_mat2<>+0x28(SB)/8, $0x8080808080808080
978 DATA expandAVX512_20_mat2<>+0x30(SB)/8, $0x0101010101010101
979 DATA expandAVX512_20_mat2<>+0x38(SB)/8, $0x0101010102020202
980
// outShufLo: VPERMI2B indices that build the first result from the
// transformed vectors 0 (indices 0x00-0x3f) and 1 (indices 0x40-0x7f).
// NOTE(review): this symbol and outShufHi below are declared without the
// <> suffix, so unlike the inShuf/mat tables they are not file-local —
// confirm this is what the generator intends.
981 GLOBL expandAVX512_20_outShufLo(SB), RODATA, $0x40
982 DATA expandAVX512_20_outShufLo+0x00(SB)/8, $0x2019181110080100
983 DATA expandAVX512_20_outShufLo+0x08(SB)/8, $0x4841403831302928
984 DATA expandAVX512_20_outShufLo+0x10(SB)/8, $0x1209030259585049
985 DATA expandAVX512_20_outShufLo+0x18(SB)/8, $0x33322b2a211b1a13
986 DATA expandAVX512_20_outShufLo+0x20(SB)/8, $0x5b5a514b4a434239
987 DATA expandAVX512_20_outShufLo+0x28(SB)/8, $0x221d1c15140a0504
988 DATA expandAVX512_20_outShufLo+0x30(SB)/8, $0x4c45443a35342d2c
989 DATA expandAVX512_20_outShufLo+0x38(SB)/8, $0x160b07065d5c524d
990
// outShufHi: VPERMI2B indices that build the second result from the
// transformed vectors 1 (indices 0x00-0x3f) and 2 (indices 0x40-0x7f).
// No lane is 0xff here, which is why the routine needs no mask.
991 GLOBL expandAVX512_20_outShufHi(SB), RODATA, $0x40
992 DATA expandAVX512_20_outShufHi+0x00(SB)/8, $0x4140393830292820
993 DATA expandAVX512_20_outShufHi+0x08(SB)/8, $0x6968605958515048
994 DATA expandAVX512_20_outShufHi+0x10(SB)/8, $0x312b2a2221787170
995 DATA expandAVX512_20_outShufHi+0x18(SB)/8, $0x5a53524943423b3a
996 DATA expandAVX512_20_outShufHi+0x20(SB)/8, $0x237973726b6a615b
997 DATA expandAVX512_20_outShufHi+0x28(SB)/8, $0x45443d3c322d2c24
998 DATA expandAVX512_20_outShufHi+0x30(SB)/8, $0x6d6c625d5c55544a
999 DATA expandAVX512_20_outShufHi+0x38(SB)/8, $0x332f2e26257a7574
1000
// expandAVX512_20 reads 64 bytes from (AX), runs them through three
// permute + GF(2) affine stages, and assembles two 64-byte results with
// unmasked two-table byte selects.
// Presumably each input bit ends up replicated 20 times (per the symbol
// name) — confirm against mkasm.go.
//
// In:    AX = address of the 64-byte input (not modified here).
// Out:   the final values are left in Z1 and Z2; presumably the caller
//        picks them up from there — verify against the call site.
// Clobb: Z0-Z5.
// Frame: NOSPLIT, $0-0 — no stack frame and no stack arguments.
1001 TEXT expandAVX512_20<>(SB), NOSPLIT, $0-0
// Load the three input shuffles, the two output shuffles and the input.
1002 VMOVDQU64 expandAVX512_20_inShuf0<>(SB), Z0
1003 VMOVDQU64 expandAVX512_20_inShuf1<>(SB), Z3
1004 VMOVDQU64 expandAVX512_20_inShuf2<>(SB), Z4
1005 VMOVDQU64 expandAVX512_20_outShufLo(SB), Z1
1006 VMOVDQU64 expandAVX512_20_outShufHi(SB), Z2
1007 VMOVDQU64 (AX), Z5
// Stage k: gather input bytes by inShufk (in place over the index register),
// then apply the matk bit matrices with a zero additive constant.
1008 VPERMB Z5, Z0, Z0
1009 VGF2P8AFFINEQB $0, expandAVX512_20_mat0<>(SB), Z0, Z0
1010 VPERMB Z5, Z3, Z3
1011 VGF2P8AFFINEQB $0, expandAVX512_20_mat1<>(SB), Z3, Z3
1012 VPERMB Z5, Z4, Z4
1013 VGF2P8AFFINEQB $0, expandAVX512_20_mat2<>(SB), Z4, Z4
// First result: select from Z0/Z3 by outShufLo, overwriting the indices in Z1.
1014 VPERMI2B Z3, Z0, Z1
// Second result: select from Z3/Z4 by outShufHi, overwriting the indices in Z2.
1015 VPERMI2B Z4, Z3, Z2
1016 RET
1017
// Constant tables for expandAVX512_22<> (the TEXT symbol that follows them).
// Every table is 0x40 bytes of RODATA, i.e. exactly one ZMM register load.
//
// inShufN tables are VPERMB index vectors applied to the 64 input bytes.
// matN tables are the memory operand of the VGF2P8AFFINEQB that immediately
// follows the matching inShufN permute.
// outShuf* tables are byte indices used to assemble the two results.
//
// inShuf0: selects from input bytes 0..2; 0xff entries fill lanes that look
// unused — TODO confirm against mkasm.go.
1018 GLOBL expandAVX512_22_inShuf0<>(SB), RODATA, $0x40
1019 DATA expandAVX512_22_inShuf0<>+0x00(SB)/8, $0xffff020201010000
1020 DATA expandAVX512_22_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
1021 DATA expandAVX512_22_inShuf0<>+0x10(SB)/8, $0xffff020201010000
1022 DATA expandAVX512_22_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
1023 DATA expandAVX512_22_inShuf0<>+0x20(SB)/8, $0xffff020201010000
1024 DATA expandAVX512_22_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
1025 DATA expandAVX512_22_inShuf0<>+0x30(SB)/8, $0xffff020201010000
1026 DATA expandAVX512_22_inShuf0<>+0x38(SB)/8, $0xffff020201010000
1027
// mat0: affine matrices paired with inShuf0.
1028 GLOBL expandAVX512_22_mat0<>(SB), RODATA, $0x40
1029 DATA expandAVX512_22_mat0<>+0x00(SB)/8, $0x0101010101010101
1030 DATA expandAVX512_22_mat0<>+0x08(SB)/8, $0x0101010101010202
1031 DATA expandAVX512_22_mat0<>+0x10(SB)/8, $0x0202020202020202
1032 DATA expandAVX512_22_mat0<>+0x18(SB)/8, $0x0202020204040404
1033 DATA expandAVX512_22_mat0<>+0x20(SB)/8, $0x0404040404040404
1034 DATA expandAVX512_22_mat0<>+0x28(SB)/8, $0x0404080808080808
1035 DATA expandAVX512_22_mat0<>+0x30(SB)/8, $0x0808080808080808
1036 DATA expandAVX512_22_mat0<>+0x38(SB)/8, $0x1010101010101010
1037
// inShuf1: qwords 0-4 select from input bytes 0..2, qword 5 only from
// bytes 0..1, qword 6 from bytes 2..4 and qword 7 from bytes 3..5.
1038 GLOBL expandAVX512_22_inShuf1<>(SB), RODATA, $0x40
1039 DATA expandAVX512_22_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
1040 DATA expandAVX512_22_inShuf1<>+0x08(SB)/8, $0xffff020201010000
1041 DATA expandAVX512_22_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
1042 DATA expandAVX512_22_inShuf1<>+0x18(SB)/8, $0xffff020201010000
1043 DATA expandAVX512_22_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
1044 DATA expandAVX512_22_inShuf1<>+0x28(SB)/8, $0xffffffff01010000
1045 DATA expandAVX512_22_inShuf1<>+0x30(SB)/8, $0xffff040403030202
1046 DATA expandAVX512_22_inShuf1<>+0x38(SB)/8, $0xffff050504040303
1047
// mat1: affine matrices paired with inShuf1. Qwords 5 and 6 both use 0x80;
// qword 7 restarts at 0x01.
1048 GLOBL expandAVX512_22_mat1<>(SB), RODATA, $0x40
1049 DATA expandAVX512_22_mat1<>+0x00(SB)/8, $0x1010101010102020
1050 DATA expandAVX512_22_mat1<>+0x08(SB)/8, $0x2020202020202020
1051 DATA expandAVX512_22_mat1<>+0x10(SB)/8, $0x2020202040404040
1052 DATA expandAVX512_22_mat1<>+0x18(SB)/8, $0x4040404040404040
1053 DATA expandAVX512_22_mat1<>+0x20(SB)/8, $0x4040808080808080
1054 DATA expandAVX512_22_mat1<>+0x28(SB)/8, $0x8080808080808080
1055 DATA expandAVX512_22_mat1<>+0x30(SB)/8, $0x8080808080808080
1056 DATA expandAVX512_22_mat1<>+0x38(SB)/8, $0x0101010101010101
1057
// inShuf2: selects from input bytes 3..5.
1058 GLOBL expandAVX512_22_inShuf2<>(SB), RODATA, $0x40
1059 DATA expandAVX512_22_inShuf2<>+0x00(SB)/8, $0xffffffffff050403
1060 DATA expandAVX512_22_inShuf2<>+0x08(SB)/8, $0xffff050504040303
1061 DATA expandAVX512_22_inShuf2<>+0x10(SB)/8, $0xffffffffff050403
1062 DATA expandAVX512_22_inShuf2<>+0x18(SB)/8, $0xffff050504040303
1063 DATA expandAVX512_22_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
1064 DATA expandAVX512_22_inShuf2<>+0x28(SB)/8, $0xffff050504040303
1065 DATA expandAVX512_22_inShuf2<>+0x30(SB)/8, $0xffff050504040303
1066 DATA expandAVX512_22_inShuf2<>+0x38(SB)/8, $0xffffffffff050403
1067
// mat2: affine matrices paired with inShuf2.
1068 GLOBL expandAVX512_22_mat2<>(SB), RODATA, $0x40
1069 DATA expandAVX512_22_mat2<>+0x00(SB)/8, $0x0101010101010202
1070 DATA expandAVX512_22_mat2<>+0x08(SB)/8, $0x0202020202020202
1071 DATA expandAVX512_22_mat2<>+0x10(SB)/8, $0x0202020204040404
1072 DATA expandAVX512_22_mat2<>+0x18(SB)/8, $0x0404040404040404
1073 DATA expandAVX512_22_mat2<>+0x20(SB)/8, $0x0404080808080808
1074 DATA expandAVX512_22_mat2<>+0x28(SB)/8, $0x0808080808080808
1075 DATA expandAVX512_22_mat2<>+0x30(SB)/8, $0x1010101010101010
1076 DATA expandAVX512_22_mat2<>+0x38(SB)/8, $0x1010101010102020
1077
// inShuf3: only the first four qwords carry indices (input bytes 3..5);
// the upper four qwords are entirely 0xff.
1078 GLOBL expandAVX512_22_inShuf3<>(SB), RODATA, $0x40
1079 DATA expandAVX512_22_inShuf3<>+0x00(SB)/8, $0xffff050504040303
1080 DATA expandAVX512_22_inShuf3<>+0x08(SB)/8, $0xffffffffff050403
1081 DATA expandAVX512_22_inShuf3<>+0x10(SB)/8, $0xffffff0504040303
1082 DATA expandAVX512_22_inShuf3<>+0x18(SB)/8, $0xffffffffffff0403
1083 DATA expandAVX512_22_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
1084 DATA expandAVX512_22_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
1085 DATA expandAVX512_22_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
1086 DATA expandAVX512_22_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
1087
// mat3: affine matrices paired with inShuf3. The upper four qwords are zero,
// matching the all-0xff upper half of inShuf3.
1088 GLOBL expandAVX512_22_mat3<>(SB), RODATA, $0x40
1089 DATA expandAVX512_22_mat3<>+0x00(SB)/8, $0x2020202020202020
1090 DATA expandAVX512_22_mat3<>+0x08(SB)/8, $0x2020202040404040
1091 DATA expandAVX512_22_mat3<>+0x10(SB)/8, $0x4040404040404040
1092 DATA expandAVX512_22_mat3<>+0x18(SB)/8, $0x4040808080808080
1093 DATA expandAVX512_22_mat3<>+0x20(SB)/8, $0x0000000000000000
1094 DATA expandAVX512_22_mat3<>+0x28(SB)/8, $0x0000000000000000
1095 DATA expandAVX512_22_mat3<>+0x30(SB)/8, $0x0000000000000000
1096 DATA expandAVX512_22_mat3<>+0x38(SB)/8, $0x0000000000000000
1097
// outShufLo: VPERMI2B indices that build the first result from the
// transformed vectors 0 (indices 0x00-0x3f) and 1 (indices 0x40-0x7f).
// NOTE(review): this symbol and the outShufHi* tables below are declared
// without the <> suffix, so unlike the inShuf/mat tables they are not
// file-local — confirm this is what the generator intends.
1098 GLOBL expandAVX512_22_outShufLo(SB), RODATA, $0x40
1099 DATA expandAVX512_22_outShufLo+0x00(SB)/8, $0x2120181110080100
1100 DATA expandAVX512_22_outShufLo+0x08(SB)/8, $0x4948403938313028
1101 DATA expandAVX512_22_outShufLo+0x10(SB)/8, $0x0302696860595850
1102 DATA expandAVX512_22_outShufLo+0x18(SB)/8, $0x3229232219131209
1103 DATA expandAVX512_22_outShufLo+0x20(SB)/8, $0x5a514b4a413b3a33
1104 DATA expandAVX512_22_outShufLo+0x28(SB)/8, $0x140a05046b6a615b
1105 DATA expandAVX512_22_outShufLo+0x30(SB)/8, $0x3c35342a25241a15
1106 DATA expandAVX512_22_outShufLo+0x38(SB)/8, $0x625d5c524d4c423d
1107
// outShufHi0: VPERMI2B indices over transformed vectors 1 and 2 for the
// second result. Lanes holding 0xff are the ones cleared by the first K1
// mask in the routine; they are supplied by outShufHi1 instead.
1108 GLOBL expandAVX512_22_outShufHi0(SB), RODATA, $0x40
1109 DATA expandAVX512_22_outShufHi0+0x00(SB)/8, $0x5049484039383130
1110 DATA expandAVX512_22_outShufHi0+0x08(SB)/8, $0x7871706968605958
1111 DATA expandAVX512_22_outShufHi0+0x10(SB)/8, $0x3332ffffffffffff
1112 DATA expandAVX512_22_outShufHi0+0x18(SB)/8, $0x5b5a514b4a413b3a
1113 DATA expandAVX512_22_outShufHi0+0x20(SB)/8, $0xffff7973726b6a61
1114 DATA expandAVX512_22_outShufHi0+0x28(SB)/8, $0x3d3c3534ffffffff
1115 DATA expandAVX512_22_outShufHi0+0x30(SB)/8, $0x6c625d5c524d4c42
1116 DATA expandAVX512_22_outShufHi0+0x38(SB)/8, $0xffffffff7a75746d
1117
// outShufHi1: VPERMB indices over transformed vector 3. Real indices occupy
// exactly the lanes that are 0xff in outShufHi0.
1118 GLOBL expandAVX512_22_outShufHi1(SB), RODATA, $0x40
1119 DATA expandAVX512_22_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
1120 DATA expandAVX512_22_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
1121 DATA expandAVX512_22_outShufHi1+0x10(SB)/8, $0xffff181110080100
1122 DATA expandAVX512_22_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
1123 DATA expandAVX512_22_outShufHi1+0x20(SB)/8, $0x0302ffffffffffff
1124 DATA expandAVX512_22_outShufHi1+0x28(SB)/8, $0xffffffff19131209
1125 DATA expandAVX512_22_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
1126 DATA expandAVX512_22_outShufHi1+0x38(SB)/8, $0x140a0504ffffffff
1127
// expandAVX512_22 reads 64 bytes from (AX), runs them through four
// permute + GF(2) affine stages, and assembles two 64-byte results.
// Presumably each input bit ends up replicated 22 times (per the symbol
// name) — confirm against mkasm.go.
//
// In:    AX = address of the 64-byte input.
// Out:   the final values are left in Z1 and Z2; presumably the caller
//        picks them up from there — verify against the call site.
// Clobb: AX (reused as scratch for the mask constants), K1, Z0-Z7.
// Frame: NOSPLIT, $0-0 — no stack frame and no stack arguments.
1128 TEXT expandAVX512_22<>(SB), NOSPLIT, $0-0
// Load the four input shuffles, the three output shuffles and the input.
1129 VMOVDQU64 expandAVX512_22_inShuf0<>(SB), Z0
1130 VMOVDQU64 expandAVX512_22_inShuf1<>(SB), Z2
1131 VMOVDQU64 expandAVX512_22_inShuf2<>(SB), Z3
1132 VMOVDQU64 expandAVX512_22_inShuf3<>(SB), Z4
1133 VMOVDQU64 expandAVX512_22_outShufLo(SB), Z1
1134 VMOVDQU64 expandAVX512_22_outShufHi0(SB), Z5
1135 VMOVDQU64 expandAVX512_22_outShufHi1(SB), Z6
1136 VMOVDQU64 (AX), Z7
// Stage k: gather input bytes by inShufk (in place over the index register),
// then apply the matk bit matrices with a zero additive constant.
1137 VPERMB Z7, Z0, Z0
1138 VGF2P8AFFINEQB $0, expandAVX512_22_mat0<>(SB), Z0, Z0
1139 VPERMB Z7, Z2, Z2
1140 VGF2P8AFFINEQB $0, expandAVX512_22_mat1<>(SB), Z2, Z2
1141 VPERMB Z7, Z3, Z3
1142 VGF2P8AFFINEQB $0, expandAVX512_22_mat2<>(SB), Z3, Z3
1143 VPERMB Z7, Z4, Z4
1144 VGF2P8AFFINEQB $0, expandAVX512_22_mat3<>(SB), Z4, Z4
// First result: two-table byte select from Z0/Z2 using outShufLo (in Z1).
1145 VPERMI2B Z2, Z0, Z1
// Second result draws on three vectors, so it is built in two masked halves.
// This mask has a 1 for every lane where outShufHi0 holds a real index;
// the .Z form zeroes the other lanes. The literal is written with 15 hex
// digits, so its top four bits (lanes 60-63) are clear.
// AX no longer holds the input pointer.
1146 MOVQ $0xffff03fffc0ffff, AX
1147 KMOVQ AX, K1
1148 VPERMI2B.Z Z3, Z2, K1, Z5
// Complementary mask: the lanes where outShufHi1 holds a real index.
1149 MOVQ $0xf0000fc0003f0000, AX
1150 KMOVQ AX, K1
1151 VPERMB.Z Z4, Z6, K1, Z0
// The two halves occupy disjoint lanes, so OR merges them into Z2.
1152 VPORQ Z0, Z5, Z2
1153 RET
1154
// Constant tables named for expandAVX512_24. The routine that consumes them
// lies beyond this part of the file, so the notes below describe only the
// data itself; the roles are presumed to mirror the smaller expanders
// (inShuf = VPERMB indices, mat = VGF2P8AFFINEQB matrices, outShuf = output
// byte indices) — TODO confirm against the TEXT symbol.
// Every table is 0x40 bytes of RODATA, i.e. exactly one ZMM register load.
//
// inShuf0: selects from input bytes 0..2, with indices repeated in
// adjacent lanes; the last three qwords have 0xff in their top lanes.
1155 GLOBL expandAVX512_24_inShuf0<>(SB), RODATA, $0x40
1156 DATA expandAVX512_24_inShuf0<>+0x00(SB)/8, $0x0202010101000000
1157 DATA expandAVX512_24_inShuf0<>+0x08(SB)/8, $0x0202010101000000
1158 DATA expandAVX512_24_inShuf0<>+0x10(SB)/8, $0x0202010101000000
1159 DATA expandAVX512_24_inShuf0<>+0x18(SB)/8, $0x0202010101000000
1160 DATA expandAVX512_24_inShuf0<>+0x20(SB)/8, $0x0202010101000000
1161 DATA expandAVX512_24_inShuf0<>+0x28(SB)/8, $0xff02010101000000
1162 DATA expandAVX512_24_inShuf0<>+0x30(SB)/8, $0xffff010101000000
1163 DATA expandAVX512_24_inShuf0<>+0x38(SB)/8, $0xffff010101000000
1164
// mat0: qword i is eight copies of the byte (1 << i).
1165 GLOBL expandAVX512_24_mat0<>(SB), RODATA, $0x40
1166 DATA expandAVX512_24_mat0<>+0x00(SB)/8, $0x0101010101010101
1167 DATA expandAVX512_24_mat0<>+0x08(SB)/8, $0x0202020202020202
1168 DATA expandAVX512_24_mat0<>+0x10(SB)/8, $0x0404040404040404
1169 DATA expandAVX512_24_mat0<>+0x18(SB)/8, $0x0808080808080808
1170 DATA expandAVX512_24_mat0<>+0x20(SB)/8, $0x1010101010101010
1171 DATA expandAVX512_24_mat0<>+0x28(SB)/8, $0x2020202020202020
1172 DATA expandAVX512_24_mat0<>+0x30(SB)/8, $0x4040404040404040
1173 DATA expandAVX512_24_mat0<>+0x38(SB)/8, $0x8080808080808080
1174
// inShuf1: qwords 0-4 carry a single index (byte 2) in lane 0; qwords 5-7
// select from input bytes 2..4.
// No expandAVX512_24_mat1 table is declared in this range; presumably the
// routine pairs inShuf1 with mat0 — TODO confirm.
1175 GLOBL expandAVX512_24_inShuf1<>(SB), RODATA, $0x40
1176 DATA expandAVX512_24_inShuf1<>+0x00(SB)/8, $0xffffffffffffff02
1177 DATA expandAVX512_24_inShuf1<>+0x08(SB)/8, $0xffffffffffffff02
1178 DATA expandAVX512_24_inShuf1<>+0x10(SB)/8, $0xffffffffffffff02
1179 DATA expandAVX512_24_inShuf1<>+0x18(SB)/8, $0xffffffffffffff02
1180 DATA expandAVX512_24_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
1181 DATA expandAVX512_24_inShuf1<>+0x28(SB)/8, $0x0404040303030202
1182 DATA expandAVX512_24_inShuf1<>+0x30(SB)/8, $0x0404030303020202
1183 DATA expandAVX512_24_inShuf1<>+0x38(SB)/8, $0x0404030303020202
1184
// inShuf2: qwords 0-4 select from input bytes 3..5; qwords 5-7 carry a
// single index (byte 4, 4 and 5 respectively) in lane 0.
1185 GLOBL expandAVX512_24_inShuf2<>(SB), RODATA, $0x40
1186 DATA expandAVX512_24_inShuf2<>+0x00(SB)/8, $0x0505040404030303
1187 DATA expandAVX512_24_inShuf2<>+0x08(SB)/8, $0x0505040404030303
1188 DATA expandAVX512_24_inShuf2<>+0x10(SB)/8, $0x0505040404030303
1189 DATA expandAVX512_24_inShuf2<>+0x18(SB)/8, $0xffff040404030303
1190 DATA expandAVX512_24_inShuf2<>+0x20(SB)/8, $0xffff040404030303
1191 DATA expandAVX512_24_inShuf2<>+0x28(SB)/8, $0xffffffffffffff04
1192 DATA expandAVX512_24_inShuf2<>+0x30(SB)/8, $0xffffffffffffff04
1193 DATA expandAVX512_24_inShuf2<>+0x38(SB)/8, $0xffffffffffffff05
1194
// mat2: like mat0 for qwords 0-4, but the sequence then goes 0x40, 0x80,
// 0x01 — there is no 0x20 qword in this table.
1195 GLOBL expandAVX512_24_mat2<>(SB), RODATA, $0x40
1196 DATA expandAVX512_24_mat2<>+0x00(SB)/8, $0x0101010101010101
1197 DATA expandAVX512_24_mat2<>+0x08(SB)/8, $0x0202020202020202
1198 DATA expandAVX512_24_mat2<>+0x10(SB)/8, $0x0404040404040404
1199 DATA expandAVX512_24_mat2<>+0x18(SB)/8, $0x0808080808080808
1200 DATA expandAVX512_24_mat2<>+0x20(SB)/8, $0x1010101010101010
1201 DATA expandAVX512_24_mat2<>+0x28(SB)/8, $0x4040404040404040
1202 DATA expandAVX512_24_mat2<>+0x30(SB)/8, $0x8080808080808080
1203 DATA expandAVX512_24_mat2<>+0x38(SB)/8, $0x0101010101010101
1204
// inShuf3: a single index (byte 5) in lane 0 of qword 0; every other lane
// is 0xff.
1205 GLOBL expandAVX512_24_inShuf3<>(SB), RODATA, $0x40
1206 DATA expandAVX512_24_inShuf3<>+0x00(SB)/8, $0xffffffffffffff05
1207 DATA expandAVX512_24_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
1208 DATA expandAVX512_24_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
1209 DATA expandAVX512_24_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
1210 DATA expandAVX512_24_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
1211 DATA expandAVX512_24_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
1212 DATA expandAVX512_24_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
1213 DATA expandAVX512_24_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
1214
// mat3: only qword 0 is non-zero (eight copies of 0x02), matching the single
// populated qword of inShuf3.
1215 GLOBL expandAVX512_24_mat3<>(SB), RODATA, $0x40
1216 DATA expandAVX512_24_mat3<>+0x00(SB)/8, $0x0202020202020202
1217 DATA expandAVX512_24_mat3<>+0x08(SB)/8, $0x0000000000000000
1218 DATA expandAVX512_24_mat3<>+0x10(SB)/8, $0x0000000000000000
1219 DATA expandAVX512_24_mat3<>+0x18(SB)/8, $0x0000000000000000
1220 DATA expandAVX512_24_mat3<>+0x20(SB)/8, $0x0000000000000000
1221 DATA expandAVX512_24_mat3<>+0x28(SB)/8, $0x0000000000000000
1222 DATA expandAVX512_24_mat3<>+0x30(SB)/8, $0x0000000000000000
1223 DATA expandAVX512_24_mat3<>+0x38(SB)/8, $0x0000000000000000
1224
// outShufLo: output byte indices; values range up to 0x60, so this is
// presumably a two-table (VPERMI2B) selector — TODO confirm.
// NOTE(review): declared without the <> suffix, so unlike the inShuf/mat
// tables this symbol is not file-local — confirm this is what the generator
// intends.
1225 GLOBL expandAVX512_24_outShufLo(SB), RODATA, $0x40
1226 DATA expandAVX512_24_outShufLo+0x00(SB)/8, $0x11100a0908020100
1227 DATA expandAVX512_24_outShufLo+0x08(SB)/8, $0x282221201a191812
1228 DATA expandAVX512_24_outShufLo+0x10(SB)/8, $0x3a39383231302a29
1229 DATA expandAVX512_24_outShufLo+0x18(SB)/8, $0x14130d0c0b050403
1230 DATA expandAVX512_24_outShufLo+0x20(SB)/8, $0x2b2524231d1c1b15
1231 DATA expandAVX512_24_outShufLo+0x28(SB)/8, $0x3d3c3b3534332d2c
1232 DATA expandAVX512_24_outShufLo+0x30(SB)/8, $0x1716480f0e400706
1233 DATA expandAVX512_24_outShufLo+0x38(SB)/8, $0x2e602726581f1e50
1234
1235 GLOBL expandAVX512_24_outShufHi0(SB), RODATA, $0x40
1236 DATA expandAVX512_24_outShufHi0+0x00(SB)/8, $0x3a39383231302928
1237 DATA expandAVX512_24_outShufHi0+0x08(SB)/8, $0x51504a4948424140
1238 DATA expandAVX512_24_outShufHi0+0x10(SB)/8, $0x2a6261605a595852
1239 DATA expandAVX512_24_outShufHi0+0x18(SB)/8, $0x3d3c3b3534332c2b
1240 DATA expandAVX512_24_outShufHi0+0x20(SB)/8, $0x54534d4c4b454443
1241 DATA expandAVX512_24_outShufHi0+0x28(SB)/8, $0x2d6564635d5c5b55
1242 DATA expandAVX512_24_outShufHi0+0x30(SB)/8, $0x703f3e6837362f2e
1243 DATA expandAVX512_24_outShufHi0+0x38(SB)/8, $0x5756ff4f4e784746
1244
1245 GLOBL expandAVX512_24_outShufHi1(SB), RODATA, $0x40
1246 DATA expandAVX512_24_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
1247 DATA expandAVX512_24_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
1248 DATA expandAVX512_24_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
1249 DATA expandAVX512_24_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
1250 DATA expandAVX512_24_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
1251 DATA expandAVX512_24_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
1252 DATA expandAVX512_24_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
1253 DATA expandAVX512_24_outShufHi1+0x38(SB)/8, $0xffff00ffffffffff
1254
// expandAVX512_24 reads 64 bytes from the address held in AX and leaves two
// 64-byte results in Z1 and Z2. It is NOSPLIT with frame $0-0 (no stack frame,
// no stack arguments). It overwrites Z0-Z8, K1 and AX.
// NOTE(review): judging by the tables, each input bit is replicated 24 times
// in the output; the caller-side register contract is not visible here --
// confirm against mkasm.go.
1255 TEXT expandAVX512_24<>(SB), NOSPLIT, $0-0
// Load the input-shuffle tables, the shared bit-matrix and the output-shuffle
// tables.
1256 VMOVDQU64 expandAVX512_24_inShuf0<>(SB), Z0
1257 VMOVDQU64 expandAVX512_24_mat0<>(SB), Z2
1258 VMOVDQU64 expandAVX512_24_inShuf1<>(SB), Z3
1259 VMOVDQU64 expandAVX512_24_inShuf2<>(SB), Z4
1260 VMOVDQU64 expandAVX512_24_inShuf3<>(SB), Z5
1261 VMOVDQU64 expandAVX512_24_outShufLo(SB), Z1
1262 VMOVDQU64 expandAVX512_24_outShufHi0(SB), Z6
1263 VMOVDQU64 expandAVX512_24_outShufHi1(SB), Z7
// Z8 = the 64 input bytes.
1264 VMOVDQU64 (AX), Z8
// Stage 0: gather input bytes by inShuf0, then apply mat0 per 8-byte lane.
1265 VPERMB Z8, Z0, Z0
1266 VGF2P8AFFINEQB $0, Z2, Z0, Z0
// Stage 1 reuses mat0 (still in Z2) as its matrix; the result then replaces
// mat0 in Z2, so mat0 is gone after this instruction.
1267 VPERMB Z8, Z3, Z3
1268 VGF2P8AFFINEQB $0, Z2, Z3, Z2
// Stages 2 and 3 take their matrices straight from memory.
1269 VPERMB Z8, Z4, Z3
1270 VGF2P8AFFINEQB $0, expandAVX512_24_mat2<>(SB), Z3, Z3
1271 VPERMB Z8, Z5, Z4
1272 VGF2P8AFFINEQB $0, expandAVX512_24_mat3<>(SB), Z4, Z4
// Z1 = bytes picked from Z0/Z2 by the outShufLo indices (indices overwritten).
1273 VPERMI2B Z2, Z0, Z1
// All lanes except byte 61 come from Z2/Z3 via outShufHi0; AX is reused as a
// scratch register from here on, so the input pointer is lost.
1274 MOVQ $0xdfffffffffffffff, AX
1275 KMOVQ AX, K1
1276 VPERMI2B.Z Z3, Z2, K1, Z6
// Byte 61 alone comes from Z4 via outShufHi1. The two masks are bitwise
// complements and both permutes zero their masked-off lanes, so the OR below
// merges them without overlap.
1277 MOVQ $0x2000000000000000, AX
1278 KMOVQ AX, K1
1279 VPERMB.Z Z4, Z7, K1, Z0
1280 VPORQ Z0, Z6, Z2
1281 RET
1282
// Read-only lookup tables for expandAVX512_26. Each table is 0x40 (64) bytes,
// written as eight 8-byte DATA words, i.e. one full ZMM register.
//
// inShuf0..inShuf3 are byte-index tables: expandAVX512_26 uses each one as the
// index operand of a VPERMB over the 64 input bytes read from (AX).
// NOTE(review): the 0xff entries presumably mark lanes whose result is never
// selected by the output shuffles -- confirm against mkasm.go.
1283 GLOBL expandAVX512_26_inShuf0<>(SB), RODATA, $0x40
1284 DATA expandAVX512_26_inShuf0<>+0x00(SB)/8, $0x0202010101000000
1285 DATA expandAVX512_26_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
1286 DATA expandAVX512_26_inShuf0<>+0x10(SB)/8, $0xffff020201010000
1287 DATA expandAVX512_26_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
1288 DATA expandAVX512_26_inShuf0<>+0x20(SB)/8, $0xffff020201010000
1289 DATA expandAVX512_26_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
1290 DATA expandAVX512_26_inShuf0<>+0x30(SB)/8, $0x0202010101000000
1291 DATA expandAVX512_26_inShuf0<>+0x38(SB)/8, $0xffff010101000000
1292
// mat0..mat3 are matrix operands of VGF2P8AFFINEQB (imm8 = 0), one 8-byte
// matrix per 8-byte lane. Per the Intel definition, result bit i of each byte
// is the parity of (data byte AND matrix byte 7-i); every non-zero matrix byte
// here has a single bit set, so each result bit is a copy of one data bit.
// A mixed word such as 0x0101020202020202 yields result bits 0-1 from data
// bit 0 and result bits 2-7 from data bit 1.
1293 GLOBL expandAVX512_26_mat0<>(SB), RODATA, $0x40
1294 DATA expandAVX512_26_mat0<>+0x00(SB)/8, $0x0101010101010101
1295 DATA expandAVX512_26_mat0<>+0x08(SB)/8, $0x0101020202020202
1296 DATA expandAVX512_26_mat0<>+0x10(SB)/8, $0x0202020202020202
1297 DATA expandAVX512_26_mat0<>+0x18(SB)/8, $0x0202020204040404
1298 DATA expandAVX512_26_mat0<>+0x20(SB)/8, $0x0404040404040404
1299 DATA expandAVX512_26_mat0<>+0x28(SB)/8, $0x0404040404040808
1300 DATA expandAVX512_26_mat0<>+0x30(SB)/8, $0x0808080808080808
1301 DATA expandAVX512_26_mat0<>+0x38(SB)/8, $0x1010101010101010
1302
// inShuf1: byte indices for the second VPERMB of the input.
1303 GLOBL expandAVX512_26_inShuf1<>(SB), RODATA, $0x40
1304 DATA expandAVX512_26_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
1305 DATA expandAVX512_26_inShuf1<>+0x08(SB)/8, $0xffffffff01010000
1306 DATA expandAVX512_26_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
1307 DATA expandAVX512_26_inShuf1<>+0x18(SB)/8, $0xffffffff01010000
1308 DATA expandAVX512_26_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
1309 DATA expandAVX512_26_inShuf1<>+0x28(SB)/8, $0xffff010101000000
1310 DATA expandAVX512_26_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
1311 DATA expandAVX512_26_inShuf1<>+0x38(SB)/8, $0xff04040403030302
1312
// mat1: bit-matrices applied to the inShuf1 result.
1313 GLOBL expandAVX512_26_mat1<>(SB), RODATA, $0x40
1314 DATA expandAVX512_26_mat1<>+0x00(SB)/8, $0x1010202020202020
1315 DATA expandAVX512_26_mat1<>+0x08(SB)/8, $0x2020202020202020
1316 DATA expandAVX512_26_mat1<>+0x10(SB)/8, $0x2020202040404040
1317 DATA expandAVX512_26_mat1<>+0x18(SB)/8, $0x4040404040404040
1318 DATA expandAVX512_26_mat1<>+0x20(SB)/8, $0x4040404040408080
1319 DATA expandAVX512_26_mat1<>+0x28(SB)/8, $0x8080808080808080
1320 DATA expandAVX512_26_mat1<>+0x30(SB)/8, $0x0101010101010101
1321 DATA expandAVX512_26_mat1<>+0x38(SB)/8, $0x0808080808080808
1322
// inShuf2: byte indices for the third VPERMB of the input.
1323 GLOBL expandAVX512_26_inShuf2<>(SB), RODATA, $0x40
1324 DATA expandAVX512_26_inShuf2<>+0x00(SB)/8, $0x0404030303020202
1325 DATA expandAVX512_26_inShuf2<>+0x08(SB)/8, $0xffffffffff040302
1326 DATA expandAVX512_26_inShuf2<>+0x10(SB)/8, $0xffff040403030202
1327 DATA expandAVX512_26_inShuf2<>+0x18(SB)/8, $0xffffffffff040302
1328 DATA expandAVX512_26_inShuf2<>+0x20(SB)/8, $0xffff040403030202
1329 DATA expandAVX512_26_inShuf2<>+0x28(SB)/8, $0xffffffffff040302
1330 DATA expandAVX512_26_inShuf2<>+0x30(SB)/8, $0xff04030303020202
1331 DATA expandAVX512_26_inShuf2<>+0x38(SB)/8, $0xffff040404030303
1332
// mat2: bit-matrices applied to the inShuf2 result.
1333 GLOBL expandAVX512_26_mat2<>(SB), RODATA, $0x40
1334 DATA expandAVX512_26_mat2<>+0x00(SB)/8, $0x1010101010101010
1335 DATA expandAVX512_26_mat2<>+0x08(SB)/8, $0x1010202020202020
1336 DATA expandAVX512_26_mat2<>+0x10(SB)/8, $0x2020202020202020
1337 DATA expandAVX512_26_mat2<>+0x18(SB)/8, $0x2020202040404040
1338 DATA expandAVX512_26_mat2<>+0x20(SB)/8, $0x4040404040404040
1339 DATA expandAVX512_26_mat2<>+0x28(SB)/8, $0x4040404040408080
1340 DATA expandAVX512_26_mat2<>+0x30(SB)/8, $0x8080808080808080
1341 DATA expandAVX512_26_mat2<>+0x38(SB)/8, $0x0101010101010101
1342
// inShuf3: byte indices for the fourth VPERMB of the input; the last two
// lanes are entirely 0xff.
1343 GLOBL expandAVX512_26_inShuf3<>(SB), RODATA, $0x40
1344 DATA expandAVX512_26_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
1345 DATA expandAVX512_26_inShuf3<>+0x08(SB)/8, $0xffffffff04040303
1346 DATA expandAVX512_26_inShuf3<>+0x10(SB)/8, $0xffffffffffff0403
1347 DATA expandAVX512_26_inShuf3<>+0x18(SB)/8, $0xffffffff04040303
1348 DATA expandAVX512_26_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
1349 DATA expandAVX512_26_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
1350 DATA expandAVX512_26_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
1351 DATA expandAVX512_26_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
1352
// mat3: bit-matrices applied to the inShuf3 result; the all-zero matrices in
// the last two lanes produce zero bytes.
1353 GLOBL expandAVX512_26_mat3<>(SB), RODATA, $0x40
1354 DATA expandAVX512_26_mat3<>+0x00(SB)/8, $0x0101020202020202
1355 DATA expandAVX512_26_mat3<>+0x08(SB)/8, $0x0202020202020202
1356 DATA expandAVX512_26_mat3<>+0x10(SB)/8, $0x0202020204040404
1357 DATA expandAVX512_26_mat3<>+0x18(SB)/8, $0x0404040404040404
1358 DATA expandAVX512_26_mat3<>+0x20(SB)/8, $0x0404040404040808
1359 DATA expandAVX512_26_mat3<>+0x28(SB)/8, $0x1010101010101010
1360 DATA expandAVX512_26_mat3<>+0x30(SB)/8, $0x0000000000000000
1361 DATA expandAVX512_26_mat3<>+0x38(SB)/8, $0x0000000000000000
1362
// outShufLo: byte indices for the two-source VPERMI2B that builds Z1.
// Indices 0x00-0x3f select from the first intermediate (Z0), 0x40-0x7f from
// the second (Z2). Declared without the "<>" file-local suffix.
1363 GLOBL expandAVX512_26_outShufLo(SB), RODATA, $0x40
1364 DATA expandAVX512_26_outShufLo+0x00(SB)/8, $0x2018111008020100
1365 DATA expandAVX512_26_outShufLo+0x08(SB)/8, $0x3a39383231302821
1366 DATA expandAVX512_26_outShufLo+0x10(SB)/8, $0x6860595850494840
1367 DATA expandAVX512_26_outShufLo+0x18(SB)/8, $0x1312090504036a69
1368 DATA expandAVX512_26_outShufLo+0x20(SB)/8, $0x3b35343329232219
1369 DATA expandAVX512_26_outShufLo+0x28(SB)/8, $0x5b5a514b4a413d3c
1370 DATA expandAVX512_26_outShufLo+0x30(SB)/8, $0x0a7007066d6c6b61
1371 DATA expandAVX512_26_outShufLo+0x38(SB)/8, $0x37362a25241a1514
1372
// outShufHi0: byte indices for the masked VPERMI2B.Z over the second and third
// intermediates (0x00-0x3f -> Z2, 0x40-0x7f -> Z3). Its 0xff entries sit
// exactly on the lanes cleared in mask 0xff7c07ffff01ffff.
1373 GLOBL expandAVX512_26_outShufHi0(SB), RODATA, $0x40
1374 DATA expandAVX512_26_outShufHi0+0x00(SB)/8, $0x5851504842414038
1375 DATA expandAVX512_26_outShufHi0+0x08(SB)/8, $0x7978727170686160
1376 DATA expandAVX512_26_outShufHi0+0x10(SB)/8, $0xffffffffffffff7a
1377 DATA expandAVX512_26_outShufHi0+0x18(SB)/8, $0x52494544433b3a39
1378 DATA expandAVX512_26_outShufHi0+0x20(SB)/8, $0x7574736963625953
1379 DATA expandAVX512_26_outShufHi0+0x28(SB)/8, $0xffffffffff7d7c7b
1380 DATA expandAVX512_26_outShufHi0+0x30(SB)/8, $0xff47463e3d3cffff
1381 DATA expandAVX512_26_outShufHi0+0x38(SB)/8, $0x766a65645a55544a
1382
// outShufHi1: byte indices for the masked VPERMB.Z over the fourth
// intermediate (Z4). Real indices appear only on the lanes enabled by mask
// 0x83f80000fe0000; every other entry is 0xff.
1383 GLOBL expandAVX512_26_outShufHi1(SB), RODATA, $0x40
1384 DATA expandAVX512_26_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
1385 DATA expandAVX512_26_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
1386 DATA expandAVX512_26_outShufHi1+0x10(SB)/8, $0x20191810090800ff
1387 DATA expandAVX512_26_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
1388 DATA expandAVX512_26_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
1389 DATA expandAVX512_26_outShufHi1+0x28(SB)/8, $0x1a110b0a01ffffff
1390 DATA expandAVX512_26_outShufHi1+0x30(SB)/8, $0x28ffffffffff211b
1391 DATA expandAVX512_26_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
1392
// expandAVX512_26 reads 64 bytes from the address held in AX and leaves two
// 64-byte results in Z1 and Z2. It is NOSPLIT with frame $0-0 (no stack frame,
// no stack arguments). It overwrites Z0-Z7, K1 and AX.
// NOTE(review): judging by the tables, each input bit is replicated 26 times
// in the output; the caller-side register contract is not visible here --
// confirm against mkasm.go.
1393 TEXT expandAVX512_26<>(SB), NOSPLIT, $0-0
// Load the four input-shuffle tables and the three output-shuffle tables.
1394 VMOVDQU64 expandAVX512_26_inShuf0<>(SB), Z0
1395 VMOVDQU64 expandAVX512_26_inShuf1<>(SB), Z2
1396 VMOVDQU64 expandAVX512_26_inShuf2<>(SB), Z3
1397 VMOVDQU64 expandAVX512_26_inShuf3<>(SB), Z4
1398 VMOVDQU64 expandAVX512_26_outShufLo(SB), Z1
1399 VMOVDQU64 expandAVX512_26_outShufHi0(SB), Z5
1400 VMOVDQU64 expandAVX512_26_outShufHi1(SB), Z6
// Z7 = the 64 input bytes.
1401 VMOVDQU64 (AX), Z7
// Four stages: gather input bytes by inShufN, then apply matN (read from
// memory) per 8-byte lane. Each stage overwrites its own index register.
1402 VPERMB Z7, Z0, Z0
1403 VGF2P8AFFINEQB $0, expandAVX512_26_mat0<>(SB), Z0, Z0
1404 VPERMB Z7, Z2, Z2
1405 VGF2P8AFFINEQB $0, expandAVX512_26_mat1<>(SB), Z2, Z2
1406 VPERMB Z7, Z3, Z3
1407 VGF2P8AFFINEQB $0, expandAVX512_26_mat2<>(SB), Z3, Z3
1408 VPERMB Z7, Z4, Z4
1409 VGF2P8AFFINEQB $0, expandAVX512_26_mat3<>(SB), Z4, Z4
// Z1 = bytes picked from Z0/Z2 by the outShufLo indices (indices overwritten).
1410 VPERMI2B Z2, Z0, Z1
// Lanes enabled by this mask come from Z2/Z3 via outShufHi0. AX is reused as
// a scratch register from here on, so the input pointer is lost.
1411 MOVQ $0xff7c07ffff01ffff, AX
1412 KMOVQ AX, K1
1413 VPERMI2B.Z Z3, Z2, K1, Z5
// The remaining lanes come from Z4 via outShufHi1. This mask is the bitwise
// complement of the previous one and both permutes zero their masked-off
// lanes, so the OR below merges them without overlap.
1414 MOVQ $0x83f80000fe0000, AX
1415 KMOVQ AX, K1
1416 VPERMB.Z Z4, Z6, K1, Z0
1417 VPORQ Z0, Z5, Z2
1418 RET
1419
// Read-only lookup tables for expandAVX512_28. Each table is 0x40 (64) bytes,
// written as eight 8-byte DATA words, i.e. one full ZMM register.
//
// inShuf0..inShuf3 are byte-index tables: expandAVX512_28 uses each one as the
// index operand of a VPERMB over the 64 input bytes read from (AX).
// NOTE(review): the 0xff entries presumably mark lanes whose result is never
// selected by the output shuffles -- confirm against mkasm.go.
1420 GLOBL expandAVX512_28_inShuf0<>(SB), RODATA, $0x40
1421 DATA expandAVX512_28_inShuf0<>+0x00(SB)/8, $0x0202010101000000
1422 DATA expandAVX512_28_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
1423 DATA expandAVX512_28_inShuf0<>+0x10(SB)/8, $0x0202010101000000
1424 DATA expandAVX512_28_inShuf0<>+0x18(SB)/8, $0xff02010101000000
1425 DATA expandAVX512_28_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
1426 DATA expandAVX512_28_inShuf0<>+0x28(SB)/8, $0xffff010101000000
1427 DATA expandAVX512_28_inShuf0<>+0x30(SB)/8, $0xffff010101000000
1428 DATA expandAVX512_28_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100
1429
// mat0..mat3 are matrix operands of VGF2P8AFFINEQB (imm8 = 0), one 8-byte
// matrix per 8-byte lane. Per the Intel definition, result bit i of each byte
// is the parity of (data byte AND matrix byte 7-i); every non-zero matrix byte
// here has a single bit set, so each result bit is a copy of one data bit.
// A mixed word such as 0x0101010102020202 yields result bits 0-3 from data
// bit 0 and result bits 4-7 from data bit 1.
1430 GLOBL expandAVX512_28_mat0<>(SB), RODATA, $0x40
1431 DATA expandAVX512_28_mat0<>+0x00(SB)/8, $0x0101010101010101
1432 DATA expandAVX512_28_mat0<>+0x08(SB)/8, $0x0101010102020202
1433 DATA expandAVX512_28_mat0<>+0x10(SB)/8, $0x0202020202020202
1434 DATA expandAVX512_28_mat0<>+0x18(SB)/8, $0x0404040404040404
1435 DATA expandAVX512_28_mat0<>+0x20(SB)/8, $0x0404040408080808
1436 DATA expandAVX512_28_mat0<>+0x28(SB)/8, $0x0808080808080808
1437 DATA expandAVX512_28_mat0<>+0x30(SB)/8, $0x1010101010101010
1438 DATA expandAVX512_28_mat0<>+0x38(SB)/8, $0x1010101020202020
1439
// inShuf1: byte indices for the second VPERMB of the input.
1440 GLOBL expandAVX512_28_inShuf1<>(SB), RODATA, $0x40
1441 DATA expandAVX512_28_inShuf1<>+0x00(SB)/8, $0xffff010101000000
1442 DATA expandAVX512_28_inShuf1<>+0x08(SB)/8, $0xffff010101000000
1443 DATA expandAVX512_28_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
1444 DATA expandAVX512_28_inShuf1<>+0x18(SB)/8, $0xffff010101000000
1445 DATA expandAVX512_28_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
1446 DATA expandAVX512_28_inShuf1<>+0x28(SB)/8, $0xffffffffffffff02
1447 DATA expandAVX512_28_inShuf1<>+0x30(SB)/8, $0x0404040303030202
1448 DATA expandAVX512_28_inShuf1<>+0x38(SB)/8, $0xffffffffff040302
1449
// mat1: bit-matrices applied to the inShuf1 result.
1450 GLOBL expandAVX512_28_mat1<>(SB), RODATA, $0x40
1451 DATA expandAVX512_28_mat1<>+0x00(SB)/8, $0x2020202020202020
1452 DATA expandAVX512_28_mat1<>+0x08(SB)/8, $0x4040404040404040
1453 DATA expandAVX512_28_mat1<>+0x10(SB)/8, $0x4040404080808080
1454 DATA expandAVX512_28_mat1<>+0x18(SB)/8, $0x8080808080808080
1455 DATA expandAVX512_28_mat1<>+0x20(SB)/8, $0x0101010101010101
1456 DATA expandAVX512_28_mat1<>+0x28(SB)/8, $0x0202020202020202
1457 DATA expandAVX512_28_mat1<>+0x30(SB)/8, $0x0404040404040404
1458 DATA expandAVX512_28_mat1<>+0x38(SB)/8, $0x0404040408080808
1459
// inShuf2: byte indices for the third VPERMB of the input.
1460 GLOBL expandAVX512_28_inShuf2<>(SB), RODATA, $0x40
1461 DATA expandAVX512_28_inShuf2<>+0x00(SB)/8, $0x0404030303020202
1462 DATA expandAVX512_28_inShuf2<>+0x08(SB)/8, $0x0404030303020202
1463 DATA expandAVX512_28_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
1464 DATA expandAVX512_28_inShuf2<>+0x18(SB)/8, $0xffff030303020202
1465 DATA expandAVX512_28_inShuf2<>+0x20(SB)/8, $0xffff030303020202
1466 DATA expandAVX512_28_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
1467 DATA expandAVX512_28_inShuf2<>+0x30(SB)/8, $0xffff030303020202
1468 DATA expandAVX512_28_inShuf2<>+0x38(SB)/8, $0xffff040404030303
1469
// mat2: bit-matrices applied to the inShuf2 result.
1470 GLOBL expandAVX512_28_mat2<>(SB), RODATA, $0x40
1471 DATA expandAVX512_28_mat2<>+0x00(SB)/8, $0x0808080808080808
1472 DATA expandAVX512_28_mat2<>+0x08(SB)/8, $0x1010101010101010
1473 DATA expandAVX512_28_mat2<>+0x10(SB)/8, $0x1010101020202020
1474 DATA expandAVX512_28_mat2<>+0x18(SB)/8, $0x2020202020202020
1475 DATA expandAVX512_28_mat2<>+0x20(SB)/8, $0x4040404040404040
1476 DATA expandAVX512_28_mat2<>+0x28(SB)/8, $0x4040404080808080
1477 DATA expandAVX512_28_mat2<>+0x30(SB)/8, $0x8080808080808080
1478 DATA expandAVX512_28_mat2<>+0x38(SB)/8, $0x0101010101010101
1479
// inShuf3: byte indices for the fourth VPERMB of the input; only the first
// three lanes carry real indices.
1480 GLOBL expandAVX512_28_inShuf3<>(SB), RODATA, $0x40
1481 DATA expandAVX512_28_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
1482 DATA expandAVX512_28_inShuf3<>+0x08(SB)/8, $0xffff040404030303
1483 DATA expandAVX512_28_inShuf3<>+0x10(SB)/8, $0xffffffffffffff04
1484 DATA expandAVX512_28_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
1485 DATA expandAVX512_28_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
1486 DATA expandAVX512_28_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
1487 DATA expandAVX512_28_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
1488 DATA expandAVX512_28_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
1489
// mat3: bit-matrices applied to the inShuf3 result; the all-zero matrices in
// the last five lanes produce zero bytes.
1490 GLOBL expandAVX512_28_mat3<>(SB), RODATA, $0x40
1491 DATA expandAVX512_28_mat3<>+0x00(SB)/8, $0x0101010102020202
1492 DATA expandAVX512_28_mat3<>+0x08(SB)/8, $0x0202020202020202
1493 DATA expandAVX512_28_mat3<>+0x10(SB)/8, $0x0808080808080808
1494 DATA expandAVX512_28_mat3<>+0x18(SB)/8, $0x0000000000000000
1495 DATA expandAVX512_28_mat3<>+0x20(SB)/8, $0x0000000000000000
1496 DATA expandAVX512_28_mat3<>+0x28(SB)/8, $0x0000000000000000
1497 DATA expandAVX512_28_mat3<>+0x30(SB)/8, $0x0000000000000000
1498 DATA expandAVX512_28_mat3<>+0x38(SB)/8, $0x0000000000000000
1499
// outShufLo: byte indices for the two-source VPERMI2B that builds Z1.
// Indices 0x00-0x3f select from the first intermediate (Z0), 0x40-0x7f from
// the second (Z2). Declared without the "<>" file-local suffix.
1500 GLOBL expandAVX512_28_outShufLo(SB), RODATA, $0x40
1501 DATA expandAVX512_28_outShufLo+0x00(SB)/8, $0x1812111008020100
1502 DATA expandAVX512_28_outShufLo+0x08(SB)/8, $0x31302a2928201a19
1503 DATA expandAVX512_28_outShufLo+0x10(SB)/8, $0x4a49484241403832
1504 DATA expandAVX512_28_outShufLo+0x18(SB)/8, $0x090504035a595850
1505 DATA expandAVX512_28_outShufLo+0x20(SB)/8, $0x2b211d1c1b151413
1506 DATA expandAVX512_28_outShufLo+0x28(SB)/8, $0x4443393534332d2c
1507 DATA expandAVX512_28_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b45
1508 DATA expandAVX512_28_outShufLo+0x38(SB)/8, $0x1e6817160a600706
1509
// outShufHi0: byte indices for the masked VPERMI2B.Z over the second and third
// intermediates (0x00-0x3f -> Z2, 0x40-0x7f -> Z3). Its 0xff entries sit
// exactly on the lanes cleared in mask 0xdf87fffff87fffff.
1510 GLOBL expandAVX512_28_outShufHi0(SB), RODATA, $0x40
1511 DATA expandAVX512_28_outShufHi0+0x00(SB)/8, $0x4948424140383130
1512 DATA expandAVX512_28_outShufHi0+0x08(SB)/8, $0x6261605a5958504a
1513 DATA expandAVX512_28_outShufHi0+0x10(SB)/8, $0xff7a797872717068
1514 DATA expandAVX512_28_outShufHi0+0x18(SB)/8, $0x4339343332ffffff
1515 DATA expandAVX512_28_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b4544
1516 DATA expandAVX512_28_outShufHi0+0x28(SB)/8, $0x757473696564635d
1517 DATA expandAVX512_28_outShufHi0+0x30(SB)/8, $0x35ffffffff7d7c7b
1518 DATA expandAVX512_28_outShufHi0+0x38(SB)/8, $0x4f4eff47463a3736
1519
// outShufHi1: byte indices for the masked VPERMB.Z over the fourth
// intermediate (Z4). Real indices appear only on the lanes enabled by mask
// 0x2078000007800000; every other entry is 0xff.
1520 GLOBL expandAVX512_28_outShufHi1(SB), RODATA, $0x40
1521 DATA expandAVX512_28_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
1522 DATA expandAVX512_28_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
1523 DATA expandAVX512_28_outShufHi1+0x10(SB)/8, $0x00ffffffffffffff
1524 DATA expandAVX512_28_outShufHi1+0x18(SB)/8, $0xffffffffff0a0908
1525 DATA expandAVX512_28_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
1526 DATA expandAVX512_28_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
1527 DATA expandAVX512_28_outShufHi1+0x30(SB)/8, $0xff0d0c0b01ffffff
1528 DATA expandAVX512_28_outShufHi1+0x38(SB)/8, $0xffff10ffffffffff
1529
// expandAVX512_28 reads 64 bytes from the address held in AX and leaves two
// 64-byte results in Z1 and Z2. It is NOSPLIT with frame $0-0 (no stack frame,
// no stack arguments). It overwrites Z0-Z7, K1 and AX.
// NOTE(review): judging by the tables, each input bit is replicated 28 times
// in the output; the caller-side register contract is not visible here --
// confirm against mkasm.go.
1530 TEXT expandAVX512_28<>(SB), NOSPLIT, $0-0
// Load the four input-shuffle tables and the three output-shuffle tables.
1531 VMOVDQU64 expandAVX512_28_inShuf0<>(SB), Z0
1532 VMOVDQU64 expandAVX512_28_inShuf1<>(SB), Z2
1533 VMOVDQU64 expandAVX512_28_inShuf2<>(SB), Z3
1534 VMOVDQU64 expandAVX512_28_inShuf3<>(SB), Z4
1535 VMOVDQU64 expandAVX512_28_outShufLo(SB), Z1
1536 VMOVDQU64 expandAVX512_28_outShufHi0(SB), Z5
1537 VMOVDQU64 expandAVX512_28_outShufHi1(SB), Z6
// Z7 = the 64 input bytes.
1538 VMOVDQU64 (AX), Z7
// Four stages: gather input bytes by inShufN, then apply matN (read from
// memory) per 8-byte lane. Each stage overwrites its own index register.
1539 VPERMB Z7, Z0, Z0
1540 VGF2P8AFFINEQB $0, expandAVX512_28_mat0<>(SB), Z0, Z0
1541 VPERMB Z7, Z2, Z2
1542 VGF2P8AFFINEQB $0, expandAVX512_28_mat1<>(SB), Z2, Z2
1543 VPERMB Z7, Z3, Z3
1544 VGF2P8AFFINEQB $0, expandAVX512_28_mat2<>(SB), Z3, Z3
1545 VPERMB Z7, Z4, Z4
1546 VGF2P8AFFINEQB $0, expandAVX512_28_mat3<>(SB), Z4, Z4
// Z1 = bytes picked from Z0/Z2 by the outShufLo indices (indices overwritten).
1547 VPERMI2B Z2, Z0, Z1
// Lanes enabled by this mask come from Z2/Z3 via outShufHi0. AX is reused as
// a scratch register from here on, so the input pointer is lost.
1548 MOVQ $0xdf87fffff87fffff, AX
1549 KMOVQ AX, K1
1550 VPERMI2B.Z Z3, Z2, K1, Z5
// The remaining lanes come from Z4 via outShufHi1. This mask is the bitwise
// complement of the previous one and both permutes zero their masked-off
// lanes, so the OR below merges them without overlap.
1551 MOVQ $0x2078000007800000, AX
1552 KMOVQ AX, K1
1553 VPERMB.Z Z4, Z6, K1, Z0
1554 VPORQ Z0, Z5, Z2
1555 RET
1556
// Read-only lookup tables for expandAVX512_30. Each table is 0x40 (64) bytes,
// written as eight 8-byte DATA words, i.e. one full ZMM register.
//
// inShuf0..inShuf3 are byte-index tables: expandAVX512_30 uses each one as the
// index operand of a VPERMB over the 64 input bytes read from (AX).
// NOTE(review): the 0xff entries presumably mark lanes whose result is never
// selected by the output shuffles -- confirm against mkasm.go.
1557 GLOBL expandAVX512_30_inShuf0<>(SB), RODATA, $0x40
1558 DATA expandAVX512_30_inShuf0<>+0x00(SB)/8, $0x0202010101000000
1559 DATA expandAVX512_30_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
1560 DATA expandAVX512_30_inShuf0<>+0x10(SB)/8, $0xffff010101000000
1561 DATA expandAVX512_30_inShuf0<>+0x18(SB)/8, $0xffffffffffff0100
1562 DATA expandAVX512_30_inShuf0<>+0x20(SB)/8, $0xffff010101000000
1563 DATA expandAVX512_30_inShuf0<>+0x28(SB)/8, $0xffffffffffff0100
1564 DATA expandAVX512_30_inShuf0<>+0x30(SB)/8, $0xffff010101000000
1565 DATA expandAVX512_30_inShuf0<>+0x38(SB)/8, $0xffff010101000000
1566
// mat0..mat3 are matrix operands of VGF2P8AFFINEQB (imm8 = 0), one 8-byte
// matrix per 8-byte lane. Per the Intel definition, result bit i of each byte
// is the parity of (data byte AND matrix byte 7-i); every non-zero matrix byte
// here has a single bit set, so each result bit is a copy of one data bit.
// A mixed word such as 0x0101010101010202 yields result bits 0-5 from data
// bit 0 and result bits 6-7 from data bit 1.
1567 GLOBL expandAVX512_30_mat0<>(SB), RODATA, $0x40
1568 DATA expandAVX512_30_mat0<>+0x00(SB)/8, $0x0101010101010101
1569 DATA expandAVX512_30_mat0<>+0x08(SB)/8, $0x0101010101010202
1570 DATA expandAVX512_30_mat0<>+0x10(SB)/8, $0x0202020202020202
1571 DATA expandAVX512_30_mat0<>+0x18(SB)/8, $0x0202020204040404
1572 DATA expandAVX512_30_mat0<>+0x20(SB)/8, $0x0404040404040404
1573 DATA expandAVX512_30_mat0<>+0x28(SB)/8, $0x0404080808080808
1574 DATA expandAVX512_30_mat0<>+0x30(SB)/8, $0x0808080808080808
1575 DATA expandAVX512_30_mat0<>+0x38(SB)/8, $0x1010101010101010
1576
// inShuf1: byte indices for the second VPERMB of the input.
1577 GLOBL expandAVX512_30_inShuf1<>(SB), RODATA, $0x40
1578 DATA expandAVX512_30_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
1579 DATA expandAVX512_30_inShuf1<>+0x08(SB)/8, $0xffff010101000000
1580 DATA expandAVX512_30_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
1581 DATA expandAVX512_30_inShuf1<>+0x18(SB)/8, $0xffff010101000000
1582 DATA expandAVX512_30_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
1583 DATA expandAVX512_30_inShuf1<>+0x28(SB)/8, $0xffff010101000000
1584 DATA expandAVX512_30_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
1585 DATA expandAVX512_30_inShuf1<>+0x38(SB)/8, $0x0404030303020202
1586
// mat1: bit-matrices applied to the inShuf1 result.
1587 GLOBL expandAVX512_30_mat1<>(SB), RODATA, $0x40
1588 DATA expandAVX512_30_mat1<>+0x00(SB)/8, $0x1010101010102020
1589 DATA expandAVX512_30_mat1<>+0x08(SB)/8, $0x2020202020202020
1590 DATA expandAVX512_30_mat1<>+0x10(SB)/8, $0x2020202040404040
1591 DATA expandAVX512_30_mat1<>+0x18(SB)/8, $0x4040404040404040
1592 DATA expandAVX512_30_mat1<>+0x20(SB)/8, $0x4040808080808080
1593 DATA expandAVX512_30_mat1<>+0x28(SB)/8, $0x8080808080808080
1594 DATA expandAVX512_30_mat1<>+0x30(SB)/8, $0x0101010101010101
1595 DATA expandAVX512_30_mat1<>+0x38(SB)/8, $0x0202020202020202
1596
// inShuf2: byte indices for the third VPERMB of the input.
1597 GLOBL expandAVX512_30_inShuf2<>(SB), RODATA, $0x40
1598 DATA expandAVX512_30_inShuf2<>+0x00(SB)/8, $0xffffffffff040302
1599 DATA expandAVX512_30_inShuf2<>+0x08(SB)/8, $0xffff030303020202
1600 DATA expandAVX512_30_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
1601 DATA expandAVX512_30_inShuf2<>+0x18(SB)/8, $0xffff030303020202
1602 DATA expandAVX512_30_inShuf2<>+0x20(SB)/8, $0xffff030303020202
1603 DATA expandAVX512_30_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
1604 DATA expandAVX512_30_inShuf2<>+0x30(SB)/8, $0xffff030303020202
1605 DATA expandAVX512_30_inShuf2<>+0x38(SB)/8, $0xffffffffffff0302
1606
// mat2: bit-matrices applied to the inShuf2 result.
1607 GLOBL expandAVX512_30_mat2<>(SB), RODATA, $0x40
1608 DATA expandAVX512_30_mat2<>+0x00(SB)/8, $0x0202020204040404
1609 DATA expandAVX512_30_mat2<>+0x08(SB)/8, $0x0404040404040404
1610 DATA expandAVX512_30_mat2<>+0x10(SB)/8, $0x0404080808080808
1611 DATA expandAVX512_30_mat2<>+0x18(SB)/8, $0x0808080808080808
1612 DATA expandAVX512_30_mat2<>+0x20(SB)/8, $0x1010101010101010
1613 DATA expandAVX512_30_mat2<>+0x28(SB)/8, $0x1010101010102020
1614 DATA expandAVX512_30_mat2<>+0x30(SB)/8, $0x2020202020202020
1615 DATA expandAVX512_30_mat2<>+0x38(SB)/8, $0x2020202040404040
1616
// inShuf3: byte indices for the fourth VPERMB of the input; the last two
// lanes are entirely 0xff.
1617 GLOBL expandAVX512_30_inShuf3<>(SB), RODATA, $0x40
1618 DATA expandAVX512_30_inShuf3<>+0x00(SB)/8, $0xffff030303020202
1619 DATA expandAVX512_30_inShuf3<>+0x08(SB)/8, $0xffffffffffff0302
1620 DATA expandAVX512_30_inShuf3<>+0x10(SB)/8, $0xffff030303020202
1621 DATA expandAVX512_30_inShuf3<>+0x18(SB)/8, $0xffff040404030303
1622 DATA expandAVX512_30_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
1623 DATA expandAVX512_30_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
1624 DATA expandAVX512_30_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
1625 DATA expandAVX512_30_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
1626
// mat3: bit-matrices applied to the inShuf3 result; the all-zero matrices in
// the last two lanes produce zero bytes.
1627 GLOBL expandAVX512_30_mat3<>(SB), RODATA, $0x40
1628 DATA expandAVX512_30_mat3<>+0x00(SB)/8, $0x4040404040404040
1629 DATA expandAVX512_30_mat3<>+0x08(SB)/8, $0x4040808080808080
1630 DATA expandAVX512_30_mat3<>+0x10(SB)/8, $0x8080808080808080
1631 DATA expandAVX512_30_mat3<>+0x18(SB)/8, $0x0101010101010101
1632 DATA expandAVX512_30_mat3<>+0x20(SB)/8, $0x0101010101010202
1633 DATA expandAVX512_30_mat3<>+0x28(SB)/8, $0x0202020202020202
1634 DATA expandAVX512_30_mat3<>+0x30(SB)/8, $0x0000000000000000
1635 DATA expandAVX512_30_mat3<>+0x38(SB)/8, $0x0000000000000000
1636
// outShufLo: byte indices for the two-source VPERMI2B that builds Z1.
// Indices 0x00-0x3f select from the first intermediate (Z0), 0x40-0x7f from
// the second (Z2). Declared without the "<>" file-local suffix.
1637 GLOBL expandAVX512_30_outShufLo(SB), RODATA, $0x40
1638 DATA expandAVX512_30_outShufLo+0x00(SB)/8, $0x1812111008020100
1639 DATA expandAVX512_30_outShufLo+0x08(SB)/8, $0x3832313028222120
1640 DATA expandAVX512_30_outShufLo+0x10(SB)/8, $0x58504a4948403a39
1641 DATA expandAVX512_30_outShufLo+0x18(SB)/8, $0x04036a6968605a59
1642 DATA expandAVX512_30_outShufLo+0x20(SB)/8, $0x2423191514130905
1643 DATA expandAVX512_30_outShufLo+0x28(SB)/8, $0x3d3c3b3534332925
1644 DATA expandAVX512_30_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b41
1645 DATA expandAVX512_30_outShufLo+0x38(SB)/8, $0x0a7007066d6c6b61
1646
// outShufHi0: byte indices for the masked VPERMI2B.Z over the second and third
// intermediates (0x00-0x3f -> Z2, 0x40-0x7f -> Z3). Its 0xff entries sit
// exactly on the lanes cleared in mask 0xb001ffffc007ffff.
1647 GLOBL expandAVX512_30_outShufHi0(SB), RODATA, $0x40
1648 DATA expandAVX512_30_outShufHi0+0x00(SB)/8, $0x504a4948403a3938
1649 DATA expandAVX512_30_outShufHi0+0x08(SB)/8, $0x70686261605a5958
1650 DATA expandAVX512_30_outShufHi0+0x10(SB)/8, $0xffffffffff787271
1651 DATA expandAVX512_30_outShufHi0+0x18(SB)/8, $0x3c3bffffffffffff
1652 DATA expandAVX512_30_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b413d
1653 DATA expandAVX512_30_outShufHi0+0x28(SB)/8, $0x757473696564635d
1654 DATA expandAVX512_30_outShufHi0+0x30(SB)/8, $0xffffffffffffff79
1655 DATA expandAVX512_30_outShufHi0+0x38(SB)/8, $0x42ff3f3effffffff
1656
// outShufHi1: byte indices for the masked VPERMB.Z over the fourth
// intermediate (Z4). Real indices appear only on the lanes enabled by mask
// 0x4ffe00003ff80000; every other entry is 0xff.
1657 GLOBL expandAVX512_30_outShufHi1(SB), RODATA, $0x40
1658 DATA expandAVX512_30_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
1659 DATA expandAVX512_30_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
1660 DATA expandAVX512_30_outShufHi1+0x10(SB)/8, $0x1008020100ffffff
1661 DATA expandAVX512_30_outShufHi1+0x18(SB)/8, $0xffff201a19181211
1662 DATA expandAVX512_30_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
1663 DATA expandAVX512_30_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
1664 DATA expandAVX512_30_outShufHi1+0x30(SB)/8, $0x15141309050403ff
1665 DATA expandAVX512_30_outShufHi1+0x38(SB)/8, $0xff28ffff211d1c1b
1666
// expandAVX512_30 reads 64 bytes from the address held in AX and leaves two
// 64-byte results in Z1 and Z2. It is NOSPLIT with frame $0-0 (no stack frame,
// no stack arguments). It overwrites Z0-Z7, K1 and AX.
// NOTE(review): judging by the tables, each input bit is replicated 30 times
// in the output; the caller-side register contract is not visible here --
// confirm against mkasm.go.
1667 TEXT expandAVX512_30<>(SB), NOSPLIT, $0-0
// Load the four input-shuffle tables and the three output-shuffle tables.
1668 VMOVDQU64 expandAVX512_30_inShuf0<>(SB), Z0
1669 VMOVDQU64 expandAVX512_30_inShuf1<>(SB), Z2
1670 VMOVDQU64 expandAVX512_30_inShuf2<>(SB), Z3
1671 VMOVDQU64 expandAVX512_30_inShuf3<>(SB), Z4
1672 VMOVDQU64 expandAVX512_30_outShufLo(SB), Z1
1673 VMOVDQU64 expandAVX512_30_outShufHi0(SB), Z5
1674 VMOVDQU64 expandAVX512_30_outShufHi1(SB), Z6
// Z7 = the 64 input bytes.
1675 VMOVDQU64 (AX), Z7
// Four stages: gather input bytes by inShufN, then apply matN (read from
// memory) per 8-byte lane. Each stage overwrites its own index register.
1676 VPERMB Z7, Z0, Z0
1677 VGF2P8AFFINEQB $0, expandAVX512_30_mat0<>(SB), Z0, Z0
1678 VPERMB Z7, Z2, Z2
1679 VGF2P8AFFINEQB $0, expandAVX512_30_mat1<>(SB), Z2, Z2
1680 VPERMB Z7, Z3, Z3
1681 VGF2P8AFFINEQB $0, expandAVX512_30_mat2<>(SB), Z3, Z3
1682 VPERMB Z7, Z4, Z4
1683 VGF2P8AFFINEQB $0, expandAVX512_30_mat3<>(SB), Z4, Z4
// Z1 = bytes picked from Z0/Z2 by the outShufLo indices (indices overwritten).
1684 VPERMI2B Z2, Z0, Z1
// Lanes enabled by this mask come from Z2/Z3 via outShufHi0. AX is reused as
// a scratch register from here on, so the input pointer is lost.
1685 MOVQ $0xb001ffffc007ffff, AX
1686 KMOVQ AX, K1
1687 VPERMI2B.Z Z3, Z2, K1, Z5
// The remaining lanes come from Z4 via outShufHi1. This mask is the bitwise
// complement of the previous one and both permutes zero their masked-off
// lanes, so the OR below merges them without overlap.
1688 MOVQ $0x4ffe00003ff80000, AX
1689 KMOVQ AX, K1
1690 VPERMB.Z Z4, Z6, K1, Z0
1691 VPORQ Z0, Z5, Z2
1692 RET
1693
// Read-only lookup tables for expandAVX512_32. Each table is 0x40 (64) bytes,
// written as eight 8-byte DATA words, i.e. one full ZMM register.
//
// inShuf0: byte indices for the first VPERMB of the input. Every 8-byte lane
// selects input byte 0 four times followed by input byte 1 four times.
1694 GLOBL expandAVX512_32_inShuf0<>(SB), RODATA, $0x40
1695 DATA expandAVX512_32_inShuf0<>+0x00(SB)/8, $0x0101010100000000
1696 DATA expandAVX512_32_inShuf0<>+0x08(SB)/8, $0x0101010100000000
1697 DATA expandAVX512_32_inShuf0<>+0x10(SB)/8, $0x0101010100000000
1698 DATA expandAVX512_32_inShuf0<>+0x18(SB)/8, $0x0101010100000000
1699 DATA expandAVX512_32_inShuf0<>+0x20(SB)/8, $0x0101010100000000
1700 DATA expandAVX512_32_inShuf0<>+0x28(SB)/8, $0x0101010100000000
1701 DATA expandAVX512_32_inShuf0<>+0x30(SB)/8, $0x0101010100000000
1702 DATA expandAVX512_32_inShuf0<>+0x38(SB)/8, $0x0101010100000000
1703
// mat0: matrix operand of VGF2P8AFFINEQB (imm8 = 0), shared by both stages of
// expandAVX512_32. Per the Intel definition, result bit i of each byte is the
// parity of (data byte AND matrix byte 7-i). Lane k holds 1<<k in every matrix
// byte, so all eight result bits in lane k are copies of data bit k.
1704 GLOBL expandAVX512_32_mat0<>(SB), RODATA, $0x40
1705 DATA expandAVX512_32_mat0<>+0x00(SB)/8, $0x0101010101010101
1706 DATA expandAVX512_32_mat0<>+0x08(SB)/8, $0x0202020202020202
1707 DATA expandAVX512_32_mat0<>+0x10(SB)/8, $0x0404040404040404
1708 DATA expandAVX512_32_mat0<>+0x18(SB)/8, $0x0808080808080808
1709 DATA expandAVX512_32_mat0<>+0x20(SB)/8, $0x1010101010101010
1710 DATA expandAVX512_32_mat0<>+0x28(SB)/8, $0x2020202020202020
1711 DATA expandAVX512_32_mat0<>+0x30(SB)/8, $0x4040404040404040
1712 DATA expandAVX512_32_mat0<>+0x38(SB)/8, $0x8080808080808080
1713
// inShuf1: byte indices for the second VPERMB of the input. Same layout as
// inShuf0 but selecting input bytes 2 and 3.
1714 GLOBL expandAVX512_32_inShuf1<>(SB), RODATA, $0x40
1715 DATA expandAVX512_32_inShuf1<>+0x00(SB)/8, $0x0303030302020202
1716 DATA expandAVX512_32_inShuf1<>+0x08(SB)/8, $0x0303030302020202
1717 DATA expandAVX512_32_inShuf1<>+0x10(SB)/8, $0x0303030302020202
1718 DATA expandAVX512_32_inShuf1<>+0x18(SB)/8, $0x0303030302020202
1719 DATA expandAVX512_32_inShuf1<>+0x20(SB)/8, $0x0303030302020202
1720 DATA expandAVX512_32_inShuf1<>+0x28(SB)/8, $0x0303030302020202
1721 DATA expandAVX512_32_inShuf1<>+0x30(SB)/8, $0x0303030302020202
1722 DATA expandAVX512_32_inShuf1<>+0x38(SB)/8, $0x0303030302020202
1723
// outShufLo: byte indices for the final single-source VPERMB; the same table
// is applied to both intermediates. All indices are below 0x40. Declared
// without the "<>" file-local suffix.
1724 GLOBL expandAVX512_32_outShufLo(SB), RODATA, $0x40
1725 DATA expandAVX512_32_outShufLo+0x00(SB)/8, $0x0b0a090803020100
1726 DATA expandAVX512_32_outShufLo+0x08(SB)/8, $0x1b1a191813121110
1727 DATA expandAVX512_32_outShufLo+0x10(SB)/8, $0x2b2a292823222120
1728 DATA expandAVX512_32_outShufLo+0x18(SB)/8, $0x3b3a393833323130
1729 DATA expandAVX512_32_outShufLo+0x20(SB)/8, $0x0f0e0d0c07060504
1730 DATA expandAVX512_32_outShufLo+0x28(SB)/8, $0x1f1e1d1c17161514
1731 DATA expandAVX512_32_outShufLo+0x30(SB)/8, $0x2f2e2d2c27262524
1732 DATA expandAVX512_32_outShufLo+0x38(SB)/8, $0x3f3e3d3c37363534
1733
// expandAVX512_32 reads 64 bytes from the address held in AX and leaves two
// 64-byte results in Z1 and Z2. It is NOSPLIT with frame $0-0 (no stack frame,
// no stack arguments). It overwrites Z0-Z4; AX and the mask registers are left
// untouched. Only two stages are needed and no masked merge is required.
// NOTE(review): judging by the tables, each input bit is replicated 32 times
// in the output; the caller-side register contract is not visible here --
// confirm against mkasm.go.
1734 TEXT expandAVX512_32<>(SB), NOSPLIT, $0-0
// Load both input-shuffle tables, the shared bit-matrix and the output shuffle.
1735 VMOVDQU64 expandAVX512_32_inShuf0<>(SB), Z0
1736 VMOVDQU64 expandAVX512_32_mat0<>(SB), Z1
1737 VMOVDQU64 expandAVX512_32_inShuf1<>(SB), Z2
1738 VMOVDQU64 expandAVX512_32_outShufLo(SB), Z3
// Z4 = the 64 input bytes.
1739 VMOVDQU64 (AX), Z4
// Stage 0 (input bytes 0-1) and stage 1 (input bytes 2-3) use the same matrix.
1740 VPERMB Z4, Z0, Z0
1741 VGF2P8AFFINEQB $0, Z1, Z0, Z0
1742 VPERMB Z4, Z2, Z2
1743 VGF2P8AFFINEQB $0, Z1, Z2, Z2
// Reorder each intermediate with the same index table. The first permute
// overwrites the matrix in Z1, which is no longer needed.
1744 VPERMB Z0, Z3, Z1
1745 VPERMB Z2, Z3, Z2
1746 RET
1747
1748 GLOBL expandAVX512_36_inShuf0<>(SB), RODATA, $0x40
1749 DATA expandAVX512_36_inShuf0<>+0x00(SB)/8, $0x0101010100000000
1750 DATA expandAVX512_36_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
1751 DATA expandAVX512_36_inShuf0<>+0x10(SB)/8, $0x0101010100000000
1752 DATA expandAVX512_36_inShuf0<>+0x18(SB)/8, $0x0101010100000000
1753 DATA expandAVX512_36_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
1754 DATA expandAVX512_36_inShuf0<>+0x28(SB)/8, $0x0101010100000000
1755 DATA expandAVX512_36_inShuf0<>+0x30(SB)/8, $0x0101010100000000
1756 DATA expandAVX512_36_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100
1757
1758 GLOBL expandAVX512_36_mat0<>(SB), RODATA, $0x40
1759 DATA expandAVX512_36_mat0<>+0x00(SB)/8, $0x0101010101010101
1760 DATA expandAVX512_36_mat0<>+0x08(SB)/8, $0x0101010102020202
1761 DATA expandAVX512_36_mat0<>+0x10(SB)/8, $0x0202020202020202
1762 DATA expandAVX512_36_mat0<>+0x18(SB)/8, $0x0404040404040404
1763 DATA expandAVX512_36_mat0<>+0x20(SB)/8, $0x0404040408080808
1764 DATA expandAVX512_36_mat0<>+0x28(SB)/8, $0x0808080808080808
1765 DATA expandAVX512_36_mat0<>+0x30(SB)/8, $0x1010101010101010
1766 DATA expandAVX512_36_mat0<>+0x38(SB)/8, $0x1010101020202020
1767
1768 GLOBL expandAVX512_36_inShuf1<>(SB), RODATA, $0x40
1769 DATA expandAVX512_36_inShuf1<>+0x00(SB)/8, $0x0101010100000000
1770 DATA expandAVX512_36_inShuf1<>+0x08(SB)/8, $0xffffff0100000000
1771 DATA expandAVX512_36_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
1772 DATA expandAVX512_36_inShuf1<>+0x18(SB)/8, $0xffffffff00000000
1773 DATA expandAVX512_36_inShuf1<>+0x20(SB)/8, $0xff02020202010101
1774 DATA expandAVX512_36_inShuf1<>+0x28(SB)/8, $0xffffffffffff0201
1775 DATA expandAVX512_36_inShuf1<>+0x30(SB)/8, $0x0202020201010101
1776 DATA expandAVX512_36_inShuf1<>+0x38(SB)/8, $0x0303030302020202
1777
1778 GLOBL expandAVX512_36_mat1<>(SB), RODATA, $0x40
1779 DATA expandAVX512_36_mat1<>+0x00(SB)/8, $0x2020202020202020
1780 DATA expandAVX512_36_mat1<>+0x08(SB)/8, $0x4040404040404040
1781 DATA expandAVX512_36_mat1<>+0x10(SB)/8, $0x4040404080808080
1782 DATA expandAVX512_36_mat1<>+0x18(SB)/8, $0x8080808080808080
1783 DATA expandAVX512_36_mat1<>+0x20(SB)/8, $0x4040404040404040
1784 DATA expandAVX512_36_mat1<>+0x28(SB)/8, $0x4040404080808080
1785 DATA expandAVX512_36_mat1<>+0x30(SB)/8, $0x8080808080808080
1786 DATA expandAVX512_36_mat1<>+0x38(SB)/8, $0x0101010101010101
1787
// expandAVX512_36_inShuf2 is a 64-byte read-only byte-index table.
// expandAVX512_36 loads it into Z4 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The indices used here are
// 0x02 and 0x03; 0xff entries look like unused filler slots
// (NOTE(review): confirm against mkasm.go).
1788 GLOBL expandAVX512_36_inShuf2<>(SB), RODATA, $0x40
1789 DATA expandAVX512_36_inShuf2<>+0x00(SB)/8, $0xffffffffffff0302
1790 DATA expandAVX512_36_inShuf2<>+0x08(SB)/8, $0x0303030302020202
1791 DATA expandAVX512_36_inShuf2<>+0x10(SB)/8, $0x0303030302020202
1792 DATA expandAVX512_36_inShuf2<>+0x18(SB)/8, $0xffffffffffff0302
1793 DATA expandAVX512_36_inShuf2<>+0x20(SB)/8, $0x0303030302020202
1794 DATA expandAVX512_36_inShuf2<>+0x28(SB)/8, $0xffff030302020202
1795 DATA expandAVX512_36_inShuf2<>+0x30(SB)/8, $0xffffffffffffff02
1796 DATA expandAVX512_36_inShuf2<>+0x38(SB)/8, $0xffffffff02020202
1797
// expandAVX512_36_mat2 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_36 applies to the bytes gathered with
// expandAVX512_36_inShuf2. Every matrix byte has exactly one bit set, so
// each result bit is a copy of one bit of the gathered byte. Qwords holding
// two different masks (+0x00, +0x18, +0x30) build each result byte as two
// 4-bit halves driven by two adjacent source bits.
1798 GLOBL expandAVX512_36_mat2<>(SB), RODATA, $0x40
1799 DATA expandAVX512_36_mat2<>+0x00(SB)/8, $0x0101010102020202
1800 DATA expandAVX512_36_mat2<>+0x08(SB)/8, $0x0202020202020202
1801 DATA expandAVX512_36_mat2<>+0x10(SB)/8, $0x0404040404040404
1802 DATA expandAVX512_36_mat2<>+0x18(SB)/8, $0x0404040408080808
1803 DATA expandAVX512_36_mat2<>+0x20(SB)/8, $0x0808080808080808
1804 DATA expandAVX512_36_mat2<>+0x28(SB)/8, $0x1010101010101010
1805 DATA expandAVX512_36_mat2<>+0x30(SB)/8, $0x1010101020202020
1806 DATA expandAVX512_36_mat2<>+0x38(SB)/8, $0x2020202020202020
1807
// expandAVX512_36_outShufLo is the 64-byte index vector that
// expandAVX512_36 loads into Z1 for `VPERMI2B Z3, Z0, Z1`. That instruction
// overwrites Z1 with bytes picked from the Z0/Z3 pair: indices 0x00-0x3f
// select from Z0 and 0x40-0x7f from Z3.
// NOTE(review): unlike the inShuf/mat tables, this symbol is declared
// without the file-local `<>` suffix. If that is unintended, the fix
// belongs in mkasm.go, not in this generated file.
1808 GLOBL expandAVX512_36_outShufLo(SB), RODATA, $0x40
1809 DATA expandAVX512_36_outShufLo+0x00(SB)/8, $0x1211100803020100
1810 DATA expandAVX512_36_outShufLo+0x08(SB)/8, $0x2928201b1a191813
1811 DATA expandAVX512_36_outShufLo+0x10(SB)/8, $0x4038333231302b2a
1812 DATA expandAVX512_36_outShufLo+0x18(SB)/8, $0x504b4a4948434241
1813 DATA expandAVX512_36_outShufLo+0x20(SB)/8, $0x070605045b5a5958
1814 DATA expandAVX512_36_outShufLo+0x28(SB)/8, $0x1e1d1c1716151409
1815 DATA expandAVX512_36_outShufLo+0x30(SB)/8, $0x35342f2e2d2c211f
1816 DATA expandAVX512_36_outShufLo+0x38(SB)/8, $0x4c47464544393736
1817
// expandAVX512_36_outShufHi is the 64-byte index vector that
// expandAVX512_36 loads into Z2 for `VPERMI2B Z4, Z3, Z2`. That instruction
// overwrites Z2 with bytes picked from the Z3/Z4 pair: indices 0x00-0x3f
// select from Z3 and 0x40-0x7f from Z4.
// NOTE(review): unlike the inShuf/mat tables, this symbol is declared
// without the file-local `<>` suffix. If that is unintended, the fix
// belongs in mkasm.go, not in this generated file.
1818 GLOBL expandAVX512_36_outShufHi(SB), RODATA, $0x40
1819 DATA expandAVX512_36_outShufHi+0x00(SB)/8, $0x3332313028222120
1820 DATA expandAVX512_36_outShufHi+0x08(SB)/8, $0x4a4948403b3a3938
1821 DATA expandAVX512_36_outShufHi+0x10(SB)/8, $0x616058535251504b
1822 DATA expandAVX512_36_outShufHi+0x18(SB)/8, $0x78706b6a69686362
1823 DATA expandAVX512_36_outShufHi+0x20(SB)/8, $0x29262524237b7a79
1824 DATA expandAVX512_36_outShufHi+0x28(SB)/8, $0x3f3e3d3c37363534
1825 DATA expandAVX512_36_outShufHi+0x30(SB)/8, $0x5655544f4e4d4c41
1826 DATA expandAVX512_36_outShufHi+0x38(SB)/8, $0x6d6c676665645957
1827
// expandAVX512_36 is the expander installed at ·gcExpandersAVX512+0x98.
//
// In:       AX = address of the input; 64 bytes are loaded from (AX).
// Out:      Z1 and Z2 (presumably the low and high 64 bytes of the
//           expansion, going by the outShufLo/outShufHi names — confirm
//           with the caller).
// Clobbers: Z0, Z3, Z4, Z5.
// Declared NOSPLIT with frame $0-0: no stack frame and no stack arguments.
1828 TEXT expandAVX512_36<>(SB), NOSPLIT, $0-0
// Load the three gather-index tables and the two output-shuffle tables.
1829 VMOVDQU64 expandAVX512_36_inShuf0<>(SB), Z0
1830 VMOVDQU64 expandAVX512_36_inShuf1<>(SB), Z3
1831 VMOVDQU64 expandAVX512_36_inShuf2<>(SB), Z4
1832 VMOVDQU64 expandAVX512_36_outShufLo(SB), Z1
1833 VMOVDQU64 expandAVX512_36_outShufHi(SB), Z2
// Z5 = the 64 input bytes.
1834 VMOVDQU64 (AX), Z5
// Three gather + bit-matrix stages. Each VPERMB picks input bytes by index,
// then VGF2P8AFFINEQB transforms every picked byte with the per-qword
// matrix. Results land in Z0, Z3 and Z4.
1835 VPERMB Z5, Z0, Z0
1836 VGF2P8AFFINEQB $0, expandAVX512_36_mat0<>(SB), Z0, Z0
1837 VPERMB Z5, Z3, Z3
1838 VGF2P8AFFINEQB $0, expandAVX512_36_mat1<>(SB), Z3, Z3
1839 VPERMB Z5, Z4, Z4
1840 VGF2P8AFFINEQB $0, expandAVX512_36_mat2<>(SB), Z4, Z4
// Assemble the outputs. Z1 is built from the Z0/Z3 pair and Z2 from the
// Z3/Z4 pair, using the index vectors already held in Z1 and Z2.
1841 VPERMI2B Z3, Z0, Z1
1842 VPERMI2B Z4, Z3, Z2
1843 RET
1844
// expandAVX512_40_inShuf0 is a 64-byte read-only byte-index table.
// expandAVX512_40 loads it into Z0 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The indices used here are
// 0x00 and 0x01; 0xff entries look like unused filler slots
// (NOTE(review): confirm against mkasm.go).
1845 GLOBL expandAVX512_40_inShuf0<>(SB), RODATA, $0x40
1846 DATA expandAVX512_40_inShuf0<>+0x00(SB)/8, $0x0101010000000000
1847 DATA expandAVX512_40_inShuf0<>+0x08(SB)/8, $0x0101010000000000
1848 DATA expandAVX512_40_inShuf0<>+0x10(SB)/8, $0x0101010000000000
1849 DATA expandAVX512_40_inShuf0<>+0x18(SB)/8, $0x0101010000000000
1850 DATA expandAVX512_40_inShuf0<>+0x20(SB)/8, $0x0101010000000000
1851 DATA expandAVX512_40_inShuf0<>+0x28(SB)/8, $0xffffff0000000000
1852 DATA expandAVX512_40_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
1853 DATA expandAVX512_40_inShuf0<>+0x38(SB)/8, $0xffffff0000000000
1854
// expandAVX512_40_mat0 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_40 applies to the bytes gathered with
// expandAVX512_40_inShuf0. Qword k repeats the single-bit mask 1<<k, so
// every result byte in qword lane k is 0x00 or 0xff according to bit k of
// the gathered byte.
1855 GLOBL expandAVX512_40_mat0<>(SB), RODATA, $0x40
1856 DATA expandAVX512_40_mat0<>+0x00(SB)/8, $0x0101010101010101
1857 DATA expandAVX512_40_mat0<>+0x08(SB)/8, $0x0202020202020202
1858 DATA expandAVX512_40_mat0<>+0x10(SB)/8, $0x0404040404040404
1859 DATA expandAVX512_40_mat0<>+0x18(SB)/8, $0x0808080808080808
1860 DATA expandAVX512_40_mat0<>+0x20(SB)/8, $0x1010101010101010
1861 DATA expandAVX512_40_mat0<>+0x28(SB)/8, $0x2020202020202020
1862 DATA expandAVX512_40_mat0<>+0x30(SB)/8, $0x4040404040404040
1863 DATA expandAVX512_40_mat0<>+0x38(SB)/8, $0x8080808080808080
1864
// expandAVX512_40_inShuf1 is a 64-byte read-only byte-index table.
// expandAVX512_40 loads it into Z2 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The indices used here are
// 0x01 and 0x02; 0xff entries look like unused filler slots
// (NOTE(review): confirm against mkasm.go).
1865 GLOBL expandAVX512_40_inShuf1<>(SB), RODATA, $0x40
1866 DATA expandAVX512_40_inShuf1<>+0x00(SB)/8, $0xffffffffffff0101
1867 DATA expandAVX512_40_inShuf1<>+0x08(SB)/8, $0xffffffffffff0101
1868 DATA expandAVX512_40_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
1869 DATA expandAVX512_40_inShuf1<>+0x18(SB)/8, $0xffffffffffff0101
1870 DATA expandAVX512_40_inShuf1<>+0x20(SB)/8, $0xffffffffffffff01
1871 DATA expandAVX512_40_inShuf1<>+0x28(SB)/8, $0xffff020202020201
1872 DATA expandAVX512_40_inShuf1<>+0x30(SB)/8, $0x0202020101010101
1873 DATA expandAVX512_40_inShuf1<>+0x38(SB)/8, $0x0202020101010101
1874
// expandAVX512_40_mat1 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_40 applies to the bytes gathered with
// expandAVX512_40_inShuf1. Each qword repeats one single-bit mask, so every
// result byte in that qword lane is 0x00 or 0xff according to that bit of
// the gathered byte. The 0x10 mask is used by two consecutive qwords
// (+0x20 and +0x28).
1875 GLOBL expandAVX512_40_mat1<>(SB), RODATA, $0x40
1876 DATA expandAVX512_40_mat1<>+0x00(SB)/8, $0x0101010101010101
1877 DATA expandAVX512_40_mat1<>+0x08(SB)/8, $0x0202020202020202
1878 DATA expandAVX512_40_mat1<>+0x10(SB)/8, $0x0404040404040404
1879 DATA expandAVX512_40_mat1<>+0x18(SB)/8, $0x0808080808080808
1880 DATA expandAVX512_40_mat1<>+0x20(SB)/8, $0x1010101010101010
1881 DATA expandAVX512_40_mat1<>+0x28(SB)/8, $0x1010101010101010
1882 DATA expandAVX512_40_mat1<>+0x30(SB)/8, $0x2020202020202020
1883 DATA expandAVX512_40_mat1<>+0x38(SB)/8, $0x4040404040404040
1884
// expandAVX512_40_inShuf2 is a 64-byte read-only byte-index table.
// expandAVX512_40 loads it into Z3 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The indices used here are
// 0x01-0x03; 0xff entries look like unused filler slots
// (NOTE(review): confirm against mkasm.go).
1885 GLOBL expandAVX512_40_inShuf2<>(SB), RODATA, $0x40
1886 DATA expandAVX512_40_inShuf2<>+0x00(SB)/8, $0x0202020101010101
1887 DATA expandAVX512_40_inShuf2<>+0x08(SB)/8, $0x0303030202020202
1888 DATA expandAVX512_40_inShuf2<>+0x10(SB)/8, $0x0303030202020202
1889 DATA expandAVX512_40_inShuf2<>+0x18(SB)/8, $0xffffff0202020202
1890 DATA expandAVX512_40_inShuf2<>+0x20(SB)/8, $0xffffff0202020202
1891 DATA expandAVX512_40_inShuf2<>+0x28(SB)/8, $0xffffffffffff0202
1892 DATA expandAVX512_40_inShuf2<>+0x30(SB)/8, $0xffffffffffff0202
1893 DATA expandAVX512_40_inShuf2<>+0x38(SB)/8, $0xffffffffffff0202
1894
// expandAVX512_40_mat2 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_40 applies to the bytes gathered with
// expandAVX512_40_inShuf2. Each qword repeats one single-bit mask, so every
// result byte in that qword lane is 0x00 or 0xff according to that bit of
// the gathered byte.
1895 GLOBL expandAVX512_40_mat2<>(SB), RODATA, $0x40
1896 DATA expandAVX512_40_mat2<>+0x00(SB)/8, $0x8080808080808080
1897 DATA expandAVX512_40_mat2<>+0x08(SB)/8, $0x0101010101010101
1898 DATA expandAVX512_40_mat2<>+0x10(SB)/8, $0x0202020202020202
1899 DATA expandAVX512_40_mat2<>+0x18(SB)/8, $0x0404040404040404
1900 DATA expandAVX512_40_mat2<>+0x20(SB)/8, $0x0808080808080808
1901 DATA expandAVX512_40_mat2<>+0x28(SB)/8, $0x2020202020202020
1902 DATA expandAVX512_40_mat2<>+0x30(SB)/8, $0x4040404040404040
1903 DATA expandAVX512_40_mat2<>+0x38(SB)/8, $0x8080808080808080
1904
// expandAVX512_40_inShuf3 is a 64-byte read-only byte-index table.
// expandAVX512_40 loads it into Z4 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). Only the first two entries
// carry a real index (0x03); every other entry is 0xff, which looks like
// unused filler (NOTE(review): confirm against mkasm.go).
1905 GLOBL expandAVX512_40_inShuf3<>(SB), RODATA, $0x40
1906 DATA expandAVX512_40_inShuf3<>+0x00(SB)/8, $0xffffffffffff0303
1907 DATA expandAVX512_40_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
1908 DATA expandAVX512_40_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
1909 DATA expandAVX512_40_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
1910 DATA expandAVX512_40_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
1911 DATA expandAVX512_40_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
1912 DATA expandAVX512_40_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
1913 DATA expandAVX512_40_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
1914
// expandAVX512_40_mat3 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_40 applies to the bytes gathered with
// expandAVX512_40_inShuf3. Only the first qword is populated (mask 0x01
// repeated), so bytes in that lane become 0x00 or 0xff according to bit 0
// of the gathered byte. The all-zero qwords yield zero bytes.
1915 GLOBL expandAVX512_40_mat3<>(SB), RODATA, $0x40
1916 DATA expandAVX512_40_mat3<>+0x00(SB)/8, $0x0101010101010101
1917 DATA expandAVX512_40_mat3<>+0x08(SB)/8, $0x0000000000000000
1918 DATA expandAVX512_40_mat3<>+0x10(SB)/8, $0x0000000000000000
1919 DATA expandAVX512_40_mat3<>+0x18(SB)/8, $0x0000000000000000
1920 DATA expandAVX512_40_mat3<>+0x20(SB)/8, $0x0000000000000000
1921 DATA expandAVX512_40_mat3<>+0x28(SB)/8, $0x0000000000000000
1922 DATA expandAVX512_40_mat3<>+0x30(SB)/8, $0x0000000000000000
1923 DATA expandAVX512_40_mat3<>+0x38(SB)/8, $0x0000000000000000
1924
// expandAVX512_40_outShufLo is the 64-byte index vector that
// expandAVX512_40 loads into Z1 for `VPERMI2B Z2, Z0, Z1`. That instruction
// overwrites Z1 with bytes picked from the Z0/Z2 pair: indices 0x00-0x3f
// select from Z0 and 0x40-0x7f from Z2.
// NOTE(review): unlike the inShuf/mat tables, this symbol is declared
// without the file-local `<>` suffix. If that is unintended, the fix
// belongs in mkasm.go, not in this generated file.
1925 GLOBL expandAVX512_40_outShufLo(SB), RODATA, $0x40
1926 DATA expandAVX512_40_outShufLo+0x00(SB)/8, $0x0a09080403020100
1927 DATA expandAVX512_40_outShufLo+0x08(SB)/8, $0x1814131211100c0b
1928 DATA expandAVX512_40_outShufLo+0x10(SB)/8, $0x232221201c1b1a19
1929 DATA expandAVX512_40_outShufLo+0x18(SB)/8, $0x31302c2b2a292824
1930 DATA expandAVX512_40_outShufLo+0x20(SB)/8, $0x3c3b3a3938343332
1931 DATA expandAVX512_40_outShufLo+0x28(SB)/8, $0x0f0e0d4140070605
1932 DATA expandAVX512_40_outShufLo+0x30(SB)/8, $0x1d51501716154948
1933 DATA expandAVX512_40_outShufLo+0x38(SB)/8, $0x6027262559581f1e
1934
// expandAVX512_40_outShufHi0 is the 64-byte index vector that
// expandAVX512_40 loads into Z5 for the zero-masked
// `VPERMI2B.Z Z3, Z2, K1, Z5` (K1 = 0xe7ffffffffffffff). Indices 0x00-0x3f
// select from Z2 and 0x40-0x7f from Z3. The two 0xff entries sit at byte
// positions 59 and 60, exactly the positions cleared in that mask.
// NOTE(review): declared without the file-local `<>` suffix, unlike the
// inShuf/mat tables. If that is unintended, fix it in mkasm.go.
1935 GLOBL expandAVX512_40_outShufHi0(SB), RODATA, $0x40
1936 DATA expandAVX512_40_outShufHi0+0x00(SB)/8, $0x3938343332313028
1937 DATA expandAVX512_40_outShufHi0+0x08(SB)/8, $0x44434241403c3b3a
1938 DATA expandAVX512_40_outShufHi0+0x10(SB)/8, $0x5251504c4b4a4948
1939 DATA expandAVX512_40_outShufHi0+0x18(SB)/8, $0x605c5b5a59585453
1940 DATA expandAVX512_40_outShufHi0+0x20(SB)/8, $0x2c2b2a2964636261
1941 DATA expandAVX512_40_outShufHi0+0x28(SB)/8, $0x3e3d69683736352d
1942 DATA expandAVX512_40_outShufHi0+0x30(SB)/8, $0x797847464571703f
1943 DATA expandAVX512_40_outShufHi0+0x38(SB)/8, $0x575655ffff4f4e4d
1944
// expandAVX512_40_outShufHi1 is the 64-byte index vector that
// expandAVX512_40 loads into Z6 for the zero-masked
// `VPERMB.Z Z4, Z6, K1, Z0` (K1 = 0x1800000000000000). Only byte positions
// 59 and 60 hold real indices (0x00, 0x01), matching the two bits set in
// that mask; every other entry is 0xff.
// NOTE(review): declared without the file-local `<>` suffix, unlike the
// inShuf/mat tables. If that is unintended, fix it in mkasm.go.
1945 GLOBL expandAVX512_40_outShufHi1(SB), RODATA, $0x40
1946 DATA expandAVX512_40_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
1947 DATA expandAVX512_40_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
1948 DATA expandAVX512_40_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
1949 DATA expandAVX512_40_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
1950 DATA expandAVX512_40_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
1951 DATA expandAVX512_40_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
1952 DATA expandAVX512_40_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
1953 DATA expandAVX512_40_outShufHi1+0x38(SB)/8, $0xffffff0100ffffff
1954
// expandAVX512_40 is the expander installed at ·gcExpandersAVX512+0xa0.
//
// In:       AX = address of the input; 64 bytes are loaded from (AX).
// Out:      Z1 and Z2 (presumably the low and high 64 bytes of the
//           expansion, going by the outShufLo/outShufHi names — confirm
//           with the caller).
// Clobbers: Z0, Z3, Z4, Z5, Z6, Z7, K1, and AX (reused as scratch for the
//           mask constants after the input has been loaded).
// Declared NOSPLIT with frame $0-0: no stack frame and no stack arguments.
1955 TEXT expandAVX512_40<>(SB), NOSPLIT, $0-0
// Load the four gather-index tables and the three output-shuffle tables.
1956 VMOVDQU64 expandAVX512_40_inShuf0<>(SB), Z0
1957 VMOVDQU64 expandAVX512_40_inShuf1<>(SB), Z2
1958 VMOVDQU64 expandAVX512_40_inShuf2<>(SB), Z3
1959 VMOVDQU64 expandAVX512_40_inShuf3<>(SB), Z4
1960 VMOVDQU64 expandAVX512_40_outShufLo(SB), Z1
1961 VMOVDQU64 expandAVX512_40_outShufHi0(SB), Z5
1962 VMOVDQU64 expandAVX512_40_outShufHi1(SB), Z6
// Z7 = the 64 input bytes.
1963 VMOVDQU64 (AX), Z7
// Four gather + bit-matrix stages. Each VPERMB picks input bytes by index,
// then VGF2P8AFFINEQB transforms every picked byte with the per-qword
// matrix. Results land in Z0, Z2, Z3 and Z4.
1964 VPERMB Z7, Z0, Z0
1965 VGF2P8AFFINEQB $0, expandAVX512_40_mat0<>(SB), Z0, Z0
1966 VPERMB Z7, Z2, Z2
1967 VGF2P8AFFINEQB $0, expandAVX512_40_mat1<>(SB), Z2, Z2
1968 VPERMB Z7, Z3, Z3
1969 VGF2P8AFFINEQB $0, expandAVX512_40_mat2<>(SB), Z3, Z3
1970 VPERMB Z7, Z4, Z4
1971 VGF2P8AFFINEQB $0, expandAVX512_40_mat3<>(SB), Z4, Z4
// First output: Z1 is built from the Z0/Z2 pair.
1972 VPERMI2B Z2, Z0, Z1
// Second output needs three sources, so it is built in two zero-masked
// pieces. The two K1 masks are bitwise complements, so every output byte
// comes from exactly one piece. Z5 takes bytes from the Z2/Z3 pair ...
1973 MOVQ $0xe7ffffffffffffff, AX
1974 KMOVQ AX, K1
1975 VPERMI2B.Z Z3, Z2, K1, Z5
// ... and Z0 (no longer needed as a source) takes the remaining bytes
// from Z4.
1976 MOVQ $0x1800000000000000, AX
1977 KMOVQ AX, K1
1978 VPERMB.Z Z4, Z6, K1, Z0
// Merge the two pieces into Z2.
1979 VPORQ Z0, Z5, Z2
1980 RET
1981
// expandAVX512_44_inShuf0 is a 64-byte read-only byte-index table.
// expandAVX512_44 loads it into Z0 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The indices used here are
// 0x00 and 0x01; 0xff entries look like unused filler slots
// (NOTE(review): confirm against mkasm.go).
1982 GLOBL expandAVX512_44_inShuf0<>(SB), RODATA, $0x40
1983 DATA expandAVX512_44_inShuf0<>+0x00(SB)/8, $0x0101010000000000
1984 DATA expandAVX512_44_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
1985 DATA expandAVX512_44_inShuf0<>+0x10(SB)/8, $0x0101010000000000
1986 DATA expandAVX512_44_inShuf0<>+0x18(SB)/8, $0x0101010000000000
1987 DATA expandAVX512_44_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
1988 DATA expandAVX512_44_inShuf0<>+0x28(SB)/8, $0x0101010000000000
1989 DATA expandAVX512_44_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
1990 DATA expandAVX512_44_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
1991
// expandAVX512_44_mat0 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_44 applies to the bytes gathered with
// expandAVX512_44_inShuf0. Every matrix byte has exactly one bit set, so
// each result bit is a copy of one bit of the gathered byte. Qwords holding
// two different masks (+0x08, +0x20, +0x38) build each result byte as two
// 4-bit halves driven by two adjacent source bits.
1992 GLOBL expandAVX512_44_mat0<>(SB), RODATA, $0x40
1993 DATA expandAVX512_44_mat0<>+0x00(SB)/8, $0x0101010101010101
1994 DATA expandAVX512_44_mat0<>+0x08(SB)/8, $0x0101010102020202
1995 DATA expandAVX512_44_mat0<>+0x10(SB)/8, $0x0202020202020202
1996 DATA expandAVX512_44_mat0<>+0x18(SB)/8, $0x0404040404040404
1997 DATA expandAVX512_44_mat0<>+0x20(SB)/8, $0x0404040408080808
1998 DATA expandAVX512_44_mat0<>+0x28(SB)/8, $0x0808080808080808
1999 DATA expandAVX512_44_mat0<>+0x30(SB)/8, $0x1010101010101010
2000 DATA expandAVX512_44_mat0<>+0x38(SB)/8, $0x1010101020202020
2001
// expandAVX512_44_inShuf1 is a 64-byte read-only byte-index table.
// expandAVX512_44 loads it into Z2 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The indices used here are
// 0x00-0x02; 0xff entries look like unused filler slots
// (NOTE(review): confirm against mkasm.go).
2002 GLOBL expandAVX512_44_inShuf1<>(SB), RODATA, $0x40
2003 DATA expandAVX512_44_inShuf1<>+0x00(SB)/8, $0xffffff0000000000
2004 DATA expandAVX512_44_inShuf1<>+0x08(SB)/8, $0xffffff0000000000
2005 DATA expandAVX512_44_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
2006 DATA expandAVX512_44_inShuf1<>+0x18(SB)/8, $0xffffff0000000000
2007 DATA expandAVX512_44_inShuf1<>+0x20(SB)/8, $0xffffffffffff0101
2008 DATA expandAVX512_44_inShuf1<>+0x28(SB)/8, $0xffffffffffff0101
2009 DATA expandAVX512_44_inShuf1<>+0x30(SB)/8, $0xffffffffffff0101
2010 DATA expandAVX512_44_inShuf1<>+0x38(SB)/8, $0xff02020202020101
2011
// expandAVX512_44_mat1 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_44 applies to the bytes gathered with
// expandAVX512_44_inShuf1. Every matrix byte has exactly one bit set, so
// each result bit is a copy of one bit of the gathered byte. The qword at
// +0x10 holds two different masks and so builds each result byte as two
// 4-bit halves driven by two adjacent source bits.
2012 GLOBL expandAVX512_44_mat1<>(SB), RODATA, $0x40
2013 DATA expandAVX512_44_mat1<>+0x00(SB)/8, $0x2020202020202020
2014 DATA expandAVX512_44_mat1<>+0x08(SB)/8, $0x4040404040404040
2015 DATA expandAVX512_44_mat1<>+0x10(SB)/8, $0x4040404080808080
2016 DATA expandAVX512_44_mat1<>+0x18(SB)/8, $0x8080808080808080
2017 DATA expandAVX512_44_mat1<>+0x20(SB)/8, $0x0101010101010101
2018 DATA expandAVX512_44_mat1<>+0x28(SB)/8, $0x0202020202020202
2019 DATA expandAVX512_44_mat1<>+0x30(SB)/8, $0x0404040404040404
2020 DATA expandAVX512_44_mat1<>+0x38(SB)/8, $0x0808080808080808
2021
// expandAVX512_44_inShuf2 is a 64-byte read-only byte-index table.
// expandAVX512_44 loads it into Z3 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The indices used here are
// 0x01 and 0x02; 0xff entries look like unused filler slots
// (NOTE(review): confirm against mkasm.go).
2022 GLOBL expandAVX512_44_inShuf2<>(SB), RODATA, $0x40
2023 DATA expandAVX512_44_inShuf2<>+0x00(SB)/8, $0x0202020101010101
2024 DATA expandAVX512_44_inShuf2<>+0x08(SB)/8, $0xffffffffffff0201
2025 DATA expandAVX512_44_inShuf2<>+0x10(SB)/8, $0x0202020101010101
2026 DATA expandAVX512_44_inShuf2<>+0x18(SB)/8, $0x0202020101010101
2027 DATA expandAVX512_44_inShuf2<>+0x20(SB)/8, $0xffffffffffff0201
2028 DATA expandAVX512_44_inShuf2<>+0x28(SB)/8, $0xffff020101010101
2029 DATA expandAVX512_44_inShuf2<>+0x30(SB)/8, $0xffffff0202020202
2030 DATA expandAVX512_44_inShuf2<>+0x38(SB)/8, $0xffffffffffffff02
2031
// expandAVX512_44_mat2 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_44 applies to the bytes gathered with
// expandAVX512_44_inShuf2. Every matrix byte has exactly one bit set, so
// each result bit is a copy of one bit of the gathered byte. Qwords holding
// two different masks (+0x08, +0x20, +0x38) build each result byte as two
// 4-bit halves driven by two adjacent source bits.
2032 GLOBL expandAVX512_44_mat2<>(SB), RODATA, $0x40
2033 DATA expandAVX512_44_mat2<>+0x00(SB)/8, $0x1010101010101010
2034 DATA expandAVX512_44_mat2<>+0x08(SB)/8, $0x1010101020202020
2035 DATA expandAVX512_44_mat2<>+0x10(SB)/8, $0x2020202020202020
2036 DATA expandAVX512_44_mat2<>+0x18(SB)/8, $0x4040404040404040
2037 DATA expandAVX512_44_mat2<>+0x20(SB)/8, $0x4040404080808080
2038 DATA expandAVX512_44_mat2<>+0x28(SB)/8, $0x8080808080808080
2039 DATA expandAVX512_44_mat2<>+0x30(SB)/8, $0x0101010101010101
2040 DATA expandAVX512_44_mat2<>+0x38(SB)/8, $0x0101010102020202
2041
// expandAVX512_44_inShuf3 is a 64-byte read-only byte-index table.
// expandAVX512_44 loads it into Z4 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The only real index used
// is 0x02; 0xff entries (including the whole last two qwords) look like
// unused filler slots (NOTE(review): confirm against mkasm.go).
2042 GLOBL expandAVX512_44_inShuf3<>(SB), RODATA, $0x40
2043 DATA expandAVX512_44_inShuf3<>+0x00(SB)/8, $0xffffff0202020202
2044 DATA expandAVX512_44_inShuf3<>+0x08(SB)/8, $0xffffff0202020202
2045 DATA expandAVX512_44_inShuf3<>+0x10(SB)/8, $0xffffffffffffff02
2046 DATA expandAVX512_44_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
2047 DATA expandAVX512_44_inShuf3<>+0x20(SB)/8, $0xffffffffffff0202
2048 DATA expandAVX512_44_inShuf3<>+0x28(SB)/8, $0xffffffffffff0202
2049 DATA expandAVX512_44_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
2050 DATA expandAVX512_44_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
2051
// expandAVX512_44_mat3 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_44 applies to the bytes gathered with
// expandAVX512_44_inShuf3. Every non-zero matrix byte has exactly one bit
// set, so each result bit is a copy of one bit of the gathered byte. The
// qword at +0x10 holds two different masks and so builds each result byte
// as two 4-bit halves. The all-zero qwords (+0x30, +0x38) yield zero bytes.
2052 GLOBL expandAVX512_44_mat3<>(SB), RODATA, $0x40
2053 DATA expandAVX512_44_mat3<>+0x00(SB)/8, $0x0202020202020202
2054 DATA expandAVX512_44_mat3<>+0x08(SB)/8, $0x0404040404040404
2055 DATA expandAVX512_44_mat3<>+0x10(SB)/8, $0x0404040408080808
2056 DATA expandAVX512_44_mat3<>+0x18(SB)/8, $0x1010101010101010
2057 DATA expandAVX512_44_mat3<>+0x20(SB)/8, $0x2020202020202020
2058 DATA expandAVX512_44_mat3<>+0x28(SB)/8, $0x4040404040404040
2059 DATA expandAVX512_44_mat3<>+0x30(SB)/8, $0x0000000000000000
2060 DATA expandAVX512_44_mat3<>+0x38(SB)/8, $0x0000000000000000
2061
// expandAVX512_44_outShufLo is the 64-byte index vector that
// expandAVX512_44 loads into Z1 for `VPERMI2B Z2, Z0, Z1`. That instruction
// overwrites Z1 with bytes picked from the Z0/Z2 pair: indices 0x00-0x3f
// select from Z0 and 0x40-0x7f from Z2.
// NOTE(review): unlike the inShuf/mat tables, this symbol is declared
// without the file-local `<>` suffix. If that is unintended, the fix
// belongs in mkasm.go, not in this generated file.
2062 GLOBL expandAVX512_44_outShufLo(SB), RODATA, $0x40
2063 DATA expandAVX512_44_outShufLo+0x00(SB)/8, $0x1110080403020100
2064 DATA expandAVX512_44_outShufLo+0x08(SB)/8, $0x1c1b1a1918141312
2065 DATA expandAVX512_44_outShufLo+0x10(SB)/8, $0x31302c2b2a292820
2066 DATA expandAVX512_44_outShufLo+0x18(SB)/8, $0x4342414038343332
2067 DATA expandAVX512_44_outShufLo+0x20(SB)/8, $0x58504c4b4a494844
2068 DATA expandAVX512_44_outShufLo+0x28(SB)/8, $0x600706055c5b5a59
2069 DATA expandAVX512_44_outShufLo+0x30(SB)/8, $0x1d69681716150961
2070 DATA expandAVX512_44_outShufLo+0x38(SB)/8, $0x2f2e2d2171701f1e
2071
// expandAVX512_44_outShufHi0 is the 64-byte index vector that
// expandAVX512_44 loads into Z5 for the zero-masked
// `VPERMI2B.Z Z3, Z2, K1, Z5` (K1 = 0xce79fe003fffffff). Indices 0x00-0x3f
// select from Z2 and 0x40-0x7f from Z3. The 0xff entries sit exactly at the
// byte positions whose bit is cleared in that mask.
// NOTE(review): declared without the file-local `<>` suffix, unlike the
// inShuf/mat tables. If that is unintended, fix it in mkasm.go.
2072 GLOBL expandAVX512_44_outShufHi0(SB), RODATA, $0x40
2073 DATA expandAVX512_44_outShufHi0+0x00(SB)/8, $0x4844434241403938
2074 DATA expandAVX512_44_outShufHi0+0x08(SB)/8, $0x5a59585453525150
2075 DATA expandAVX512_44_outShufHi0+0x10(SB)/8, $0x6c6b6a6968605c5b
2076 DATA expandAVX512_44_outShufHi0+0x18(SB)/8, $0xffff787473727170
2077 DATA expandAVX512_44_outShufHi0+0x20(SB)/8, $0xffffffffffffffff
2078 DATA expandAVX512_44_outShufHi0+0x28(SB)/8, $0x46453e3d3c3b3aff
2079 DATA expandAVX512_44_outShufHi0+0x30(SB)/8, $0xff57565549ffff47
2080 DATA expandAVX512_44_outShufHi0+0x38(SB)/8, $0x6d61ffff5f5e5dff
2081
// expandAVX512_44_outShufHi1 is the 64-byte index vector that
// expandAVX512_44 loads into Z6 for the zero-masked
// `VPERMB.Z Z4, Z6, K1, Z0` (K1 = 0x318601ffc0000000). Real indices appear
// exactly at the byte positions whose bit is set in that mask; every other
// entry is 0xff.
// NOTE(review): declared without the file-local `<>` suffix, unlike the
// inShuf/mat tables. If that is unintended, fix it in mkasm.go.
2082 GLOBL expandAVX512_44_outShufHi1(SB), RODATA, $0x40
2083 DATA expandAVX512_44_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
2084 DATA expandAVX512_44_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
2085 DATA expandAVX512_44_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
2086 DATA expandAVX512_44_outShufHi1+0x18(SB)/8, $0x0100ffffffffffff
2087 DATA expandAVX512_44_outShufHi1+0x20(SB)/8, $0x0c0b0a0908040302
2088 DATA expandAVX512_44_outShufHi1+0x28(SB)/8, $0xffffffffffffff10
2089 DATA expandAVX512_44_outShufHi1+0x30(SB)/8, $0x20ffffffff1918ff
2090 DATA expandAVX512_44_outShufHi1+0x38(SB)/8, $0xffff2928ffffff21
2091
// expandAVX512_44 is the expander installed at ·gcExpandersAVX512+0xa8.
//
// In:       AX = address of the input; 64 bytes are loaded from (AX).
// Out:      Z1 and Z2 (presumably the low and high 64 bytes of the
//           expansion, going by the outShufLo/outShufHi names — confirm
//           with the caller).
// Clobbers: Z0, Z3, Z4, Z5, Z6, Z7, K1, and AX (reused as scratch for the
//           mask constants after the input has been loaded).
// Declared NOSPLIT with frame $0-0: no stack frame and no stack arguments.
2092 TEXT expandAVX512_44<>(SB), NOSPLIT, $0-0
// Load the four gather-index tables and the three output-shuffle tables.
2093 VMOVDQU64 expandAVX512_44_inShuf0<>(SB), Z0
2094 VMOVDQU64 expandAVX512_44_inShuf1<>(SB), Z2
2095 VMOVDQU64 expandAVX512_44_inShuf2<>(SB), Z3
2096 VMOVDQU64 expandAVX512_44_inShuf3<>(SB), Z4
2097 VMOVDQU64 expandAVX512_44_outShufLo(SB), Z1
2098 VMOVDQU64 expandAVX512_44_outShufHi0(SB), Z5
2099 VMOVDQU64 expandAVX512_44_outShufHi1(SB), Z6
// Z7 = the 64 input bytes.
2100 VMOVDQU64 (AX), Z7
// Four gather + bit-matrix stages. Each VPERMB picks input bytes by index,
// then VGF2P8AFFINEQB transforms every picked byte with the per-qword
// matrix. Results land in Z0, Z2, Z3 and Z4.
2101 VPERMB Z7, Z0, Z0
2102 VGF2P8AFFINEQB $0, expandAVX512_44_mat0<>(SB), Z0, Z0
2103 VPERMB Z7, Z2, Z2
2104 VGF2P8AFFINEQB $0, expandAVX512_44_mat1<>(SB), Z2, Z2
2105 VPERMB Z7, Z3, Z3
2106 VGF2P8AFFINEQB $0, expandAVX512_44_mat2<>(SB), Z3, Z3
2107 VPERMB Z7, Z4, Z4
2108 VGF2P8AFFINEQB $0, expandAVX512_44_mat3<>(SB), Z4, Z4
// First output: Z1 is built from the Z0/Z2 pair.
2109 VPERMI2B Z2, Z0, Z1
// Second output needs three sources, so it is built in two zero-masked
// pieces. The two K1 masks are bitwise complements, so every output byte
// comes from exactly one piece. Z5 takes bytes from the Z2/Z3 pair ...
2110 MOVQ $0xce79fe003fffffff, AX
2111 KMOVQ AX, K1
2112 VPERMI2B.Z Z3, Z2, K1, Z5
// ... and Z0 (no longer needed as a source) takes the remaining bytes
// from Z4.
2113 MOVQ $0x318601ffc0000000, AX
2114 KMOVQ AX, K1
2115 VPERMB.Z Z4, Z6, K1, Z0
// Merge the two pieces into Z2.
2116 VPORQ Z0, Z5, Z2
2117 RET
2118
// expandAVX512_48_inShuf0 is a 64-byte read-only byte-index table.
// expandAVX512_48 loads it into Z0 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The indices used here are
// 0x00 and 0x01; 0xff entries look like unused filler slots
// (NOTE(review): confirm against mkasm.go).
2119 GLOBL expandAVX512_48_inShuf0<>(SB), RODATA, $0x40
2120 DATA expandAVX512_48_inShuf0<>+0x00(SB)/8, $0x0101000000000000
2121 DATA expandAVX512_48_inShuf0<>+0x08(SB)/8, $0x0101000000000000
2122 DATA expandAVX512_48_inShuf0<>+0x10(SB)/8, $0x0101000000000000
2123 DATA expandAVX512_48_inShuf0<>+0x18(SB)/8, $0xffff000000000000
2124 DATA expandAVX512_48_inShuf0<>+0x20(SB)/8, $0xffff000000000000
2125 DATA expandAVX512_48_inShuf0<>+0x28(SB)/8, $0xffff000000000000
2126 DATA expandAVX512_48_inShuf0<>+0x30(SB)/8, $0xffff000000000000
2127 DATA expandAVX512_48_inShuf0<>+0x38(SB)/8, $0xffff000000000000
2128
// expandAVX512_48_mat0 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_48 applies to the bytes gathered with
// expandAVX512_48_inShuf0. Qword k repeats the single-bit mask 1<<k, so
// every result byte in qword lane k is 0x00 or 0xff according to bit k of
// the gathered byte.
2129 GLOBL expandAVX512_48_mat0<>(SB), RODATA, $0x40
2130 DATA expandAVX512_48_mat0<>+0x00(SB)/8, $0x0101010101010101
2131 DATA expandAVX512_48_mat0<>+0x08(SB)/8, $0x0202020202020202
2132 DATA expandAVX512_48_mat0<>+0x10(SB)/8, $0x0404040404040404
2133 DATA expandAVX512_48_mat0<>+0x18(SB)/8, $0x0808080808080808
2134 DATA expandAVX512_48_mat0<>+0x20(SB)/8, $0x1010101010101010
2135 DATA expandAVX512_48_mat0<>+0x28(SB)/8, $0x2020202020202020
2136 DATA expandAVX512_48_mat0<>+0x30(SB)/8, $0x4040404040404040
2137 DATA expandAVX512_48_mat0<>+0x38(SB)/8, $0x8080808080808080
2138
// expandAVX512_48_inShuf1 is a 64-byte read-only byte-index table.
// expandAVX512_48 loads it into Z3 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The indices used here are
// 0x01 and 0x02; 0xff entries look like unused filler slots
// (NOTE(review): confirm against mkasm.go).
2139 GLOBL expandAVX512_48_inShuf1<>(SB), RODATA, $0x40
2140 DATA expandAVX512_48_inShuf1<>+0x00(SB)/8, $0xffffffff01010101
2141 DATA expandAVX512_48_inShuf1<>+0x08(SB)/8, $0xffffffff01010101
2142 DATA expandAVX512_48_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
2143 DATA expandAVX512_48_inShuf1<>+0x18(SB)/8, $0x0202020202020101
2144 DATA expandAVX512_48_inShuf1<>+0x20(SB)/8, $0x0202010101010101
2145 DATA expandAVX512_48_inShuf1<>+0x28(SB)/8, $0x0202010101010101
2146 DATA expandAVX512_48_inShuf1<>+0x30(SB)/8, $0x0202010101010101
2147 DATA expandAVX512_48_inShuf1<>+0x38(SB)/8, $0xffff010101010101
2148
// expandAVX512_48_mat1 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_48 applies to the bytes gathered with
// expandAVX512_48_inShuf1. Each qword repeats one single-bit mask, so every
// result byte in that qword lane is 0x00 or 0xff according to that bit of
// the gathered byte. The 0x04 mask is used by two consecutive qwords
// (+0x10 and +0x18).
2149 GLOBL expandAVX512_48_mat1<>(SB), RODATA, $0x40
2150 DATA expandAVX512_48_mat1<>+0x00(SB)/8, $0x0101010101010101
2151 DATA expandAVX512_48_mat1<>+0x08(SB)/8, $0x0202020202020202
2152 DATA expandAVX512_48_mat1<>+0x10(SB)/8, $0x0404040404040404
2153 DATA expandAVX512_48_mat1<>+0x18(SB)/8, $0x0404040404040404
2154 DATA expandAVX512_48_mat1<>+0x20(SB)/8, $0x0808080808080808
2155 DATA expandAVX512_48_mat1<>+0x28(SB)/8, $0x1010101010101010
2156 DATA expandAVX512_48_mat1<>+0x30(SB)/8, $0x2020202020202020
2157 DATA expandAVX512_48_mat1<>+0x38(SB)/8, $0x4040404040404040
2158
// expandAVX512_48_inShuf2 is a 64-byte read-only byte-index table.
// expandAVX512_48 loads it into Z4 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The indices used here are
// 0x01 and 0x02; 0xff entries (including the whole last three qwords) look
// like unused filler slots (NOTE(review): confirm against mkasm.go).
2159 GLOBL expandAVX512_48_inShuf2<>(SB), RODATA, $0x40
2160 DATA expandAVX512_48_inShuf2<>+0x00(SB)/8, $0xffff010101010101
2161 DATA expandAVX512_48_inShuf2<>+0x08(SB)/8, $0xffff020202020202
2162 DATA expandAVX512_48_inShuf2<>+0x10(SB)/8, $0xffff020202020202
2163 DATA expandAVX512_48_inShuf2<>+0x18(SB)/8, $0xffffffff02020202
2164 DATA expandAVX512_48_inShuf2<>+0x20(SB)/8, $0xffffffff02020202
2165 DATA expandAVX512_48_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
2166 DATA expandAVX512_48_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
2167 DATA expandAVX512_48_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
2168
// expandAVX512_48_mat2 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_48 applies to the bytes gathered with
// expandAVX512_48_inShuf2. Each non-zero qword repeats one single-bit mask,
// so every result byte in that qword lane is 0x00 or 0xff according to that
// bit of the gathered byte. The all-zero qwords (+0x28 through +0x38) yield
// zero bytes.
2169 GLOBL expandAVX512_48_mat2<>(SB), RODATA, $0x40
2170 DATA expandAVX512_48_mat2<>+0x00(SB)/8, $0x8080808080808080
2171 DATA expandAVX512_48_mat2<>+0x08(SB)/8, $0x0101010101010101
2172 DATA expandAVX512_48_mat2<>+0x10(SB)/8, $0x0202020202020202
2173 DATA expandAVX512_48_mat2<>+0x18(SB)/8, $0x0808080808080808
2174 DATA expandAVX512_48_mat2<>+0x20(SB)/8, $0x1010101010101010
2175 DATA expandAVX512_48_mat2<>+0x28(SB)/8, $0x0000000000000000
2176 DATA expandAVX512_48_mat2<>+0x30(SB)/8, $0x0000000000000000
2177 DATA expandAVX512_48_mat2<>+0x38(SB)/8, $0x0000000000000000
2178
// expandAVX512_48_outShufLo is the 64-byte index vector that
// expandAVX512_48 loads into Z1 for `VPERMI2B Z3, Z0, Z1`. That instruction
// overwrites Z1 with bytes picked from the Z0/Z3 pair: indices 0x00-0x3f
// select from Z0 and 0x40-0x7f from Z3.
// NOTE(review): unlike the inShuf/mat tables, this symbol is declared
// without the file-local `<>` suffix. If that is unintended, the fix
// belongs in mkasm.go, not in this generated file.
2179 GLOBL expandAVX512_48_outShufLo(SB), RODATA, $0x40
2180 DATA expandAVX512_48_outShufLo+0x00(SB)/8, $0x0908050403020100
2181 DATA expandAVX512_48_outShufLo+0x08(SB)/8, $0x131211100d0c0b0a
2182 DATA expandAVX512_48_outShufLo+0x10(SB)/8, $0x1d1c1b1a19181514
2183 DATA expandAVX512_48_outShufLo+0x18(SB)/8, $0x2928252423222120
2184 DATA expandAVX512_48_outShufLo+0x20(SB)/8, $0x333231302d2c2b2a
2185 DATA expandAVX512_48_outShufLo+0x28(SB)/8, $0x3d3c3b3a39383534
2186 DATA expandAVX512_48_outShufLo+0x30(SB)/8, $0x0f0e434241400706
2187 DATA expandAVX512_48_outShufLo+0x38(SB)/8, $0x515017164b4a4948
2188
// expandAVX512_48_outShufHi is the 64-byte index vector that
// expandAVX512_48 loads into Z2 for `VPERMI2B Z4, Z3, Z2`. That instruction
// overwrites Z2 with bytes picked from the Z3/Z4 pair: indices 0x00-0x3f
// select from Z3 and 0x40-0x7f from Z4.
// NOTE(review): unlike the inShuf/mat tables, this symbol is declared
// without the file-local `<>` suffix. If that is unintended, the fix
// belongs in mkasm.go, not in this generated file.
2189 GLOBL expandAVX512_48_outShufHi(SB), RODATA, $0x40
2190 DATA expandAVX512_48_outShufHi+0x00(SB)/8, $0x2524232221201918
2191 DATA expandAVX512_48_outShufHi+0x08(SB)/8, $0x31302d2c2b2a2928
2192 DATA expandAVX512_48_outShufHi+0x10(SB)/8, $0x3b3a393835343332
2193 DATA expandAVX512_48_outShufHi+0x18(SB)/8, $0x4544434241403d3c
2194 DATA expandAVX512_48_outShufHi+0x20(SB)/8, $0x51504d4c4b4a4948
2195 DATA expandAVX512_48_outShufHi+0x28(SB)/8, $0x1d1c1b1a55545352
2196 DATA expandAVX512_48_outShufHi+0x30(SB)/8, $0x5b5a595827261f1e
2197 DATA expandAVX512_48_outShufHi+0x38(SB)/8, $0x3736636261602f2e
2198
// expandAVX512_48 is the expander installed at ·gcExpandersAVX512+0xb0.
//
// In:       AX = address of the input; 64 bytes are loaded from (AX).
// Out:      Z1 and Z2 (presumably the low and high 64 bytes of the
//           expansion, going by the outShufLo/outShufHi names — confirm
//           with the caller).
// Clobbers: Z0, Z3, Z4, Z5.
// Declared NOSPLIT with frame $0-0: no stack frame and no stack arguments.
2199 TEXT expandAVX512_48<>(SB), NOSPLIT, $0-0
// Load the three gather-index tables and the two output-shuffle tables.
2200 VMOVDQU64 expandAVX512_48_inShuf0<>(SB), Z0
2201 VMOVDQU64 expandAVX512_48_inShuf1<>(SB), Z3
2202 VMOVDQU64 expandAVX512_48_inShuf2<>(SB), Z4
2203 VMOVDQU64 expandAVX512_48_outShufLo(SB), Z1
2204 VMOVDQU64 expandAVX512_48_outShufHi(SB), Z2
// Z5 = the 64 input bytes.
2205 VMOVDQU64 (AX), Z5
// Three gather + bit-matrix stages. Each VPERMB picks input bytes by index,
// then VGF2P8AFFINEQB transforms every picked byte with the per-qword
// matrix. Results land in Z0, Z3 and Z4.
2206 VPERMB Z5, Z0, Z0
2207 VGF2P8AFFINEQB $0, expandAVX512_48_mat0<>(SB), Z0, Z0
2208 VPERMB Z5, Z3, Z3
2209 VGF2P8AFFINEQB $0, expandAVX512_48_mat1<>(SB), Z3, Z3
2210 VPERMB Z5, Z4, Z4
2211 VGF2P8AFFINEQB $0, expandAVX512_48_mat2<>(SB), Z4, Z4
// Assemble the outputs. Z1 is built from the Z0/Z3 pair and Z2 from the
// Z3/Z4 pair, using the index vectors already held in Z1 and Z2.
2212 VPERMI2B Z3, Z0, Z1
2213 VPERMI2B Z4, Z3, Z2
2214 RET
2215
// expandAVX512_52_inShuf0 is a 64-byte read-only byte-index table.
// expandAVX512_52 loads it into Z0 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The indices used here are
// 0x00 and 0x01; 0xff entries look like unused filler slots
// (NOTE(review): confirm against mkasm.go).
2216 GLOBL expandAVX512_52_inShuf0<>(SB), RODATA, $0x40
2217 DATA expandAVX512_52_inShuf0<>+0x00(SB)/8, $0x0101000000000000
2218 DATA expandAVX512_52_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
2219 DATA expandAVX512_52_inShuf0<>+0x10(SB)/8, $0x0101000000000000
2220 DATA expandAVX512_52_inShuf0<>+0x18(SB)/8, $0xffff000000000000
2221 DATA expandAVX512_52_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
2222 DATA expandAVX512_52_inShuf0<>+0x28(SB)/8, $0xffff000000000000
2223 DATA expandAVX512_52_inShuf0<>+0x30(SB)/8, $0xffff000000000000
2224 DATA expandAVX512_52_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
2225
// expandAVX512_52_mat0 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_52 applies to the bytes gathered with
// expandAVX512_52_inShuf0. Every matrix byte has exactly one bit set, so
// each result bit is a copy of one bit of the gathered byte. Qwords holding
// two different masks (+0x08, +0x20, +0x38) build each result byte as two
// 4-bit halves driven by two adjacent source bits.
2226 GLOBL expandAVX512_52_mat0<>(SB), RODATA, $0x40
2227 DATA expandAVX512_52_mat0<>+0x00(SB)/8, $0x0101010101010101
2228 DATA expandAVX512_52_mat0<>+0x08(SB)/8, $0x0101010102020202
2229 DATA expandAVX512_52_mat0<>+0x10(SB)/8, $0x0202020202020202
2230 DATA expandAVX512_52_mat0<>+0x18(SB)/8, $0x0404040404040404
2231 DATA expandAVX512_52_mat0<>+0x20(SB)/8, $0x0404040408080808
2232 DATA expandAVX512_52_mat0<>+0x28(SB)/8, $0x0808080808080808
2233 DATA expandAVX512_52_mat0<>+0x30(SB)/8, $0x1010101010101010
2234 DATA expandAVX512_52_mat0<>+0x38(SB)/8, $0x1010101020202020
2235
// expandAVX512_52_inShuf1 is a 64-byte read-only byte-index table.
// expandAVX512_52 loads it into Z2 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The indices used here are
// 0x00-0x02; 0xff entries look like unused filler slots
// (NOTE(review): confirm against mkasm.go).
2236 GLOBL expandAVX512_52_inShuf1<>(SB), RODATA, $0x40
2237 DATA expandAVX512_52_inShuf1<>+0x00(SB)/8, $0xffff000000000000
2238 DATA expandAVX512_52_inShuf1<>+0x08(SB)/8, $0xffff000000000000
2239 DATA expandAVX512_52_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
2240 DATA expandAVX512_52_inShuf1<>+0x18(SB)/8, $0xffff000000000000
2241 DATA expandAVX512_52_inShuf1<>+0x20(SB)/8, $0xffffffff01010101
2242 DATA expandAVX512_52_inShuf1<>+0x28(SB)/8, $0xffffffffff010101
2243 DATA expandAVX512_52_inShuf1<>+0x30(SB)/8, $0xff02020202020201
2244 DATA expandAVX512_52_inShuf1<>+0x38(SB)/8, $0x0202010101010101
2245
// expandAVX512_52_mat1 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_52 applies to the bytes gathered with
// expandAVX512_52_inShuf1. Every matrix byte has exactly one bit set, so
// each result bit is a copy of one bit of the gathered byte. The qword at
// +0x10 holds two different masks and so builds each result byte as two
// 4-bit halves. The 0x02 mask is used by two consecutive qwords (+0x28 and
// +0x30).
2246 GLOBL expandAVX512_52_mat1<>(SB), RODATA, $0x40
2247 DATA expandAVX512_52_mat1<>+0x00(SB)/8, $0x2020202020202020
2248 DATA expandAVX512_52_mat1<>+0x08(SB)/8, $0x4040404040404040
2249 DATA expandAVX512_52_mat1<>+0x10(SB)/8, $0x4040404080808080
2250 DATA expandAVX512_52_mat1<>+0x18(SB)/8, $0x8080808080808080
2251 DATA expandAVX512_52_mat1<>+0x20(SB)/8, $0x0101010101010101
2252 DATA expandAVX512_52_mat1<>+0x28(SB)/8, $0x0202020202020202
2253 DATA expandAVX512_52_mat1<>+0x30(SB)/8, $0x0202020202020202
2254 DATA expandAVX512_52_mat1<>+0x38(SB)/8, $0x0404040404040404
2255
// expandAVX512_52_inShuf2 is a 64-byte read-only byte-index table.
// expandAVX512_52 loads it into Z3 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The indices used here are
// 0x01 and 0x02; 0xff entries look like unused filler slots
// (NOTE(review): confirm against mkasm.go).
2256 GLOBL expandAVX512_52_inShuf2<>(SB), RODATA, $0x40
2257 DATA expandAVX512_52_inShuf2<>+0x00(SB)/8, $0xffffffffffff0201
2258 DATA expandAVX512_52_inShuf2<>+0x08(SB)/8, $0x0202010101010101
2259 DATA expandAVX512_52_inShuf2<>+0x10(SB)/8, $0xffff010101010101
2260 DATA expandAVX512_52_inShuf2<>+0x18(SB)/8, $0xffffffffffffff01
2261 DATA expandAVX512_52_inShuf2<>+0x20(SB)/8, $0xffff010101010101
2262 DATA expandAVX512_52_inShuf2<>+0x28(SB)/8, $0xffff010101010101
2263 DATA expandAVX512_52_inShuf2<>+0x30(SB)/8, $0xffffffffffffff01
2264 DATA expandAVX512_52_inShuf2<>+0x38(SB)/8, $0xffff010101010101
2265
// expandAVX512_52_mat2 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_52 applies to the bytes gathered with
// expandAVX512_52_inShuf2. Every matrix byte has exactly one bit set, so
// each result bit is a copy of one bit of the gathered byte. Qwords holding
// two different masks (+0x00, +0x18, +0x30) build each result byte as two
// 4-bit halves driven by two adjacent source bits.
2266 GLOBL expandAVX512_52_mat2<>(SB), RODATA, $0x40
2267 DATA expandAVX512_52_mat2<>+0x00(SB)/8, $0x0404040408080808
2268 DATA expandAVX512_52_mat2<>+0x08(SB)/8, $0x0808080808080808
2269 DATA expandAVX512_52_mat2<>+0x10(SB)/8, $0x1010101010101010
2270 DATA expandAVX512_52_mat2<>+0x18(SB)/8, $0x1010101020202020
2271 DATA expandAVX512_52_mat2<>+0x20(SB)/8, $0x2020202020202020
2272 DATA expandAVX512_52_mat2<>+0x28(SB)/8, $0x4040404040404040
2273 DATA expandAVX512_52_mat2<>+0x30(SB)/8, $0x4040404080808080
2274 DATA expandAVX512_52_mat2<>+0x38(SB)/8, $0x8080808080808080
2275
// expandAVX512_52_inShuf3 is a 64-byte read-only byte-index table.
// expandAVX512_52 loads it into Z4 and uses it as the index operand of
// VPERMB over the 64 input bytes read from (AX). The only real index used
// is 0x02; 0xff entries (including the whole last four qwords) look like
// unused filler slots (NOTE(review): confirm against mkasm.go).
2276 GLOBL expandAVX512_52_inShuf3<>(SB), RODATA, $0x40
2277 DATA expandAVX512_52_inShuf3<>+0x00(SB)/8, $0xffff020202020202
2278 DATA expandAVX512_52_inShuf3<>+0x08(SB)/8, $0xffffffffffffff02
2279 DATA expandAVX512_52_inShuf3<>+0x10(SB)/8, $0xffffffff02020202
2280 DATA expandAVX512_52_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
2281 DATA expandAVX512_52_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
2282 DATA expandAVX512_52_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
2283 DATA expandAVX512_52_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
2284 DATA expandAVX512_52_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
2285
// expandAVX512_52_mat3 is the 64-byte matrix operand of the VGF2P8AFFINEQB
// (immediate $0) that expandAVX512_52 applies to the bytes gathered with
// expandAVX512_52_inShuf3. Every non-zero matrix byte has exactly one bit
// set, so each result bit is a copy of one bit of the gathered byte. The
// qword at +0x08 holds two different masks and so builds each result byte
// as two 4-bit halves. The all-zero qwords (+0x20 through +0x38) yield zero
// bytes.
2286 GLOBL expandAVX512_52_mat3<>(SB), RODATA, $0x40
2287 DATA expandAVX512_52_mat3<>+0x00(SB)/8, $0x0101010101010101
2288 DATA expandAVX512_52_mat3<>+0x08(SB)/8, $0x0101010102020202
2289 DATA expandAVX512_52_mat3<>+0x10(SB)/8, $0x0404040404040404
2290 DATA expandAVX512_52_mat3<>+0x18(SB)/8, $0x0808080808080808
2291 DATA expandAVX512_52_mat3<>+0x20(SB)/8, $0x0000000000000000
2292 DATA expandAVX512_52_mat3<>+0x28(SB)/8, $0x0000000000000000
2293 DATA expandAVX512_52_mat3<>+0x30(SB)/8, $0x0000000000000000
2294 DATA expandAVX512_52_mat3<>+0x38(SB)/8, $0x0000000000000000
2295
// expandAVX512_52_outShufLo is the 64-byte index vector that
// expandAVX512_52 loads into Z1 for `VPERMI2B Z2, Z0, Z1`. That instruction
// overwrites Z1 with bytes picked from the Z0/Z2 pair: indices 0x00-0x3f
// select from Z0 and 0x40-0x7f from Z2.
// NOTE(review): unlike the inShuf/mat tables, this symbol is declared
// without the file-local `<>` suffix. If that is unintended, the fix
// belongs in mkasm.go, not in this generated file.
2296 GLOBL expandAVX512_52_outShufLo(SB), RODATA, $0x40
2297 DATA expandAVX512_52_outShufLo+0x00(SB)/8, $0x1008050403020100
2298 DATA expandAVX512_52_outShufLo+0x08(SB)/8, $0x1a19181514131211
2299 DATA expandAVX512_52_outShufLo+0x10(SB)/8, $0x2b2a2928201d1c1b
2300 DATA expandAVX512_52_outShufLo+0x18(SB)/8, $0x3534333231302d2c
2301 DATA expandAVX512_52_outShufLo+0x20(SB)/8, $0x4845444342414038
2302 DATA expandAVX512_52_outShufLo+0x28(SB)/8, $0x5958504d4c4b4a49
2303 DATA expandAVX512_52_outShufLo+0x30(SB)/8, $0x616007065d5c5b5a
2304 DATA expandAVX512_52_outShufLo+0x38(SB)/8, $0x6a69681716096362
2305
// expandAVX512_52_outShufHi0 is the 64-byte index vector that
// expandAVX512_52 loads into Z5 for the zero-masked
// `VPERMI2B.Z Z3, Z2, K1, Z5` (K1 = 0x387f80ffffffffff). Indices 0x00-0x3f
// select from Z2 and 0x40-0x7f from Z3. The 0xff entries sit exactly at the
// byte positions whose bit is cleared in that mask.
// NOTE(review): declared without the file-local `<>` suffix, unlike the
// inShuf/mat tables. If that is unintended, fix it in mkasm.go.
2306 GLOBL expandAVX512_52_outShufHi0(SB), RODATA, $0x40
2307 DATA expandAVX512_52_outShufHi0+0x00(SB)/8, $0x403d3c3b3a393830
2308 DATA expandAVX512_52_outShufHi0+0x08(SB)/8, $0x51504d4c4b4a4948
2309 DATA expandAVX512_52_outShufHi0+0x10(SB)/8, $0x6261605855545352
2310 DATA expandAVX512_52_outShufHi0+0x18(SB)/8, $0x6c6b6a6968656463
2311 DATA expandAVX512_52_outShufHi0+0x20(SB)/8, $0x7d7c7b7a7978706d
2312 DATA expandAVX512_52_outShufHi0+0x28(SB)/8, $0x31ffffffffffffff
2313 DATA expandAVX512_52_outShufHi0+0x30(SB)/8, $0xff3f3e3635343332
2314 DATA expandAVX512_52_outShufHi0+0x38(SB)/8, $0xffff4f4e41ffffff
2315
// expandAVX512_52_outShufHi1 is the 64-byte index vector that
// expandAVX512_52 loads into Z6 for the zero-masked
// `VPERMB.Z Z4, Z6, K1, Z0` (K1 = 0xc7807f0000000000). Real indices appear
// exactly at the byte positions whose bit is set in that mask; every other
// entry is 0xff.
// NOTE(review): declared without the file-local `<>` suffix, unlike the
// inShuf/mat tables. If that is unintended, fix it in mkasm.go.
2316 GLOBL expandAVX512_52_outShufHi1(SB), RODATA, $0x40
2317 DATA expandAVX512_52_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
2318 DATA expandAVX512_52_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
2319 DATA expandAVX512_52_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
2320 DATA expandAVX512_52_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
2321 DATA expandAVX512_52_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
2322 DATA expandAVX512_52_outShufHi1+0x28(SB)/8, $0xff08050403020100
2323 DATA expandAVX512_52_outShufHi1+0x30(SB)/8, $0x10ffffffffffffff
2324 DATA expandAVX512_52_outShufHi1+0x38(SB)/8, $0x1918ffffff131211
2325
// expandAVX512_52 is the expander installed at ·gcExpandersAVX512+0xb8.
//
// In:       AX = address of the input; 64 bytes are loaded from (AX).
// Out:      Z1 and Z2 (presumably the low and high 64 bytes of the
//           expansion, going by the outShufLo/outShufHi names — confirm
//           with the caller).
// Clobbers: Z0, Z3, Z4, Z5, Z6, Z7, K1, and AX (reused as scratch for the
//           mask constants after the input has been loaded).
// Declared NOSPLIT with frame $0-0: no stack frame and no stack arguments.
2326 TEXT expandAVX512_52<>(SB), NOSPLIT, $0-0
// Load the four gather-index tables and the three output-shuffle tables.
2327 VMOVDQU64 expandAVX512_52_inShuf0<>(SB), Z0
2328 VMOVDQU64 expandAVX512_52_inShuf1<>(SB), Z2
2329 VMOVDQU64 expandAVX512_52_inShuf2<>(SB), Z3
2330 VMOVDQU64 expandAVX512_52_inShuf3<>(SB), Z4
2331 VMOVDQU64 expandAVX512_52_outShufLo(SB), Z1
2332 VMOVDQU64 expandAVX512_52_outShufHi0(SB), Z5
2333 VMOVDQU64 expandAVX512_52_outShufHi1(SB), Z6
// Z7 = the 64 input bytes.
2334 VMOVDQU64 (AX), Z7
// Four gather + bit-matrix stages. Each VPERMB picks input bytes by index,
// then VGF2P8AFFINEQB transforms every picked byte with the per-qword
// matrix. Results land in Z0, Z2, Z3 and Z4.
2335 VPERMB Z7, Z0, Z0
2336 VGF2P8AFFINEQB $0, expandAVX512_52_mat0<>(SB), Z0, Z0
2337 VPERMB Z7, Z2, Z2
2338 VGF2P8AFFINEQB $0, expandAVX512_52_mat1<>(SB), Z2, Z2
2339 VPERMB Z7, Z3, Z3
2340 VGF2P8AFFINEQB $0, expandAVX512_52_mat2<>(SB), Z3, Z3
2341 VPERMB Z7, Z4, Z4
2342 VGF2P8AFFINEQB $0, expandAVX512_52_mat3<>(SB), Z4, Z4
// First output: Z1 is built from the Z0/Z2 pair.
2343 VPERMI2B Z2, Z0, Z1
// Second output needs three sources, so it is built in two zero-masked
// pieces. The two K1 masks are bitwise complements, so every output byte
// comes from exactly one piece. Z5 takes bytes from the Z2/Z3 pair ...
2344 MOVQ $0x387f80ffffffffff, AX
2345 KMOVQ AX, K1
2346 VPERMI2B.Z Z3, Z2, K1, Z5
// ... and Z0 (no longer needed as a source) takes the remaining bytes
// from Z4.
2347 MOVQ $0xc7807f0000000000, AX
2348 KMOVQ AX, K1
2349 VPERMB.Z Z4, Z6, K1, Z0
// Merge the two pieces into Z2.
2350 VPORQ Z0, Z5, Z2
2351 RET
2352
// expandAVX512_56_inShuf0 is a 64-byte read-only table of byte indices.
// The values are 0x00, 0x01 and 0xff. It is presumably the VPERMB index
// operand of the first gather stage of expandAVX512_56, by analogy with the
// other expanders in this file (the routine itself lies outside this chunk
// — confirm there). 0xff entries look like unused filler slots.
2353 GLOBL expandAVX512_56_inShuf0<>(SB), RODATA, $0x40
2354 DATA expandAVX512_56_inShuf0<>+0x00(SB)/8, $0x0100000000000000
2355 DATA expandAVX512_56_inShuf0<>+0x08(SB)/8, $0x0100000000000000
2356 DATA expandAVX512_56_inShuf0<>+0x10(SB)/8, $0xff00000000000000
2357 DATA expandAVX512_56_inShuf0<>+0x18(SB)/8, $0xff00000000000000
2358 DATA expandAVX512_56_inShuf0<>+0x20(SB)/8, $0xff00000000000000
2359 DATA expandAVX512_56_inShuf0<>+0x28(SB)/8, $0xff00000000000000
2360 DATA expandAVX512_56_inShuf0<>+0x30(SB)/8, $0xff00000000000000
2361 DATA expandAVX512_56_inShuf0<>+0x38(SB)/8, $0xff00000000000000
2362
// expandAVX512_56_mat0 is a 64-byte read-only table in which qword k
// repeats the single-bit mask 1<<k. It is presumably a VGF2P8AFFINEQB
// matrix operand of expandAVX512_56, by analogy with the other expanders in
// this file (the routine itself lies outside this chunk — confirm there).
// NOTE(review): no expandAVX512_56_mat1 table is defined between inShuf1
// and inShuf2 in this chunk, so this table may be shared by two stages.
2363 GLOBL expandAVX512_56_mat0<>(SB), RODATA, $0x40
2364 DATA expandAVX512_56_mat0<>+0x00(SB)/8, $0x0101010101010101
2365 DATA expandAVX512_56_mat0<>+0x08(SB)/8, $0x0202020202020202
2366 DATA expandAVX512_56_mat0<>+0x10(SB)/8, $0x0404040404040404
2367 DATA expandAVX512_56_mat0<>+0x18(SB)/8, $0x0808080808080808
2368 DATA expandAVX512_56_mat0<>+0x20(SB)/8, $0x1010101010101010
2369 DATA expandAVX512_56_mat0<>+0x28(SB)/8, $0x2020202020202020
2370 DATA expandAVX512_56_mat0<>+0x30(SB)/8, $0x4040404040404040
2371 DATA expandAVX512_56_mat0<>+0x38(SB)/8, $0x8080808080808080
2372
// expandAVX512_56_inShuf1 is a 64-byte read-only table of byte indices.
// The values are 0x01, 0x02 and 0xff. It is presumably the VPERMB index
// operand of the second gather stage of expandAVX512_56, by analogy with
// the other expanders in this file (the routine itself lies outside this
// chunk — confirm there). 0xff entries look like unused filler slots.
2373 GLOBL expandAVX512_56_inShuf1<>(SB), RODATA, $0x40
2374 DATA expandAVX512_56_inShuf1<>+0x00(SB)/8, $0xffff010101010101
2375 DATA expandAVX512_56_inShuf1<>+0x08(SB)/8, $0x0202010101010101
2376 DATA expandAVX512_56_inShuf1<>+0x10(SB)/8, $0x0201010101010101
2377 DATA expandAVX512_56_inShuf1<>+0x18(SB)/8, $0xff01010101010101
2378 DATA expandAVX512_56_inShuf1<>+0x20(SB)/8, $0xff01010101010101
2379 DATA expandAVX512_56_inShuf1<>+0x28(SB)/8, $0xff01010101010101
2380 DATA expandAVX512_56_inShuf1<>+0x30(SB)/8, $0xff01010101010101
2381 DATA expandAVX512_56_inShuf1<>+0x38(SB)/8, $0xff01010101010101
2382
2383 GLOBL expandAVX512_56_inShuf2<>(SB), RODATA, $0x40
2384 DATA expandAVX512_56_inShuf2<>+0x00(SB)/8, $0xff02020202020202
2385 DATA expandAVX512_56_inShuf2<>+0x08(SB)/8, $0xffffff0202020202
2386 DATA expandAVX512_56_inShuf2<>+0x10(SB)/8, $0xffffffffffffff02
2387 DATA expandAVX512_56_inShuf2<>+0x18(SB)/8, $0xffffffffffffffff
2388 DATA expandAVX512_56_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff
2389 DATA expandAVX512_56_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
2390 DATA expandAVX512_56_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
2391 DATA expandAVX512_56_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
2392
2393 GLOBL expandAVX512_56_mat2<>(SB), RODATA, $0x40
2394 DATA expandAVX512_56_mat2<>+0x00(SB)/8, $0x0101010101010101
2395 DATA expandAVX512_56_mat2<>+0x08(SB)/8, $0x0202020202020202
2396 DATA expandAVX512_56_mat2<>+0x10(SB)/8, $0x0404040404040404
2397 DATA expandAVX512_56_mat2<>+0x18(SB)/8, $0x0000000000000000
2398 DATA expandAVX512_56_mat2<>+0x20(SB)/8, $0x0000000000000000
2399 DATA expandAVX512_56_mat2<>+0x28(SB)/8, $0x0000000000000000
2400 DATA expandAVX512_56_mat2<>+0x30(SB)/8, $0x0000000000000000
2401 DATA expandAVX512_56_mat2<>+0x38(SB)/8, $0x0000000000000000
2402
2403 GLOBL expandAVX512_56_outShufLo(SB), RODATA, $0x40
2404 DATA expandAVX512_56_outShufLo+0x00(SB)/8, $0x0806050403020100
2405 DATA expandAVX512_56_outShufLo+0x08(SB)/8, $0x11100e0d0c0b0a09
2406 DATA expandAVX512_56_outShufLo+0x10(SB)/8, $0x1a19181615141312
2407 DATA expandAVX512_56_outShufLo+0x18(SB)/8, $0x232221201e1d1c1b
2408 DATA expandAVX512_56_outShufLo+0x20(SB)/8, $0x2c2b2a2928262524
2409 DATA expandAVX512_56_outShufLo+0x28(SB)/8, $0x3534333231302e2d
2410 DATA expandAVX512_56_outShufLo+0x30(SB)/8, $0x3e3d3c3b3a393836
2411 DATA expandAVX512_56_outShufLo+0x38(SB)/8, $0x0f45444342414007
2412
2413 GLOBL expandAVX512_56_outShufHi(SB), RODATA, $0x40
2414 DATA expandAVX512_56_outShufHi+0x00(SB)/8, $0x11100d0c0b0a0908
2415 DATA expandAVX512_56_outShufHi+0x08(SB)/8, $0x1a19181615141312
2416 DATA expandAVX512_56_outShufHi+0x10(SB)/8, $0x232221201e1d1c1b
2417 DATA expandAVX512_56_outShufHi+0x18(SB)/8, $0x2c2b2a2928262524
2418 DATA expandAVX512_56_outShufHi+0x20(SB)/8, $0x3534333231302e2d
2419 DATA expandAVX512_56_outShufHi+0x28(SB)/8, $0x3e3d3c3b3a393836
2420 DATA expandAVX512_56_outShufHi+0x30(SB)/8, $0x0e46454443424140
2421 DATA expandAVX512_56_outShufHi+0x38(SB)/8, $0x50174c4b4a49480f
2422
// expandAVX512_56 loads 64 bytes from the address in AX and leaves two
// 64-byte results in Z1 (built with outShufLo) and Z2 (built with outShufHi).
// Judging by its name it presumably expands each input bit to 56 output
// bits — TODO confirm against mkasm.go.
//
// NOSPLIT with frame $0-0: no stack frame and no stack arguments.
// Overwrites Z0-Z6. The statement order matters: Z3 first holds mat0 and is
// then overwritten with an intermediate result.
2423 TEXT expandAVX512_56<>(SB), NOSPLIT, $0-0
// Load the permutation and matrix constants.
2424 VMOVDQU64 expandAVX512_56_inShuf0<>(SB), Z0
2425 VMOVDQU64 expandAVX512_56_mat0<>(SB), Z3
2426 VMOVDQU64 expandAVX512_56_inShuf1<>(SB), Z4
2427 VMOVDQU64 expandAVX512_56_inShuf2<>(SB), Z5
2428 VMOVDQU64 expandAVX512_56_outShufLo(SB), Z1
2429 VMOVDQU64 expandAVX512_56_outShufHi(SB), Z2
// Z6 = the 64 input bytes.
2430 VMOVDQU64 (AX), Z6
// Z0 = mat0 applied to the input bytes gathered by inShuf0.
2431 VPERMB Z6, Z0, Z0
2432 VGF2P8AFFINEQB $0, Z3, Z0, Z0
// Z3 = mat0 applied to the input bytes gathered by inShuf1.
// This is the last use of mat0; Z3 now holds the second intermediate.
2433 VPERMB Z6, Z4, Z4
2434 VGF2P8AFFINEQB $0, Z3, Z4, Z3
// Z4 = mat2 (memory operand) applied to the input bytes gathered by inShuf2.
2435 VPERMB Z6, Z5, Z4
2436 VGF2P8AFFINEQB $0, expandAVX512_56_mat2<>(SB), Z4, Z4
// Z1 = bytes picked by outShufLo from Z0 (indices 0-63) and Z3 (64-127).
2437 VPERMI2B Z3, Z0, Z1
// Z2 = bytes picked by outShufHi from Z3 (indices 0-63) and Z4 (64-127).
2438 VPERMI2B Z4, Z3, Z2
2439 RET
2440
// Constant tables used by expandAVX512_60. Every table is a 64-byte ($0x40)
// read-only symbol, i.e. exactly one ZMM register's worth of data.

// Byte indices for the first VPERMB in expandAVX512_60 (loaded into Z0).
// VPERMB only looks at the low 6 bits of each index, so the 0xff entries
// select input byte 63; presumably they mark don't-care lanes — TODO confirm
// against mkasm.go.
2441 GLOBL expandAVX512_60_inShuf0<>(SB), RODATA, $0x40
2442 DATA expandAVX512_60_inShuf0<>+0x00(SB)/8, $0x0100000000000000
2443 DATA expandAVX512_60_inShuf0<>+0x08(SB)/8, $0xffffffffffffff00
2444 DATA expandAVX512_60_inShuf0<>+0x10(SB)/8, $0xff00000000000000
2445 DATA expandAVX512_60_inShuf0<>+0x18(SB)/8, $0xff00000000000000
2446 DATA expandAVX512_60_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
2447 DATA expandAVX512_60_inShuf0<>+0x28(SB)/8, $0xff00000000000000
2448 DATA expandAVX512_60_inShuf0<>+0x30(SB)/8, $0xff00000000000000
2449 DATA expandAVX512_60_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
2450
// VGF2P8AFFINEQB matrix operand (one 8x8 bit matrix per qword) applied to
// the inShuf0 gather. Each matrix row has a single bit set, so every result
// bit is a copy of one source bit.
2451 GLOBL expandAVX512_60_mat0<>(SB), RODATA, $0x40
2452 DATA expandAVX512_60_mat0<>+0x00(SB)/8, $0x0101010101010101
2453 DATA expandAVX512_60_mat0<>+0x08(SB)/8, $0x0101010102020202
2454 DATA expandAVX512_60_mat0<>+0x10(SB)/8, $0x0202020202020202
2455 DATA expandAVX512_60_mat0<>+0x18(SB)/8, $0x0404040404040404
2456 DATA expandAVX512_60_mat0<>+0x20(SB)/8, $0x0404040408080808
2457 DATA expandAVX512_60_mat0<>+0x28(SB)/8, $0x0808080808080808
2458 DATA expandAVX512_60_mat0<>+0x30(SB)/8, $0x1010101010101010
2459 DATA expandAVX512_60_mat0<>+0x38(SB)/8, $0x1010101020202020
2460
// Byte indices for the second VPERMB in expandAVX512_60 (loaded into Z2).
2461 GLOBL expandAVX512_60_inShuf1<>(SB), RODATA, $0x40
2462 DATA expandAVX512_60_inShuf1<>+0x00(SB)/8, $0xff00000000000000
2463 DATA expandAVX512_60_inShuf1<>+0x08(SB)/8, $0xff00000000000000
2464 DATA expandAVX512_60_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
2465 DATA expandAVX512_60_inShuf1<>+0x18(SB)/8, $0xff00000000000000
2466 DATA expandAVX512_60_inShuf1<>+0x20(SB)/8, $0xffffffffff010101
2467 DATA expandAVX512_60_inShuf1<>+0x28(SB)/8, $0x0202020202010101
2468 DATA expandAVX512_60_inShuf1<>+0x30(SB)/8, $0xffffffffffff0201
2469 DATA expandAVX512_60_inShuf1<>+0x38(SB)/8, $0xff01010101010101
2470
// VGF2P8AFFINEQB matrix operand applied to the inShuf1 gather.
2471 GLOBL expandAVX512_60_mat1<>(SB), RODATA, $0x40
2472 DATA expandAVX512_60_mat1<>+0x00(SB)/8, $0x2020202020202020
2473 DATA expandAVX512_60_mat1<>+0x08(SB)/8, $0x4040404040404040
2474 DATA expandAVX512_60_mat1<>+0x10(SB)/8, $0x4040404080808080
2475 DATA expandAVX512_60_mat1<>+0x18(SB)/8, $0x8080808080808080
2476 DATA expandAVX512_60_mat1<>+0x20(SB)/8, $0x0101010101010101
2477 DATA expandAVX512_60_mat1<>+0x28(SB)/8, $0x0101010101010101
2478 DATA expandAVX512_60_mat1<>+0x30(SB)/8, $0x0101010102020202
2479 DATA expandAVX512_60_mat1<>+0x38(SB)/8, $0x0202020202020202
2480
// Byte indices for the third VPERMB in expandAVX512_60 (loaded into Z3).
2481 GLOBL expandAVX512_60_inShuf2<>(SB), RODATA, $0x40
2482 DATA expandAVX512_60_inShuf2<>+0x00(SB)/8, $0xff01010101010101
2483 DATA expandAVX512_60_inShuf2<>+0x08(SB)/8, $0xffffffffffffff01
2484 DATA expandAVX512_60_inShuf2<>+0x10(SB)/8, $0xff01010101010101
2485 DATA expandAVX512_60_inShuf2<>+0x18(SB)/8, $0xff01010101010101
2486 DATA expandAVX512_60_inShuf2<>+0x20(SB)/8, $0xffffffffffffff01
2487 DATA expandAVX512_60_inShuf2<>+0x28(SB)/8, $0xff01010101010101
2488 DATA expandAVX512_60_inShuf2<>+0x30(SB)/8, $0xff01010101010101
2489 DATA expandAVX512_60_inShuf2<>+0x38(SB)/8, $0xffffffffffffff01
2490
// VGF2P8AFFINEQB matrix operand applied to the inShuf2 gather.
2491 GLOBL expandAVX512_60_mat2<>(SB), RODATA, $0x40
2492 DATA expandAVX512_60_mat2<>+0x00(SB)/8, $0x0404040404040404
2493 DATA expandAVX512_60_mat2<>+0x08(SB)/8, $0x0404040408080808
2494 DATA expandAVX512_60_mat2<>+0x10(SB)/8, $0x0808080808080808
2495 DATA expandAVX512_60_mat2<>+0x18(SB)/8, $0x1010101010101010
2496 DATA expandAVX512_60_mat2<>+0x20(SB)/8, $0x1010101020202020
2497 DATA expandAVX512_60_mat2<>+0x28(SB)/8, $0x2020202020202020
2498 DATA expandAVX512_60_mat2<>+0x30(SB)/8, $0x4040404040404040
2499 DATA expandAVX512_60_mat2<>+0x38(SB)/8, $0x4040404080808080
2500
// Byte indices for the fourth VPERMB in expandAVX512_60 (loaded into Z4).
2501 GLOBL expandAVX512_60_inShuf3<>(SB), RODATA, $0x40
2502 DATA expandAVX512_60_inShuf3<>+0x00(SB)/8, $0xff01010101010101
2503 DATA expandAVX512_60_inShuf3<>+0x08(SB)/8, $0xffffffffffff0202
2504 DATA expandAVX512_60_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
2505 DATA expandAVX512_60_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
2506 DATA expandAVX512_60_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
2507 DATA expandAVX512_60_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
2508 DATA expandAVX512_60_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
2509 DATA expandAVX512_60_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
2510
// VGF2P8AFFINEQB matrix operand applied to the inShuf3 gather. Only the
// first two qwords are non-zero; an all-zero matrix yields zero bytes.
2511 GLOBL expandAVX512_60_mat3<>(SB), RODATA, $0x40
2512 DATA expandAVX512_60_mat3<>+0x00(SB)/8, $0x8080808080808080
2513 DATA expandAVX512_60_mat3<>+0x08(SB)/8, $0x0101010101010101
2514 DATA expandAVX512_60_mat3<>+0x10(SB)/8, $0x0000000000000000
2515 DATA expandAVX512_60_mat3<>+0x18(SB)/8, $0x0000000000000000
2516 DATA expandAVX512_60_mat3<>+0x20(SB)/8, $0x0000000000000000
2517 DATA expandAVX512_60_mat3<>+0x28(SB)/8, $0x0000000000000000
2518 DATA expandAVX512_60_mat3<>+0x30(SB)/8, $0x0000000000000000
2519 DATA expandAVX512_60_mat3<>+0x38(SB)/8, $0x0000000000000000
2520
// Byte indices for the unmasked VPERMI2B that produces the low result (Z1).
// Indices with bit 6 set (0x40 and up) select from the second table register.
// NOTE(review): unlike the tables above, this symbol is declared without the
// <> file-local suffix — confirm this is intentional in the generator.
2521 GLOBL expandAVX512_60_outShufLo(SB), RODATA, $0x40
2522 DATA expandAVX512_60_outShufLo+0x00(SB)/8, $0x0806050403020100
2523 DATA expandAVX512_60_outShufLo+0x08(SB)/8, $0x1816151413121110
2524 DATA expandAVX512_60_outShufLo+0x10(SB)/8, $0x28201e1d1c1b1a19
2525 DATA expandAVX512_60_outShufLo+0x18(SB)/8, $0x31302e2d2c2b2a29
2526 DATA expandAVX512_60_outShufLo+0x20(SB)/8, $0x4140383635343332
2527 DATA expandAVX512_60_outShufLo+0x28(SB)/8, $0x4a49484645444342
2528 DATA expandAVX512_60_outShufLo+0x30(SB)/8, $0x5a5958504e4d4c4b
2529 DATA expandAVX512_60_outShufLo+0x38(SB)/8, $0x626160075e5d5c5b
2530
// Byte indices for the masked VPERMI2B that builds the first part of the
// high result. The 0xff entries (lanes 49-55, 61, 62) are exactly the lanes
// cleared by the mask 0x9f01ffffffffffff used with it in expandAVX512_60.
// NOTE(review): declared without the <> file-local suffix — confirm.
2531 GLOBL expandAVX512_60_outShufHi0(SB), RODATA, $0x40
2532 DATA expandAVX512_60_outShufHi0+0x00(SB)/8, $0x3b3a3938302a2928
2533 DATA expandAVX512_60_outShufHi0+0x08(SB)/8, $0x44434241403e3d3c
2534 DATA expandAVX512_60_outShufHi0+0x10(SB)/8, $0x5453525150484645
2535 DATA expandAVX512_60_outShufHi0+0x18(SB)/8, $0x5d5c5b5a59585655
2536 DATA expandAVX512_60_outShufHi0+0x20(SB)/8, $0x6d6c6b6a6968605e
2537 DATA expandAVX512_60_outShufHi0+0x28(SB)/8, $0x767574737271706e
2538 DATA expandAVX512_60_outShufHi0+0x30(SB)/8, $0xffffffffffffff78
2539 DATA expandAVX512_60_outShufHi0+0x38(SB)/8, $0x31ffff2f2e2d2c2b
2540
// Byte indices for the masked VPERMB that builds the rest of the high
// result. Only lanes 49-55, 61 and 62 hold real indices; they are the lanes
// enabled by the mask 0x60fe000000000000 used with it in expandAVX512_60.
// NOTE(review): declared without the <> file-local suffix — confirm.
2541 GLOBL expandAVX512_60_outShufHi1(SB), RODATA, $0x40
2542 DATA expandAVX512_60_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
2543 DATA expandAVX512_60_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
2544 DATA expandAVX512_60_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
2545 DATA expandAVX512_60_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
2546 DATA expandAVX512_60_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
2547 DATA expandAVX512_60_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
2548 DATA expandAVX512_60_outShufHi1+0x30(SB)/8, $0x06050403020100ff
2549 DATA expandAVX512_60_outShufHi1+0x38(SB)/8, $0xff0908ffffffffff
2550
// expandAVX512_60 loads 64 bytes from the address in AX and leaves two
// 64-byte results in Z1 (low) and Z2 (high). Judging by its name it
// presumably expands each input bit to 60 output bits — TODO confirm
// against mkasm.go.
//
// NOSPLIT with frame $0-0: no stack frame and no stack arguments.
// Overwrites Z0-Z7, K1 and AX (AX is reused as scratch after the input load).
2551 TEXT expandAVX512_60<>(SB), NOSPLIT, $0-0
// Load the permutation constants; the matrices are used from memory below.
2552 VMOVDQU64 expandAVX512_60_inShuf0<>(SB), Z0
2553 VMOVDQU64 expandAVX512_60_inShuf1<>(SB), Z2
2554 VMOVDQU64 expandAVX512_60_inShuf2<>(SB), Z3
2555 VMOVDQU64 expandAVX512_60_inShuf3<>(SB), Z4
2556 VMOVDQU64 expandAVX512_60_outShufLo(SB), Z1
2557 VMOVDQU64 expandAVX512_60_outShufHi0(SB), Z5
2558 VMOVDQU64 expandAVX512_60_outShufHi1(SB), Z6
// Z7 = the 64 input bytes. AX is not needed as a pointer after this.
2559 VMOVDQU64 (AX), Z7
// Four gather + bit-matrix steps: Zn = matN applied to the input bytes
// gathered by inShufN, for Z0, Z2, Z3 and Z4.
2560 VPERMB Z7, Z0, Z0
2561 VGF2P8AFFINEQB $0, expandAVX512_60_mat0<>(SB), Z0, Z0
2562 VPERMB Z7, Z2, Z2
2563 VGF2P8AFFINEQB $0, expandAVX512_60_mat1<>(SB), Z2, Z2
2564 VPERMB Z7, Z3, Z3
2565 VGF2P8AFFINEQB $0, expandAVX512_60_mat2<>(SB), Z3, Z3
2566 VPERMB Z7, Z4, Z4
2567 VGF2P8AFFINEQB $0, expandAVX512_60_mat3<>(SB), Z4, Z4
// Z1 = bytes picked by outShufLo from Z0 (indices 0-63) and Z2 (64-127).
2568 VPERMI2B Z2, Z0, Z1
// High result, part 1: Z5 = bytes picked by outShufHi0 from Z2 and Z3 in the
// lanes enabled by K1; all other lanes are zeroed (.Z).
2569 MOVQ $0x9f01ffffffffffff, AX
2570 KMOVQ AX, K1
2571 VPERMI2B.Z Z3, Z2, K1, Z5
// High result, part 2: the mask is the bitwise complement of the one above,
// so Z0 gets bytes of Z4 (picked by outShufHi1) in exactly the lanes that
// part 1 zeroed, and zero elsewhere.
2572 MOVQ $0x60fe000000000000, AX
2573 KMOVQ AX, K1
2574 VPERMB.Z Z4, Z6, K1, Z0
// The two parts occupy disjoint lanes, so OR merges them into Z2.
2575 VPORQ Z0, Z5, Z2
2576 RET
2577
// Constant tables used by expandAVX512_64. Every table is a 64-byte ($0x40)
// read-only symbol, i.e. exactly one ZMM register's worth of data.

// Byte indices for the first VPERMB: all zero, so every lane receives
// input byte 0.
2578 GLOBL expandAVX512_64_inShuf0<>(SB), RODATA, $0x40
2579 DATA expandAVX512_64_inShuf0<>+0x00(SB)/8, $0x0000000000000000
2580 DATA expandAVX512_64_inShuf0<>+0x08(SB)/8, $0x0000000000000000
2581 DATA expandAVX512_64_inShuf0<>+0x10(SB)/8, $0x0000000000000000
2582 DATA expandAVX512_64_inShuf0<>+0x18(SB)/8, $0x0000000000000000
2583 DATA expandAVX512_64_inShuf0<>+0x20(SB)/8, $0x0000000000000000
2584 DATA expandAVX512_64_inShuf0<>+0x28(SB)/8, $0x0000000000000000
2585 DATA expandAVX512_64_inShuf0<>+0x30(SB)/8, $0x0000000000000000
2586 DATA expandAVX512_64_inShuf0<>+0x38(SB)/8, $0x0000000000000000
2587
// Matrix operand for VGF2P8AFFINEQB: one 8x8 bit matrix per qword.
// In qword k every row is 1<<k, so each result byte is 0x00 or 0xff
// depending on bit k of the corresponding source byte.
2588 GLOBL expandAVX512_64_mat0<>(SB), RODATA, $0x40
2589 DATA expandAVX512_64_mat0<>+0x00(SB)/8, $0x0101010101010101
2590 DATA expandAVX512_64_mat0<>+0x08(SB)/8, $0x0202020202020202
2591 DATA expandAVX512_64_mat0<>+0x10(SB)/8, $0x0404040404040404
2592 DATA expandAVX512_64_mat0<>+0x18(SB)/8, $0x0808080808080808
2593 DATA expandAVX512_64_mat0<>+0x20(SB)/8, $0x1010101010101010
2594 DATA expandAVX512_64_mat0<>+0x28(SB)/8, $0x2020202020202020
2595 DATA expandAVX512_64_mat0<>+0x30(SB)/8, $0x4040404040404040
2596 DATA expandAVX512_64_mat0<>+0x38(SB)/8, $0x8080808080808080
2597
// Byte indices for the second VPERMB: all 0x01, so every lane receives
// input byte 1.
2598 GLOBL expandAVX512_64_inShuf1<>(SB), RODATA, $0x40
2599 DATA expandAVX512_64_inShuf1<>+0x00(SB)/8, $0x0101010101010101
2600 DATA expandAVX512_64_inShuf1<>+0x08(SB)/8, $0x0101010101010101
2601 DATA expandAVX512_64_inShuf1<>+0x10(SB)/8, $0x0101010101010101
2602 DATA expandAVX512_64_inShuf1<>+0x18(SB)/8, $0x0101010101010101
2603 DATA expandAVX512_64_inShuf1<>+0x20(SB)/8, $0x0101010101010101
2604 DATA expandAVX512_64_inShuf1<>+0x28(SB)/8, $0x0101010101010101
2605 DATA expandAVX512_64_inShuf1<>+0x30(SB)/8, $0x0101010101010101
2606 DATA expandAVX512_64_inShuf1<>+0x38(SB)/8, $0x0101010101010101
2607
// Output byte indices: the identity permutation 0x00..0x3f.
// NOTE(review): unlike the tables above, this symbol is declared without the
// <> file-local suffix — confirm this is intentional in the generator.
2608 GLOBL expandAVX512_64_outShufLo(SB), RODATA, $0x40
2609 DATA expandAVX512_64_outShufLo+0x00(SB)/8, $0x0706050403020100
2610 DATA expandAVX512_64_outShufLo+0x08(SB)/8, $0x0f0e0d0c0b0a0908
2611 DATA expandAVX512_64_outShufLo+0x10(SB)/8, $0x1716151413121110
2612 DATA expandAVX512_64_outShufLo+0x18(SB)/8, $0x1f1e1d1c1b1a1918
2613 DATA expandAVX512_64_outShufLo+0x20(SB)/8, $0x2726252423222120
2614 DATA expandAVX512_64_outShufLo+0x28(SB)/8, $0x2f2e2d2c2b2a2928
2615 DATA expandAVX512_64_outShufLo+0x30(SB)/8, $0x3736353433323130
2616 DATA expandAVX512_64_outShufLo+0x38(SB)/8, $0x3f3e3d3c3b3a3938
2617
// expandAVX512_64 loads 64 bytes from the address in AX and leaves two
// 64-byte results in Z1 and Z2. With the tables above, qword k of Z1 is
// all-ones or all-zero according to bit k of input byte 0, and Z2 is the
// same for input byte 1, i.e. each of those 16 bits is widened to 64 bits.
//
// NOSPLIT with frame $0-0: no stack frame and no stack arguments.
// Overwrites Z0-Z4. Z1 holds mat0 for both affine steps before it is
// overwritten with the low result, so the statement order matters.
2618 TEXT expandAVX512_64<>(SB), NOSPLIT, $0-0
// Load the permutation and matrix constants.
2619 VMOVDQU64 expandAVX512_64_inShuf0<>(SB), Z0
2620 VMOVDQU64 expandAVX512_64_mat0<>(SB), Z1
2621 VMOVDQU64 expandAVX512_64_inShuf1<>(SB), Z2
2622 VMOVDQU64 expandAVX512_64_outShufLo(SB), Z3
// Z4 = the 64 input bytes.
2623 VMOVDQU64 (AX), Z4
// Z0 = mat0 applied to input byte 0 replicated into every lane.
2624 VPERMB Z4, Z0, Z0
2625 VGF2P8AFFINEQB $0, Z1, Z0, Z0
// Z2 = mat0 applied to input byte 1 replicated into every lane.
2626 VPERMB Z4, Z2, Z2
2627 VGF2P8AFFINEQB $0, Z1, Z2, Z2
// outShufLo is the identity permutation, so these two shuffles simply move
// the intermediates into the result registers (Z1 = Z0, Z2 = Z2).
2628 VPERMB Z0, Z3, Z1
2629 VPERMB Z2, Z3, Z2
2630 RET
2631
2632
View as plain text