/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
 */
#include <sys/isa_defs.h>

#if defined(__amd64)

#include <sys/types.h>
#include <sys/simd.h>

/*
 * Shorthand used by every inline-assembly statement in this file: all of
 * them are emitted as volatile asm.
 */
#define	__asm __asm__ __volatile__

/*
 * REG_CNT(r...) evaluates to the number of entries in the register list r.
 * The list is followed by the descending sequence 8..1, so the value that
 * lands in the ninth slot (N) of _REG_CNT() equals the list length.
 * Lists of up to eight entries are supported.
 */
#define	_REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
#define	REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)

/*
 * VRn(r...) expands to the string literal "ymm<k>", where k is entry n
 * (counting from zero) of the register list r.  The string is pasted into
 * asm templates by the macros below.
 *
 * The VRn_() helpers pick the entry by position and stringify it.
 */
#define	VR0_(REG, ...) "ymm"#REG
#define	VR1_(_1, REG, ...) "ymm"#REG
#define	VR2_(_1, _2, REG, ...) "ymm"#REG
#define	VR3_(_1, _2, _3, REG, ...) "ymm"#REG
#define	VR4_(_1, _2, _3, _4, REG, ...) "ymm"#REG
#define	VR5_(_1, _2, _3, _4, _5, REG, ...) "ymm"#REG
#define	VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "ymm"#REG
#define	VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "ymm"#REG

/*
 * The constants appended after r are padding.  The macros below switch on
 * REG_CNT(r) but every case is still preprocessed, so VRn() is also
 * expanded with lists that are too short for it; the padding supplies the
 * missing arguments (the resulting register name is only used in a case
 * that is not selected).  VR2/VR3 accept lists as short as two entries,
 * VR4..VR7 lists as short as four.
 */
#define	VR0(r...) VR0_(r)
#define	VR1(r...) VR1_(r)
#define	VR2(r...) VR2_(r, 1)
#define	VR3(r...) VR3_(r, 1, 2)
#define	VR4(r...) VR4_(r, 1, 2)
#define	VR5(r...) VR5_(r, 1, 2, 3)
#define	VR6(r...) VR6_(r, 1, 2, 3, 4)
#define	VR7(r...) VR7_(r, 1, 2, 3, 4, 5)

/*
 * R_01(r...) yields the first two entries of a register list and
 * R_23(r...) the third and fourth, so a four-register list can be handed
 * to a two-register macro in halves.  The constants appended in R_23()
 * pad lists shorter than four entries so the expansion stays well formed.
 */
#define	R_01(REG1, REG2, ...) REG1, REG2
#define	_R_23(_0, _1, REG2, REG3, ...) REG2, REG3
#define	R_23(REG...) _R_23(REG, 1, 2, 3)

/*
 * Placed in the default case of every register-count switch below: a
 * register list of unsupported length trips ASSERT(0).
 */
#define	ZFS_ASM_BUG()	ASSERT(0)

/*
 * Lookup table read by _MULx2(): the rows for constant c start at index
 * 4 * c, and each row is 16 bytes.  Defined elsewhere.
 */
extern const uint8_t gf_clmul_mod_lt[4*256][16];

/* Size in bytes of one vector element (one ymm register). */
#define	ELEM_SIZE 32

/*
 * One vector element: ELEM_SIZE bytes, aligned to ELEM_SIZE so it can be
 * accessed with the aligned moves used by LOAD() and STORE().
 */
typedef struct v {
	uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
} v_t;


/*
 * XOR_ACC(src, r...): xor consecutive 32-byte elements read from src
 * (offsets 0x00, 0x20, ...) into the listed registers, one element per
 * register.  Accepts 2 or 4 registers; any other count hits
 * ZFS_ASM_BUG().
 *
 * The statement reads memory that is reachable only through the pointer
 * passed as a register operand, so it carries a "memory" clobber to keep
 * the compiler from moving stores to that buffer past it.
 */
#define	XOR_ACC(src, r...)						\
{									\
	switch (REG_CNT(r)) {						\
	case 4:								\
		__asm(							\
		    "vpxor 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n"	\
		    "vpxor 0x20(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n"	\
		    "vpxor 0x40(%[SRC]), %%" VR2(r)", %%" VR2(r) "\n"	\
		    "vpxor 0x60(%[SRC]), %%" VR3(r)", %%" VR3(r) "\n"	\
		    : : [SRC] "r" (src) : "memory");			\
		break;							\
	case 2:								\
		__asm(							\
		    "vpxor 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n"	\
		    "vpxor 0x20(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n"	\
		    : : [SRC] "r" (src) : "memory");			\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * XOR(r...): treat the list as two halves and xor the first half into the
 * second, register by register (second[i] ^= first[i]).  Accepts 8 or 4
 * registers; any other count hits ZFS_ASM_BUG().
 *
 * These asm statements have no operands, so register names are written
 * with a single '%'.
 */
#define	XOR(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm(							\
		    "vpxor %" VR0(r) ", %" VR4(r)", %" VR4(r) "\n"	\
		    "vpxor %" VR1(r) ", %" VR5(r)", %" VR5(r) "\n"	\
		    "vpxor %" VR2(r) ", %" VR6(r)", %" VR6(r) "\n"	\
		    "vpxor %" VR3(r) ", %" VR7(r)", %" VR7(r));		\
		break;							\
	case 4:								\
		__asm(							\
		    "vpxor %" VR0(r) ", %" VR2(r)", %" VR2(r) "\n"	\
		    "vpxor %" VR1(r) ", %" VR3(r)", %" VR3(r));		\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/* ZERO(r...): clear the listed registers by xoring each with itself. */
#define	ZERO(r...)	XOR(r, r)

/*
 * COPY(r...): treat the list as two halves and copy the first half into
 * the second, register by register (second[i] = first[i]).  Accepts 8 or
 * 4 registers; any other count hits ZFS_ASM_BUG().
 */
#define	COPY(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm(							\
		    "vmovdqa %" VR0(r) ", %" VR4(r) "\n"		\
		    "vmovdqa %" VR1(r) ", %" VR5(r) "\n"		\
		    "vmovdqa %" VR2(r) ", %" VR6(r) "\n"		\
		    "vmovdqa %" VR3(r) ", %" VR7(r));			\
		break;							\
	case 4:								\
		__asm(							\
		    "vmovdqa %" VR0(r) ", %" VR2(r) "\n"		\
		    "vmovdqa %" VR1(r) ", %" VR3(r));			\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * LOAD(src, r...): fill the listed registers from consecutive 32-byte
 * elements at src (offsets 0x00, 0x20, ...).  The aligned move vmovdqa is
 * used, so src has to be 32-byte aligned.  Accepts 2 or 4 registers; any
 * other count hits ZFS_ASM_BUG().
 *
 * The statement reads memory that is reachable only through the pointer
 * passed as a register operand, so it carries a "memory" clobber to keep
 * the compiler from moving stores to that buffer past it.
 */
#define	LOAD(src, r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 4:								\
		__asm(							\
		    "vmovdqa 0x00(%[SRC]), %%" VR0(r) "\n"		\
		    "vmovdqa 0x20(%[SRC]), %%" VR1(r) "\n"		\
		    "vmovdqa 0x40(%[SRC]), %%" VR2(r) "\n"		\
		    "vmovdqa 0x60(%[SRC]), %%" VR3(r) "\n"		\
		    : : [SRC] "r" (src) : "memory");			\
		break;							\
	case 2:								\
		__asm(							\
		    "vmovdqa 0x00(%[SRC]), %%" VR0(r) "\n"		\
		    "vmovdqa 0x20(%[SRC]), %%" VR1(r) "\n"		\
		    : : [SRC] "r" (src) : "memory");			\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * STORE(dst, r...): write the listed registers to consecutive 32-byte
 * elements at dst (offsets 0x00, 0x20, ...).  The aligned move vmovdqa is
 * used, so dst has to be 32-byte aligned.  Accepts 2 or 4 registers; any
 * other count hits ZFS_ASM_BUG().
 *
 * The statement modifies memory that is reachable only through the
 * pointer passed as a register operand, so it carries a "memory" clobber:
 * without it the compiler has no indication that the destination buffer
 * changed.
 */
#define	STORE(dst, r...)						\
{									\
	switch (REG_CNT(r)) {						\
	case 4:								\
		__asm(							\
		    "vmovdqa %%" VR0(r) ", 0x00(%[DST])\n"		\
		    "vmovdqa %%" VR1(r) ", 0x20(%[DST])\n"		\
		    "vmovdqa %%" VR2(r) ", 0x40(%[DST])\n"		\
		    "vmovdqa %%" VR3(r) ", 0x60(%[DST])\n"		\
		    : : [DST] "r" (dst) : "memory");			\
		break;							\
	case 2:								\
		__asm(							\
		    "vmovdqa %%" VR0(r) ", 0x00(%[DST])\n"		\
		    "vmovdqa %%" VR1(r) ", 0x20(%[DST])\n"		\
		    : : [DST] "r" (dst) : "memory");			\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * FLUSH(): issue vzeroupper, which zeroes the upper halves of the ymm
 * registers.  Used by raidz_math_end() before the FPU is released.
 */
#define	FLUSH()								\
{									\
	__asm("vzeroupper");						\
}

/*
 * MUL2_SETUP(): load the constants _MUL2() depends on.
 *   ymm14 - 0x1d in every byte (64-bit pattern broadcast to all lanes)
 *   ymm15 - all zero
 * Both registers must stay intact until the last _MUL2()/MUL2()/MUL4()
 * that relies on them.
 */
#define	MUL2_SETUP()							\
{									\
	__asm("vmovq %0,   %%xmm14" :: "r"(0x1d1d1d1d1d1d1d1d));	\
	__asm("vpbroadcastq %xmm14, %ymm14");				\
	__asm("vpxor        %ymm15, %ymm15 ,%ymm15");			\
}

/*
 * _MUL2(r...): multiply every byte of two registers by 2, using 0x1d as
 * the reduction constant.  Per byte:
 *   - ymm12/ymm13 receive 0xff where 0 > byte (signed), i.e. where the
 *     top bit of the byte is set;
 *   - the byte is doubled with vpaddb;
 *   - the mask is reduced to 0x1d (and with ymm14) and xored into the
 *     doubled byte.
 * Requires the ymm14/ymm15 constants from MUL2_SETUP() and overwrites
 * ymm12 and ymm13.  Accepts exactly 2 registers; any other count hits
 * ZFS_ASM_BUG().
 */
#define	_MUL2(r...)							\
{									\
	switch	(REG_CNT(r)) {						\
	case 2:								\
		__asm(							\
		    "vpcmpgtb %" VR0(r)", %ymm15,     %ymm12\n"		\
		    "vpcmpgtb %" VR1(r)", %ymm15,     %ymm13\n"		\
		    "vpaddb   %" VR0(r)", %" VR0(r)", %" VR0(r) "\n"	\
		    "vpaddb   %" VR1(r)", %" VR1(r)", %" VR1(r) "\n"	\
		    "vpand    %ymm14,     %ymm12,     %ymm12\n"		\
		    "vpand    %ymm14,     %ymm13,     %ymm13\n"		\
		    "vpxor    %ymm12,     %" VR0(r)", %" VR0(r) "\n"	\
		    "vpxor    %ymm13,     %" VR1(r)", %" VR1(r));	\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * MUL2(r...): apply _MUL2() to 2 or 4 registers; a four-register list is
 * processed as two pairs.  Any other count hits ZFS_ASM_BUG().
 */
#define	MUL2(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 4:								\
	    _MUL2(R_01(r));						\
	    _MUL2(R_23(r));						\
	    break;							\
	case 2:								\
	    _MUL2(r);							\
	    break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/* MUL4(r...): multiply by 4, implemented as two successive MUL2() steps. */
#define	MUL4(r...)							\
{									\
	MUL2(r);							\
	MUL2(r);							\
}

/*
 * Scratch registers used by _MULx2().  _0f and _tb share ymm15: the nibble
 * mask held in _0f is no longer needed by the time _tb is first written.
 */
#define	_0f		"ymm15"
#define	_as		"ymm14"
#define	_bs		"ymm13"
#define	_ltmod		"ymm12"
#define	_ltmul		"ymm11"
#define	_ta		"ymm10"
#define	_tb		"ymm15"

/* Broadcast into _0f to isolate the low nibble of every byte. */
static const uint8_t __attribute__((aligned(32))) _mul_mask = 0x0F;

/*
 * _MULx2(c, r...): transform two registers with the four 16-byte lookup
 * rows that start at gf_clmul_mod_lt[4 * c].
 *   - every byte is split into its high nibble (shift right by 4, then
 *     masked) and its low nibble (masked);
 *   - the rows at offsets 0x00/0x10 are indexed by the high nibble and
 *     the rows at 0x20/0x30 by the low nibble, using vpshufb;
 *   - the four lookup results are xored together into the register.
 * Presumably this is multiplication of every byte by the constant c; the
 * table contents are defined elsewhere and cannot be confirmed here.
 *
 * Overwrites ymm10..ymm15.  That includes the ymm14/ymm15 constants
 * loaded by MUL2_SETUP(), which therefore do not survive this macro.
 * Accepts exactly 2 registers; any other count hits ZFS_ASM_BUG().
 */
#define	_MULx2(c, r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 2:								\
		__asm(							\
		    "vpbroadcastb (%[mask]), %%" _0f "\n"		\
		    /* upper bits */					\
		    "vbroadcasti128 0x00(%[lt]), %%" _ltmod "\n"	\
		    "vbroadcasti128 0x10(%[lt]), %%" _ltmul "\n"	\
									\
		    "vpsraw $0x4, %%" VR0(r) ", %%"_as "\n"		\
		    "vpsraw $0x4, %%" VR1(r) ", %%"_bs "\n"		\
		    "vpand %%" _0f ", %%" VR0(r) ", %%" VR0(r) "\n"	\
		    "vpand %%" _0f ", %%" VR1(r) ", %%" VR1(r) "\n"	\
		    "vpand %%" _0f ", %%" _as ", %%" _as "\n"		\
		    "vpand %%" _0f ", %%" _bs ", %%" _bs "\n"		\
									\
		    "vpshufb %%" _as ", %%" _ltmod ", %%" _ta "\n"	\
		    "vpshufb %%" _bs ", %%" _ltmod ", %%" _tb "\n"	\
		    "vpshufb %%" _as ", %%" _ltmul ", %%" _as "\n"	\
		    "vpshufb %%" _bs ", %%" _ltmul ", %%" _bs "\n"	\
		    /* lower bits */					\
		    "vbroadcasti128 0x20(%[lt]), %%" _ltmod "\n"	\
		    "vbroadcasti128 0x30(%[lt]), %%" _ltmul "\n"	\
									\
		    "vpxor %%" _ta ", %%" _as ", %%" _as "\n"		\
		    "vpxor %%" _tb ", %%" _bs ", %%" _bs "\n"		\
									\
		    "vpshufb %%" VR0(r) ", %%" _ltmod ", %%" _ta "\n"	\
		    "vpshufb %%" VR1(r) ", %%" _ltmod ", %%" _tb "\n"	\
		    "vpshufb %%" VR0(r) ", %%" _ltmul ", %%" VR0(r) "\n"\
		    "vpshufb %%" VR1(r) ", %%" _ltmul ", %%" VR1(r) "\n"\
									\
		    "vpxor %%" _ta ", %%" VR0(r) ", %%" VR0(r) "\n"	\
		    "vpxor %%" _as ", %%" VR0(r) ", %%" VR0(r) "\n"	\
		    "vpxor %%" _tb ", %%" VR1(r) ", %%" VR1(r) "\n"	\
		    "vpxor %%" _bs ", %%" VR1(r) ", %%" VR1(r) "\n"	\
		    : : [mask] "r" (&_mul_mask),			\
		    [lt] "r" (gf_clmul_mod_lt[4*(c)]));			\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * MUL(c, r...): apply _MULx2() with constant c to 2 or 4 registers; a
 * four-register list is processed as two pairs.  Any other count hits
 * ZFS_ASM_BUG().
 */
#define	MUL(c, r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 4:								\
		_MULx2(c, R_01(r));					\
		_MULx2(c, R_23(r));					\
		break;							\
	case 2:								\
		_MULx2(c, R_01(r));					\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * Bracket a run of vector code: raidz_math_begin() calls kfpu_begin(),
 * raidz_math_end() issues FLUSH() (vzeroupper) and then calls kfpu_end().
 */
#define	raidz_math_begin()	kfpu_begin()
#define	raidz_math_end()						\
{									\
	FLUSH();							\
	kfpu_end();							\
}


/*
 * Per-operation parameters.  For each operation:
 *   *_STRIDE   - presumably the number of v_t elements handled per step
 *                by the generic code in vdev_raidz_math_impl.h (confirm
 *                there);
 *   *_DEFINE() - empty here: nothing is declared, registers are named
 *                directly in the asm templates;
 *   other lists - ymm register numbers, in the form taken by the r...
 *                argument of the macros above.
 * Lists are limited to ymm0..ymm9; ymm10..ymm15 are used as scratch and
 * constant registers by _MUL2()/_MULx2().
 */
#define	SYN_STRIDE		4

#define	ZERO_STRIDE		4
#define	ZERO_DEFINE()		{}
#define	ZERO_D			0, 1, 2, 3

#define	COPY_STRIDE		4
#define	COPY_DEFINE()		{}
#define	COPY_D			0, 1, 2, 3

#define	ADD_STRIDE		4
#define	ADD_DEFINE()		{}
#define	ADD_D			0, 1, 2, 3

#define	MUL_STRIDE		4
#define	MUL_DEFINE()		{}
#define	MUL_D			0, 1, 2, 3

#define	GEN_P_STRIDE		4
#define	GEN_P_DEFINE()		{}
#define	GEN_P_P			0, 1, 2, 3

#define	GEN_PQ_STRIDE		4
#define	GEN_PQ_DEFINE()		{}
#define	GEN_PQ_D		0, 1, 2, 3
#define	GEN_PQ_C		4, 5, 6, 7

#define	GEN_PQR_STRIDE		4
#define	GEN_PQR_DEFINE()	{}
#define	GEN_PQR_D		0, 1, 2, 3
#define	GEN_PQR_C		4, 5, 6, 7

#define	SYN_Q_DEFINE()		{}
#define	SYN_Q_D			0, 1, 2, 3
#define	SYN_Q_X			4, 5, 6, 7

#define	SYN_R_DEFINE()		{}
#define	SYN_R_D			0, 1, 2, 3
#define	SYN_R_X			4, 5, 6, 7

#define	SYN_PQ_DEFINE()		{}
#define	SYN_PQ_D		0, 1, 2, 3
#define	SYN_PQ_X		4, 5, 6, 7

#define	REC_PQ_STRIDE		2
#define	REC_PQ_DEFINE()		{}
#define	REC_PQ_X		0, 1
#define	REC_PQ_Y		2, 3
#define	REC_PQ_T		4, 5

#define	SYN_PR_DEFINE()		{}
#define	SYN_PR_D		0, 1, 2, 3
#define	SYN_PR_X		4, 5, 6, 7

#define	REC_PR_STRIDE		2
#define	REC_PR_DEFINE()		{}
#define	REC_PR_X		0, 1
#define	REC_PR_Y		2, 3
#define	REC_PR_T		4, 5

#define	SYN_QR_DEFINE()		{}
#define	SYN_QR_D		0, 1, 2, 3
#define	SYN_QR_X		4, 5, 6, 7

#define	REC_QR_STRIDE		2
#define	REC_QR_DEFINE()		{}
#define	REC_QR_X		0, 1
#define	REC_QR_Y		2, 3
#define	REC_QR_T		4, 5

#define	SYN_PQR_DEFINE()	{}
#define	SYN_PQR_D		0, 1, 2, 3
#define	SYN_PQR_X		4, 5, 6, 7

#define	REC_PQR_STRIDE		2
#define	REC_PQR_DEFINE()	{}
#define	REC_PQR_X		0, 1
#define	REC_PQR_Y		2, 3
#define	REC_PQR_Z		4, 5
#define	REC_PQR_XS		6, 7
#define	REC_PQR_YS		8, 9


390*f91a4547SGvozden Neskovic #include <sys/vdev_raidz_impl.h>
391*f91a4547SGvozden Neskovic #include "vdev_raidz_math_impl.h"
392*f91a4547SGvozden Neskovic 
393*f91a4547SGvozden Neskovic DEFINE_GEN_METHODS(avx2);
394*f91a4547SGvozden Neskovic DEFINE_REC_METHODS(avx2);
395*f91a4547SGvozden Neskovic 
396*f91a4547SGvozden Neskovic static boolean_t
raidz_will_avx2_work(void)397*f91a4547SGvozden Neskovic raidz_will_avx2_work(void)
398*f91a4547SGvozden Neskovic {
399*f91a4547SGvozden Neskovic 	return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available());
400*f91a4547SGvozden Neskovic }
401*f91a4547SGvozden Neskovic 
402*f91a4547SGvozden Neskovic const raidz_impl_ops_t vdev_raidz_avx2_impl = {
403*f91a4547SGvozden Neskovic 	.init = NULL,
404*f91a4547SGvozden Neskovic 	.fini = NULL,
405*f91a4547SGvozden Neskovic 	.gen = RAIDZ_GEN_METHODS(avx2),
406*f91a4547SGvozden Neskovic 	.rec = RAIDZ_REC_METHODS(avx2),
407*f91a4547SGvozden Neskovic 	.is_supported = &raidz_will_avx2_work,
408*f91a4547SGvozden Neskovic 	.name = "avx2"
409*f91a4547SGvozden Neskovic };
410*f91a4547SGvozden Neskovic 
411*f91a4547SGvozden Neskovic #elif defined(__i386)
412*f91a4547SGvozden Neskovic 
413*f91a4547SGvozden Neskovic /* 32-bit stub for user-level fakekernel dependencies */
414*f91a4547SGvozden Neskovic #include <sys/vdev_raidz_impl.h>
415*f91a4547SGvozden Neskovic const raidz_impl_ops_t vdev_raidz_avx2_impl = {
416*f91a4547SGvozden Neskovic 	.init = NULL,
417*f91a4547SGvozden Neskovic 	.fini = NULL,
418*f91a4547SGvozden Neskovic 	.gen = NULL,
419*f91a4547SGvozden Neskovic 	.rec = NULL,
420*f91a4547SGvozden Neskovic 	.is_supported = NULL,
421*f91a4547SGvozden Neskovic 	.name = "avx2"
422*f91a4547SGvozden Neskovic };
423*f91a4547SGvozden Neskovic 
424*f91a4547SGvozden Neskovic #endif /* defined(__amd64) */
425