/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
 */
24*f91a4547SGvozden Neskovic #include <sys/isa_defs.h>
25*f91a4547SGvozden Neskovic
26*f91a4547SGvozden Neskovic #if defined(__amd64)
27*f91a4547SGvozden Neskovic
28*f91a4547SGvozden Neskovic #include <sys/types.h>
29*f91a4547SGvozden Neskovic #include <sys/simd.h>
30*f91a4547SGvozden Neskovic
/*
 * Shorthand used by every macro in this file: __asm(...) expands to a
 * volatile GNU inline-assembly statement.
 */
#define __asm __asm__ __volatile__
32*f91a4547SGvozden Neskovic
/*
 * REG_CNT(r...) expands to the number of arguments in `r` (1 through 8).
 *
 * The argument list is followed by the descending literals 8..1, and
 * _REG_CNT() returns whatever lands in its ninth position: each argument in
 * `r` pushes the literals one slot to the right, so that slot holds the
 * argument count.
 */
#define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
#define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
35*f91a4547SGvozden Neskovic
/*
 * VRn_(...) picks the n-th (zero-based) argument and turns it into the
 * register-name string "ymm<arg>", e.g. VR2_(4, 5, 6, 7) -> "ymm6".
 */
#define VR0_(REG, ...) "ymm"#REG
#define VR1_(_1, REG, ...) "ymm"#REG
#define VR2_(_1, _2, REG, ...) "ymm"#REG
#define VR3_(_1, _2, _3, REG, ...) "ymm"#REG
#define VR4_(_1, _2, _3, _4, REG, ...) "ymm"#REG
#define VR5_(_1, _2, _3, _4, _5, REG, ...) "ymm"#REG
#define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "ymm"#REG
#define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "ymm"#REG

/*
 * VRn(r...) is the form used inside the assembly templates: the name of the
 * n-th register of the list `r`.
 *
 * VR2..VR7 append a few literal arguments after `r`.  They only pad the
 * argument list so that the macro still expands to a string when `r` is
 * shorter than n + 1 entries; in that situation the result names a padding
 * literal (e.g. VR2(4, 5) -> "ymm1"), not a register of the list.  The
 * operation macros in this file expand every `case` of their switch on
 * REG_CNT(r), so the templates of the cases that do not match the list
 * length still have to be well-formed strings.
 */
#define VR0(r...) VR0_(r)
#define VR1(r...) VR1_(r)
#define VR2(r...) VR2_(r, 1)
#define VR3(r...) VR3_(r, 1, 2)
#define VR4(r...) VR4_(r, 1, 2)
#define VR5(r...) VR5_(r, 1, 2, 3)
#define VR6(r...) VR6_(r, 1, 2, 3, 4)
#define VR7(r...) VR7_(r, 1, 2, 3, 4, 5)
53*f91a4547SGvozden Neskovic
/*
 * Split a register list into pairs.
 *
 * R_01(r...) expands to the first two entries of the list and R_23(r...) to
 * the third and fourth.  R_23 pads the list with "1, 2, 3" so it still
 * expands when fewer than four entries are given; the result then consists
 * of padding literals rather than entries of the list.
 */
#define R_01(REG1, REG2, ...) REG1, REG2
#define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3
#define R_23(REG...) _R_23(REG, 1, 2, 3)
57*f91a4547SGvozden Neskovic
/*
 * Placed in the `default:` arm of every switch on REG_CNT() in this file:
 * reaching it means a macro was invoked with a register count it has no
 * case for.
 */
#define ZFS_ASM_BUG() ASSERT(0)
59*f91a4547SGvozden Neskovic
/*
 * Multiplication lookup tables, defined in another translation unit.
 * _MULx2() below selects the group of four consecutive 16-byte rows starting
 * at row 4 * c for a multiplier `c` and reads them at byte offsets 0x00,
 * 0x10, 0x20 and 0x30.
 */
extern const uint8_t gf_clmul_mod_lt[4*256][16];
61*f91a4547SGvozden Neskovic
/* Size in bytes of one vector element: the width of a ymm register. */
#define ELEM_SIZE 32

/*
 * One vector element: ELEM_SIZE bytes, aligned to ELEM_SIZE.  The aligned
 * vmovdqa loads and stores used by LOAD() and STORE() below step through
 * memory in 0x20-byte units.
 */
typedef struct v {
	uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
} v_t;
67*f91a4547SGvozden Neskovic
68*f91a4547SGvozden Neskovic
/*
 * XOR_ACC(src, r...): XOR memory into registers.
 *
 * For each register i of the list `r` (two or four registers), computes
 *	reg[i] ^= *(32-byte word at src + i * 0x20)
 *
 * src	pointer to the first 32-byte word; passed to the asm as a
 *	general-purpose register operand.
 * r	list of ymm register numbers that are both source and destination.
 *
 * Any other register count ends up in ZFS_ASM_BUG().  The modified ymm
 * registers are not declared to the compiler as clobbered.
 */
#define XOR_ACC(src, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		__asm( \
		    "vpxor 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n" \
		    "vpxor 0x20(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n" \
		    "vpxor 0x40(%[SRC]), %%" VR2(r)", %%" VR2(r) "\n" \
		    "vpxor 0x60(%[SRC]), %%" VR3(r)", %%" VR3(r) "\n" \
		    : : [SRC] "r" (src)); \
		break; \
	case 2: \
		__asm( \
		    "vpxor 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n" \
		    "vpxor 0x20(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n" \
		    : : [SRC] "r" (src)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
90*f91a4547SGvozden Neskovic
/*
 * XOR(r...): register-to-register XOR.
 *
 * `r` is a list of 2n registers (n = 2 or 4).  The first n are sources and
 * the last n are destinations:
 *	r[n + i] ^= r[i]	for i in 0..n-1
 *
 * The templates have no operands, so the statements are basic asm and the
 * register names are written with a single '%'.  Any other register count
 * ends up in ZFS_ASM_BUG().
 */
#define XOR(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		    "vpxor %" VR0(r) ", %" VR4(r)", %" VR4(r) "\n" \
		    "vpxor %" VR1(r) ", %" VR5(r)", %" VR5(r) "\n" \
		    "vpxor %" VR2(r) ", %" VR6(r)", %" VR6(r) "\n" \
		    "vpxor %" VR3(r) ", %" VR7(r)", %" VR7(r)); \
		break; \
	case 4: \
		__asm( \
		    "vpxor %" VR0(r) ", %" VR2(r)", %" VR2(r) "\n" \
		    "vpxor %" VR1(r) ", %" VR3(r)", %" VR3(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
110*f91a4547SGvozden Neskovic
/*
 * ZERO(r...): clear the listed registers.  The list is passed to XOR() twice,
 * so every register is XORed with itself.  `r` must hold two or four
 * registers, giving XOR() a list of four or eight.
 */
#define ZERO(r...) XOR(r, r)
112*f91a4547SGvozden Neskovic
/*
 * COPY(r...): register-to-register copy.
 *
 * `r` is a list of 2n registers (n = 2 or 4); the first n are copied into
 * the last n:
 *	r[n + i] = r[i]		for i in 0..n-1
 *
 * Basic asm (no operands), hence the single '%' before register names.
 * Any other register count ends up in ZFS_ASM_BUG().
 */
#define COPY(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		    "vmovdqa %" VR0(r) ", %" VR4(r) "\n" \
		    "vmovdqa %" VR1(r) ", %" VR5(r) "\n" \
		    "vmovdqa %" VR2(r) ", %" VR6(r) "\n" \
		    "vmovdqa %" VR3(r) ", %" VR7(r)); \
		break; \
	case 4: \
		__asm( \
		    "vmovdqa %" VR0(r) ", %" VR2(r) "\n" \
		    "vmovdqa %" VR1(r) ", %" VR3(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
132*f91a4547SGvozden Neskovic
/*
 * LOAD(src, r...): load consecutive 32-byte words into registers.
 *
 * For each register i of the list `r` (two or four registers):
 *	reg[i] = *(32-byte word at src + i * 0x20)
 *
 * The loads use vmovdqa, the aligned move, so `src` has to be 32-byte
 * aligned.  Any other register count ends up in ZFS_ASM_BUG().  The written
 * ymm registers are not declared to the compiler as clobbered.
 */
#define LOAD(src, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		__asm( \
		    "vmovdqa 0x00(%[SRC]), %%" VR0(r) "\n" \
		    "vmovdqa 0x20(%[SRC]), %%" VR1(r) "\n" \
		    "vmovdqa 0x40(%[SRC]), %%" VR2(r) "\n" \
		    "vmovdqa 0x60(%[SRC]), %%" VR3(r) "\n" \
		    : : [SRC] "r" (src)); \
		break; \
	case 2: \
		__asm( \
		    "vmovdqa 0x00(%[SRC]), %%" VR0(r) "\n" \
		    "vmovdqa 0x20(%[SRC]), %%" VR1(r) "\n" \
		    : : [SRC] "r" (src)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
154*f91a4547SGvozden Neskovic
/*
 * STORE(dst, r...): store registers to consecutive 32-byte words.
 *
 * For each register i of the list `r` (two or four registers):
 *	*(32-byte word at dst + i * 0x20) = reg[i]
 *
 * The stores use vmovdqa, the aligned move, so `dst` has to be 32-byte
 * aligned.  `dst` is passed as an input operand and the asm statement
 * carries no "memory" clobber, so the compiler is not told that memory
 * behind `dst` changes.  Any other register count ends up in ZFS_ASM_BUG().
 */
#define STORE(dst, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		__asm( \
		    "vmovdqa %%" VR0(r) ", 0x00(%[DST])\n" \
		    "vmovdqa %%" VR1(r) ", 0x20(%[DST])\n" \
		    "vmovdqa %%" VR2(r) ", 0x40(%[DST])\n" \
		    "vmovdqa %%" VR3(r) ", 0x60(%[DST])\n" \
		    : : [DST] "r" (dst)); \
		break; \
	case 2: \
		__asm( \
		    "vmovdqa %%" VR0(r) ", 0x00(%[DST])\n" \
		    "vmovdqa %%" VR1(r) ", 0x20(%[DST])\n" \
		    : : [DST] "r" (dst)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
176*f91a4547SGvozden Neskovic
/*
 * FLUSH(): issue vzeroupper, which zeroes the upper 128 bits of every ymm
 * register.  Invoked from raidz_math_end() before kfpu_end().
 */
#define FLUSH() \
{ \
	__asm("vzeroupper"); \
}
181*f91a4547SGvozden Neskovic
/*
 * MUL2_SETUP(): load the constants _MUL2() relies on.
 *
 *	ymm14	0x1d in every byte (the 64-bit pattern 0x1d1d1d1d1d1d1d1d is
 *		moved into xmm14 and broadcast to all four quadwords)
 *	ymm15	all zeroes
 *
 * Both registers are also used as scratch by _MULx2() (see _as, _0f and
 * _tb), so the constants do not survive a MUL().
 */
#define MUL2_SETUP() \
{ \
	__asm("vmovq %0, %%xmm14" :: "r"(0x1d1d1d1d1d1d1d1d)); \
	__asm("vpbroadcastq %xmm14, %ymm14"); \
	__asm("vpxor %ymm15, %ymm15 ,%ymm15"); \
}
188*f91a4547SGvozden Neskovic
/*
 * _MUL2(r...): multiply every byte of two registers by 2, reducing with
 * 0x1d.
 *
 * Expects the MUL2_SETUP() constants: ymm14 = 0x1d in every byte,
 * ymm15 = 0.  For each of the two registers in `r`:
 *
 *	mask = (0 > byte) ? 0xff : 0x00	  signed compare, i.e. top bit set
 *	byte = byte + byte		  shift left by one, top bit dropped
 *	byte ^= mask & 0x1d		  fold the dropped bit back in
 *
 * ymm12 and ymm13 hold the two masks and are overwritten.  Only a list of
 * exactly two registers is handled; anything else ends up in ZFS_ASM_BUG().
 */
#define _MUL2(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 2: \
		__asm( \
		    "vpcmpgtb %" VR0(r)", %ymm15, %ymm12\n" \
		    "vpcmpgtb %" VR1(r)", %ymm15, %ymm13\n" \
		    "vpaddb %" VR0(r)", %" VR0(r)", %" VR0(r) "\n" \
		    "vpaddb %" VR1(r)", %" VR1(r)", %" VR1(r) "\n" \
		    "vpand %ymm14, %ymm12, %ymm12\n" \
		    "vpand %ymm14, %ymm13, %ymm13\n" \
		    "vpxor %ymm12, %" VR0(r)", %" VR0(r) "\n" \
		    "vpxor %ymm13, %" VR1(r)", %" VR1(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
207*f91a4547SGvozden Neskovic
/*
 * MUL2(r...): multiply two or four registers by 2.
 *
 * _MUL2() works on register pairs, so a list of four is processed as the
 * pair R_01(r) followed by the pair R_23(r).  Requires the constants loaded
 * by MUL2_SETUP().  Any other register count ends up in ZFS_ASM_BUG().
 */
#define MUL2(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		_MUL2(R_01(r)); \
		_MUL2(R_23(r)); \
		break; \
	case 2: \
		_MUL2(r); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
222*f91a4547SGvozden Neskovic
/* MUL4(r...): multiply the listed registers by 4, as two MUL2() steps. */
#define MUL4(r...) \
{ \
	MUL2(r); \
	MUL2(r); \
}
228*f91a4547SGvozden Neskovic
/*
 * Scratch registers used by _MULx2().
 *
 * _0f and _tb name the same register (ymm15).  _MULx2() reads the mask in
 * _0f for the last time in its vpand group and writes _tb only afterwards,
 * and it reloads the mask at the start of every invocation.
 */
#define _0f "ymm15"	/* 0x0f in every byte: nibble mask */
#define _as "ymm14"	/* high nibbles of the first register, then partial result */
#define _bs "ymm13"	/* high nibbles of the second register, then partial result */
#define _ltmod "ymm12"	/* table row loaded from offset 0x00 / 0x20 */
#define _ltmul "ymm11"	/* table row loaded from offset 0x10 / 0x30 */
#define _ta "ymm10"	/* temporary for the first register */
#define _tb "ymm15"	/* temporary for the second register (shares ymm15) */
236*f91a4547SGvozden Neskovic
/*
 * Low-nibble mask.  _MULx2() broadcasts this single byte to every byte of
 * ymm15 with vpbroadcastb.
 */
static const uint8_t __attribute__((aligned(32))) _mul_mask = 0x0F;
238*f91a4547SGvozden Neskovic
/*
 * _MULx2(c, r...): multiply every byte of two registers by the constant `c`
 * using 16-entry table lookups on the high and low nibble of each byte.
 *
 * c	multiplier; selects the table group gf_clmul_mod_lt[4 * c].
 * r	exactly two ymm register numbers, updated in place.
 *
 * Steps, for each of the two registers:
 *   1. Broadcast the 0x0f mask into _0f and load the two 16-byte tables at
 *	offsets 0x00 (_ltmod) and 0x10 (_ltmul) into both 128-bit lanes.
 *   2. Split each byte: a 4-bit right shift followed by the mask leaves the
 *	high nibble in _as/_bs, and masking the register itself leaves the
 *	low nibble in place.
 *   3. Look the high nibbles up in both tables (vpshufb) and keep the two
 *	results.
 *   4. Load the tables at offsets 0x20 and 0x30, XOR the two high-nibble
 *	results together, and look the low nibbles up in the new tables.
 *   5. XOR the two low-nibble results and the combined high-nibble result
 *	into the register.
 *
 * ymm10 through ymm15 are overwritten and are not declared as clobbered;
 * this includes the ymm14/ymm15 constants set by MUL2_SETUP().  Any
 * register count other than two ends up in ZFS_ASM_BUG().
 */
#define _MULx2(c, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 2: \
		__asm( \
		    "vpbroadcastb (%[mask]), %%" _0f "\n" \
		    /* upper bits */ \
		    "vbroadcasti128 0x00(%[lt]), %%" _ltmod "\n" \
		    "vbroadcasti128 0x10(%[lt]), %%" _ltmul "\n" \
		    \
		    "vpsraw $0x4, %%" VR0(r) ", %%" _as "\n" \
		    "vpsraw $0x4, %%" VR1(r) ", %%" _bs "\n" \
		    "vpand %%" _0f ", %%" VR0(r) ", %%" VR0(r) "\n" \
		    "vpand %%" _0f ", %%" VR1(r) ", %%" VR1(r) "\n" \
		    "vpand %%" _0f ", %%" _as ", %%" _as "\n" \
		    "vpand %%" _0f ", %%" _bs ", %%" _bs "\n" \
		    \
		    "vpshufb %%" _as ", %%" _ltmod ", %%" _ta "\n" \
		    "vpshufb %%" _bs ", %%" _ltmod ", %%" _tb "\n" \
		    "vpshufb %%" _as ", %%" _ltmul ", %%" _as "\n" \
		    "vpshufb %%" _bs ", %%" _ltmul ", %%" _bs "\n" \
		    /* lower bits */ \
		    "vbroadcasti128 0x20(%[lt]), %%" _ltmod "\n" \
		    "vbroadcasti128 0x30(%[lt]), %%" _ltmul "\n" \
		    \
		    "vpxor %%" _ta ", %%" _as ", %%" _as "\n" \
		    "vpxor %%" _tb ", %%" _bs ", %%" _bs "\n" \
		    \
		    "vpshufb %%" VR0(r) ", %%" _ltmod ", %%" _ta "\n" \
		    "vpshufb %%" VR1(r) ", %%" _ltmod ", %%" _tb "\n" \
		    "vpshufb %%" VR0(r) ", %%" _ltmul ", %%" VR0(r) "\n" \
		    "vpshufb %%" VR1(r) ", %%" _ltmul ", %%" VR1(r) "\n" \
		    \
		    "vpxor %%" _ta ", %%" VR0(r) ", %%" VR0(r) "\n" \
		    "vpxor %%" _as ", %%" VR0(r) ", %%" VR0(r) "\n" \
		    "vpxor %%" _tb ", %%" VR1(r) ", %%" VR1(r) "\n" \
		    "vpxor %%" _bs ", %%" VR1(r) ", %%" VR1(r) "\n" \
		    : : [mask] "r" (&_mul_mask), \
		    [lt] "r" (gf_clmul_mod_lt[4*(c)])); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
283*f91a4547SGvozden Neskovic
/*
 * MUL(c, r...): multiply two or four registers by the constant `c`.
 *
 * _MULx2() works on register pairs, so a list of four is processed as the
 * pair R_01(r) followed by the pair R_23(r).  Any other register count ends
 * up in ZFS_ASM_BUG().
 */
#define MUL(c, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		_MULx2(c, R_01(r)); \
		_MULx2(c, R_23(r)); \
		break; \
	case 2: \
		_MULx2(c, R_01(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
298*f91a4547SGvozden Neskovic
/*
 * Bracket a run of vector math: raidz_math_begin() calls kfpu_begin(), and
 * raidz_math_end() issues FLUSH() (vzeroupper) before calling kfpu_end().
 */
#define raidz_math_begin() kfpu_begin()
#define raidz_math_end() \
{ \
	FLUSH(); \
	kfpu_end(); \
}
305*f91a4547SGvozden Neskovic
306*f91a4547SGvozden Neskovic
/*
 * Per-operation configuration.  NOTE(review): presumably consumed by
 * "vdev_raidz_math_impl.h", which is included further down; confirm there.
 *
 * <OP>_STRIDE is a register count, <OP>_DEFINE() expands to an empty block,
 * and the remaining macros are lists of ymm register numbers in the form
 * the VRn()/REG_CNT() helpers above expect.
 */
#define SYN_STRIDE 4

#define ZERO_STRIDE 4
#define ZERO_DEFINE() {}
#define ZERO_D 0, 1, 2, 3

#define COPY_STRIDE 4
#define COPY_DEFINE() {}
#define COPY_D 0, 1, 2, 3

#define ADD_STRIDE 4
#define ADD_DEFINE() {}
#define ADD_D 0, 1, 2, 3

#define MUL_STRIDE 4
#define MUL_DEFINE() {}
#define MUL_D 0, 1, 2, 3
324*f91a4547SGvozden Neskovic
/*
 * Configuration for the parity-generation methods: a register count
 * (<OP>_STRIDE), an empty <OP>_DEFINE() block, and lists of ymm register
 * numbers.  NOTE(review): the role of each list (_P, _D, _C) is defined by
 * the code that consumes these macros, which is not in this file.
 */
#define GEN_P_STRIDE 4
#define GEN_P_DEFINE() {}
#define GEN_P_P 0, 1, 2, 3

#define GEN_PQ_STRIDE 4
#define GEN_PQ_DEFINE() {}
#define GEN_PQ_D 0, 1, 2, 3
#define GEN_PQ_C 4, 5, 6, 7

#define GEN_PQR_STRIDE 4
#define GEN_PQR_DEFINE() {}
#define GEN_PQR_D 0, 1, 2, 3
#define GEN_PQR_C 4, 5, 6, 7
338*f91a4547SGvozden Neskovic
/*
 * Configuration for the SYN_* and REC_* (reconstruction) methods.
 *
 * The SYN_* groups list four registers each for _D (0-3) and _X (4-7) and
 * define no stride of their own (SYN_STRIDE is defined separately).  The
 * REC_* groups work on register pairs (stride 2).  NOTE(review): the role
 * of each list is defined by the code that consumes these macros, which is
 * not in this file.
 */
#define SYN_Q_DEFINE() {}
#define SYN_Q_D 0, 1, 2, 3
#define SYN_Q_X 4, 5, 6, 7

#define SYN_R_DEFINE() {}
#define SYN_R_D 0, 1, 2, 3
#define SYN_R_X 4, 5, 6, 7

#define SYN_PQ_DEFINE() {}
#define SYN_PQ_D 0, 1, 2, 3
#define SYN_PQ_X 4, 5, 6, 7

#define REC_PQ_STRIDE 2
#define REC_PQ_DEFINE() {}
#define REC_PQ_X 0, 1
#define REC_PQ_Y 2, 3
#define REC_PQ_T 4, 5

#define SYN_PR_DEFINE() {}
#define SYN_PR_D 0, 1, 2, 3
#define SYN_PR_X 4, 5, 6, 7

#define REC_PR_STRIDE 2
#define REC_PR_DEFINE() {}
#define REC_PR_X 0, 1
#define REC_PR_Y 2, 3
#define REC_PR_T 4, 5

#define SYN_QR_DEFINE() {}
#define SYN_QR_D 0, 1, 2, 3
#define SYN_QR_X 4, 5, 6, 7

#define REC_QR_STRIDE 2
#define REC_QR_DEFINE() {}
#define REC_QR_X 0, 1
#define REC_QR_Y 2, 3
#define REC_QR_T 4, 5

#define SYN_PQR_DEFINE() {}
#define SYN_PQR_D 0, 1, 2, 3
#define SYN_PQR_X 4, 5, 6, 7

#define REC_PQR_STRIDE 2
#define REC_PQR_DEFINE() {}
#define REC_PQR_X 0, 1
#define REC_PQR_Y 2, 3
#define REC_PQR_Z 4, 5
#define REC_PQR_XS 6, 7
#define REC_PQR_YS 8, 9
388*f91a4547SGvozden Neskovic
389*f91a4547SGvozden Neskovic
390*f91a4547SGvozden Neskovic #include <sys/vdev_raidz_impl.h>
391*f91a4547SGvozden Neskovic #include "vdev_raidz_math_impl.h"
392*f91a4547SGvozden Neskovic
/*
 * Instantiate the method sets for the "avx2" implementation.
 * NOTE(review): both macros come from the headers included just above;
 * presumably they define the functions that RAIDZ_GEN_METHODS(avx2) and
 * RAIDZ_REC_METHODS(avx2) refer to in the ops table below. Confirm.
 */
DEFINE_GEN_METHODS(avx2);
DEFINE_REC_METHODS(avx2);
395*f91a4547SGvozden Neskovic
396*f91a4547SGvozden Neskovic static boolean_t
raidz_will_avx2_work(void)397*f91a4547SGvozden Neskovic raidz_will_avx2_work(void)
398*f91a4547SGvozden Neskovic {
399*f91a4547SGvozden Neskovic return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available());
400*f91a4547SGvozden Neskovic }
401*f91a4547SGvozden Neskovic
402*f91a4547SGvozden Neskovic const raidz_impl_ops_t vdev_raidz_avx2_impl = {
403*f91a4547SGvozden Neskovic .init = NULL,
404*f91a4547SGvozden Neskovic .fini = NULL,
405*f91a4547SGvozden Neskovic .gen = RAIDZ_GEN_METHODS(avx2),
406*f91a4547SGvozden Neskovic .rec = RAIDZ_REC_METHODS(avx2),
407*f91a4547SGvozden Neskovic .is_supported = &raidz_will_avx2_work,
408*f91a4547SGvozden Neskovic .name = "avx2"
409*f91a4547SGvozden Neskovic };
410*f91a4547SGvozden Neskovic
411*f91a4547SGvozden Neskovic #elif defined(__i386)
412*f91a4547SGvozden Neskovic
/* 32-bit stub for user-level fakekernel dependencies */
414*f91a4547SGvozden Neskovic #include <sys/vdev_raidz_impl.h>
415*f91a4547SGvozden Neskovic const raidz_impl_ops_t vdev_raidz_avx2_impl = {
416*f91a4547SGvozden Neskovic .init = NULL,
417*f91a4547SGvozden Neskovic .fini = NULL,
418*f91a4547SGvozden Neskovic .gen = NULL,
419*f91a4547SGvozden Neskovic .rec = NULL,
420*f91a4547SGvozden Neskovic .is_supported = NULL,
421*f91a4547SGvozden Neskovic .name = "avx2"
422*f91a4547SGvozden Neskovic };
423*f91a4547SGvozden Neskovic
424*f91a4547SGvozden Neskovic #endif /* defined(__amd64) */
425