1e86372a0SGvozden Neskovic /*
2e86372a0SGvozden Neskovic  * CDDL HEADER START
3e86372a0SGvozden Neskovic  *
4e86372a0SGvozden Neskovic  * The contents of this file are subject to the terms of the
5e86372a0SGvozden Neskovic  * Common Development and Distribution License (the "License").
6e86372a0SGvozden Neskovic  * You may not use this file except in compliance with the License.
7e86372a0SGvozden Neskovic  *
8e86372a0SGvozden Neskovic  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9e86372a0SGvozden Neskovic  * or http://www.opensolaris.org/os/licensing.
10e86372a0SGvozden Neskovic  * See the License for the specific language governing permissions
11e86372a0SGvozden Neskovic  * and limitations under the License.
12e86372a0SGvozden Neskovic  *
13e86372a0SGvozden Neskovic  * When distributing Covered Code, include this CDDL HEADER in each
14e86372a0SGvozden Neskovic  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15e86372a0SGvozden Neskovic  * If applicable, add the following below this CDDL HEADER, with the
16e86372a0SGvozden Neskovic  * fields enclosed by brackets "[]" replaced with your own identifying
17e86372a0SGvozden Neskovic  * information: Portions Copyright [yyyy] [name of copyright owner]
18e86372a0SGvozden Neskovic  *
19e86372a0SGvozden Neskovic  * CDDL HEADER END
20e86372a0SGvozden Neskovic  */
21e86372a0SGvozden Neskovic /*
22e86372a0SGvozden Neskovic  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
23e86372a0SGvozden Neskovic  */
24e86372a0SGvozden Neskovic 
25e86372a0SGvozden Neskovic #include <sys/zfs_context.h>
26e86372a0SGvozden Neskovic #include <sys/types.h>
27e86372a0SGvozden Neskovic #include <sys/zio.h>
28e86372a0SGvozden Neskovic #include <sys/debug.h>
29e86372a0SGvozden Neskovic #include <sys/zfs_debug.h>
30e86372a0SGvozden Neskovic #include <sys/vdev_raidz.h>
31e86372a0SGvozden Neskovic #include <sys/vdev_raidz_impl.h>
32e86372a0SGvozden Neskovic #include <sys/simd.h>
33e86372a0SGvozden Neskovic 
/*
 * Fallback whitespace test for environments that do not already provide
 * isspace(): true for space, tab, newline, vertical tab, form feed and
 * carriage return.  Note that the argument is evaluated multiple times.
 */
#ifndef isspace
#define	isspace(c)	((c) == ' ' || (c) == '\t' || (c) == '\n' || \
			(c) == '\v' || (c) == '\f' || (c) == '\r')
#endif
38e86372a0SGvozden Neskovic 
39e86372a0SGvozden Neskovic extern boolean_t raidz_will_scalar_work(void);
40e86372a0SGvozden Neskovic 
41e86372a0SGvozden Neskovic /* Opaque implementation with NULL methods to represent original methods */
42e86372a0SGvozden Neskovic static const raidz_impl_ops_t vdev_raidz_original_impl = {
43e86372a0SGvozden Neskovic 	.name = "original",
44e86372a0SGvozden Neskovic 	.is_supported = raidz_will_scalar_work,
45e86372a0SGvozden Neskovic };
46e86372a0SGvozden Neskovic 
47e86372a0SGvozden Neskovic /* RAIDZ parity op that contain the fastest methods */
48e86372a0SGvozden Neskovic static raidz_impl_ops_t vdev_raidz_fastest_impl = {
49e86372a0SGvozden Neskovic 	.name = "fastest"
50e86372a0SGvozden Neskovic };
51e86372a0SGvozden Neskovic 
52e86372a0SGvozden Neskovic /* All compiled in implementations */
53e86372a0SGvozden Neskovic const raidz_impl_ops_t *raidz_all_maths[] = {
54e86372a0SGvozden Neskovic 	&vdev_raidz_original_impl,
55e86372a0SGvozden Neskovic 	&vdev_raidz_scalar_impl,
56f91a4547SGvozden Neskovic #if defined(__amd64)
57f91a4547SGvozden Neskovic 	&vdev_raidz_sse2_impl,
58f91a4547SGvozden Neskovic 	&vdev_raidz_ssse3_impl,
59f91a4547SGvozden Neskovic 	&vdev_raidz_avx2_impl,
60f91a4547SGvozden Neskovic #endif
61e86372a0SGvozden Neskovic };
62e86372a0SGvozden Neskovic 
63e86372a0SGvozden Neskovic /* Indicate that benchmark has been completed */
64e86372a0SGvozden Neskovic static boolean_t raidz_math_initialized = B_FALSE;
65e86372a0SGvozden Neskovic 
66e86372a0SGvozden Neskovic /* Select raidz implementation */
67e86372a0SGvozden Neskovic #define	IMPL_FASTEST	(UINT32_MAX)
68e86372a0SGvozden Neskovic #define	IMPL_CYCLE	(UINT32_MAX - 1)
69e86372a0SGvozden Neskovic #define	IMPL_ORIGINAL	(0)
70e86372a0SGvozden Neskovic #define	IMPL_SCALAR	(1)
71e86372a0SGvozden Neskovic 
72e86372a0SGvozden Neskovic #define	RAIDZ_IMPL_READ(i)	(*(volatile uint32_t *) &(i))
73e86372a0SGvozden Neskovic 
74e86372a0SGvozden Neskovic static uint32_t zfs_vdev_raidz_impl = IMPL_SCALAR;
75e86372a0SGvozden Neskovic static uint32_t user_sel_impl = IMPL_FASTEST;
76e86372a0SGvozden Neskovic 
77e86372a0SGvozden Neskovic /* Hold all supported implementations */
78e86372a0SGvozden Neskovic static size_t raidz_supp_impl_cnt = 0;
79e86372a0SGvozden Neskovic static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
80e86372a0SGvozden Neskovic 
#if defined(_KERNEL)
/*
 * Benchmark results ("kstat" values) for the supported implementations.
 * Each value is the per-disk throughput of an 8 disk+parity raidz vdev, in
 * bytes per second.  The array has one slot more than raidz_all_maths; the
 * slot at index raidz_supp_impl_cnt records, for every operation, the index
 * of the fastest implementation.
 *
 * PORTING NOTE:
 * On illumos this is not a kstat.  OpenZFS uses its home-grown kstat code,
 * which implements a free-form kstat using additional functionality that
 * does not exist in illumos.  Because there are no software consumers of
 * this information, we omit a kstat API.  If an administrator needs to see
 * this data for some reason, they can use mdb.
 *
 * On OpenZFS the kstat data would begin with a "header" like the one below
 * (the parity function "implementation" name followed by one column per
 * entry of the "raidz_gen_name" and "raidz_rec_name" arrays):
 *     impl gen_p gen_pq gen_pqr rec_p rec_q rec_r rec_pq rec_pr rec_qr rec_pqr
 * The header is followed by one row per parity function implementation in
 * the "raidz_all_maths" array, showing the "speed" values calculated for
 * that implementation for each of the parity generation and reconstruction
 * functions.
 */
static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1];

#endif /* _KERNEL */
105e86372a0SGvozden Neskovic 
106e86372a0SGvozden Neskovic /*
107e86372a0SGvozden Neskovic  * Returns the RAIDZ operations for raidz_map() parity calculations.   When
108e86372a0SGvozden Neskovic  * a SIMD implementation is not allowed in the current context, then fallback
109e86372a0SGvozden Neskovic  * to the fastest generic implementation.
110e86372a0SGvozden Neskovic  */
111e86372a0SGvozden Neskovic const raidz_impl_ops_t *
vdev_raidz_math_get_ops(void)112e86372a0SGvozden Neskovic vdev_raidz_math_get_ops(void)
113e86372a0SGvozden Neskovic {
114f91a4547SGvozden Neskovic 	if (!kfpu_allowed())
115f91a4547SGvozden Neskovic 		return (&vdev_raidz_scalar_impl);
116e86372a0SGvozden Neskovic 
117e86372a0SGvozden Neskovic 	raidz_impl_ops_t *ops = NULL;
118e86372a0SGvozden Neskovic 	const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
119e86372a0SGvozden Neskovic 
120e86372a0SGvozden Neskovic 	switch (impl) {
121e86372a0SGvozden Neskovic 	case IMPL_FASTEST:
122e86372a0SGvozden Neskovic 		ASSERT(raidz_math_initialized);
123e86372a0SGvozden Neskovic 		ops = &vdev_raidz_fastest_impl;
124e86372a0SGvozden Neskovic 		break;
125e86372a0SGvozden Neskovic 	case IMPL_CYCLE:
126e86372a0SGvozden Neskovic 		/* Cycle through all supported implementations */
127e86372a0SGvozden Neskovic 		ASSERT(raidz_math_initialized);
128e86372a0SGvozden Neskovic 		ASSERT3U(raidz_supp_impl_cnt, >, 0);
129e86372a0SGvozden Neskovic 		static size_t cycle_impl_idx = 0;
130e86372a0SGvozden Neskovic 		size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
131e86372a0SGvozden Neskovic 		ops = raidz_supp_impl[idx];
132e86372a0SGvozden Neskovic 		break;
133e86372a0SGvozden Neskovic 	case IMPL_ORIGINAL:
134e86372a0SGvozden Neskovic 		ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl;
135e86372a0SGvozden Neskovic 		break;
136e86372a0SGvozden Neskovic 	case IMPL_SCALAR:
137e86372a0SGvozden Neskovic 		ops = (raidz_impl_ops_t *)&vdev_raidz_scalar_impl;
138e86372a0SGvozden Neskovic 		break;
139e86372a0SGvozden Neskovic 	default:
140e86372a0SGvozden Neskovic 		ASSERT3U(impl, <, raidz_supp_impl_cnt);
141e86372a0SGvozden Neskovic 		ASSERT3U(raidz_supp_impl_cnt, >, 0);
142e86372a0SGvozden Neskovic 		if (impl < ARRAY_SIZE(raidz_all_maths))
143e86372a0SGvozden Neskovic 			ops = raidz_supp_impl[impl];
144e86372a0SGvozden Neskovic 		break;
145e86372a0SGvozden Neskovic 	}
146e86372a0SGvozden Neskovic 
147e86372a0SGvozden Neskovic 	ASSERT3P(ops, !=, NULL);
148e86372a0SGvozden Neskovic 
149e86372a0SGvozden Neskovic 	return (ops);
150e86372a0SGvozden Neskovic }
151e86372a0SGvozden Neskovic 
152e86372a0SGvozden Neskovic /*
153e86372a0SGvozden Neskovic  * Select parity generation method for raidz_map
154e86372a0SGvozden Neskovic  */
155e86372a0SGvozden Neskovic int
vdev_raidz_math_generate(raidz_map_t * rm)156e86372a0SGvozden Neskovic vdev_raidz_math_generate(raidz_map_t *rm)
157e86372a0SGvozden Neskovic {
158e86372a0SGvozden Neskovic 	raidz_gen_f gen_parity = NULL;
159e86372a0SGvozden Neskovic 
160e86372a0SGvozden Neskovic 	switch (raidz_parity(rm)) {
161e86372a0SGvozden Neskovic 		case 1:
162e86372a0SGvozden Neskovic 			gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P];
163e86372a0SGvozden Neskovic 			break;
164e86372a0SGvozden Neskovic 		case 2:
165e86372a0SGvozden Neskovic 			gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQ];
166e86372a0SGvozden Neskovic 			break;
167e86372a0SGvozden Neskovic 		case 3:
168e86372a0SGvozden Neskovic 			gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQR];
169e86372a0SGvozden Neskovic 			break;
170e86372a0SGvozden Neskovic 		default:
171e86372a0SGvozden Neskovic 			gen_parity = NULL;
172e86372a0SGvozden Neskovic 			cmn_err(CE_PANIC, "invalid RAID-Z configuration %u",
173e86372a0SGvozden Neskovic 			    (uint_t)raidz_parity(rm));
174e86372a0SGvozden Neskovic 			break;
175e86372a0SGvozden Neskovic 	}
176e86372a0SGvozden Neskovic 
177e86372a0SGvozden Neskovic 	/* if method is NULL execute the original implementation */
178e86372a0SGvozden Neskovic 	if (gen_parity == NULL)
179e86372a0SGvozden Neskovic 		return (RAIDZ_ORIGINAL_IMPL);
180e86372a0SGvozden Neskovic 
181e86372a0SGvozden Neskovic 	gen_parity(rm);
182e86372a0SGvozden Neskovic 
183e86372a0SGvozden Neskovic 	return (0);
184e86372a0SGvozden Neskovic }
185e86372a0SGvozden Neskovic 
186e86372a0SGvozden Neskovic static raidz_rec_f
reconstruct_fun_p_sel(raidz_map_t * rm,const int * parity_valid,const int nbaddata)187e86372a0SGvozden Neskovic reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid,
188e86372a0SGvozden Neskovic     const int nbaddata)
189e86372a0SGvozden Neskovic {
190e86372a0SGvozden Neskovic 	if (nbaddata == 1 && parity_valid[CODE_P]) {
191e86372a0SGvozden Neskovic 		return (rm->rm_ops->rec[RAIDZ_REC_P]);
192e86372a0SGvozden Neskovic 	}
193e86372a0SGvozden Neskovic 	return ((raidz_rec_f) NULL);
194e86372a0SGvozden Neskovic }
195e86372a0SGvozden Neskovic 
196e86372a0SGvozden Neskovic static raidz_rec_f
reconstruct_fun_pq_sel(raidz_map_t * rm,const int * parity_valid,const int nbaddata)197e86372a0SGvozden Neskovic reconstruct_fun_pq_sel(raidz_map_t *rm, const int *parity_valid,
198e86372a0SGvozden Neskovic     const int nbaddata)
199e86372a0SGvozden Neskovic {
200e86372a0SGvozden Neskovic 	if (nbaddata == 1) {
201e86372a0SGvozden Neskovic 		if (parity_valid[CODE_P]) {
202e86372a0SGvozden Neskovic 			return (rm->rm_ops->rec[RAIDZ_REC_P]);
203e86372a0SGvozden Neskovic 		} else if (parity_valid[CODE_Q]) {
204e86372a0SGvozden Neskovic 			return (rm->rm_ops->rec[RAIDZ_REC_Q]);
205e86372a0SGvozden Neskovic 		}
206e86372a0SGvozden Neskovic 	} else if (nbaddata == 2 &&
207e86372a0SGvozden Neskovic 	    parity_valid[CODE_P] && parity_valid[CODE_Q]) {
208e86372a0SGvozden Neskovic 		return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
209e86372a0SGvozden Neskovic 	}
210e86372a0SGvozden Neskovic 	return ((raidz_rec_f) NULL);
211e86372a0SGvozden Neskovic }
212e86372a0SGvozden Neskovic 
213e86372a0SGvozden Neskovic static raidz_rec_f
reconstruct_fun_pqr_sel(raidz_map_t * rm,const int * parity_valid,const int nbaddata)214e86372a0SGvozden Neskovic reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
215e86372a0SGvozden Neskovic     const int nbaddata)
216e86372a0SGvozden Neskovic {
217e86372a0SGvozden Neskovic 	if (nbaddata == 1) {
218e86372a0SGvozden Neskovic 		if (parity_valid[CODE_P]) {
219e86372a0SGvozden Neskovic 			return (rm->rm_ops->rec[RAIDZ_REC_P]);
220e86372a0SGvozden Neskovic 		} else if (parity_valid[CODE_Q]) {
221e86372a0SGvozden Neskovic 			return (rm->rm_ops->rec[RAIDZ_REC_Q]);
222e86372a0SGvozden Neskovic 		} else if (parity_valid[CODE_R]) {
223e86372a0SGvozden Neskovic 			return (rm->rm_ops->rec[RAIDZ_REC_R]);
224e86372a0SGvozden Neskovic 		}
225e86372a0SGvozden Neskovic 	} else if (nbaddata == 2) {
226e86372a0SGvozden Neskovic 		if (parity_valid[CODE_P] && parity_valid[CODE_Q]) {
227e86372a0SGvozden Neskovic 			return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
228e86372a0SGvozden Neskovic 		} else if (parity_valid[CODE_P] && parity_valid[CODE_R]) {
229e86372a0SGvozden Neskovic 			return (rm->rm_ops->rec[RAIDZ_REC_PR]);
230e86372a0SGvozden Neskovic 		} else if (parity_valid[CODE_Q] && parity_valid[CODE_R]) {
231e86372a0SGvozden Neskovic 			return (rm->rm_ops->rec[RAIDZ_REC_QR]);
232e86372a0SGvozden Neskovic 		}
233e86372a0SGvozden Neskovic 	} else if (nbaddata == 3 &&
234e86372a0SGvozden Neskovic 	    parity_valid[CODE_P] && parity_valid[CODE_Q] &&
235e86372a0SGvozden Neskovic 	    parity_valid[CODE_R]) {
236e86372a0SGvozden Neskovic 		return (rm->rm_ops->rec[RAIDZ_REC_PQR]);
237e86372a0SGvozden Neskovic 	}
238e86372a0SGvozden Neskovic 	return ((raidz_rec_f) NULL);
239e86372a0SGvozden Neskovic }
240e86372a0SGvozden Neskovic 
241e86372a0SGvozden Neskovic /*
242e86372a0SGvozden Neskovic  * Select data reconstruction method for raidz_map
243e86372a0SGvozden Neskovic  * @parity_valid - Parity validity flag
244e86372a0SGvozden Neskovic  * @dt           - Failed data index array
245e86372a0SGvozden Neskovic  * @nbaddata     - Number of failed data columns
246e86372a0SGvozden Neskovic  */
247e86372a0SGvozden Neskovic int
vdev_raidz_math_reconstruct(raidz_map_t * rm,const int * parity_valid,const int * dt,const int nbaddata)248e86372a0SGvozden Neskovic vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
249e86372a0SGvozden Neskovic     const int *dt, const int nbaddata)
250e86372a0SGvozden Neskovic {
251e86372a0SGvozden Neskovic 	raidz_rec_f rec_fn = NULL;
252e86372a0SGvozden Neskovic 
253e86372a0SGvozden Neskovic 	switch (raidz_parity(rm)) {
254e86372a0SGvozden Neskovic 	case PARITY_P:
255e86372a0SGvozden Neskovic 		rec_fn = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
256e86372a0SGvozden Neskovic 		break;
257e86372a0SGvozden Neskovic 	case PARITY_PQ:
258e86372a0SGvozden Neskovic 		rec_fn = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
259e86372a0SGvozden Neskovic 		break;
260e86372a0SGvozden Neskovic 	case PARITY_PQR:
261e86372a0SGvozden Neskovic 		rec_fn = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
262e86372a0SGvozden Neskovic 		break;
263e86372a0SGvozden Neskovic 	default:
264e86372a0SGvozden Neskovic 		cmn_err(CE_PANIC, "invalid RAID-Z configuration %u",
265e86372a0SGvozden Neskovic 		    (uint_t)raidz_parity(rm));
266e86372a0SGvozden Neskovic 		break;
267e86372a0SGvozden Neskovic 	}
268e86372a0SGvozden Neskovic 
269e86372a0SGvozden Neskovic 	if (rec_fn == NULL)
270e86372a0SGvozden Neskovic 		return (RAIDZ_ORIGINAL_IMPL);
271e86372a0SGvozden Neskovic 	else
272e86372a0SGvozden Neskovic 		return (rec_fn(rm, dt));
273e86372a0SGvozden Neskovic }
274e86372a0SGvozden Neskovic 
/* Names of the parity generation methods */
const char *raidz_gen_name[] = {
	"gen_p",
	"gen_pq",
	"gen_pqr"
};

/* Names of the data reconstruction methods */
const char *raidz_rec_name[] = {
	"rec_p",
	"rec_q",
	"rec_r",
	"rec_pq",
	"rec_pr",
	"rec_qr",
	"rec_pqr"
};
282e86372a0SGvozden Neskovic 
283e86372a0SGvozden Neskovic #if defined(_KERNEL)
284e86372a0SGvozden Neskovic 
285e86372a0SGvozden Neskovic #define	BENCH_D_COLS	(8ULL)
286e86372a0SGvozden Neskovic #define	BENCH_COLS	(BENCH_D_COLS + PARITY_PQR)
287e86372a0SGvozden Neskovic #define	BENCH_ZIO_SIZE	(1ULL << SPA_OLD_MAXBLOCKSHIFT)	/* 128 kiB */
288*0886dcadSAndy Fiddaman #define	BENCH_NS	MSEC2NSEC(1)			/* 1ms */
289e86372a0SGvozden Neskovic 
290e86372a0SGvozden Neskovic typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn);
291e86372a0SGvozden Neskovic 
292e86372a0SGvozden Neskovic static void
benchmark_gen_impl(raidz_map_t * rm,const int fn)293e86372a0SGvozden Neskovic benchmark_gen_impl(raidz_map_t *rm, const int fn)
294e86372a0SGvozden Neskovic {
295e86372a0SGvozden Neskovic 	(void) fn;
296e86372a0SGvozden Neskovic 	vdev_raidz_generate_parity(rm);
297e86372a0SGvozden Neskovic }
298e86372a0SGvozden Neskovic 
299e86372a0SGvozden Neskovic static void
benchmark_rec_impl(raidz_map_t * rm,const int fn)300e86372a0SGvozden Neskovic benchmark_rec_impl(raidz_map_t *rm, const int fn)
301e86372a0SGvozden Neskovic {
302e86372a0SGvozden Neskovic 	static const int rec_tgt[7][3] = {
303e86372a0SGvozden Neskovic 		{1, 2, 3},	/* rec_p:   bad QR & D[0]	*/
304e86372a0SGvozden Neskovic 		{0, 2, 3},	/* rec_q:   bad PR & D[0]	*/
305e86372a0SGvozden Neskovic 		{0, 1, 3},	/* rec_r:   bad PQ & D[0]	*/
306e86372a0SGvozden Neskovic 		{2, 3, 4},	/* rec_pq:  bad R  & D[0][1]	*/
307e86372a0SGvozden Neskovic 		{1, 3, 4},	/* rec_pr:  bad Q  & D[0][1]	*/
308e86372a0SGvozden Neskovic 		{0, 3, 4},	/* rec_qr:  bad P  & D[0][1]	*/
309e86372a0SGvozden Neskovic 		{3, 4, 5}	/* rec_pqr: bad    & D[0][1][2] */
310e86372a0SGvozden Neskovic 	};
311e86372a0SGvozden Neskovic 
312e86372a0SGvozden Neskovic 	vdev_raidz_reconstruct(rm, rec_tgt[fn], 3);
313e86372a0SGvozden Neskovic }
314e86372a0SGvozden Neskovic 
315e86372a0SGvozden Neskovic /*
316e86372a0SGvozden Neskovic  * Benchmarking of all supported implementations (raidz_supp_impl_cnt)
317e86372a0SGvozden Neskovic  * is performed by setting the rm_ops pointer and calling the top level
318e86372a0SGvozden Neskovic  * generate/reconstruct methods of bench_rm.
319e86372a0SGvozden Neskovic  */
320e86372a0SGvozden Neskovic static void
benchmark_raidz_impl(raidz_map_t * bench_rm,const int fn,benchmark_fn bench_fn)321e86372a0SGvozden Neskovic benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
322e86372a0SGvozden Neskovic {
323e86372a0SGvozden Neskovic 	uint64_t run_cnt, speed, best_speed = 0;
324e86372a0SGvozden Neskovic 	hrtime_t t_start, t_diff;
325e86372a0SGvozden Neskovic 	raidz_impl_ops_t *curr_impl;
326e86372a0SGvozden Neskovic 	raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
327e86372a0SGvozden Neskovic 	int impl, i;
328e86372a0SGvozden Neskovic 
329e86372a0SGvozden Neskovic 	for (impl = 0; impl < raidz_supp_impl_cnt; impl++) {
330e86372a0SGvozden Neskovic 		/* set an implementation to benchmark */
331e86372a0SGvozden Neskovic 		curr_impl = raidz_supp_impl[impl];
332e86372a0SGvozden Neskovic 		bench_rm->rm_ops = curr_impl;
333e86372a0SGvozden Neskovic 
334e86372a0SGvozden Neskovic 		run_cnt = 0;
335e86372a0SGvozden Neskovic 		t_start = gethrtime();
336e86372a0SGvozden Neskovic 
337e86372a0SGvozden Neskovic 		do {
338*0886dcadSAndy Fiddaman 			for (i = 0; i < 5; i++, run_cnt++)
339e86372a0SGvozden Neskovic 				bench_fn(bench_rm, fn);
340e86372a0SGvozden Neskovic 
341e86372a0SGvozden Neskovic 			t_diff = gethrtime() - t_start;
342e86372a0SGvozden Neskovic 		} while (t_diff < BENCH_NS);
343e86372a0SGvozden Neskovic 
344e86372a0SGvozden Neskovic 		speed = run_cnt * BENCH_ZIO_SIZE * NANOSEC;
345e86372a0SGvozden Neskovic 		speed /= (t_diff * BENCH_COLS);
346e86372a0SGvozden Neskovic 
347e86372a0SGvozden Neskovic 		if (bench_fn == benchmark_gen_impl)
348e86372a0SGvozden Neskovic 			raidz_impl_kstats[impl].gen[fn] = speed;
349e86372a0SGvozden Neskovic 		else
350e86372a0SGvozden Neskovic 			raidz_impl_kstats[impl].rec[fn] = speed;
351e86372a0SGvozden Neskovic 
352e86372a0SGvozden Neskovic 		/* Update fastest implementation method */
353e86372a0SGvozden Neskovic 		if (speed > best_speed) {
354e86372a0SGvozden Neskovic 			best_speed = speed;
355e86372a0SGvozden Neskovic 
356e86372a0SGvozden Neskovic 			if (bench_fn == benchmark_gen_impl) {
357e86372a0SGvozden Neskovic 				fstat->gen[fn] = impl;
358e86372a0SGvozden Neskovic 				vdev_raidz_fastest_impl.gen[fn] =
359e86372a0SGvozden Neskovic 				    curr_impl->gen[fn];
360e86372a0SGvozden Neskovic 			} else {
361e86372a0SGvozden Neskovic 				fstat->rec[fn] = impl;
362e86372a0SGvozden Neskovic 				vdev_raidz_fastest_impl.rec[fn] =
363e86372a0SGvozden Neskovic 				    curr_impl->rec[fn];
364e86372a0SGvozden Neskovic 			}
365e86372a0SGvozden Neskovic 		}
366e86372a0SGvozden Neskovic 	}
367e86372a0SGvozden Neskovic }
368e86372a0SGvozden Neskovic #endif
369e86372a0SGvozden Neskovic 
370e86372a0SGvozden Neskovic /*
371e86372a0SGvozden Neskovic  * Initialize and benchmark all supported implementations.
372e86372a0SGvozden Neskovic  */
373e86372a0SGvozden Neskovic static void
benchmark_raidz(void)374e86372a0SGvozden Neskovic benchmark_raidz(void)
375e86372a0SGvozden Neskovic {
376e86372a0SGvozden Neskovic 	raidz_impl_ops_t *curr_impl;
377e86372a0SGvozden Neskovic 	int i, c;
378e86372a0SGvozden Neskovic 
379e86372a0SGvozden Neskovic 	/* Move supported impl into raidz_supp_impl */
380e86372a0SGvozden Neskovic 	for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
381e86372a0SGvozden Neskovic 		curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i];
382e86372a0SGvozden Neskovic 
383e86372a0SGvozden Neskovic 		if (curr_impl->init)
384e86372a0SGvozden Neskovic 			curr_impl->init();
385e86372a0SGvozden Neskovic 
386e86372a0SGvozden Neskovic 		if (curr_impl->is_supported())
387e86372a0SGvozden Neskovic 			raidz_supp_impl[c++] = (raidz_impl_ops_t *)curr_impl;
388e86372a0SGvozden Neskovic 	}
389e86372a0SGvozden Neskovic 	membar_producer();		/* complete raidz_supp_impl[] init */
390e86372a0SGvozden Neskovic 	raidz_supp_impl_cnt = c;	/* number of supported impl */
391e86372a0SGvozden Neskovic 
392e86372a0SGvozden Neskovic #if defined(_KERNEL)
393e86372a0SGvozden Neskovic 	zio_t *bench_zio = NULL;
394e86372a0SGvozden Neskovic 	raidz_map_t *bench_rm = NULL;
395e86372a0SGvozden Neskovic 	uint64_t bench_parity;
396e86372a0SGvozden Neskovic 
397e86372a0SGvozden Neskovic 	/* Fake a zio and run the benchmark on a warmed up buffer */
398e86372a0SGvozden Neskovic 	bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
399e86372a0SGvozden Neskovic 	bench_zio->io_offset = 0;
400e86372a0SGvozden Neskovic 	bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */
401e86372a0SGvozden Neskovic 	bench_zio->io_abd = abd_alloc_linear(BENCH_ZIO_SIZE, B_TRUE);
402e86372a0SGvozden Neskovic 	memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE);
403e86372a0SGvozden Neskovic 
404e86372a0SGvozden Neskovic 	/* Benchmark parity generation methods */
405e86372a0SGvozden Neskovic 	for (int fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
406e86372a0SGvozden Neskovic 		bench_parity = fn + 1;
407e86372a0SGvozden Neskovic 		/* New raidz_map is needed for each generate_p/q/r */
408e86372a0SGvozden Neskovic 		bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
409e86372a0SGvozden Neskovic 		    BENCH_D_COLS + bench_parity, bench_parity);
410e86372a0SGvozden Neskovic 
411e86372a0SGvozden Neskovic 		benchmark_raidz_impl(bench_rm, fn, benchmark_gen_impl);
412e86372a0SGvozden Neskovic 
413e86372a0SGvozden Neskovic 		vdev_raidz_map_free(bench_rm);
414e86372a0SGvozden Neskovic 	}
415e86372a0SGvozden Neskovic 
416e86372a0SGvozden Neskovic 	/* Benchmark data reconstruction methods */
417e86372a0SGvozden Neskovic 	bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
418e86372a0SGvozden Neskovic 	    BENCH_COLS, PARITY_PQR);
419e86372a0SGvozden Neskovic 
420e86372a0SGvozden Neskovic 	for (int fn = 0; fn < RAIDZ_REC_NUM; fn++)
421e86372a0SGvozden Neskovic 		benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
422e86372a0SGvozden Neskovic 
423e86372a0SGvozden Neskovic 	vdev_raidz_map_free(bench_rm);
424e86372a0SGvozden Neskovic 
425e86372a0SGvozden Neskovic 	/* cleanup the bench zio */
426e86372a0SGvozden Neskovic 	abd_free(bench_zio->io_abd);
427e86372a0SGvozden Neskovic 	kmem_free(bench_zio, sizeof (zio_t));
428e86372a0SGvozden Neskovic #else
429e86372a0SGvozden Neskovic 	/*
430e86372a0SGvozden Neskovic 	 * Skip the benchmark in user space to avoid impacting libzpool
431e86372a0SGvozden Neskovic 	 * consumers (zdb, zhack, zinject, ztest).  The last implementation
432e86372a0SGvozden Neskovic 	 * is assumed to be the fastest and used by default.
433e86372a0SGvozden Neskovic 	 */
434e86372a0SGvozden Neskovic 	memcpy(&vdev_raidz_fastest_impl,
435e86372a0SGvozden Neskovic 	    raidz_supp_impl[raidz_supp_impl_cnt - 1],
436e86372a0SGvozden Neskovic 	    sizeof (vdev_raidz_fastest_impl));
437e86372a0SGvozden Neskovic 	strcpy(vdev_raidz_fastest_impl.name, "fastest");
438e86372a0SGvozden Neskovic #endif /* _KERNEL */
439e86372a0SGvozden Neskovic }
440e86372a0SGvozden Neskovic 
441e86372a0SGvozden Neskovic void
vdev_raidz_math_init(void)442e86372a0SGvozden Neskovic vdev_raidz_math_init(void)
443e86372a0SGvozden Neskovic {
444e86372a0SGvozden Neskovic 	/* Determine the fastest available implementation. */
445e86372a0SGvozden Neskovic 	benchmark_raidz();
446e86372a0SGvozden Neskovic 
447e86372a0SGvozden Neskovic 	/* Finish initialization */
448e86372a0SGvozden Neskovic 	atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
449e86372a0SGvozden Neskovic 	raidz_math_initialized = B_TRUE;
450e86372a0SGvozden Neskovic }
451e86372a0SGvozden Neskovic 
452e86372a0SGvozden Neskovic void
vdev_raidz_math_fini(void)453e86372a0SGvozden Neskovic vdev_raidz_math_fini(void)
454e86372a0SGvozden Neskovic {
455e86372a0SGvozden Neskovic 	raidz_impl_ops_t const *curr_impl;
456e86372a0SGvozden Neskovic 
457e86372a0SGvozden Neskovic 	for (int i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
458e86372a0SGvozden Neskovic 		curr_impl = raidz_all_maths[i];
459e86372a0SGvozden Neskovic 		if (curr_impl->fini)
460e86372a0SGvozden Neskovic 			curr_impl->fini();
461e86372a0SGvozden Neskovic 	}
462e86372a0SGvozden Neskovic }
463e86372a0SGvozden Neskovic 
464e86372a0SGvozden Neskovic static const struct {
465e86372a0SGvozden Neskovic 	char *name;
466e86372a0SGvozden Neskovic 	uint32_t sel;
467e86372a0SGvozden Neskovic } math_impl_opts[] = {
468e86372a0SGvozden Neskovic 		{ "cycle",	IMPL_CYCLE },
469e86372a0SGvozden Neskovic 		{ "fastest",	IMPL_FASTEST },
470e86372a0SGvozden Neskovic 		{ "original",	IMPL_ORIGINAL },
471e86372a0SGvozden Neskovic 		{ "scalar",	IMPL_SCALAR }
472e86372a0SGvozden Neskovic };
473e86372a0SGvozden Neskovic 
474e86372a0SGvozden Neskovic /*
475e86372a0SGvozden Neskovic  * Function sets desired raidz implementation.
476e86372a0SGvozden Neskovic  *
477e86372a0SGvozden Neskovic  * If we are called before init(), user preference will be saved in
478e86372a0SGvozden Neskovic  * user_sel_impl, and applied in later init() call. This occurs when module
479e86372a0SGvozden Neskovic  * parameter is specified on module load. Otherwise, directly update
480e86372a0SGvozden Neskovic  * zfs_vdev_raidz_impl.
481e86372a0SGvozden Neskovic  *
482e86372a0SGvozden Neskovic  * @val		Name of raidz implementation to use
483e86372a0SGvozden Neskovic  * @param	Unused.
484e86372a0SGvozden Neskovic  */
485e86372a0SGvozden Neskovic int
vdev_raidz_impl_set(const char * val)486e86372a0SGvozden Neskovic vdev_raidz_impl_set(const char *val)
487e86372a0SGvozden Neskovic {
488*0886dcadSAndy Fiddaman 	int err = EINVAL;
489e86372a0SGvozden Neskovic 	char req_name[RAIDZ_IMPL_NAME_MAX];
490e86372a0SGvozden Neskovic 	uint32_t impl = RAIDZ_IMPL_READ(user_sel_impl);
491e86372a0SGvozden Neskovic 	size_t i;
492e86372a0SGvozden Neskovic 
493e86372a0SGvozden Neskovic 	/* sanitize input */
494e86372a0SGvozden Neskovic 	i = strnlen(val, RAIDZ_IMPL_NAME_MAX);
495e86372a0SGvozden Neskovic 	if (i == 0 || i == RAIDZ_IMPL_NAME_MAX)
496e86372a0SGvozden Neskovic 		return (err);
497e86372a0SGvozden Neskovic 
498e86372a0SGvozden Neskovic 	strlcpy(req_name, val, RAIDZ_IMPL_NAME_MAX);
499e86372a0SGvozden Neskovic 	while (i > 0 && !!isspace(req_name[i-1]))
500e86372a0SGvozden Neskovic 		i--;
501e86372a0SGvozden Neskovic 	req_name[i] = '\0';
502e86372a0SGvozden Neskovic 
503e86372a0SGvozden Neskovic 	/* Check mandatory options */
504e86372a0SGvozden Neskovic 	for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
505e86372a0SGvozden Neskovic 		if (strcmp(req_name, math_impl_opts[i].name) == 0) {
506e86372a0SGvozden Neskovic 			impl = math_impl_opts[i].sel;
507e86372a0SGvozden Neskovic 			err = 0;
508e86372a0SGvozden Neskovic 			break;
509e86372a0SGvozden Neskovic 		}
510e86372a0SGvozden Neskovic 	}
511e86372a0SGvozden Neskovic 
512e86372a0SGvozden Neskovic 	/* check all supported impl if init() was already called */
513e86372a0SGvozden Neskovic 	if (err != 0 && raidz_math_initialized) {
514e86372a0SGvozden Neskovic 		/* check all supported implementations */
515e86372a0SGvozden Neskovic 		for (i = 0; i < raidz_supp_impl_cnt; i++) {
516e86372a0SGvozden Neskovic 			if (strcmp(req_name, raidz_supp_impl[i]->name) == 0) {
517e86372a0SGvozden Neskovic 				impl = i;
518e86372a0SGvozden Neskovic 				err = 0;
519e86372a0SGvozden Neskovic 				break;
520e86372a0SGvozden Neskovic 			}
521e86372a0SGvozden Neskovic 		}
522e86372a0SGvozden Neskovic 	}
523e86372a0SGvozden Neskovic 
524e86372a0SGvozden Neskovic 	if (err == 0) {
525e86372a0SGvozden Neskovic 		if (raidz_math_initialized)
526e86372a0SGvozden Neskovic 			atomic_swap_32(&zfs_vdev_raidz_impl, impl);
527e86372a0SGvozden Neskovic 		else
528e86372a0SGvozden Neskovic 			atomic_swap_32(&user_sel_impl, impl);
529e86372a0SGvozden Neskovic 	}
530e86372a0SGvozden Neskovic 
531e86372a0SGvozden Neskovic 	return (err);
532e86372a0SGvozden Neskovic }
533e86372a0SGvozden Neskovic 
534e86372a0SGvozden Neskovic #if defined(_KERNEL) && defined(__linux__)
535e86372a0SGvozden Neskovic 
536e86372a0SGvozden Neskovic static int
zfs_vdev_raidz_impl_set(const char * val,zfs_kernel_param_t * kp)537e86372a0SGvozden Neskovic zfs_vdev_raidz_impl_set(const char *val, zfs_kernel_param_t *kp)
538e86372a0SGvozden Neskovic {
539e86372a0SGvozden Neskovic 	return (vdev_raidz_impl_set(val));
540e86372a0SGvozden Neskovic }
541e86372a0SGvozden Neskovic 
542e86372a0SGvozden Neskovic static int
zfs_vdev_raidz_impl_get(char * buffer,zfs_kernel_param_t * kp)543e86372a0SGvozden Neskovic zfs_vdev_raidz_impl_get(char *buffer, zfs_kernel_param_t *kp)
544e86372a0SGvozden Neskovic {
545e86372a0SGvozden Neskovic 	int i, cnt = 0;
546e86372a0SGvozden Neskovic 	char *fmt;
547e86372a0SGvozden Neskovic 	const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
548e86372a0SGvozden Neskovic 
549e86372a0SGvozden Neskovic 	ASSERT(raidz_math_initialized);
550e86372a0SGvozden Neskovic 
551e86372a0SGvozden Neskovic 	/* list mandatory options */
552e86372a0SGvozden Neskovic 	for (i = 0; i < ARRAY_SIZE(math_impl_opts) - 2; i++) {
553e86372a0SGvozden Neskovic 		fmt = (impl == math_impl_opts[i].sel) ? "[%s] " : "%s ";
554e86372a0SGvozden Neskovic 		cnt += sprintf(buffer + cnt, fmt, math_impl_opts[i].name);
555e86372a0SGvozden Neskovic 	}
556e86372a0SGvozden Neskovic 
557e86372a0SGvozden Neskovic 	/* list all supported implementations */
558e86372a0SGvozden Neskovic 	for (i = 0; i < raidz_supp_impl_cnt; i++) {
559e86372a0SGvozden Neskovic 		fmt = (i == impl) ? "[%s] " : "%s ";
560e86372a0SGvozden Neskovic 		cnt += sprintf(buffer + cnt, fmt, raidz_supp_impl[i]->name);
561e86372a0SGvozden Neskovic 	}
562e86372a0SGvozden Neskovic 
563e86372a0SGvozden Neskovic 	return (cnt);
564e86372a0SGvozden Neskovic }
565e86372a0SGvozden Neskovic 
566e86372a0SGvozden Neskovic module_param_call(zfs_vdev_raidz_impl, zfs_vdev_raidz_impl_set,
567e86372a0SGvozden Neskovic     zfs_vdev_raidz_impl_get, NULL, 0644);
568e86372a0SGvozden Neskovic MODULE_PARM_DESC(zfs_vdev_raidz_impl, "Select raidz implementation.");
569e86372a0SGvozden Neskovic #endif
570