1e86372a0SGvozden Neskovic /*
2e86372a0SGvozden Neskovic * CDDL HEADER START
3e86372a0SGvozden Neskovic *
4e86372a0SGvozden Neskovic * The contents of this file are subject to the terms of the
5e86372a0SGvozden Neskovic * Common Development and Distribution License (the "License").
6e86372a0SGvozden Neskovic * You may not use this file except in compliance with the License.
7e86372a0SGvozden Neskovic *
8e86372a0SGvozden Neskovic * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9e86372a0SGvozden Neskovic * or http://www.opensolaris.org/os/licensing.
10e86372a0SGvozden Neskovic * See the License for the specific language governing permissions
11e86372a0SGvozden Neskovic * and limitations under the License.
12e86372a0SGvozden Neskovic *
13e86372a0SGvozden Neskovic * When distributing Covered Code, include this CDDL HEADER in each
14e86372a0SGvozden Neskovic * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15e86372a0SGvozden Neskovic * If applicable, add the following below this CDDL HEADER, with the
16e86372a0SGvozden Neskovic * fields enclosed by brackets "[]" replaced with your own identifying
17e86372a0SGvozden Neskovic * information: Portions Copyright [yyyy] [name of copyright owner]
18e86372a0SGvozden Neskovic *
19e86372a0SGvozden Neskovic * CDDL HEADER END
20e86372a0SGvozden Neskovic */
21e86372a0SGvozden Neskovic /*
22e86372a0SGvozden Neskovic * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
23e86372a0SGvozden Neskovic */
24e86372a0SGvozden Neskovic
25e86372a0SGvozden Neskovic #include <sys/zfs_context.h>
26e86372a0SGvozden Neskovic #include <sys/types.h>
27e86372a0SGvozden Neskovic #include <sys/zio.h>
28e86372a0SGvozden Neskovic #include <sys/debug.h>
29e86372a0SGvozden Neskovic #include <sys/zfs_debug.h>
30e86372a0SGvozden Neskovic #include <sys/vdev_raidz.h>
31e86372a0SGvozden Neskovic #include <sys/vdev_raidz_impl.h>
32e86372a0SGvozden Neskovic #include <sys/simd.h>
33e86372a0SGvozden Neskovic
/*
 * Minimal whitespace classifier for builds where <ctype.h> is unavailable.
 * Note: the argument is evaluated more than once, so it must be free of
 * side effects.
 */
#ifndef isspace
#define	isspace(c)	((c) == ' ' || (c) == '\t' || (c) == '\n' || \
			(c) == '\v' || (c) == '\f' || (c) == '\r')
#endif
38e86372a0SGvozden Neskovic
39e86372a0SGvozden Neskovic extern boolean_t raidz_will_scalar_work(void);
40e86372a0SGvozden Neskovic
41e86372a0SGvozden Neskovic /* Opaque implementation with NULL methods to represent original methods */
42e86372a0SGvozden Neskovic static const raidz_impl_ops_t vdev_raidz_original_impl = {
43e86372a0SGvozden Neskovic .name = "original",
44e86372a0SGvozden Neskovic .is_supported = raidz_will_scalar_work,
45e86372a0SGvozden Neskovic };
46e86372a0SGvozden Neskovic
47e86372a0SGvozden Neskovic /* RAIDZ parity op that contain the fastest methods */
48e86372a0SGvozden Neskovic static raidz_impl_ops_t vdev_raidz_fastest_impl = {
49e86372a0SGvozden Neskovic .name = "fastest"
50e86372a0SGvozden Neskovic };
51e86372a0SGvozden Neskovic
52e86372a0SGvozden Neskovic /* All compiled in implementations */
53e86372a0SGvozden Neskovic const raidz_impl_ops_t *raidz_all_maths[] = {
54e86372a0SGvozden Neskovic &vdev_raidz_original_impl,
55e86372a0SGvozden Neskovic &vdev_raidz_scalar_impl,
56f91a4547SGvozden Neskovic #if defined(__amd64)
57f91a4547SGvozden Neskovic &vdev_raidz_sse2_impl,
58f91a4547SGvozden Neskovic &vdev_raidz_ssse3_impl,
59f91a4547SGvozden Neskovic &vdev_raidz_avx2_impl,
60f91a4547SGvozden Neskovic #endif
61e86372a0SGvozden Neskovic };
62e86372a0SGvozden Neskovic
63e86372a0SGvozden Neskovic /* Indicate that benchmark has been completed */
64e86372a0SGvozden Neskovic static boolean_t raidz_math_initialized = B_FALSE;
65e86372a0SGvozden Neskovic
66e86372a0SGvozden Neskovic /* Select raidz implementation */
67e86372a0SGvozden Neskovic #define IMPL_FASTEST (UINT32_MAX)
68e86372a0SGvozden Neskovic #define IMPL_CYCLE (UINT32_MAX - 1)
69e86372a0SGvozden Neskovic #define IMPL_ORIGINAL (0)
70e86372a0SGvozden Neskovic #define IMPL_SCALAR (1)
71e86372a0SGvozden Neskovic
72e86372a0SGvozden Neskovic #define RAIDZ_IMPL_READ(i) (*(volatile uint32_t *) &(i))
73e86372a0SGvozden Neskovic
74e86372a0SGvozden Neskovic static uint32_t zfs_vdev_raidz_impl = IMPL_SCALAR;
75e86372a0SGvozden Neskovic static uint32_t user_sel_impl = IMPL_FASTEST;
76e86372a0SGvozden Neskovic
77e86372a0SGvozden Neskovic /* Hold all supported implementations */
78e86372a0SGvozden Neskovic static size_t raidz_supp_impl_cnt = 0;
79e86372a0SGvozden Neskovic static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
80e86372a0SGvozden Neskovic
#if defined(_KERNEL)
/*
 * Benchmark results for the supported implementations.
 * Values represent per disk throughput of 8 disk+parity raidz vdev [B/s].
 * The array has one more slot than raidz_all_maths[]; the benchmark stores
 * the index of the fastest implementation for each method in the slot that
 * follows the last supported implementation.
 *
 * PORTING NOTE:
 * On illumos this is not a kstat. OpenZFS uses its own home-grown kstat code
 * which implements a free-form kstat using additional functionality that
 * does not exist in illumos. Because there are no software consumers of this
 * information, we omit a kstat API. If an administrator needs to see this
 * data for some reason, they can use mdb.
 *
 * The format of the kstat data on OpenZFS would be a "header" that looks
 * like this (a column for each entry in the "raidz_gen_name" and
 * "raidz_rec_name" arrays, starting with the parity function
 * "implementation" name):
 *	impl gen_p gen_pq gen_pqr rec_p rec_q rec_r rec_pq rec_pr rec_qr rec_pqr
 * This is followed by a row for each parity function implementation, showing
 * the "speed" values calculated for that implementation for each of the
 * parity generation and reconstruction functions in the "raidz_all_maths"
 * array.
 */
static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1];

#endif
105e86372a0SGvozden Neskovic
106e86372a0SGvozden Neskovic /*
107e86372a0SGvozden Neskovic * Returns the RAIDZ operations for raidz_map() parity calculations. When
108e86372a0SGvozden Neskovic * a SIMD implementation is not allowed in the current context, then fallback
109e86372a0SGvozden Neskovic * to the fastest generic implementation.
110e86372a0SGvozden Neskovic */
111e86372a0SGvozden Neskovic const raidz_impl_ops_t *
vdev_raidz_math_get_ops(void)112e86372a0SGvozden Neskovic vdev_raidz_math_get_ops(void)
113e86372a0SGvozden Neskovic {
114f91a4547SGvozden Neskovic if (!kfpu_allowed())
115f91a4547SGvozden Neskovic return (&vdev_raidz_scalar_impl);
116e86372a0SGvozden Neskovic
117e86372a0SGvozden Neskovic raidz_impl_ops_t *ops = NULL;
118e86372a0SGvozden Neskovic const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
119e86372a0SGvozden Neskovic
120e86372a0SGvozden Neskovic switch (impl) {
121e86372a0SGvozden Neskovic case IMPL_FASTEST:
122e86372a0SGvozden Neskovic ASSERT(raidz_math_initialized);
123e86372a0SGvozden Neskovic ops = &vdev_raidz_fastest_impl;
124e86372a0SGvozden Neskovic break;
125e86372a0SGvozden Neskovic case IMPL_CYCLE:
126e86372a0SGvozden Neskovic /* Cycle through all supported implementations */
127e86372a0SGvozden Neskovic ASSERT(raidz_math_initialized);
128e86372a0SGvozden Neskovic ASSERT3U(raidz_supp_impl_cnt, >, 0);
129e86372a0SGvozden Neskovic static size_t cycle_impl_idx = 0;
130e86372a0SGvozden Neskovic size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
131e86372a0SGvozden Neskovic ops = raidz_supp_impl[idx];
132e86372a0SGvozden Neskovic break;
133e86372a0SGvozden Neskovic case IMPL_ORIGINAL:
134e86372a0SGvozden Neskovic ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl;
135e86372a0SGvozden Neskovic break;
136e86372a0SGvozden Neskovic case IMPL_SCALAR:
137e86372a0SGvozden Neskovic ops = (raidz_impl_ops_t *)&vdev_raidz_scalar_impl;
138e86372a0SGvozden Neskovic break;
139e86372a0SGvozden Neskovic default:
140e86372a0SGvozden Neskovic ASSERT3U(impl, <, raidz_supp_impl_cnt);
141e86372a0SGvozden Neskovic ASSERT3U(raidz_supp_impl_cnt, >, 0);
142e86372a0SGvozden Neskovic if (impl < ARRAY_SIZE(raidz_all_maths))
143e86372a0SGvozden Neskovic ops = raidz_supp_impl[impl];
144e86372a0SGvozden Neskovic break;
145e86372a0SGvozden Neskovic }
146e86372a0SGvozden Neskovic
147e86372a0SGvozden Neskovic ASSERT3P(ops, !=, NULL);
148e86372a0SGvozden Neskovic
149e86372a0SGvozden Neskovic return (ops);
150e86372a0SGvozden Neskovic }
151e86372a0SGvozden Neskovic
152e86372a0SGvozden Neskovic /*
153e86372a0SGvozden Neskovic * Select parity generation method for raidz_map
154e86372a0SGvozden Neskovic */
155e86372a0SGvozden Neskovic int
vdev_raidz_math_generate(raidz_map_t * rm)156e86372a0SGvozden Neskovic vdev_raidz_math_generate(raidz_map_t *rm)
157e86372a0SGvozden Neskovic {
158e86372a0SGvozden Neskovic raidz_gen_f gen_parity = NULL;
159e86372a0SGvozden Neskovic
160e86372a0SGvozden Neskovic switch (raidz_parity(rm)) {
161e86372a0SGvozden Neskovic case 1:
162e86372a0SGvozden Neskovic gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P];
163e86372a0SGvozden Neskovic break;
164e86372a0SGvozden Neskovic case 2:
165e86372a0SGvozden Neskovic gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQ];
166e86372a0SGvozden Neskovic break;
167e86372a0SGvozden Neskovic case 3:
168e86372a0SGvozden Neskovic gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQR];
169e86372a0SGvozden Neskovic break;
170e86372a0SGvozden Neskovic default:
171e86372a0SGvozden Neskovic gen_parity = NULL;
172e86372a0SGvozden Neskovic cmn_err(CE_PANIC, "invalid RAID-Z configuration %u",
173e86372a0SGvozden Neskovic (uint_t)raidz_parity(rm));
174e86372a0SGvozden Neskovic break;
175e86372a0SGvozden Neskovic }
176e86372a0SGvozden Neskovic
177e86372a0SGvozden Neskovic /* if method is NULL execute the original implementation */
178e86372a0SGvozden Neskovic if (gen_parity == NULL)
179e86372a0SGvozden Neskovic return (RAIDZ_ORIGINAL_IMPL);
180e86372a0SGvozden Neskovic
181e86372a0SGvozden Neskovic gen_parity(rm);
182e86372a0SGvozden Neskovic
183e86372a0SGvozden Neskovic return (0);
184e86372a0SGvozden Neskovic }
185e86372a0SGvozden Neskovic
186e86372a0SGvozden Neskovic static raidz_rec_f
reconstruct_fun_p_sel(raidz_map_t * rm,const int * parity_valid,const int nbaddata)187e86372a0SGvozden Neskovic reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid,
188e86372a0SGvozden Neskovic const int nbaddata)
189e86372a0SGvozden Neskovic {
190e86372a0SGvozden Neskovic if (nbaddata == 1 && parity_valid[CODE_P]) {
191e86372a0SGvozden Neskovic return (rm->rm_ops->rec[RAIDZ_REC_P]);
192e86372a0SGvozden Neskovic }
193e86372a0SGvozden Neskovic return ((raidz_rec_f) NULL);
194e86372a0SGvozden Neskovic }
195e86372a0SGvozden Neskovic
196e86372a0SGvozden Neskovic static raidz_rec_f
reconstruct_fun_pq_sel(raidz_map_t * rm,const int * parity_valid,const int nbaddata)197e86372a0SGvozden Neskovic reconstruct_fun_pq_sel(raidz_map_t *rm, const int *parity_valid,
198e86372a0SGvozden Neskovic const int nbaddata)
199e86372a0SGvozden Neskovic {
200e86372a0SGvozden Neskovic if (nbaddata == 1) {
201e86372a0SGvozden Neskovic if (parity_valid[CODE_P]) {
202e86372a0SGvozden Neskovic return (rm->rm_ops->rec[RAIDZ_REC_P]);
203e86372a0SGvozden Neskovic } else if (parity_valid[CODE_Q]) {
204e86372a0SGvozden Neskovic return (rm->rm_ops->rec[RAIDZ_REC_Q]);
205e86372a0SGvozden Neskovic }
206e86372a0SGvozden Neskovic } else if (nbaddata == 2 &&
207e86372a0SGvozden Neskovic parity_valid[CODE_P] && parity_valid[CODE_Q]) {
208e86372a0SGvozden Neskovic return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
209e86372a0SGvozden Neskovic }
210e86372a0SGvozden Neskovic return ((raidz_rec_f) NULL);
211e86372a0SGvozden Neskovic }
212e86372a0SGvozden Neskovic
213e86372a0SGvozden Neskovic static raidz_rec_f
reconstruct_fun_pqr_sel(raidz_map_t * rm,const int * parity_valid,const int nbaddata)214e86372a0SGvozden Neskovic reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
215e86372a0SGvozden Neskovic const int nbaddata)
216e86372a0SGvozden Neskovic {
217e86372a0SGvozden Neskovic if (nbaddata == 1) {
218e86372a0SGvozden Neskovic if (parity_valid[CODE_P]) {
219e86372a0SGvozden Neskovic return (rm->rm_ops->rec[RAIDZ_REC_P]);
220e86372a0SGvozden Neskovic } else if (parity_valid[CODE_Q]) {
221e86372a0SGvozden Neskovic return (rm->rm_ops->rec[RAIDZ_REC_Q]);
222e86372a0SGvozden Neskovic } else if (parity_valid[CODE_R]) {
223e86372a0SGvozden Neskovic return (rm->rm_ops->rec[RAIDZ_REC_R]);
224e86372a0SGvozden Neskovic }
225e86372a0SGvozden Neskovic } else if (nbaddata == 2) {
226e86372a0SGvozden Neskovic if (parity_valid[CODE_P] && parity_valid[CODE_Q]) {
227e86372a0SGvozden Neskovic return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
228e86372a0SGvozden Neskovic } else if (parity_valid[CODE_P] && parity_valid[CODE_R]) {
229e86372a0SGvozden Neskovic return (rm->rm_ops->rec[RAIDZ_REC_PR]);
230e86372a0SGvozden Neskovic } else if (parity_valid[CODE_Q] && parity_valid[CODE_R]) {
231e86372a0SGvozden Neskovic return (rm->rm_ops->rec[RAIDZ_REC_QR]);
232e86372a0SGvozden Neskovic }
233e86372a0SGvozden Neskovic } else if (nbaddata == 3 &&
234e86372a0SGvozden Neskovic parity_valid[CODE_P] && parity_valid[CODE_Q] &&
235e86372a0SGvozden Neskovic parity_valid[CODE_R]) {
236e86372a0SGvozden Neskovic return (rm->rm_ops->rec[RAIDZ_REC_PQR]);
237e86372a0SGvozden Neskovic }
238e86372a0SGvozden Neskovic return ((raidz_rec_f) NULL);
239e86372a0SGvozden Neskovic }
240e86372a0SGvozden Neskovic
241e86372a0SGvozden Neskovic /*
242e86372a0SGvozden Neskovic * Select data reconstruction method for raidz_map
243e86372a0SGvozden Neskovic * @parity_valid - Parity validity flag
244e86372a0SGvozden Neskovic * @dt - Failed data index array
245e86372a0SGvozden Neskovic * @nbaddata - Number of failed data columns
246e86372a0SGvozden Neskovic */
247e86372a0SGvozden Neskovic int
vdev_raidz_math_reconstruct(raidz_map_t * rm,const int * parity_valid,const int * dt,const int nbaddata)248e86372a0SGvozden Neskovic vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
249e86372a0SGvozden Neskovic const int *dt, const int nbaddata)
250e86372a0SGvozden Neskovic {
251e86372a0SGvozden Neskovic raidz_rec_f rec_fn = NULL;
252e86372a0SGvozden Neskovic
253e86372a0SGvozden Neskovic switch (raidz_parity(rm)) {
254e86372a0SGvozden Neskovic case PARITY_P:
255e86372a0SGvozden Neskovic rec_fn = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
256e86372a0SGvozden Neskovic break;
257e86372a0SGvozden Neskovic case PARITY_PQ:
258e86372a0SGvozden Neskovic rec_fn = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
259e86372a0SGvozden Neskovic break;
260e86372a0SGvozden Neskovic case PARITY_PQR:
261e86372a0SGvozden Neskovic rec_fn = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
262e86372a0SGvozden Neskovic break;
263e86372a0SGvozden Neskovic default:
264e86372a0SGvozden Neskovic cmn_err(CE_PANIC, "invalid RAID-Z configuration %u",
265e86372a0SGvozden Neskovic (uint_t)raidz_parity(rm));
266e86372a0SGvozden Neskovic break;
267e86372a0SGvozden Neskovic }
268e86372a0SGvozden Neskovic
269e86372a0SGvozden Neskovic if (rec_fn == NULL)
270e86372a0SGvozden Neskovic return (RAIDZ_ORIGINAL_IMPL);
271e86372a0SGvozden Neskovic else
272e86372a0SGvozden Neskovic return (rec_fn(rm, dt));
273e86372a0SGvozden Neskovic }
274e86372a0SGvozden Neskovic
/* Names of the parity generation methods */
const char *raidz_gen_name[] = {
	"gen_p",
	"gen_pq",
	"gen_pqr"
};

/* Names of the data reconstruction methods */
const char *raidz_rec_name[] = {
	"rec_p",
	"rec_q",
	"rec_r",
	"rec_pq",
	"rec_pr",
	"rec_qr",
	"rec_pqr"
};
282e86372a0SGvozden Neskovic
283e86372a0SGvozden Neskovic #if defined(_KERNEL)
284e86372a0SGvozden Neskovic
285e86372a0SGvozden Neskovic #define BENCH_D_COLS (8ULL)
286e86372a0SGvozden Neskovic #define BENCH_COLS (BENCH_D_COLS + PARITY_PQR)
287e86372a0SGvozden Neskovic #define BENCH_ZIO_SIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT) /* 128 kiB */
288*0886dcadSAndy Fiddaman #define BENCH_NS MSEC2NSEC(1) /* 1ms */
289e86372a0SGvozden Neskovic
290e86372a0SGvozden Neskovic typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn);
291e86372a0SGvozden Neskovic
292e86372a0SGvozden Neskovic static void
benchmark_gen_impl(raidz_map_t * rm,const int fn)293e86372a0SGvozden Neskovic benchmark_gen_impl(raidz_map_t *rm, const int fn)
294e86372a0SGvozden Neskovic {
295e86372a0SGvozden Neskovic (void) fn;
296e86372a0SGvozden Neskovic vdev_raidz_generate_parity(rm);
297e86372a0SGvozden Neskovic }
298e86372a0SGvozden Neskovic
299e86372a0SGvozden Neskovic static void
benchmark_rec_impl(raidz_map_t * rm,const int fn)300e86372a0SGvozden Neskovic benchmark_rec_impl(raidz_map_t *rm, const int fn)
301e86372a0SGvozden Neskovic {
302e86372a0SGvozden Neskovic static const int rec_tgt[7][3] = {
303e86372a0SGvozden Neskovic {1, 2, 3}, /* rec_p: bad QR & D[0] */
304e86372a0SGvozden Neskovic {0, 2, 3}, /* rec_q: bad PR & D[0] */
305e86372a0SGvozden Neskovic {0, 1, 3}, /* rec_r: bad PQ & D[0] */
306e86372a0SGvozden Neskovic {2, 3, 4}, /* rec_pq: bad R & D[0][1] */
307e86372a0SGvozden Neskovic {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
308e86372a0SGvozden Neskovic {0, 3, 4}, /* rec_qr: bad P & D[0][1] */
309e86372a0SGvozden Neskovic {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
310e86372a0SGvozden Neskovic };
311e86372a0SGvozden Neskovic
312e86372a0SGvozden Neskovic vdev_raidz_reconstruct(rm, rec_tgt[fn], 3);
313e86372a0SGvozden Neskovic }
314e86372a0SGvozden Neskovic
315e86372a0SGvozden Neskovic /*
316e86372a0SGvozden Neskovic * Benchmarking of all supported implementations (raidz_supp_impl_cnt)
317e86372a0SGvozden Neskovic * is performed by setting the rm_ops pointer and calling the top level
318e86372a0SGvozden Neskovic * generate/reconstruct methods of bench_rm.
319e86372a0SGvozden Neskovic */
320e86372a0SGvozden Neskovic static void
benchmark_raidz_impl(raidz_map_t * bench_rm,const int fn,benchmark_fn bench_fn)321e86372a0SGvozden Neskovic benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
322e86372a0SGvozden Neskovic {
323e86372a0SGvozden Neskovic uint64_t run_cnt, speed, best_speed = 0;
324e86372a0SGvozden Neskovic hrtime_t t_start, t_diff;
325e86372a0SGvozden Neskovic raidz_impl_ops_t *curr_impl;
326e86372a0SGvozden Neskovic raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
327e86372a0SGvozden Neskovic int impl, i;
328e86372a0SGvozden Neskovic
329e86372a0SGvozden Neskovic for (impl = 0; impl < raidz_supp_impl_cnt; impl++) {
330e86372a0SGvozden Neskovic /* set an implementation to benchmark */
331e86372a0SGvozden Neskovic curr_impl = raidz_supp_impl[impl];
332e86372a0SGvozden Neskovic bench_rm->rm_ops = curr_impl;
333e86372a0SGvozden Neskovic
334e86372a0SGvozden Neskovic run_cnt = 0;
335e86372a0SGvozden Neskovic t_start = gethrtime();
336e86372a0SGvozden Neskovic
337e86372a0SGvozden Neskovic do {
338*0886dcadSAndy Fiddaman for (i = 0; i < 5; i++, run_cnt++)
339e86372a0SGvozden Neskovic bench_fn(bench_rm, fn);
340e86372a0SGvozden Neskovic
341e86372a0SGvozden Neskovic t_diff = gethrtime() - t_start;
342e86372a0SGvozden Neskovic } while (t_diff < BENCH_NS);
343e86372a0SGvozden Neskovic
344e86372a0SGvozden Neskovic speed = run_cnt * BENCH_ZIO_SIZE * NANOSEC;
345e86372a0SGvozden Neskovic speed /= (t_diff * BENCH_COLS);
346e86372a0SGvozden Neskovic
347e86372a0SGvozden Neskovic if (bench_fn == benchmark_gen_impl)
348e86372a0SGvozden Neskovic raidz_impl_kstats[impl].gen[fn] = speed;
349e86372a0SGvozden Neskovic else
350e86372a0SGvozden Neskovic raidz_impl_kstats[impl].rec[fn] = speed;
351e86372a0SGvozden Neskovic
352e86372a0SGvozden Neskovic /* Update fastest implementation method */
353e86372a0SGvozden Neskovic if (speed > best_speed) {
354e86372a0SGvozden Neskovic best_speed = speed;
355e86372a0SGvozden Neskovic
356e86372a0SGvozden Neskovic if (bench_fn == benchmark_gen_impl) {
357e86372a0SGvozden Neskovic fstat->gen[fn] = impl;
358e86372a0SGvozden Neskovic vdev_raidz_fastest_impl.gen[fn] =
359e86372a0SGvozden Neskovic curr_impl->gen[fn];
360e86372a0SGvozden Neskovic } else {
361e86372a0SGvozden Neskovic fstat->rec[fn] = impl;
362e86372a0SGvozden Neskovic vdev_raidz_fastest_impl.rec[fn] =
363e86372a0SGvozden Neskovic curr_impl->rec[fn];
364e86372a0SGvozden Neskovic }
365e86372a0SGvozden Neskovic }
366e86372a0SGvozden Neskovic }
367e86372a0SGvozden Neskovic }
368e86372a0SGvozden Neskovic #endif
369e86372a0SGvozden Neskovic
370e86372a0SGvozden Neskovic /*
371e86372a0SGvozden Neskovic * Initialize and benchmark all supported implementations.
372e86372a0SGvozden Neskovic */
373e86372a0SGvozden Neskovic static void
benchmark_raidz(void)374e86372a0SGvozden Neskovic benchmark_raidz(void)
375e86372a0SGvozden Neskovic {
376e86372a0SGvozden Neskovic raidz_impl_ops_t *curr_impl;
377e86372a0SGvozden Neskovic int i, c;
378e86372a0SGvozden Neskovic
379e86372a0SGvozden Neskovic /* Move supported impl into raidz_supp_impl */
380e86372a0SGvozden Neskovic for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
381e86372a0SGvozden Neskovic curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i];
382e86372a0SGvozden Neskovic
383e86372a0SGvozden Neskovic if (curr_impl->init)
384e86372a0SGvozden Neskovic curr_impl->init();
385e86372a0SGvozden Neskovic
386e86372a0SGvozden Neskovic if (curr_impl->is_supported())
387e86372a0SGvozden Neskovic raidz_supp_impl[c++] = (raidz_impl_ops_t *)curr_impl;
388e86372a0SGvozden Neskovic }
389e86372a0SGvozden Neskovic membar_producer(); /* complete raidz_supp_impl[] init */
390e86372a0SGvozden Neskovic raidz_supp_impl_cnt = c; /* number of supported impl */
391e86372a0SGvozden Neskovic
392e86372a0SGvozden Neskovic #if defined(_KERNEL)
393e86372a0SGvozden Neskovic zio_t *bench_zio = NULL;
394e86372a0SGvozden Neskovic raidz_map_t *bench_rm = NULL;
395e86372a0SGvozden Neskovic uint64_t bench_parity;
396e86372a0SGvozden Neskovic
397e86372a0SGvozden Neskovic /* Fake a zio and run the benchmark on a warmed up buffer */
398e86372a0SGvozden Neskovic bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
399e86372a0SGvozden Neskovic bench_zio->io_offset = 0;
400e86372a0SGvozden Neskovic bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */
401e86372a0SGvozden Neskovic bench_zio->io_abd = abd_alloc_linear(BENCH_ZIO_SIZE, B_TRUE);
402e86372a0SGvozden Neskovic memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE);
403e86372a0SGvozden Neskovic
404e86372a0SGvozden Neskovic /* Benchmark parity generation methods */
405e86372a0SGvozden Neskovic for (int fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
406e86372a0SGvozden Neskovic bench_parity = fn + 1;
407e86372a0SGvozden Neskovic /* New raidz_map is needed for each generate_p/q/r */
408e86372a0SGvozden Neskovic bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
409e86372a0SGvozden Neskovic BENCH_D_COLS + bench_parity, bench_parity);
410e86372a0SGvozden Neskovic
411e86372a0SGvozden Neskovic benchmark_raidz_impl(bench_rm, fn, benchmark_gen_impl);
412e86372a0SGvozden Neskovic
413e86372a0SGvozden Neskovic vdev_raidz_map_free(bench_rm);
414e86372a0SGvozden Neskovic }
415e86372a0SGvozden Neskovic
416e86372a0SGvozden Neskovic /* Benchmark data reconstruction methods */
417e86372a0SGvozden Neskovic bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
418e86372a0SGvozden Neskovic BENCH_COLS, PARITY_PQR);
419e86372a0SGvozden Neskovic
420e86372a0SGvozden Neskovic for (int fn = 0; fn < RAIDZ_REC_NUM; fn++)
421e86372a0SGvozden Neskovic benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
422e86372a0SGvozden Neskovic
423e86372a0SGvozden Neskovic vdev_raidz_map_free(bench_rm);
424e86372a0SGvozden Neskovic
425e86372a0SGvozden Neskovic /* cleanup the bench zio */
426e86372a0SGvozden Neskovic abd_free(bench_zio->io_abd);
427e86372a0SGvozden Neskovic kmem_free(bench_zio, sizeof (zio_t));
428e86372a0SGvozden Neskovic #else
429e86372a0SGvozden Neskovic /*
430e86372a0SGvozden Neskovic * Skip the benchmark in user space to avoid impacting libzpool
431e86372a0SGvozden Neskovic * consumers (zdb, zhack, zinject, ztest). The last implementation
432e86372a0SGvozden Neskovic * is assumed to be the fastest and used by default.
433e86372a0SGvozden Neskovic */
434e86372a0SGvozden Neskovic memcpy(&vdev_raidz_fastest_impl,
435e86372a0SGvozden Neskovic raidz_supp_impl[raidz_supp_impl_cnt - 1],
436e86372a0SGvozden Neskovic sizeof (vdev_raidz_fastest_impl));
437e86372a0SGvozden Neskovic strcpy(vdev_raidz_fastest_impl.name, "fastest");
438e86372a0SGvozden Neskovic #endif /* _KERNEL */
439e86372a0SGvozden Neskovic }
440e86372a0SGvozden Neskovic
441e86372a0SGvozden Neskovic void
vdev_raidz_math_init(void)442e86372a0SGvozden Neskovic vdev_raidz_math_init(void)
443e86372a0SGvozden Neskovic {
444e86372a0SGvozden Neskovic /* Determine the fastest available implementation. */
445e86372a0SGvozden Neskovic benchmark_raidz();
446e86372a0SGvozden Neskovic
447e86372a0SGvozden Neskovic /* Finish initialization */
448e86372a0SGvozden Neskovic atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
449e86372a0SGvozden Neskovic raidz_math_initialized = B_TRUE;
450e86372a0SGvozden Neskovic }
451e86372a0SGvozden Neskovic
452e86372a0SGvozden Neskovic void
vdev_raidz_math_fini(void)453e86372a0SGvozden Neskovic vdev_raidz_math_fini(void)
454e86372a0SGvozden Neskovic {
455e86372a0SGvozden Neskovic raidz_impl_ops_t const *curr_impl;
456e86372a0SGvozden Neskovic
457e86372a0SGvozden Neskovic for (int i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
458e86372a0SGvozden Neskovic curr_impl = raidz_all_maths[i];
459e86372a0SGvozden Neskovic if (curr_impl->fini)
460e86372a0SGvozden Neskovic curr_impl->fini();
461e86372a0SGvozden Neskovic }
462e86372a0SGvozden Neskovic }
463e86372a0SGvozden Neskovic
464e86372a0SGvozden Neskovic static const struct {
465e86372a0SGvozden Neskovic char *name;
466e86372a0SGvozden Neskovic uint32_t sel;
467e86372a0SGvozden Neskovic } math_impl_opts[] = {
468e86372a0SGvozden Neskovic { "cycle", IMPL_CYCLE },
469e86372a0SGvozden Neskovic { "fastest", IMPL_FASTEST },
470e86372a0SGvozden Neskovic { "original", IMPL_ORIGINAL },
471e86372a0SGvozden Neskovic { "scalar", IMPL_SCALAR }
472e86372a0SGvozden Neskovic };
473e86372a0SGvozden Neskovic
474e86372a0SGvozden Neskovic /*
475e86372a0SGvozden Neskovic * Function sets desired raidz implementation.
476e86372a0SGvozden Neskovic *
477e86372a0SGvozden Neskovic * If we are called before init(), user preference will be saved in
478e86372a0SGvozden Neskovic * user_sel_impl, and applied in later init() call. This occurs when module
479e86372a0SGvozden Neskovic * parameter is specified on module load. Otherwise, directly update
480e86372a0SGvozden Neskovic * zfs_vdev_raidz_impl.
481e86372a0SGvozden Neskovic *
482e86372a0SGvozden Neskovic * @val Name of raidz implementation to use
483e86372a0SGvozden Neskovic * @param Unused.
484e86372a0SGvozden Neskovic */
485e86372a0SGvozden Neskovic int
vdev_raidz_impl_set(const char * val)486e86372a0SGvozden Neskovic vdev_raidz_impl_set(const char *val)
487e86372a0SGvozden Neskovic {
488*0886dcadSAndy Fiddaman int err = EINVAL;
489e86372a0SGvozden Neskovic char req_name[RAIDZ_IMPL_NAME_MAX];
490e86372a0SGvozden Neskovic uint32_t impl = RAIDZ_IMPL_READ(user_sel_impl);
491e86372a0SGvozden Neskovic size_t i;
492e86372a0SGvozden Neskovic
493e86372a0SGvozden Neskovic /* sanitize input */
494e86372a0SGvozden Neskovic i = strnlen(val, RAIDZ_IMPL_NAME_MAX);
495e86372a0SGvozden Neskovic if (i == 0 || i == RAIDZ_IMPL_NAME_MAX)
496e86372a0SGvozden Neskovic return (err);
497e86372a0SGvozden Neskovic
498e86372a0SGvozden Neskovic strlcpy(req_name, val, RAIDZ_IMPL_NAME_MAX);
499e86372a0SGvozden Neskovic while (i > 0 && !!isspace(req_name[i-1]))
500e86372a0SGvozden Neskovic i--;
501e86372a0SGvozden Neskovic req_name[i] = '\0';
502e86372a0SGvozden Neskovic
503e86372a0SGvozden Neskovic /* Check mandatory options */
504e86372a0SGvozden Neskovic for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
505e86372a0SGvozden Neskovic if (strcmp(req_name, math_impl_opts[i].name) == 0) {
506e86372a0SGvozden Neskovic impl = math_impl_opts[i].sel;
507e86372a0SGvozden Neskovic err = 0;
508e86372a0SGvozden Neskovic break;
509e86372a0SGvozden Neskovic }
510e86372a0SGvozden Neskovic }
511e86372a0SGvozden Neskovic
512e86372a0SGvozden Neskovic /* check all supported impl if init() was already called */
513e86372a0SGvozden Neskovic if (err != 0 && raidz_math_initialized) {
514e86372a0SGvozden Neskovic /* check all supported implementations */
515e86372a0SGvozden Neskovic for (i = 0; i < raidz_supp_impl_cnt; i++) {
516e86372a0SGvozden Neskovic if (strcmp(req_name, raidz_supp_impl[i]->name) == 0) {
517e86372a0SGvozden Neskovic impl = i;
518e86372a0SGvozden Neskovic err = 0;
519e86372a0SGvozden Neskovic break;
520e86372a0SGvozden Neskovic }
521e86372a0SGvozden Neskovic }
522e86372a0SGvozden Neskovic }
523e86372a0SGvozden Neskovic
524e86372a0SGvozden Neskovic if (err == 0) {
525e86372a0SGvozden Neskovic if (raidz_math_initialized)
526e86372a0SGvozden Neskovic atomic_swap_32(&zfs_vdev_raidz_impl, impl);
527e86372a0SGvozden Neskovic else
528e86372a0SGvozden Neskovic atomic_swap_32(&user_sel_impl, impl);
529e86372a0SGvozden Neskovic }
530e86372a0SGvozden Neskovic
531e86372a0SGvozden Neskovic return (err);
532e86372a0SGvozden Neskovic }
533e86372a0SGvozden Neskovic
534e86372a0SGvozden Neskovic #if defined(_KERNEL) && defined(__linux__)
535e86372a0SGvozden Neskovic
536e86372a0SGvozden Neskovic static int
zfs_vdev_raidz_impl_set(const char * val,zfs_kernel_param_t * kp)537e86372a0SGvozden Neskovic zfs_vdev_raidz_impl_set(const char *val, zfs_kernel_param_t *kp)
538e86372a0SGvozden Neskovic {
539e86372a0SGvozden Neskovic return (vdev_raidz_impl_set(val));
540e86372a0SGvozden Neskovic }
541e86372a0SGvozden Neskovic
542e86372a0SGvozden Neskovic static int
zfs_vdev_raidz_impl_get(char * buffer,zfs_kernel_param_t * kp)543e86372a0SGvozden Neskovic zfs_vdev_raidz_impl_get(char *buffer, zfs_kernel_param_t *kp)
544e86372a0SGvozden Neskovic {
545e86372a0SGvozden Neskovic int i, cnt = 0;
546e86372a0SGvozden Neskovic char *fmt;
547e86372a0SGvozden Neskovic const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
548e86372a0SGvozden Neskovic
549e86372a0SGvozden Neskovic ASSERT(raidz_math_initialized);
550e86372a0SGvozden Neskovic
551e86372a0SGvozden Neskovic /* list mandatory options */
552e86372a0SGvozden Neskovic for (i = 0; i < ARRAY_SIZE(math_impl_opts) - 2; i++) {
553e86372a0SGvozden Neskovic fmt = (impl == math_impl_opts[i].sel) ? "[%s] " : "%s ";
554e86372a0SGvozden Neskovic cnt += sprintf(buffer + cnt, fmt, math_impl_opts[i].name);
555e86372a0SGvozden Neskovic }
556e86372a0SGvozden Neskovic
557e86372a0SGvozden Neskovic /* list all supported implementations */
558e86372a0SGvozden Neskovic for (i = 0; i < raidz_supp_impl_cnt; i++) {
559e86372a0SGvozden Neskovic fmt = (i == impl) ? "[%s] " : "%s ";
560e86372a0SGvozden Neskovic cnt += sprintf(buffer + cnt, fmt, raidz_supp_impl[i]->name);
561e86372a0SGvozden Neskovic }
562e86372a0SGvozden Neskovic
563e86372a0SGvozden Neskovic return (cnt);
564e86372a0SGvozden Neskovic }
565e86372a0SGvozden Neskovic
566e86372a0SGvozden Neskovic module_param_call(zfs_vdev_raidz_impl, zfs_vdev_raidz_impl_set,
567e86372a0SGvozden Neskovic zfs_vdev_raidz_impl_get, NULL, 0644);
568e86372a0SGvozden Neskovic MODULE_PARM_DESC(zfs_vdev_raidz_impl, "Select raidz implementation.");
569e86372a0SGvozden Neskovic #endif
570