1199767fToomas Soome/*
2199767fToomas Soome * CDDL HEADER START
3199767fToomas Soome *
4199767fToomas Soome * The contents of this file are subject to the terms of the
5199767fToomas Soome * Common Development and Distribution License (the "License").
6199767fToomas Soome * You may not use this file except in compliance with the License.
7199767fToomas Soome *
8199767fToomas Soome * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9199767fToomas Soome * or http://www.opensolaris.org/os/licensing.
10199767fToomas Soome * See the License for the specific language governing permissions
11199767fToomas Soome * and limitations under the License.
12199767fToomas Soome *
13199767fToomas Soome * When distributing Covered Code, include this CDDL HEADER in each
14199767fToomas Soome * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15199767fToomas Soome * If applicable, add the following below this CDDL HEADER, with the
16199767fToomas Soome * fields enclosed by brackets "[]" replaced with your own identifying
17199767fToomas Soome * information: Portions Copyright [yyyy] [name of copyright owner]
18199767fToomas Soome *
19199767fToomas Soome * CDDL HEADER END
20199767fToomas Soome */
21199767fToomas Soome/*
22199767fToomas Soome * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23199767fToomas Soome * Use is subject to license terms.
24199767fToomas Soome */
25199767fToomas Soome
26199767fToomas Soome#include <sys/cdefs.h>
2710ae99eToomas Soome#include <lz4.h>
28199767fToomas Soome
29199767fToomas Soomestatic uint64_t zfs_crc64_table[256];
30199767fToomas Soome
31199767fToomas Soome#define	ECKSUM	666
32199767fToomas Soome
33199767fToomas Soome#define	ASSERT3S(x, y, z)	((void)0)
34199767fToomas Soome#define	ASSERT3U(x, y, z)	((void)0)
35199767fToomas Soome#define	ASSERT3P(x, y, z)	((void)0)
36199767fToomas Soome#define	ASSERT0(x)		((void)0)
37199767fToomas Soome#define	ASSERT(x)		((void)0)
38199767fToomas Soome
39199767fToomas Soomestatic void
40199767fToomas Soomezfs_init_crc(void)
41199767fToomas Soome{
42199767fToomas Soome	int i, j;
43199767fToomas Soome	uint64_t *ct;
44199767fToomas Soome
45199767fToomas Soome	/*
46199767fToomas Soome	 * Calculate the crc64 table (used for the zap hash
47199767fToomas Soome	 * function).
48199767fToomas Soome	 */
49199767fToomas Soome	if (zfs_crc64_table[128] != ZFS_CRC64_POLY) {
507bbcfb4Toomas Soome		memset(zfs_crc64_table, 0, sizeof (zfs_crc64_table));
517bbcfb4Toomas Soome		for (i = 0; i < 256; i++) {
527bbcfb4Toomas Soome			ct = zfs_crc64_table + i;
537bbcfb4Toomas Soome			for (*ct = i, j = 8; j > 0; j--)
547bbcfb4Toomas Soome				*ct = (*ct >> 1) ^
557bbcfb4Toomas Soome				    (-(*ct & 1) & ZFS_CRC64_POLY);
567bbcfb4Toomas Soome		}
57199767fToomas Soome	}
58199767fToomas Soome}
59199767fToomas Soome
60199767fToomas Soomestatic void
618eef2abToomas Soomezio_checksum_off(const void *buf __unused, uint64_t size __unused,
628eef2abToomas Soome    const void *ctx_template __unused, zio_cksum_t *zcp)
63199767fToomas Soome{
64199767fToomas Soome	ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
65199767fToomas Soome}
66199767fToomas Soome
67199767fToomas Soome/*
68199767fToomas Soome * Signature for checksum functions.
69199767fToomas Soome */
70199767fToomas Soometypedef void zio_checksum_t(const void *data, uint64_t size,
71199767fToomas Soome    const void *ctx_template, zio_cksum_t *zcp);
72199767fToomas Soometypedef void *zio_checksum_tmpl_init_t(const zio_cksum_salt_t *salt);
73199767fToomas Soometypedef void zio_checksum_tmpl_free_t(void *ctx_template);
74199767fToomas Soome
75199767fToomas Soometypedef enum zio_checksum_flags {
76199767fToomas Soome	/* Strong enough for metadata? */
77199767fToomas Soome	ZCHECKSUM_FLAG_METADATA = (1 << 1),
78199767fToomas Soome	/* ZIO embedded checksum */
79199767fToomas Soome	ZCHECKSUM_FLAG_EMBEDDED = (1 << 2),
80199767fToomas Soome	/* Strong enough for dedup (without verification)? */
81199767fToomas Soome	ZCHECKSUM_FLAG_DEDUP = (1 << 3),
82199767fToomas Soome	/* Uses salt value */
83199767fToomas Soome	ZCHECKSUM_FLAG_SALTED = (1 << 4),
84199767fToomas Soome	/* Strong enough for nopwrite? */
85199767fToomas Soome	ZCHECKSUM_FLAG_NOPWRITE = (1 << 5)
86199767fToomas Soome} zio_checksum_flags_t;
87199767fToomas Soome
88199767fToomas Soome/*
89199767fToomas Soome * Information about each checksum function.
90199767fToomas Soome */
91199767fToomas Soometypedef struct zio_checksum_info {
92199767fToomas Soome	/* checksum function for each byteorder */
93199767fToomas Soome	zio_checksum_t			*ci_func[2];
94199767fToomas Soome	zio_checksum_tmpl_init_t	*ci_tmpl_init;
95199767fToomas Soome	zio_checksum_tmpl_free_t	*ci_tmpl_free;
96199767fToomas Soome	zio_checksum_flags_t		ci_flags;
97199767fToomas Soome	const char			*ci_name;	/* descriptive name */
98199767fToomas Soome} zio_checksum_info_t;
99199767fToomas Soome
100199767fToomas Soome#include "blkptr.c"
101199767fToomas Soome
102199767fToomas Soome#include "fletcher.c"
103199767fToomas Soome#include "sha256.c"
1044a04e8dToomas Soome#include "skein_zfs.c"
1054a04e8dToomas Soome#include "edonr_zfs.c"
106199767fToomas Soome
107199767fToomas Soomestatic zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
108199767fToomas Soome	{{NULL, NULL}, NULL, NULL, 0, "inherit"},
109199767fToomas Soome	{{NULL, NULL}, NULL, NULL, 0, "on"},
110199767fToomas Soome	{{zio_checksum_off,	zio_checksum_off}, NULL, NULL, 0, "off"},
111199767fToomas Soome	{{zio_checksum_SHA256,	zio_checksum_SHA256}, NULL, NULL,
112199767fToomas Soome	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED, "label"},
113199767fToomas Soome	{{zio_checksum_SHA256,	zio_checksum_SHA256}, NULL, NULL,
114199767fToomas Soome	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED, "gang_header"},
115199767fToomas Soome	{{fletcher_2_native,	fletcher_2_byteswap}, NULL, NULL,
116199767fToomas Soome	    ZCHECKSUM_FLAG_EMBEDDED, "zilog"},
117199767fToomas Soome	{{fletcher_2_native,	fletcher_2_byteswap}, NULL, NULL,
118199767fToomas Soome	    0, "fletcher2"},
119199767fToomas Soome	{{fletcher_4_native,	fletcher_4_byteswap}, NULL, NULL,
120199767fToomas Soome	    ZCHECKSUM_FLAG_METADATA, "fletcher4"},
121199767fToomas Soome	{{zio_checksum_SHA256,	zio_checksum_SHA256}, NULL, NULL,
122199767fToomas Soome	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
123199767fToomas Soome	    ZCHECKSUM_FLAG_NOPWRITE, "SHA256"},
124199767fToomas Soome	{{fletcher_4_native,	fletcher_4_byteswap}, NULL, NULL,
125199767fToomas Soome	    ZCHECKSUM_FLAG_EMBEDDED, "zillog2"},
126199767fToomas Soome	{{zio_checksum_off,	zio_checksum_off}, NULL, NULL,
127199767fToomas Soome	    0, "noparity"},
128199767fToomas Soome	{{zio_checksum_SHA512_native,	zio_checksum_SHA512_byteswap},
129199767fToomas Soome	    NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
130199767fToomas Soome	    ZCHECKSUM_FLAG_NOPWRITE, "SHA512"},
131199767fToomas Soome	/* no skein and edonr for now */
1324a04e8dToomas Soome	{{zio_checksum_skein_native, zio_checksum_skein_byteswap},
1334a04e8dToomas Soome	    zio_checksum_skein_tmpl_init, zio_checksum_skein_tmpl_free,
1344a04e8dToomas Soome	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
1354a04e8dToomas Soome	    ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "skein"},
1364a04e8dToomas Soome	{{zio_checksum_edonr_native, zio_checksum_edonr_byteswap},
1374a04e8dToomas Soome	    zio_checksum_edonr_tmpl_init, zio_checksum_edonr_tmpl_free,
1384a04e8dToomas Soome	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_SALTED |
1394a04e8dToomas Soome	    ZCHECKSUM_FLAG_NOPWRITE, "edonr"},
140199767fToomas Soome};
141199767fToomas Soome
142199767fToomas Soome/*
143199767fToomas Soome * Common signature for all zio compress/decompress functions.
144199767fToomas Soome */
145199767fToomas Soometypedef size_t zio_compress_func_t(void *src, void *dst,
146199767fToomas Soome    size_t s_len, size_t d_len, int);
147199767fToomas Soometypedef int zio_decompress_func_t(void *src, void *dst,
148199767fToomas Soome    size_t s_len, size_t d_len, int);
149199767fToomas Soome
150199767fToomas Soomeextern int gzip_decompress(void *src, void *dst,
151199767fToomas Soome    size_t s_len, size_t d_len, int);
152199767fToomas Soome/*
153199767fToomas Soome * Information about each compression function.
154199767fToomas Soome */
155199767fToomas Soometypedef struct zio_compress_info {
156199767fToomas Soome	zio_compress_func_t	*ci_compress;	/* compression function */
157199767fToomas Soome	zio_decompress_func_t	*ci_decompress;	/* decompression function */
158199767fToomas Soome	int			ci_level;	/* level parameter */
159199767fToomas Soome	const char		*ci_name;	/* algorithm name */
160199767fToomas Soome} zio_compress_info_t;
161199767fToomas Soome
162199767fToomas Soome#include "lzjb.c"
163199767fToomas Soome#include "zle.c"
164199767fToomas Soome
165199767fToomas Soome/*
166199767fToomas Soome * Compression vectors.
167199767fToomas Soome */
168199767fToomas Soomestatic zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
169199767fToomas Soome	{NULL,			NULL,			0,	"inherit"},
170199767fToomas Soome	{NULL,			NULL,			0,	"on"},
171199767fToomas Soome	{NULL,			NULL,			0,	"uncompressed"},
172199767fToomas Soome	{NULL,			lzjb_decompress,	0,	"lzjb"},
173199767fToomas Soome	{NULL,			NULL,			0,	"empty"},
174199767fToomas Soome	{NULL,			gzip_decompress,	1,	"gzip-1"},
175199767fToomas Soome	{NULL,			gzip_decompress,	2,	"gzip-2"},
176199767fToomas Soome	{NULL,			gzip_decompress,	3,	"gzip-3"},
177199767fToomas Soome	{NULL,			gzip_decompress,	4,	"gzip-4"},
178199767fToomas Soome	{NULL,			gzip_decompress,	5,	"gzip-5"},
179199767fToomas Soome	{NULL,			gzip_decompress,	6,	"gzip-6"},
180199767fToomas Soome	{NULL,			gzip_decompress,	7,	"gzip-7"},
181199767fToomas Soome	{NULL,			gzip_decompress,	8,	"gzip-8"},
182199767fToomas Soome	{NULL,			gzip_decompress,	9,	"gzip-9"},
183199767fToomas Soome	{NULL,			zle_decompress,		64,	"zle"},
184199767fToomas Soome	{NULL,			lz4_decompress,		0,	"lz4"},
185199767fToomas Soome};
186199767fToomas Soome
187199767fToomas Soomestatic void
188199767fToomas Soomebyteswap_uint64_array(void *vbuf, size_t size)
189199767fToomas Soome{
190199767fToomas Soome	uint64_t *buf = vbuf;
191199767fToomas Soome	size_t count = size >> 3;
192199767fToomas Soome	int i;
193199767fToomas Soome
194199767fToomas Soome	ASSERT((size & 7) == 0);
195199767fToomas Soome
196199767fToomas Soome	for (i = 0; i < count; i++)
197199767fToomas Soome		buf[i] = BSWAP_64(buf[i]);
198199767fToomas Soome}
199199767fToomas Soome
200199767fToomas Soome/*
201199767fToomas Soome * Set the external verifier for a gang block based on <vdev, offset, txg>,
202199767fToomas Soome * a tuple which is guaranteed to be unique for the life of the pool.
203199767fToomas Soome */
204199767fToomas Soomestatic void
205199767fToomas Soomezio_checksum_gang_verifier(zio_cksum_t *zcp, const blkptr_t *bp)
206199767fToomas Soome{
207199767fToomas Soome	const dva_t *dva = BP_IDENTITY(bp);
208199767fToomas Soome	uint64_t txg = BP_PHYSICAL_BIRTH(bp);
209199767fToomas Soome
210199767fToomas Soome	ASSERT(BP_IS_GANG(bp));
211199767fToomas Soome
212199767fToomas Soome	ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0);
213199767fToomas Soome}
214199767fToomas Soome
215199767fToomas Soome/*
216199767fToomas Soome * Set the external verifier for a label block based on its offset.
217199767fToomas Soome * The vdev is implicit, and the txg is unknowable at pool open time --
218199767fToomas Soome * hence the logic in vdev_uberblock_load() to find the most recent copy.
219199767fToomas Soome */
220199767fToomas Soomestatic void
221199767fToomas Soomezio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset)
222199767fToomas Soome{
223199767fToomas Soome	ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0);
224199767fToomas Soome}
225199767fToomas Soome
226199767fToomas Soome/*
227199767fToomas Soome * Calls the template init function of a checksum which supports context
228199767fToomas Soome * templates and installs the template into the spa_t.
229199767fToomas Soome */
230199767fToomas Soomestatic void
2314a04e8dToomas Soomezio_checksum_template_init(enum zio_checksum checksum, spa_t *spa)
232199767fToomas Soome{
233199767fToomas Soome	zio_checksum_info_t *ci = &zio_checksum_table[checksum];
234199767fToomas Soome
235199767fToomas Soome	if (ci->ci_tmpl_init == NULL)
236199767fToomas Soome		return;
2374a04e8dToomas Soome
238199767fToomas Soome	if (spa->spa_cksum_tmpls[checksum] != NULL)
239199767fToomas Soome		return;
240199767fToomas Soome
241199767fToomas Soome	if (spa->spa_cksum_tmpls[checksum] == NULL) {
242199767fToomas Soome		spa->spa_cksum_tmpls[checksum] =
243199767fToomas Soome		    ci->ci_tmpl_init(&spa->spa_cksum_salt);
244199767fToomas Soome	}
2454a04e8dToomas Soome}
2464a04e8dToomas Soome
2474a04e8dToomas Soome/*
2484a04e8dToomas Soome * Called by a spa_t that's about to be deallocated. This steps through
2494a04e8dToomas Soome * all of the checksum context templates and deallocates any that were
2504a04e8dToomas Soome * initialized using the algorithm-specific template init function.
2514a04e8dToomas Soome */
2524a04e8dToomas Soomevoid
2534a04e8dToomas Soomezio_checksum_templates_free(spa_t *spa)
2544a04e8dToomas Soome{
2554a04e8dToomas Soome	for (enum zio_checksum checksum = 0;
2564a04e8dToomas Soome	    checksum < ZIO_CHECKSUM_FUNCTIONS; checksum++) {
2574a04e8dToomas Soome		if (spa->spa_cksum_tmpls[checksum] != NULL) {
2584a04e8dToomas Soome			zio_checksum_info_t *ci = &zio_checksum_table[checksum];
2594a04e8dToomas Soome
2604a04e8dToomas Soome			ci->ci_tmpl_free(spa->spa_cksum_tmpls[checksum]);
2614a04e8dToomas Soome			spa->spa_cksum_tmpls[checksum] = NULL;
2624a04e8dToomas Soome		}
2634a04e8dToomas Soome	}
264199767fToomas Soome}
265199767fToomas Soome
266199767fToomas Soomestatic int
2674a04e8dToomas Soomezio_checksum_verify(const spa_t *spa, const blkptr_t *bp, void *data)
268199767fToomas Soome{
269199767fToomas Soome	uint64_t size;
270199767fToomas Soome	unsigned int checksum;
271199767fToomas Soome	zio_checksum_info_t *ci;
2724a04e8dToomas Soome	void *ctx = NULL;
273199767fToomas Soome	zio_cksum_t actual_cksum, expected_cksum, verifier;
274199767fToomas Soome	int byteswap;
275199767fToomas Soome
276199767fToomas Soome	checksum = BP_GET_CHECKSUM(bp);
277199767fToomas Soome	size = BP_GET_PSIZE(bp);
278199767fToomas Soome
279199767fToomas Soome	if (checksum >= ZIO_CHECKSUM_FUNCTIONS)
280199767fToomas Soome		return (EINVAL);
281199767fToomas Soome	ci = &zio_checksum_table[checksum];
282199767fToomas Soome	if (ci->ci_func[0] == NULL || ci->ci_func[1] == NULL)
283199767fToomas Soome		return (EINVAL);
284199767fToomas Soome
2854a04e8dToomas Soome	if (spa != NULL) {
2867bbcfb4Toomas Soome		zio_checksum_template_init(checksum, (spa_t *)spa);
2874a04e8dToomas Soome		ctx = spa->spa_cksum_tmpls[checksum];
2884a04e8dToomas Soome	}
2894a04e8dToomas Soome
290199767fToomas Soome	if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
291199767fToomas Soome		zio_eck_t *eck;
292199767fToomas Soome
293199767fToomas Soome		ASSERT(checksum == ZIO_CHECKSUM_GANG_HEADER ||
294199767fToomas Soome		    checksum == ZIO_CHECKSUM_LABEL);
295199767fToomas Soome
296199767fToomas Soome		eck = (zio_eck_t *)((char *)data + size) - 1;
297199767fToomas Soome
298199767fToomas Soome		if (checksum == ZIO_CHECKSUM_GANG_HEADER)
299199767fToomas Soome			zio_checksum_gang_verifier(&verifier, bp);
300199767fToomas Soome		else if (checksum == ZIO_CHECKSUM_LABEL)
301199767fToomas Soome			zio_checksum_label_verifier(&verifier,
302199767fToomas Soome			    DVA_GET_OFFSET(BP_IDENTITY(bp)));
303199767fToomas Soome		else
304199767fToomas Soome			verifier = bp->blk_cksum;
305199767fToomas Soome
306199767fToomas Soome		byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC));
307199767fToomas Soome
308199767fToomas Soome		if (byteswap)
309199767fToomas Soome			byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
310199767fToomas Soome
311199767fToomas Soome		expected_cksum = eck->zec_cksum;
312199767fToomas Soome		eck->zec_cksum = verifier;
3134a04e8dToomas Soome		ci->ci_func[byteswap](data, size, ctx, &actual_cksum);
314199767fToomas Soome		eck->zec_cksum = expected_cksum;
315199767fToomas Soome
316199767fToomas Soome		if (byteswap)
317199767fToomas Soome			byteswap_uint64_array(&expected_cksum,
318199767fToomas Soome			    sizeof (zio_cksum_t));
319199767fToomas Soome	} else {
320ece0bc8Toomas Soome		byteswap = BP_SHOULD_BYTESWAP(bp);
321199767fToomas Soome		expected_cksum = bp->blk_cksum;
322ece0bc8Toomas Soome		ci->ci_func[byteswap](data, size, ctx, &actual_cksum);
323199767fToomas Soome	}
324199767fToomas Soome
325199767fToomas Soome	if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) {
3264a04e8dToomas Soome		/* printf("ZFS: read checksum %s failed\n", ci->ci_name); */
327199767fToomas Soome		return (EIO);
328199767fToomas Soome	}
329199767fToomas Soome
330199767fToomas Soome	return (0);
331199767fToomas Soome}
332199767fToomas Soome
333199767fToomas Soomestatic int
334199767fToomas Soomezio_decompress_data(int cpfunc, void *src, uint64_t srcsize,
3357bbcfb4Toomas Soome    void *dest, uint64_t destsize)
336199767fToomas Soome{
337199767fToomas Soome	zio_compress_info_t *ci;
338199767fToomas Soome
339199767fToomas Soome	if (cpfunc >= ZIO_COMPRESS_FUNCTIONS) {
340199767fToomas Soome		printf("ZFS: unsupported compression algorithm %u\n", cpfunc);
341199767fToomas Soome		return (EIO);
342199767fToomas Soome	}
343199767fToomas Soome
344199767fToomas Soome	ci = &zio_compress_table[cpfunc];
345199767fToomas Soome	if (!ci->ci_decompress) {
346199767fToomas Soome		printf("ZFS: unsupported compression algorithm %s\n",
347199767fToomas Soome		    ci->ci_name);
348199767fToomas Soome		return (EIO);
349199767fToomas Soome	}
350199767fToomas Soome
351199767fToomas Soome	return (ci->ci_decompress(src, dest, srcsize, destsize, ci->ci_level));
352199767fToomas Soome}
353199767fToomas Soome
354199767fToomas Soomestatic uint64_t
355199767fToomas Soomezap_hash(uint64_t salt, const char *name)
356199767fToomas Soome{
357199767fToomas Soome	const uint8_t *cp;
358199767fToomas Soome	uint8_t c;
359199767fToomas Soome	uint64_t crc = salt;
360199767fToomas Soome
361199767fToomas Soome	ASSERT(crc != 0);
362199767fToomas Soome	ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
363199767fToomas Soome	for (cp = (const uint8_t *)name; (c = *cp) != '\0'; cp++)
364199767fToomas Soome		crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ c) & 0xFF];
365199767fToomas Soome
366199767fToomas Soome	/*
367199767fToomas Soome	 * Only use 28 bits, since we need 4 bits in the cookie for the
368199767fToomas Soome	 * collision differentiator.  We MUST use the high bits, since
369199767fToomas Soome	 * those are the onces that we first pay attention to when
370199767fToomas Soome	 * chosing the bucket.
371199767fToomas Soome	 */
372199767fToomas Soome	crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1);
373199767fToomas Soome
374199767fToomas Soome	return (crc);
375199767fToomas Soome}
376199767fToomas Soome
377199767fToomas Soometypedef struct raidz_col {
378199767fToomas Soome	uint64_t rc_devidx;		/* child device index for I/O */
379199767fToomas Soome	uint64_t rc_offset;		/* device offset */
380199767fToomas Soome	uint64_t rc_size;		/* I/O size */
381199767fToomas Soome	void *rc_data;			/* I/O data */
382199767fToomas Soome	int rc_error;			/* I/O error for this device */
383199767fToomas Soome	uint8_t rc_tried;		/* Did we attempt this I/O column? */
384199767fToomas Soome	uint8_t rc_skipped;		/* Did we skip this I/O column? */
385199767fToomas Soome} raidz_col_t;
386199767fToomas Soome
387199767fToomas Soometypedef struct raidz_map {
388199767fToomas Soome	uint64_t rm_cols;		/* Regular column count */
389199767fToomas Soome	uint64_t rm_scols;		/* Count including skipped columns */
390199767fToomas Soome	uint64_t rm_bigcols;		/* Number of oversized columns */
391199767fToomas Soome	uint64_t rm_asize;		/* Actual total I/O size */
392199767fToomas Soome	uint64_t rm_missingdata;	/* Count of missing data devices */
393199767fToomas Soome	uint64_t rm_missingparity;	/* Count of missing parity devices */
394199767fToomas Soome	uint64_t rm_firstdatacol;	/* First data column/parity count */
395199767fToomas Soome	uint64_t rm_nskip;		/* Skipped sectors for padding */
396199767fToomas Soome	uint64_t rm_skipstart;		/* Column index of padding start */
397199767fToomas Soome	uintptr_t rm_reports;		/* # of referencing checksum reports */
398199767fToomas Soome	uint8_t	rm_freed;		/* map no longer has referencing ZIO */
399199767fToomas Soome	uint8_t	rm_ecksuminjected;	/* checksum error was injected */
400199767fToomas Soome	raidz_col_t rm_col[1];		/* Flexible array of I/O columns */
401199767fToomas Soome} raidz_map_t;
402199767fToomas Soome
403199767fToomas Soome#define	VDEV_RAIDZ_P		0
404199767fToomas Soome#define	VDEV_RAIDZ_Q		1
405199767fToomas Soome#define	VDEV_RAIDZ_R		2
406199767fToomas Soome
407199767fToomas Soome#define	VDEV_RAIDZ_MUL_2(x)	(((x) << 1) ^ (((x) & 0x80) ? 0x1d : 0))
408199767fToomas Soome#define	VDEV_RAIDZ_MUL_4(x)	(VDEV_RAIDZ_MUL_2(VDEV_RAIDZ_MUL_2(x)))
409199767fToomas Soome
410199767fToomas Soome/*
411199767fToomas Soome * We provide a mechanism to perform the field multiplication operation on a
412199767fToomas Soome * 64-bit value all at once rather than a byte at a time. This works by
413199767fToomas Soome * creating a mask from the top bit in each byte and using that to
414199767fToomas Soome * conditionally apply the XOR of 0x1d.
415199767fToomas Soome */
416199767fToomas Soome#define	VDEV_RAIDZ_64MUL_2(x, mask) \
417199767fToomas Soome{ \
418199767fToomas Soome	(mask) = (x) & 0x8080808080808080ULL; \
419199767fToomas Soome	(mask) = ((mask) << 1) - ((mask) >> 7); \
420199767fToomas Soome	(x) = (((x) << 1) & 0xfefefefefefefefeULL) ^ \
421199767fToomas Soome	    ((mask) & 0x1d1d1d1d1d1d1d1dULL); \
422199767fToomas Soome}
423199767fToomas Soome
424199767fToomas Soome#define	VDEV_RAIDZ_64MUL_4(x, mask) \
425199767fToomas Soome{ \
426199767fToomas Soome	VDEV_RAIDZ_64MUL_2((x), mask); \
427199767fToomas Soome	VDEV_RAIDZ_64MUL_2((x), mask); \
428199767fToomas Soome}
429199767fToomas Soome
430199767fToomas Soome/*
431199767fToomas Soome * These two tables represent powers and logs of 2 in the Galois field defined
432199767fToomas Soome * above. These values were computed by repeatedly multiplying by 2 as above.
433199767fToomas Soome */
434199767fToomas Soomestatic const uint8_t vdev_raidz_pow2[256] = {
435199767fToomas Soome	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
436199767fToomas Soome	0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
437199767fToomas Soome	0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
438199767fToomas Soome	0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
439199767fToomas Soome	0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
440199767fToomas Soome	0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
441199767fToomas Soome	0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
442199767fToomas Soome	0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
443199767fToomas Soome	0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
444199767fToomas Soome	0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
445199767fToomas Soome	0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
446199767fToomas Soome	0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
447199767fToomas Soome	0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
448199767fToomas Soome	0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
449199767fToomas Soome	0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
450199767fToomas Soome	0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
451199767fToomas Soome	0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
452199767fToomas Soome	0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
453199767fToomas Soome	0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
454199767fToomas Soome	0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
455199767fToomas Soome	0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
456199767fToomas Soome	0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
457199767fToomas Soome	0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
458199767fToomas Soome	0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
459199767fToomas Soome	0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
460199767fToomas Soome	0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
461199767fToomas Soome	0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
462199767fToomas Soome	0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
463199767fToomas Soome	0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
464199767fToomas Soome	0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
465199767fToomas Soome	0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
466199767fToomas Soome	0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
467199767fToomas Soome};
468199767fToomas Soomestatic const uint8_t vdev_raidz_log2[256] = {
469199767fToomas Soome	0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
470199767fToomas Soome	0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
471199767fToomas Soome	0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
472199767fToomas Soome	0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
473199767fToomas Soome	0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
474199767fToomas Soome	0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
475199767fToomas Soome	0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
476199767fToomas Soome	0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
477199767fToomas Soome	0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
478199767fToomas Soome	0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
479199767fToomas Soome	0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
480199767fToomas Soome	0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
481199767fToomas Soome	0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
482199767fToomas Soome	0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
483199767fToomas Soome	0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
484199767fToomas Soome	0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
485199767fToomas Soome	0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
486199767fToomas Soome	0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
487199767fToomas Soome	0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
488199767fToomas Soome	0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
489199767fToomas Soome	0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
490199767fToomas Soome	0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
491199767fToomas Soome	0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
492199767fToomas Soome	0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
493199767fToomas Soome	0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
494199767fToomas Soome	0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
495199767fToomas Soome	0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
496199767fToomas Soome	0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
497199767fToomas Soome	0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
498199767fToomas Soome	0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
499199767fToomas Soome	0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
500199767fToomas Soome	0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
501199767fToomas Soome};
502199767fToomas Soome
503199767fToomas Soome/*
504199767fToomas Soome * Multiply a given number by 2 raised to the given power.
505199767fToomas Soome */
506199767fToomas Soomestatic uint8_t
507199767fToomas Soomevdev_raidz_exp2(uint8_t a, int exp)
508199767fToomas Soome{
509199767fToomas Soome	if (a == 0)
510199767fToomas Soome		return (0);
511199767fToomas Soome
512199767fToomas Soome	ASSERT(exp >= 0);
513199767fToomas Soome	ASSERT(vdev_raidz_log2[a] > 0 || a == 1);
514199767fToomas Soome
515199767fToomas Soome	exp += vdev_raidz_log2[a];
516199767fToomas Soome	if (exp > 255)
517199767fToomas Soome		exp -= 255;
518199767fToomas Soome
519199767fToomas Soome	return (vdev_raidz_pow2[exp]);
520199767fToomas Soome}
521199767fToomas Soome
522199767fToomas Soomestatic void
523199767fToomas Soomevdev_raidz_generate_parity_p(raidz_map_t *rm)
524199767fToomas Soome{
525199767fToomas Soome	uint64_t *p, *src, pcount __attribute__((unused)), ccount, i;
526199767fToomas Soome	int c;
527199767fToomas Soome
528199767fToomas Soome	pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]);
529199767fToomas Soome
530199767fToomas Soome	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
531199767fToomas Soome		src = rm->rm_col[c].rc_data;
532199767fToomas Soome		p = rm->rm_col[VDEV_RAIDZ_P].rc_data;
533199767fToomas Soome		ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
534199767fToomas Soome
535199767fToomas Soome		if (c == rm->rm_firstdatacol) {
536199767fToomas Soome			ASSERT(ccount == pcount);
537199767fToomas Soome			for (i = 0; i < ccount; i++, src++, p++) {
538199767fToomas Soome				*p = *src;
539199767fToomas Soome			}
540199767fToomas Soome		} else {
541199767fToomas Soome			ASSERT(ccount <= pcount);
542199767fToomas Soome			for (i = 0; i < ccount; i++, src++, p++) {
543199767fToomas Soome				*p ^= *src;
544199767fToomas Soome			}
545199767fToomas Soome		}
546199767fToomas Soome	}
547199767fToomas Soome}
548199767fToomas Soome
549199767fToomas Soomestatic void
550199767fToomas Soomevdev_raidz_generate_parity_pq(raidz_map_t *rm)
551199767fToomas Soome{
552199767fToomas Soome	uint64_t *p, *q, *src, pcnt, ccnt, mask, i;
553199767fToomas Soome	int c;
554199767fToomas Soome
555199767fToomas Soome	pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]);
556199767fToomas Soome	ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size ==
557199767fToomas Soome	    rm->rm_col[VDEV_RAIDZ_Q].rc_size);
558199767fToomas Soome
559199767fToomas Soome	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
560199767fToomas Soome		src = rm->rm_col[c].rc_data;
561199767fToomas Soome		p = rm->rm_col[VDEV_RAIDZ_P].rc_data;
562199767fToomas Soome		q = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
563199767fToomas Soome
564199767fToomas Soome		ccnt = rm->rm_col[c].rc_size / sizeof (src[0]);
565199767fToomas Soome
566199767fToomas Soome		if (c == rm->rm_firstdatacol) {
567199767fToomas Soome			ASSERT(ccnt == pcnt || ccnt == 0);
568199767fToomas Soome			for (i = 0; i < ccnt; i++, src++, p++, q++) {
569199767fToomas Soome				*p = *src;
570199767fToomas Soome				*q = *src;
571199767fToomas Soome			}
572199767fToomas Soome			for (; i < pcnt; i++, src++, p++, q++) {
573199767fToomas Soome				*p = 0;
574199767fToomas Soome				*q = 0;
575199767fToomas Soome			}
576199767fToomas Soome		} else {
577199767fToomas Soome			ASSERT(ccnt <= pcnt);
578199767fToomas Soome
579199767fToomas Soome			/*
580199767fToomas Soome			 * Apply the algorithm described above by multiplying
581199767fToomas Soome			 * the previous result and adding in the new value.
582199767fToomas Soome			 */
583199767fToomas Soome			for (i = 0; i < ccnt; i++, src++, p++, q++) {
584199767fToomas Soome				*p ^= *src;
585199767fToomas Soome
586199767fToomas Soome				VDEV_RAIDZ_64MUL_2(*q, mask);
587199767fToomas Soome				*q ^= *src;
588199767fToomas Soome			}
589199767fToomas Soome
590199767fToomas Soome			/*
591199767fToomas Soome			 * Treat short columns as though they are full of 0s.
592199767fToomas Soome			 * Note that there's therefore nothing needed for P.
593199767fToomas Soome			 */
594199767fToomas Soome			for (; i < pcnt; i++, q++) {
595199767fToomas Soome				VDEV_RAIDZ_64MUL_2(*q, mask);
596199767fToomas Soome			}
597199767fToomas Soome		}
598199767fToomas Soome	}
599199767fToomas Soome}
600199767fToomas Soome
601199767fToomas Soomestatic void
602199767fToomas Soomevdev_raidz_generate_parity_pqr(raidz_map_t *rm)
603199767fToomas Soome{
604199767fToomas Soome	uint64_t *p, *q, *r, *src, pcnt, ccnt, mask, i;
605199767fToomas Soome	int c;
606199767fToomas Soome
607199767fToomas Soome	pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]);
608199767fToomas Soome	ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size ==
609199767fToomas Soome	    rm->rm_col[VDEV_RAIDZ_Q].rc_size);
610199767fToomas Soome	ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size ==
611199767fToomas Soome	    rm->rm_col[VDEV_RAIDZ_R].rc_size);
612199767fToomas Soome
613199767fToomas Soome	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
614199767fToomas Soome		src = rm->rm_col[c].rc_data;
615199767fToomas Soome		p = rm->rm_col[VDEV_RAIDZ_P].rc_data;
616199767fToomas Soome		q = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
617199767fToomas Soome		r = rm->rm_col[VDEV_RAIDZ_R].rc_data;
618199767fToomas Soome
619199767fToomas Soome		ccnt = rm->rm_col[c].rc_size / sizeof (src[0]);
620199767fToomas Soome
621199767fToomas Soome		if (c == rm->rm_firstdatacol) {
622199767fToomas Soome			ASSERT(ccnt == pcnt || ccnt == 0);
623199767fToomas Soome			for (i = 0; i < ccnt; i++, src++, p++, q++, r++) {
624199767fToomas Soome				*p = *src;
625199767fToomas Soome				*q = *src;
626199767fToomas Soome				*r = *src;
627199767fToomas Soome			}
628199767fToomas Soome			for (; i < pcnt; i++, src++, p++, q++, r++) {
629199767fToomas Soome				*p = 0;
630199767fToomas Soome				*q = 0;
631199767fToomas Soome				*r = 0;
632199767fToomas Soome			}
633199767fToomas Soome		} else {
634199767fToomas Soome			ASSERT(ccnt <= pcnt);
635199767fToomas Soome
636199767fToomas Soome			/*
637199767fToomas Soome			 * Apply the algorithm described above by multiplying
638199767fToomas Soome			 * the previous result and adding in the new value.
639199767fToomas Soome			 */
640199767fToomas Soome			for (i = 0; i < ccnt; i++, src++, p++, q++, r++) {
641199767fToomas Soome				*p ^= *src;
642199767fToomas Soome
643199767fToomas Soome				VDEV_RAIDZ_64MUL_2(*q, mask);
644199767fToomas Soome				*q ^= *src;
645199767fToomas Soome
646199767fToomas Soome				VDEV_RAIDZ_64MUL_4(*r, mask);
647199767fToomas Soome				*r ^= *src;
648199767fToomas Soome			}
649199767fToomas Soome
650199767fToomas Soome			/*
651199767fToomas Soome			 * Treat short columns as though they are full of 0s.
652199767fToomas Soome			 * Note that there's therefore nothing needed for P.
653199767fToomas Soome			 */
654199767fToomas Soome			for (; i < pcnt; i++, q++, r++) {
655199767fToomas Soome				VDEV_RAIDZ_64MUL_2(*q, mask);
656199767fToomas Soome				VDEV_RAIDZ_64MUL_4(*r, mask);
657199767fToomas Soome			}
658199767fToomas Soome		}
659199767fToomas Soome	}
660199767fToomas Soome}
661199767fToomas Soome
662199767fToomas Soome/*
663199767fToomas Soome * Generate RAID parity in the first virtual columns according to the number of
664199767fToomas Soome * parity columns available.
665199767fToomas Soome */
666199767fToomas Soomestatic void
667199767fToomas Soomevdev_raidz_generate_parity(raidz_map_t *rm)
668199767fToomas Soome{
669199767fToomas Soome	switch (rm->rm_firstdatacol) {
670199767fToomas Soome	case 1:
671199767fToomas Soome		vdev_raidz_generate_parity_p(rm);
672199767fToomas Soome		break;
673199767fToomas Soome	case 2:
674199767fToomas Soome		vdev_raidz_generate_parity_pq(rm);
675199767fToomas Soome		break;
676199767fToomas Soome	case 3:
677199767fToomas Soome		vdev_raidz_generate_parity_pqr(rm);
678199767fToomas Soome		break;
679199767fToomas Soome	default:
680199767fToomas Soome		panic("invalid RAID-Z configuration");
681199767fToomas Soome	}
682199767fToomas Soome}
683199767fToomas Soome
684199767fToomas Soome/* BEGIN CSTYLED */
685199767fToomas Soome/*
686199767fToomas Soome * In the general case of reconstruction, we must solve the system of linear
687199767fToomas Soome * equations defined by the coeffecients used to generate parity as well as
688199767fToomas Soome * the contents of the data and parity disks. This can be expressed with
689199767fToomas Soome * vectors for the original data (D) and the actual data (d) and parity (p)
690199767fToomas Soome * and a matrix composed of the identity matrix (I) and a dispersal matrix (V):
691199767fToomas Soome *
692199767fToomas Soome *            __   __                     __     __
693199767fToomas Soome *            |     |         __     __   |  p_0  |
694199767fToomas Soome *            |  V  |         |  D_0  |   | p_m-1 |
695199767fToomas Soome *            |     |    x    |   :   | = |  d_0  |
696199767fToomas Soome *            |  I  |         | D_n-1 |   |   :   |
697199767fToomas Soome *            |     |         ~~     ~~   | d_n-1 |
698199767fToomas Soome *            ~~   ~~                     ~~     ~~
699199767fToomas Soome *
700199767fToomas Soome * I is simply a square identity matrix of size n, and V is a vandermonde
701199767fToomas Soome * matrix defined by the coeffecients we chose for the various parity columns
702199767fToomas Soome * (1, 2, 4). Note that these values were chosen both for simplicity, speedy
703199767fToomas Soome * computation as well as linear separability.
704199767fToomas Soome *
705199767fToomas Soome *      __               __               __     __
706199767fToomas Soome *      |   1   ..  1 1 1 |               |  p_0  |
707199767fToomas Soome *      | 2^n-1 ..  4 2 1 |   __     __   |   :   |
708199767fToomas Soome *      | 4^n-1 .. 16 4 1 |   |  D_0  |   | p_m-1 |
709199767fToomas Soome *      |   1   ..  0 0 0 |   |  D_1  |   |  d_0  |
710199767fToomas Soome *      |   0   ..  0 0 0 | x |  D_2  | = |  d_1  |
711199767fToomas Soome *      |   :       : : : |   |   :   |   |  d_2  |
712199767fToomas Soome *      |   0   ..  1 0 0 |   | D_n-1 |   |   :   |
713199767fToomas Soome *      |   0   ..  0 1 0 |   ~~     ~~   |   :   |
714199767fToomas Soome *      |   0   ..  0 0 1 |               | d_n-1 |
715199767fToomas Soome *      ~~               ~~               ~~     ~~
716199767fToomas Soome *
717199767fToomas Soome * Note that I, V, d, and p are known. To compute D, we must invert the
718199767fToomas Soome * matrix and use the known data and parity values to reconstruct the unknown
719199767fToomas Soome * data values. We begin by removing the rows in V|I and d|p that correspond
720199767fToomas Soome * to failed or missing columns; we then make V|I square (n x n) and d|p
721199767fToomas Soome * sized n by removing rows corresponding to unused parity from the bottom up
722199767fToomas Soome * to generate (V|I)' and (d|p)'. We can then generate the inverse of (V|I)'
723199767fToomas Soome * using Gauss-Jordan elimination. In the example below we use m=3 parity
724199767fToomas Soome * columns, n=8 data columns, with errors in d_1, d_2, and p_1:
725199767fToomas Soome *           __                               __
726199767fToomas Soome *           |  1   1   1   1   1   1   1   1  |
727199767fToomas Soome *           | 128  64  32  16  8   4   2   1  | <-----+-+-- missing disks
728199767fToomas Soome *           |  19 205 116  29  64  16  4   1  |      / /
729199767fToomas Soome *           |  1   0   0   0   0   0   0   0  |     / /
730199767fToomas Soome *           |  0   1   0   0   0   0   0   0  | <--' /
731199767fToomas Soome *  (V|I)  = |  0   0   1   0   0   0   0   0  | <---'
732199767fToomas Soome *           |  0   0   0   1   0   0   0   0  |
733199767fToomas Soome *           |  0   0   0   0   1   0   0   0  |
734199767fToomas Soome *           |  0   0   0   0   0   1   0   0  |
735199767fToomas Soome *           |  0   0   0   0   0   0   1   0  |
736199767fToomas Soome *           |  0   0   0   0   0   0   0   1  |
737199767fToomas Soome *           ~~                               ~~
738199767fToomas Soome *           __                               __
739199767fToomas Soome *           |  1   1   1   1   1   1   1   1  |
740199767fToomas Soome *           | 128  64  32  16  8   4   2   1  |
741199767fToomas Soome *           |  19 205 116  29  64  16  4   1  |
742199767fToomas Soome *           |  1   0   0   0   0   0   0   0  |
743199767fToomas Soome *           |  0   1   0   0   0   0   0   0  |
744199767fToomas Soome *  (V|I)' = |  0   0   1   0   0   0   0   0  |
745199767fToomas Soome *           |  0   0   0   1   0   0   0   0  |
746199767fToomas Soome *           |  0   0   0   0   1   0   0   0  |
747199767fToomas Soome *           |  0   0   0   0   0   1   0   0  |
748199767fToomas Soome *           |  0   0   0   0   0   0   1   0  |
749199767fToomas Soome *           |  0   0   0   0   0   0   0   1  |
750199767fToomas Soome *           ~~                               ~~
751199767fToomas Soome *
752199767fToomas Soome * Here we employ Gauss-Jordan elimination to find the inverse of (V|I)'. We
753199767fToomas Soome * have carefully chosen the seed values 1, 2, and 4 to ensure that this
754199767fToomas Soome * matrix is not singular.
755199767fToomas Soome * __                                                                 __
756199767fToomas Soome * |  1   1   1   1   1   1   1   1     1   0   0   0   0   0   0   0  |
757199767fToomas Soome * |  19 205 116  29  64  16  4   1     0   1   0   0   0   0   0   0  |
758199767fToomas Soome * |  1   0   0   0   0   0   0   0     0   0   1   0   0   0   0   0  |
759199767fToomas Soome * |  0   0   0   1   0   0   0   0     0   0   0   1   0   0   0   0  |
760199767fToomas Soome * |  0   0   0   0   1   0   0   0     0   0   0   0   1   0   0   0  |
761199767fToomas Soome * |  0   0   0   0   0   1   0   0     0   0   0   0   0   1   0   0  |
762199767fToomas Soome * |  0   0   0   0   0   0   1   0     0   0   0   0   0   0   1   0  |
763199767fToomas Soome * |  0   0   0   0   0   0   0   1     0   0   0   0   0   0   0   1  |
764199767fToomas Soome * ~~                                                                 ~~
765199767fToomas Soome * __                                                                 __
766199767fToomas Soome * |  1   0   0   0   0   0   0   0     0   0   1   0   0   0   0   0  |
767199767fToomas Soome * |  1   1   1   1   1   1   1   1     1   0   0   0   0   0   0   0  |
768199767fToomas Soome * |  19 205 116  29  64  16  4   1     0   1   0   0   0   0   0   0  |
769199767fToomas Soome * |  0   0   0   1   0   0   0   0     0   0   0   1   0   0   0   0  |
770199767fToomas Soome * |  0   0   0   0   1   0   0   0     0   0   0   0   1   0   0   0  |
771199767fToomas Soome * |  0   0   0   0   0   1   0   0     0   0   0   0   0   1   0   0  |
772199767fToomas Soome * |  0   0   0   0   0   0   1   0     0   0   0   0   0   0   1   0  |
773199767fToomas Soome * |  0   0   0   0   0   0   0   1     0   0   0   0   0   0   0   1  |
774199767fToomas Soome * ~~                                                                 ~~
775199767fToomas Soome * __                                                                 __
776199767fToomas Soome * |  1   0   0   0   0   0   0   0     0   0   1   0   0   0   0   0  |
777199767fToomas Soome * |  0   1   1   0   0   0   0   0     1   0   1   1   1   1   1   1  |
778199767fToomas Soome * |  0  205 116  0   0   0   0   0     0   1   19  29  64  16  4   1  |
779199767fToomas Soome * |  0   0   0   1   0   0   0   0     0   0   0   1   0   0   0   0  |
780199767fToomas Soome * |  0   0   0   0   1   0   0   0     0   0   0   0   1   0   0   0  |
781199767fToomas Soome * |  0   0   0   0   0   1   0   0     0   0   0   0   0   1   0   0  |
782199767fToomas Soome * |  0   0   0   0   0   0   1   0     0   0   0   0   0   0   1   0  |
783199767fToomas Soome * |  0   0   0   0   0   0   0   1     0   0   0   0   0   0   0   1  |
784199767fToomas Soome * ~~                                                                 ~~
785199767fToomas Soome * __                                                                 __
786199767fToomas Soome * |  1   0   0   0   0   0   0   0     0   0   1   0   0   0   0   0  |
787199767fToomas Soome * |  0   1   1   0   0   0   0   0     1   0   1   1   1   1   1   1  |
788199767fToomas Soome * |  0   0  185  0   0   0   0   0    205  1  222 208 141 221 201 204 |
789199767fToomas Soome * |  0   0   0   1   0   0   0   0     0   0   0   1   0   0   0   0  |
790199767fToomas Soome * |  0   0   0   0   1   0   0   0     0   0   0   0   1   0   0   0  |
791199767fToomas Soome * |  0   0   0   0   0   1   0   0     0   0   0   0   0   1   0   0  |
792199767fToomas Soome * |  0   0   0   0   0   0   1   0     0   0   0   0   0   0   1   0  |
793199767fToomas Soome * |  0   0   0   0   0   0   0   1     0   0   0   0   0   0   0   1  |
794199767fToomas Soome * ~~                                                                 ~~
795199767fToomas Soome * __                                                                 __
796199767fToomas Soome * |  1   0   0   0   0   0   0   0     0   0   1   0   0   0   0   0  |
797199767fToomas Soome * |  0   1   1   0   0   0   0   0     1   0   1   1   1   1   1   1  |
798199767fToomas Soome * |  0   0   1   0   0   0   0   0    166 100  4   40 158 168 216 209 |
799199767fToomas Soome * |  0   0   0   1   0   0   0   0     0   0   0   1   0   0   0   0  |
800199767fToomas Soome * |  0   0   0   0   1   0   0   0     0   0   0   0   1   0   0   0  |
801199767fToomas Soome * |  0   0   0   0   0   1   0   0     0   0   0   0   0   1   0   0  |
802199767fToomas Soome * |  0   0   0   0   0   0   1   0     0   0   0   0   0   0   1   0  |
803199767fToomas Soome * |  0   0   0   0   0   0   0   1     0   0   0   0   0   0   0   1  |
804199767fToomas Soome * ~~                                                                 ~~
805199767fToomas Soome * __                                                                 __
806199767fToomas Soome * |  1   0   0   0   0   0   0   0     0   0   1   0   0   0   0   0  |
807199767fToomas Soome * |  0   1   0   0   0   0   0   0    167 100  5   41 159 169 217 208 |
808199767fToomas Soome * |  0   0   1   0   0   0   0   0    166 100  4   40 158 168 216 209 |
809199767fToomas Soome * |  0   0   0   1   0   0   0   0     0   0   0   1   0   0   0   0  |
810199767fToomas Soome * |  0   0   0   0   1   0   0   0     0   0   0   0   1   0   0   0  |
811199767fToomas Soome * |  0   0   0   0   0   1   0   0     0   0   0   0   0   1   0   0  |
812199767fToomas Soome * |  0   0   0   0   0   0   1   0     0   0   0   0   0   0   1   0  |
813199767fToomas Soome * |  0   0   0   0   0   0   0   1     0   0   0   0   0   0   0   1  |
814199767fToomas Soome * ~~                                                                 ~~
815199767fToomas Soome *                   __                               __
816199767fToomas Soome *                   |  0   0   1   0   0   0   0   0  |
817199767fToomas Soome *                   | 167 100  5   41 159 169 217 208 |
818199767fToomas Soome *                   | 166 100  4   40 158 168 216 209 |
819199767fToomas Soome *       (V|I)'^-1 = |  0   0   0   1   0   0   0   0  |
820199767fToomas Soome *                   |  0   0   0   0   1   0   0   0  |
821199767fToomas Soome *                   |  0   0   0   0   0   1   0   0  |
822199767fToomas Soome *                   |  0   0   0   0   0   0   1   0  |
823199767fToomas Soome *                   |  0   0   0   0   0   0   0   1  |
824199767fToomas Soome *                   ~~                               ~~
825199767fToomas Soome *
826199767fToomas Soome * We can then simply compute D = (V|I)'^-1 x (d|p)' to discover the values
827199767fToomas Soome * of the missing data.
828199767fToomas Soome *
829199767fToomas Soome * As is apparent from the example above, the only non-trivial rows in the
830199767fToomas Soome * inverse matrix correspond to the data disks that we're trying to
831199767fToomas Soome * reconstruct. Indeed, those are the only rows we need as the others would
832199767fToomas Soome * only be useful for reconstructing data known or assumed to be valid. For
833199767fToomas Soome * that reason, we only build the coefficients in the rows that correspond to
834199767fToomas Soome * targeted columns.
835199767fToomas Soome */
836199767fToomas Soome/* END CSTYLED */
837199767fToomas Soome
838199767fToomas Soomestatic void
8398eef2abToomas Soomevdev_raidz_matrix_init(raidz_map_t *rm __unused, int n, int nmap, int *map,
840199767fToomas Soome    uint8_t **rows)
841199767fToomas Soome{
842199767fToomas Soome	int i, j;
843199767fToomas Soome	int pow;
844199767fToomas Soome
845199767fToomas Soome	ASSERT(n == rm->rm_cols - rm->rm_firstdatacol);
846199767fToomas Soome
847199767fToomas Soome	/*
848199767fToomas Soome	 * Fill in the missing rows of interest.
849199767fToomas Soome	 */
850199767fToomas Soome	for (i = 0; i < nmap; i++) {
851199767fToomas Soome		ASSERT3S(0, <=, map[i]);
852199767fToomas Soome		ASSERT3S(map[i], <=, 2);
853199767fToomas Soome
854199767fToomas Soome		pow = map[i] * n;
855199767fToomas Soome		if (pow > 255)
856199767fToomas Soome			pow -= 255;
857199767fToomas Soome		ASSERT(pow <= 255);
858199767fToomas Soome
859199767fToomas Soome		for (j = 0; j < n; j++) {
860199767fToomas Soome			pow -= map[i];
861199767fToomas Soome			if (pow < 0)
862199767fToomas Soome				pow += 255;
863199767fToomas Soome			rows[i][j] = vdev_raidz_pow2[pow];
864199767fToomas Soome		}
865199767fToomas Soome	}
866199767fToomas Soome}
867199767fToomas Soome
868199767fToomas Soomestatic void
869199767fToomas Soomevdev_raidz_matrix_invert(raidz_map_t *rm, int n, int nmissing, int *missing,
870199767fToomas Soome    uint8_t **rows, uint8_t **invrows, const uint8_t *used)
871199767fToomas Soome{
872199767fToomas Soome	int i, j, ii, jj;
873199767fToomas Soome	uint8_t log;
874199767fToomas Soome
875199767fToomas Soome	/*
876199767fToomas Soome	 * Assert that the first nmissing entries from the array of used
877199767fToomas Soome	 * columns correspond to parity columns and that subsequent entries
878199767fToomas Soome	 * correspond to data columns.
879199767fToomas Soome	 */
880199767fToomas Soome	for (i = 0; i < nmissing; i++) {
881199767fToomas Soome		ASSERT3S(used[i], <, rm->rm_firstdatacol);
882199767fToomas Soome	}
883199767fToomas Soome	for (; i < n; i++) {
884199767fToomas Soome		ASSERT3S(used[i], >=, rm->rm_firstdatacol);
885199767fToomas Soome	}
886199767fToomas Soome
887199767fToomas Soome	/*
888199767fToomas Soome	 * First initialize the storage where we'll compute the inverse rows.
889199767fToomas Soome	 */
890199767fToomas Soome	for (i = 0; i < nmissing; i++) {
891199767fToomas Soome		for (j = 0; j < n; j++) {
892199767fToomas Soome			invrows[i][j] = (i == j) ? 1 : 0;
893199767fToomas Soome		}
894199767fToomas Soome	}
895199767fToomas Soome
896199767fToomas Soome	/*
897199767fToomas Soome	 * Subtract all trivial rows from the rows of consequence.
898199767fToomas Soome	 */
899199767fToomas Soome	for (i = 0; i < nmissing; i++) {
900199767fToomas Soome		for (j = nmissing; j < n; j++) {
901199767fToomas Soome			ASSERT3U(used[j], >=, rm->rm_firstdatacol);
902199767fToomas Soome			jj = used[j] - rm->rm_firstdatacol;
903199767fToomas Soome			ASSERT3S(jj, <, n);
904199767fToomas Soome			invrows[i][j] = rows[i][jj];
905199767fToomas Soome			rows[i][jj] = 0;
906199767fToomas Soome		}
907199767fToomas Soome	}
908199767fToomas Soome
909199767fToomas Soome	/*
910199767fToomas Soome	 * For each of the rows of interest, we must normalize it and subtract
911199767fToomas Soome	 * a multiple of it from the other rows.
912199767fToomas Soome	 */
913199767fToomas Soome	for (i = 0; i < nmissing; i++) {
914199767fToomas Soome		for (j = 0; j < missing[i]; j++) {
915199767fToomas Soome			ASSERT3U(rows[i][j], ==, 0);
916199767fToomas Soome		}
917199767fToomas Soome		ASSERT3U(rows[i][missing[i]], !=, 0);
918199767fToomas Soome
919199767fToomas Soome		/*
920199767fToomas Soome		 * Compute the inverse of the first element and multiply each
921199767fToomas Soome		 * element in the row by that value.
922199767fToomas Soome		 */
923199767fToomas Soome		log = 255 - vdev_raidz_log2[rows[i][missing[i]]];
924199767fToomas Soome
925199767fToomas Soome		for (j = 0; j < n; j++) {
926199767fToomas Soome			rows[i][j] = vdev_raidz_exp2(rows[i][j], log);
927199767fToomas Soome			invrows[i][j] = vdev_raidz_exp2(invrows[i][j], log);
928199767fToomas Soome		}
929199767fToomas Soome
930199767fToomas Soome		for (ii = 0; ii < nmissing; ii++) {
931199767fToomas Soome			if (i == ii)
932199767fToomas Soome				continue;
933199767fToomas Soome
934199767fToomas Soome			ASSERT3U(rows[ii][missing[i]], !=, 0);
935199767fToomas Soome
936199767fToomas Soome			log = vdev_raidz_log2[rows[ii][missing[i]]];
937199767fToomas Soome
938199767fToomas Soome			for (j = 0; j < n; j++) {
939199767fToomas Soome				rows[ii][j] ^=
940199767fToomas Soome				    vdev_raidz_exp2(rows[i][j], log);
941199767fToomas Soome				invrows[ii][j] ^=
942199767fToomas Soome				    vdev_raidz_exp2(invrows[i][j], log);
943199767fToomas Soome			}
944199767fToomas Soome		}
945199767fToomas Soome	}
946199767fToomas Soome
947199767fToomas Soome	/*
948199767fToomas Soome	 * Verify that the data that is left in the rows are properly part of
949199767fToomas Soome	 * an identity matrix.
950199767fToomas Soome	 */
951199767fToomas Soome	for (i = 0; i < nmissing; i++) {
952199767fToomas Soome		for (j = 0; j < n; j++) {
953199767fToomas Soome			if (j == missing[i]) {
954199767fToomas Soome				ASSERT3U(rows[i][j], ==, 1);
955199767fToomas Soome			} else {
956199767fToomas Soome				ASSERT3U(rows[i][j], ==, 0);
957199767fToomas Soome			}
958199767fToomas Soome		}
959199767fToomas Soome	}
960199767fToomas Soome}
961199767fToomas Soome
962199767fToomas Soomestatic void
963199767fToomas Soomevdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing,
964199767fToomas Soome    int *missing, uint8_t **invrows, const uint8_t *used)
965199767fToomas Soome{
966199767fToomas Soome	int i, j, x, cc, c;
967199767fToomas Soome	uint8_t *src;
968199767fToomas Soome	uint64_t ccount;
969199767fToomas Soome	uint8_t *dst[VDEV_RAIDZ_MAXPARITY];
970199767fToomas Soome	uint64_t dcount[VDEV_RAIDZ_MAXPARITY];
971199767fToomas Soome	uint8_t log, val;
972199767fToomas Soome	int ll;
973199767fToomas Soome	uint8_t *invlog[VDEV_RAIDZ_MAXPARITY];
974199767fToomas Soome	uint8_t *p, *pp;
975199767fToomas Soome	size_t psize;
976199767fToomas Soome
977199767fToomas Soome	log = 0;	/* gcc */
978199767fToomas Soome	psize = sizeof (invlog[0][0]) * n * nmissing;
9793e8c7f1Toomas Soome	p = malloc(psize);
9803e8c7f1Toomas Soome	if (p == NULL) {
9813e8c7f1Toomas Soome		printf("Out of memory\n");
9823e8c7f1Toomas Soome		return;
9833e8c7f1Toomas Soome	}
984199767fToomas Soome
985199767fToomas Soome	for (pp = p, i = 0; i < nmissing; i++) {
986199767fToomas Soome		invlog[i] = pp;
987199767fToomas Soome		pp += n;
988199767fToomas Soome	}
989199767fToomas Soome
990199767fToomas Soome	for (i = 0; i < nmissing; i++) {
991199767fToomas Soome		for (j = 0; j < n; j++) {
992199767fToomas Soome			ASSERT3U(invrows[i][j], !=, 0);
993199767fToomas Soome			invlog[i][j] = vdev_raidz_log2[invrows[i][j]];
994199767fToomas Soome		}
995199767fToomas Soome	}
996199767fToomas Soome
997199767fToomas Soome	for (i = 0; i < n; i++) {
998199767fToomas Soome		c = used[i];
999199767fToomas Soome		ASSERT3U(c, <, rm->rm_cols);
1000199767fToomas Soome
1001199767fToomas Soome		src = rm->rm_col[c].rc_data;
1002199767fToomas Soome		ccount = rm->rm_col[c].rc_size;
1003199767fToomas Soome		for (j = 0; j < nmissing; j++) {
1004199767fToomas Soome			cc = missing[j] + rm->rm_firstdatacol;
1005199767fToomas Soome			ASSERT3U(cc, >=, rm->rm_firstdatacol);
1006199767fToomas Soome			ASSERT3U(cc, <, rm->rm_cols);
1007199767fToomas Soome			ASSERT3U(cc, !=, c);
1008199767fToomas Soome
1009199767fToomas Soome			dst[j] = rm->rm_col[cc].rc_data;
1010199767fToomas Soome			dcount[j] = rm->rm_col[cc].rc_size;
1011199767fToomas Soome		}
1012199767fToomas Soome
1013199767fToomas Soome		ASSERT(ccount >= rm->rm_col[missing[0]].rc_size || i > 0);
1014199767fToomas Soome
1015199767fToomas Soome		for (x = 0; x < ccount; x++, src++) {
1016199767fToomas Soome			if (*src != 0)
1017199767fToomas Soome				log = vdev_raidz_log2[*src];
1018199767fToomas Soome
1019199767fToomas Soome			for (cc = 0; cc < nmissing; cc++) {
1020199767fToomas Soome				if (x >= dcount[cc])
1021199767fToomas Soome					continue;
1022199767fToomas Soome
1023199767fToomas Soome				if (*src == 0) {
1024199767fToomas Soome					val = 0;
1025199767fToomas Soome				} else {
1026199767fToomas Soome					if ((ll = log + invlog[cc][i]) >= 255)
1027