1c023f651SToomas Soome /*
2199767f8SToomas Soome * Copyright (c) 2007 Doug Rabson
3199767f8SToomas Soome * All rights reserved.
4199767f8SToomas Soome *
5199767f8SToomas Soome * Redistribution and use in source and binary forms, with or without
6199767f8SToomas Soome * modification, are permitted provided that the following conditions
7199767f8SToomas Soome * are met:
8199767f8SToomas Soome * 1. Redistributions of source code must retain the above copyright
9199767f8SToomas Soome * notice, this list of conditions and the following disclaimer.
10199767f8SToomas Soome * 2. Redistributions in binary form must reproduce the above copyright
11199767f8SToomas Soome * notice, this list of conditions and the following disclaimer in the
12199767f8SToomas Soome * documentation and/or other materials provided with the distribution.
13199767f8SToomas Soome *
14199767f8SToomas Soome * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15199767f8SToomas Soome * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16199767f8SToomas Soome * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17199767f8SToomas Soome * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18199767f8SToomas Soome * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19199767f8SToomas Soome * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20199767f8SToomas Soome * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21199767f8SToomas Soome * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22199767f8SToomas Soome * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23199767f8SToomas Soome * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24199767f8SToomas Soome * SUCH DAMAGE.
25199767f8SToomas Soome */
26199767f8SToomas Soome
27199767f8SToomas Soome #include <sys/cdefs.h>
28199767f8SToomas Soome
29199767f8SToomas Soome /*
30199767f8SToomas Soome * Stand-alone ZFS file reader.
31199767f8SToomas Soome */
32199767f8SToomas Soome
33b713c91eSToomas Soome #include <stdbool.h>
3413a6e30dSToomas Soome #include <sys/endian.h>
35199767f8SToomas Soome #include <sys/stat.h>
36199767f8SToomas Soome #include <sys/stdint.h>
37c023f651SToomas Soome #include <sys/list.h>
38b713c91eSToomas Soome #include <sys/zfs_bootenv.h>
39c023f651SToomas Soome #include <inttypes.h>
40199767f8SToomas Soome
41199767f8SToomas Soome #include "zfsimpl.h"
42199767f8SToomas Soome #include "zfssubr.c"
43199767f8SToomas Soome
44199767f8SToomas Soome
45199767f8SToomas Soome struct zfsmount {
46199767f8SToomas Soome const spa_t *spa;
47199767f8SToomas Soome objset_phys_t objset;
48199767f8SToomas Soome uint64_t rootobj;
49199767f8SToomas Soome };
50199767f8SToomas Soome
51c023f651SToomas Soome /*
52c023f651SToomas Soome * The indirect_child_t represents the vdev that we will read from, when we
53c023f651SToomas Soome * need to read all copies of the data (e.g. for scrub or reconstruction).
54c023f651SToomas Soome * For plain (non-mirror) top-level vdevs (i.e. is_vdev is not a mirror),
55c023f651SToomas Soome * ic_vdev is the same as is_vdev. However, for mirror top-level vdevs,
56c023f651SToomas Soome * ic_vdev is a child of the mirror.
57c023f651SToomas Soome */
58c023f651SToomas Soome typedef struct indirect_child {
59c023f651SToomas Soome void *ic_data;
60c023f651SToomas Soome vdev_t *ic_vdev;
61c023f651SToomas Soome } indirect_child_t;
62c023f651SToomas Soome
63c023f651SToomas Soome /*
64c023f651SToomas Soome * The indirect_split_t represents one mapped segment of an i/o to the
65c023f651SToomas Soome * indirect vdev. For non-split (contiguously-mapped) blocks, there will be
66c023f651SToomas Soome * only one indirect_split_t, with is_split_offset==0 and is_size==io_size.
67c023f651SToomas Soome * For split blocks, there will be several of these.
68c023f651SToomas Soome */
69c023f651SToomas Soome typedef struct indirect_split {
70c023f651SToomas Soome list_node_t is_node; /* link on iv_splits */
71c023f651SToomas Soome
72c023f651SToomas Soome /*
73c023f651SToomas Soome * is_split_offset is the offset into the i/o.
74c023f651SToomas Soome * This is the sum of the previous splits' is_size's.
75c023f651SToomas Soome */
76c023f651SToomas Soome uint64_t is_split_offset;
77c023f651SToomas Soome
78c023f651SToomas Soome vdev_t *is_vdev; /* top-level vdev */
79c023f651SToomas Soome uint64_t is_target_offset; /* offset on is_vdev */
80c023f651SToomas Soome uint64_t is_size;
81c023f651SToomas Soome int is_children; /* number of entries in is_child[] */
82c023f651SToomas Soome
83c023f651SToomas Soome /*
84c023f651SToomas Soome * is_good_child is the child that we are currently using to
85c023f651SToomas Soome * attempt reconstruction.
86c023f651SToomas Soome */
87c023f651SToomas Soome int is_good_child;
88c023f651SToomas Soome
89c023f651SToomas Soome indirect_child_t is_child[1]; /* variable-length */
90c023f651SToomas Soome } indirect_split_t;
91c023f651SToomas Soome
92c023f651SToomas Soome /*
93c023f651SToomas Soome * The indirect_vsd_t is associated with each i/o to the indirect vdev.
94c023f651SToomas Soome * It is the "Vdev-Specific Data" in the zio_t's io_vsd.
95c023f651SToomas Soome */
96c023f651SToomas Soome typedef struct indirect_vsd {
97c023f651SToomas Soome boolean_t iv_split_block;
98c023f651SToomas Soome boolean_t iv_reconstruct;
99c023f651SToomas Soome
100c023f651SToomas Soome list_t iv_splits; /* list of indirect_split_t's */
101c023f651SToomas Soome } indirect_vsd_t;
102c023f651SToomas Soome
103199767f8SToomas Soome /*
104199767f8SToomas Soome * List of all vdevs, chained through v_alllink.
105199767f8SToomas Soome */
106199767f8SToomas Soome static vdev_list_t zfs_vdevs;
107199767f8SToomas Soome
/*
 * Names of the ZFS features this reader supports for read.  The table is
 * terminated by a NULL entry; nvlist_check_features_for_read() compares the
 * pool's features-for-read list against it.
 */
static const char *features_for_read[] = {
	/* on-disk format, compression and checksums */
	"org.illumos:lz4_compress",
	"com.delphix:hole_birth",
	"com.delphix:extensible_dataset",
	"com.delphix:embedded_data",
	"org.open-zfs:large_blocks",
	"org.illumos:sha512",
	"org.illumos:skein",
	"org.illumos:edonr",
	"org.zfsonlinux:large_dnode",
	"com.joyent:multi_vdev_crash_dump",

	/* space maps, checkpoint, encryption, bookmarks */
	"com.delphix:spacemap_histogram",
	"com.delphix:zpool_checkpoint",
	"com.delphix:spacemap_v2",
	"com.datto:encryption",
	"com.datto:bookmark_v2",
	"org.zfsonlinux:allocation_classes",
	"com.datto:resilver_defer",

	/* device removal (indirect vdevs) */
	"com.delphix:device_removal",
	"com.delphix:obsolete_counts",

	NULL
};
133199767f8SToomas Soome
134199767f8SToomas Soome /*
135199767f8SToomas Soome * List of all pools, chained through spa_link.
136199767f8SToomas Soome */
137199767f8SToomas Soome static spa_list_t zfs_pools;
138199767f8SToomas Soome
139edb35047SToomas Soome static const dnode_phys_t *dnode_cache_obj;
140199767f8SToomas Soome static uint64_t dnode_cache_bn;
141199767f8SToomas Soome static char *dnode_cache_buf;
142199767f8SToomas Soome
143199767f8SToomas Soome static int zio_read(const spa_t *spa, const blkptr_t *bp, void *buf);
144199767f8SToomas Soome static int zfs_get_root(const spa_t *spa, uint64_t *objid);
145199767f8SToomas Soome static int zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result);
1464a04e8dbSToomas Soome static int zap_lookup(const spa_t *spa, const dnode_phys_t *dnode,
1474a04e8dbSToomas Soome const char *name, uint64_t integer_size, uint64_t num_integers,
1484a04e8dbSToomas Soome void *value);
149c023f651SToomas Soome static int objset_get_dnode(const spa_t *, const objset_phys_t *, uint64_t,
150c023f651SToomas Soome dnode_phys_t *);
151c023f651SToomas Soome static int dnode_read(const spa_t *, const dnode_phys_t *, off_t, void *,
152c023f651SToomas Soome size_t);
153c023f651SToomas Soome static int vdev_indirect_read(vdev_t *, const blkptr_t *, void *, off_t,
154c023f651SToomas Soome size_t);
155c023f651SToomas Soome static int vdev_mirror_read(vdev_t *, const blkptr_t *, void *, off_t,
156c023f651SToomas Soome size_t);
157199767f8SToomas Soome
158199767f8SToomas Soome static void
zfs_init(void)159199767f8SToomas Soome zfs_init(void)
160199767f8SToomas Soome {
161199767f8SToomas Soome STAILQ_INIT(&zfs_vdevs);
162199767f8SToomas Soome STAILQ_INIT(&zfs_pools);
163199767f8SToomas Soome
164199767f8SToomas Soome dnode_cache_buf = malloc(SPA_MAXBLOCKSIZE);
165199767f8SToomas Soome
166199767f8SToomas Soome zfs_init_crc();
167199767f8SToomas Soome }
168199767f8SToomas Soome
169199767f8SToomas Soome static int
nvlist_check_features_for_read(nvlist_t * nvl)170b713c91eSToomas Soome nvlist_check_features_for_read(nvlist_t *nvl)
171199767f8SToomas Soome {
172b713c91eSToomas Soome nvlist_t *features = NULL;
173b713c91eSToomas Soome nvs_data_t *data;
174b713c91eSToomas Soome nvp_header_t *nvp;
175b713c91eSToomas Soome nv_string_t *nvp_name;
176199767f8SToomas Soome int rc;
177199767f8SToomas Soome
178b4adc50cSToomas Soome /*
179b4adc50cSToomas Soome * We may have all features disabled.
180b4adc50cSToomas Soome */
181b713c91eSToomas Soome rc = nvlist_find(nvl, ZPOOL_CONFIG_FEATURES_FOR_READ,
182b713c91eSToomas Soome DATA_TYPE_NVLIST, NULL, &features, NULL);
183b4adc50cSToomas Soome switch (rc) {
184b4adc50cSToomas Soome case 0:
185b4adc50cSToomas Soome break; /* Continue with checks */
186b4adc50cSToomas Soome
187b4adc50cSToomas Soome case ENOENT:
188b4adc50cSToomas Soome return (0); /* All features are disabled */
189b4adc50cSToomas Soome
190b4adc50cSToomas Soome default:
191b4adc50cSToomas Soome return (rc); /* Error while reading nvlist */
192b4adc50cSToomas Soome }
193199767f8SToomas Soome
194b713c91eSToomas Soome data = (nvs_data_t *)features->nv_data;
195b713c91eSToomas Soome nvp = &data->nvl_pair; /* first pair in nvlist */
196199767f8SToomas Soome
197b713c91eSToomas Soome while (nvp->encoded_size != 0 && nvp->decoded_size != 0) {
198199767f8SToomas Soome int i, found;
199199767f8SToomas Soome
200b713c91eSToomas Soome nvp_name = (nv_string_t *)((uintptr_t)nvp + sizeof (*nvp));
201199767f8SToomas Soome found = 0;
202199767f8SToomas Soome
203199767f8SToomas Soome for (i = 0; features_for_read[i] != NULL; i++) {
204b713c91eSToomas Soome if (memcmp(nvp_name->nv_data, features_for_read[i],
205b713c91eSToomas Soome nvp_name->nv_size) == 0) {
206199767f8SToomas Soome found = 1;
207199767f8SToomas Soome break;
208199767f8SToomas Soome }
209199767f8SToomas Soome }
210199767f8SToomas Soome
211199767f8SToomas Soome if (!found) {
212b713c91eSToomas Soome printf("ZFS: unsupported feature: %.*s\n",
213b713c91eSToomas Soome nvp_name->nv_size, nvp_name->nv_data);
214199767f8SToomas Soome rc = EIO;
215199767f8SToomas Soome }
216b713c91eSToomas Soome nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size);
217199767f8SToomas Soome }
218b713c91eSToomas Soome nvlist_destroy(features);
219199767f8SToomas Soome
220199767f8SToomas Soome return (rc);
221199767f8SToomas Soome }
222199767f8SToomas Soome
223199767f8SToomas Soome static int
vdev_read_phys(vdev_t * vdev,const blkptr_t * bp,void * buf,off_t offset,size_t size)224199767f8SToomas Soome vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf,
225199767f8SToomas Soome off_t offset, size_t size)
226199767f8SToomas Soome {
227199767f8SToomas Soome size_t psize;
228199767f8SToomas Soome int rc;
229199767f8SToomas Soome
230b713c91eSToomas Soome if (vdev->v_phys_read == NULL)
231b713c91eSToomas Soome return (ENOTSUP);
232199767f8SToomas Soome
233199767f8SToomas Soome if (bp) {
234199767f8SToomas Soome psize = BP_GET_PSIZE(bp);
235199767f8SToomas Soome } else {
236199767f8SToomas Soome psize = size;
237199767f8SToomas Soome }
238199767f8SToomas Soome
239b713c91eSToomas Soome rc = vdev->v_phys_read(vdev, vdev->v_priv, offset, buf, psize);
240da9bf005SToomas Soome if (rc == 0) {
241da9bf005SToomas Soome if (bp != NULL)
242da9bf005SToomas Soome rc = zio_checksum_verify(vdev->v_spa, bp, buf);
243da9bf005SToomas Soome }
244199767f8SToomas Soome
245da9bf005SToomas Soome return (rc);
246199767f8SToomas Soome }
247199767f8SToomas Soome
248b713c91eSToomas Soome static int
vdev_write_phys(vdev_t * vdev,void * buf,off_t offset,size_t size)249b713c91eSToomas Soome vdev_write_phys(vdev_t *vdev, void *buf, off_t offset, size_t size)
250b713c91eSToomas Soome {
251b713c91eSToomas Soome if (vdev->v_phys_write == NULL)
252b713c91eSToomas Soome return (ENOTSUP);
253b713c91eSToomas Soome
254b713c91eSToomas Soome return (vdev->v_phys_write(vdev, offset, buf, size));
255b713c91eSToomas Soome }
256b713c91eSToomas Soome
257c023f651SToomas Soome typedef struct remap_segment {
258c023f651SToomas Soome vdev_t *rs_vd;
259c023f651SToomas Soome uint64_t rs_offset;
260c023f651SToomas Soome uint64_t rs_asize;
261c023f651SToomas Soome uint64_t rs_split_offset;
262c023f651SToomas Soome list_node_t rs_node;
263c023f651SToomas Soome } remap_segment_t;
264c023f651SToomas Soome
265c023f651SToomas Soome static remap_segment_t *
rs_alloc(vdev_t * vd,uint64_t offset,uint64_t asize,uint64_t split_offset)266c023f651SToomas Soome rs_alloc(vdev_t *vd, uint64_t offset, uint64_t asize, uint64_t split_offset)
267c023f651SToomas Soome {
268c023f651SToomas Soome remap_segment_t *rs = malloc(sizeof (remap_segment_t));
269c023f651SToomas Soome
270c023f651SToomas Soome if (rs != NULL) {
271c023f651SToomas Soome rs->rs_vd = vd;
272c023f651SToomas Soome rs->rs_offset = offset;
273c023f651SToomas Soome rs->rs_asize = asize;
274c023f651SToomas Soome rs->rs_split_offset = split_offset;
275c023f651SToomas Soome }
276c023f651SToomas Soome
277c023f651SToomas Soome return (rs);
278c023f651SToomas Soome }
279c023f651SToomas Soome
280c023f651SToomas Soome vdev_indirect_mapping_t *
vdev_indirect_mapping_open(spa_t * spa,objset_phys_t * os,uint64_t mapping_object)281c023f651SToomas Soome vdev_indirect_mapping_open(spa_t *spa, objset_phys_t *os,
282c023f651SToomas Soome uint64_t mapping_object)
283c023f651SToomas Soome {
284c023f651SToomas Soome vdev_indirect_mapping_t *vim;
285c023f651SToomas Soome vdev_indirect_mapping_phys_t *vim_phys;
286c023f651SToomas Soome int rc;
287c023f651SToomas Soome
288c023f651SToomas Soome vim = calloc(1, sizeof (*vim));
289c023f651SToomas Soome if (vim == NULL)
290c023f651SToomas Soome return (NULL);
291c023f651SToomas Soome
292c023f651SToomas Soome vim->vim_dn = calloc(1, sizeof (*vim->vim_dn));
293c023f651SToomas Soome if (vim->vim_dn == NULL) {
294c023f651SToomas Soome free(vim);
295c023f651SToomas Soome return (NULL);
296c023f651SToomas Soome }
297c023f651SToomas Soome
298c023f651SToomas Soome rc = objset_get_dnode(spa, os, mapping_object, vim->vim_dn);
299c023f651SToomas Soome if (rc != 0) {
300c023f651SToomas Soome free(vim->vim_dn);
301c023f651SToomas Soome free(vim);
302c023f651SToomas Soome return (NULL);
303c023f651SToomas Soome }
304c023f651SToomas Soome
305c023f651SToomas Soome vim->vim_spa = spa;
306c023f651SToomas Soome vim->vim_phys = malloc(sizeof (*vim->vim_phys));
307c023f651SToomas Soome if (vim->vim_phys == NULL) {
308c023f651SToomas Soome free(vim->vim_dn);
309c023f651SToomas Soome free(vim);
310c023f651SToomas Soome return (NULL);
311c023f651SToomas Soome }
312c023f651SToomas Soome
313c023f651SToomas Soome vim_phys = (vdev_indirect_mapping_phys_t *)DN_BONUS(vim->vim_dn);
314c023f651SToomas Soome *vim->vim_phys = *vim_phys;
315c023f651SToomas Soome
316c023f651SToomas Soome vim->vim_objset = os;
317c023f651SToomas Soome vim->vim_object = mapping_object;
318c023f651SToomas Soome vim->vim_entries = NULL;
319c023f651SToomas Soome
320c023f651SToomas Soome vim->vim_havecounts =
321c023f651SToomas Soome (vim->vim_dn->dn_bonuslen > VDEV_INDIRECT_MAPPING_SIZE_V0);
322c023f651SToomas Soome
323c023f651SToomas Soome return (vim);
324c023f651SToomas Soome }
325c023f651SToomas Soome
326c023f651SToomas Soome /*
327c023f651SToomas Soome * Compare an offset with an indirect mapping entry; there are three
328c023f651SToomas Soome * possible scenarios:
329c023f651SToomas Soome *
330c023f651SToomas Soome * 1. The offset is "less than" the mapping entry; meaning the
331c023f651SToomas Soome * offset is less than the source offset of the mapping entry. In
332c023f651SToomas Soome * this case, there is no overlap between the offset and the
333c023f651SToomas Soome * mapping entry and -1 will be returned.
334c023f651SToomas Soome *
335c023f651SToomas Soome * 2. The offset is "greater than" the mapping entry; meaning the
336c023f651SToomas Soome * offset is greater than the mapping entry's source offset plus
337c023f651SToomas Soome * the entry's size. In this case, there is no overlap between
338c023f651SToomas Soome * the offset and the mapping entry and 1 will be returned.
339c023f651SToomas Soome *
340c023f651SToomas Soome * NOTE: If the offset is actually equal to the entry's offset
341c023f651SToomas Soome * plus size, this is considered to be "greater" than the entry,
342c023f651SToomas Soome * and this case applies (i.e. 1 will be returned). Thus, the
343c023f651SToomas Soome * entry's "range" can be considered to be inclusive at its
344c023f651SToomas Soome * start, but exclusive at its end: e.g. [src, src + size).
345c023f651SToomas Soome *
346c023f651SToomas Soome * 3. The last case to consider is if the offset actually falls
347c023f651SToomas Soome * within the mapping entry's range. If this is the case, the
348c023f651SToomas Soome * offset is considered to be "equal to" the mapping entry and
349c023f651SToomas Soome * 0 will be returned.
350c023f651SToomas Soome *
351c023f651SToomas Soome * NOTE: If the offset is equal to the entry's source offset,
352c023f651SToomas Soome * this case applies and 0 will be returned. If the offset is
353c023f651SToomas Soome * equal to the entry's source plus its size, this case does
354c023f651SToomas Soome * *not* apply (see "NOTE" above for scenario 2), and 1 will be
355c023f651SToomas Soome * returned.
356c023f651SToomas Soome */
357c023f651SToomas Soome static int
dva_mapping_overlap_compare(const void * v_key,const void * v_array_elem)358c023f651SToomas Soome dva_mapping_overlap_compare(const void *v_key, const void *v_array_elem)
359c023f651SToomas Soome {
360c023f651SToomas Soome const uint64_t *key = v_key;
361c023f651SToomas Soome const vdev_indirect_mapping_entry_phys_t *array_elem =
362c023f651SToomas Soome v_array_elem;
363c023f651SToomas Soome uint64_t src_offset = DVA_MAPPING_GET_SRC_OFFSET(array_elem);
364c023f651SToomas Soome
365c023f651SToomas Soome if (*key < src_offset) {
366c023f651SToomas Soome return (-1);
367c023f651SToomas Soome } else if (*key < src_offset + DVA_GET_ASIZE(&array_elem->vimep_dst)) {
368c023f651SToomas Soome return (0);
369c023f651SToomas Soome } else {
370c023f651SToomas Soome return (1);
371c023f651SToomas Soome }
372c023f651SToomas Soome }
373c023f651SToomas Soome
374c023f651SToomas Soome /*
375c023f651SToomas Soome * Return array entry.
376c023f651SToomas Soome */
377c023f651SToomas Soome static vdev_indirect_mapping_entry_phys_t *
vdev_indirect_mapping_entry(vdev_indirect_mapping_t * vim,uint64_t index)378c023f651SToomas Soome vdev_indirect_mapping_entry(vdev_indirect_mapping_t *vim, uint64_t index)
379c023f651SToomas Soome {
380c023f651SToomas Soome uint64_t size;
381c023f651SToomas Soome off_t offset = 0;
382c023f651SToomas Soome int rc;
383c023f651SToomas Soome
384c023f651SToomas Soome if (vim->vim_phys->vimp_num_entries == 0)
385c023f651SToomas Soome return (NULL);
386c023f651SToomas Soome
387c023f651SToomas Soome if (vim->vim_entries == NULL) {
388c023f651SToomas Soome uint64_t bsize;
389c023f651SToomas Soome
390c023f651SToomas Soome bsize = vim->vim_dn->dn_datablkszsec << SPA_MINBLOCKSHIFT;
391c023f651SToomas Soome size = vim->vim_phys->vimp_num_entries *
392c023f651SToomas Soome sizeof (*vim->vim_entries);
393c023f651SToomas Soome if (size > bsize) {
394c023f651SToomas Soome size = bsize / sizeof (*vim->vim_entries);
395c023f651SToomas Soome size *= sizeof (*vim->vim_entries);
396c023f651SToomas Soome }
397c023f651SToomas Soome vim->vim_entries = malloc(size);
398c023f651SToomas Soome if (vim->vim_entries == NULL)
399c023f651SToomas Soome return (NULL);
400c023f651SToomas Soome vim->vim_num_entries = size / sizeof (*vim->vim_entries);
401c023f651SToomas Soome offset = index * sizeof (*vim->vim_entries);
402c023f651SToomas Soome }
403c023f651SToomas Soome
404c023f651SToomas Soome /* We have data in vim_entries */
405c023f651SToomas Soome if (offset == 0) {
406c023f651SToomas Soome if (index >= vim->vim_entry_offset &&
407c023f651SToomas Soome index <= vim->vim_entry_offset + vim->vim_num_entries) {
408c023f651SToomas Soome index -= vim->vim_entry_offset;
409c023f651SToomas Soome return (&vim->vim_entries[index]);
410c023f651SToomas Soome }
411c023f651SToomas Soome offset = index * sizeof (*vim->vim_entries);
412c023f651SToomas Soome }
413c023f651SToomas Soome
414c023f651SToomas Soome vim->vim_entry_offset = index;
415c023f651SToomas Soome size = vim->vim_num_entries * sizeof (*vim->vim_entries);
416c023f651SToomas Soome rc = dnode_read(vim->vim_spa, vim->vim_dn, offset, vim->vim_entries,
417c023f651SToomas Soome size);
418c023f651SToomas Soome if (rc != 0) {
419c023f651SToomas Soome /* Read error, invalidate vim_entries. */
420c023f651SToomas Soome free(vim->vim_entries);
421c023f651SToomas Soome vim->vim_entries = NULL;
422c023f651SToomas Soome return (NULL);
423c023f651SToomas Soome }
424c023f651SToomas Soome index -= vim->vim_entry_offset;
425c023f651SToomas Soome return (&vim->vim_entries[index]);
426c023f651SToomas Soome }
427c023f651SToomas Soome
428c023f651SToomas Soome /*
429c023f651SToomas Soome * Returns the mapping entry for the given offset.
430c023f651SToomas Soome *
431c023f651SToomas Soome * It's possible that the given offset will not be in the mapping table
432c023f651SToomas Soome * (i.e. no mapping entries contain this offset), in which case, the
433c023f651SToomas Soome * return value value depends on the "next_if_missing" parameter.
434c023f651SToomas Soome *
435c023f651SToomas Soome * If the offset is not found in the table and "next_if_missing" is
436c023f651SToomas Soome * B_FALSE, then NULL will always be returned. The behavior is intended
437c023f651SToomas Soome * to allow consumers to get the entry corresponding to the offset
438c023f651SToomas Soome * parameter, iff the offset overlaps with an entry in the table.
439c023f651SToomas Soome *
440c023f651SToomas Soome * If the offset is not found in the table and "next_if_missing" is
441c023f651SToomas Soome * B_TRUE, then the entry nearest to the given offset will be returned,
442c023f651SToomas Soome * such that the entry's source offset is greater than the offset
443c023f651SToomas Soome * passed in (i.e. the "next" mapping entry in the table is returned, if
444c023f651SToomas Soome * the offset is missing from the table). If there are no entries whose
445c023f651SToomas Soome * source offset is greater than the passed in offset, NULL is returned.
446c023f651SToomas Soome */
447c023f651SToomas Soome static vdev_indirect_mapping_entry_phys_t *
vdev_indirect_mapping_entry_for_offset(vdev_indirect_mapping_t * vim,uint64_t offset)448c023f651SToomas Soome vdev_indirect_mapping_entry_for_offset(vdev_indirect_mapping_t *vim,
449c023f651SToomas Soome uint64_t offset)
450c023f651SToomas Soome {
451c023f651SToomas Soome ASSERT(vim->vim_phys->vimp_num_entries > 0);
452c023f651SToomas Soome
453c023f651SToomas Soome vdev_indirect_mapping_entry_phys_t *entry;
454c023f651SToomas Soome
455c023f651SToomas Soome uint64_t last = vim->vim_phys->vimp_num_entries - 1;
456c023f651SToomas Soome uint64_t base = 0;
457c023f651SToomas Soome
458c023f651SToomas Soome /*
459c023f651SToomas Soome * We don't define these inside of the while loop because we use
460c023f651SToomas Soome * their value in the case that offset isn't in the mapping.
461c023f651SToomas Soome */
462c023f651SToomas Soome uint64_t mid;
463c023f651SToomas Soome int result;
464c023f651SToomas Soome
465c023f651SToomas Soome while (last >= base) {
466c023f651SToomas Soome mid = base + ((last - base) >> 1);
467c023f651SToomas Soome
468c023f651SToomas Soome entry = vdev_indirect_mapping_entry(vim, mid);
469c023f651SToomas Soome if (entry == NULL)
470c023f651SToomas Soome break;
471c023f651SToomas Soome result = dva_mapping_overlap_compare(&offset, entry);
472c023f651SToomas Soome
473c023f651SToomas Soome if (result == 0) {
474c023f651SToomas Soome break;
475c023f651SToomas Soome } else if (result < 0) {
476c023f651SToomas Soome last = mid - 1;
477c023f651SToomas Soome } else {
478c023f651SToomas Soome base = mid + 1;
479c023f651SToomas Soome }
480c023f651SToomas Soome }
481c023f651SToomas Soome return (entry);
482c023f651SToomas Soome }
483c023f651SToomas Soome
484c023f651SToomas Soome /*
485c023f651SToomas Soome * Given an indirect vdev and an extent on that vdev, it duplicates the
486c023f651SToomas Soome * physical entries of the indirect mapping that correspond to the extent
487c023f651SToomas Soome * to a new array and returns a pointer to it. In addition, copied_entries
488c023f651SToomas Soome * is populated with the number of mapping entries that were duplicated.
489c023f651SToomas Soome *
490c023f651SToomas Soome * Finally, since we are doing an allocation, it is up to the caller to
491c023f651SToomas Soome * free the array allocated in this function.
492c023f651SToomas Soome */
493c023f651SToomas Soome vdev_indirect_mapping_entry_phys_t *
vdev_indirect_mapping_duplicate_adjacent_entries(vdev_t * vd,uint64_t offset,uint64_t asize,uint64_t * copied_entries)494c023f651SToomas Soome vdev_indirect_mapping_duplicate_adjacent_entries(vdev_t *vd, uint64_t offset,
495c023f651SToomas Soome uint64_t asize, uint64_t *copied_entries)
496c023f651SToomas Soome {
497c023f651SToomas Soome vdev_indirect_mapping_entry_phys_t *duplicate_mappings = NULL;
498c023f651SToomas Soome vdev_indirect_mapping_t *vim = vd->v_mapping;
499c023f651SToomas Soome uint64_t entries = 0;
500c023f651SToomas Soome
501c023f651SToomas Soome vdev_indirect_mapping_entry_phys_t *first_mapping =
502c023f651SToomas Soome vdev_indirect_mapping_entry_for_offset(vim, offset);
503c023f651SToomas Soome ASSERT3P(first_mapping, !=, NULL);
504c023f651SToomas Soome
505c023f651SToomas Soome vdev_indirect_mapping_entry_phys_t *m = first_mapping;
506c023f651SToomas Soome while (asize > 0) {
507c023f651SToomas Soome uint64_t size = DVA_GET_ASIZE(&m->vimep_dst);
508c023f651SToomas Soome uint64_t inner_offset = offset - DVA_MAPPING_GET_SRC_OFFSET(m);
509c023f651SToomas Soome uint64_t inner_size = MIN(asize, size - inner_offset);
510c023f651SToomas Soome
511c023f651SToomas Soome offset += inner_size;
512c023f651SToomas Soome asize -= inner_size;
513c023f651SToomas Soome entries++;
514c023f651SToomas Soome m++;
515c023f651SToomas Soome }
516c023f651SToomas Soome
517c023f651SToomas Soome size_t copy_length = entries * sizeof (*first_mapping);
518c023f651SToomas Soome duplicate_mappings = malloc(copy_length);
519c023f651SToomas Soome if (duplicate_mappings != NULL)
520c023f651SToomas Soome bcopy(first_mapping, duplicate_mappings, copy_length);
521c023f651SToomas Soome else
522c023f651SToomas Soome entries = 0;
523c023f651SToomas Soome
524c023f651SToomas Soome *copied_entries = entries;
525c023f651SToomas Soome
526c023f651SToomas Soome return (duplicate_mappings);
527c023f651SToomas Soome }
528c023f651SToomas Soome
529c023f651SToomas Soome static vdev_t *
vdev_lookup_top(spa_t * spa,uint64_t vdev)530c023f651SToomas Soome vdev_lookup_top(spa_t *spa, uint64_t vdev)
531c023f651SToomas Soome {
532c023f651SToomas Soome vdev_t *rvd;
533da9bf005SToomas Soome vdev_list_t *vlist;
534c023f651SToomas Soome
535da9bf005SToomas Soome vlist = &spa->spa_root_vdev->v_children;
536da9bf005SToomas Soome STAILQ_FOREACH(rvd, vlist, v_childlink)
537c023f651SToomas Soome if (rvd->v_id == vdev)
538c023f651SToomas Soome break;
539c023f651SToomas Soome
540c023f651SToomas Soome return (rvd);
541c023f651SToomas Soome }
542c023f651SToomas Soome
543c023f651SToomas Soome /*
544c023f651SToomas Soome * This is a callback for vdev_indirect_remap() which allocates an
545c023f651SToomas Soome * indirect_split_t for each split segment and adds it to iv_splits.
546c023f651SToomas Soome */
547c023f651SToomas Soome static void
vdev_indirect_gather_splits(uint64_t split_offset,vdev_t * vd,uint64_t offset,uint64_t size,void * arg)548c023f651SToomas Soome vdev_indirect_gather_splits(uint64_t split_offset, vdev_t *vd, uint64_t offset,
549c023f651SToomas Soome uint64_t size, void *arg)
550c023f651SToomas Soome {
551c023f651SToomas Soome int n = 1;
552c023f651SToomas Soome zio_t *zio = arg;
553c023f651SToomas Soome indirect_vsd_t *iv = zio->io_vsd;
554c023f651SToomas Soome
555c023f651SToomas Soome if (vd->v_read == vdev_indirect_read)
556c023f651SToomas Soome return;
557c023f651SToomas Soome
558c023f651SToomas Soome if (vd->v_read == vdev_mirror_read)
559c023f651SToomas Soome n = vd->v_nchildren;
560c023f651SToomas Soome
561c023f651SToomas Soome indirect_split_t *is =
562c023f651SToomas Soome malloc(offsetof(indirect_split_t, is_child[n]));
563c023f651SToomas Soome if (is == NULL) {
564c023f651SToomas Soome zio->io_error = ENOMEM;
565c023f651SToomas Soome return;
566c023f651SToomas Soome }
567c023f651SToomas Soome bzero(is, offsetof(indirect_split_t, is_child[n]));
568c023f651SToomas Soome
569c023f651SToomas Soome is->is_children = n;
570c023f651SToomas Soome is->is_size = size;
571c023f651SToomas Soome is->is_split_offset = split_offset;
572c023f651SToomas Soome is->is_target_offset = offset;
573c023f651SToomas Soome is->is_vdev = vd;
574c023f651SToomas Soome
575c023f651SToomas Soome /*
576c023f651SToomas Soome * Note that we only consider multiple copies of the data for
577c023f651SToomas Soome * *mirror* vdevs. We don't for "replacing" or "spare" vdevs, even
578c023f651SToomas Soome * though they use the same ops as mirror, because there's only one
579c023f651SToomas Soome * "good" copy under the replacing/spare.
580c023f651SToomas Soome */
581c023f651SToomas Soome if (vd->v_read == vdev_mirror_read) {
582c023f651SToomas Soome int i = 0;
583c023f651SToomas Soome vdev_t *kid;
584c023f651SToomas Soome
585c023f651SToomas Soome STAILQ_FOREACH(kid, &vd->v_children, v_childlink) {
586c023f651SToomas Soome is->is_child[i++].ic_vdev = kid;
587c023f651SToomas Soome }
588c023f651SToomas Soome } else {
589c023f651SToomas Soome is->is_child[0].ic_vdev = vd;
590c023f651SToomas Soome }
591c023f651SToomas Soome
592c023f651SToomas Soome list_insert_tail(&iv->iv_splits, is);
593c023f651SToomas Soome }
594c023f651SToomas Soome
595c023f651SToomas Soome static void
vdev_indirect_remap(vdev_t * vd,uint64_t offset,uint64_t asize,void * arg)596c023f651SToomas Soome vdev_indirect_remap(vdev_t *vd, uint64_t offset, uint64_t asize, void *arg)
597c023f651SToomas Soome {
598c023f651SToomas Soome list_t stack;
599da9bf005SToomas Soome spa_t *spa = vd->v_spa;
600c023f651SToomas Soome zio_t *zio = arg;
601042b5608SToomas Soome remap_segment_t *rs;
602c023f651SToomas Soome
603c023f651SToomas Soome list_create(&stack, sizeof (remap_segment_t),
604c023f651SToomas Soome offsetof(remap_segment_t, rs_node));
605c023f651SToomas Soome
606042b5608SToomas Soome rs = rs_alloc(vd, offset, asize, 0);
607042b5608SToomas Soome if (rs == NULL) {
608042b5608SToomas Soome printf("vdev_indirect_remap: out of memory.\n");
609042b5608SToomas Soome zio->io_error = ENOMEM;
610042b5608SToomas Soome }
6116fd7fa35SToomas Soome for (; rs != NULL; rs = list_remove_head(&stack)) {
612c023f651SToomas Soome vdev_t *v = rs->rs_vd;
613c023f651SToomas Soome uint64_t num_entries = 0;
614c023f651SToomas Soome /* vdev_indirect_mapping_t *vim = v->v_mapping; */
615c023f651SToomas Soome vdev_indirect_mapping_entry_phys_t *mapping =
616c023f651SToomas Soome vdev_indirect_mapping_duplicate_adjacent_entries(v,
617c023f651SToomas Soome rs->rs_offset, rs->rs_asize, &num_entries);
618c023f651SToomas Soome
619042b5608SToomas Soome if (num_entries == 0)
620042b5608SToomas Soome zio->io_error = ENOMEM;
621042b5608SToomas Soome
622c023f651SToomas Soome for (uint64_t i = 0; i < num_entries; i++) {
623c023f651SToomas Soome vdev_indirect_mapping_entry_phys_t *m = &mapping[i];
624c023f651SToomas Soome uint64_t size = DVA_GET_ASIZE(&m->vimep_dst);
625c023f651SToomas Soome uint64_t dst_offset = DVA_GET_OFFSET(&m->vimep_dst);
626c023f651SToomas Soome uint64_t dst_vdev = DVA_GET_VDEV(&m->vimep_dst);
627c023f651SToomas Soome uint64_t inner_offset = rs->rs_offset -
628c023f651SToomas Soome DVA_MAPPING_GET_SRC_OFFSET(m);
629c023f651SToomas Soome uint64_t inner_size =
630c023f651SToomas Soome MIN(rs->rs_asize, size - inner_offset);
631c023f651SToomas Soome vdev_t *dst_v = vdev_lookup_top(spa, dst_vdev);
632c023f651SToomas Soome
633c023f651SToomas Soome if (dst_v->v_read == vdev_indirect_read) {
634042b5608SToomas Soome remap_segment_t *o;
635042b5608SToomas Soome
636042b5608SToomas Soome o = rs_alloc(dst_v, dst_offset + inner_offset,
637042b5608SToomas Soome inner_size, rs->rs_split_offset);
638042b5608SToomas Soome if (o == NULL) {
639042b5608SToomas Soome printf("vdev_indirect_remap: "
640042b5608SToomas Soome "out of memory.\n");
641042b5608SToomas Soome zio->io_error = ENOMEM;
642042b5608SToomas Soome break;
643042b5608SToomas Soome }
644042b5608SToomas Soome
645042b5608SToomas Soome list_insert_head(&stack, o);
646c023f651SToomas Soome }
647c023f651SToomas Soome vdev_indirect_gather_splits(rs->rs_split_offset, dst_v,
648c023f651SToomas Soome dst_offset + inner_offset,
649c023f651SToomas Soome inner_size, arg);
650c023f651SToomas Soome
651c023f651SToomas Soome /*
652c023f651SToomas Soome * vdev_indirect_gather_splits can have memory
653c023f651SToomas Soome * allocation error, we can not recover from it.
654c023f651SToomas Soome */
655c023f651SToomas Soome if (zio->io_error != 0)
656c023f651SToomas Soome break;
657c023f651SToomas Soome rs->rs_offset += inner_size;
658c023f651SToomas Soome rs->rs_asize -= inner_size;
659c023f651SToomas Soome rs->rs_split_offset += inner_size;
660c023f651SToomas Soome }
661c023f651SToomas Soome
662c023f651SToomas Soome free(mapping);
663c023f651SToomas Soome free(rs);
664c023f651SToomas Soome if (zio->io_error != 0)
665c023f651SToomas Soome break;
666c023f651SToomas Soome }
667c023f651SToomas Soome
668c023f651SToomas Soome list_destroy(&stack);
669c023f651SToomas Soome }
670c023f651SToomas Soome
671c023f651SToomas Soome static void
vdev_indirect_map_free(zio_t * zio)672c023f651SToomas Soome vdev_indirect_map_free(zio_t *zio)
673c023f651SToomas Soome {
674c023f651SToomas Soome indirect_vsd_t *iv = zio->io_vsd;
675c023f651SToomas Soome indirect_split_t *is;
676c023f651SToomas Soome
677c023f651SToomas Soome while ((is = list_head(&iv->iv_splits)) != NULL) {
678c023f651SToomas Soome for (int c = 0; c < is->is_children; c++) {
679c023f651SToomas Soome indirect_child_t *ic = &is->is_child[c];
680c023f651SToomas Soome free(ic->ic_data);
681c023f651SToomas Soome }
682c023f651SToomas Soome list_remove(&iv->iv_splits, is);
683c023f651SToomas Soome free(is);
684c023f651SToomas Soome }
685c023f651SToomas Soome free(iv);
686c023f651SToomas Soome }
687c023f651SToomas Soome
688c023f651SToomas Soome static int
vdev_indirect_read(vdev_t * vdev,const blkptr_t * bp,void * buf,off_t offset,size_t bytes)689c023f651SToomas Soome vdev_indirect_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
690c023f651SToomas Soome off_t offset, size_t bytes)
691c023f651SToomas Soome {
692da9bf005SToomas Soome zio_t zio;
693da9bf005SToomas Soome spa_t *spa = vdev->v_spa;
694da9bf005SToomas Soome indirect_vsd_t *iv;
695c023f651SToomas Soome indirect_split_t *first;
696c023f651SToomas Soome int rc = EIO;
697c023f651SToomas Soome
698da9bf005SToomas Soome iv = calloc(1, sizeof (*iv));
699c023f651SToomas Soome if (iv == NULL)
700c023f651SToomas Soome return (ENOMEM);
701c023f651SToomas Soome
702c023f651SToomas Soome list_create(&iv->iv_splits,
703c023f651SToomas Soome sizeof (indirect_split_t), offsetof(indirect_split_t, is_node));
704c023f651SToomas Soome
705da9bf005SToomas Soome bzero(&zio, sizeof (zio));
706c023f651SToomas Soome zio.io_spa = spa;
707c023f651SToomas Soome zio.io_bp = (blkptr_t *)bp;
708c023f651SToomas Soome zio.io_data = buf;
709c023f651SToomas Soome zio.io_size = bytes;
710c023f651SToomas Soome zio.io_offset = offset;
711c023f651SToomas Soome zio.io_vd = vdev;
712c023f651SToomas Soome zio.io_vsd = iv;
713c023f651SToomas Soome
714c023f651SToomas Soome if (vdev->v_mapping == NULL) {
715c023f651SToomas Soome vdev_indirect_config_t *vic;
716c023f651SToomas Soome
717c023f651SToomas Soome vic = &vdev->vdev_indirect_config;
718c023f651SToomas Soome vdev->v_mapping = vdev_indirect_mapping_open(spa,
719c023f651SToomas Soome &spa->spa_mos, vic->vic_mapping_object);
720c023f651SToomas Soome }
721c023f651SToomas Soome
722c023f651SToomas Soome vdev_indirect_remap(vdev, offset, bytes, &zio);
723c023f651SToomas Soome if (zio.io_error != 0)
724c023f651SToomas Soome return (zio.io_error);
725c023f651SToomas Soome
726c023f651SToomas Soome first = list_head(&iv->iv_splits);
727c023f651SToomas Soome if (first->is_size == zio.io_size) {
728c023f651SToomas Soome /*
729c023f651SToomas Soome * This is not a split block; we are pointing to the entire
730c023f651SToomas Soome * data, which will checksum the same as the original data.
731c023f651SToomas Soome * Pass the BP down so that the child i/o can verify the
732c023f651SToomas Soome * checksum, and try a different location if available
733c023f651SToomas Soome * (e.g. on a mirror).
734c023f651SToomas Soome *
735c023f651SToomas Soome * While this special case could be handled the same as the
736c023f651SToomas Soome * general (split block) case, doing it this way ensures
737c023f651SToomas Soome * that the vast majority of blocks on indirect vdevs
738c023f651SToomas Soome * (which are not split) are handled identically to blocks
739c023f651SToomas Soome * on non-indirect vdevs. This allows us to be less strict
740c023f651SToomas Soome * about performance in the general (but rare) case.
741c023f651SToomas Soome */
742c023f651SToomas Soome rc = first->is_vdev->v_read(first->is_vdev, zio.io_bp,
743c023f651SToomas Soome zio.io_data, first->is_target_offset, bytes);
744c023f651SToomas Soome } else {
745c023f651SToomas Soome iv->iv_split_block = B_TRUE;
746c023f651SToomas Soome /*
747c023f651SToomas Soome * Read one copy of each split segment, from the
748c023f651SToomas Soome * top-level vdev. Since we don't know the
749c023f651SToomas Soome * checksum of each split individually, the child
750c023f651SToomas Soome * zio can't ensure that we get the right data.
751c023f651SToomas Soome * E.g. if it's a mirror, it will just read from a
752c023f651SToomas Soome * random (healthy) leaf vdev. We have to verify
753c023f651SToomas Soome * the checksum in vdev_indirect_io_done().
754c023f651SToomas Soome */
755c023f651SToomas Soome for (indirect_split_t *is = list_head(&iv->iv_splits);
756c023f651SToomas Soome is != NULL; is = list_next(&iv->iv_splits, is)) {
757c023f651SToomas Soome char *ptr = zio.io_data;
758c023f651SToomas Soome
759c023f651SToomas Soome rc = is->is_vdev->v_read(is->is_vdev, zio.io_bp,
760c023f651SToomas Soome ptr + is->is_split_offset, is->is_target_offset,
761c023f651SToomas Soome is->is_size);
762c023f651SToomas Soome }
763c023f651SToomas Soome if (zio_checksum_verify(spa, zio.io_bp, zio.io_data))
764c023f651SToomas Soome rc = ECKSUM;
765c023f651SToomas Soome else
766c023f651SToomas Soome rc = 0;
767c023f651SToomas Soome }
768c023f651SToomas Soome
769c023f651SToomas Soome vdev_indirect_map_free(&zio);
770c023f651SToomas Soome if (rc == 0)
771c023f651SToomas Soome rc = zio.io_error;
772c023f651SToomas Soome
773c023f651SToomas Soome return (rc);
774c023f651SToomas Soome }
775c023f651SToomas Soome
776199767f8SToomas Soome static int
vdev_disk_read(vdev_t * vdev,const blkptr_t * bp,void * buf,off_t offset,size_t bytes)777199767f8SToomas Soome vdev_disk_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
778199767f8SToomas Soome off_t offset, size_t bytes)
779199767f8SToomas Soome {
780199767f8SToomas Soome
781199767f8SToomas Soome return (vdev_read_phys(vdev, bp, buf,
7826fd7fa35SToomas Soome offset + VDEV_LABEL_START_SIZE, bytes));
783199767f8SToomas Soome }
784199767f8SToomas Soome
7859117d17eSToomas Soome static int
vdev_missing_read(vdev_t * vdev __unused,const blkptr_t * bp __unused,void * buf __unused,off_t offset __unused,size_t bytes __unused)7869117d17eSToomas Soome vdev_missing_read(vdev_t *vdev __unused, const blkptr_t *bp __unused,
7879117d17eSToomas Soome void *buf __unused, off_t offset __unused, size_t bytes __unused)
7889117d17eSToomas Soome {
7899117d17eSToomas Soome
7909117d17eSToomas Soome return (ENOTSUP);
7919117d17eSToomas Soome }
792199767f8SToomas Soome
793199767f8SToomas Soome static int
vdev_mirror_read(vdev_t * vdev,const blkptr_t * bp,void * buf,off_t offset,size_t bytes)794199767f8SToomas Soome vdev_mirror_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
795199767f8SToomas Soome off_t offset, size_t bytes)
796199767f8SToomas Soome {
797199767f8SToomas Soome vdev_t *kid;
798199767f8SToomas Soome int rc;
799199767f8SToomas Soome
800199767f8SToomas Soome rc = EIO;
801199767f8SToomas Soome STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
802199767f8SToomas Soome if (kid->v_state != VDEV_STATE_HEALTHY)
803199767f8SToomas Soome continue;
804199767f8SToomas Soome rc = kid->v_read(kid, bp, buf, offset, bytes);
805199767f8SToomas Soome if (!rc)
806199767f8SToomas Soome return (0);
807199767f8SToomas Soome }
808199767f8SToomas Soome
809199767f8SToomas Soome return (rc);
810199767f8SToomas Soome }
811199767f8SToomas Soome
812199767f8SToomas Soome static int
vdev_replacing_read(vdev_t * vdev,const blkptr_t * bp,void * buf,off_t offset,size_t bytes)813199767f8SToomas Soome vdev_replacing_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
814199767f8SToomas Soome off_t offset, size_t bytes)
815199767f8SToomas Soome {
816199767f8SToomas Soome vdev_t *kid;
817199767f8SToomas Soome
818199767f8SToomas Soome /*
819199767f8SToomas Soome * Here we should have two kids:
820199767f8SToomas Soome * First one which is the one we are replacing and we can trust
821199767f8SToomas Soome * only this one to have valid data, but it might not be present.
822199767f8SToomas Soome * Second one is that one we are replacing with. It is most likely
823199767f8SToomas Soome * healthy, but we can't trust it has needed data, so we won't use it.
824199767f8SToomas Soome */
825199767f8SToomas Soome kid = STAILQ_FIRST(&vdev->v_children);
826199767f8SToomas Soome if (kid == NULL)
827199767f8SToomas Soome return (EIO);
828199767f8SToomas Soome if (kid->v_state != VDEV_STATE_HEALTHY)
829199767f8SToomas Soome return (EIO);
830199767f8SToomas Soome return (kid->v_read(kid, bp, buf, offset, bytes));
831199767f8SToomas Soome }
832199767f8SToomas Soome
833199767f8SToomas Soome static vdev_t *
vdev_find(uint64_t guid)834199767f8SToomas Soome vdev_find(uint64_t guid)
835199767f8SToomas Soome {
836199767f8SToomas Soome vdev_t *vdev;
837199767f8SToomas Soome
838199767f8SToomas Soome STAILQ_FOREACH(vdev, &zfs_vdevs, v_alllink)
839199767f8SToomas Soome if (vdev->v_guid == guid)
840199767f8SToomas Soome return (vdev);
841199767f8SToomas Soome
842199767f8SToomas Soome return (0);
843199767f8SToomas Soome }
844199767f8SToomas Soome
845199767f8SToomas Soome static vdev_t *
vdev_create(uint64_t guid,vdev_read_t * vdev_read)846199767f8SToomas Soome vdev_create(uint64_t guid, vdev_read_t *vdev_read)
847199767f8SToomas Soome {
848199767f8SToomas Soome vdev_t *vdev;
849c023f651SToomas Soome vdev_indirect_config_t *vic;
850199767f8SToomas Soome
851da9bf005SToomas Soome vdev = calloc(1, sizeof (vdev_t));
852da9bf005SToomas Soome if (vdev != NULL) {
853da9bf005SToomas Soome STAILQ_INIT(&vdev->v_children);
854da9bf005SToomas Soome vdev->v_guid = guid;
855da9bf005SToomas Soome vdev->v_read = vdev_read;
856c023f651SToomas Soome
857da9bf005SToomas Soome /*
858da9bf005SToomas Soome * root vdev has no read function, we use this fact to
859da9bf005SToomas Soome * skip setting up data we do not need for root vdev.
860da9bf005SToomas Soome * We only point root vdev from spa.
861da9bf005SToomas Soome */
862da9bf005SToomas Soome if (vdev_read != NULL) {
863da9bf005SToomas Soome vic = &vdev->vdev_indirect_config;
864da9bf005SToomas Soome vic->vic_prev_indirect_vdev = UINT64_MAX;
865da9bf005SToomas Soome STAILQ_INSERT_TAIL(&zfs_vdevs, vdev, v_alllink);
866da9bf005SToomas Soome }
867da9bf005SToomas Soome }
868199767f8SToomas Soome
869199767f8SToomas Soome return (vdev);
870199767f8SToomas Soome }
871199767f8SToomas Soome
872da9bf005SToomas Soome static void
vdev_set_initial_state(vdev_t * vdev,const nvlist_t * nvlist)873b713c91eSToomas Soome vdev_set_initial_state(vdev_t *vdev, const nvlist_t *nvlist)
874199767f8SToomas Soome {
875199767f8SToomas Soome uint64_t is_offline, is_faulted, is_degraded, is_removed, isnt_present;
87667806cd7SToomas Soome uint64_t is_log;
877199767f8SToomas Soome
878da9bf005SToomas Soome is_offline = is_removed = is_faulted = is_degraded = isnt_present = 0;
879da9bf005SToomas Soome is_log = 0;
880da9bf005SToomas Soome (void) nvlist_find(nvlist, ZPOOL_CONFIG_OFFLINE, DATA_TYPE_UINT64, NULL,
881ce5f7fb8SToomas Soome &is_offline, NULL);
882da9bf005SToomas Soome (void) nvlist_find(nvlist, ZPOOL_CONFIG_REMOVED, DATA_TYPE_UINT64, NULL,
883ce5f7fb8SToomas Soome &is_removed, NULL);
884da9bf005SToomas Soome (void) nvlist_find(nvlist, ZPOOL_CONFIG_FAULTED, DATA_TYPE_UINT64, NULL,
885ce5f7fb8SToomas Soome &is_faulted, NULL);
886da9bf005SToomas Soome (void) nvlist_find(nvlist, ZPOOL_CONFIG_DEGRADED, DATA_TYPE_UINT64,
887ce5f7fb8SToomas Soome NULL, &is_degraded, NULL);
888da9bf005SToomas Soome (void) nvlist_find(nvlist, ZPOOL_CONFIG_NOT_PRESENT, DATA_TYPE_UINT64,
889ce5f7fb8SToomas Soome NULL, &isnt_present, NULL);
890da9bf005SToomas Soome (void) nvlist_find(nvlist, ZPOOL_CONFIG_IS_LOG, DATA_TYPE_UINT64, NULL,
891ce5f7fb8SToomas Soome &is_log, NULL);
892da9bf005SToomas Soome
893da9bf005SToomas Soome if (is_offline != 0)
894da9bf005SToomas Soome vdev->v_state = VDEV_STATE_OFFLINE;
895da9bf005SToomas Soome else if (is_removed != 0)
896da9bf005SToomas Soome vdev->v_state = VDEV_STATE_REMOVED;
897da9bf005SToomas Soome else if (is_faulted != 0)
898da9bf005SToomas Soome vdev->v_state = VDEV_STATE_FAULTED;
899da9bf005SToomas Soome else if (is_degraded != 0)
900da9bf005SToomas Soome vdev->v_state = VDEV_STATE_DEGRADED;
901da9bf005SToomas Soome else if (isnt_present != 0)
902da9bf005SToomas Soome vdev->v_state = VDEV_STATE_CANT_OPEN;
903da9bf005SToomas Soome
904da9bf005SToomas Soome vdev->v_islog = is_log != 0;
905da9bf005SToomas Soome }
906da9bf005SToomas Soome
907da9bf005SToomas Soome static int
vdev_init(uint64_t guid,const nvlist_t * nvlist,vdev_t ** vdevp)908b713c91eSToomas Soome vdev_init(uint64_t guid, const nvlist_t *nvlist, vdev_t **vdevp)
909da9bf005SToomas Soome {
910da9bf005SToomas Soome uint64_t id, ashift, asize, nparity;
911da9bf005SToomas Soome const char *path;
912da9bf005SToomas Soome const char *type;
913ce5f7fb8SToomas Soome int len, pathlen;
914ce5f7fb8SToomas Soome char *name;
915da9bf005SToomas Soome vdev_t *vdev;
916da9bf005SToomas Soome
917ce5f7fb8SToomas Soome if (nvlist_find(nvlist, ZPOOL_CONFIG_ID, DATA_TYPE_UINT64, NULL, &id,
918ce5f7fb8SToomas Soome NULL) ||
919edb35047SToomas Soome nvlist_find(nvlist, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING,
920ce5f7fb8SToomas Soome NULL, &type, &len)) {
921199767f8SToomas Soome return (ENOENT);
922199767f8SToomas Soome }
923199767f8SToomas Soome
924ce5f7fb8SToomas Soome if (memcmp(type, VDEV_TYPE_MIRROR, len) != 0 &&
925ce5f7fb8SToomas Soome memcmp(type, VDEV_TYPE_DISK, len) != 0 &&
926199767f8SToomas Soome #ifdef ZFS_TEST
927ce5f7fb8SToomas Soome memcmp(type, VDEV_TYPE_FILE, len) != 0 &&
928199767f8SToomas Soome #endif
929ce5f7fb8SToomas Soome memcmp(type, VDEV_TYPE_RAIDZ, len) != 0 &&
930ce5f7fb8SToomas Soome memcmp(type, VDEV_TYPE_INDIRECT, len) != 0 &&
9319117d17eSToomas Soome memcmp(type, VDEV_TYPE_REPLACING, len) != 0 &&
9329117d17eSToomas Soome memcmp(type, VDEV_TYPE_HOLE, len) != 0) {
9336fd7fa35SToomas Soome printf("ZFS: can only boot from disk, mirror, raidz1, "
9349117d17eSToomas Soome "raidz2 and raidz3 vdevs, got: %.*s\n", len, type);
935199767f8SToomas Soome return (EIO);
936199767f8SToomas Soome }
937199767f8SToomas Soome
938ce5f7fb8SToomas Soome if (memcmp(type, VDEV_TYPE_MIRROR, len) == 0)
939da9bf005SToomas Soome vdev = vdev_create(guid, vdev_mirror_read);
940ce5f7fb8SToomas Soome else if (memcmp(type, VDEV_TYPE_RAIDZ, len) == 0)
941da9bf005SToomas Soome vdev = vdev_create(guid, vdev_raidz_read);
942ce5f7fb8SToomas Soome else if (memcmp(type, VDEV_TYPE_REPLACING, len) == 0)
943da9bf005SToomas Soome vdev = vdev_create(guid, vdev_replacing_read);
944ce5f7fb8SToomas Soome else if (memcmp(type, VDEV_TYPE_INDIRECT, len) == 0) {
945da9bf005SToomas Soome vdev_indirect_config_t *vic;
946199767f8SToomas Soome
947da9bf005SToomas Soome vdev = vdev_create(guid, vdev_indirect_read);
948da9bf005SToomas Soome if (vdev != NULL) {
949c023f651SToomas Soome vdev->v_state = VDEV_STATE_HEALTHY;
950c023f651SToomas Soome vic = &vdev->vdev_indirect_config;
951c023f651SToomas Soome
952c023f651SToomas Soome nvlist_find(nvlist,
953da9bf005SToomas Soome ZPOOL_CONFIG_INDIRECT_OBJECT,
954da9bf005SToomas Soome DATA_TYPE_UINT64,
955ce5f7fb8SToomas Soome NULL, &vic->vic_mapping_object, NULL);
956c023f651SToomas Soome nvlist_find(nvlist,
957da9bf005SToomas Soome ZPOOL_CONFIG_INDIRECT_BIRTHS,
958da9bf005SToomas Soome DATA_TYPE_UINT64,
959ce5f7fb8SToomas Soome NULL, &vic->vic_births_object, NULL);
960c023f651SToomas Soome nvlist_find(nvlist,
961da9bf005SToomas Soome ZPOOL_CONFIG_PREV_INDIRECT_VDEV,
962da9bf005SToomas Soome DATA_TYPE_UINT64,
963ce5f7fb8SToomas Soome NULL, &vic->vic_prev_indirect_vdev, NULL);
9644c2b14fdSToomas Soome }
9659117d17eSToomas Soome } else if (memcmp(type, VDEV_TYPE_HOLE, len) == 0) {
9669117d17eSToomas Soome vdev = vdev_create(guid, vdev_missing_read);
967da9bf005SToomas Soome } else {
968da9bf005SToomas Soome vdev = vdev_create(guid, vdev_disk_read);
969da9bf005SToomas Soome }
970da9bf005SToomas Soome
971da9bf005SToomas Soome if (vdev == NULL)
972da9bf005SToomas Soome return (ENOMEM);
973da9bf005SToomas Soome
974da9bf005SToomas Soome vdev_set_initial_state(vdev, nvlist);
975da9bf005SToomas Soome vdev->v_id = id;
976da9bf005SToomas Soome if (nvlist_find(nvlist, ZPOOL_CONFIG_ASHIFT,
977ce5f7fb8SToomas Soome DATA_TYPE_UINT64, NULL, &ashift, NULL) == 0)
978da9bf005SToomas Soome vdev->v_ashift = ashift;
979da9bf005SToomas Soome
980da9bf005SToomas Soome if (nvlist_find(nvlist, ZPOOL_CONFIG_ASIZE,
981ce5f7fb8SToomas Soome DATA_TYPE_UINT64, NULL, &asize, NULL) == 0) {
982da9bf005SToomas Soome vdev->v_psize = asize +
983da9bf005SToomas Soome VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
984da9bf005SToomas Soome }
985da9bf005SToomas Soome
986da9bf005SToomas Soome if (nvlist_find(nvlist, ZPOOL_CONFIG_NPARITY,
987ce5f7fb8SToomas Soome DATA_TYPE_UINT64, NULL, &nparity, NULL) == 0)
988da9bf005SToomas Soome vdev->v_nparity = nparity;
989da9bf005SToomas Soome
990da9bf005SToomas Soome if (nvlist_find(nvlist, ZPOOL_CONFIG_PATH,
991ce5f7fb8SToomas Soome DATA_TYPE_STRING, NULL, &path, &pathlen) == 0) {
992ce5f7fb8SToomas Soome char prefix[] = "/dev/dsk/";
993ce5f7fb8SToomas Soome
994ce5f7fb8SToomas Soome len = strlen(prefix);
995ce5f7fb8SToomas Soome if (len < pathlen && memcmp(path, prefix, len) == 0) {
996ce5f7fb8SToomas Soome path += len;
997ce5f7fb8SToomas Soome pathlen -= len;
998ce5f7fb8SToomas Soome }
999ce5f7fb8SToomas Soome name = malloc(pathlen + 1);
1000ce5f7fb8SToomas Soome if (name != NULL) {
1001ce5f7fb8SToomas Soome bcopy(path, name, pathlen);
1002ce5f7fb8SToomas Soome name[pathlen] = '\0';
1003ce5f7fb8SToomas Soome }
1004ce5f7fb8SToomas Soome vdev->v_name = name;
1005ce5f7fb8SToomas Soome vdev->v_phys_path = NULL;
1006ce5f7fb8SToomas Soome vdev->v_devid = NULL;
1007da9bf005SToomas Soome if (nvlist_find(nvlist, ZPOOL_CONFIG_PHYS_PATH,
1008ce5f7fb8SToomas Soome DATA_TYPE_STRING, NULL, &path, &pathlen) == 0) {
1009ce5f7fb8SToomas Soome name = malloc(pathlen + 1);
1010ce5f7fb8SToomas Soome if (name != NULL) {
1011ce5f7fb8SToomas Soome bcopy(path, name, pathlen);
1012ce5f7fb8SToomas Soome name[pathlen] = '\0';
1013ce5f7fb8SToomas Soome vdev->v_phys_path = name;
1014ce5f7fb8SToomas Soome }
1015edb35047SToomas Soome }
1016da9bf005SToomas Soome if (nvlist_find(nvlist, ZPOOL_CONFIG_DEVID,
1017ce5f7fb8SToomas Soome DATA_TYPE_STRING, NULL, &path, &pathlen) == 0) {
1018ce5f7fb8SToomas Soome name = malloc(pathlen + 1);
1019ce5f7fb8SToomas Soome if (name != NULL) {
1020ce5f7fb8SToomas Soome bcopy(path, name, pathlen);
1021ce5f7fb8SToomas Soome name[pathlen] = '\0';
1022ce5f7fb8SToomas Soome vdev->v_devid = name;
1023ce5f7fb8SToomas Soome }
1024199767f8SToomas Soome }
1025199767f8SToomas Soome } else {
1026da9bf005SToomas Soome name = NULL;
1027ce5f7fb8SToomas Soome if (memcmp(type, VDEV_TYPE_RAIDZ, len) == 0) {
1028da9bf005SToomas Soome if (vdev->v_nparity < 1 ||
1029da9bf005SToomas Soome vdev->v_nparity > 3) {
1030da9bf005SToomas Soome printf("ZFS: invalid raidz parity: %d\n",
1031da9bf005SToomas Soome vdev->v_nparity);
1032da9bf005SToomas Soome return (EIO);
1033da9bf005SToomas Soome }
1034ce5f7fb8SToomas Soome (void) asprintf(&name, "%.*s%d-%" PRIu64, len, type,
1035da9bf005SToomas Soome vdev->v_nparity, id);
1036da9bf005SToomas Soome } else {
1037ce5f7fb8SToomas Soome (void) asprintf(&name, "%.*s-%" PRIu64, len, type, id);
1038da9bf005SToomas Soome }
1039da9bf005SToomas Soome vdev->v_name = name;
1040da9bf005SToomas Soome }
1041da9bf005SToomas Soome *vdevp = vdev;
1042da9bf005SToomas Soome return (0);
1043da9bf005SToomas Soome }
1044da9bf005SToomas Soome
1045da9bf005SToomas Soome /*
1046da9bf005SToomas Soome * Find slot for vdev. We return either NULL to signal to use
1047da9bf005SToomas Soome * STAILQ_INSERT_HEAD, or we return link element to be used with
1048da9bf005SToomas Soome * STAILQ_INSERT_AFTER.
1049da9bf005SToomas Soome */
1050da9bf005SToomas Soome static vdev_t *
vdev_find_previous(vdev_t * top_vdev,vdev_t * vdev)1051da9bf005SToomas Soome vdev_find_previous(vdev_t *top_vdev, vdev_t *vdev)
1052da9bf005SToomas Soome {
1053da9bf005SToomas Soome vdev_t *v, *previous;
1054da9bf005SToomas Soome
1055da9bf005SToomas Soome if (STAILQ_EMPTY(&top_vdev->v_children))
1056da9bf005SToomas Soome return (NULL);
1057da9bf005SToomas Soome
1058da9bf005SToomas Soome previous = NULL;
1059da9bf005SToomas Soome STAILQ_FOREACH(v, &top_vdev->v_children, v_childlink) {
1060da9bf005SToomas Soome if (v->v_id > vdev->v_id)
1061da9bf005SToomas Soome return (previous);
1062da9bf005SToomas Soome
1063da9bf005SToomas Soome if (v->v_id == vdev->v_id)
1064da9bf005SToomas Soome return (v);
1065da9bf005SToomas Soome
1066da9bf005SToomas Soome if (v->v_id < vdev->v_id)
1067da9bf005SToomas Soome previous = v;
1068199767f8SToomas Soome }
1069da9bf005SToomas Soome return (previous);
1070da9bf005SToomas Soome }
1071da9bf005SToomas Soome
1072da9bf005SToomas Soome static size_t
vdev_child_count(vdev_t * vdev)1073da9bf005SToomas Soome vdev_child_count(vdev_t *vdev)
1074da9bf005SToomas Soome {
1075da9bf005SToomas Soome vdev_t *v;
1076da9bf005SToomas Soome size_t count;
1077da9bf005SToomas Soome
1078da9bf005SToomas Soome count = 0;
1079da9bf005SToomas Soome STAILQ_FOREACH(v, &vdev->v_children, v_childlink) {
1080da9bf005SToomas Soome count++;
1081da9bf005SToomas Soome }
1082da9bf005SToomas Soome return (count);
1083da9bf005SToomas Soome }
1084da9bf005SToomas Soome
1085da9bf005SToomas Soome /*
1086da9bf005SToomas Soome * Insert vdev into top_vdev children list. List is ordered by v_id.
1087da9bf005SToomas Soome */
1088da9bf005SToomas Soome static void
vdev_insert(vdev_t * top_vdev,vdev_t * vdev)1089da9bf005SToomas Soome vdev_insert(vdev_t *top_vdev, vdev_t *vdev)
1090da9bf005SToomas Soome {
1091da9bf005SToomas Soome vdev_t *previous;
1092da9bf005SToomas Soome size_t count;
1093da9bf005SToomas Soome
1094da9bf005SToomas Soome /*
1095da9bf005SToomas Soome * The top level vdev can appear in random order, depending how
1096da9bf005SToomas Soome * the firmware is presenting the disk devices.
1097da9bf005SToomas Soome * However, we will insert vdev to create list ordered by v_id,
1098da9bf005SToomas Soome * so we can use either STAILQ_INSERT_HEAD or STAILQ_INSERT_AFTER
1099da9bf005SToomas Soome * as STAILQ does not have insert before.
1100da9bf005SToomas Soome */
1101da9bf005SToomas Soome previous = vdev_find_previous(top_vdev, vdev);
1102199767f8SToomas Soome
1103da9bf005SToomas Soome if (previous == NULL) {
1104da9bf005SToomas Soome STAILQ_INSERT_HEAD(&top_vdev->v_children, vdev, v_childlink);
1105da9bf005SToomas Soome } else if (previous->v_id == vdev->v_id) {
1106199767f8SToomas Soome /*
1107da9bf005SToomas Soome * This vdev was configured from label config,
1108da9bf005SToomas Soome * do not insert duplicate.
1109199767f8SToomas Soome */
1110da9bf005SToomas Soome return;
1111da9bf005SToomas Soome } else {
1112da9bf005SToomas Soome STAILQ_INSERT_AFTER(&top_vdev->v_children, previous, vdev,
1113da9bf005SToomas Soome v_childlink);
1114da9bf005SToomas Soome }
1115da9bf005SToomas Soome
1116da9bf005SToomas Soome count = vdev_child_count(top_vdev);
1117da9bf005SToomas Soome if (top_vdev->v_nchildren < count)
1118da9bf005SToomas Soome top_vdev->v_nchildren = count;
1119da9bf005SToomas Soome }
1120da9bf005SToomas Soome
1121da9bf005SToomas Soome static int
vdev_from_nvlist(spa_t * spa,uint64_t top_guid,const nvlist_t * nvlist)1122b713c91eSToomas Soome vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const nvlist_t *nvlist)
1123da9bf005SToomas Soome {
1124da9bf005SToomas Soome vdev_t *top_vdev, *vdev;
1125b713c91eSToomas Soome nvlist_t **kids = NULL;
1126da9bf005SToomas Soome int rc, nkids;
1127da9bf005SToomas Soome
1128da9bf005SToomas Soome /* Get top vdev. */
1129da9bf005SToomas Soome top_vdev = vdev_find(top_guid);
1130da9bf005SToomas Soome if (top_vdev == NULL) {
1131da9bf005SToomas Soome rc = vdev_init(top_guid, nvlist, &top_vdev);
1132da9bf005SToomas Soome if (rc != 0)
1133da9bf005SToomas Soome return (rc);
1134da9bf005SToomas Soome top_vdev->v_spa = spa;
1135da9bf005SToomas Soome top_vdev->v_top = top_vdev;
1136da9bf005SToomas Soome vdev_insert(spa->spa_root_vdev, top_vdev);
1137199767f8SToomas Soome }
1138199767f8SToomas Soome
1139da9bf005SToomas Soome /* Add children if there are any. */
1140edb35047SToomas Soome rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY,
1141ce5f7fb8SToomas Soome &nkids, &kids, NULL);
1142199767f8SToomas Soome if (rc == 0) {
1143da9bf005SToomas Soome for (int i = 0; i < nkids; i++) {
1144da9bf005SToomas Soome uint64_t guid;
1145da9bf005SToomas Soome
1146b713c91eSToomas Soome rc = nvlist_find(kids[i], ZPOOL_CONFIG_GUID,
1147ce5f7fb8SToomas Soome DATA_TYPE_UINT64, NULL, &guid, NULL);
1148da9bf005SToomas Soome if (rc != 0)
1149b713c91eSToomas Soome goto done;
1150b713c91eSToomas Soome
1151b713c91eSToomas Soome rc = vdev_init(guid, kids[i], &vdev);
1152da9bf005SToomas Soome if (rc != 0)
1153b713c91eSToomas Soome goto done;
1154da9bf005SToomas Soome
1155da9bf005SToomas Soome vdev->v_spa = spa;
1156da9bf005SToomas Soome vdev->v_top = top_vdev;
1157da9bf005SToomas Soome vdev_insert(top_vdev, vdev);
1158199767f8SToomas Soome }
1159199767f8SToomas Soome } else {
1160da9bf005SToomas Soome /*
1161da9bf005SToomas Soome * When there are no children, nvlist_find() does return
1162da9bf005SToomas Soome * error, reset it because leaf devices have no children.
1163da9bf005SToomas Soome */
1164da9bf005SToomas Soome rc = 0;
1165199767f8SToomas Soome }
1166b713c91eSToomas Soome done:
1167b713c91eSToomas Soome if (kids != NULL) {
1168b713c91eSToomas Soome for (int i = 0; i < nkids; i++)
1169b713c91eSToomas Soome nvlist_destroy(kids[i]);
1170b713c91eSToomas Soome free(kids);
1171b713c91eSToomas Soome }
1172199767f8SToomas Soome
1173da9bf005SToomas Soome return (rc);
1174da9bf005SToomas Soome }
1175da9bf005SToomas Soome
1176da9bf005SToomas Soome static int
vdev_init_from_label(spa_t * spa,const nvlist_t * nvlist)1177b713c91eSToomas Soome vdev_init_from_label(spa_t *spa, const nvlist_t *nvlist)
1178da9bf005SToomas Soome {
1179da9bf005SToomas Soome uint64_t pool_guid, top_guid;
1180b713c91eSToomas Soome nvlist_t *vdevs;
1181b713c91eSToomas Soome int rc;
1182da9bf005SToomas Soome
1183da9bf005SToomas Soome if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64,
1184ce5f7fb8SToomas Soome NULL, &pool_guid, NULL) ||
1185da9bf005SToomas Soome nvlist_find(nvlist, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64,
1186ce5f7fb8SToomas Soome NULL, &top_guid, NULL) ||
1187da9bf005SToomas Soome nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST,
1188ce5f7fb8SToomas Soome NULL, &vdevs, NULL)) {
1189da9bf005SToomas Soome printf("ZFS: can't find vdev details\n");
1190da9bf005SToomas Soome return (ENOENT);
1191da9bf005SToomas Soome }
1192da9bf005SToomas Soome
1193b713c91eSToomas Soome rc = vdev_from_nvlist(spa, top_guid, vdevs);
1194b713c91eSToomas Soome nvlist_destroy(vdevs);
1195b713c91eSToomas Soome return (rc);
1196199767f8SToomas Soome }
1197199767f8SToomas Soome
1198199767f8SToomas Soome static void
vdev_set_state(vdev_t * vdev)1199199767f8SToomas Soome vdev_set_state(vdev_t *vdev)
1200199767f8SToomas Soome {
1201199767f8SToomas Soome vdev_t *kid;
1202199767f8SToomas Soome int good_kids;
1203199767f8SToomas Soome int bad_kids;
1204199767f8SToomas Soome
1205da9bf005SToomas Soome STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
1206da9bf005SToomas Soome vdev_set_state(kid);
1207da9bf005SToomas Soome }
1208da9bf005SToomas Soome
1209199767f8SToomas Soome /*
1210199767f8SToomas Soome * A mirror or raidz is healthy if all its kids are healthy. A
1211199767f8SToomas Soome * mirror is degraded if any of its kids is healthy; a raidz
1212199767f8SToomas Soome * is degraded if at most nparity kids are offline.
1213199767f8SToomas Soome */
1214199767f8SToomas Soome if (STAILQ_FIRST(&vdev->v_children)) {
1215199767f8SToomas Soome good_kids = 0;
1216199767f8SToomas Soome bad_kids = 0;
1217199767f8SToomas Soome STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
1218199767f8SToomas Soome if (kid->v_state == VDEV_STATE_HEALTHY)
1219199767f8SToomas Soome good_kids++;
1220199767f8SToomas Soome else
1221199767f8SToomas Soome bad_kids++;
1222199767f8SToomas Soome }
1223199767f8SToomas Soome if (bad_kids == 0) {
1224199767f8SToomas Soome vdev->v_state = VDEV_STATE_HEALTHY;
1225199767f8SToomas Soome } else {
1226199767f8SToomas Soome if (vdev->v_read == vdev_mirror_read) {
1227199767f8SToomas Soome if (good_kids) {
1228199767f8SToomas Soome vdev->v_state = VDEV_STATE_DEGRADED;
1229199767f8SToomas Soome } else {
1230199767f8SToomas Soome vdev->v_state = VDEV_STATE_OFFLINE;
1231199767f8SToomas Soome }
1232199767f8SToomas Soome } else if (vdev->v_read == vdev_raidz_read) {
1233199767f8SToomas Soome if (bad_kids > vdev->v_nparity) {
1234199767f8SToomas Soome vdev->v_state = VDEV_STATE_OFFLINE;
1235199767f8SToomas Soome } else {
1236199767f8SToomas Soome vdev->v_state = VDEV_STATE_DEGRADED;
1237199767f8SToomas Soome }
1238199767f8SToomas Soome }
1239199767f8SToomas Soome }
1240199767f8SToomas Soome }
1241199767f8SToomas Soome }
1242199767f8SToomas Soome
1243da9bf005SToomas Soome static int
vdev_update_from_nvlist(uint64_t top_guid,const nvlist_t * nvlist)1244b713c91eSToomas Soome vdev_update_from_nvlist(uint64_t top_guid, const nvlist_t *nvlist)
1245da9bf005SToomas Soome {
1246da9bf005SToomas Soome vdev_t *vdev;
1247b713c91eSToomas Soome nvlist_t **kids = NULL;
1248da9bf005SToomas Soome int rc, nkids;
1249da9bf005SToomas Soome
1250da9bf005SToomas Soome /* Update top vdev. */
1251da9bf005SToomas Soome vdev = vdev_find(top_guid);
1252da9bf005SToomas Soome if (vdev != NULL)
1253da9bf005SToomas Soome vdev_set_initial_state(vdev, nvlist);
1254da9bf005SToomas Soome
1255da9bf005SToomas Soome /* Update children if there are any. */
1256da9bf005SToomas Soome rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY,
1257ce5f7fb8SToomas Soome &nkids, &kids, NULL);
1258da9bf005SToomas Soome if (rc == 0) {
1259da9bf005SToomas Soome for (int i = 0; i < nkids; i++) {
1260da9bf005SToomas Soome uint64_t guid;
1261da9bf005SToomas Soome
1262b713c91eSToomas Soome rc = nvlist_find(kids[i], ZPOOL_CONFIG_GUID,
1263ce5f7fb8SToomas Soome DATA_TYPE_UINT64, NULL, &guid, NULL);
1264da9bf005SToomas Soome if (rc != 0)
1265da9bf005SToomas Soome break;
1266da9bf005SToomas Soome
1267da9bf005SToomas Soome vdev = vdev_find(guid);
1268da9bf005SToomas Soome if (vdev != NULL)
1269b713c91eSToomas Soome vdev_set_initial_state(vdev, kids[i]);
1270da9bf005SToomas Soome }
1271da9bf005SToomas Soome } else {
1272da9bf005SToomas Soome rc = 0;
1273da9bf005SToomas Soome }
1274b713c91eSToomas Soome if (kids != NULL) {
1275b713c91eSToomas Soome for (int i = 0; i < nkids; i++)
1276b713c91eSToomas Soome nvlist_destroy(kids[i]);
1277b713c91eSToomas Soome free(kids);
1278b713c91eSToomas Soome }
1279da9bf005SToomas Soome
1280da9bf005SToomas Soome return (rc);
1281da9bf005SToomas Soome }
1282da9bf005SToomas Soome
1283da9bf005SToomas Soome static int
vdev_init_from_nvlist(spa_t * spa,const nvlist_t * nvlist)1284b713c91eSToomas Soome vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist)
1285da9bf005SToomas Soome {
1286da9bf005SToomas Soome uint64_t pool_guid, vdev_children;
1287b713c91eSToomas Soome nvlist_t *vdevs = NULL, **kids = NULL;
1288da9bf005SToomas Soome int rc, nkids;
1289da9bf005SToomas Soome
1290da9bf005SToomas Soome if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64,
1291ce5f7fb8SToomas Soome NULL, &pool_guid, NULL) ||
1292da9bf005SToomas Soome nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_CHILDREN, DATA_TYPE_UINT64,
1293ce5f7fb8SToomas Soome NULL, &vdev_children, NULL) ||
1294da9bf005SToomas Soome nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST,
1295ce5f7fb8SToomas Soome NULL, &vdevs, NULL)) {
1296da9bf005SToomas Soome printf("ZFS: can't find vdev details\n");
1297da9bf005SToomas Soome return (ENOENT);
1298da9bf005SToomas Soome }
1299da9bf005SToomas Soome
1300da9bf005SToomas Soome /* Wrong guid?! */
1301b713c91eSToomas Soome if (spa->spa_guid != pool_guid) {
1302b713c91eSToomas Soome nvlist_destroy(vdevs);
1303da9bf005SToomas Soome return (EINVAL);
1304b713c91eSToomas Soome }
1305da9bf005SToomas Soome
1306da9bf005SToomas Soome spa->spa_root_vdev->v_nchildren = vdev_children;
1307da9bf005SToomas Soome
1308da9bf005SToomas Soome rc = nvlist_find(vdevs, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY,
1309ce5f7fb8SToomas Soome &nkids, &kids, NULL);
1310b713c91eSToomas Soome nvlist_destroy(vdevs);
1311da9bf005SToomas Soome
1312da9bf005SToomas Soome /*
1313da9bf005SToomas Soome * MOS config has at least one child for root vdev.
1314da9bf005SToomas Soome */
1315da9bf005SToomas Soome if (rc != 0)
1316da9bf005SToomas Soome return (rc);
1317da9bf005SToomas Soome
1318da9bf005SToomas Soome for (int i = 0; i < nkids; i++) {
1319da9bf005SToomas Soome uint64_t guid;
1320da9bf005SToomas Soome vdev_t *vdev;
1321da9bf005SToomas Soome
1322b713c91eSToomas Soome rc = nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
1323ce5f7fb8SToomas Soome NULL, &guid, NULL);
1324da9bf005SToomas Soome if (rc != 0)
1325da9bf005SToomas Soome break;
1326da9bf005SToomas Soome vdev = vdev_find(guid);
1327da9bf005SToomas Soome /*
1328da9bf005SToomas Soome * Top level vdev is missing, create it.
1329da9bf005SToomas Soome */
1330da9bf005SToomas Soome if (vdev == NULL)
1331b713c91eSToomas Soome rc = vdev_from_nvlist(spa, guid, kids[i]);
1332da9bf005SToomas Soome else
1333b713c91eSToomas Soome rc = vdev_update_from_nvlist(guid, kids[i]);
1334da9bf005SToomas Soome if (rc != 0)
1335da9bf005SToomas Soome break;
1336b713c91eSToomas Soome }
1337b713c91eSToomas Soome if (kids != NULL) {
1338b713c91eSToomas Soome for (int i = 0; i < nkids; i++)
1339b713c91eSToomas Soome nvlist_destroy(kids[i]);
1340b713c91eSToomas Soome free(kids);
1341da9bf005SToomas Soome }
1342da9bf005SToomas Soome
1343da9bf005SToomas Soome /*
1344da9bf005SToomas Soome * Re-evaluate top-level vdev state.
1345da9bf005SToomas Soome */
1346da9bf005SToomas Soome vdev_set_state(spa->spa_root_vdev);
1347da9bf005SToomas Soome
1348da9bf005SToomas Soome return (rc);
1349da9bf005SToomas Soome }
1350da9bf005SToomas Soome
1351199767f8SToomas Soome static spa_t *
spa_find_by_guid(uint64_t guid)1352199767f8SToomas Soome spa_find_by_guid(uint64_t guid)
1353199767f8SToomas Soome {
1354199767f8SToomas Soome spa_t *spa;
1355199767f8SToomas Soome
1356199767f8SToomas Soome STAILQ_FOREACH(spa, &zfs_pools, spa_link)
1357199767f8SToomas Soome if (spa->spa_guid == guid)
1358199767f8SToomas Soome return (spa);
1359199767f8SToomas Soome
13606fd7fa35SToomas Soome return (NULL);
1361199767f8SToomas Soome }
1362199767f8SToomas Soome
1363199767f8SToomas Soome static spa_t *
spa_find_by_name(const char * name)1364199767f8SToomas Soome spa_find_by_name(const char *name)
1365199767f8SToomas Soome {
1366199767f8SToomas Soome spa_t *spa;
1367199767f8SToomas Soome
1368199767f8SToomas Soome STAILQ_FOREACH(spa, &zfs_pools, spa_link)
13696fd7fa35SToomas Soome if (strcmp(spa->spa_name, name) == 0)
1370199767f8SToomas Soome return (spa);
1371199767f8SToomas Soome
13726fd7fa35SToomas Soome return (NULL);
1373199767f8SToomas Soome }
1374199767f8SToomas Soome
1375b713c91eSToomas Soome static spa_t *
spa_find_by_dev(struct zfs_devdesc * dev)1376b713c91eSToomas Soome spa_find_by_dev(struct zfs_devdesc *dev)
1377199767f8SToomas Soome {
1378199767f8SToomas Soome
1379b713c91eSToomas Soome if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1380199767f8SToomas Soome return (NULL);
1381b713c91eSToomas Soome
1382b713c91eSToomas Soome if (dev->pool_guid == 0)
1383b713c91eSToomas Soome return (STAILQ_FIRST(&zfs_pools));
1384b713c91eSToomas Soome
1385b713c91eSToomas Soome return (spa_find_by_guid(dev->pool_guid));
1386199767f8SToomas Soome }
1387199767f8SToomas Soome
1388199767f8SToomas Soome static spa_t *
spa_create(uint64_t guid,const char * name)1389edb35047SToomas Soome spa_create(uint64_t guid, const char *name)
1390199767f8SToomas Soome {
1391199767f8SToomas Soome spa_t *spa;
1392199767f8SToomas Soome
139302f11668SToomas Soome if ((spa = calloc(1, sizeof (spa_t))) == NULL)
1394edb35047SToomas Soome return (NULL);
1395edb35047SToomas Soome if ((spa->spa_name = strdup(name)) == NULL) {
1396edb35047SToomas Soome free(spa);
1397edb35047SToomas Soome return (NULL);
1398edb35047SToomas Soome }
1399199767f8SToomas Soome spa->spa_guid = guid;
1400da9bf005SToomas Soome spa->spa_root_vdev = vdev_create(guid, NULL);
1401da9bf005SToomas Soome if (spa->spa_root_vdev == NULL) {
1402da9bf005SToomas Soome free(spa->spa_name);
1403da9bf005SToomas Soome free(spa);
1404da9bf005SToomas Soome return (NULL);
1405da9bf005SToomas Soome }
1406da9bf005SToomas Soome spa->spa_root_vdev->v_name = strdup("root");
1407199767f8SToomas Soome STAILQ_INSERT_TAIL(&zfs_pools, spa, spa_link);
1408199767f8SToomas Soome
1409199767f8SToomas Soome return (spa);
1410199767f8SToomas Soome }
1411199767f8SToomas Soome
1412199767f8SToomas Soome static const char *
state_name(vdev_state_t state)1413199767f8SToomas Soome state_name(vdev_state_t state)
1414199767f8SToomas Soome {
14156fd7fa35SToomas Soome static const char *names[] = {
1416199767f8SToomas Soome "UNKNOWN",
1417199767f8SToomas Soome "CLOSED",
1418199767f8SToomas Soome "OFFLINE",
1419199767f8SToomas Soome "REMOVED",
1420199767f8SToomas Soome "CANT_OPEN",
1421199767f8SToomas Soome "FAULTED",
1422199767f8SToomas Soome "DEGRADED",
1423199767f8SToomas Soome "ONLINE"
1424199767f8SToomas Soome };
14256fd7fa35SToomas Soome return (names[state]);
1426199767f8SToomas Soome }
1427199767f8SToomas Soome
/*
 * printf-style front end for pager_output(). The formatted text is limited
 * to the 80 byte line buffer; anything longer is truncated by vsnprintf().
 * Returns whatever pager_output() returns.
 */
static int
pager_printf(const char *fmt, ...)
{
	va_list ap;
	char text[80];
	int rc;

	va_start(ap, fmt);
	vsnprintf(text, sizeof (text), fmt, ap);
	va_end(ap);

	rc = pager_output(text);
	return (rc);
}
1439199767f8SToomas Soome
14406fd7fa35SToomas Soome #define STATUS_FORMAT " %s %s\n"
1441199767f8SToomas Soome
1442199767f8SToomas Soome static int
print_state(int indent,const char * name,vdev_state_t state)1443199767f8SToomas Soome print_state(int indent, const char *name, vdev_state_t state)
1444199767f8SToomas Soome {
1445199767f8SToomas Soome int i;
1446199767f8SToomas Soome char buf[512];
1447199767f8SToomas Soome
1448199767f8SToomas Soome buf[0] = 0;
1449199767f8SToomas Soome for (i = 0; i < indent; i++)
1450199767f8SToomas Soome strcat(buf, " ");
1451199767f8SToomas Soome strcat(buf, name);
1452199767f8SToomas Soome return (pager_printf(STATUS_FORMAT, buf, state_name(state)));
1453199767f8SToomas Soome }
1454199767f8SToomas Soome
1455199767f8SToomas Soome static int
vdev_status(vdev_t * vdev,int indent)1456199767f8SToomas Soome vdev_status(vdev_t *vdev, int indent)
1457199767f8SToomas Soome {
1458199767f8SToomas Soome vdev_t *kid;
1459199767f8SToomas Soome int ret;
146067806cd7SToomas Soome
146167806cd7SToomas Soome if (vdev->v_islog) {
14626fd7fa35SToomas Soome (void) pager_output(" logs\n");
146367806cd7SToomas Soome indent++;
146467806cd7SToomas Soome }
146567806cd7SToomas Soome
1466199767f8SToomas Soome ret = print_state(indent, vdev->v_name, vdev->v_state);
1467199767f8SToomas Soome if (ret != 0)
1468199767f8SToomas Soome return (ret);
1469199767f8SToomas Soome
1470199767f8SToomas Soome STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
1471199767f8SToomas Soome ret = vdev_status(kid, indent + 1);
1472199767f8SToomas Soome if (ret != 0)
1473199767f8SToomas Soome return (ret);
1474199767f8SToomas Soome }
1475199767f8SToomas Soome return (ret);
1476199767f8SToomas Soome }
1477199767f8SToomas Soome
1478199767f8SToomas Soome static int
spa_status(spa_t * spa)1479199767f8SToomas Soome spa_status(spa_t *spa)
1480199767f8SToomas Soome {
1481199767f8SToomas Soome static char bootfs[ZFS_MAXNAMELEN];
1482199767f8SToomas Soome uint64_t rootid;
1483da9bf005SToomas Soome vdev_list_t *vlist;
1484199767f8SToomas Soome vdev_t *vdev;
1485199767f8SToomas Soome int good_kids, bad_kids, degraded_kids, ret;
1486199767f8SToomas Soome vdev_state_t state;
1487199767f8SToomas Soome
1488199767f8SToomas Soome ret = pager_printf(" pool: %s\n", spa->spa_name);
1489199767f8SToomas Soome if (ret != 0)
1490199767f8SToomas Soome return (ret);
1491199767f8SToomas Soome
1492199767f8SToomas Soome if (zfs_get_root(spa, &rootid) == 0 &&
1493199767f8SToomas Soome zfs_rlookup(spa, rootid, bootfs) == 0) {
1494199767f8SToomas Soome if (bootfs[0] == '\0')
1495199767f8SToomas Soome ret = pager_printf("bootfs: %s\n", spa->spa_name);
1496199767f8SToomas Soome else
1497199767f8SToomas Soome ret = pager_printf("bootfs: %s/%s\n", spa->spa_name,
1498199767f8SToomas Soome bootfs);
1499199767f8SToomas Soome if (ret != 0)
1500199767f8SToomas Soome return (ret);
1501199767f8SToomas Soome }
1502199767f8SToomas Soome ret = pager_printf("config:\n\n");
1503199767f8SToomas Soome if (ret != 0)
1504199767f8SToomas Soome return (ret);
1505199767f8SToomas Soome ret = pager_printf(STATUS_FORMAT, "NAME", "STATE");
1506199767f8SToomas Soome if (ret != 0)
1507199767f8SToomas Soome return (ret);
1508199767f8SToomas Soome
1509199767f8SToomas Soome good_kids = 0;
1510199767f8SToomas Soome degraded_kids = 0;
1511199767f8SToomas Soome bad_kids = 0;
1512da9bf005SToomas Soome vlist = &spa->spa_root_vdev->v_children;
1513da9bf005SToomas Soome STAILQ_FOREACH(vdev, vlist, v_childlink) {
1514199767f8SToomas Soome if (vdev->v_state == VDEV_STATE_HEALTHY)
1515199767f8SToomas Soome good_kids++;
1516199767f8SToomas Soome else if (vdev->v_state == VDEV_STATE_DEGRADED)
1517199767f8SToomas Soome degraded_kids++;
1518199767f8SToomas Soome else
1519199767f8SToomas Soome bad_kids++;
1520199767f8SToomas Soome }
1521199767f8SToomas Soome
1522199767f8SToomas Soome state = VDEV_STATE_CLOSED;
1523199767f8SToomas Soome if (good_kids > 0 && (degraded_kids + bad_kids) == 0)
1524199767f8SToomas Soome state = VDEV_STATE_HEALTHY;
1525199767f8SToomas Soome else if ((good_kids + degraded_kids) > 0)
1526199767f8SToomas Soome state = VDEV_STATE_DEGRADED;
1527199767f8SToomas Soome
1528199767f8SToomas Soome ret = print_state(0, spa->spa_name, state);
1529199767f8SToomas Soome if (ret != 0)
1530199767f8SToomas Soome return (ret);
1531da9bf005SToomas Soome
1532da9bf005SToomas Soome STAILQ_FOREACH(vdev, vlist, v_childlink) {
1533199767f8SToomas Soome ret = vdev_status(vdev, 1);
1534199767f8SToomas Soome if (ret != 0)
1535199767f8SToomas Soome return (ret);
1536199767f8SToomas Soome }
1537199767f8SToomas Soome return (ret);
1538199767f8SToomas Soome }
1539199767f8SToomas Soome
1540199767f8SToomas Soome int
spa_all_status(void)1541199767f8SToomas Soome spa_all_status(void)
1542199767f8SToomas Soome {
1543199767f8SToomas Soome spa_t *spa;
1544199767f8SToomas Soome int first = 1, ret = 0;
1545199767f8SToomas Soome
1546199767f8SToomas Soome STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
1547199767f8SToomas Soome if (!first) {
1548199767f8SToomas Soome ret = pager_printf("\n");
1549199767f8SToomas Soome if (ret != 0)
1550199767f8SToomas Soome return (ret);
1551199767f8SToomas Soome }
1552199767f8SToomas Soome first = 0;
1553199767f8SToomas Soome ret = spa_status(spa);
1554199767f8SToomas Soome if (ret != 0)
1555199767f8SToomas Soome return (ret);
1556199767f8SToomas Soome }
1557199767f8SToomas Soome return (ret);
1558199767f8SToomas Soome }
1559199767f8SToomas Soome
1560edb35047SToomas Soome uint64_t
vdev_label_offset(uint64_t psize,int l,uint64_t offset)1561edb35047SToomas Soome vdev_label_offset(uint64_t psize, int l, uint64_t offset)
1562edb35047SToomas Soome {
1563edb35047SToomas Soome uint64_t label_offset;
1564edb35047SToomas Soome
1565edb35047SToomas Soome if (l < VDEV_LABELS / 2)
1566edb35047SToomas Soome label_offset = 0;
1567edb35047SToomas Soome else
1568edb35047SToomas Soome label_offset = psize - VDEV_LABELS * sizeof (vdev_label_t);
1569edb35047SToomas Soome
1570edb35047SToomas Soome return (offset + l * sizeof (vdev_label_t) + label_offset);
1571edb35047SToomas Soome }
1572edb35047SToomas Soome
1573199767f8SToomas Soome static int
vdev_uberblock_compare(const uberblock_t * ub1,const uberblock_t * ub2)15747dcf02b3SToomas Soome vdev_uberblock_compare(const uberblock_t *ub1, const uberblock_t *ub2)
15757dcf02b3SToomas Soome {
15767dcf02b3SToomas Soome unsigned int seq1 = 0;
15777dcf02b3SToomas Soome unsigned int seq2 = 0;
15787dcf02b3SToomas Soome int cmp = AVL_CMP(ub1->ub_txg, ub2->ub_txg);
15797dcf02b3SToomas Soome
15807dcf02b3SToomas Soome if (cmp != 0)
15817dcf02b3SToomas Soome return (cmp);
15827dcf02b3SToomas Soome
15837dcf02b3SToomas Soome cmp = AVL_CMP(ub1->ub_timestamp, ub2->ub_timestamp);
15847dcf02b3SToomas Soome if (cmp != 0)
15857dcf02b3SToomas Soome return (cmp);
15867dcf02b3SToomas Soome
15877dcf02b3SToomas Soome if (MMP_VALID(ub1) && MMP_SEQ_VALID(ub1))
15887dcf02b3SToomas Soome seq1 = MMP_SEQ(ub1);
15897dcf02b3SToomas Soome
15907dcf02b3SToomas Soome if (MMP_VALID(ub2) && MMP_SEQ_VALID(ub2))
15917dcf02b3SToomas Soome seq2 = MMP_SEQ(ub2);
15927dcf02b3SToomas Soome
15937dcf02b3SToomas Soome return (AVL_CMP(seq1, seq2));
15947dcf02b3SToomas Soome }
15957dcf02b3SToomas Soome
15967dcf02b3SToomas Soome static int
uberblock_verify(uberblock_t * ub)15977dcf02b3SToomas Soome uberblock_verify(uberblock_t *ub)
15987dcf02b3SToomas Soome {
15997dcf02b3SToomas Soome if (ub->ub_magic == BSWAP_64((uint64_t)UBERBLOCK_MAGIC)) {
16007dcf02b3SToomas Soome byteswap_uint64_array(ub, sizeof (uberblock_t));
16017dcf02b3SToomas Soome }
16027dcf02b3SToomas Soome
16037dcf02b3SToomas Soome if (ub->ub_magic != UBERBLOCK_MAGIC ||
16047dcf02b3SToomas Soome !SPA_VERSION_IS_SUPPORTED(ub->ub_version))
16057dcf02b3SToomas Soome return (EINVAL);
16067dcf02b3SToomas Soome
16077dcf02b3SToomas Soome return (0);
16087dcf02b3SToomas Soome }
16097dcf02b3SToomas Soome
16107dcf02b3SToomas Soome static int
vdev_label_read(vdev_t * vd,int l,void * buf,uint64_t offset,size_t size)16117dcf02b3SToomas Soome vdev_label_read(vdev_t *vd, int l, void *buf, uint64_t offset,
16127dcf02b3SToomas Soome size_t size)
1613199767f8SToomas Soome {
1614199767f8SToomas Soome blkptr_t bp;
16157dcf02b3SToomas Soome off_t off;
1616199767f8SToomas Soome
16177dcf02b3SToomas Soome off = vdev_label_offset(vd->v_psize, l, offset);
1618199767f8SToomas Soome
16197dcf02b3SToomas Soome BP_ZERO(&bp);
16207dcf02b3SToomas Soome BP_SET_LSIZE(&bp, size);
16217dcf02b3SToomas Soome BP_SET_PSIZE(&bp, size);
16227dcf02b3SToomas Soome BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
16237dcf02b3SToomas Soome BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
16247dcf02b3SToomas Soome DVA_SET_OFFSET(BP_IDENTITY(&bp), off);
16257dcf02b3SToomas Soome ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0);
16266617bbf6SToomas Soome
16277dcf02b3SToomas Soome return (vdev_read_phys(vd, &bp, buf, off, size));
16287dcf02b3SToomas Soome }
16296617bbf6SToomas Soome
1630b713c91eSToomas Soome /*
1631b713c91eSToomas Soome * We do need to be sure we write to correct location.
1632b713c91eSToomas Soome * Our vdev label does consist of 4 fields:
1633b713c91eSToomas Soome * pad1 (8k), reserved.
1634b713c91eSToomas Soome * bootenv (8k), checksummed, previously reserved, may contain garbage.
1635b713c91eSToomas Soome * vdev_phys (112k), checksummed
1636b713c91eSToomas Soome * uberblock ring (128k), checksummed.
1637b713c91eSToomas Soome *
1638b713c91eSToomas Soome * Since bootenv area may contain garbage, we can not reliably read it, as
1639b713c91eSToomas Soome * we can get checksum errors.
1640b713c91eSToomas Soome * Next best thing is vdev_phys - it is just after bootenv. It still may
1641b713c91eSToomas Soome * be corrupted, but in such case we will miss this one write.
1642b713c91eSToomas Soome */
1643b713c91eSToomas Soome static int
vdev_label_write_validate(vdev_t * vd,int l,uint64_t offset)1644b713c91eSToomas Soome vdev_label_write_validate(vdev_t *vd, int l, uint64_t offset)
1645b713c91eSToomas Soome {
1646b713c91eSToomas Soome uint64_t off, o_phys;
1647b713c91eSToomas Soome void *buf;
1648b713c91eSToomas Soome size_t size = VDEV_PHYS_SIZE;
1649b713c91eSToomas Soome int rc;
1650b713c91eSToomas Soome
1651b713c91eSToomas Soome o_phys = offsetof(vdev_label_t, vl_vdev_phys);
1652b713c91eSToomas Soome off = vdev_label_offset(vd->v_psize, l, o_phys);
1653b713c91eSToomas Soome
1654b713c91eSToomas Soome /* off should be 8K from bootenv */
1655b713c91eSToomas Soome if (vdev_label_offset(vd->v_psize, l, offset) + VDEV_PAD_SIZE != off)
1656b713c91eSToomas Soome return (EINVAL);
1657b713c91eSToomas Soome
1658b713c91eSToomas Soome buf = malloc(size);
1659b713c91eSToomas Soome if (buf == NULL)
1660b713c91eSToomas Soome return (ENOMEM);
1661b713c91eSToomas Soome
1662b713c91eSToomas Soome /* Read vdev_phys */
1663b713c91eSToomas Soome rc = vdev_label_read(vd, l, buf, o_phys, size);
1664b713c91eSToomas Soome free(buf);
1665b713c91eSToomas Soome return (rc);
1666b713c91eSToomas Soome }
1667b713c91eSToomas Soome
1668b713c91eSToomas Soome static int
vdev_label_write(vdev_t * vd,int l,vdev_boot_envblock_t * be,uint64_t offset)1669b713c91eSToomas Soome vdev_label_write(vdev_t *vd, int l, vdev_boot_envblock_t *be, uint64_t offset)
1670b713c91eSToomas Soome {
1671b713c91eSToomas Soome zio_checksum_info_t *ci;
1672b713c91eSToomas Soome zio_cksum_t cksum;
1673b713c91eSToomas Soome off_t off;
1674b713c91eSToomas Soome size_t size = VDEV_PAD_SIZE;
1675b713c91eSToomas Soome int rc;
1676b713c91eSToomas Soome
1677b713c91eSToomas Soome if (vd->v_phys_write == NULL)
1678b713c91eSToomas Soome return (ENOTSUP);
1679b713c91eSToomas Soome
1680b713c91eSToomas Soome off = vdev_label_offset(vd->v_psize, l, offset);
1681b713c91eSToomas Soome
1682b713c91eSToomas Soome rc = vdev_label_write_validate(vd, l, offset);
1683b713c91eSToomas Soome if (rc != 0) {
1684b713c91eSToomas Soome return (rc);
1685b713c91eSToomas Soome }
1686b713c91eSToomas Soome
1687b713c91eSToomas Soome ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
1688b713c91eSToomas Soome be->vbe_zbt.zec_magic = ZEC_MAGIC;
1689b713c91eSToomas Soome zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off);
1690b713c91eSToomas Soome ci->ci_func[0](be, size, NULL, &cksum);
1691b713c91eSToomas Soome be->vbe_zbt.zec_cksum = cksum;
1692b713c91eSToomas Soome
1693b713c91eSToomas Soome return (vdev_write_phys(vd, be, off, size));
1694b713c91eSToomas Soome }
1695b713c91eSToomas Soome
1696b713c91eSToomas Soome static int
vdev_write_bootenv_impl(vdev_t * vdev,vdev_boot_envblock_t * be)1697b713c91eSToomas Soome vdev_write_bootenv_impl(vdev_t *vdev, vdev_boot_envblock_t *be)
1698b713c91eSToomas Soome {
1699b713c91eSToomas Soome vdev_t *kid;
1700*b7a4a577SWarner Losh int rv = 0, err;
1701b713c91eSToomas Soome
1702b713c91eSToomas Soome STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
1703b713c91eSToomas Soome if (kid->v_state != VDEV_STATE_HEALTHY)
1704b713c91eSToomas Soome continue;
1705*b7a4a577SWarner Losh err = vdev_write_bootenv_impl(kid, be);
1706*b7a4a577SWarner Losh if (err != 0)
1707*b7a4a577SWarner Losh rv = err;
1708b713c91eSToomas Soome }
1709b713c91eSToomas Soome
1710b713c91eSToomas Soome /*
1711b713c91eSToomas Soome * Non-leaf vdevs do not have v_phys_write.
1712b713c91eSToomas Soome */
1713b713c91eSToomas Soome if (vdev->v_phys_write == NULL)
1714b713c91eSToomas Soome return (rv);
1715b713c91eSToomas Soome
1716b713c91eSToomas Soome for (int l = 0; l < VDEV_LABELS; l++) {
1717*b7a4a577SWarner Losh err = vdev_label_write(vdev, l, be,
1718b713c91eSToomas Soome offsetof(vdev_label_t, vl_be));
1719*b7a4a577SWarner Losh if (err != 0) {
1720b713c91eSToomas Soome printf("failed to write bootenv to %s label %d: %d\n",
1721*b7a4a577SWarner Losh vdev->v_name ? vdev->v_name : "unknown", l, err);
1722*b7a4a577SWarner Losh rv = err;
1723b713c91eSToomas Soome }
1724b713c91eSToomas Soome }
1725b713c91eSToomas Soome return (rv);
1726b713c91eSToomas Soome }
1727b713c91eSToomas Soome
1728b713c91eSToomas Soome int
vdev_write_bootenv(vdev_t * vdev,nvlist_t * nvl)1729b713c91eSToomas Soome vdev_write_bootenv(vdev_t *vdev, nvlist_t *nvl)
1730b713c91eSToomas Soome {
1731b713c91eSToomas Soome vdev_boot_envblock_t *be;
1732b713c91eSToomas Soome nvlist_t nv, *nvp;
1733b713c91eSToomas Soome uint64_t version;
1734b713c91eSToomas Soome int rv;
1735b713c91eSToomas Soome
1736b713c91eSToomas Soome if (nvl->nv_size > sizeof (be->vbe_bootenv))
1737b713c91eSToomas Soome return (E2BIG);
1738b713c91eSToomas Soome
1739b713c91eSToomas Soome version = VB_RAW;
1740b713c91eSToomas Soome nvp = vdev_read_bootenv(vdev);
1741b713c91eSToomas Soome if (nvp != NULL) {
1742b713c91eSToomas Soome nvlist_find(nvp, BOOTENV_VERSION, DATA_TYPE_UINT64, NULL,
1743b713c91eSToomas Soome &version, NULL);
1744b713c91eSToomas Soome nvlist_destroy(nvp);
1745b713c91eSToomas Soome }
1746b713c91eSToomas Soome
1747b713c91eSToomas Soome be = calloc(1, sizeof (*be));
1748b713c91eSToomas Soome if (be == NULL)
1749b713c91eSToomas Soome return (ENOMEM);
1750b713c91eSToomas Soome
1751b713c91eSToomas Soome be->vbe_version = version;
1752b713c91eSToomas Soome switch (version) {
1753b713c91eSToomas Soome case VB_RAW:
1754b713c91eSToomas Soome /*
1755b713c91eSToomas Soome * If there is no envmap, we will just wipe bootenv.
1756b713c91eSToomas Soome */
1757b713c91eSToomas Soome nvlist_find(nvl, GRUB_ENVMAP, DATA_TYPE_STRING, NULL,
1758b713c91eSToomas Soome be->vbe_bootenv, NULL);
1759b713c91eSToomas Soome rv = 0;
1760b713c91eSToomas Soome break;
1761b713c91eSToomas Soome
1762b713c91eSToomas Soome case VB_NVLIST:
1763b713c91eSToomas Soome nv.nv_header = nvl->nv_header;
1764b713c91eSToomas Soome nv.nv_asize = nvl->nv_asize;
1765b713c91eSToomas Soome nv.nv_size = nvl->nv_size;
1766b713c91eSToomas Soome
1767b713c91eSToomas Soome bcopy(&nv.nv_header, be->vbe_bootenv, sizeof (nv.nv_header));
1768b713c91eSToomas Soome nv.nv_data = (uint8_t *)be->vbe_bootenv + sizeof (nvs_header_t);
1769b713c91eSToomas Soome bcopy(nvl->nv_data, nv.nv_data, nv.nv_size);
1770b713c91eSToomas Soome rv = nvlist_export(&nv);
1771b713c91eSToomas Soome break;
1772b713c91eSToomas Soome
1773b713c91eSToomas Soome default:
1774b713c91eSToomas Soome rv = EINVAL;
1775b713c91eSToomas Soome break;
1776b713c91eSToomas Soome }
1777b713c91eSToomas Soome
1778b713c91eSToomas Soome if (rv == 0) {
1779b713c91eSToomas Soome be->vbe_version = htobe64(be->vbe_version);
1780b713c91eSToomas Soome rv = vdev_write_bootenv_impl(vdev, be);
1781b713c91eSToomas Soome }
1782b713c91eSToomas Soome free(be);
1783b713c91eSToomas Soome return (rv);
1784b713c91eSToomas Soome }
1785b713c91eSToomas Soome
1786b713c91eSToomas Soome /*
1787b713c91eSToomas Soome * Read the bootenv area from pool label, return the nvlist from it.
1788b713c91eSToomas Soome * We return from first successful read.
1789b713c91eSToomas Soome */
1790b713c91eSToomas Soome nvlist_t *
vdev_read_bootenv(vdev_t * vdev)1791b713c91eSToomas Soome vdev_read_bootenv(vdev_t *vdev)
1792b713c91eSToomas Soome {
1793b713c91eSToomas Soome vdev_t *kid;
1794b713c91eSToomas Soome nvlist_t *benv;
1795b713c91eSToomas Soome vdev_boot_envblock_t *be;
1796b713c91eSToomas Soome char *command;
1797b713c91eSToomas Soome bool ok;
1798b713c91eSToomas Soome int rv;
1799b713c91eSToomas Soome
1800b713c91eSToomas Soome STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
1801b713c91eSToomas Soome if (kid->v_state != VDEV_STATE_HEALTHY)
1802b713c91eSToomas Soome continue;
1803b713c91eSToomas Soome
1804b713c91eSToomas Soome benv = vdev_read_bootenv(kid);
1805b713c91eSToomas Soome if (benv != NULL)
1806b713c91eSToomas Soome return (benv);
1807b713c91eSToomas Soome }
1808b713c91eSToomas Soome
1809b713c91eSToomas Soome be = malloc(sizeof (*be));
1810b713c91eSToomas Soome if (be == NULL)
1811b713c91eSToomas Soome return (NULL);
1812b713c91eSToomas Soome
1813b713c91eSToomas Soome rv = 0;
1814b713c91eSToomas Soome for (int l = 0; l < VDEV_LABELS; l++) {
1815b713c91eSToomas Soome rv = vdev_label_read(vdev, l, be,
1816b713c91eSToomas Soome offsetof(vdev_label_t, vl_be),
1817b713c91eSToomas Soome sizeof (*be));
1818b713c91eSToomas Soome if (rv == 0)
1819b713c91eSToomas Soome break;
1820b713c91eSToomas Soome }
1821b713c91eSToomas Soome if (rv != 0) {
1822b713c91eSToomas Soome free(be);
1823b713c91eSToomas Soome return (NULL);
1824b713c91eSToomas Soome }
1825b713c91eSToomas Soome
1826b713c91eSToomas Soome be->vbe_version = be64toh(be->vbe_version);
1827b713c91eSToomas Soome switch (be->vbe_version) {
1828b713c91eSToomas Soome case VB_RAW:
1829b713c91eSToomas Soome /*
1830b713c91eSToomas Soome * if we have textual data in vbe_bootenv, create nvlist
1831b713c91eSToomas Soome * with key "envmap".
1832b713c91eSToomas Soome */
1833b713c91eSToomas Soome benv = nvlist_create(NV_UNIQUE_NAME);
1834b713c91eSToomas Soome if (benv != NULL) {
1835b713c91eSToomas Soome if (*be->vbe_bootenv == '\0') {
1836b713c91eSToomas Soome nvlist_add_uint64(benv, BOOTENV_VERSION,
1837b713c91eSToomas Soome VB_NVLIST);
1838b713c91eSToomas Soome break;
1839b713c91eSToomas Soome }
1840b713c91eSToomas Soome nvlist_add_uint64(benv, BOOTENV_VERSION, VB_RAW);
1841b713c91eSToomas Soome be->vbe_bootenv[sizeof (be->vbe_bootenv) - 1] = '\0';
1842b713c91eSToomas Soome nvlist_add_string(benv, GRUB_ENVMAP, be->vbe_bootenv);
1843b713c91eSToomas Soome }
1844b713c91eSToomas Soome break;
1845b713c91eSToomas Soome
1846b713c91eSToomas Soome case VB_NVLIST:
1847b713c91eSToomas Soome benv = nvlist_import(be->vbe_bootenv, sizeof (be->vbe_bootenv));
1848b713c91eSToomas Soome break;
1849b713c91eSToomas Soome
1850b713c91eSToomas Soome default:
1851b713c91eSToomas Soome command = (char *)be;
1852b713c91eSToomas Soome ok = false;
1853b713c91eSToomas Soome
1854b713c91eSToomas Soome /* Check for legacy zfsbootcfg command string */
1855b713c91eSToomas Soome for (int i = 0; command[i] != '\0'; i++) {
1856b713c91eSToomas Soome if (iscntrl(command[i])) {
1857b713c91eSToomas Soome ok = false;
1858b713c91eSToomas Soome break;
1859b713c91eSToomas Soome } else {
1860b713c91eSToomas Soome ok = true;
1861b713c91eSToomas Soome }
1862b713c91eSToomas Soome }
1863b713c91eSToomas Soome benv = nvlist_create(NV_UNIQUE_NAME);
1864b713c91eSToomas Soome if (benv != NULL) {
1865b713c91eSToomas Soome if (ok)
1866b713c91eSToomas Soome nvlist_add_string(benv, FREEBSD_BOOTONCE,
1867b713c91eSToomas Soome command);
1868b713c91eSToomas Soome else
1869b713c91eSToomas Soome nvlist_add_uint64(benv, BOOTENV_VERSION,
1870b713c91eSToomas Soome VB_NVLIST);
1871b713c91eSToomas Soome }
1872b713c91eSToomas Soome break;
1873b713c91eSToomas Soome }
1874b713c91eSToomas Soome free(be);
1875b713c91eSToomas Soome return (benv);
1876b713c91eSToomas Soome }
1877b713c91eSToomas Soome
1878eb8e4816SToomas Soome static uint64_t
vdev_get_label_asize(nvlist_t * nvl)1879b713c91eSToomas Soome vdev_get_label_asize(nvlist_t *nvl)
1880eb8e4816SToomas Soome {
1881b713c91eSToomas Soome nvlist_t *vdevs;
1882eb8e4816SToomas Soome uint64_t asize;
1883eb8e4816SToomas Soome const char *type;
1884eb8e4816SToomas Soome int len;
1885eb8e4816SToomas Soome
1886eb8e4816SToomas Soome asize = 0;
1887eb8e4816SToomas Soome /* Get vdev tree */
1888eb8e4816SToomas Soome if (nvlist_find(nvl, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST,
1889eb8e4816SToomas Soome NULL, &vdevs, NULL) != 0)
1890eb8e4816SToomas Soome return (asize);
1891eb8e4816SToomas Soome
1892eb8e4816SToomas Soome /*
1893eb8e4816SToomas Soome * Get vdev type. We will calculate asize for raidz, mirror and disk.
1894eb8e4816SToomas Soome * For raidz, the asize is raw size of all children.
1895eb8e4816SToomas Soome */
1896eb8e4816SToomas Soome if (nvlist_find(vdevs, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING,
1897eb8e4816SToomas Soome NULL, &type, &len) != 0)
1898eb8e4816SToomas Soome goto done;
1899eb8e4816SToomas Soome
1900eb8e4816SToomas Soome if (memcmp(type, VDEV_TYPE_MIRROR, len) != 0 &&
1901eb8e4816SToomas Soome memcmp(type, VDEV_TYPE_DISK, len) != 0 &&
1902eb8e4816SToomas Soome memcmp(type, VDEV_TYPE_RAIDZ, len) != 0)
1903eb8e4816SToomas Soome goto done;
1904eb8e4816SToomas Soome
1905eb8e4816SToomas Soome if (nvlist_find(vdevs, ZPOOL_CONFIG_ASIZE, DATA_TYPE_UINT64,
1906eb8e4816SToomas Soome NULL, &asize, NULL) != 0)
1907eb8e4816SToomas Soome goto done;
1908eb8e4816SToomas Soome
1909eb8e4816SToomas Soome if (memcmp(type, VDEV_TYPE_RAIDZ, len) == 0) {
1910b713c91eSToomas Soome nvlist_t **kids;
1911eb8e4816SToomas Soome int nkids;
1912eb8e4816SToomas Soome
1913eb8e4816SToomas Soome if (nvlist_find(vdevs, ZPOOL_CONFIG_CHILDREN,
1914eb8e4816SToomas Soome DATA_TYPE_NVLIST_ARRAY, &nkids, &kids, NULL) != 0) {
1915eb8e4816SToomas Soome asize = 0;
1916eb8e4816SToomas Soome goto done;
1917eb8e4816SToomas Soome }
1918eb8e4816SToomas Soome
1919eb8e4816SToomas Soome asize /= nkids;
1920b713c91eSToomas Soome for (int i = 0; i < nkids; i++)
1921b713c91eSToomas Soome nvlist_destroy(kids[i]);
1922b713c91eSToomas Soome free(kids);
1923eb8e4816SToomas Soome }
1924eb8e4816SToomas Soome
1925eb8e4816SToomas Soome asize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
1926eb8e4816SToomas Soome done:
1927eb8e4816SToomas Soome return (asize);
1928eb8e4816SToomas Soome }
1929eb8e4816SToomas Soome
1930b713c91eSToomas Soome static nvlist_t *
vdev_label_read_config(vdev_t * vd,uint64_t txg)19317dcf02b3SToomas Soome vdev_label_read_config(vdev_t *vd, uint64_t txg)
19327dcf02b3SToomas Soome {
19337dcf02b3SToomas Soome vdev_phys_t *label;
19347dcf02b3SToomas Soome uint64_t best_txg = 0;
19357dcf02b3SToomas Soome uint64_t label_txg = 0;
19367dcf02b3SToomas Soome uint64_t asize;
1937b713c91eSToomas Soome nvlist_t *nvl = NULL, *tmp;
19387dcf02b3SToomas Soome int error;
19397dcf02b3SToomas Soome
19407dcf02b3SToomas Soome label = malloc(sizeof (vdev_phys_t));
19417dcf02b3SToomas Soome if (label == NULL)
19427dcf02b3SToomas Soome return (NULL);
1943edb35047SToomas Soome
19447dcf02b3SToomas Soome for (int l = 0; l < VDEV_LABELS; l++) {
19457dcf02b3SToomas Soome if (vdev_label_read(vd, l, label,
19467dcf02b3SToomas Soome offsetof(vdev_label_t, vl_vdev_phys),
19477dcf02b3SToomas Soome sizeof (vdev_phys_t)))
1948edb35047SToomas Soome continue;
1949edb35047SToomas Soome
1950b713c91eSToomas Soome tmp = nvlist_import(label->vp_nvlist,
1951b713c91eSToomas Soome sizeof (label->vp_nvlist));
1952b713c91eSToomas Soome if (tmp == NULL)
1953edb35047SToomas Soome continue;
1954edb35047SToomas Soome
1955b713c91eSToomas Soome error = nvlist_find(tmp, ZPOOL_CONFIG_POOL_TXG,
1956ce5f7fb8SToomas Soome DATA_TYPE_UINT64, NULL, &label_txg, NULL);
19577dcf02b3SToomas Soome if (error != 0 || label_txg == 0) {
1958b713c91eSToomas Soome nvlist_destroy(nvl);
1959b713c91eSToomas Soome nvl = tmp;
19607dcf02b3SToomas Soome goto done;
19617dcf02b3SToomas Soome }
19624c2b14fdSToomas Soome
19637dcf02b3SToomas Soome if (label_txg <= txg && label_txg > best_txg) {
19647dcf02b3SToomas Soome best_txg = label_txg;
1965b713c91eSToomas Soome nvlist_destroy(nvl);
1966b713c91eSToomas Soome nvl = tmp;
1967b713c91eSToomas Soome tmp = NULL;
19684c2b14fdSToomas Soome
19694c2b14fdSToomas Soome /*
19704c2b14fdSToomas Soome * Use asize from pool config. We need this
19714c2b14fdSToomas Soome * because we can get bad value from BIOS.
19724c2b14fdSToomas Soome */
1973eb8e4816SToomas Soome asize = vdev_get_label_asize(nvl);
1974eb8e4816SToomas Soome if (asize != 0) {
1975eb8e4816SToomas Soome vd->v_psize = asize;
19764c2b14fdSToomas Soome }
1977edb35047SToomas Soome }
1978b713c91eSToomas Soome nvlist_destroy(tmp);
1979199767f8SToomas Soome }
1980199767f8SToomas Soome
19817dcf02b3SToomas Soome if (best_txg == 0) {
1982b713c91eSToomas Soome nvlist_destroy(nvl);
19837dcf02b3SToomas Soome nvl = NULL;
19847dcf02b3SToomas Soome }
19857dcf02b3SToomas Soome done:
19867dcf02b3SToomas Soome free(label);
19877dcf02b3SToomas Soome return (nvl);
19887dcf02b3SToomas Soome }
19897dcf02b3SToomas Soome
19907dcf02b3SToomas Soome static void
vdev_uberblock_load(vdev_t * vd,uberblock_t * ub)19917dcf02b3SToomas Soome vdev_uberblock_load(vdev_t *vd, uberblock_t *ub)
19927dcf02b3SToomas Soome {
19937dcf02b3SToomas Soome uberblock_t *buf;
19947dcf02b3SToomas Soome
19957dcf02b3SToomas Soome buf = malloc(VDEV_UBERBLOCK_SIZE(vd));
19967dcf02b3SToomas Soome if (buf == NULL)
19977dcf02b3SToomas Soome return;
19987dcf02b3SToomas Soome
19997dcf02b3SToomas Soome for (int l = 0; l < VDEV_LABELS; l++) {
20007dcf02b3SToomas Soome for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
20017dcf02b3SToomas Soome if (vdev_label_read(vd, l, buf,
20027dcf02b3SToomas Soome VDEV_UBERBLOCK_OFFSET(vd, n),
20037dcf02b3SToomas Soome VDEV_UBERBLOCK_SIZE(vd)))
20047dcf02b3SToomas Soome continue;
20057dcf02b3SToomas Soome if (uberblock_verify(buf) != 0)
20067dcf02b3SToomas Soome continue;
20077dcf02b3SToomas Soome
20087dcf02b3SToomas Soome if (vdev_uberblock_compare(buf, ub) > 0)
20097dcf02b3SToomas Soome *ub = *buf;
20107dcf02b3SToomas Soome }
20117dcf02b3SToomas Soome }
20127dcf02b3SToomas Soome free(buf);
20137dcf02b3SToomas Soome }
20147dcf02b3SToomas Soome
20157dcf02b3SToomas Soome static int
vdev_probe(vdev_phys_read_t * _read,vdev_phys_write_t * _write,void * priv,spa_t ** spap)2016b713c91eSToomas Soome vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv,
2017b713c91eSToomas Soome spa_t **spap)
20187dcf02b3SToomas Soome {
20197dcf02b3SToomas Soome vdev_t vtmp;
20207dcf02b3SToomas Soome spa_t *spa;
2021da9bf005SToomas Soome vdev_t *vdev;
2022b713c91eSToomas Soome nvlist_t *nvl;
20237dcf02b3SToomas Soome uint64_t val;
2024da9bf005SToomas Soome uint64_t guid, vdev_children;
20257dcf02b3SToomas Soome uint64_t pool_txg, pool_guid;
20267dcf02b3SToomas Soome const char *pool_name;
2027ce5f7fb8SToomas Soome int rc, namelen;
20287dcf02b3SToomas Soome
20297dcf02b3SToomas Soome /*
20307dcf02b3SToomas Soome * Load the vdev label and figure out which
20317dcf02b3SToomas Soome * uberblock is most current.
20327dcf02b3SToomas Soome */
20337dcf02b3SToomas Soome memset(&vtmp, 0, sizeof (vtmp));
2034b713c91eSToomas Soome vtmp.v_phys_read = _read;
2035b713c91eSToomas Soome vtmp.v_phys_write = _write;
2036b713c91eSToomas Soome vtmp.v_priv = priv;
2037b713c91eSToomas Soome vtmp.v_psize = P2ALIGN(ldi_get_size(priv),
20387dcf02b3SToomas Soome (uint64_t)sizeof (vdev_label_t));
2039edb35047SToomas Soome
20407dcf02b3SToomas Soome /* Test for minimum device size. */
20417dcf02b3SToomas Soome if (vtmp.v_psize < SPA_MINDEVSIZE)
20426617bbf6SToomas Soome return (EIO);
20436617bbf6SToomas Soome
2044b713c91eSToomas Soome nvl = vdev_label_read_config(&vtmp, UINT64_MAX);
2045b713c91eSToomas Soome if (nvl == NULL)
2046edb35047SToomas Soome return (EIO);
2047edb35047SToomas Soome
2048b713c91eSToomas Soome if (nvlist_find(nvl, ZPOOL_CONFIG_VERSION, DATA_TYPE_UINT64,
2049ce5f7fb8SToomas Soome NULL, &val, NULL) != 0) {
2050b713c91eSToomas Soome nvlist_destroy(nvl);
2051199767f8SToomas Soome return (EIO);
2052199767f8SToomas Soome }
2053199767f8SToomas Soome
2054199767f8SToomas Soome if (!SPA_VERSION_IS_SUPPORTED(val)) {
2055199767f8SToomas Soome printf("ZFS: unsupported ZFS version %u (should be %u)\n",
20566fd7fa35SToomas Soome (unsigned)val, (unsigned)SPA_VERSION);
2057b713c91eSToomas Soome nvlist_destroy(nvl);
2058199767f8SToomas Soome return (EIO);
2059199767f8SToomas Soome }
2060199767f8SToomas Soome
2061199767f8SToomas Soome /* Check ZFS features for read */
2062b713c91eSToomas Soome rc = nvlist_check_features_for_read(nvl);
2063b713c91eSToomas Soome if (rc != 0) {
2064b713c91eSToomas Soome nvlist_destroy(nvl);
2065199767f8SToomas Soome return (EIO);
2066edb35047SToomas Soome }
2067199767f8SToomas Soome
2068b713c91eSToomas Soome if (nvlist_find(nvl, ZPOOL_CONFIG_POOL_STATE, DATA_TYPE_UINT64,
2069ce5f7fb8SToomas Soome NULL, &val, NULL) != 0) {
2070b713c91eSToomas Soome nvlist_destroy(nvl);
2071199767f8SToomas Soome return (EIO);
2072199767f8SToomas Soome }
2073199767f8SToomas Soome
2074199767f8SToomas Soome if (val == POOL_STATE_DESTROYED) {
2075199767f8SToomas Soome /* We don't boot only from destroyed pools. */
2076b713c91eSToomas Soome nvlist_destroy(nvl);
2077199767f8SToomas Soome return (EIO);
2078199767f8SToomas Soome }
2079199767f8SToomas Soome
2080b713c91eSToomas Soome if (nvlist_find(nvl, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64,
2081ce5f7fb8SToomas Soome NULL, &pool_txg, NULL) != 0 ||
2082b713c91eSToomas Soome nvlist_find(nvl, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64,
2083ce5f7fb8SToomas Soome NULL, &pool_guid, NULL) != 0 ||
2084b713c91eSToomas Soome nvlist_find(nvl, ZPOOL_CONFIG_POOL_NAME, DATA_TYPE_STRING,
2085ce5f7fb8SToomas Soome NULL, &pool_name, &namelen) != 0) {
2086199767f8SToomas Soome /*
2087199767f8SToomas Soome * Cache and spare devices end up here - just ignore
2088199767f8SToomas Soome * them.
2089199767f8SToomas Soome */
2090b713c91eSToomas Soome nvlist_destroy(nvl);
2091199767f8SToomas Soome return (EIO);
2092199767f8SToomas Soome }
2093199767f8SToomas Soome
2094199767f8SToomas Soome /*
2095199767f8SToomas Soome * Create the pool if this is the first time we've seen it.
2096199767f8SToomas Soome */
2097199767f8SToomas Soome spa = spa_find_by_guid(pool_guid);
2098edb35047SToomas Soome if (spa == NULL) {
2099ce5f7fb8SToomas Soome char *name;
2100ce5f7fb8SToomas Soome
2101b713c91eSToomas Soome nvlist_find(nvl, ZPOOL_CONFIG_VDEV_CHILDREN,
2102ce5f7fb8SToomas Soome DATA_TYPE_UINT64, NULL, &vdev_children, NULL);
2103ce5f7fb8SToomas Soome name = malloc(namelen + 1);
2104ce5f7fb8SToomas Soome if (name == NULL) {
2105b713c91eSToomas Soome nvlist_destroy(nvl);
2106ce5f7fb8SToomas Soome return (ENOMEM);
2107ce5f7fb8SToomas Soome }
2108ce5f7fb8SToomas Soome bcopy(pool_name, name, namelen);
2109ce5f7fb8SToomas Soome name[namelen] = '\0';
2110ce5f7fb8SToomas Soome spa = spa_create(pool_guid, name);
2111ce5f7fb8SToomas Soome free(name);
21127dcf02b3SToomas Soome if (spa == NULL) {
2113b713c91eSToomas Soome nvlist_destroy(nvl);
2114edb35047SToomas Soome return (ENOMEM);
21157dcf02b3SToomas Soome }
2116da9bf005SToomas Soome spa->spa_root_vdev->v_nchildren = vdev_children;
2117199767f8SToomas Soome }
2118da9bf005SToomas Soome if (pool_txg > spa->spa_txg)
2119199767f8SToomas Soome spa->spa_txg = pool_txg;
2120199767f8SToomas Soome
2121199767f8SToomas Soome /*
2122199767f8SToomas Soome * Get the vdev tree and create our in-core copy of it.
2123199767f8SToomas Soome * If we already have a vdev with this guid, this must
2124199767f8SToomas Soome * be some kind of alias (overlapping slices, dangerously dedicated
2125199767f8SToomas Soome * disks etc).
2126199767f8SToomas Soome */
2127b713c91eSToomas Soome if (nvlist_find(nvl, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
2128ce5f7fb8SToomas Soome NULL, &guid, NULL) != 0) {
2129b713c91eSToomas Soome nvlist_destroy(nvl);
2130199767f8SToomas Soome return (EIO);
2131199767f8SToomas Soome }
2132199767f8SToomas Soome vdev = vdev_find(guid);
21337dcf02b3SToomas Soome /* Has this vdev already been inited? */
21347dcf02b3SToomas Soome if (vdev && vdev->v_phys_read) {
2135b713c91eSToomas Soome nvlist_destroy(nvl);
2136199767f8SToomas Soome return (EIO);
21377dcf02b3SToomas Soome }
2138199767f8SToomas Soome
2139b713c91eSToomas Soome rc = vdev_init_from_label(spa, nvl);
2140b713c91eSToomas Soome nvlist_destroy(nvl);
2141edb35047SToomas Soome if (rc != 0)
2142199767f8SToomas Soome return (rc);
2143199767f8SToomas Soome
2144199767f8SToomas Soome /*
2145199767f8SToomas Soome * We should already have created an incomplete vdev for this
2146199767f8SToomas Soome * vdev. Find it and initialise it with our read proc.
2147199767f8SToomas Soome */
2148199767f8SToomas Soome vdev = vdev_find(guid);
2149da9bf005SToomas Soome if (vdev != NULL) {
2150b713c91eSToomas Soome vdev->v_phys_read = _read;
2151b713c91eSToomas Soome vdev->v_phys_write = _write;
2152b713c91eSToomas Soome vdev->v_priv = priv;
21534c2b14fdSToomas Soome vdev->v_psize = vtmp.v_psize;
2154da9bf005SToomas Soome /*
2155da9bf005SToomas Soome * If no other state is set, mark vdev healthy.
2156da9bf005SToomas Soome */
2157da9bf005SToomas Soome if (vdev->v_state == VDEV_STATE_UNKNOWN)
2158da9bf005SToomas Soome vdev->v_state = VDEV_STATE_HEALTHY;
2159199767f8SToomas Soome } else {
2160199767f8SToomas Soome printf("ZFS: inconsistent nvlist contents\n");
2161199767f8SToomas Soome return (EIO);
2162199767f8SToomas Soome }
2163199767f8SToomas Soome
216467806cd7SToomas Soome if (vdev->v_islog)
216567806cd7SToomas Soome spa->spa_with_log = vdev->v_islog;
216667806cd7SToomas Soome
216745137058SToomas Soome /* Record boot vdev for spa. */
2168da9bf005SToomas Soome if (spa->spa_boot_vdev == NULL)
216945137058SToomas Soome spa->spa_boot_vdev = vdev;
217045137058SToomas Soome
2171199767f8SToomas Soome /*
2172199767f8SToomas Soome * Re-evaluate top-level vdev state.
2173199767f8SToomas Soome */
2174da9bf005SToomas Soome vdev_set_state(vdev->v_top);
2175199767f8SToomas Soome
2176199767f8SToomas Soome /*
2177199767f8SToomas Soome * Ok, we are happy with the pool so far. Lets find
2178199767f8SToomas Soome * the best uberblock and then we can actually access
2179199767f8SToomas Soome * the contents of the pool.
2180199767f8SToomas Soome */
21817dcf02b3SToomas Soome vdev_uberblock_load(vdev, &spa->spa_uberblock);
2182199767f8SToomas Soome
2183edb35047SToomas Soome if (spap != NULL)
2184199767f8SToomas Soome *spap = spa;
2185199767f8SToomas Soome return (0);
2186199767f8SToomas Soome }
2187199767f8SToomas Soome
/*
 * Return the base-2 logarithm of 'n' when 'n' has exactly one bit set,
 * otherwise -1 (this includes n == 0).
 */
static int
ilog2(int n)
{
	int v;

	/*
	 * Compare in unsigned arithmetic: with a 32-bit int, the signed
	 * expression 1 << 31 is not representable and is undefined behavior.
	 */
	for (v = 0; v < 32; v++)
		if ((unsigned int)n == (1U << v))
			return (v);
	return (-1);
}
2198199767f8SToomas Soome
2199199767f8SToomas Soome static int
zio_read_gang(const spa_t * spa,const blkptr_t * bp,void * buf)2200199767f8SToomas Soome zio_read_gang(const spa_t *spa, const blkptr_t *bp, void *buf)
2201199767f8SToomas Soome {
2202199767f8SToomas Soome blkptr_t gbh_bp;
2203199767f8SToomas Soome zio_gbh_phys_t zio_gb;
2204199767f8SToomas Soome char *pbuf;
2205199767f8SToomas Soome int i;
2206199767f8SToomas Soome
2207199767f8SToomas Soome /* Artificial BP for gang block header. */
2208199767f8SToomas Soome gbh_bp = *bp;
2209199767f8SToomas Soome BP_SET_PSIZE(&gbh_bp, SPA_GANGBLOCKSIZE);
2210199767f8SToomas Soome BP_SET_LSIZE(&gbh_bp, SPA_GANGBLOCKSIZE);
2211199767f8SToomas Soome BP_SET_CHECKSUM(&gbh_bp, ZIO_CHECKSUM_GANG_HEADER);
2212199767f8SToomas Soome BP_SET_COMPRESS(&gbh_bp, ZIO_COMPRESS_OFF);
2213199767f8SToomas Soome for (i = 0; i < SPA_DVAS_PER_BP; i++)
2214199767f8SToomas Soome DVA_SET_GANG(&gbh_bp.blk_dva[i], 0);
2215199767f8SToomas Soome
2216199767f8SToomas Soome /* Read gang header block using the artificial BP. */
2217199767f8SToomas Soome if (zio_read(spa, &gbh_bp, &zio_gb))
2218199767f8SToomas Soome return (EIO);
2219199767f8SToomas Soome
2220199767f8SToomas Soome pbuf = buf;
2221199767f8SToomas Soome for (i = 0; i < SPA_GBH_NBLKPTRS; i++) {
2222199767f8SToomas Soome blkptr_t *gbp = &zio_gb.zg_blkptr[i];
2223199767f8SToomas Soome
2224199767f8SToomas Soome if (BP_IS_HOLE(gbp))
2225199767f8SToomas Soome continue;
2226199767f8SToomas Soome if (zio_read(spa, gbp, pbuf))
2227199767f8SToomas Soome return (EIO);
2228199767f8SToomas Soome pbuf += BP_GET_PSIZE(gbp);
2229199767f8SToomas Soome }
2230199767f8SToomas Soome
22314a04e8dbSToomas Soome if (zio_checksum_verify(spa, bp, buf))
2232199767f8SToomas Soome return (EIO);
2233199767f8SToomas Soome return (0);
2234199767f8SToomas Soome }
2235199767f8SToomas Soome
2236199767f8SToomas Soome static int
zio_read(const spa_t * spa,const blkptr_t * bp,void * buf)2237199767f8SToomas Soome zio_read(const spa_t *spa, const blkptr_t *bp, void *buf)
2238199767f8SToomas Soome {
2239199767f8SToomas Soome int cpfunc = BP_GET_COMPRESS(bp);
2240199767f8SToomas Soome uint64_t align, size;
2241199767f8SToomas Soome void *pbuf;
2242199767f8SToomas Soome int i, error;
2243199767f8SToomas Soome
2244199767f8SToomas Soome /*
2245199767f8SToomas Soome * Process data embedded in block pointer
2246199767f8SToomas Soome */
2247199767f8SToomas Soome if (BP_IS_EMBEDDED(bp)) {
2248199767f8SToomas Soome ASSERT(BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA);
2249199767f8SToomas Soome
2250199767f8SToomas Soome size = BPE_GET_PSIZE(bp);
2251199767f8SToomas Soome ASSERT(size <= BPE_PAYLOAD_SIZE);
2252199767f8SToomas Soome
2253199767f8SToomas Soome if (cpfunc != ZIO_COMPRESS_OFF)
22543e8c7f16SToomas Soome pbuf = malloc(size);
2255199767f8SToomas Soome else
2256199767f8SToomas Soome pbuf = buf;
2257199767f8SToomas Soome
22583e8c7f16SToomas Soome if (pbuf == NULL)
22593e8c7f16SToomas Soome return (ENOMEM);
22603e8c7f16SToomas Soome
2261199767f8SToomas Soome decode_embedded_bp_compressed(bp, pbuf);
2262199767f8SToomas Soome error = 0;
2263199767f8SToomas Soome
2264199767f8SToomas Soome if (cpfunc != ZIO_COMPRESS_OFF) {
2265199767f8SToomas Soome error = zio_decompress_data(cpfunc, pbuf,
2266199767f8SToomas Soome size, buf, BP_GET_LSIZE(bp));
22673e8c7f16SToomas Soome free(pbuf);
2268199767f8SToomas Soome }
2269199767f8SToomas Soome if (error != 0)
22706fd7fa35SToomas Soome printf("ZFS: i/o error - unable to decompress "
22716fd7fa35SToomas Soome "block pointer data, error %d\n", error);
2272199767f8SToomas Soome return (error);
2273199767f8SToomas Soome }
2274199767f8SToomas Soome
2275199767f8SToomas Soome error = EIO;
2276199767f8SToomas Soome
2277199767f8SToomas Soome for (i = 0; i < SPA_DVAS_PER_BP; i++) {
2278199767f8SToomas Soome const dva_t *dva = &bp->blk_dva[i];
2279199767f8SToomas Soome vdev_t *vdev;
2280da9bf005SToomas Soome vdev_list_t *vlist;
2281da9bf005SToomas Soome uint64_t vdevid;
2282199767f8SToomas Soome off_t offset;
2283199767f8SToomas Soome
2284199767f8SToomas Soome if (!dva->dva_word[0] && !dva->dva_word[1])
2285199767f8SToomas Soome continue;
2286199767f8SToomas Soome
2287199767f8SToomas Soome vdevid = DVA_GET_VDEV(dva);
2288199767f8SToomas Soome offset = DVA_GET_OFFSET(dva);
2289da9bf005SToomas Soome vlist = &spa->spa_root_vdev->v_children;
2290da9bf005SToomas Soome STAILQ_FOREACH(vdev, vlist, v_childlink) {
2291199767f8SToomas Soome if (vdev->v_id == vdevid)
2292199767f8SToomas Soome break;
2293199767f8SToomas Soome }
2294199767f8SToomas Soome if (!vdev || !vdev->v_read)
2295199767f8SToomas Soome continue;
2296199767f8SToomas Soome
2297199767f8SToomas Soome size = BP_GET_PSIZE(bp);
2298199767f8SToomas Soome if (vdev->v_read == vdev_raidz_read) {
2299da9bf005SToomas Soome align = 1ULL << vdev->v_ashift;
2300199767f8SToomas Soome if (P2PHASE(size, align) != 0)
2301199767f8SToomas Soome size = P2ROUNDUP(size, align);
2302199767f8SToomas Soome }
2303199767f8SToomas Soome if (size != BP_GET_PSIZE(bp) || cpfunc != ZIO_COMPRESS_OFF)
23043e8c7f16SToomas Soome pbuf = malloc(size);
2305199767f8SToomas Soome else
2306199767f8SToomas Soome pbuf = buf;
2307199767f8SToomas Soome
23083e8c7f16SToomas Soome if (pbuf == NULL) {
23093e8c7f16SToomas Soome error = ENOMEM;
23103e8c7f16SToomas Soome break;
23113e8c7f16SToomas Soome }
23123e8c7f16SToomas Soome
2313199767f8SToomas Soome if (DVA_GET_GANG(dva))
2314199767f8SToomas Soome error = zio_read_gang(spa, bp, pbuf);
2315199767f8SToomas Soome else
2316199767f8SToomas Soome error = vdev->v_read(vdev, bp, pbuf, offset, size);
2317199767f8SToomas Soome if (error == 0) {
2318199767f8SToomas Soome if (cpfunc != ZIO_COMPRESS_OFF)
2319199767f8SToomas Soome error = zio_decompress_data(cpfunc, pbuf,
2320199767f8SToomas Soome BP_GET_PSIZE(bp), buf, BP_GET_LSIZE(bp));
2321199767f8SToomas Soome else if (size != BP_GET_PSIZE(bp))
2322199767f8SToomas Soome bcopy(pbuf, buf, BP_GET_PSIZE(bp));
2323199767f8SToomas Soome }
2324199767f8SToomas Soome if (buf != pbuf)
23253e8c7f16SToomas Soome free(pbuf);
2326199767f8SToomas Soome if (error == 0)
2327199767f8SToomas Soome break;
2328199767f8SToomas Soome }
2329199767f8SToomas Soome if (error != 0)
2330199767f8SToomas Soome printf("ZFS: i/o error - all block copies unavailable\n");
23313e8c7f16SToomas Soome
2332199767f8SToomas Soome return (error);
2333199767f8SToomas Soome }
2334199767f8SToomas Soome
2335199767f8SToomas Soome static int
dnode_read(const spa_t * spa,const dnode_phys_t * dnode,off_t offset,void * buf,size_t buflen)23366fd7fa35SToomas Soome dnode_read(const spa_t *spa, const dnode_phys_t *dnode, off_t offset,
23376fd7fa35SToomas Soome void *buf, size_t buflen)
2338199767f8SToomas Soome {
2339199767f8SToomas Soome int ibshift = dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
2340199767f8SToomas Soome int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
2341199767f8SToomas Soome int nlevels = dnode->dn_nlevels;
2342199767f8SToomas Soome int i, rc;
2343199767f8SToomas Soome
2344199767f8SToomas Soome if (bsize > SPA_MAXBLOCKSIZE) {
2345199767f8SToomas Soome printf("ZFS: I/O error - blocks larger than %llu are not "
2346199767f8SToomas Soome "supported\n", SPA_MAXBLOCKSIZE);
2347199767f8SToomas Soome return (EIO);
2348199767f8SToomas Soome }
2349199767f8SToomas Soome
235077570342SKyle Evans /*
235177570342SKyle Evans * Handle odd block sizes, mirrors dmu_read_impl(). Data can't exist
235277570342SKyle Evans * past the first block, so we'll clip the read to the portion of the
235377570342SKyle Evans * buffer within bsize and zero out the remainder.
235477570342SKyle Evans */
235577570342SKyle Evans if (dnode->dn_maxblkid == 0) {
235677570342SKyle Evans size_t newbuflen;
235777570342SKyle Evans
235877570342SKyle Evans newbuflen = offset > bsize ? 0 : MIN(buflen, bsize - offset);
235977570342SKyle Evans bzero((char *)buf + newbuflen, buflen - newbuflen);
236077570342SKyle Evans buflen = newbuflen;
236177570342SKyle Evans }
236277570342SKyle Evans
2363199767f8SToomas Soome /*
2364199767f8SToomas Soome * Note: bsize may not be a power of two here so we need to do an
2365199767f8SToomas Soome * actual divide rather than a bitshift.
2366199767f8SToomas Soome */
2367199767f8SToomas Soome while (buflen > 0) {
2368199767f8SToomas Soome uint64_t bn = offset / bsize;
2369199767f8SToomas Soome int boff = offset % bsize;
2370199767f8SToomas Soome int ibn;
2371199767f8SToomas Soome const blkptr_t *indbp;
2372199767f8SToomas Soome blkptr_t bp;
2373199767f8SToomas Soome
2374199767f8SToomas Soome if (bn > dnode->dn_maxblkid) {
2375199767f8SToomas Soome printf("warning: zfs bug: bn %llx > dn_maxblkid %llx\n",
2376199767f8SToomas Soome (unsigned long long)bn,
2377199767f8SToomas Soome (unsigned long long)dnode->dn_maxblkid);
2378199767f8SToomas Soome /*
2379199767f8SToomas Soome * zfs bug, will not return error
2380199767f8SToomas Soome * return (EIO);
2381199767f8SToomas Soome */
2382199767f8SToomas Soome }
2383199767f8SToomas Soome
2384199767f8SToomas Soome if (dnode == dnode_cache_obj && bn == dnode_cache_bn)
2385199767f8SToomas Soome goto cached;
2386199767f8SToomas Soome
2387199767f8SToomas Soome indbp = dnode->dn_blkptr;
2388199767f8SToomas Soome for (i = 0; i < nlevels; i++) {
2389199767f8SToomas Soome /*
2390199767f8SToomas Soome * Copy the bp from the indirect array so that
2391199767f8SToomas Soome * we can re-use the scratch buffer for multi-level
2392199767f8SToomas Soome * objects.
2393199767f8SToomas Soome */
2394199767f8SToomas Soome ibn = bn >> ((nlevels - i - 1) * ibshift);
2395199767f8SToomas Soome ibn &= ((1 << ibshift) - 1);
2396199767f8SToomas Soome bp = indbp[ibn];
2397199767f8SToomas Soome if (BP_IS_HOLE(&bp)) {
2398199767f8SToomas Soome memset(dnode_cache_buf, 0, bsize);
2399199767f8SToomas Soome break;
2400199767f8SToomas Soome }
2401199767f8SToomas Soome rc = zio_read(spa, &bp, dnode_cache_buf);
2402199767f8SToomas Soome if (rc)
2403199767f8SToomas Soome return (rc);
2404199767f8SToomas Soome indbp = (const blkptr_t *) dnode_cache_buf;
2405199767f8SToomas Soome }
2406199767f8SToomas Soome dnode_cache_obj = dnode;
2407199767f8SToomas Soome dnode_cache_bn = bn;
2408199767f8SToomas Soome cached:
2409199767f8SToomas Soome
2410199767f8SToomas Soome /*
2411199767f8SToomas Soome * The buffer contains our data block. Copy what we
2412199767f8SToomas Soome * need from it and loop.
241342b4b09eSAndy Fiddaman */
2414199767f8SToomas Soome i = bsize - boff;
2415199767f8SToomas Soome if (i > buflen) i = buflen;
2416199767f8SToomas Soome memcpy(buf, &dnode_cache_buf[boff], i);
24176fd7fa35SToomas Soome buf = ((char *)buf) + i;
2418199767f8SToomas Soome offset += i;
2419199767f8SToomas Soome buflen -= i;
2420199767f8SToomas Soome }
2421199767f8SToomas Soome
2422199767f8SToomas Soome return (0);
2423199767f8SToomas Soome }
2424199767f8SToomas Soome
2425199767f8SToomas Soome /*
2426fdefee4cSToomas Soome * Lookup a value in a microzap directory.
2427199767f8SToomas Soome */
2428199767f8SToomas Soome static int
mzap_lookup(const mzap_phys_t * mz,size_t size,const char * name,uint64_t * value)2429fdefee4cSToomas Soome mzap_lookup(const mzap_phys_t *mz, size_t size, const char *name,
2430fdefee4cSToomas Soome uint64_t *value)
2431199767f8SToomas Soome {
2432199767f8SToomas Soome const mzap_ent_phys_t *mze;
2433199767f8SToomas Soome int chunks, i;
2434199767f8SToomas Soome
2435199767f8SToomas Soome /*
2436199767f8SToomas Soome * Microzap objects use exactly one block. Read the whole
2437199767f8SToomas Soome * thing.
2438199767f8SToomas Soome */
2439199767f8SToomas Soome chunks = size / MZAP_ENT_LEN - 1;
2440199767f8SToomas Soome for (i = 0; i < chunks; i++) {
2441199767f8SToomas Soome mze = &mz->mz_chunk[i];
24426fd7fa35SToomas Soome if (strcmp(mze->mze_name, name) == 0) {
2443199767f8SToomas Soome *value = mze->mze_value;
2444199767f8SToomas Soome return (0);
2445199767f8SToomas Soome }
2446199767f8SToomas Soome }
2447199767f8SToomas Soome
2448199767f8SToomas Soome return (ENOENT);
2449199767f8SToomas Soome }
2450199767f8SToomas Soome
2451199767f8SToomas Soome /*
2452199767f8SToomas Soome * Compare a name with a zap leaf entry. Return non-zero if the name
2453199767f8SToomas Soome * matches.
2454199767f8SToomas Soome */
2455199767f8SToomas Soome static int
fzap_name_equal(const zap_leaf_t * zl,const zap_leaf_chunk_t * zc,const char * name)24566fd7fa35SToomas Soome fzap_name_equal(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc,
24576fd7fa35SToomas Soome const char *name)
2458199767f8SToomas Soome {
2459199767f8SToomas Soome size_t namelen;
2460199767f8SToomas Soome const zap_leaf_chunk_t *nc;
2461199767f8SToomas Soome const char *p;
2462199767f8SToomas Soome
2463199767f8SToomas Soome namelen = zc->l_entry.le_name_numints;
246442b4b09eSAndy Fiddaman
2465199767f8SToomas Soome nc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_name_chunk);
2466199767f8SToomas Soome p = name;
2467199767f8SToomas Soome while (namelen > 0) {
2468199767f8SToomas Soome size_t len;
24696fd7fa35SToomas Soome
2470199767f8SToomas Soome len = namelen;
2471199767f8SToomas Soome if (len > ZAP_LEAF_ARRAY_BYTES)
2472199767f8SToomas Soome len = ZAP_LEAF_ARRAY_BYTES;
2473199767f8SToomas Soome if (memcmp(p, nc->l_array.la_array, len))
2474199767f8SToomas Soome return (0);
2475199767f8SToomas Soome p += len;
2476199767f8SToomas Soome namelen -= len;
2477199767f8SToomas Soome nc = &ZAP_LEAF_CHUNK(zl, nc->l_array.la_next);
2478199767f8SToomas Soome }
2479199767f8SToomas Soome
24806fd7fa35SToomas Soome return (1);
2481199767f8SToomas Soome }
2482199767f8SToomas Soome
2483199767f8SToomas Soome /*
2484199767f8SToomas Soome * Extract a uint64_t value from a zap leaf entry.
2485199767f8SToomas Soome */
2486199767f8SToomas Soome static uint64_t
fzap_leaf_value(const zap_leaf_t * zl,const zap_leaf_chunk_t * zc)2487199767f8SToomas Soome fzap_leaf_value(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc)
2488199767f8SToomas Soome {
2489199767f8SToomas Soome const zap_leaf_chunk_t *vc;
2490199767f8SToomas Soome int i;
2491199767f8SToomas Soome uint64_t value;
2492199767f8SToomas Soome const uint8_t *p;
2493199767f8SToomas Soome
2494199767f8SToomas Soome vc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_value_chunk);
2495199767f8SToomas Soome for (i = 0, value = 0, p = vc->l_array.la_array; i < 8; i++) {
2496199767f8SToomas Soome value = (value << 8) | p[i];
2497199767f8SToomas Soome }
2498199767f8SToomas Soome
24996fd7fa35SToomas Soome return (value);
2500199767f8SToomas Soome }
2501199767f8SToomas Soome
/*
 * Store 'value' at 'addr' as an unsigned integer 'len' bytes wide,
 * truncating it to that width. Widths other than 1, 2, 4 and 8 store
 * nothing.
 */
static void
stv(int len, void *addr, uint64_t value)
{
	if (len == 1)
		*(uint8_t *)addr = value;
	else if (len == 2)
		*(uint16_t *)addr = value;
	else if (len == 4)
		*(uint32_t *)addr = value;
	else if (len == 8)
		*(uint64_t *)addr = value;
}
25204a04e8dbSToomas Soome
25214a04e8dbSToomas Soome /*
25224a04e8dbSToomas Soome * Extract a array from a zap leaf entry.
25234a04e8dbSToomas Soome */
25244a04e8dbSToomas Soome static void
fzap_leaf_array(const zap_leaf_t * zl,const zap_leaf_chunk_t * zc,uint64_t integer_size,uint64_t num_integers,void * buf)25254a04e8dbSToomas Soome fzap_leaf_array(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc,
25264a04e8dbSToomas Soome uint64_t integer_size, uint64_t num_integers, void *buf)
25274a04e8dbSToomas Soome {
25284a04e8dbSToomas Soome uint64_t array_int_len = zc->l_entry.le_value_intlen;
25294a04e8dbSToomas Soome uint64_t value = 0;
25304a04e8dbSToomas Soome uint64_t *u64 = buf;
25314a04e8dbSToomas Soome char *p = buf;
25324a04e8dbSToomas Soome int len = MIN(zc->l_entry.le_value_numints, num_integers);
25334a04e8dbSToomas Soome int chunk = zc->l_entry.le_value_chunk;
25344a04e8dbSToomas Soome int byten = 0;
25354a04e8dbSToomas Soome
25364a04e8dbSToomas Soome if (integer_size == 8 && len == 1) {
25374a04e8dbSToomas Soome *u64 = fzap_leaf_value(zl, zc);
25384a04e8dbSToomas Soome return;
25394a04e8dbSToomas Soome }
25404a04e8dbSToomas Soome
25414a04e8dbSToomas Soome while (len > 0) {
25424a04e8dbSToomas Soome struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(zl, chunk).l_array;
25434a04e8dbSToomas Soome int i;
25444a04e8dbSToomas Soome
25454a04e8dbSToomas Soome ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(zl));
25464a04e8dbSToomas Soome for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) {
25474a04e8dbSToomas Soome value = (value << 8) | la->la_array[i];
25484a04e8dbSToomas Soome byten++;
25494a04e8dbSToomas Soome if (byten == array_int_len) {
25504a04e8dbSToomas Soome stv(integer_size, p, value);
25514a04e8dbSToomas Soome byten = 0;
25524a04e8dbSToomas Soome len--;
25534a04e8dbSToomas Soome if (len == 0)
25544a04e8dbSToomas Soome return;
25554a04e8dbSToomas Soome p += integer_size;
25564a04e8dbSToomas Soome }
25574a04e8dbSToomas Soome }
25584a04e8dbSToomas Soome chunk = la->la_next;
25594a04e8dbSToomas Soome }
25604a04e8dbSToomas Soome }
25614a04e8dbSToomas Soome
25624a04e8dbSToomas Soome static int
fzap_check_size(uint64_t integer_size,uint64_t num_integers)25634a04e8dbSToomas Soome fzap_check_size(uint64_t integer_size, uint64_t num_integers)
25644a04e8dbSToomas Soome {
25654a04e8dbSToomas Soome
25664a04e8dbSToomas Soome switch (integer_size) {
25674a04e8dbSToomas Soome case 1:
25684a04e8dbSToomas Soome case 2:
25694a04e8dbSToomas Soome case 4:
25704a04e8dbSToomas Soome case 8:
25714a04e8dbSToomas Soome break;
25724a04e8dbSToomas Soome default:
25734a04e8dbSToomas Soome return (EINVAL);
25744a04e8dbSToomas Soome }
25754a04e8dbSToomas Soome
25764a04e8dbSToomas Soome if (integer_size * num_integers > ZAP_MAXVALUELEN)
25774a04e8dbSToomas Soome return (E2BIG);
25784a04e8dbSToomas Soome
25794a04e8dbSToomas Soome return (0);
25804a04e8dbSToomas Soome }
25814a04e8dbSToomas Soome
2582fdefee4cSToomas Soome static void
zap_leaf_free(zap_leaf_t * leaf)2583fdefee4cSToomas Soome zap_leaf_free(zap_leaf_t *leaf)
2584fdefee4cSToomas Soome {
2585fdefee4cSToomas Soome free(leaf->l_phys);
2586fdefee4cSToomas Soome free(leaf);
2587fdefee4cSToomas Soome }
2588fdefee4cSToomas Soome
2589199767f8SToomas Soome static int
zap_get_leaf_byblk(fat_zap_t * zap,uint64_t blk,zap_leaf_t ** lp)2590fdefee4cSToomas Soome zap_get_leaf_byblk(fat_zap_t *zap, uint64_t blk, zap_leaf_t **lp)
2591199767f8SToomas Soome {
2592fdefee4cSToomas Soome int bs = FZAP_BLOCK_SHIFT(zap);
2593fdefee4cSToomas Soome int err;
2594199767f8SToomas Soome
2595fdefee4cSToomas Soome *lp = malloc(sizeof (**lp));
2596fdefee4cSToomas Soome if (*lp == NULL)
2597fdefee4cSToomas Soome return (ENOMEM);
2598199767f8SToomas Soome
2599fdefee4cSToomas Soome (*lp)->l_bs = bs;
2600fdefee4cSToomas Soome (*lp)->l_phys = malloc(1 << bs);
26014a04e8dbSToomas Soome
2602fdefee4cSToomas Soome if ((*lp)->l_phys == NULL) {
2603fdefee4cSToomas Soome free(*lp);
2604fdefee4cSToomas Soome return (ENOMEM);
2605fdefee4cSToomas Soome }
2606fdefee4cSToomas Soome err = dnode_read(zap->zap_spa, zap->zap_dnode, blk << bs, (*lp)->l_phys,
2607fdefee4cSToomas Soome 1 << bs);
2608fdefee4cSToomas Soome if (err != 0) {
2609fdefee4cSToomas Soome zap_leaf_free(*lp);
2610fdefee4cSToomas Soome }
2611fdefee4cSToomas Soome return (err);
2612fdefee4cSToomas Soome }
2613199767f8SToomas Soome
2614fdefee4cSToomas Soome static int
zap_table_load(fat_zap_t * zap,zap_table_phys_t * tbl,uint64_t idx,uint64_t * valp)2615fdefee4cSToomas Soome zap_table_load(fat_zap_t *zap, zap_table_phys_t *tbl, uint64_t idx,
2616fdefee4cSToomas Soome uint64_t *valp)
2617fdefee4cSToomas Soome {
2618fdefee4cSToomas Soome int bs = FZAP_BLOCK_SHIFT(zap);
2619fdefee4cSToomas Soome uint64_t blk = idx >> (bs - 3);
2620fdefee4cSToomas Soome uint64_t off = idx & ((1 << (bs - 3)) - 1);
2621fdefee4cSToomas Soome uint64_t *buf;
2622fdefee4cSToomas Soome int rc;
2623fdefee4cSToomas Soome
2624fdefee4cSToomas Soome buf = malloc(1 << zap->zap_block_shift);
2625fdefee4cSToomas Soome if (buf == NULL)
2626fdefee4cSToomas Soome return (ENOMEM);
2627fdefee4cSToomas Soome rc = dnode_read(zap->zap_spa, zap->zap_dnode, (tbl->zt_blk + blk) << bs,
2628fdefee4cSToomas Soome buf, 1 << zap->zap_block_shift);
2629fdefee4cSToomas Soome if (rc == 0)
2630fdefee4cSToomas Soome *valp = buf[off];
2631fdefee4cSToomas Soome free(buf);
2632fdefee4cSToomas Soome return (rc);
2633fdefee4cSToomas Soome }
2634fdefee4cSToomas Soome
2635fdefee4cSToomas Soome static int
zap_idx_to_blk(fat_zap_t * zap,uint64_t idx,uint64_t * valp)2636fdefee4cSToomas Soome zap_idx_to_blk(fat_zap_t *zap, uint64_t idx, uint64_t *valp)
2637fdefee4cSToomas Soome {
2638fdefee4cSToomas Soome if (zap->zap_phys->zap_ptrtbl.zt_numblks == 0) {
2639fdefee4cSToomas Soome *valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx);
2640fdefee4cSToomas Soome return (0);
2641199767f8SToomas Soome } else {
2642fdefee4cSToomas Soome return (zap_table_load(zap, &zap->zap_phys->zap_ptrtbl,
2643fdefee4cSToomas Soome idx, valp));
2644199767f8SToomas Soome }
2645fdefee4cSToomas Soome }
2646199767f8SToomas Soome
2647fdefee4cSToomas Soome #define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n))))
2648fdefee4cSToomas Soome static int
zap_deref_leaf(fat_zap_t * zap,uint64_t h,zap_leaf_t ** lp)2649fdefee4cSToomas Soome zap_deref_leaf(fat_zap_t *zap, uint64_t h, zap_leaf_t **lp)
2650fdefee4cSToomas Soome {
2651fdefee4cSToomas Soome uint64_t idx, blk;
2652fdefee4cSToomas Soome int err;
2653199767f8SToomas Soome
2654fdefee4cSToomas Soome idx = ZAP_HASH_IDX(h, zap->zap_phys->zap_ptrtbl.zt_shift);
2655fdefee4cSToomas Soome err = zap_idx_to_blk(zap, idx, &blk);
2656fdefee4cSToomas Soome if (err != 0)
2657fdefee4cSToomas Soome return (err);
2658fdefee4cSToomas Soome return (zap_get_leaf_byblk(zap, blk, lp));
2659fdefee4cSToomas Soome }
2660199767f8SToomas Soome
2661fdefee4cSToomas Soome #define CHAIN_END 0xffff /* end of the chunk chain */
2662fdefee4cSToomas Soome #define LEAF_HASH(l, h) \
2663fdefee4cSToomas Soome ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
2664fdefee4cSToomas Soome ((h) >> \
2665fdefee4cSToomas Soome (64 - ZAP_LEAF_HASH_SHIFT(l) - (l)->l_phys->l_hdr.lh_prefix_len)))
2666fdefee4cSToomas Soome #define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)])
2667199767f8SToomas Soome
2668fdefee4cSToomas Soome static int
zap_leaf_lookup(zap_leaf_t * zl,uint64_t hash,const char * name,uint64_t integer_size,uint64_t num_integers,void * value)2669fdefee4cSToomas Soome zap_leaf_lookup(zap_leaf_t *zl, uint64_t hash, const char *name,
2670fdefee4cSToomas Soome uint64_t integer_size, uint64_t num_integers, void *value)
2671fdefee4cSToomas Soome {
2672fdefee4cSToomas Soome int rc;
2673fdefee4cSToomas Soome uint16_t *chunkp;
2674fdefee4cSToomas Soome struct zap_leaf_entry *le;
2675199767f8SToomas Soome
2676199767f8SToomas Soome /*
2677199767f8SToomas Soome * Make sure this chunk matches our hash.
2678199767f8SToomas Soome */
2679fdefee4cSToomas Soome if (zl->l_phys->l_hdr.lh_prefix_len > 0 &&
2680fdefee4cSToomas Soome zl->l_phys->l_hdr.lh_prefix !=
2681fdefee4cSToomas Soome hash >> (64 - zl->l_phys->l_hdr.lh_prefix_len))
2682fdefee4cSToomas Soome return (EIO);
2683199767f8SToomas Soome
2684fdefee4cSToomas Soome rc = ENOENT;
2685fdefee4cSToomas Soome for (chunkp = LEAF_HASH_ENTPTR(zl, hash);
2686fdefee4cSToomas Soome *chunkp != CHAIN_END; chunkp = &le->le_next) {
2687fdefee4cSToomas Soome zap_leaf_chunk_t *zc;
2688fdefee4cSToomas Soome uint16_t chunk = *chunkp;
2689fdefee4cSToomas Soome
2690fdefee4cSToomas Soome le = ZAP_LEAF_ENTRY(zl, chunk);
2691fdefee4cSToomas Soome if (le->le_hash != hash)
2692fdefee4cSToomas Soome continue;
2693fdefee4cSToomas Soome zc = &ZAP_LEAF_CHUNK(zl, chunk);
2694fdefee4cSToomas Soome if (fzap_name_equal(zl, zc, name)) {
2695fdefee4cSToomas Soome if (zc->l_entry.le_value_intlen > integer_size) {
2696fdefee4cSToomas Soome rc = EINVAL;
2697fdefee4cSToomas Soome } else {
2698fdefee4cSToomas Soome fzap_leaf_array(zl, zc, integer_size,
2699fdefee4cSToomas Soome num_integers, value);
2700fdefee4cSToomas Soome rc = 0;
2701fdefee4cSToomas Soome }
2702199767f8SToomas Soome break;
2703199767f8SToomas Soome }
2704199767f8SToomas Soome }
2705fdefee4cSToomas Soome return (rc);
2706fdefee4cSToomas Soome }
27074a04e8dbSToomas Soome
2708fdefee4cSToomas Soome /*
2709fdefee4cSToomas Soome * Lookup a value in a fatzap directory.
2710fdefee4cSToomas Soome */
2711fdefee4cSToomas Soome static int
fzap_lookup(const spa_t * spa,const dnode_phys_t * dnode,zap_phys_t * zh,const char * name,uint64_t integer_size,uint64_t num_integers,void * value)2712fdefee4cSToomas Soome fzap_lookup(const spa_t *spa, const dnode_phys_t *dnode, zap_phys_t *zh,
2713fdefee4cSToomas Soome const char *name, uint64_t integer_size, uint64_t num_integers,
2714fdefee4cSToomas Soome void *value)
2715fdefee4cSToomas Soome {
2716fdefee4cSToomas Soome int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
2717fdefee4cSToomas Soome fat_zap_t z;
2718fdefee4cSToomas Soome zap_leaf_t *zl;
2719fdefee4cSToomas Soome uint64_t hash;
2720fdefee4cSToomas Soome int rc;
2721199767f8SToomas Soome
2722fdefee4cSToomas Soome if (zh->zap_magic != ZAP_MAGIC)
2723fdefee4cSToomas Soome return (EIO);
2724fdefee4cSToomas Soome
2725fdefee4cSToomas Soome if ((rc = fzap_check_size(integer_size, num_integers)) != 0)
2726fdefee4cSToomas Soome return (rc);
2727fdefee4cSToomas Soome
2728fdefee4cSToomas Soome z.zap_block_shift = ilog2(bsize);
2729fdefee4cSToomas Soome z.zap_phys = zh;
2730fdefee4cSToomas Soome z.zap_spa = spa;
2731fdefee4cSToomas Soome z.zap_dnode = dnode;
2732fdefee4cSToomas Soome
2733fdefee4cSToomas Soome hash = zap_hash(zh->zap_salt, name);
2734fdefee4cSToomas Soome rc = zap_deref_leaf(&z, hash, &zl);
2735fdefee4cSToomas Soome if (rc != 0)
2736fdefee4cSToomas Soome return (rc);
2737fdefee4cSToomas Soome
2738fdefee4cSToomas Soome rc = zap_leaf_lookup(zl, hash, name, integer_size, num_integers, value);
2739fdefee4cSToomas Soome
2740fdefee4cSToomas Soome zap_leaf_free(zl);
2741fdefee4cSToomas Soome return (rc);
2742199767f8SToomas Soome }
2743199767f8SToomas Soome
2744199767f8SToomas Soome /*
2745199767f8SToomas Soome * Lookup a name in a zap object and return its value as a uint64_t.
2746199767f8SToomas Soome */
2747199767f8SToomas Soome static int
zap_lookup(const spa_t * spa,const dnode_phys_t * dnode,const char * name,uint64_t integer_size,uint64_t num_integers,void * value)27484a04e8dbSToomas Soome zap_lookup(const spa_t *spa, const dnode_phys_t *dnode, const char *name,
27494a04e8dbSToomas Soome uint64_t integer_size, uint64_t num_integers, void *value)
2750199767f8SToomas Soome {
2751199767f8SToomas Soome int rc;
2752fdefee4cSToomas Soome zap_phys_t *zap;
2753199767f8SToomas Soome size_t size = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
2754199767f8SToomas Soome
2755fdefee4cSToomas Soome zap = malloc(size);
2756fdefee4cSToomas Soome if (zap == NULL)
2757fdefee4cSToomas Soome return (ENOMEM);
2758fdefee4cSToomas Soome
2759fdefee4cSToomas Soome rc = dnode_read(spa, dnode, 0, zap, size);
2760199767f8SToomas Soome if (rc)
2761fdefee4cSToomas Soome goto done;
2762199767f8SToomas Soome
2763fdefee4cSToomas Soome switch (zap->zap_block_type) {
2764fdefee4cSToomas Soome case ZBT_MICRO:
2765fdefee4cSToomas Soome rc = mzap_lookup((const mzap_phys_t *)zap, size, name, value);
2766fdefee4cSToomas Soome break;
2767fdefee4cSToomas Soome case ZBT_HEADER:
2768fdefee4cSToomas Soome rc = fzap_lookup(spa, dnode, zap, name, integer_size,
2769fdefee4cSToomas Soome num_integers, value);
2770fdefee4cSToomas Soome break;
2771fdefee4cSToomas Soome default:
2772fdefee4cSToomas Soome printf("ZFS: invalid zap_type=%" PRIx64 "\n",
2773fdefee4cSToomas Soome zap->zap_block_type);
2774fdefee4cSToomas Soome rc = EIO;
27754a04e8dbSToomas Soome }
2776fdefee4cSToomas Soome done:
2777fdefee4cSToomas Soome free(zap);
2778fdefee4cSToomas Soome return (rc);
2779199767f8SToomas Soome }
2780199767f8SToomas Soome
2781199767f8SToomas Soome /*
2782fdefee4cSToomas Soome * List a microzap directory.
2783199767f8SToomas Soome */
2784199767f8SToomas Soome static int
mzap_list(const mzap_phys_t * mz,size_t size,int (* callback)(const char *,uint64_t))2785fdefee4cSToomas Soome mzap_list(const mzap_phys_t *mz, size_t size,
2786fdefee4cSToomas Soome int (*callback)(const char *, uint64_t))
2787199767f8SToomas Soome {
2788199767f8SToomas Soome const mzap_ent_phys_t *mze;
2789199767f8SToomas Soome int chunks, i, rc;
2790199767f8SToomas Soome
2791199767f8SToomas Soome /*
2792199767f8SToomas Soome * Microzap objects use exactly one block. Read the whole
2793199767f8SToomas Soome * thing.
2794199767f8SToomas Soome */
2795fdefee4cSToomas Soome rc = 0;
2796199767f8SToomas Soome chunks = size / MZAP_ENT_LEN - 1;
2797199767f8SToomas Soome for (i = 0; i < chunks; i++) {
2798199767f8SToomas Soome mze = &mz->mz_chunk[i];
2799199767f8SToomas Soome if (mze->mze_name[0]) {
2800199767f8SToomas Soome rc = callback(mze->mze_name, mze->mze_value);
2801199767f8SToomas Soome if (rc != 0)
2802fdefee4cSToomas Soome break;
2803199767f8SToomas Soome }
2804199767f8SToomas Soome }
2805199767f8SToomas Soome
2806fdefee4cSToomas Soome return (rc);
2807199767f8SToomas Soome }
2808199767f8SToomas Soome
2809199767f8SToomas Soome /*
2810fdefee4cSToomas Soome * List a fatzap directory.
2811199767f8SToomas Soome */
2812199767f8SToomas Soome static int
fzap_list(const spa_t * spa,const dnode_phys_t * dnode,zap_phys_t * zh,int (* callback)(const char *,uint64_t))2813fdefee4cSToomas Soome fzap_list(const spa_t *spa, const dnode_phys_t *dnode, zap_phys_t *zh,
28146fd7fa35SToomas Soome int (*callback)(const char *, uint64_t))
2815199767f8SToomas Soome {
2816199767f8SToomas Soome int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
2817199767f8SToomas Soome fat_zap_t z;
2818199767f8SToomas Soome int i, j, rc;
2819199767f8SToomas Soome
2820fdefee4cSToomas Soome if (zh->zap_magic != ZAP_MAGIC)
2821199767f8SToomas Soome return (EIO);
2822199767f8SToomas Soome
2823199767f8SToomas Soome z.zap_block_shift = ilog2(bsize);
2824fdefee4cSToomas Soome z.zap_phys = zh;
2825199767f8SToomas Soome
2826199767f8SToomas Soome /*
2827199767f8SToomas Soome * This assumes that the leaf blocks start at block 1. The
2828199767f8SToomas Soome * documentation isn't exactly clear on this.
2829199767f8SToomas Soome */
2830199767f8SToomas Soome zap_leaf_t zl;
2831199767f8SToomas Soome zl.l_bs = z.zap_block_shift;
2832fdefee4cSToomas Soome zl.l_phys = malloc(bsize);
2833fdefee4cSToomas Soome if (zl.l_phys == NULL)
2834fdefee4cSToomas Soome return (ENOMEM);
2835fdefee4cSToomas Soome
2836fdefee4cSToomas Soome for (i = 0; i < zh->zap_num_leafs; i++) {
2837130d7652SToomas Soome off_t off = ((off_t)(i + 1)) << zl.l_bs;
2838199767f8SToomas Soome char name[256], *p;
2839199767f8SToomas Soome uint64_t value;
2840199767f8SToomas Soome
2841fdefee4cSToomas Soome if (dnode_read(spa, dnode, off, zl.l_phys, bsize)) {
2842fdefee4cSToomas Soome free(zl.l_phys);
2843199767f8SToomas Soome return (EIO);
2844fdefee4cSToomas Soome }
2845199767f8SToomas Soome
2846199767f8SToomas Soome for (j = 0; j < ZAP_LEAF_NUMCHUNKS(&zl); j++) {
2847199767f8SToomas Soome zap_leaf_chunk_t *zc, *nc;
2848199767f8SToomas Soome int namelen;
2849199767f8SToomas Soome
2850199767f8SToomas Soome zc = &ZAP_LEAF_CHUNK(&zl, j);
2851199767f8SToomas Soome if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
2852199767f8SToomas Soome continue;
2853199767f8SToomas Soome namelen = zc->l_entry.le_name_numints;
28546fd7fa35SToomas Soome if (namelen > sizeof (name))
28556fd7fa35SToomas Soome namelen = sizeof (name);
2856199767f8SToomas Soome
2857199767f8SToomas Soome /*
2858199767f8SToomas Soome * Paste the name back together.
2859199767f8SToomas Soome */
2860199767f8SToomas Soome nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
2861199767f8SToomas Soome p = name;
2862199767f8SToomas Soome while (namelen > 0) {
2863199767f8SToomas Soome int len;
2864199767f8SToomas Soome len = namelen;
2865199767f8SToomas Soome if (len > ZAP_LEAF_ARRAY_BYTES)
2866199767f8SToomas Soome len = ZAP_LEAF_ARRAY_BYTES;
2867199767f8SToomas Soome memcpy(p, nc->l_array.la_array, len);
2868199767f8SToomas Soome p += len;
2869199767f8SToomas Soome namelen -= len;
2870199767f8SToomas Soome nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
2871199767f8SToomas Soome }
2872199767f8SToomas Soome
2873199767f8SToomas Soome /*
2874199767f8SToomas Soome * Assume the first eight bytes of the value are
2875199767f8SToomas Soome * a uint64_t.
2876199767f8SToomas Soome */
2877199767f8SToomas Soome value = fzap_leaf_value(&zl, zc);
2878199767f8SToomas Soome
28796fd7fa35SToomas Soome /* printf("%s 0x%jx\n", name, (uintmax_t)value); */
2880199767f8SToomas Soome rc = callback((const char *)name, value);
2881fdefee4cSToomas Soome if (rc != 0) {
2882fdefee4cSToomas Soome free(zl.l_phys);
2883199767f8SToomas Soome return (rc);
2884fdefee4cSToomas Soome }
2885199767f8SToomas Soome }
2886199767f8SToomas Soome }
2887199767f8SToomas Soome
2888fdefee4cSToomas Soome free(zl.l_phys);
2889199767f8SToomas Soome return (0);
2890199767f8SToomas Soome }
2891199767f8SToomas Soome
/*
 * Listing callback: print the entry name on a line of its own.  The
 * value argument is not used.  Always returns 0.
 */
static int
zfs_printf(const char *name, uint64_t value __unused)
{
	printf("%s\n", name);
	return (0);
}
2899199767f8SToomas Soome
2900199767f8SToomas Soome /*
2901199767f8SToomas Soome * List a zap directory.
2902199767f8SToomas Soome */
2903199767f8SToomas Soome static int
zap_list(const spa_t * spa,const dnode_phys_t * dnode)2904199767f8SToomas Soome zap_list(const spa_t *spa, const dnode_phys_t *dnode)
2905199767f8SToomas Soome {
2906fdefee4cSToomas Soome zap_phys_t *zap;
2907fdefee4cSToomas Soome size_t size = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
2908fdefee4cSToomas Soome int rc;
2909199767f8SToomas Soome
2910fdefee4cSToomas Soome zap = malloc(size);
2911fdefee4cSToomas Soome if (zap == NULL)
2912fdefee4cSToomas Soome return (ENOMEM);
2913199767f8SToomas Soome
2914fdefee4cSToomas Soome rc = dnode_read(spa, dnode, 0, zap, size);
2915fdefee4cSToomas Soome if (rc == 0) {
2916fdefee4cSToomas Soome if (zap->zap_block_type == ZBT_MICRO)
2917fdefee4cSToomas Soome rc = mzap_list((const mzap_phys_t *)zap, size,
2918fdefee4cSToomas Soome zfs_printf);
2919fdefee4cSToomas Soome else
2920fdefee4cSToomas Soome rc = fzap_list(spa, dnode, zap, zfs_printf);
2921fdefee4cSToomas Soome }
2922fdefee4cSToomas Soome free(zap);
2923fdefee4cSToomas Soome return (rc);
2924199767f8SToomas Soome }
2925199767f8SToomas Soome
2926199767f8SToomas Soome static int
objset_get_dnode(const spa_t * spa,const objset_phys_t * os,uint64_t objnum,dnode_phys_t * dnode)29276fd7fa35SToomas Soome objset_get_dnode(const spa_t *spa, const objset_phys_t *os, uint64_t objnum,
29286fd7fa35SToomas Soome dnode_phys_t *dnode)
2929199767f8SToomas Soome {
2930199767f8SToomas Soome off_t offset;
2931199767f8SToomas Soome
29326fd7fa35SToomas Soome offset = objnum * sizeof (dnode_phys_t);
29336fd7fa35SToomas Soome return (dnode_read(spa, &os->os_meta_dnode, offset,
29346fd7fa35SToomas Soome dnode, sizeof (dnode_phys_t)));
2935199767f8SToomas Soome }
2936199767f8SToomas Soome
2937fdefee4cSToomas Soome /*
2938fdefee4cSToomas Soome * Lookup a name in a microzap directory.
2939fdefee4cSToomas Soome */
2940199767f8SToomas Soome static int
mzap_rlookup(const mzap_phys_t * mz,size_t size,char * name,uint64_t value)2941fdefee4cSToomas Soome mzap_rlookup(const mzap_phys_t *mz, size_t size, char *name, uint64_t value)
2942199767f8SToomas Soome {
2943199767f8SToomas Soome const mzap_ent_phys_t *mze;
2944199767f8SToomas Soome int chunks, i;
2945199767f8SToomas Soome
2946199767f8SToomas Soome /*
2947199767f8SToomas Soome * Microzap objects use exactly one block. Read the whole
2948199767f8SToomas Soome * thing.
2949199767f8SToomas Soome */
2950199767f8SToomas Soome chunks = size / MZAP_ENT_LEN - 1;
2951199767f8SToomas Soome for (i = 0; i < chunks; i++) {
2952199767f8SToomas Soome mze = &mz->mz_chunk[i];
2953199767f8SToomas Soome if (value == mze->mze_value) {
2954199767f8SToomas Soome strcpy(name, mze->mze_name);
2955199767f8SToomas Soome return (0);
2956199767f8SToomas Soome }
2957199767f8SToomas Soome }
2958199767f8SToomas Soome
2959199767f8SToomas Soome return (ENOENT);
2960199767f8SToomas Soome }
2961199767f8SToomas Soome
2962199767f8SToomas Soome static void
fzap_name_copy(const zap_leaf_t * zl,const zap_leaf_chunk_t * zc,char * name)2963199767f8SToomas Soome fzap_name_copy(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc, char *name)
2964199767f8SToomas Soome {
2965199767f8SToomas Soome size_t namelen;
2966199767f8SToomas Soome const zap_leaf_chunk_t *nc;
2967199767f8SToomas Soome char *p;
2968199767f8SToomas Soome
2969199767f8SToomas Soome namelen = zc->l_entry.le_name_numints;
2970199767f8SToomas Soome
2971199767f8SToomas Soome nc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_name_chunk);
2972199767f8SToomas Soome p = name;
2973199767f8SToomas Soome while (namelen > 0) {
2974199767f8SToomas Soome size_t len;
2975199767f8SToomas Soome len = namelen;
2976199767f8SToomas Soome if (len > ZAP_LEAF_ARRAY_BYTES)
2977199767f8SToomas Soome len = ZAP_LEAF_ARRAY_BYTES;
2978199767f8SToomas Soome memcpy(p, nc->l_array.la_array, len);
2979199767f8SToomas Soome p += len;
2980199767f8SToomas Soome namelen -= len;
2981199767f8SToomas Soome nc = &ZAP_LEAF_CHUNK(zl, nc->l_array.la_next);
2982199767f8SToomas Soome }
2983199767f8SToomas Soome
2984199767f8SToomas Soome *p = '\0';
2985199767f8SToomas Soome }
2986199767f8SToomas Soome
2987199767f8SToomas Soome static int
fzap_rlookup(const spa_t * spa,const dnode_phys_t * dnode,zap_phys_t * zh,char * name,uint64_t value)2988fdefee4cSToomas Soome fzap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, zap_phys_t *zh,
2989fdefee4cSToomas Soome char *name, uint64_t value)
2990199767f8SToomas Soome {
2991199767f8SToomas Soome int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
2992199767f8SToomas Soome fat_zap_t z;
2993fdefee4cSToomas Soome uint64_t i;
2994fdefee4cSToomas Soome int j, rc;
2995199767f8SToomas Soome
2996fdefee4cSToomas Soome if (zh->zap_magic != ZAP_MAGIC)
2997199767f8SToomas Soome return (EIO);
2998199767f8SToomas Soome
2999199767f8SToomas Soome z.zap_block_shift = ilog2(bsize);
3000fdefee4cSToomas Soome z.zap_phys = zh;
3001199767f8SToomas Soome
3002199767f8SToomas Soome /*
3003199767f8SToomas Soome * This assumes that the leaf blocks start at block 1. The
3004199767f8SToomas Soome * documentation isn't exactly clear on this.
3005199767f8SToomas Soome */
3006199767f8SToomas Soome zap_leaf_t zl;
3007199767f8SToomas Soome zl.l_bs = z.zap_block_shift;
3008fdefee4cSToomas Soome zl.l_phys = malloc(bsize);
3009fdefee4cSToomas Soome if (zl.l_phys == NULL)
3010fdefee4cSToomas Soome return (ENOMEM);
3011199767f8SToomas Soome
3012fdefee4cSToomas Soome for (i = 0; i < zh->zap_num_leafs; i++) {
3013fdefee4cSToomas Soome off_t off = ((off_t)(i + 1)) << zl.l_bs;
3014199767f8SToomas Soome
3015fdefee4cSToomas Soome rc = dnode_read(spa, dnode, off, zl.l_phys, bsize);
3016fdefee4cSToomas Soome if (rc != 0)
3017fdefee4cSToomas Soome goto done;
3018199767f8SToomas Soome
3019199767f8SToomas Soome for (j = 0; j < ZAP_LEAF_NUMCHUNKS(&zl); j++) {
3020199767f8SToomas Soome zap_leaf_chunk_t *zc;
3021199767f8SToomas Soome
3022199767f8SToomas Soome zc = &ZAP_LEAF_CHUNK(&zl, j);
3023199767f8SToomas Soome if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
3024199767f8SToomas Soome continue;
3025199767f8SToomas Soome if (zc->l_entry.le_value_intlen != 8 ||
3026199767f8SToomas Soome zc->l_entry.le_value_numints != 1)
3027199767f8SToomas Soome continue;
3028199767f8SToomas Soome
3029199767f8SToomas Soome if (fzap_leaf_value(&zl, zc) == value) {
3030199767f8SToomas Soome fzap_name_copy(&zl, zc, name);
3031fdefee4cSToomas Soome goto done;
3032199767f8SToomas Soome }
3033199767f8SToomas Soome }
3034199767f8SToomas Soome }
3035199767f8SToomas Soome
3036fdefee4cSToomas Soome rc = ENOENT;
3037fdefee4cSToomas Soome done:
3038fdefee4cSToomas Soome free(zl.l_phys);
3039fdefee4cSToomas Soome return (rc);
3040199767f8SToomas Soome }
3041199767f8SToomas Soome
3042199767f8SToomas Soome static int
zap_rlookup(const spa_t * spa,const dnode_phys_t * dnode,char * name,uint64_t value)30436fd7fa35SToomas Soome zap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, char *name,
30446fd7fa35SToomas Soome uint64_t value)
3045199767f8SToomas Soome {
3046fdefee4cSToomas Soome zap_phys_t *zap;
3047fdefee4cSToomas Soome size_t size = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
3048199767f8SToomas Soome int rc;
3049199767f8SToomas Soome
3050fdefee4cSToomas Soome zap = malloc(size);
3051fdefee4cSToomas Soome if (zap == NULL)
3052fdefee4cSToomas Soome return (ENOMEM);
3053199767f8SToomas Soome
3054fdefee4cSToomas Soome rc = dnode_read(spa, dnode, 0, zap, size);
3055fdefee4cSToomas Soome if (rc == 0) {
3056fdefee4cSToomas Soome if (zap->zap_block_type == ZBT_MICRO)
3057fdefee4cSToomas Soome rc = mzap_rlookup((const mzap_phys_t *)zap, size,
3058fdefee4cSToomas Soome name, value);
3059fdefee4cSToomas Soome else
3060fdefee4cSToomas Soome rc = fzap_rlookup(spa, dnode, zap, name, value);
3061fdefee4cSToomas Soome }
3062fdefee4cSToomas Soome free(zap);
3063fdefee4cSToomas Soome return (rc);
3064199767f8SToomas Soome }
3065199767f8SToomas Soome
3066199767f8SToomas Soome static int
zfs_rlookup(const spa_t * spa,uint64_t objnum,char * result)3067199767f8SToomas Soome zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result)
3068199767f8SToomas Soome {
3069199767f8SToomas Soome char name[256];
3070199767f8SToomas Soome char component[256];
3071199767f8SToomas Soome uint64_t dir_obj, parent_obj, child_dir_zapobj;
3072199767f8SToomas Soome dnode_phys_t child_dir_zap, dataset, dir, parent;
3073199767f8SToomas Soome dsl_dir_phys_t *dd;
3074199767f8SToomas Soome dsl_dataset_phys_t *ds;
3075199767f8SToomas Soome char *p;
3076199767f8SToomas Soome int len;
3077199767f8SToomas Soome
30786fd7fa35SToomas Soome p = &name[sizeof (name) - 1];
3079199767f8SToomas Soome *p = '\0';
3080199767f8SToomas Soome
3081199767f8SToomas Soome if (objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset)) {
3082199767f8SToomas Soome printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum);
3083199767f8SToomas Soome return (EIO);
3084199767f8SToomas Soome }
3085199767f8SToomas Soome ds = (dsl_dataset_phys_t *)&dataset.dn_bonus;
3086199767f8SToomas Soome dir_obj = ds->ds_dir_obj;
3087199767f8SToomas Soome
3088199767f8SToomas Soome for (;;) {
3089199767f8SToomas Soome if (objset_get_dnode(spa, &spa->spa_mos, dir_obj, &dir) != 0)
3090199767f8SToomas Soome return (EIO);
3091199767f8SToomas Soome dd = (dsl_dir_phys_t *)&dir.dn_bonus;
3092199767f8SToomas Soome
3093199767f8SToomas Soome /* Actual loop condition. */
30946fd7fa35SToomas Soome parent_obj = dd->dd_parent_obj;
3095199767f8SToomas Soome if (parent_obj == 0)
3096199767f8SToomas Soome break;
3097199767f8SToomas Soome
30986fd7fa35SToomas Soome if (objset_get_dnode(spa, &spa->spa_mos, parent_obj,
30996fd7fa35SToomas Soome &parent) != 0)
3100199767f8SToomas Soome return (EIO);
3101199767f8SToomas Soome dd = (dsl_dir_phys_t *)&parent.dn_bonus;
3102199767f8SToomas Soome child_dir_zapobj = dd->dd_child_dir_zapobj;
31036fd7fa35SToomas Soome if (objset_get_dnode(spa, &spa->spa_mos, child_dir_zapobj,
31046fd7fa35SToomas Soome &child_dir_zap) != 0)
3105199767f8SToomas Soome return (EIO);
3106199767f8SToomas Soome if (zap_rlookup(spa, &child_dir_zap, component, dir_obj) != 0)
3107199767f8SToomas Soome return (EIO);
3108199767f8SToomas Soome
3109199767f8SToomas Soome len = strlen(component);
3110199767f8SToomas Soome p -= len;
3111199767f8SToomas Soome memcpy(p, component, len);
3112199767f8SToomas Soome --p;
3113199767f8SToomas Soome *p = '/';
3114199767f8SToomas Soome
3115199767f8SToomas Soome /* Actual loop iteration. */
3116199767f8SToomas Soome dir_obj = parent_obj;
3117199767f8SToomas Soome }
3118199767f8SToomas Soome
3119199767f8SToomas Soome if (*p != '\0')
3120199767f8SToomas Soome ++p;
3121199767f8SToomas Soome strcpy(result, p);
3122199767f8SToomas Soome
3123199767f8SToomas Soome return (0);
3124199767f8SToomas Soome }
3125199767f8SToomas Soome
3126199767f8SToomas Soome static int
zfs_lookup_dataset(const spa_t * spa,const char * name,uint64_t * objnum)3127199767f8SToomas Soome zfs_lookup_dataset(const spa_t *spa, const char *name, uint64_t *objnum)
3128199767f8SToomas Soome {
3129199767f8SToomas Soome char element[256];
3130199767f8SToomas Soome uint64_t dir_obj, child_dir_zapobj;
3131199767f8SToomas Soome dnode_phys_t child_dir_zap, dir;
3132199767f8SToomas Soome dsl_dir_phys_t *dd;
3133199767f8SToomas Soome const char *p, *q;
3134199767f8SToomas Soome
31356fd7fa35SToomas Soome if (objset_get_dnode(spa, &spa->spa_mos,
31366fd7fa35SToomas Soome DMU_POOL_DIRECTORY_OBJECT, &dir))
3137199767f8SToomas Soome return (EIO);
31384a04e8dbSToomas Soome if (zap_lookup(spa, &dir, DMU_POOL_ROOT_DATASET, sizeof (dir_obj),
31394a04e8dbSToomas Soome 1, &dir_obj))
3140199767f8SToomas Soome return (EIO);
3141199767f8SToomas Soome
3142199767f8SToomas Soome p = name;
3143199767f8SToomas Soome for (;;) {
3144199767f8SToomas Soome if (objset_get_dnode(spa, &spa->spa_mos, dir_obj, &dir))
3145199767f8SToomas Soome return (EIO);
3146199767f8SToomas Soome dd = (dsl_dir_phys_t *)&dir.dn_bonus;
3147199767f8SToomas Soome
3148199767f8SToomas Soome while (*p == '/')
3149199767f8SToomas Soome p++;
3150199767f8SToomas Soome /* Actual loop condition #1. */
3151199767f8SToomas Soome if (*p == '\0')
3152199767f8SToomas Soome break;
3153199767f8SToomas Soome
3154199767f8SToomas Soome q = strchr(p, '/');
3155199767f8SToomas Soome if (q) {
3156199767f8SToomas Soome memcpy(element, p, q - p);
3157199767f8SToomas Soome element[q - p] = '\0';
3158199767f8SToomas Soome p = q + 1;
3159199767f8SToomas Soome } else {
3160199767f8SToomas Soome strcpy(element, p);
3161199767f8SToomas Soome p += strlen(p);
3162199767f8SToomas Soome }
3163199767f8SToomas Soome
3164199767f8SToomas Soome child_dir_zapobj = dd->dd_child_dir_zapobj;
31656fd7fa35SToomas Soome if (objset_get_dnode(spa, &spa->spa_mos, child_dir_zapobj,
31666fd7fa35SToomas Soome &child_dir_zap) != 0)
3167199767f8SToomas Soome return (EIO);
3168199767f8SToomas Soome
3169199767f8SToomas Soome /* Actual loop condition #2. */
31704a04e8dbSToomas Soome if (zap_lookup(spa, &child_dir_zap, element, sizeof (dir_obj),
31714a04e8dbSToomas Soome 1, &dir_obj) != 0)
3172199767f8SToomas Soome return (ENOENT);
3173199767f8SToomas Soome }
3174199767f8SToomas Soome
3175199767f8SToomas Soome *objnum = dd->dd_head_dataset_obj;
3176199767f8SToomas Soome return (0);
3177199767f8SToomas Soome }
3178199767f8SToomas Soome
3179199767f8SToomas Soome #pragma GCC diagnostic ignored "-Wstrict-aliasing"
3180199767f8SToomas Soome static int
zfs_list_dataset(const spa_t * spa,uint64_t objnum)31816fd7fa35SToomas Soome zfs_list_dataset(const spa_t *spa, uint64_t objnum)
3182199767f8SToomas Soome {
3183199767f8SToomas Soome uint64_t dir_obj, child_dir_zapobj;
3184199767f8SToomas Soome dnode_phys_t child_dir_zap, dir, dataset;
3185199767f8SToomas Soome dsl_dataset_phys_t *ds;
3186199767f8SToomas Soome dsl_dir_phys_t *dd;
3187199767f8SToomas Soome
3188199767f8SToomas Soome if (objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset)) {
3189199767f8SToomas Soome printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum);
3190199767f8SToomas Soome return (EIO);
3191199767f8SToomas Soome }
31926fd7fa35SToomas Soome ds = (dsl_dataset_phys_t *)&dataset.dn_bonus;
3193199767f8SToomas Soome dir_obj = ds->ds_dir_obj;
3194199767f8SToomas Soome
3195199767f8SToomas Soome if (objset_get_dnode(spa, &spa->spa_mos, dir_obj, &dir)) {
3196199767f8SToomas Soome printf("ZFS: can't find dirobj %ju\n", (uintmax_t)dir_obj);
3197199767f8SToomas Soome return (EIO);
3198199767f8SToomas Soome }
3199199767f8SToomas Soome dd = (dsl_dir_phys_t *)&dir.dn_bonus;
3200199767f8SToomas Soome
3201199767f8SToomas Soome child_dir_zapobj = dd->dd_child_dir_zapobj;
32026fd7fa35SToomas Soome if (objset_get_dnode(spa, &spa->spa_mos, child_dir_zapobj,
32036fd7fa35SToomas Soome &child_dir_zap) != 0) {
3204199767f8SToomas Soome printf("ZFS: can't find child zap %ju\n", (uintmax_t)dir_obj);
3205199767f8SToomas Soome return (EIO);
3206199767f8SToomas Soome }
3207199767f8SToomas Soome
3208199767f8SToomas Soome return (zap_list(spa, &child_dir_zap) != 0);
3209199767f8SToomas Soome }
3210199767f8SToomas Soome
3211199767f8SToomas Soome int
zfs_callback_dataset(const spa_t * spa,uint64_t objnum,int (* callback)(const char *,uint64_t))32126fd7fa35SToomas Soome zfs_callback_dataset(const spa_t *spa, uint64_t objnum,
32136fd7fa35SToomas Soome int (*callback)(const char *, uint64_t))
3214199767f8SToomas Soome {
3215fdefee4cSToomas Soome uint64_t dir_obj, child_dir_zapobj;
3216199767f8SToomas Soome dnode_phys_t child_dir_zap, dir, dataset;
3217199767f8SToomas Soome dsl_dataset_phys_t *ds;
3218199767f8SToomas Soome dsl_dir_phys_t *dd;
3219fdefee4cSToomas Soome zap_phys_t *zap;
3220fdefee4cSToomas Soome size_t size;
3221199767f8SToomas Soome int err;
3222199767f8SToomas Soome
3223199767f8SToomas Soome err = objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset);
3224199767f8SToomas Soome if (err != 0) {
3225199767f8SToomas Soome printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum);
3226199767f8SToomas Soome return (err);
3227199767f8SToomas Soome }
32286fd7fa35SToomas Soome ds = (dsl_dataset_phys_t *)&dataset.dn_bonus;
3229199767f8SToomas Soome dir_obj = ds->ds_dir_obj;
3230199767f8SToomas Soome
3231199767f8SToomas Soome err = objset_get_dnode(spa, &spa->spa_mos, dir_obj, &dir);
3232199767f8SToomas Soome if (err != 0) {
3233199767f8SToomas Soome printf("ZFS: can't find dirobj %ju\n", (uintmax_t)dir_obj);
3234199767f8SToomas Soome return (err);
3235199767f8SToomas Soome }
3236199767f8SToomas Soome dd = (dsl_dir_phys_t *)&dir.dn_bonus;
3237199767f8SToomas Soome
3238199767f8SToomas Soome child_dir_zapobj = dd->dd_child_dir_zapobj;
32396fd7fa35SToomas Soome err = objset_get_dnode(spa, &spa->spa_mos, child_dir_zapobj,
32406fd7fa35SToomas Soome &child_dir_zap);
3241199767f8SToomas Soome if (err != 0) {
3242199767f8SToomas Soome printf("ZFS: can't find child zap %ju\n", (uintmax_t)dir_obj);
3243199767f8SToomas Soome return (err);
3244199767f8SToomas Soome }
3245199767f8SToomas Soome
3246fdefee4cSToomas Soome size = child_dir_zap.dn_datablkszsec << SPA_MINBLOCKSHIFT;
3247fdefee4cSToomas Soome zap = malloc(size);
3248fdefee4cSToomas Soome if (zap != NULL) {
3249fdefee4cSToomas Soome err = dnode_read(spa, &child_dir_zap, 0, zap, size);
3250fdefee4cSToomas Soome if (err != 0)
3251fdefee4cSToomas Soome goto done;
3252199767f8SToomas Soome
3253fdefee4cSToomas Soome if (zap->zap_block_type == ZBT_MICRO)
3254fdefee4cSToomas Soome err = mzap_list((const mzap_phys_t *)zap, size,
3255fdefee4cSToomas Soome callback);
3256fdefee4cSToomas Soome else
3257fdefee4cSToomas Soome err = fzap_list(spa, &child_dir_zap, zap, callback);
3258fdefee4cSToomas Soome } else {
3259fdefee4cSToomas Soome err = ENOMEM;
3260fdefee4cSToomas Soome }
3261fdefee4cSToomas Soome done:
3262fdefee4cSToomas Soome free(zap);
3263fdefee4cSToomas Soome return (err);
3264199767f8SToomas Soome }
3265199767f8SToomas Soome
3266199767f8SToomas Soome /*
3267199767f8SToomas Soome * Find the object set given the object number of its dataset object
3268199767f8SToomas Soome * and return its details in *objset
3269199767f8SToomas Soome */
3270199767f8SToomas Soome static int
zfs_mount_dataset(const spa_t * spa,uint64_t objnum,objset_phys_t * objset)3271199767f8SToomas Soome zfs_mount_dataset(const spa_t *spa, uint64_t objnum, objset_phys_t *objset)
3272199767f8SToomas Soome {
3273199767f8SToomas Soome dnode_phys_t dataset;
3274199767f8SToomas Soome dsl_dataset_phys_t *ds;
3275199767f8SToomas Soome
3276199767f8SToomas Soome if (objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset)) {
3277199767f8SToomas Soome printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum);
3278199767f8SToomas Soome return (EIO);
3279199767f8SToomas Soome }
3280199767f8SToomas Soome
32816fd7fa35SToomas Soome ds = (dsl_dataset_phys_t *)&dataset.dn_bonus;
3282199767f8SToomas Soome if (zio_read(spa, &ds->ds_bp, objset)) {
3283199767f8SToomas Soome printf("ZFS: can't read object set for dataset %ju\n",
3284199767f8SToomas Soome (uintmax_t)objnum);
3285199767f8SToomas Soome return (EIO);
3286199767f8SToomas Soome }
3287199767f8SToomas Soome
3288199767f8SToomas Soome return (0);
3289199767f8SToomas Soome }
3290199767f8SToomas Soome
3291199767f8SToomas Soome /*
3292199767f8SToomas Soome * Find the object set pointed to by the BOOTFS property or the root
3293199767f8SToomas Soome * dataset if there is none and return its details in *objset
3294199767f8SToomas Soome */
3295199767f8SToomas Soome static int
zfs_get_root(const spa_t * spa,uint64_t * objid)3296199767f8SToomas Soome zfs_get_root(const spa_t *spa, uint64_t *objid)
3297199767f8SToomas Soome {
3298199767f8SToomas Soome dnode_phys_t dir, propdir;
3299199767f8SToomas Soome uint64_t props, bootfs, root;
3300199767f8SToomas Soome
3301199767f8SToomas Soome *objid = 0;
3302199767f8SToomas Soome
3303199767f8SToomas Soome /*
3304199767f8SToomas Soome * Start with the MOS directory object.
3305199767f8SToomas Soome */
33066fd7fa35SToomas Soome if (objset_get_dnode(spa, &spa->spa_mos,
33076fd7fa35SToomas Soome DMU_POOL_DIRECTORY_OBJECT, &dir)) {
3308199767f8SToomas Soome printf("ZFS: can't read MOS object directory\n");
3309199767f8SToomas Soome return (EIO);
3310199767f8SToomas Soome }
3311199767f8SToomas Soome
3312199767f8SToomas Soome /*
3313199767f8SToomas Soome * Lookup the pool_props and see if we can find a bootfs.
3314199767f8SToomas Soome */
33156fd7fa35SToomas Soome if (zap_lookup(spa, &dir, DMU_POOL_PROPS,
33166fd7fa35SToomas Soome sizeof (props), 1, &props) == 0 &&
33176fd7fa35SToomas Soome objset_get_dnode(spa, &spa->spa_mos, props, &propdir) == 0 &&
33186fd7fa35SToomas Soome zap_lookup(spa, &propdir, "bootfs",
33196fd7fa35SToomas Soome sizeof (bootfs), 1, &bootfs) == 0 && bootfs != 0) {
3320199767f8SToomas Soome *objid = bootfs;
3321199767f8SToomas Soome return (0);
3322199767f8SToomas Soome }
3323199767f8SToomas Soome /*
3324199767f8SToomas Soome * Lookup the root dataset directory
3325199767f8SToomas Soome */
33266fd7fa35SToomas Soome if (zap_lookup(spa, &dir, DMU_POOL_ROOT_DATASET,
33276fd7fa35SToomas Soome sizeof (root), 1, &root) ||
33286fd7fa35SToomas Soome objset_get_dnode(spa, &spa->spa_mos, root, &dir)) {
3329199767f8SToomas Soome printf("ZFS: can't find root dsl_dir\n");
3330199767f8SToomas Soome return (EIO);
3331199767f8SToomas Soome }
3332199767f8SToomas Soome
3333199767f8SToomas Soome /*
3334199767f8SToomas Soome * Use the information from the dataset directory's bonus buffer
3335199767f8SToomas Soome * to find the dataset object and from that the object set itself.
3336199767f8SToomas Soome */
33376fd7fa35SToomas Soome dsl_dir_phys_t *dd = (dsl_dir_phys_t *)&dir.dn_bonus;
3338199767f8SToomas Soome *objid = dd->dd_head_dataset_obj;
3339199767f8SToomas Soome return (0);
3340199767f8SToomas Soome }
3341199767f8SToomas Soome
3342199767f8SToomas Soome static int
zfs_mount(const spa_t * spa,uint64_t rootobj,struct zfsmount * mnt)3343199767f8SToomas Soome zfs_mount(const spa_t *spa, uint64_t rootobj, struct zfsmount *mnt)
3344199767f8SToomas Soome {
3345199767f8SToomas Soome
3346199767f8SToomas Soome mnt->spa = spa;
3347199767f8SToomas Soome
3348199767f8SToomas Soome /*
3349199767f8SToomas Soome * Find the root object set if not explicitly provided
3350199767f8SToomas Soome */
3351199767f8SToomas Soome if (rootobj == 0 && zfs_get_root(spa, &rootobj)) {
3352199767f8SToomas Soome printf("ZFS: can't find root filesystem\n");
3353199767f8SToomas Soome return (EIO);
3354199767f8SToomas Soome }
3355199767f8SToomas Soome
3356199767f8SToomas Soome if (zfs_mount_dataset(spa, rootobj, &mnt->objset)) {
3357199767f8SToomas Soome printf("ZFS: can't open root filesystem\n");
3358199767f8SToomas Soome return (EIO);
3359199767f8SToomas Soome }
3360199767f8SToomas Soome
3361199767f8SToomas Soome mnt->rootobj = rootobj;
3362199767f8SToomas Soome
3363199767f8SToomas Soome return (0);
3364199767f8SToomas Soome }
3365199767f8SToomas Soome
3366199767f8SToomas Soome /*
3367199767f8SToomas Soome * callback function for feature name checks.
3368199767f8SToomas Soome */
3369199767f8SToomas Soome static int
check_feature(const char * name,uint64_t value)3370199767f8SToomas Soome check_feature(const char *name, uint64_t value)
3371199767f8SToomas Soome {
3372199767f8SToomas Soome int i;
3373199767f8SToomas Soome
3374199767f8SToomas Soome if (value == 0)
3375199767f8SToomas Soome return (0);
3376199767f8SToomas Soome if (name[0] == '\0')
3377199767f8SToomas Soome return (0);
3378199767f8SToomas Soome
3379199767f8SToomas Soome for (i = 0; features_for_read[i] != NULL; i++) {
3380199767f8SToomas Soome if (strcmp(name, features_for_read[i]) == 0)
3381199767f8SToomas Soome return (0);
3382199767f8SToomas Soome }
3383199767f8SToomas Soome printf("ZFS: unsupported feature: %s\n", name);
3384199767f8SToomas Soome return (EIO);
3385199767f8SToomas Soome }
3386199767f8SToomas Soome
3387199767f8SToomas Soome /*
3388199767f8SToomas Soome * Checks whether the MOS features that are active are supported.
3389199767f8SToomas Soome */
3390199767f8SToomas Soome static int
check_mos_features(const spa_t * spa)3391199767f8SToomas Soome check_mos_features(const spa_t *spa)
3392199767f8SToomas Soome {
3393199767f8SToomas Soome dnode_phys_t dir;
3394fdefee4cSToomas Soome zap_phys_t *zap;
3395fdefee4cSToomas Soome uint64_t objnum;
3396199767f8SToomas Soome size_t size;
3397199767f8SToomas Soome int rc;
3398199767f8SToomas Soome
3399199767f8SToomas Soome if ((rc = objset_get_dnode(spa, &spa->spa_mos, DMU_OT_OBJECT_DIRECTORY,
3400199767f8SToomas Soome &dir)) != 0)
3401199767f8SToomas Soome return (rc);
34025e069aafSToomas Soome if ((rc = zap_lookup(spa, &dir, DMU_POOL_FEATURES_FOR_READ,
34034a04e8dbSToomas Soome sizeof (objnum), 1, &objnum)) != 0) {
34045e069aafSToomas Soome /*
34055e069aafSToomas Soome * It is older pool without features. As we have already
34065e069aafSToomas Soome * tested the label, just return without raising the error.
34075e069aafSToomas Soome */
34085e069aafSToomas Soome if (rc == ENOENT)
34095e069aafSToomas Soome rc = 0;
3410199767f8SToomas Soome return (rc);
34115e069aafSToomas Soome }
3412199767f8SToomas Soome
3413199767f8SToomas Soome if ((rc = objset_get_dnode(spa, &spa->spa_mos, objnum, &dir)) != 0)
3414199767f8SToomas Soome return (rc);
3415199767f8SToomas Soome
3416199767f8SToomas Soome if (dir.dn_type != DMU_OTN_ZAP_METADATA)
3417199767f8SToomas Soome return (EIO);
3418199767f8SToomas Soome
3419fdefee4cSToomas Soome size = dir.dn_datablkszsec << SPA_MINBLOCKSHIFT;
3420fdefee4cSToomas Soome zap = malloc(size);
3421fdefee4cSToomas Soome if (zap == NULL)
3422fdefee4cSToomas Soome return (ENOMEM);
3423fdefee4cSToomas Soome
3424fdefee4cSToomas Soome if (dnode_read(spa, &dir, 0, zap, size)) {
3425fdefee4cSToomas Soome free(zap);
3426199767f8SToomas Soome return (EIO);
3427fdefee4cSToomas Soome }
3428199767f8SToomas Soome
3429fdefee4cSToomas Soome if (zap->zap_block_type == ZBT_MICRO)
3430fdefee4cSToomas Soome rc = mzap_list((const mzap_phys_t *)zap, size, check_feature);
3431199767f8SToomas Soome else
3432fdefee4cSToomas Soome rc = fzap_list(spa, &dir, zap, check_feature);
3433199767f8SToomas Soome
3434fdefee4cSToomas Soome free(zap);
3435199767f8SToomas Soome return (rc);
3436199767f8SToomas Soome }
3437199767f8SToomas Soome
3438199767f8SToomas Soome static int
load_nvlist(spa_t * spa,uint64_t obj,nvlist_t ** value)3439b713c91eSToomas Soome load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
3440199767f8SToomas Soome {
34414a04e8dbSToomas Soome dnode_phys_t dir;
3442c023f651SToomas Soome size_t size;
3443199767f8SToomas Soome int rc;
3444b713c91eSToomas Soome char *nv;
3445c023f651SToomas Soome
3446c023f651SToomas Soome *value = NULL;
3447c023f651SToomas Soome if ((rc = objset_get_dnode(spa, &spa->spa_mos, obj, &dir)) != 0)
3448c023f651SToomas Soome return (rc);
3449c023f651SToomas Soome if (dir.dn_type != DMU_OT_PACKED_NVLIST &&
3450c023f651SToomas Soome dir.dn_bonustype != DMU_OT_PACKED_NVLIST_SIZE) {
3451c023f651SToomas Soome return (EIO);
3452c023f651SToomas Soome }
3453c023f651SToomas Soome
3454c023f651SToomas Soome if (dir.dn_bonuslen != sizeof (uint64_t))
3455c023f651SToomas Soome return (EIO);
3456c023f651SToomas Soome
3457c023f651SToomas Soome size = *(uint64_t *)DN_BONUS(&dir);
3458c023f651SToomas Soome nv = malloc(size);
3459c023f651SToomas Soome if (nv == NULL)
3460c023f651SToomas Soome return (ENOMEM);
3461c023f651SToomas Soome
3462c023f651SToomas Soome rc = dnode_read(spa, &dir, 0, nv, size);
3463c023f651SToomas Soome if (rc != 0) {
3464c023f651SToomas Soome free(nv);
3465c023f651SToomas Soome nv = NULL;
3466c023f651SToomas Soome return (rc);
3467c023f651SToomas Soome }
3468b713c91eSToomas Soome *value = nvlist_import(nv, size);
3469b713c91eSToomas Soome free(nv);
3470c023f651SToomas Soome return (rc);
3471c023f651SToomas Soome }
3472c023f651SToomas Soome
3473c023f651SToomas Soome static int
zfs_spa_init(spa_t * spa)3474c023f651SToomas Soome zfs_spa_init(spa_t *spa)
3475c023f651SToomas Soome {
3476c023f651SToomas Soome dnode_phys_t dir;
3477c023f651SToomas Soome uint64_t config_object;
3478b713c91eSToomas Soome nvlist_t *nvlist;
3479da9bf005SToomas Soome int rc;
3480199767f8SToomas Soome
3481199767f8SToomas Soome if (zio_read(spa, &spa->spa_uberblock.ub_rootbp, &spa->spa_mos)) {
3482199767f8SToomas Soome printf("ZFS: can't read MOS of pool %s\n", spa->spa_name);
3483199767f8SToomas Soome return (EIO);
3484199767f8SToomas Soome }
3485199767f8SToomas Soome if (spa->spa_mos.os_type != DMU_OST_META) {
3486199767f8SToomas Soome printf("ZFS: corrupted MOS of pool %s\n", spa->spa_name);
3487199767f8SToomas Soome return (EIO);
3488199767f8SToomas Soome }
3489199767f8SToomas Soome
34904a04e8dbSToomas Soome if (objset_get_dnode(spa, &spa->spa_mos, DMU_POOL_DIRECTORY_OBJECT,
34914a04e8dbSToomas Soome &dir)) {
34924a04e8dbSToomas Soome printf("ZFS: failed to read pool %s directory object\n",
34934a04e8dbSToomas Soome spa->spa_name);
34944a04e8dbSToomas Soome return (EIO);
34954a04e8dbSToomas Soome }
34964a04e8dbSToomas Soome /* this is allowed to fail, older pools do not have salt */
34974a04e8dbSToomas Soome rc = zap_lookup(spa, &dir, DMU_POOL_CHECKSUM_SALT, 1,
34984a04e8dbSToomas Soome sizeof (spa->spa_cksum_salt.zcs_bytes),
34994a04e8dbSToomas Soome spa->spa_cksum_salt.zcs_bytes);
35004a04e8dbSToomas Soome
3501199767f8SToomas Soome rc = check_mos_features(spa);
3502199767f8SToomas Soome if (rc != 0) {
3503199767f8SToomas Soome printf("ZFS: pool %s is not supported\n", spa->spa_name);
3504c023f651SToomas Soome return (rc);
3505199767f8SToomas Soome }
3506199767f8SToomas Soome
3507c023f651SToomas Soome rc = zap_lookup(spa, &dir, DMU_POOL_CONFIG,
3508c023f651SToomas Soome sizeof (config_object), 1, &config_object);
3509c023f651SToomas Soome if (rc != 0) {
3510c023f651SToomas Soome printf("ZFS: can not read MOS %s\n", DMU_POOL_CONFIG);
3511c023f651SToomas Soome return (EIO);
3512c023f651SToomas Soome }
3513c023f651SToomas Soome rc = load_nvlist(spa, config_object, &nvlist);
3514c023f651SToomas Soome if (rc != 0)
3515c023f651SToomas Soome return (rc);
3516c023f651SToomas Soome
3517da9bf005SToomas Soome /*
3518da9bf005SToomas Soome * Update vdevs from MOS config. Note, we do skip encoding bytes
3519da9bf005SToomas Soome * here. See also vdev_label_read_config().
3520da9bf005SToomas Soome */
3521b713c91eSToomas Soome rc = vdev_init_from_nvlist(spa, nvlist);
3522b713c91eSToomas Soome nvlist_destroy(nvlist);
3523199767f8SToomas Soome return (rc);
3524199767f8SToomas Soome }
3525199767f8SToomas Soome
3526199767f8SToomas Soome static int
zfs_dnode_stat(const spa_t * spa,dnode_phys_t * dn,struct stat * sb)3527199767f8SToomas Soome zfs_dnode_stat(const spa_t *spa, dnode_phys_t *dn, struct stat *sb)
3528199767f8SToomas Soome {
3529199767f8SToomas Soome
3530199767f8SToomas Soome if (dn->dn_bonustype != DMU_OT_SA) {
3531199767f8SToomas Soome znode_phys_t *zp = (znode_phys_t *)dn->dn_bonus;
3532199767f8SToomas Soome
3533199767f8SToomas Soome sb->st_mode = zp->zp_mode;
3534199767f8SToomas Soome sb->st_uid = zp->zp_uid;
3535199767f8SToomas Soome sb->st_gid = zp->zp_gid;
3536199767f8SToomas Soome sb->st_size = zp->zp_size;
3537199767f8SToomas Soome } else {
3538199767f8SToomas Soome sa_hdr_phys_t *sahdrp;
3539199767f8SToomas Soome int hdrsize;
3540199767f8SToomas Soome size_t size = 0;
3541199767f8SToomas Soome void *buf = NULL;
3542199767f8SToomas Soome
3543199767f8SToomas Soome if (dn->dn_bonuslen != 0)
3544199767f8SToomas Soome sahdrp = (sa_hdr_phys_t *)DN_BONUS(dn);
3545199767f8SToomas Soome else {
3546199767f8SToomas Soome if ((dn->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0) {
3547f905073dSToomas Soome blkptr_t *bp = DN_SPILL_BLKPTR(dn);
3548199767f8SToomas Soome int error;
3549199767f8SToomas Soome
3550199767f8SToomas Soome size = BP_GET_LSIZE(bp);
35513e8c7f16SToomas Soome buf = malloc(size);
35523e8c7f16SToomas Soome if (buf == NULL)
35533e8c7f16SToomas Soome error = ENOMEM;
35543e8c7f16SToomas Soome else
35553e8c7f16SToomas Soome error = zio_read(spa, bp, buf);
35563e8c7f16SToomas Soome
3557199767f8SToomas Soome if (error != 0) {
35583e8c7f16SToomas Soome free(buf);
3559199767f8SToomas Soome return (error);
3560199767f8SToomas Soome }
3561199767f8SToomas Soome sahdrp = buf;
3562199767f8SToomas Soome } else {
3563199767f8SToomas Soome return (EIO);
3564199767f8SToomas Soome }
3565199767f8SToomas Soome }
3566199767f8SToomas Soome hdrsize = SA_HDR_SIZE(sahdrp);
3567199767f8SToomas Soome sb->st_mode = *(uint64_t *)((char *)sahdrp + hdrsize +
3568199767f8SToomas Soome SA_MODE_OFFSET);
3569199767f8SToomas Soome sb->st_uid = *(uint64_t *)((char *)sahdrp + hdrsize +
3570199767f8SToomas Soome SA_UID_OFFSET);
3571199767f8SToomas Soome sb->st_gid = *(uint64_t *)((char *)sahdrp + hdrsize +
3572199767f8SToomas Soome SA_GID_OFFSET);
3573199767f8SToomas Soome sb->st_size = *(uint64_t *)((char *)sahdrp + hdrsize +
3574199767f8SToomas Soome SA_SIZE_OFFSET);
35753e8c7f16SToomas Soome free(buf);
3576199767f8SToomas Soome }
3577199767f8SToomas Soome
3578199767f8SToomas Soome return (0);
3579199767f8SToomas Soome }
3580199767f8SToomas Soome
3581fae4f9e0SToomas Soome static int
zfs_dnode_readlink(const spa_t * spa,dnode_phys_t * dn,char * path,size_t psize)3582fae4f9e0SToomas Soome zfs_dnode_readlink(const spa_t *spa, dnode_phys_t *dn, char *path, size_t psize)
3583fae4f9e0SToomas Soome {
3584fae4f9e0SToomas Soome int rc = 0;
3585fae4f9e0SToomas Soome
3586fae4f9e0SToomas Soome if (dn->dn_bonustype == DMU_OT_SA) {
3587fae4f9e0SToomas Soome sa_hdr_phys_t *sahdrp = NULL;
3588fae4f9e0SToomas Soome size_t size = 0;
3589fae4f9e0SToomas Soome void *buf = NULL;
3590fae4f9e0SToomas Soome int hdrsize;
3591fae4f9e0SToomas Soome char *p;
3592fae4f9e0SToomas Soome
35933e8c7f16SToomas Soome if (dn->dn_bonuslen != 0) {
3594fae4f9e0SToomas Soome sahdrp = (sa_hdr_phys_t *)DN_BONUS(dn);
35953e8c7f16SToomas Soome } else {
3596fae4f9e0SToomas Soome blkptr_t *bp;
3597fae4f9e0SToomas Soome
3598fae4f9e0SToomas Soome if ((dn->dn_flags & DNODE_FLAG_SPILL_BLKPTR) == 0)
3599fae4f9e0SToomas Soome return (EIO);
3600f905073dSToomas Soome bp = DN_SPILL_BLKPTR(dn);
3601fae4f9e0SToomas Soome
3602fae4f9e0SToomas Soome size = BP_GET_LSIZE(bp);
36033e8c7f16SToomas Soome buf = malloc(size);
36043e8c7f16SToomas Soome if (buf == NULL)
36053e8c7f16SToomas Soome rc = ENOMEM;
36063e8c7f16SToomas Soome else
36073e8c7f16SToomas Soome rc = zio_read(spa, bp, buf);
3608fae4f9e0SToomas Soome if (rc != 0) {
36093e8c7f16SToomas Soome free(buf);
3610fae4f9e0SToomas Soome return (rc);
3611fae4f9e0SToomas Soome }
3612fae4f9e0SToomas Soome sahdrp = buf;
3613fae4f9e0SToomas Soome }
3614fae4f9e0SToomas Soome hdrsize = SA_HDR_SIZE(sahdrp);
3615fae4f9e0SToomas Soome p = (char *)((uintptr_t)sahdrp + hdrsize + SA_SYMLINK_OFFSET);
3616fae4f9e0SToomas Soome memcpy(path, p, psize);
36173e8c7f16SToomas Soome free(buf);
3618fae4f9e0SToomas Soome return (0);
3619fae4f9e0SToomas Soome }
3620fae4f9e0SToomas Soome /*
3621fae4f9e0SToomas Soome * Second test is purely to silence bogus compiler
3622fae4f9e0SToomas Soome * warning about accessing past the end of dn_bonus.
3623fae4f9e0SToomas Soome */
36246fd7fa35SToomas Soome if (psize + sizeof (znode_phys_t) <= dn->dn_bonuslen &&
36256fd7fa35SToomas Soome sizeof (znode_phys_t) <= sizeof (dn->dn_bonus)) {
36266fd7fa35SToomas Soome memcpy(path, &dn->dn_bonus[sizeof (znode_phys_t)], psize);
3627fae4f9e0SToomas Soome } else {
3628fae4f9e0SToomas Soome rc = dnode_read(spa, dn, 0, path, psize);
3629fae4f9e0SToomas Soome }
3630fae4f9e0SToomas Soome return (rc);
3631fae4f9e0SToomas Soome }
3632fae4f9e0SToomas Soome
/*
 * One element of the object number list kept by zfs_lookup() while it
 * walks a path.
 */
struct obj_list {
	uint64_t		objnum;	/* object number of the component */
	STAILQ_ENTRY(obj_list)	entry;	/* singly-linked tail queue link */
};
3637fae4f9e0SToomas Soome
3638199767f8SToomas Soome /*
3639199767f8SToomas Soome * Lookup a file and return its dnode.
3640199767f8SToomas Soome */
3641199767f8SToomas Soome static int
zfs_lookup(const struct zfsmount * mnt,const char * upath,dnode_phys_t * dnode)3642199767f8SToomas Soome zfs_lookup(const struct zfsmount *mnt, const char *upath, dnode_phys_t *dnode)
3643199767f8SToomas Soome {
3644199767f8SToomas Soome int rc;
3645fae4f9e0SToomas Soome uint64_t objnum;
3646199767f8SToomas Soome const spa_t *spa;
3647199767f8SToomas Soome dnode_phys_t dn;
3648199767f8SToomas Soome const char *p, *q;
3649199767f8SToomas Soome char element[256];
3650199767f8SToomas Soome char path[1024];
3651199767f8SToomas Soome int symlinks_followed = 0;
3652199767f8SToomas Soome struct stat sb;
3653fae4f9e0SToomas Soome struct obj_list *entry, *tentry;
3654fae4f9e0SToomas Soome STAILQ_HEAD(, obj_list) on_cache = STAILQ_HEAD_INITIALIZER(on_cache);
3655199767f8SToomas Soome
3656199767f8SToomas Soome spa = mnt->spa;
3657199767f8SToomas Soome if (mnt->objset.os_type != DMU_OST_ZFS) {
3658199767f8SToomas Soome printf("ZFS: unexpected object set type %ju\n",
3659199767f8SToomas Soome (uintmax_t)mnt->objset.os_type);
3660199767f8SToomas Soome return (EIO);
3661199767f8SToomas Soome }
3662199767f8SToomas Soome
36636fd7fa35SToomas Soome if ((entry = malloc(sizeof (struct obj_list))) == NULL)
3664fae4f9e0SToomas Soome return (ENOMEM);
3665fae4f9e0SToomas Soome
3666199767f8SToomas Soome /*
3667199767f8SToomas Soome * Get the root directory dnode.
3668199767f8SToomas Soome */
3669199767f8SToomas Soome rc = objset_get_dnode(spa, &mnt->objset, MASTER_NODE_OBJ, &dn);
3670fae4f9e0SToomas Soome if (rc) {
3671fae4f9e0SToomas Soome free(entry);
3672199767f8SToomas Soome return (rc);
3673fae4f9e0SToomas Soome }
3674199767f8SToomas Soome
36756fd7fa35SToomas Soome rc = zap_lookup(spa, &dn, ZFS_ROOT_OBJ, sizeof (objnum), 1, &objnum);
3676fae4f9e0SToomas Soome if (rc) {
3677fae4f9e0SToomas Soome free(entry);
3678199767f8SToomas Soome return (rc);
3679fae4f9e0SToomas Soome }
3680fae4f9e0SToomas Soome entry->objnum = objnum;
3681fae4f9e0SToomas Soome STAILQ_INSERT_HEAD(&on_cache, entry, entry);
3682199767f8SToomas Soome
3683fae4f9e0SToomas Soome rc = objset_get_dnode(spa, &mnt->objset, objnum, &dn);
3684fae4f9e0SToomas Soome if (rc != 0)
3685fae4f9e0SToomas Soome goto done;
3686199767f8SToomas Soome
3687199767f8SToomas Soome p = upath;
3688199767f8SToomas Soome while (p && *p) {
3689fae4f9e0SToomas Soome rc = objset_get_dnode(spa, &mnt->objset, objnum, &dn);
3690fae4f9e0SToomas Soome if (rc != 0)
3691fae4f9e0SToomas Soome goto done;
3692fae4f9e0SToomas Soome
3693199767f8SToomas Soome while (*p == '/')
3694199767f8SToomas Soome p++;
3695fae4f9e0SToomas Soome if (*p == '\0')
3696199767f8SToomas Soome break;
3697fae4f9e0SToomas Soome q = p;
3698fae4f9e0SToomas Soome while (*q != '\0' && *q != '/')
3699fae4f9e0SToomas Soome q++;
3700199767f8SToomas Soome
3701fae4f9e0SToomas Soome /* skip dot */
3702fae4f9e0SToomas Soome if (p + 1 == q && p[0] == '.') {
3703fae4f9e0SToomas Soome p++;
3704fae4f9e0SToomas Soome continue;
3705fae4f9e0SToomas Soome }
3706fae4f9e0SToomas Soome /* double dot */
3707fae4f9e0SToomas Soome if (p + 2 == q && p[0] == '.' && p[1] == '.') {
3708fae4f9e0SToomas Soome p += 2;
3709fae4f9e0SToomas Soome if (STAILQ_FIRST(&on_cache) ==
3710fae4f9e0SToomas Soome STAILQ_LAST(&on_cache, obj_list, entry)) {
3711fae4f9e0SToomas Soome rc = ENOENT;
3712fae4f9e0SToomas Soome goto done;
3713fae4f9e0SToomas Soome }
3714fae4f9e0SToomas Soome entry = STAILQ_FIRST(&on_cache);
3715fae4f9e0SToomas Soome STAILQ_REMOVE_HEAD(&on_cache, entry);
3716fae4f9e0SToomas Soome free(entry);
3717fae4f9e0SToomas Soome objnum = (STAILQ_FIRST(&on_cache))->objnum;
3718fae4f9e0SToomas Soome continue;
3719fae4f9e0SToomas Soome }
37206fd7fa35SToomas Soome if (q - p + 1 > sizeof (element)) {
3721fae4f9e0SToomas Soome rc = ENAMETOOLONG;
3722fae4f9e0SToomas Soome goto done;
3723fae4f9e0SToomas Soome }
3724fae4f9e0SToomas Soome memcpy(element, p, q - p);
3725fae4f9e0SToomas Soome element[q - p] = 0;
3726fae4f9e0SToomas Soome p = q;
3727fae4f9e0SToomas Soome
3728fae4f9e0SToomas Soome if ((rc = zfs_dnode_stat(spa, &dn, &sb)) != 0)
3729fae4f9e0SToomas Soome goto done;
3730fae4f9e0SToomas Soome if (!S_ISDIR(sb.st_mode)) {
3731fae4f9e0SToomas Soome rc = ENOTDIR;
3732fae4f9e0SToomas Soome goto done;
3733fae4f9e0SToomas Soome }
3734199767f8SToomas Soome
37356fd7fa35SToomas Soome rc = zap_lookup(spa, &dn, element, sizeof (objnum), 1, &objnum);
3736199767f8SToomas Soome if (rc)
3737fae4f9e0SToomas Soome goto done;
3738199767f8SToomas Soome objnum = ZFS_DIRENT_OBJ(objnum);
3739199767f8SToomas Soome
37406fd7fa35SToomas Soome if ((entry = malloc(sizeof (struct obj_list))) == NULL) {
3741fae4f9e0SToomas Soome rc = ENOMEM;
3742fae4f9e0SToomas Soome goto done;
3743fae4f9e0SToomas Soome }
3744fae4f9e0SToomas Soome entry->objnum = objnum;
3745fae4f9e0SToomas Soome STAILQ_INSERT_HEAD(&on_cache, entry, entry);
3746199767f8SToomas Soome rc = objset_get_dnode(spa, &mnt->objset, objnum, &dn);
3747199767f8SToomas Soome if (rc)
3748fae4f9e0SToomas Soome goto done;
3749199767f8SToomas Soome
3750199767f8SToomas Soome /*
3751199767f8SToomas Soome * Check for symlink.
3752199767f8SToomas Soome */
3753199767f8SToomas Soome rc = zfs_dnode_stat(spa, &dn, &sb);
3754199767f8SToomas Soome if (rc)
3755fae4f9e0SToomas Soome goto done;
3756199767f8SToomas Soome if (S_ISLNK(sb.st_mode)) {
3757fae4f9e0SToomas Soome if (symlinks_followed > 10) {
3758fae4f9e0SToomas Soome rc = EMLINK;
3759fae4f9e0SToomas Soome goto done;
3760fae4f9e0SToomas Soome }
3761199767f8SToomas Soome symlinks_followed++;
3762199767f8SToomas Soome
3763199767f8SToomas Soome /*
3764199767f8SToomas Soome * Read the link value and copy the tail of our
3765199767f8SToomas Soome * current path onto the end.
3766199767f8SToomas Soome */
37676fd7fa35SToomas Soome if (sb.st_size + strlen(p) + 1 > sizeof (path)) {
3768fae4f9e0SToomas Soome rc = ENAMETOOLONG;
3769fae4f9e0SToomas Soome goto done;
3770199767f8SToomas Soome }
3771fae4f9e0SToomas Soome strcpy(&path[sb.st_size], p);
3772fae4f9e0SToomas Soome
3773fae4f9e0SToomas Soome rc = zfs_dnode_readlink(spa, &dn, path, sb.st_size);
3774fae4f9e0SToomas Soome if (rc != 0)
3775fae4f9e0SToomas Soome goto done;
3776199767f8SToomas Soome
3777199767f8SToomas Soome /*
3778199767f8SToomas Soome * Restart with the new path, starting either at
3779199767f8SToomas Soome * the root or at the parent depending whether or
3780199767f8SToomas Soome * not the link is relative.
3781199767f8SToomas Soome */
3782199767f8SToomas Soome p = path;
3783fae4f9e0SToomas Soome if (*p == '/') {
3784fae4f9e0SToomas Soome while (STAILQ_FIRST(&on_cache) !=
3785fae4f9e0SToomas Soome STAILQ_LAST(&on_cache, obj_list, entry)) {
3786fae4f9e0SToomas Soome entry = STAILQ_FIRST(&on_cache);
3787fae4f9e0SToomas Soome STAILQ_REMOVE_HEAD(&on_cache, entry);
3788fae4f9e0SToomas Soome free(entry);
3789fae4f9e0SToomas Soome }
3790fae4f9e0SToomas Soome } else {
3791fae4f9e0SToomas Soome entry = STAILQ_FIRST(&on_cache);
3792fae4f9e0SToomas Soome STAILQ_REMOVE_HEAD(&on_cache, entry);
3793fae4f9e0SToomas Soome free(entry);
3794fae4f9e0SToomas Soome }
3795fae4f9e0SToomas Soome objnum = (STAILQ_FIRST(&on_cache))->objnum;
3796199767f8SToomas Soome }
3797199767f8SToomas Soome }
3798199767f8SToomas Soome
3799199767f8SToomas Soome *dnode = dn;
3800fae4f9e0SToomas Soome done:
3801fae4f9e0SToomas Soome STAILQ_FOREACH_SAFE(entry, &on_cache, entry, tentry)
3802fae4f9e0SToomas Soome free(entry);
3803fae4f9e0SToomas Soome return (rc);
3804199767f8SToomas Soome }
3805