spa_misc.c revision 0a4e9518a44f226be6d39383330b5b1792d2f184
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/zfs_context.h>
29#include <sys/spa_impl.h>
30#include <sys/zio.h>
31#include <sys/zio_checksum.h>
32#include <sys/zio_compress.h>
33#include <sys/dmu.h>
34#include <sys/dmu_tx.h>
35#include <sys/zap.h>
36#include <sys/zil.h>
37#include <sys/vdev_impl.h>
38#include <sys/metaslab.h>
39#include <sys/uberblock_impl.h>
40#include <sys/txg.h>
41#include <sys/avl.h>
42#include <sys/unique.h>
43#include <sys/dsl_pool.h>
44#include <sys/dsl_dir.h>
45#include <sys/dsl_prop.h>
46#include <sys/fs/zfs.h>
47#include <sys/metaslab_impl.h>
48#include "zfs_prop.h"
49
50/*
51 * SPA locking
52 *
53 * There are four basic locks for managing spa_t structures:
54 *
55 * spa_namespace_lock (global mutex)
56 *
57 *	This lock must be acquired to do any of the following:
58 *
59 *		- Lookup a spa_t by name
60 *		- Add or remove a spa_t from the namespace
61 *		- Increase spa_refcount from zero
62 *		- Check if spa_refcount is zero
63 *		- Rename a spa_t
64 *		- add/remove/attach/detach devices
65 *		- Held for the duration of create/destroy/import/export
66 *
67 *	It does not need to handle recursion.  A create or destroy may
68 *	reference objects (files or zvols) in other pools, but by
69 *	definition they must have an existing reference, and will never need
70 *	to look up a spa_t by name.
71 *
72 * spa_refcount (per-spa refcount_t protected by mutex)
73 *
74 *	This reference count keeps track of any active users of the spa_t.  The
75 *	spa_t cannot be destroyed or freed while this is non-zero.  Internally,
76 *	the refcount is never really 'zero' - opening a pool implicitly keeps
77 *	some references in the DMU.  Internally we check against SPA_MINREF, but
78 *	present the image of a zero/non-zero value to consumers.
79 *
80 * spa_config_lock (per-spa read-priority rwlock)
81 *
82 *	This protects the spa_t from config changes, and must be held in
83 *	the following circumstances:
84 *
85 *		- RW_READER to perform I/O to the spa
86 *		- RW_WRITER to change the vdev config
87 *
88 * spa_config_cache_lock (per-spa mutex)
89 *
90 *	This mutex prevents the spa_config nvlist from being updated.  No
91 *      other locks are required to obtain this lock, although implicitly you
92 *      must have the namespace lock or non-zero refcount to have any kind
93 *      of spa_t pointer at all.
94 *
95 * The locking order is fairly straightforward:
96 *
97 *		spa_namespace_lock	->	spa_refcount
98 *
99 *	The namespace lock must be acquired to increase the refcount from 0
100 *	or to check if it is zero.
101 *
102 *		spa_refcount		->	spa_config_lock
103 *
104 *	There must be at least one valid reference on the spa_t to acquire
105 *	the config lock.
106 *
107 *		spa_namespace_lock	->	spa_config_lock
108 *
109 *	The namespace lock must always be taken before the config lock.
110 *
111 *
112 * The spa_namespace_lock and spa_config_cache_lock can be acquired directly and
113 * are globally visible.
114 *
115 * The namespace is manipulated using the following functions, all of which require
116 * the spa_namespace_lock to be held.
117 *
118 *	spa_lookup()		Lookup a spa_t by name.
119 *
120 *	spa_add()		Create a new spa_t in the namespace.
121 *
122 *	spa_remove()		Remove a spa_t from the namespace.  This also
123 *				frees up any memory associated with the spa_t.
124 *
125 *	spa_next()		Returns the next spa_t in the system, or the
126 *				first if NULL is passed.
127 *
128 *	spa_evict_all()		Shutdown and remove all spa_t structures in
129 *				the system.
130 *
131 *	spa_guid_exists()	Determine whether a pool/device guid exists.
132 *
133 * The spa_refcount is manipulated using the following functions:
134 *
135 *	spa_open_ref()		Adds a reference to the given spa_t.  Must be
136 *				called with spa_namespace_lock held if the
137 *				refcount is currently zero.
138 *
139 *	spa_close()		Remove a reference from the spa_t.  This will
140 *				not free the spa_t or remove it from the
141 *				namespace.  No locking is required.
142 *
143 *	spa_refcount_zero()	Returns true if the refcount is currently
144 *				zero.  Must be called with spa_namespace_lock
145 *				held.
146 *
147 * The spa_config_lock is manipulated using the following functions:
148 *
149 *	spa_config_enter()	Acquire the config lock as RW_READER or
150 *				RW_WRITER.  At least one reference on the spa_t
151 *				must exist.
152 *
153 *	spa_config_exit()	Release the config lock.
154 *
155 *	spa_config_held()	Returns true if the config lock is currently
156 *				held in the given state.
157 *
158 * The vdev configuration is protected by spa_vdev_enter() / spa_vdev_exit().
159 *
160 *	spa_vdev_enter()	Acquire the namespace lock and the config lock
161 *				for writing.
162 *
163 *	spa_vdev_exit()		Release the config lock, wait for all I/O
164 *				to complete, sync the updated configs to the
165 *				cache, and release the namespace lock.
166 *
167 * The spa_name() function also requires either the spa_namespace_lock
168 * or the spa_config_lock, as both are needed to do a rename.  spa_rename() is
169 * also implemented within this file since it requires manipulation of the
170 * namespace.
171 */
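
/*
 * Illustrative sketch only (hypothetical consumer code, not part of this
 * file; the pool name "tank" is a placeholder): a typical consumer holds
 * the namespace lock just long enough to find the spa_t and add a
 * reference, then holds the config lock as RW_READER while performing I/O.
 * Most real consumers go through spa_open()/spa_close() rather than
 * calling spa_lookup() directly.
 *
 *	spa_t *spa;
 *
 *	mutex_enter(&spa_namespace_lock);
 *	if ((spa = spa_lookup("tank")) != NULL)
 *		spa_open_ref(spa, FTAG);
 *	mutex_exit(&spa_namespace_lock);
 *
 *	if (spa != NULL) {
 *		spa_config_enter(spa, RW_READER, FTAG);
 *		... perform I/O to the pool ...
 *		spa_config_exit(spa, FTAG);
 *		spa_close(spa, FTAG);
 *	}
 */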
172
173static avl_tree_t spa_namespace_avl;
174kmutex_t spa_namespace_lock;
175static kcondvar_t spa_namespace_cv;
176static int spa_active_count;
177int spa_max_replication_override = SPA_DVAS_PER_BP;
178
179static kmutex_t spa_spare_lock;
180static avl_tree_t spa_spare_avl;
181
182kmem_cache_t *spa_buffer_pool;
183int spa_mode;
184
185#ifdef ZFS_DEBUG
186/* Everything except dprintf is on by default in debug builds */
187int zfs_flags = ~ZFS_DEBUG_DPRINTF;
188#else
189int zfs_flags = 0;
190#endif
191
192/*
193 * zfs_recover can be set to nonzero to attempt to recover from
194 * otherwise-fatal errors, typically caused by on-disk corruption.  When
195 * set, calls to zfs_panic_recover() will turn into warning messages.
196 */
197int zfs_recover = 0;
198
199#define	SPA_MINREF	5	/* spa_refcnt for an open-but-idle pool */
200
201/*
202 * ==========================================================================
203 * SPA namespace functions
204 * ==========================================================================
205 */
206
207/*
208 * Lookup the named spa_t in the AVL tree.  The spa_namespace_lock must be held.
209 * Returns NULL if no matching spa_t is found.
210 */
211spa_t *
212spa_lookup(const char *name)
213{
214	spa_t search, *spa;
215	avl_index_t where;
216	char c;
217	char *cp;
218
219	ASSERT(MUTEX_HELD(&spa_namespace_lock));
220
221	/*
222	 * If it's a full dataset name, figure out the pool name and
223	 * just use that.
224	 */
225	cp = strpbrk(name, "/@");
226	if (cp) {
227		c = *cp;
228		*cp = '\0';
229	}
230
231	search.spa_name = (char *)name;
232	spa = avl_find(&spa_namespace_avl, &search, &where);
233
234	if (cp)
235		*cp = c;
236
237	return (spa);
238}
239
240/*
241 * Create an uninitialized spa_t with the given name.  Requires
242 * spa_namespace_lock.  The caller must ensure that the spa_t doesn't already
243 * exist by calling spa_lookup() first.
244 */
245spa_t *
246spa_add(const char *name, const char *altroot)
247{
248	spa_t *spa;
249
250	ASSERT(MUTEX_HELD(&spa_namespace_lock));
251
252	spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);
253
254	rw_init(&spa->spa_traverse_lock, NULL, RW_DEFAULT, NULL);
255
256	mutex_init(&spa->spa_uberblock_lock, NULL, MUTEX_DEFAULT, NULL);
257	mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
258	mutex_init(&spa->spa_config_cache_lock, NULL, MUTEX_DEFAULT, NULL);
259	mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
260	mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
261	mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
262	mutex_init(&spa->spa_sync_bplist.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
263	mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
264	mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
265
266	cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
267	cv_init(&spa->spa_scrub_cv, NULL, CV_DEFAULT, NULL);
268	cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
269
270	spa->spa_name = spa_strdup(name);
271	spa->spa_state = POOL_STATE_UNINITIALIZED;
272	spa->spa_freeze_txg = UINT64_MAX;
273	spa->spa_final_txg = UINT64_MAX;
274
275	refcount_create(&spa->spa_refcount);
276	rprw_init(&spa->spa_config_lock);
277
278	avl_add(&spa_namespace_avl, spa);
279
280	mutex_init(&spa->spa_zio_lock, NULL, MUTEX_DEFAULT, NULL);
281
282	/*
283	 * Set the alternate root, if there is one.
284	 */
285	if (altroot) {
286		spa->spa_root = spa_strdup(altroot);
287		spa_active_count++;
288	}
289
290	return (spa);
291}
292
293/*
294 * Removes a spa_t from the namespace, freeing up any memory used.  Requires
295 * spa_namespace_lock.  This is called only after the spa_t has been closed and
296 * deactivated.
297 */
298void
299spa_remove(spa_t *spa)
300{
301	ASSERT(MUTEX_HELD(&spa_namespace_lock));
302	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
303	ASSERT(spa->spa_scrub_thread == NULL);
304
305	avl_remove(&spa_namespace_avl, spa);
306	cv_broadcast(&spa_namespace_cv);
307
308	if (spa->spa_root) {
309		spa_strfree(spa->spa_root);
310		spa_active_count--;
311	}
312
313	if (spa->spa_name)
314		spa_strfree(spa->spa_name);
315
316	spa_config_set(spa, NULL);
317
318	refcount_destroy(&spa->spa_refcount);
319
320	rprw_destroy(&spa->spa_config_lock);
321
322	rw_destroy(&spa->spa_traverse_lock);
323
324	cv_destroy(&spa->spa_async_cv);
325	cv_destroy(&spa->spa_scrub_cv);
326	cv_destroy(&spa->spa_scrub_io_cv);
327
328	mutex_destroy(&spa->spa_uberblock_lock);
329	mutex_destroy(&spa->spa_async_lock);
330	mutex_destroy(&spa->spa_config_cache_lock);
331	mutex_destroy(&spa->spa_scrub_lock);
332	mutex_destroy(&spa->spa_errlog_lock);
333	mutex_destroy(&spa->spa_errlist_lock);
334	mutex_destroy(&spa->spa_sync_bplist.bpl_lock);
335	mutex_destroy(&spa->spa_history_lock);
336	mutex_destroy(&spa->spa_props_lock);
337	mutex_destroy(&spa->spa_zio_lock);
338
339	kmem_free(spa, sizeof (spa_t));
340}
341
342/*
343 * Given a pool, return the next pool in the namespace, or NULL if there is
344 * none.  If 'prev' is NULL, return the first pool.
345 */
346spa_t *
347spa_next(spa_t *prev)
348{
349	ASSERT(MUTEX_HELD(&spa_namespace_lock));
350
351	if (prev)
352		return (AVL_NEXT(&spa_namespace_avl, prev));
353	else
354		return (avl_first(&spa_namespace_avl));
355}
356
357/*
358 * ==========================================================================
359 * SPA refcount functions
360 * ==========================================================================
361 */
362
363/*
364 * Add a reference to the given spa_t.  Must have at least one reference, or
365 * have the namespace lock held.
366 */
367void
368spa_open_ref(spa_t *spa, void *tag)
369{
370	ASSERT(refcount_count(&spa->spa_refcount) > SPA_MINREF ||
371	    MUTEX_HELD(&spa_namespace_lock));
372
373	(void) refcount_add(&spa->spa_refcount, tag);
374}
375
376/*
377 * Remove a reference to the given spa_t.  Must have at least one reference, or
378 * have the namespace lock held.
379 */
380void
381spa_close(spa_t *spa, void *tag)
382{
383	ASSERT(refcount_count(&spa->spa_refcount) > SPA_MINREF ||
384	    MUTEX_HELD(&spa_namespace_lock));
385
386	(void) refcount_remove(&spa->spa_refcount, tag);
387}
388
389/*
390 * Check to see if the spa refcount is zero.  Must be called with
391 * spa_namespace_lock held.  We really compare against SPA_MINREF, which is the
392 * number of references acquired when opening a pool.
393 */
394boolean_t
395spa_refcount_zero(spa_t *spa)
396{
397	ASSERT(MUTEX_HELD(&spa_namespace_lock));
398
399	return (refcount_count(&spa->spa_refcount) == SPA_MINREF);
400}
401
402/*
403 * ==========================================================================
404 * SPA spare tracking
405 * ==========================================================================
406 */
407
408/*
409 * Spares are tracked globally due to the following constraints:
410 *
411 * 	- A spare may be part of multiple pools.
412 * 	- A spare may be added to a pool even if it's actively in use within
413 *	  another pool.
414 * 	- A spare in use in any pool can only be the source of a replacement if
415 *	  the target is a spare in the same pool.
416 *
417 * We keep track of all spares on the system through the use of a reference
418 * counted AVL tree.  When a vdev is added as a spare, or used as a replacement
419 * spare, then we bump the reference count in the AVL tree.  In addition, we set
420 * the 'vdev_isspare' member to indicate that the device is a spare (active or
421 * inactive).  When a spare is made active (used to replace a device in the
422 * pool), we also keep track of which pool its been made a part of.
423 *
424 * The 'spa_spare_lock' protects the AVL tree.  These functions are normally
425 * called under the spa_namespace lock as part of vdev reconfiguration.  The
426 * separate spare lock exists for the status query path, which does not need to
427 * be completely consistent with respect to other vdev configuration changes.
428 */
429
430typedef struct spa_spare {
431	uint64_t	spare_guid;
432	uint64_t	spare_pool;
433	avl_node_t	spare_avl;
434	int		spare_count;
435} spa_spare_t;
436
437static int
438spa_spare_compare(const void *a, const void *b)
439{
440	const spa_spare_t *sa = a;
441	const spa_spare_t *sb = b;
442
443	if (sa->spare_guid < sb->spare_guid)
444		return (-1);
445	else if (sa->spare_guid > sb->spare_guid)
446		return (1);
447	else
448		return (0);
449}
450
451void
452spa_spare_add(vdev_t *vd)
453{
454	avl_index_t where;
455	spa_spare_t search;
456	spa_spare_t *spare;
457
458	mutex_enter(&spa_spare_lock);
459	ASSERT(!vd->vdev_isspare);
460
461	search.spare_guid = vd->vdev_guid;
462	if ((spare = avl_find(&spa_spare_avl, &search, &where)) != NULL) {
463		spare->spare_count++;
464	} else {
465		spare = kmem_zalloc(sizeof (spa_spare_t), KM_SLEEP);
466		spare->spare_guid = vd->vdev_guid;
467		spare->spare_count = 1;
468		avl_insert(&spa_spare_avl, spare, where);
469	}
470	vd->vdev_isspare = B_TRUE;
471
472	mutex_exit(&spa_spare_lock);
473}
474
475void
476spa_spare_remove(vdev_t *vd)
477{
478	spa_spare_t search;
479	spa_spare_t *spare;
480	avl_index_t where;
481
482	mutex_enter(&spa_spare_lock);
483
484	search.spare_guid = vd->vdev_guid;
485	spare = avl_find(&spa_spare_avl, &search, &where);
486
487	ASSERT(vd->vdev_isspare);
488	ASSERT(spare != NULL);
489
490	if (--spare->spare_count == 0) {
491		avl_remove(&spa_spare_avl, spare);
492		kmem_free(spare, sizeof (spa_spare_t));
493	} else if (spare->spare_pool == spa_guid(vd->vdev_spa)) {
494		spare->spare_pool = 0ULL;
495	}
496
497	vd->vdev_isspare = B_FALSE;
498	mutex_exit(&spa_spare_lock);
499}
500
501boolean_t
502spa_spare_exists(uint64_t guid, uint64_t *pool)
503{
504	spa_spare_t search, *found;
505	avl_index_t where;
506
507	mutex_enter(&spa_spare_lock);
508
509	search.spare_guid = guid;
510	found = avl_find(&spa_spare_avl, &search, &where);
511
512	if (pool) {
513		if (found)
514			*pool = found->spare_pool;
515		else
516			*pool = 0ULL;
517	}
518
519	mutex_exit(&spa_spare_lock);
520
521	return (found != NULL);
522}
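
/*
 * Illustrative sketch only (hypothetical consumer code, not part of this
 * file): an import or status path can ask whether a device it has found is
 * registered as a spare and, if so, which pool currently has it active:
 *
 *	uint64_t pool_guid;
 *
 *	if (spa_spare_exists(vd->vdev_guid, &pool_guid) &&
 *	    pool_guid != 0ULL && pool_guid != spa_guid(spa)) {
 *		... the device is actively in use as a spare elsewhere ...
 *	}
 */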
523
524void
525spa_spare_activate(vdev_t *vd)
526{
527	spa_spare_t search, *found;
528	avl_index_t where;
529
530	mutex_enter(&spa_spare_lock);
531	ASSERT(vd->vdev_isspare);
532
533	search.spare_guid = vd->vdev_guid;
534	found = avl_find(&spa_spare_avl, &search, &where);
535	ASSERT(found != NULL);
536	ASSERT(found->spare_pool == 0ULL);
537
538	found->spare_pool = spa_guid(vd->vdev_spa);
539	mutex_exit(&spa_spare_lock);
540}
541
542/*
543 * ==========================================================================
544 * SPA config locking
545 * ==========================================================================
546 */
547void
548spa_config_enter(spa_t *spa, krw_t rw, void *tag)
549{
550	rprw_enter(&spa->spa_config_lock, rw, tag);
551}
552
553void
554spa_config_exit(spa_t *spa, void *tag)
555{
556	rprw_exit(&spa->spa_config_lock, tag);
557}
558
559boolean_t
560spa_config_held(spa_t *spa, krw_t rw)
561{
562	return (rprw_held(&spa->spa_config_lock, rw));
563}
564
565/*
566 * ==========================================================================
567 * SPA vdev locking
568 * ==========================================================================
569 */
570
571/*
572 * Lock the given spa_t for the purpose of adding or removing a vdev.
573 * Grabs the global spa_namespace_lock plus the spa config lock for writing.
574 * It returns the next transaction group for the spa_t.
575 */
576uint64_t
577spa_vdev_enter(spa_t *spa)
578{
579	mutex_enter(&spa_namespace_lock);
580
581	/*
582	 * Suspend scrub activity while we mess with the config.  We must do
583	 * this after acquiring the namespace lock to avoid a 3-way deadlock
584	 * with spa_scrub_stop() and the scrub thread.
585	 */
586	spa_scrub_suspend(spa);
587
588	spa_config_enter(spa, RW_WRITER, spa);
589
590	return (spa_last_synced_txg(spa) + 1);
591}
592
593/*
594 * Unlock the spa_t after adding or removing a vdev.  Besides undoing the
595 * locking of spa_vdev_enter(), we also want to make sure the transactions have
596 * synced to disk, and then update the global configuration cache with the new
597 * information.
598 */
599int
600spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
601{
602	int config_changed = B_FALSE;
603
604	ASSERT(txg > spa_last_synced_txg(spa));
605
606	/*
607	 * Reassess the DTLs.
608	 */
609	vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE);
610
611	/*
612	 * If the config changed, notify the scrub thread that it must restart.
613	 */
614	if (error == 0 && !list_is_empty(&spa->spa_dirty_list)) {
615		config_changed = B_TRUE;
616		spa_scrub_restart(spa, txg);
617	}
618
619	spa_config_exit(spa, spa);
620
621	/*
622	 * Allow scrubbing to resume.
623	 */
624	spa_scrub_resume(spa);
625
626	/*
627	 * Note: this txg_wait_synced() is important because it ensures
628	 * that there won't be more than one config change per txg.
629	 * This allows us to use the txg as the generation number.
630	 */
631	if (error == 0)
632		txg_wait_synced(spa->spa_dsl_pool, txg);
633
634	if (vd != NULL) {
635		ASSERT(!vd->vdev_detached || vd->vdev_dtl.smo_object == 0);
636		vdev_free(vd);
637	}
638
639	/*
640	 * If the config changed, update the config cache.
641	 */
642	if (config_changed)
643		spa_config_sync();
644
645	mutex_exit(&spa_namespace_lock);
646
647	return (error);
648}
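
/*
 * Typical caller pattern for the two functions above (illustrative sketch
 * only; the real callers are the device add/attach/detach paths):
 *
 *	uint64_t txg;
 *	int error = 0;
 *	vdev_t *vd = NULL;
 *
 *	txg = spa_vdev_enter(spa);
 *
 *	... modify the vdev tree; set 'error' on failure; point 'vd' at any
 *	now-unused vdev that spa_vdev_exit() should free ...
 *
 *	return (spa_vdev_exit(spa, vd, txg, error));
 */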
649
650/*
651 * ==========================================================================
652 * Miscellaneous functions
653 * ==========================================================================
654 */
655
656/*
657 * Rename a spa_t.
658 */
659int
660spa_rename(const char *name, const char *newname)
661{
662	spa_t *spa;
663	int err;
664
665	/*
666	 * Lookup the spa_t and grab the config lock for writing.  We need to
667	 * actually open the pool so that we can sync out the necessary labels.
668	 * It's OK to call spa_open() with the namespace lock held because we
669	 * allow recursive calls for other reasons.
670	 */
671	mutex_enter(&spa_namespace_lock);
672	if ((err = spa_open(name, &spa, FTAG)) != 0) {
673		mutex_exit(&spa_namespace_lock);
674		return (err);
675	}
676
677	spa_config_enter(spa, RW_WRITER, FTAG);
678
679	avl_remove(&spa_namespace_avl, spa);
680	spa_strfree(spa->spa_name);
681	spa->spa_name = spa_strdup(newname);
682	avl_add(&spa_namespace_avl, spa);
683
684	/*
685	 * Sync all labels to disk with the new names by marking the root vdev
686	 * dirty and waiting for it to sync.  It will pick up the new pool name
687	 * during the sync.
688	 */
689	vdev_config_dirty(spa->spa_root_vdev);
690
691	spa_config_exit(spa, FTAG);
692
693	txg_wait_synced(spa->spa_dsl_pool, 0);
694
695	/*
696	 * Sync the updated config cache.
697	 */
698	spa_config_sync();
699
700	spa_close(spa, FTAG);
701
702	mutex_exit(&spa_namespace_lock);
703
704	return (0);
705}
706
707
708/*
709 * Determine whether a pool with given pool_guid exists.  If device_guid is
710 * non-zero, determine whether the pool exists *and* contains a device with the
711 * specified device_guid.
712 */
713boolean_t
714spa_guid_exists(uint64_t pool_guid, uint64_t device_guid)
715{
716	spa_t *spa;
717	avl_tree_t *t = &spa_namespace_avl;
718
719	ASSERT(MUTEX_HELD(&spa_namespace_lock));
720
721	for (spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) {
722		if (spa->spa_state == POOL_STATE_UNINITIALIZED)
723			continue;
724		if (spa->spa_root_vdev == NULL)
725			continue;
726		if (spa_guid(spa) == pool_guid) {
727			if (device_guid == 0)
728				break;
729
730			if (vdev_lookup_by_guid(spa->spa_root_vdev,
731			    device_guid) != NULL)
732				break;
733
734			/*
735			 * Check any devices we may be in the process of adding.
736			 */
737			if (spa->spa_pending_vdev) {
738				if (vdev_lookup_by_guid(spa->spa_pending_vdev,
739				    device_guid) != NULL)
740					break;
741			}
742		}
743	}
744
745	return (spa != NULL);
746}
747
748char *
749spa_strdup(const char *s)
750{
751	size_t len;
752	char *new;
753
754	len = strlen(s);
755	new = kmem_alloc(len + 1, KM_SLEEP);
756	bcopy(s, new, len);
757	new[len] = '\0';
758
759	return (new);
760}
761
762void
763spa_strfree(char *s)
764{
765	kmem_free(s, strlen(s) + 1);
766}
767
768uint64_t
769spa_get_random(uint64_t range)
770{
771	uint64_t r;
772
773	ASSERT(range != 0);
774
775	(void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t));
776
777	return (r % range);
778}
779
780void
781sprintf_blkptr(char *buf, int len, const blkptr_t *bp)
782{
783	int d;
784
785	if (bp == NULL) {
786		(void) snprintf(buf, len, "<NULL>");
787		return;
788	}
789
790	if (BP_IS_HOLE(bp)) {
791		(void) snprintf(buf, len, "<hole>");
792		return;
793	}
794
795	(void) snprintf(buf, len, "[L%llu %s] %llxL/%llxP ",
796	    (u_longlong_t)BP_GET_LEVEL(bp),
797	    dmu_ot[BP_GET_TYPE(bp)].ot_name,
798	    (u_longlong_t)BP_GET_LSIZE(bp),
799	    (u_longlong_t)BP_GET_PSIZE(bp));
800
801	for (d = 0; d < BP_GET_NDVAS(bp); d++) {
802		const dva_t *dva = &bp->blk_dva[d];
803		(void) snprintf(buf + strlen(buf), len - strlen(buf),
804		    "DVA[%d]=<%llu:%llx:%llx> ", d,
805		    (u_longlong_t)DVA_GET_VDEV(dva),
806		    (u_longlong_t)DVA_GET_OFFSET(dva),
807		    (u_longlong_t)DVA_GET_ASIZE(dva));
808	}
809
810	(void) snprintf(buf + strlen(buf), len - strlen(buf),
811	    "%s %s %s %s birth=%llu fill=%llu cksum=%llx:%llx:%llx:%llx",
812	    zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
813	    zio_compress_table[BP_GET_COMPRESS(bp)].ci_name,
814	    BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE",
815	    BP_IS_GANG(bp) ? "gang" : "contiguous",
816	    (u_longlong_t)bp->blk_birth,
817	    (u_longlong_t)bp->blk_fill,
818	    (u_longlong_t)bp->blk_cksum.zc_word[0],
819	    (u_longlong_t)bp->blk_cksum.zc_word[1],
820	    (u_longlong_t)bp->blk_cksum.zc_word[2],
821	    (u_longlong_t)bp->blk_cksum.zc_word[3]);
822}
823
824void
825spa_freeze(spa_t *spa)
826{
827	uint64_t freeze_txg = 0;
828
829	spa_config_enter(spa, RW_WRITER, FTAG);
830	if (spa->spa_freeze_txg == UINT64_MAX) {
831		freeze_txg = spa_last_synced_txg(spa) + TXG_SIZE;
832		spa->spa_freeze_txg = freeze_txg;
833	}
834	spa_config_exit(spa, FTAG);
835	if (freeze_txg != 0)
836		txg_wait_synced(spa_get_dsl(spa), freeze_txg);
837}
838
839void
840zfs_panic_recover(const char *fmt, ...)
841{
842	va_list adx;
843
844	va_start(adx, fmt);
845	vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx);
846	va_end(adx);
847}
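
/*
 * Hypothetical caller sketch (for illustration only; actual callers live in
 * other files): consistency checks report an impossible on-disk value
 * through zfs_panic_recover(), so that setting zfs_recover demotes the
 * panic to a warning:
 *
 *	if (BP_GET_NDVAS(bp) > SPA_DVAS_PER_BP)
 *		zfs_panic_recover("blkptr at %p has %d DVAs", (void *)bp,
 *		    (int)BP_GET_NDVAS(bp));
 */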
848
849/*
850 * ==========================================================================
851 * Accessor functions
852 * ==========================================================================
853 */
854
855krwlock_t *
856spa_traverse_rwlock(spa_t *spa)
857{
858	return (&spa->spa_traverse_lock);
859}
860
861int
862spa_traverse_wanted(spa_t *spa)
863{
864	return (spa->spa_traverse_wanted);
865}
866
867dsl_pool_t *
868spa_get_dsl(spa_t *spa)
869{
870	return (spa->spa_dsl_pool);
871}
872
873blkptr_t *
874spa_get_rootblkptr(spa_t *spa)
875{
876	return (&spa->spa_ubsync.ub_rootbp);
877}
878
879void
880spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp)
881{
882	spa->spa_uberblock.ub_rootbp = *bp;
883}
884
885void
886spa_altroot(spa_t *spa, char *buf, size_t buflen)
887{
888	if (spa->spa_root == NULL)
889		buf[0] = '\0';
890	else
891		(void) strncpy(buf, spa->spa_root, buflen);
892}
893
894int
895spa_sync_pass(spa_t *spa)
896{
897	return (spa->spa_sync_pass);
898}
899
900char *
901spa_name(spa_t *spa)
902{
903	/*
904	 * Accessing the name requires holding either the namespace lock or the
905	 * config lock, both of which are required to do a rename.
906	 */
907	ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
908	    spa_config_held(spa, RW_READER) || spa_config_held(spa, RW_WRITER));
909
910	return (spa->spa_name);
911}
912
913uint64_t
914spa_guid(spa_t *spa)
915{
916	/*
917	 * If we fail to parse the config during spa_load(), we can go through
918	 * the error path (which posts an ereport) and end up here with no root
919	 * vdev.  We stash the original pool guid in 'spa_load_guid' to handle
920	 * this case.
921	 */
922	if (spa->spa_root_vdev != NULL)
923		return (spa->spa_root_vdev->vdev_guid);
924	else
925		return (spa->spa_load_guid);
926}
927
928uint64_t
929spa_last_synced_txg(spa_t *spa)
930{
931	return (spa->spa_ubsync.ub_txg);
932}
933
934uint64_t
935spa_first_txg(spa_t *spa)
936{
937	return (spa->spa_first_txg);
938}
939
940int
941spa_state(spa_t *spa)
942{
943	return (spa->spa_state);
944}
945
946uint64_t
947spa_freeze_txg(spa_t *spa)
948{
949	return (spa->spa_freeze_txg);
950}
951
952/*
953 * Return how much space is allocated in the pool (i.e. the sum of all asize).
954 */
955uint64_t
956spa_get_alloc(spa_t *spa)
957{
958	return (spa->spa_root_vdev->vdev_stat.vs_alloc);
959}
960
961/*
962 * Return how much (raid-z inflated) space there is in the pool.
963 */
964uint64_t
965spa_get_space(spa_t *spa)
966{
967	return (spa->spa_root_vdev->vdev_stat.vs_space);
968}
969
970/*
971 * Return the amount of raid-z-deflated space in the pool.
972 */
973uint64_t
974spa_get_dspace(spa_t *spa)
975{
976	if (spa->spa_deflate)
977		return (spa->spa_root_vdev->vdev_stat.vs_dspace);
978	else
979		return (spa->spa_root_vdev->vdev_stat.vs_space);
980}
981
982/* ARGSUSED */
983uint64_t
984spa_get_asize(spa_t *spa, uint64_t lsize)
985{
986	/*
987	 * For now, the worst case is 512-byte RAID-Z blocks, in which
988	 * case the space requirement is exactly 2x; so just assume that.
989	 * Add to this the fact that we can have up to 3 DVAs per bp, and
990	 * we have to multiply by a total of 6x.
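	 *
	 *	For example (a worked instance of the rule above), a 128K
	 *	logical block could in the worst case require
	 *	6 * 128K = 768K of allocated space.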
991	 */
992	return (lsize * 6);
993}
994
995/*
996 * Return the failure mode that has been set for this pool.  The default
997 * behavior will be to block all I/Os when a complete failure occurs.
998 */
999uint8_t
1000spa_get_failmode(spa_t *spa)
1001{
1002	return (spa->spa_failmode);
1003}
1004
1005uint64_t
1006spa_version(spa_t *spa)
1007{
1008	return (spa->spa_ubsync.ub_version);
1009}
1010
1011int
1012spa_max_replication(spa_t *spa)
1013{
1014	/*
1015	 * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to
1016	 * handle BPs with more than one DVA allocated.  Set our max
1017	 * replication level accordingly.
1018	 */
1019	if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS)
1020		return (1);
1021	return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override));
1022}
1023
1024uint64_t
1025bp_get_dasize(spa_t *spa, const blkptr_t *bp)
1026{
1027	int sz = 0, i;
1028
1029	if (!spa->spa_deflate)
1030		return (BP_GET_ASIZE(bp));
1031
1032	spa_config_enter(spa, RW_READER, FTAG);
1033	for (i = 0; i < SPA_DVAS_PER_BP; i++) {
1034		vdev_t *vd =
1035		    vdev_lookup_top(spa, DVA_GET_VDEV(&bp->blk_dva[i]));
1036		if (vd)
1037			sz += (DVA_GET_ASIZE(&bp->blk_dva[i]) >>
1038			    SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio;
1039	}
1040	spa_config_exit(spa, FTAG);
1041	return (sz);
1042}
1043
1044/*
1045 * ==========================================================================
1046 * Initialization and Termination
1047 * ==========================================================================
1048 */
1049
1050static int
1051spa_name_compare(const void *a1, const void *a2)
1052{
1053	const spa_t *s1 = a1;
1054	const spa_t *s2 = a2;
1055	int s;
1056
1057	s = strcmp(s1->spa_name, s2->spa_name);
1058	if (s > 0)
1059		return (1);
1060	if (s < 0)
1061		return (-1);
1062	return (0);
1063}
1064
1065int
1066spa_busy(void)
1067{
1068	return (spa_active_count);
1069}
1070
1071void
1072spa_init(int mode)
1073{
1074	mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL);
1075	mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL);
1076	cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL);
1077
1078	avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t),
1079	    offsetof(spa_t, spa_avl));
1080
1081	avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_spare_t),
1082	    offsetof(spa_spare_t, spare_avl));
1083
1084	spa_mode = mode;
1085
1086	refcount_init();
1087	unique_init();
1088	zio_init();
1089	dmu_init();
1090	zil_init();
1091	zfs_prop_init();
1092	zpool_prop_init();
1093	spa_config_load();
1094}
1095
1096void
1097spa_fini(void)
1098{
1099	spa_evict_all();
1100
1101	zil_fini();
1102	dmu_fini();
1103	zio_fini();
1104	unique_fini();
1105	refcount_fini();
1106
1107	avl_destroy(&spa_namespace_avl);
1108	avl_destroy(&spa_spare_avl);
1109
1110	cv_destroy(&spa_namespace_cv);
1111	mutex_destroy(&spa_namespace_lock);
1112	mutex_destroy(&spa_spare_lock);
1113}
1114
1115/*
1116 * Return whether this pool has slogs. No locking needed.
1117 * It's not a problem if the wrong answer is returned as it's only for
1118 * performance and not correctness.
1119 */
1120boolean_t
1121spa_has_slogs(spa_t *spa)
1122{
1123	return (spa->spa_log_class->mc_rotor != NULL);
1124}
1125