spa_misc.c revision 1934e92fc930c49429ad71a8ca97340f33227e78
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
#include <sys/avl.h>
#include <sys/unique.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/fs/zfs.h>
#include <sys/metaslab_impl.h>
#include "zfs_prop.h"

/*
 * SPA locking
 *
 * There are four basic locks for managing spa_t structures:
 *
 * spa_namespace_lock (global mutex)
 *
 *	This lock must be acquired to do any of the following:
 *
 *		- Lookup a spa_t by name
 *		- Add or remove a spa_t from the namespace
 *		- Increase spa_refcount from non-zero
 *		- Check if spa_refcount is zero
 *		- Rename a spa_t
 *		- add/remove/attach/detach devices
 *		- Held for the duration of create/destroy/import/export
 *
 *	It does not need to handle recursion.  A create or destroy may
 *	reference objects (files or zvols) in other pools, but by
 *	definition they must have an existing reference, and will never need
 *	to lookup a spa_t by name.
 *
 * spa_refcount (per-spa refcount_t protected by mutex)
 *
 *	This reference count keeps track of any active users of the spa_t.  The
 *	spa_t cannot be destroyed or freed while this is non-zero.  Internally,
 *	the refcount is never really 'zero' - opening a pool implicitly keeps
 *	some references in the DMU.  Internally we check against SPA_MINREF, but
 *	present the image of a zero/non-zero value to consumers.
 *
 * spa_config_lock (per-spa read-priority rwlock)
 *
 *	This protects the spa_t from config changes, and must be held in
 *	the following circumstances:
 *
 *		- RW_READER to perform I/O to the spa
 *		- RW_WRITER to change the vdev config
 *
 * spa_config_cache_lock (per-spa mutex)
 *
 *	This mutex prevents the spa_config nvlist from being updated.  No
 *	other locks are required to obtain this lock, although implicitly you
 *	must have the namespace lock or non-zero refcount to have any kind
 *	of spa_t pointer at all.
 *
 * The locking order is fairly straightforward:
 *
 *		spa_namespace_lock	->	spa_refcount
 *
 *	The namespace lock must be acquired to increase the refcount from 0
 *	or to check if it is zero.
 *
 *		spa_refcount		->	spa_config_lock
 *
 *	There must be at least one valid reference on the spa_t to acquire
 *	the config lock.
 *
 *		spa_namespace_lock	->	spa_config_lock
 *
 *	The namespace lock must always be taken before the config lock.
 *
 *
 * The spa_namespace_lock and spa_config_cache_lock can be acquired directly
 * and are globally visible.
 *
 * The namespace is manipulated using the following functions, all of which
 * require the spa_namespace_lock to be held.
 *
 *	spa_lookup()		Lookup a spa_t by name.
 *
 *	spa_add()		Create a new spa_t in the namespace.
 *
 *	spa_remove()		Remove a spa_t from the namespace.  This also
 *				frees up any memory associated with the spa_t.
 *
 *	spa_next()		Returns the next spa_t in the system, or the
 *				first if NULL is passed.
 *
 *	spa_evict_all()		Shutdown and remove all spa_t structures in
 *				the system.
 *
 *	spa_guid_exists()	Determine whether a pool/device guid exists.
 *
 * The spa_refcount is manipulated using the following functions:
 *
 *	spa_open_ref()		Adds a reference to the given spa_t.  Must be
 *				called with spa_namespace_lock held if the
 *				refcount is currently zero.
 *
 *	spa_close()		Remove a reference from the spa_t.  This will
 *				not free the spa_t or remove it from the
 *				namespace.  No locking is required.
 *
 *	spa_refcount_zero()	Returns true if the refcount is currently
 *				zero.  Must be called with spa_namespace_lock
 *				held.
 *
 * The spa_config_lock is manipulated using the following functions:
 *
 *	spa_config_enter()	Acquire the config lock as RW_READER or
 *				RW_WRITER.  At least one reference on the spa_t
 *				must exist.
 *
 *	spa_config_exit()	Release the config lock.
 *
 *	spa_config_held()	Returns true if the config lock is currently
 *				held in the given state.
 *
 * The vdev configuration is protected by spa_vdev_enter() / spa_vdev_exit().
 *
 *	spa_vdev_enter()	Acquire the namespace lock and the config lock
 *				for writing.
 *
 *	spa_vdev_exit()		Release the config lock, wait for all I/O
 *				to complete, sync the updated configs to the
 *				cache, and release the namespace lock.
 *
 * The spa_name() function also requires either the spa_namespace_lock
 * or the spa_config_lock, as both are needed to do a rename.  spa_rename() is
 * also implemented within this file since it requires manipulation of the
 * namespace.
 */
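
/*
 * To make the ordering above concrete, a hypothetical consumer that wants
 * to perform I/O against a pool might do the following (an illustrative
 * sketch only; real consumers normally go through spa_open() and
 * spa_close(), which take care of this for them):
 *
 *	mutex_enter(&spa_namespace_lock);
 *	if ((spa = spa_lookup(name)) != NULL)
 *		spa_open_ref(spa, FTAG);
 *	mutex_exit(&spa_namespace_lock);
 *
 *	if (spa != NULL) {
 *		spa_config_enter(spa, RW_READER, FTAG);
 *		... issue I/O against the pool's vdevs ...
 *		spa_config_exit(spa, FTAG);
 *		spa_close(spa, FTAG);
 *	}
 *
 * The namespace lock is needed only to find the spa_t and take the first
 * reference; once a reference is held, the config lock may be taken and
 * dropped without it, which matches the ordering rules above.
 */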

static avl_tree_t spa_namespace_avl;
kmutex_t spa_namespace_lock;
static kcondvar_t spa_namespace_cv;
static int spa_active_count;
int spa_max_replication_override = SPA_DVAS_PER_BP;

static kmutex_t spa_spare_lock;
static avl_tree_t spa_spare_avl;

kmem_cache_t *spa_buffer_pool;
int spa_mode;

#ifdef ZFS_DEBUG
/* Everything except dprintf is on by default in debug builds */
int zfs_flags = ~ZFS_DEBUG_DPRINTF;
#else
int zfs_flags = 0;
#endif

/*
 * zfs_recover can be set to nonzero to attempt to recover from
 * otherwise-fatal errors, typically caused by on-disk corruption.  When
 * set, calls to zfs_panic_recover() will turn into warning messages.
 */
int zfs_recover = 0;

#define	SPA_MINREF	5	/* spa_refcnt for an open-but-idle pool */

/*
 * ==========================================================================
 * SPA namespace functions
 * ==========================================================================
 */

/*
 * Lookup the named spa_t in the AVL tree.  The spa_namespace_lock must be held.
 * Returns NULL if no matching spa_t is found.
 */
spa_t *
spa_lookup(const char *name)
{
	spa_t search, *spa;
	avl_index_t where;
	char c;
	char *cp;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	/*
	 * If it's a full dataset name, figure out the pool name and
	 * just use that.
	 */
	cp = strpbrk(name, "/@");
	if (cp) {
		c = *cp;
		*cp = '\0';
	}

	search.spa_name = (char *)name;
	spa = avl_find(&spa_namespace_avl, &search, &where);

	if (cp)
		*cp = c;

	return (spa);
}

/*
 * Create an uninitialized spa_t with the given name.  Requires
 * spa_namespace_lock.  The caller must ensure that the spa_t doesn't already
 * exist by calling spa_lookup() first.
 */
spa_t *
spa_add(const char *name, const char *altroot)
{
	spa_t *spa;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);

	rw_init(&spa->spa_traverse_lock, NULL, RW_DEFAULT, NULL);

	mutex_init(&spa->spa_uberblock_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_config_cache_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_sync_bplist.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);

	cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&spa->spa_scrub_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);

	spa->spa_name = spa_strdup(name);
	spa->spa_state = POOL_STATE_UNINITIALIZED;
	spa->spa_freeze_txg = UINT64_MAX;
	spa->spa_final_txg = UINT64_MAX;

	refcount_create(&spa->spa_refcount);
	rprw_init(&spa->spa_config_lock);

	avl_add(&spa_namespace_avl, spa);

	/*
	 * Set the alternate root, if there is one.
	 */
	if (altroot) {
		spa->spa_root = spa_strdup(altroot);
		spa_active_count++;
	}

	return (spa);
}

/*
 * Removes a spa_t from the namespace, freeing up any memory used.  Requires
 * spa_namespace_lock.  This is called only after the spa_t has been closed and
 * deactivated.
 */
void
spa_remove(spa_t *spa)
{
	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
	ASSERT(spa->spa_scrub_thread == NULL);

	avl_remove(&spa_namespace_avl, spa);
	cv_broadcast(&spa_namespace_cv);

	if (spa->spa_root) {
		spa_strfree(spa->spa_root);
		spa_active_count--;
	}

	if (spa->spa_name)
		spa_strfree(spa->spa_name);

	spa_config_set(spa, NULL);

	refcount_destroy(&spa->spa_refcount);

	rprw_destroy(&spa->spa_config_lock);

	rw_destroy(&spa->spa_traverse_lock);

	cv_destroy(&spa->spa_async_cv);
	cv_destroy(&spa->spa_scrub_cv);
	cv_destroy(&spa->spa_scrub_io_cv);

	mutex_destroy(&spa->spa_uberblock_lock);
	mutex_destroy(&spa->spa_async_lock);
	mutex_destroy(&spa->spa_config_cache_lock);
	mutex_destroy(&spa->spa_scrub_lock);
	mutex_destroy(&spa->spa_errlog_lock);
	mutex_destroy(&spa->spa_errlist_lock);
	mutex_destroy(&spa->spa_sync_bplist.bpl_lock);
	mutex_destroy(&spa->spa_history_lock);
	mutex_destroy(&spa->spa_props_lock);

	kmem_free(spa, sizeof (spa_t));
}

/*
 * Given a pool, return the next pool in the namespace, or NULL if there is
 * none.  If 'prev' is NULL, return the first pool.
 */
spa_t *
spa_next(spa_t *prev)
{
	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	if (prev)
		return (AVL_NEXT(&spa_namespace_avl, prev));
	else
		return (avl_first(&spa_namespace_avl));
}

/*
 * ==========================================================================
 * SPA refcount functions
 * ==========================================================================
 */

/*
 * Add a reference to the given spa_t.  Must have at least one reference, or
 * have the namespace lock held.
 */
void
spa_open_ref(spa_t *spa, void *tag)
{
	ASSERT(refcount_count(&spa->spa_refcount) > SPA_MINREF ||
	    MUTEX_HELD(&spa_namespace_lock));

	(void) refcount_add(&spa->spa_refcount, tag);
}

/*
 * Remove a reference to the given spa_t.  Must have at least one reference, or
 * have the namespace lock held.
 */
void
spa_close(spa_t *spa, void *tag)
{
	ASSERT(refcount_count(&spa->spa_refcount) > SPA_MINREF ||
	    MUTEX_HELD(&spa_namespace_lock));

	(void) refcount_remove(&spa->spa_refcount, tag);
}

/*
 * Check to see if the spa refcount is zero.  Must be called with
 * spa_namespace_lock held.  We really compare against SPA_MINREF, which is the
 * number of references acquired when opening a pool.
 */
boolean_t
spa_refcount_zero(spa_t *spa)
{
	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	return (refcount_count(&spa->spa_refcount) == SPA_MINREF);
}

/*
 * ==========================================================================
 * SPA spare tracking
 * ==========================================================================
 */

/*
 * Spares are tracked globally due to the following constraints:
 *
 * 	- A spare may be part of multiple pools.
 * 	- A spare may be added to a pool even if it's actively in use within
 *	  another pool.
 * 	- A spare in use in any pool can only be the source of a replacement if
 *	  the target is a spare in the same pool.
 *
 * We keep track of all spares on the system through the use of a reference
 * counted AVL tree.  When a vdev is added as a spare, or used as a replacement
 * spare, we bump the reference count in the AVL tree.  In addition, we set
 * the 'vdev_isspare' member to indicate that the device is a spare (active or
 * inactive).  When a spare is made active (used to replace a device in the
 * pool), we also keep track of which pool it has been made a part of.
 *
 * The 'spa_spare_lock' protects the AVL tree.  These functions are normally
 * called under the spa_namespace_lock as part of vdev reconfiguration.  The
 * separate spare lock exists for the status query path, which does not need to
 * be completely consistent with respect to other vdev configuration changes.
 */
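
/*
 * As a rough sketch of how these pieces fit together (illustrative only;
 * the actual call sites live in the vdev and pool import/configuration
 * code, not in this file):
 *
 *	spa_spare_add(vd);			vd becomes a known spare
 *	spa_spare_activate(vd);			vd is now replacing a device
 *						in vd->vdev_spa
 *	(void) spa_spare_exists(guid, &pool);	status path: is guid a known
 *						spare, and in which pool is
 *						it active?
 *	spa_spare_remove(vd);			vd is no longer a spare
 */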

typedef struct spa_spare {
	uint64_t	spare_guid;
	uint64_t	spare_pool;
	avl_node_t	spare_avl;
	int		spare_count;
} spa_spare_t;

static int
spa_spare_compare(const void *a, const void *b)
{
	const spa_spare_t *sa = a;
	const spa_spare_t *sb = b;

	if (sa->spare_guid < sb->spare_guid)
		return (-1);
	else if (sa->spare_guid > sb->spare_guid)
		return (1);
	else
		return (0);
}

void
spa_spare_add(vdev_t *vd)
{
	avl_index_t where;
	spa_spare_t search;
	spa_spare_t *spare;

	mutex_enter(&spa_spare_lock);
	ASSERT(!vd->vdev_isspare);

	search.spare_guid = vd->vdev_guid;
	if ((spare = avl_find(&spa_spare_avl, &search, &where)) != NULL) {
		spare->spare_count++;
	} else {
		spare = kmem_zalloc(sizeof (spa_spare_t), KM_SLEEP);
		spare->spare_guid = vd->vdev_guid;
		spare->spare_count = 1;
		avl_insert(&spa_spare_avl, spare, where);
	}
	vd->vdev_isspare = B_TRUE;

	mutex_exit(&spa_spare_lock);
}

void
spa_spare_remove(vdev_t *vd)
{
	spa_spare_t search;
	spa_spare_t *spare;
	avl_index_t where;

	mutex_enter(&spa_spare_lock);

	search.spare_guid = vd->vdev_guid;
	spare = avl_find(&spa_spare_avl, &search, &where);

	ASSERT(vd->vdev_isspare);
	ASSERT(spare != NULL);

	if (--spare->spare_count == 0) {
		avl_remove(&spa_spare_avl, spare);
		kmem_free(spare, sizeof (spa_spare_t));
	} else if (spare->spare_pool == spa_guid(vd->vdev_spa)) {
		spare->spare_pool = 0ULL;
	}

	vd->vdev_isspare = B_FALSE;
	mutex_exit(&spa_spare_lock);
}

boolean_t
spa_spare_exists(uint64_t guid, uint64_t *pool)
{
	spa_spare_t search, *found;
	avl_index_t where;

	mutex_enter(&spa_spare_lock);

	search.spare_guid = guid;
	found = avl_find(&spa_spare_avl, &search, &where);

	if (pool) {
		if (found)
			*pool = found->spare_pool;
		else
			*pool = 0ULL;
	}

	mutex_exit(&spa_spare_lock);

	return (found != NULL);
}

void
spa_spare_activate(vdev_t *vd)
{
	spa_spare_t search, *found;
	avl_index_t where;

	mutex_enter(&spa_spare_lock);
	ASSERT(vd->vdev_isspare);

	search.spare_guid = vd->vdev_guid;
	found = avl_find(&spa_spare_avl, &search, &where);
	ASSERT(found != NULL);
	ASSERT(found->spare_pool == 0ULL);

	found->spare_pool = spa_guid(vd->vdev_spa);
	mutex_exit(&spa_spare_lock);
}

/*
 * ==========================================================================
 * SPA config locking
 * ==========================================================================
 */
void
spa_config_enter(spa_t *spa, krw_t rw, void *tag)
{
	rprw_enter(&spa->spa_config_lock, rw, tag);
}

void
spa_config_exit(spa_t *spa, void *tag)
{
	rprw_exit(&spa->spa_config_lock, tag);
}

boolean_t
spa_config_held(spa_t *spa, krw_t rw)
{
	return (rprw_held(&spa->spa_config_lock, rw));
}

/*
 * ==========================================================================
 * SPA vdev locking
 * ==========================================================================
 */

/*
 * Lock the given spa_t for the purpose of adding or removing a vdev.
 * Grabs the global spa_namespace_lock plus the spa config lock for writing.
 * It returns the next transaction group for the spa_t.
 */
uint64_t
spa_vdev_enter(spa_t *spa)
{
	mutex_enter(&spa_namespace_lock);

	/*
	 * Suspend scrub activity while we mess with the config.  We must do
	 * this after acquiring the namespace lock to avoid a 3-way deadlock
	 * with spa_scrub_stop() and the scrub thread.
	 */
	spa_scrub_suspend(spa);

	spa_config_enter(spa, RW_WRITER, spa);

	return (spa_last_synced_txg(spa) + 1);
}

/*
 * Unlock the spa_t after adding or removing a vdev.  Besides undoing the
 * locking of spa_vdev_enter(), we also want to make sure the transactions
 * have synced to disk, and then update the global configuration cache with
 * the new information.
 */
int
spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
{
	int config_changed = B_FALSE;

	ASSERT(txg > spa_last_synced_txg(spa));

	/*
	 * Reassess the DTLs.
	 */
	vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE);

	/*
	 * If the config changed, notify the scrub thread that it must restart.
	 */
	if (error == 0 && !list_is_empty(&spa->spa_dirty_list)) {
		config_changed = B_TRUE;
		spa_scrub_restart(spa, txg);
	}

	spa_config_exit(spa, spa);

	/*
	 * Allow scrubbing to resume.
	 */
	spa_scrub_resume(spa);

	/*
	 * Note: this txg_wait_synced() is important because it ensures
	 * that there won't be more than one config change per txg.
	 * This allows us to use the txg as the generation number.
	 */
	if (error == 0)
		txg_wait_synced(spa->spa_dsl_pool, txg);

	if (vd != NULL) {
		ASSERT(!vd->vdev_detached || vd->vdev_dtl.smo_object == 0);
		vdev_free(vd);
	}

	/*
	 * If the config changed, update the config cache.
	 */
	if (config_changed)
		spa_config_sync();

	mutex_exit(&spa_namespace_lock);

	return (error);
}
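
/*
 * The typical shape of a caller of the two functions above is (a sketch,
 * assuming some hypothetical operation that modifies the vdev tree):
 *
 *	uint64_t txg = spa_vdev_enter(spa);
 *
 *	... modify the vdev configuration for 'txg', possibly setting
 *	    'error' and leaving an obsolete vdev tree in 'vd' ...
 *
 *	return (spa_vdev_exit(spa, vd, txg, error));
 *
 * spa_vdev_exit() is called even on failure so that scrubbing is resumed
 * and the namespace lock is dropped exactly once per spa_vdev_enter().
 */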

/*
 * ==========================================================================
 * Miscellaneous functions
 * ==========================================================================
 */

/*
 * Rename a spa_t.
 */
int
spa_rename(const char *name, const char *newname)
{
	spa_t *spa;
	int err;

	/*
	 * Lookup the spa_t and grab the config lock for writing.  We need to
	 * actually open the pool so that we can sync out the necessary labels.
	 * It's OK to call spa_open() with the namespace lock held because we
	 * allow recursive calls for other reasons.
	 */
	mutex_enter(&spa_namespace_lock);
	if ((err = spa_open(name, &spa, FTAG)) != 0) {
		mutex_exit(&spa_namespace_lock);
		return (err);
	}

	spa_config_enter(spa, RW_WRITER, FTAG);

	avl_remove(&spa_namespace_avl, spa);
	spa_strfree(spa->spa_name);
	spa->spa_name = spa_strdup(newname);
	avl_add(&spa_namespace_avl, spa);

	/*
	 * Sync all labels to disk with the new names by marking the root vdev
	 * dirty and waiting for it to sync.  It will pick up the new pool name
	 * during the sync.
	 */
	vdev_config_dirty(spa->spa_root_vdev);

	spa_config_exit(spa, FTAG);

	txg_wait_synced(spa->spa_dsl_pool, 0);

	/*
	 * Sync the updated config cache.
	 */
	spa_config_sync();

	spa_close(spa, FTAG);

	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Determine whether a pool with the given pool_guid exists.  If device_guid is
 * non-zero, determine whether the pool exists *and* contains a device with the
 * specified device_guid.
 */
boolean_t
spa_guid_exists(uint64_t pool_guid, uint64_t device_guid)
{
	spa_t *spa;
	avl_tree_t *t = &spa_namespace_avl;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	for (spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) {
		if (spa->spa_state == POOL_STATE_UNINITIALIZED)
			continue;
		if (spa->spa_root_vdev == NULL)
			continue;
		if (spa_guid(spa) == pool_guid) {
			if (device_guid == 0)
				break;

			if (vdev_lookup_by_guid(spa->spa_root_vdev,
			    device_guid) != NULL)
				break;

			/*
			 * Check any devices we may be in the process of adding.
			 */
			if (spa->spa_pending_vdev) {
				if (vdev_lookup_by_guid(spa->spa_pending_vdev,
				    device_guid) != NULL)
					break;
			}
		}
	}

	return (spa != NULL);
}

char *
spa_strdup(const char *s)
{
	size_t len;
	char *new;

	len = strlen(s);
	new = kmem_alloc(len + 1, KM_SLEEP);
	bcopy(s, new, len);
	new[len] = '\0';

	return (new);
}

void
spa_strfree(char *s)
{
	kmem_free(s, strlen(s) + 1);
}

uint64_t
spa_get_random(uint64_t range)
{
	uint64_t r;

	ASSERT(range != 0);

	(void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t));

	return (r % range);
}

void
sprintf_blkptr(char *buf, int len, const blkptr_t *bp)
{
	int d;

	if (bp == NULL) {
		(void) snprintf(buf, len, "<NULL>");
		return;
	}

	if (BP_IS_HOLE(bp)) {
		(void) snprintf(buf, len, "<hole>");
		return;
	}

	(void) snprintf(buf, len, "[L%llu %s] %llxL/%llxP ",
	    (u_longlong_t)BP_GET_LEVEL(bp),
	    dmu_ot[BP_GET_TYPE(bp)].ot_name,
	    (u_longlong_t)BP_GET_LSIZE(bp),
	    (u_longlong_t)BP_GET_PSIZE(bp));

	for (d = 0; d < BP_GET_NDVAS(bp); d++) {
		const dva_t *dva = &bp->blk_dva[d];
		(void) snprintf(buf + strlen(buf), len - strlen(buf),
		    "DVA[%d]=<%llu:%llx:%llx> ", d,
		    (u_longlong_t)DVA_GET_VDEV(dva),
		    (u_longlong_t)DVA_GET_OFFSET(dva),
		    (u_longlong_t)DVA_GET_ASIZE(dva));
	}

	(void) snprintf(buf + strlen(buf), len - strlen(buf),
	    "%s %s %s %s birth=%llu fill=%llu cksum=%llx:%llx:%llx:%llx",
	    zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
	    zio_compress_table[BP_GET_COMPRESS(bp)].ci_name,
	    BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE",
	    BP_IS_GANG(bp) ? "gang" : "contiguous",
	    (u_longlong_t)bp->blk_birth,
	    (u_longlong_t)bp->blk_fill,
	    (u_longlong_t)bp->blk_cksum.zc_word[0],
	    (u_longlong_t)bp->blk_cksum.zc_word[1],
	    (u_longlong_t)bp->blk_cksum.zc_word[2],
	    (u_longlong_t)bp->blk_cksum.zc_word[3]);
}
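
/*
 * For a sense of the output format, a single-DVA block pointer would be
 * rendered roughly as follows (hypothetical values, shown only to
 * illustrate the layout; sizes and offsets are in hex):
 *
 *	[L0 ZFS plain file] 20000L/20000P DVA[0]=<0:12c000:20000>
 *	    fletcher2 uncompressed LE contiguous birth=10 fill=1
 *	    cksum=1234:5678:9abc:def0
 *
 * (Emitted as a single line; wrapped here for readability.)
 */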

void
spa_freeze(spa_t *spa)
{
	uint64_t freeze_txg = 0;

	spa_config_enter(spa, RW_WRITER, FTAG);
	if (spa->spa_freeze_txg == UINT64_MAX) {
		freeze_txg = spa_last_synced_txg(spa) + TXG_SIZE;
		spa->spa_freeze_txg = freeze_txg;
	}
	spa_config_exit(spa, FTAG);
	if (freeze_txg != 0)
		txg_wait_synced(spa_get_dsl(spa), freeze_txg);
}

void
zfs_panic_recover(const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx);
	va_end(adx);
}

/*
 * ==========================================================================
 * Accessor functions
 * ==========================================================================
 */

krwlock_t *
spa_traverse_rwlock(spa_t *spa)
{
	return (&spa->spa_traverse_lock);
}

int
spa_traverse_wanted(spa_t *spa)
{
	return (spa->spa_traverse_wanted);
}

dsl_pool_t *
spa_get_dsl(spa_t *spa)
{
	return (spa->spa_dsl_pool);
}

blkptr_t *
spa_get_rootblkptr(spa_t *spa)
{
	return (&spa->spa_ubsync.ub_rootbp);
}

void
spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp)
{
	spa->spa_uberblock.ub_rootbp = *bp;
}

void
spa_altroot(spa_t *spa, char *buf, size_t buflen)
{
	if (spa->spa_root == NULL)
		buf[0] = '\0';
	else
		(void) strncpy(buf, spa->spa_root, buflen);
}

int
spa_sync_pass(spa_t *spa)
{
	return (spa->spa_sync_pass);
}

char *
spa_name(spa_t *spa)
{
	/*
	 * Accessing the name requires holding either the namespace lock or the
	 * config lock, both of which are required to do a rename.
	 */
	ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
	    spa_config_held(spa, RW_READER) || spa_config_held(spa, RW_WRITER));

	return (spa->spa_name);
}

uint64_t
spa_guid(spa_t *spa)
{
	/*
	 * If we fail to parse the config during spa_load(), we can go through
	 * the error path (which posts an ereport) and end up here with no root
	 * vdev.  We stash the original pool guid in 'spa_load_guid' to handle
	 * this case.
	 */
	if (spa->spa_root_vdev != NULL)
		return (spa->spa_root_vdev->vdev_guid);
	else
		return (spa->spa_load_guid);
}

uint64_t
spa_last_synced_txg(spa_t *spa)
{
	return (spa->spa_ubsync.ub_txg);
}

uint64_t
spa_first_txg(spa_t *spa)
{
	return (spa->spa_first_txg);
}

int
spa_state(spa_t *spa)
{
	return (spa->spa_state);
}

uint64_t
spa_freeze_txg(spa_t *spa)
{
	return (spa->spa_freeze_txg);
}

/*
 * Return how much space is allocated in the pool (i.e., the sum of all asize).
 */
uint64_t
spa_get_alloc(spa_t *spa)
{
	return (spa->spa_root_vdev->vdev_stat.vs_alloc);
}

/*
 * Return how much (raid-z inflated) space there is in the pool.
 */
uint64_t
spa_get_space(spa_t *spa)
{
	return (spa->spa_root_vdev->vdev_stat.vs_space);
}

/*
 * Return the amount of raid-z-deflated space in the pool.
 */
uint64_t
spa_get_dspace(spa_t *spa)
{
	if (spa->spa_deflate)
		return (spa->spa_root_vdev->vdev_stat.vs_dspace);
	else
		return (spa->spa_root_vdev->vdev_stat.vs_space);
}

/* ARGSUSED */
uint64_t
spa_get_asize(spa_t *spa, uint64_t lsize)
{
	/*
	 * For now, the worst case is 512-byte RAID-Z blocks, in which
	 * case the space requirement is exactly 2x; so just assume that.
	 * Add to this the fact that we can have up to 3 DVAs per bp, and
	 * we have to multiply by a total of 6x.
	 */
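	/*
	 * As a worked example of the above: a full 128K logical block may
	 * therefore reserve up to 6 * 128K = 768K of allocatable space.
	 */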
	return (lsize * 6);
}

uint64_t
spa_version(spa_t *spa)
{
	return (spa->spa_ubsync.ub_version);
}

int
spa_max_replication(spa_t *spa)
{
	/*
	 * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to
	 * handle BPs with more than one DVA allocated.  Set our max
	 * replication level accordingly.
	 */
	if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS)
		return (1);
	return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override));
}

uint64_t
bp_get_dasize(spa_t *spa, const blkptr_t *bp)
{
	int sz = 0, i;

	if (!spa->spa_deflate)
		return (BP_GET_ASIZE(bp));

	spa_config_enter(spa, RW_READER, FTAG);
	for (i = 0; i < SPA_DVAS_PER_BP; i++) {
		vdev_t *vd =
		    vdev_lookup_top(spa, DVA_GET_VDEV(&bp->blk_dva[i]));
		if (vd)
			sz += (DVA_GET_ASIZE(&bp->blk_dva[i]) >>
			    SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio;
	}
	spa_config_exit(spa, FTAG);
	return (sz);
}

/*
 * ==========================================================================
 * Initialization and Termination
 * ==========================================================================
 */

static int
spa_name_compare(const void *a1, const void *a2)
{
	const spa_t *s1 = a1;
	const spa_t *s2 = a2;
	int s;

	s = strcmp(s1->spa_name, s2->spa_name);
	if (s > 0)
		return (1);
	if (s < 0)
		return (-1);
	return (0);
}

int
spa_busy(void)
{
	return (spa_active_count);
}

void
spa_init(int mode)
{
	mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL);

	avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t),
	    offsetof(spa_t, spa_avl));

	avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_spare_t),
	    offsetof(spa_spare_t, spare_avl));

	spa_mode = mode;

	refcount_init();
	unique_init();
	zio_init();
	dmu_init();
	zil_init();
	zfs_prop_init();
	spa_config_load();
}

void
spa_fini(void)
{
	spa_evict_all();

	zil_fini();
	dmu_fini();
	zio_fini();
	unique_fini();
	refcount_fini();

	avl_destroy(&spa_namespace_avl);
	avl_destroy(&spa_spare_avl);

	cv_destroy(&spa_namespace_cv);
	mutex_destroy(&spa_namespace_lock);
	mutex_destroy(&spa_spare_lock);
}

/*
 * Return whether this pool has slogs.  No locking needed.
 * It's not a problem if the wrong answer is returned, as it's only used for
 * performance and not correctness.
 */
boolean_t
spa_has_slogs(spa_t *spa)
{
	return (spa->spa_log_class->mc_rotor != NULL);
}
