xref: /illumos-gate/usr/src/uts/common/fs/zfs/space_map.c (revision 5cabbc6b49070407fb9610cfe73d4c0e0dea3e77)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
22d6e555bdSGeorge Wilson  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23fa9e4066Sahrens  * Use is subject to license terms.
24fa9e4066Sahrens  */
25fb09f5aaSMadhav Suresh /*
263991b535SGeorge Wilson  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
27fb09f5aaSMadhav Suresh  */
28fa9e4066Sahrens 
29fa9e4066Sahrens #include <sys/zfs_context.h>
30fa9e4066Sahrens #include <sys/spa.h>
31fa9e4066Sahrens #include <sys/dmu.h>
320713e232SGeorge Wilson #include <sys/dmu_tx.h>
330713e232SGeorge Wilson #include <sys/dnode.h>
340713e232SGeorge Wilson #include <sys/dsl_pool.h>
35ecc2d604Sbonwick #include <sys/zio.h>
36fa9e4066Sahrens #include <sys/space_map.h>
370713e232SGeorge Wilson #include <sys/refcount.h>
380713e232SGeorge Wilson #include <sys/zfeature.h>
39fa9e4066Sahrens 
/*
 * Block size, in bytes, used when allocating a space map object (see
 * space_map_alloc()).  Space map data can live on blocks of any size:
 * larger blocks mean fewer i/o operations, but they make the DMU keep
 * more data in-core and waste i/o bandwidth when only a few blocks have
 * changed since the last transaction group.
 */
int space_map_blksz = 1 << 12;
47ecc2d604Sbonwick 
48ecc2d604Sbonwick /*
49*5cabbc6bSPrashanth Sreenivasa  * Iterate through the space map, invoking the callback on each (non-debug)
50*5cabbc6bSPrashanth Sreenivasa  * space map entry.
51ecc2d604Sbonwick  */
52fa9e4066Sahrens int
53*5cabbc6bSPrashanth Sreenivasa space_map_iterate(space_map_t *sm, sm_cb_t callback, void *arg)
54fa9e4066Sahrens {
55fa9e4066Sahrens 	uint64_t *entry, *entry_map, *entry_map_end;
56*5cabbc6bSPrashanth Sreenivasa 	uint64_t bufsize, size, offset, end;
570a4e9518Sgw 	int error = 0;
58fa9e4066Sahrens 
590713e232SGeorge Wilson 	end = space_map_length(sm);
60fa9e4066Sahrens 
610713e232SGeorge Wilson 	bufsize = MAX(sm->sm_blksz, SPA_MINBLOCKSIZE);
62ecc2d604Sbonwick 	entry_map = zio_buf_alloc(bufsize);
63ecc2d604Sbonwick 
640713e232SGeorge Wilson 	if (end > bufsize) {
65a2cdcdd2SPaul Dagnelie 		dmu_prefetch(sm->sm_os, space_map_object(sm), 0, bufsize,
66a2cdcdd2SPaul Dagnelie 		    end - bufsize, ZIO_PRIORITY_SYNC_READ);
670713e232SGeorge Wilson 	}
68ecc2d604Sbonwick 
69*5cabbc6bSPrashanth Sreenivasa 	for (offset = 0; offset < end && error == 0; offset += bufsize) {
70fa9e4066Sahrens 		size = MIN(end - offset, bufsize);
71fa9e4066Sahrens 		VERIFY(P2PHASE(size, sizeof (uint64_t)) == 0);
72fa9e4066Sahrens 		VERIFY(size != 0);
730713e232SGeorge Wilson 		ASSERT3U(sm->sm_blksz, !=, 0);
74fa9e4066Sahrens 
75fa9e4066Sahrens 		dprintf("object=%llu  offset=%llx  size=%llx\n",
760713e232SGeorge Wilson 		    space_map_object(sm), offset, size);
77ecc2d604Sbonwick 
780713e232SGeorge Wilson 		error = dmu_read(sm->sm_os, space_map_object(sm), offset, size,
790713e232SGeorge Wilson 		    entry_map, DMU_READ_PREFETCH);
800a4e9518Sgw 		if (error != 0)
81b8493d5dSvl 			break;
82fa9e4066Sahrens 
83fa9e4066Sahrens 		entry_map_end = entry_map + (size / sizeof (uint64_t));
84*5cabbc6bSPrashanth Sreenivasa 		for (entry = entry_map; entry < entry_map_end && error == 0;
85*5cabbc6bSPrashanth Sreenivasa 		    entry++) {
86fa9e4066Sahrens 			uint64_t e = *entry;
870713e232SGeorge Wilson 			uint64_t offset, size;
88fa9e4066Sahrens 
89*5cabbc6bSPrashanth Sreenivasa 			if (SM_DEBUG_DECODE(e))	/* Skip debug entries */
90fa9e4066Sahrens 				continue;
91fa9e4066Sahrens 
920713e232SGeorge Wilson 			offset = (SM_OFFSET_DECODE(e) << sm->sm_shift) +
930713e232SGeorge Wilson 			    sm->sm_start;
940713e232SGeorge Wilson 			size = SM_RUN_DECODE(e) << sm->sm_shift;
950713e232SGeorge Wilson 
960713e232SGeorge Wilson 			VERIFY0(P2PHASE(offset, 1ULL << sm->sm_shift));
970713e232SGeorge Wilson 			VERIFY0(P2PHASE(size, 1ULL << sm->sm_shift));
980713e232SGeorge Wilson 			VERIFY3U(offset, >=, sm->sm_start);
990713e232SGeorge Wilson 			VERIFY3U(offset + size, <=, sm->sm_start + sm->sm_size);
100*5cabbc6bSPrashanth Sreenivasa 			error = callback(SM_TYPE_DECODE(e), offset, size, arg);
101fa9e4066Sahrens 		}
102fa9e4066Sahrens 	}
103fa9e4066Sahrens 
104*5cabbc6bSPrashanth Sreenivasa 	zio_buf_free(entry_map, bufsize);
105*5cabbc6bSPrashanth Sreenivasa 	return (error);
106*5cabbc6bSPrashanth Sreenivasa }
107*5cabbc6bSPrashanth Sreenivasa 
108*5cabbc6bSPrashanth Sreenivasa typedef struct space_map_load_arg {
109*5cabbc6bSPrashanth Sreenivasa 	space_map_t	*smla_sm;
110*5cabbc6bSPrashanth Sreenivasa 	range_tree_t	*smla_rt;
111*5cabbc6bSPrashanth Sreenivasa 	maptype_t	smla_type;
112*5cabbc6bSPrashanth Sreenivasa } space_map_load_arg_t;
113*5cabbc6bSPrashanth Sreenivasa 
114*5cabbc6bSPrashanth Sreenivasa static int
115*5cabbc6bSPrashanth Sreenivasa space_map_load_callback(maptype_t type, uint64_t offset, uint64_t size,
116*5cabbc6bSPrashanth Sreenivasa     void *arg)
117*5cabbc6bSPrashanth Sreenivasa {
118*5cabbc6bSPrashanth Sreenivasa 	space_map_load_arg_t *smla = arg;
119*5cabbc6bSPrashanth Sreenivasa 	if (type == smla->smla_type) {
120*5cabbc6bSPrashanth Sreenivasa 		VERIFY3U(range_tree_space(smla->smla_rt) + size, <=,
121*5cabbc6bSPrashanth Sreenivasa 		    smla->smla_sm->sm_size);
122*5cabbc6bSPrashanth Sreenivasa 		range_tree_add(smla->smla_rt, offset, size);
123*5cabbc6bSPrashanth Sreenivasa 	} else {
124*5cabbc6bSPrashanth Sreenivasa 		range_tree_remove(smla->smla_rt, offset, size);
125*5cabbc6bSPrashanth Sreenivasa 	}
126*5cabbc6bSPrashanth Sreenivasa 
127*5cabbc6bSPrashanth Sreenivasa 	return (0);
128*5cabbc6bSPrashanth Sreenivasa }
129*5cabbc6bSPrashanth Sreenivasa 
130*5cabbc6bSPrashanth Sreenivasa /*
131*5cabbc6bSPrashanth Sreenivasa  * Load the space map disk into the specified range tree. Segments of maptype
132*5cabbc6bSPrashanth Sreenivasa  * are added to the range tree, other segment types are removed.
133*5cabbc6bSPrashanth Sreenivasa  */
134*5cabbc6bSPrashanth Sreenivasa int
135*5cabbc6bSPrashanth Sreenivasa space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
136*5cabbc6bSPrashanth Sreenivasa {
137*5cabbc6bSPrashanth Sreenivasa 	uint64_t space;
138*5cabbc6bSPrashanth Sreenivasa 	int err;
139*5cabbc6bSPrashanth Sreenivasa 	space_map_load_arg_t smla;
140*5cabbc6bSPrashanth Sreenivasa 
141*5cabbc6bSPrashanth Sreenivasa 	VERIFY0(range_tree_space(rt));
142*5cabbc6bSPrashanth Sreenivasa 	space = space_map_allocated(sm);
143*5cabbc6bSPrashanth Sreenivasa 
144*5cabbc6bSPrashanth Sreenivasa 	if (maptype == SM_FREE) {
145*5cabbc6bSPrashanth Sreenivasa 		range_tree_add(rt, sm->sm_start, sm->sm_size);
146*5cabbc6bSPrashanth Sreenivasa 		space = sm->sm_size - space;
147*5cabbc6bSPrashanth Sreenivasa 	}
148*5cabbc6bSPrashanth Sreenivasa 
149*5cabbc6bSPrashanth Sreenivasa 	smla.smla_rt = rt;
150*5cabbc6bSPrashanth Sreenivasa 	smla.smla_sm = sm;
151*5cabbc6bSPrashanth Sreenivasa 	smla.smla_type = maptype;
152*5cabbc6bSPrashanth Sreenivasa 	err = space_map_iterate(sm, space_map_load_callback, &smla);
153*5cabbc6bSPrashanth Sreenivasa 
154*5cabbc6bSPrashanth Sreenivasa 	if (err == 0) {
1550713e232SGeorge Wilson 		VERIFY3U(range_tree_space(rt), ==, space);
156*5cabbc6bSPrashanth Sreenivasa 	} else {
1570713e232SGeorge Wilson 		range_tree_vacate(rt, NULL, NULL);
158*5cabbc6bSPrashanth Sreenivasa 	}
159b8493d5dSvl 
160*5cabbc6bSPrashanth Sreenivasa 	return (err);
1610713e232SGeorge Wilson }
162ecc2d604Sbonwick 
1630713e232SGeorge Wilson void
1640713e232SGeorge Wilson space_map_histogram_clear(space_map_t *sm)
1650713e232SGeorge Wilson {
1660713e232SGeorge Wilson 	if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
1670713e232SGeorge Wilson 		return;
168ecc2d604Sbonwick 
1690713e232SGeorge Wilson 	bzero(sm->sm_phys->smp_histogram, sizeof (sm->sm_phys->smp_histogram));
1700713e232SGeorge Wilson }
171ecc2d604Sbonwick 
1720713e232SGeorge Wilson boolean_t
1730713e232SGeorge Wilson space_map_histogram_verify(space_map_t *sm, range_tree_t *rt)
1740713e232SGeorge Wilson {
1750713e232SGeorge Wilson 	/*
1760713e232SGeorge Wilson 	 * Verify that the in-core range tree does not have any
1770713e232SGeorge Wilson 	 * ranges smaller than our sm_shift size.
1780713e232SGeorge Wilson 	 */
1790713e232SGeorge Wilson 	for (int i = 0; i < sm->sm_shift; i++) {
1800713e232SGeorge Wilson 		if (rt->rt_histogram[i] != 0)
1810713e232SGeorge Wilson 			return (B_FALSE);
1820713e232SGeorge Wilson 	}
1830713e232SGeorge Wilson 	return (B_TRUE);
184fa9e4066Sahrens }
185fa9e4066Sahrens 
186fa9e4066Sahrens void
1870713e232SGeorge Wilson space_map_histogram_add(space_map_t *sm, range_tree_t *rt, dmu_tx_t *tx)
188ecc2d604Sbonwick {
1890713e232SGeorge Wilson 	int idx = 0;
1900713e232SGeorge Wilson 
1910713e232SGeorge Wilson 	ASSERT(dmu_tx_is_syncing(tx));
1920713e232SGeorge Wilson 	VERIFY3U(space_map_object(sm), !=, 0);
193ecc2d604Sbonwick 
1940713e232SGeorge Wilson 	if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
1950713e232SGeorge Wilson 		return;
196ecc2d604Sbonwick 
1970713e232SGeorge Wilson 	dmu_buf_will_dirty(sm->sm_dbuf, tx);
198ecc2d604Sbonwick 
1990713e232SGeorge Wilson 	ASSERT(space_map_histogram_verify(sm, rt));
2000713e232SGeorge Wilson 	/*
2010713e232SGeorge Wilson 	 * Transfer the content of the range tree histogram to the space
2020713e232SGeorge Wilson 	 * map histogram. The space map histogram contains 32 buckets ranging
2030713e232SGeorge Wilson 	 * between 2^sm_shift to 2^(32+sm_shift-1). The range tree,
2040713e232SGeorge Wilson 	 * however, can represent ranges from 2^0 to 2^63. Since the space
2050713e232SGeorge Wilson 	 * map only cares about allocatable blocks (minimum of sm_shift) we
2060713e232SGeorge Wilson 	 * can safely ignore all ranges in the range tree smaller than sm_shift.
2070713e232SGeorge Wilson 	 */
2080713e232SGeorge Wilson 	for (int i = sm->sm_shift; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
2090713e232SGeorge Wilson 
2100713e232SGeorge Wilson 		/*
2110713e232SGeorge Wilson 		 * Since the largest histogram bucket in the space map is
2120713e232SGeorge Wilson 		 * 2^(32+sm_shift-1), we need to normalize the values in
2130713e232SGeorge Wilson 		 * the range tree for any bucket larger than that size. For
2140713e232SGeorge Wilson 		 * example given an sm_shift of 9, ranges larger than 2^40
2150713e232SGeorge Wilson 		 * would get normalized as if they were 1TB ranges. Assume
2160713e232SGeorge Wilson 		 * the range tree had a count of 5 in the 2^44 (16TB) bucket,
2170713e232SGeorge Wilson 		 * the calculation below would normalize this to 5 * 2^4 (16).
2180713e232SGeorge Wilson 		 */
2190713e232SGeorge Wilson 		ASSERT3U(i, >=, idx + sm->sm_shift);
2200713e232SGeorge Wilson 		sm->sm_phys->smp_histogram[idx] +=
2210713e232SGeorge Wilson 		    rt->rt_histogram[i] << (i - idx - sm->sm_shift);
2220713e232SGeorge Wilson 
2230713e232SGeorge Wilson 		/*
2240713e232SGeorge Wilson 		 * Increment the space map's index as long as we haven't
2250713e232SGeorge Wilson 		 * reached the maximum bucket size. Accumulate all ranges
2260713e232SGeorge Wilson 		 * larger than the max bucket size into the last bucket.
2270713e232SGeorge Wilson 		 */
2282e4c9986SGeorge Wilson 		if (idx < SPACE_MAP_HISTOGRAM_SIZE - 1) {
2290713e232SGeorge Wilson 			ASSERT3U(idx + sm->sm_shift, ==, i);
2300713e232SGeorge Wilson 			idx++;
2312e4c9986SGeorge Wilson 			ASSERT3U(idx, <, SPACE_MAP_HISTOGRAM_SIZE);
2320713e232SGeorge Wilson 		}
2330713e232SGeorge Wilson 	}
234d6e555bdSGeorge Wilson }
235d6e555bdSGeorge Wilson 
236ecc2d604Sbonwick uint64_t
2370713e232SGeorge Wilson space_map_entries(space_map_t *sm, range_tree_t *rt)
238ecc2d604Sbonwick {
2390713e232SGeorge Wilson 	avl_tree_t *t = &rt->rt_root;
2400713e232SGeorge Wilson 	range_seg_t *rs;
2410713e232SGeorge Wilson 	uint64_t size, entries;
242ecc2d604Sbonwick 
2430713e232SGeorge Wilson 	/*
2440713e232SGeorge Wilson 	 * All space_maps always have a debug entry so account for it here.
2450713e232SGeorge Wilson 	 */
2460713e232SGeorge Wilson 	entries = 1;
247ecc2d604Sbonwick 
2480713e232SGeorge Wilson 	/*
2490713e232SGeorge Wilson 	 * Traverse the range tree and calculate the number of space map
2500713e232SGeorge Wilson 	 * entries that would be required to write out the range tree.
2510713e232SGeorge Wilson 	 */
2520713e232SGeorge Wilson 	for (rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
2530713e232SGeorge Wilson 		size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
2540713e232SGeorge Wilson 		entries += howmany(size, SM_RUN_MAX);
2550713e232SGeorge Wilson 	}
2560713e232SGeorge Wilson 	return (entries);
257ecc2d604Sbonwick }
258ecc2d604Sbonwick 
259ecc2d604Sbonwick void
2600713e232SGeorge Wilson space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
2610713e232SGeorge Wilson     dmu_tx_t *tx)
262fa9e4066Sahrens {
2630713e232SGeorge Wilson 	objset_t *os = sm->sm_os;
264fa9e4066Sahrens 	spa_t *spa = dmu_objset_spa(os);
2650713e232SGeorge Wilson 	avl_tree_t *t = &rt->rt_root;
2660713e232SGeorge Wilson 	range_seg_t *rs;
2670713e232SGeorge Wilson 	uint64_t size, total, rt_space, nodes;
268fa9e4066Sahrens 	uint64_t *entry, *entry_map, *entry_map_end;
269b1be2892SMatthew Ahrens 	uint64_t expected_entries, actual_entries = 1;
270fa9e4066Sahrens 
2710713e232SGeorge Wilson 	ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
2720713e232SGeorge Wilson 	VERIFY3U(space_map_object(sm), !=, 0);
2730713e232SGeorge Wilson 	dmu_buf_will_dirty(sm->sm_dbuf, tx);
274fa9e4066Sahrens 
2750713e232SGeorge Wilson 	/*
2760713e232SGeorge Wilson 	 * This field is no longer necessary since the in-core space map
2770713e232SGeorge Wilson 	 * now contains the object number but is maintained for backwards
2780713e232SGeorge Wilson 	 * compatibility.
2790713e232SGeorge Wilson 	 */
2800713e232SGeorge Wilson 	sm->sm_phys->smp_object = sm->sm_object;
281fa9e4066Sahrens 
2820713e232SGeorge Wilson 	if (range_tree_space(rt) == 0) {
2830713e232SGeorge Wilson 		VERIFY3U(sm->sm_object, ==, sm->sm_phys->smp_object);
2840713e232SGeorge Wilson 		return;
2850713e232SGeorge Wilson 	}
286fa9e4066Sahrens 
287ecc2d604Sbonwick 	if (maptype == SM_ALLOC)
2880713e232SGeorge Wilson 		sm->sm_phys->smp_alloc += range_tree_space(rt);
289ecc2d604Sbonwick 	else
2900713e232SGeorge Wilson 		sm->sm_phys->smp_alloc -= range_tree_space(rt);
291ecc2d604Sbonwick 
2920713e232SGeorge Wilson 	expected_entries = space_map_entries(sm, rt);
2930713e232SGeorge Wilson 
2940713e232SGeorge Wilson 	entry_map = zio_buf_alloc(sm->sm_blksz);
2950713e232SGeorge Wilson 	entry_map_end = entry_map + (sm->sm_blksz / sizeof (uint64_t));
296fa9e4066Sahrens 	entry = entry_map;
297fa9e4066Sahrens 
298fa9e4066Sahrens 	*entry++ = SM_DEBUG_ENCODE(1) |
299fa9e4066Sahrens 	    SM_DEBUG_ACTION_ENCODE(maptype) |
300fa9e4066Sahrens 	    SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) |
301fa9e4066Sahrens 	    SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
302fa9e4066Sahrens 
30316a4a807SGeorge Wilson 	total = 0;
3040713e232SGeorge Wilson 	nodes = avl_numnodes(&rt->rt_root);
3050713e232SGeorge Wilson 	rt_space = range_tree_space(rt);
3060713e232SGeorge Wilson 	for (rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
3070713e232SGeorge Wilson 		uint64_t start;
3080713e232SGeorge Wilson 
3090713e232SGeorge Wilson 		size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
3100713e232SGeorge Wilson 		start = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
3110713e232SGeorge Wilson 
3120713e232SGeorge Wilson 		total += size << sm->sm_shift;
313fa9e4066Sahrens 
3140713e232SGeorge Wilson 		while (size != 0) {
3150713e232SGeorge Wilson 			uint64_t run_len;
316fa9e4066Sahrens 
317fa9e4066Sahrens 			run_len = MIN(size, SM_RUN_MAX);
318fa9e4066Sahrens 
319fa9e4066Sahrens 			if (entry == entry_map_end) {
3200713e232SGeorge Wilson 				dmu_write(os, space_map_object(sm),
3210713e232SGeorge Wilson 				    sm->sm_phys->smp_objsize, sm->sm_blksz,
3220713e232SGeorge Wilson 				    entry_map, tx);
3230713e232SGeorge Wilson 				sm->sm_phys->smp_objsize += sm->sm_blksz;
324fa9e4066Sahrens 				entry = entry_map;
325fa9e4066Sahrens 			}
326fa9e4066Sahrens 
327fa9e4066Sahrens 			*entry++ = SM_OFFSET_ENCODE(start) |
328fa9e4066Sahrens 			    SM_TYPE_ENCODE(maptype) |
329fa9e4066Sahrens 			    SM_RUN_ENCODE(run_len);
330fa9e4066Sahrens 
331fa9e4066Sahrens 			start += run_len;
332fa9e4066Sahrens 			size -= run_len;
3330713e232SGeorge Wilson 			actual_entries++;
334fa9e4066Sahrens 		}
335fa9e4066Sahrens 	}
336fa9e4066Sahrens 
337fa9e4066Sahrens 	if (entry != entry_map) {
338fa9e4066Sahrens 		size = (entry - entry_map) * sizeof (uint64_t);
3390713e232SGeorge Wilson 		dmu_write(os, space_map_object(sm), sm->sm_phys->smp_objsize,
340fa9e4066Sahrens 		    size, entry_map, tx);
3410713e232SGeorge Wilson 		sm->sm_phys->smp_objsize += size;
342fa9e4066Sahrens 	}
3430713e232SGeorge Wilson 	ASSERT3U(expected_entries, ==, actual_entries);
344fa9e4066Sahrens 
34501f55e48SGeorge Wilson 	/*
34601f55e48SGeorge Wilson 	 * Ensure that the space_map's accounting wasn't changed
34701f55e48SGeorge Wilson 	 * while we were in the middle of writing it out.
34801f55e48SGeorge Wilson 	 */
3490713e232SGeorge Wilson 	VERIFY3U(nodes, ==, avl_numnodes(&rt->rt_root));
3500713e232SGeorge Wilson 	VERIFY3U(range_tree_space(rt), ==, rt_space);
3510713e232SGeorge Wilson 	VERIFY3U(range_tree_space(rt), ==, total);
35201f55e48SGeorge Wilson 
3530713e232SGeorge Wilson 	zio_buf_free(entry_map, sm->sm_blksz);
354fa9e4066Sahrens }
355fa9e4066Sahrens 
3560713e232SGeorge Wilson static int
3570713e232SGeorge Wilson space_map_open_impl(space_map_t *sm)
358fa9e4066Sahrens {
3590713e232SGeorge Wilson 	int error;
3600713e232SGeorge Wilson 	u_longlong_t blocks;
361fa9e4066Sahrens 
3620713e232SGeorge Wilson 	error = dmu_bonus_hold(sm->sm_os, sm->sm_object, sm, &sm->sm_dbuf);
3630713e232SGeorge Wilson 	if (error)
3640713e232SGeorge Wilson 		return (error);
3650713e232SGeorge Wilson 
3660713e232SGeorge Wilson 	dmu_object_size_from_db(sm->sm_dbuf, &sm->sm_blksz, &blocks);
3670713e232SGeorge Wilson 	sm->sm_phys = sm->sm_dbuf->db_data;
3680713e232SGeorge Wilson 	return (0);
369fa9e4066Sahrens }
3708ad4d6ddSJeff Bonwick 
3710713e232SGeorge Wilson int
3720713e232SGeorge Wilson space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
373*5cabbc6bSPrashanth Sreenivasa     uint64_t start, uint64_t size, uint8_t shift)
3748ad4d6ddSJeff Bonwick {
3750713e232SGeorge Wilson 	space_map_t *sm;
3760713e232SGeorge Wilson 	int error;
3778ad4d6ddSJeff Bonwick 
3780713e232SGeorge Wilson 	ASSERT(*smp == NULL);
3790713e232SGeorge Wilson 	ASSERT(os != NULL);
3800713e232SGeorge Wilson 	ASSERT(object != 0);
3818ad4d6ddSJeff Bonwick 
3820713e232SGeorge Wilson 	sm = kmem_zalloc(sizeof (space_map_t), KM_SLEEP);
3838ad4d6ddSJeff Bonwick 
3840713e232SGeorge Wilson 	sm->sm_start = start;
3850713e232SGeorge Wilson 	sm->sm_size = size;
3860713e232SGeorge Wilson 	sm->sm_shift = shift;
3870713e232SGeorge Wilson 	sm->sm_os = os;
3880713e232SGeorge Wilson 	sm->sm_object = object;
3898ad4d6ddSJeff Bonwick 
3900713e232SGeorge Wilson 	error = space_map_open_impl(sm);
3910713e232SGeorge Wilson 	if (error != 0) {
3920713e232SGeorge Wilson 		space_map_close(sm);
3930713e232SGeorge Wilson 		return (error);
3940713e232SGeorge Wilson 	}
3950713e232SGeorge Wilson 
3960713e232SGeorge Wilson 	*smp = sm;
3970713e232SGeorge Wilson 
3980713e232SGeorge Wilson 	return (0);
3998ad4d6ddSJeff Bonwick }
4008ad4d6ddSJeff Bonwick 
4018ad4d6ddSJeff Bonwick void
4020713e232SGeorge Wilson space_map_close(space_map_t *sm)
4038ad4d6ddSJeff Bonwick {
4040713e232SGeorge Wilson 	if (sm == NULL)
4050713e232SGeorge Wilson 		return;
4068ad4d6ddSJeff Bonwick 
4070713e232SGeorge Wilson 	if (sm->sm_dbuf != NULL)
4080713e232SGeorge Wilson 		dmu_buf_rele(sm->sm_dbuf, sm);
4090713e232SGeorge Wilson 	sm->sm_dbuf = NULL;
4100713e232SGeorge Wilson 	sm->sm_phys = NULL;
4118ad4d6ddSJeff Bonwick 
4120713e232SGeorge Wilson 	kmem_free(sm, sizeof (*sm));
4138ad4d6ddSJeff Bonwick }
4148ad4d6ddSJeff Bonwick 
4158ad4d6ddSJeff Bonwick void
4160713e232SGeorge Wilson space_map_truncate(space_map_t *sm, dmu_tx_t *tx)
4178ad4d6ddSJeff Bonwick {
4180713e232SGeorge Wilson 	objset_t *os = sm->sm_os;
4190713e232SGeorge Wilson 	spa_t *spa = dmu_objset_spa(os);
4200713e232SGeorge Wilson 	dmu_object_info_t doi;
4210713e232SGeorge Wilson 
4220713e232SGeorge Wilson 	ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
4230713e232SGeorge Wilson 	ASSERT(dmu_tx_is_syncing(tx));
4243991b535SGeorge Wilson 	VERIFY3U(dmu_tx_get_txg(tx), <=, spa_final_dirty_txg(spa));
4250713e232SGeorge Wilson 
4260713e232SGeorge Wilson 	dmu_object_info_from_db(sm->sm_dbuf, &doi);
4270713e232SGeorge Wilson 
428b1be2892SMatthew Ahrens 	/*
429b1be2892SMatthew Ahrens 	 * If the space map has the wrong bonus size (because
430b1be2892SMatthew Ahrens 	 * SPA_FEATURE_SPACEMAP_HISTOGRAM has recently been enabled), or
431b1be2892SMatthew Ahrens 	 * the wrong block size (because space_map_blksz has changed),
432b1be2892SMatthew Ahrens 	 * free and re-allocate its object with the updated sizes.
433b1be2892SMatthew Ahrens 	 *
434b1be2892SMatthew Ahrens 	 * Otherwise, just truncate the current object.
435b1be2892SMatthew Ahrens 	 */
436b1be2892SMatthew Ahrens 	if ((spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) &&
437b1be2892SMatthew Ahrens 	    doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
438b1be2892SMatthew Ahrens 	    doi.doi_data_block_size != space_map_blksz) {
4393991b535SGeorge Wilson 		zfs_dbgmsg("txg %llu, spa %s, sm %p, reallocating "
4403991b535SGeorge Wilson 		    "object[%llu]: old bonus %u, old blocksz %u",
4413991b535SGeorge Wilson 		    dmu_tx_get_txg(tx), spa_name(spa), sm, sm->sm_object,
4423991b535SGeorge Wilson 		    doi.doi_bonus_size, doi.doi_data_block_size);
443b1be2892SMatthew Ahrens 
444b1be2892SMatthew Ahrens 		space_map_free(sm, tx);
445b1be2892SMatthew Ahrens 		dmu_buf_rele(sm->sm_dbuf, sm);
446b1be2892SMatthew Ahrens 
447b1be2892SMatthew Ahrens 		sm->sm_object = space_map_alloc(sm->sm_os, tx);
448b1be2892SMatthew Ahrens 		VERIFY0(space_map_open_impl(sm));
449b1be2892SMatthew Ahrens 	} else {
450b1be2892SMatthew Ahrens 		VERIFY0(dmu_free_range(os, space_map_object(sm), 0, -1ULL, tx));
451b1be2892SMatthew Ahrens 
452b1be2892SMatthew Ahrens 		/*
453b1be2892SMatthew Ahrens 		 * If the spacemap is reallocated, its histogram
454b1be2892SMatthew Ahrens 		 * will be reset.  Do the same in the common case so that
455b1be2892SMatthew Ahrens 		 * bugs related to the uncommon case do not go unnoticed.
456b1be2892SMatthew Ahrens 		 */
457b1be2892SMatthew Ahrens 		bzero(sm->sm_phys->smp_histogram,
458b1be2892SMatthew Ahrens 		    sizeof (sm->sm_phys->smp_histogram));
4590713e232SGeorge Wilson 	}
4600713e232SGeorge Wilson 
4610713e232SGeorge Wilson 	dmu_buf_will_dirty(sm->sm_dbuf, tx);
4620713e232SGeorge Wilson 	sm->sm_phys->smp_objsize = 0;
4630713e232SGeorge Wilson 	sm->sm_phys->smp_alloc = 0;
4648ad4d6ddSJeff Bonwick }
4658ad4d6ddSJeff Bonwick 
4668ad4d6ddSJeff Bonwick /*
4670713e232SGeorge Wilson  * Update the in-core space_map allocation and length values.
4688ad4d6ddSJeff Bonwick  */
4698ad4d6ddSJeff Bonwick void
4700713e232SGeorge Wilson space_map_update(space_map_t *sm)
4718ad4d6ddSJeff Bonwick {
4720713e232SGeorge Wilson 	if (sm == NULL)
4730713e232SGeorge Wilson 		return;
4748ad4d6ddSJeff Bonwick 
4750713e232SGeorge Wilson 	sm->sm_alloc = sm->sm_phys->smp_alloc;
4760713e232SGeorge Wilson 	sm->sm_length = sm->sm_phys->smp_objsize;
4770713e232SGeorge Wilson }
4780713e232SGeorge Wilson 
4790713e232SGeorge Wilson uint64_t
4800713e232SGeorge Wilson space_map_alloc(objset_t *os, dmu_tx_t *tx)
4810713e232SGeorge Wilson {
4820713e232SGeorge Wilson 	spa_t *spa = dmu_objset_spa(os);
4830713e232SGeorge Wilson 	uint64_t object;
4840713e232SGeorge Wilson 	int bonuslen;
4850713e232SGeorge Wilson 
4862acef22dSMatthew Ahrens 	if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
4872acef22dSMatthew Ahrens 		spa_feature_incr(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM, tx);
4880713e232SGeorge Wilson 		bonuslen = sizeof (space_map_phys_t);
4890713e232SGeorge Wilson 		ASSERT3U(bonuslen, <=, dmu_bonus_max());
4900713e232SGeorge Wilson 	} else {
4910713e232SGeorge Wilson 		bonuslen = SPACE_MAP_SIZE_V0;
4920713e232SGeorge Wilson 	}
4930713e232SGeorge Wilson 
4940713e232SGeorge Wilson 	object = dmu_object_alloc(os,
495b1be2892SMatthew Ahrens 	    DMU_OT_SPACE_MAP, space_map_blksz,
4960713e232SGeorge Wilson 	    DMU_OT_SPACE_MAP_HEADER, bonuslen, tx);
4970713e232SGeorge Wilson 
4980713e232SGeorge Wilson 	return (object);
4998ad4d6ddSJeff Bonwick }
5008ad4d6ddSJeff Bonwick 
5018ad4d6ddSJeff Bonwick void
502*5cabbc6bSPrashanth Sreenivasa space_map_free_obj(objset_t *os, uint64_t smobj, dmu_tx_t *tx)
5038ad4d6ddSJeff Bonwick {
504*5cabbc6bSPrashanth Sreenivasa 	spa_t *spa = dmu_objset_spa(os);
5052acef22dSMatthew Ahrens 	if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
5060713e232SGeorge Wilson 		dmu_object_info_t doi;
5078ad4d6ddSJeff Bonwick 
508*5cabbc6bSPrashanth Sreenivasa 		VERIFY0(dmu_object_info(os, smobj, &doi));
5090713e232SGeorge Wilson 		if (doi.doi_bonus_size != SPACE_MAP_SIZE_V0) {
5102acef22dSMatthew Ahrens 			spa_feature_decr(spa,
5112acef22dSMatthew Ahrens 			    SPA_FEATURE_SPACEMAP_HISTOGRAM, tx);
5128ad4d6ddSJeff Bonwick 		}
5138ad4d6ddSJeff Bonwick 	}
5140713e232SGeorge Wilson 
515*5cabbc6bSPrashanth Sreenivasa 	VERIFY0(dmu_object_free(os, smobj, tx));
516*5cabbc6bSPrashanth Sreenivasa }
517*5cabbc6bSPrashanth Sreenivasa 
518*5cabbc6bSPrashanth Sreenivasa void
519*5cabbc6bSPrashanth Sreenivasa space_map_free(space_map_t *sm, dmu_tx_t *tx)
520*5cabbc6bSPrashanth Sreenivasa {
521*5cabbc6bSPrashanth Sreenivasa 	if (sm == NULL)
522*5cabbc6bSPrashanth Sreenivasa 		return;
523*5cabbc6bSPrashanth Sreenivasa 
524*5cabbc6bSPrashanth Sreenivasa 	space_map_free_obj(sm->sm_os, space_map_object(sm), tx);
5250713e232SGeorge Wilson 	sm->sm_object = 0;
5260713e232SGeorge Wilson }
5270713e232SGeorge Wilson 
5280713e232SGeorge Wilson uint64_t
5290713e232SGeorge Wilson space_map_object(space_map_t *sm)
5300713e232SGeorge Wilson {
5310713e232SGeorge Wilson 	return (sm != NULL ? sm->sm_object : 0);
5320713e232SGeorge Wilson }
5330713e232SGeorge Wilson 
5340713e232SGeorge Wilson /*
5350713e232SGeorge Wilson  * Returns the already synced, on-disk allocated space.
5360713e232SGeorge Wilson  */
5370713e232SGeorge Wilson uint64_t
5380713e232SGeorge Wilson space_map_allocated(space_map_t *sm)
5390713e232SGeorge Wilson {
5400713e232SGeorge Wilson 	return (sm != NULL ? sm->sm_alloc : 0);
5410713e232SGeorge Wilson }
5420713e232SGeorge Wilson 
5430713e232SGeorge Wilson /*
5440713e232SGeorge Wilson  * Returns the already synced, on-disk length;
5450713e232SGeorge Wilson  */
5460713e232SGeorge Wilson uint64_t
5470713e232SGeorge Wilson space_map_length(space_map_t *sm)
5480713e232SGeorge Wilson {
5490713e232SGeorge Wilson 	return (sm != NULL ? sm->sm_length : 0);
5500713e232SGeorge Wilson }
5510713e232SGeorge Wilson 
5520713e232SGeorge Wilson /*
5530713e232SGeorge Wilson  * Returns the allocated space that is currently syncing.
5540713e232SGeorge Wilson  */
5550713e232SGeorge Wilson int64_t
5560713e232SGeorge Wilson space_map_alloc_delta(space_map_t *sm)
5570713e232SGeorge Wilson {
5580713e232SGeorge Wilson 	if (sm == NULL)
5590713e232SGeorge Wilson 		return (0);
5600713e232SGeorge Wilson 	ASSERT(sm->sm_dbuf != NULL);
5610713e232SGeorge Wilson 	return (sm->sm_phys->smp_alloc - space_map_allocated(sm));
5628ad4d6ddSJeff Bonwick }
563