xref: /illumos-gate/usr/src/uts/common/fs/zfs/dmu.c (revision 9704bf7f)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
2294d1a210STim Haley  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23857c96d2SIgor Kozhukhov  */
24857c96d2SIgor Kozhukhov /*
25857c96d2SIgor Kozhukhov  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
266d658717SJohn Levon  * Copyright 2019 Joyent, Inc.
27857c96d2SIgor Kozhukhov  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
28adaec86aSMatthew Ahrens  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
29857c96d2SIgor Kozhukhov  * Copyright (c) 2018 DilOS
30fa9e4066Sahrens  */
31aad02571SSaso Kiselkov 
32fa9e4066Sahrens #include <sys/dmu.h>
33fa9e4066Sahrens #include <sys/dmu_impl.h>
34fa9e4066Sahrens #include <sys/dmu_tx.h>
35fa9e4066Sahrens #include <sys/dbuf.h>
36fa9e4066Sahrens #include <sys/dnode.h>
37fa9e4066Sahrens #include <sys/zfs_context.h>
38fa9e4066Sahrens #include <sys/dmu_objset.h>
39fa9e4066Sahrens #include <sys/dmu_traverse.h>
40fa9e4066Sahrens #include <sys/dsl_dataset.h>
41fa9e4066Sahrens #include <sys/dsl_dir.h>
42fa9e4066Sahrens #include <sys/dsl_pool.h>
431d452cf5Sahrens #include <sys/dsl_synctask.h>
44a2eea2e1Sahrens #include <sys/dsl_prop.h>
45fa9e4066Sahrens #include <sys/dmu_zfetch.h>
46fa9e4066Sahrens #include <sys/zfs_ioctl.h>
47fa9e4066Sahrens #include <sys/zap.h>
48ea8dc4b6Seschrock #include <sys/zio_checksum.h>
4980901aeaSGeorge Wilson #include <sys/zio_compress.h>
500a586ceaSMark Shellenbaum #include <sys/sa.h>
51b8289d24SDaniil Lunev #include <sys/zfeature.h>
52770499e1SDan Kimmel #include <sys/abd.h>
5344eda4d7Smaybee #ifdef _KERNEL
5444eda4d7Smaybee #include <sys/vmsystm.h>
550fab61baSJonathan W Adams #include <sys/zfs_znode.h>
5644eda4d7Smaybee #endif
58857c96d2SIgor Kozhukhov static xuio_stats_t xuio_stats = {
59857c96d2SIgor Kozhukhov 	{ "onloan_read_buf",	KSTAT_DATA_UINT64 },
60857c96d2SIgor Kozhukhov 	{ "onloan_write_buf",	KSTAT_DATA_UINT64 },
61857c96d2SIgor Kozhukhov 	{ "read_buf_copied",	KSTAT_DATA_UINT64 },
62857c96d2SIgor Kozhukhov 	{ "read_buf_nocopy",	KSTAT_DATA_UINT64 },
63857c96d2SIgor Kozhukhov 	{ "write_buf_copied",	KSTAT_DATA_UINT64 },
64857c96d2SIgor Kozhukhov 	{ "write_buf_nocopy",	KSTAT_DATA_UINT64 }
65857c96d2SIgor Kozhukhov };
66857c96d2SIgor Kozhukhov 
67857c96d2SIgor Kozhukhov #define	XUIOSTAT_INCR(stat, val)	\
68857c96d2SIgor Kozhukhov 	atomic_add_64(&xuio_stats.stat.value.ui64, (val))
69857c96d2SIgor Kozhukhov #define	XUIOSTAT_BUMP(stat)	XUIOSTAT_INCR(stat, 1)
70857c96d2SIgor Kozhukhov 
7180901aeaSGeorge Wilson /*
7280901aeaSGeorge Wilson  * Enable/disable nopwrite feature.
7380901aeaSGeorge Wilson  */
7480901aeaSGeorge Wilson int zfs_nopwrite_enabled = 1;
7580901aeaSGeorge Wilson 
76ff5177eeSAlek Pinchuk /*
77ff5177eeSAlek Pinchuk  * Tunable to control percentage of dirtied blocks from frees in one TXG.
78ff5177eeSAlek Pinchuk  * After this threshold is crossed, additional dirty blocks from frees
79ff5177eeSAlek Pinchuk  * wait until the next TXG.
80ff5177eeSAlek Pinchuk  * A value of zero will disable this throttle.
81ff5177eeSAlek Pinchuk  */
82ff5177eeSAlek Pinchuk uint32_t zfs_per_txg_dirty_frees_percent = 30;
83ff5177eeSAlek Pinchuk 
845cabbc6bSPrashanth Sreenivasa /*
855cabbc6bSPrashanth Sreenivasa  * This can be used for testing, to ensure that certain actions happen
865cabbc6bSPrashanth Sreenivasa  * while in the middle of a remap (which might otherwise complete too
875cabbc6bSPrashanth Sreenivasa  * quickly).
885cabbc6bSPrashanth Sreenivasa  */
895cabbc6bSPrashanth Sreenivasa int zfs_object_remap_one_indirect_delay_ticks = 0;
905cabbc6bSPrashanth Sreenivasa 
9152abb70eSMatthew Ahrens /*
9252abb70eSMatthew Ahrens  * Limit the amount we can prefetch with one call to this amount.  This
9352abb70eSMatthew Ahrens  * helps to limit the amount of memory that can be used by prefetching.
9452abb70eSMatthew Ahrens  * Larger objects should be prefetched a bit at a time.
9552abb70eSMatthew Ahrens  */
9652abb70eSMatthew Ahrens uint64_t dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE;
9752abb70eSMatthew Ahrens 
98fa9e4066Sahrens const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
99eb633035STom Caputi 	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "unallocated"		},
100eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "object directory"	},
101eb633035STom Caputi 	{ DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "object array"		},
102eb633035STom Caputi 	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "packed nvlist"	},
103eb633035STom Caputi 	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "packed nvlist size"	},
104eb633035STom Caputi 	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj"	},
105eb633035STom Caputi 	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj header"		},
106eb633035STom Caputi 	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA space map header"	},
107eb633035STom Caputi 	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA space map"	},
108eb633035STom Caputi 	{ DMU_BSWAP_UINT64, TRUE,  FALSE, TRUE,  "ZIL intent log"	},
109eb633035STom Caputi 	{ DMU_BSWAP_DNODE,  TRUE,  FALSE, TRUE,  "DMU dnode"	},
110eb633035STom Caputi 	{ DMU_BSWAP_OBJSET, TRUE,  TRUE,  FALSE, "DMU objset"	},
111eb633035STom Caputi 	{ DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL directory"	},
112eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL directory child map" },
113eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dataset snap map"	},
114eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL props"	},
115eb633035STom Caputi 	{ DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL dataset"	},
116eb633035STom Caputi 	{ DMU_BSWAP_ZNODE,  TRUE,  FALSE, FALSE, "ZFS znode"	},
117eb633035STom Caputi 	{ DMU_BSWAP_OLDACL, TRUE,  FALSE, TRUE,  "ZFS V0 ACL"	},
118eb633035STom Caputi 	{ DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "ZFS plain file"	},
119eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS directory"	},
120eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "ZFS master node"	},
121eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS delete queue"	},
122eb633035STom Caputi 	{ DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "zvol object"	},
123eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "zvol prop"	},
124eb633035STom Caputi 	{ DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "other uint8[]"	},
125eb633035STom Caputi 	{ DMU_BSWAP_UINT64, FALSE, FALSE, TRUE,  "other uint64[]"	},
126eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "other ZAP"	},
127eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "persistent error log"	},
128eb633035STom Caputi 	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "SPA history"	},
129eb633035STom Caputi 	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA history offsets"	},
130eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "Pool properties"	},
131eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL permissions"	},
132eb633035STom Caputi 	{ DMU_BSWAP_ACL,    TRUE,  FALSE, TRUE,  "ZFS ACL"	},
133eb633035STom Caputi 	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "ZFS SYSACL"	},
134eb633035STom Caputi 	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "FUID table"	},
135eb633035STom Caputi 	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "FUID table size"	},
136eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dataset next clones" },
137eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "scan work queue"	},
138f67950b2SNasf-Fan 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS user/group/project used"},
139f67950b2SNasf-Fan 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS user/group/proj quota"},
140eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "snapshot refcount tags" },
141eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "DDT ZAP algorithm"	},
142eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "DDT statistics"	},
143eb633035STom Caputi 	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "System attributes"	},
144eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "SA master node"	},
145eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "SA attr registration"	},
146eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "SA attr layouts"	},
147eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "scan translations"	},
148eb633035STom Caputi 	{ DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "deduplicated block"	},
149eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL deadlist map" },
150eb633035STom Caputi 	{ DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL deadlist map hdr"	},
151eb633035STom Caputi 	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dir clones"	},
152eb633035STom Caputi 	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj subobj"		}
153ad135b5dSChristopher Siden };
154ad135b5dSChristopher Siden 
155ad135b5dSChristopher Siden const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
156ad135b5dSChristopher Siden 	{	byteswap_uint8_array,	"uint8"		},
157ad135b5dSChristopher Siden 	{	byteswap_uint16_array,	"uint16"	},
158ad135b5dSChristopher Siden 	{	byteswap_uint32_array,	"uint32"	},
159ad135b5dSChristopher Siden 	{	byteswap_uint64_array,	"uint64"	},
160ad135b5dSChristopher Siden 	{	zap_byteswap,		"zap"		},
161ad135b5dSChristopher Siden 	{	dnode_buf_byteswap,	"dnode"		},
162ad135b5dSChristopher Siden 	{	dmu_objset_byteswap,	"objset"	},
163ad135b5dSChristopher Siden 	{	zfs_znode_byteswap,	"znode"		},
164ad135b5dSChristopher Siden 	{	zfs_oldacl_byteswap,	"oldacl"	},
165ad135b5dSChristopher Siden 	{	zfs_acl_byteswap,	"acl"		}
1663f9d6ad7SLin Ling };
16879d72832SMatthew Ahrens int
dmu_buf_hold_noread_by_dnode(dnode_t * dn,uint64_t offset,void * tag,dmu_buf_t ** dbp)16979d72832SMatthew Ahrens dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
17079d72832SMatthew Ahrens     void *tag, dmu_buf_t **dbp)
17179d72832SMatthew Ahrens {
17279d72832SMatthew Ahrens 	uint64_t blkid;
17379d72832SMatthew Ahrens 	dmu_buf_impl_t *db;
17479d72832SMatthew Ahrens 
17579d72832SMatthew Ahrens 	rw_enter(&dn->dn_struct_rwlock, RW_READER);
176*9704bf7fSPaul Dagnelie 	blkid = dbuf_whichblock(dn, 0, offset);
17779d72832SMatthew Ahrens 	db = dbuf_hold(dn, blkid, tag);
17879d72832SMatthew Ahrens 	rw_exit(&dn->dn_struct_rwlock);
17979d72832SMatthew Ahrens 
18079d72832SMatthew Ahrens 	if (db == NULL) {
18179d72832SMatthew Ahrens 		*dbp = NULL;
18279d72832SMatthew Ahrens 		return (SET_ERROR(EIO));
18379d72832SMatthew Ahrens 	}
18479d72832SMatthew Ahrens 
18579d72832SMatthew Ahrens 	*dbp = &db->db;
18679d72832SMatthew Ahrens 	return (0);
18779d72832SMatthew Ahrens }
188fa9e4066Sahrens int
dmu_buf_hold_noread(objset_t * os,uint64_t object,uint64_t offset,void * tag,dmu_buf_t ** dbp)1895d7b4d43SMatthew Ahrens dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
1905d7b4d43SMatthew Ahrens     void *tag, dmu_buf_t **dbp)
191fa9e4066Sahrens {
192fa9e4066Sahrens 	dnode_t *dn;
193fa9e4066Sahrens 	uint64_t blkid;
194fa9e4066Sahrens 	dmu_buf_impl_t *db;
195ea8dc4b6Seschrock 	int err;
197503ad85cSMatthew Ahrens 	err = dnode_hold(os, object, FTAG, &dn);
198ea8dc4b6Seschrock 	if (err)
199ea8dc4b6Seschrock 		return (err);
200fa9e4066Sahrens 	rw_enter(&dn->dn_struct_rwlock, RW_READER);
201*9704bf7fSPaul Dagnelie 	blkid = dbuf_whichblock(dn, 0, offset);
202ea8dc4b6Seschrock 	db = dbuf_hold(dn, blkid, tag);
203fa9e4066Sahrens 	rw_exit(&dn->dn_struct_rwlock);
2045d7b4d43SMatthew Ahrens 	dnode_rele(dn, FTAG);
2055d7b4d43SMatthew Ahrens 
206ea8dc4b6Seschrock 	if (db == NULL) {
2075d7b4d43SMatthew Ahrens 		*dbp = NULL;
2085d7b4d43SMatthew Ahrens 		return (SET_ERROR(EIO));
2095d7b4d43SMatthew Ahrens 	}
2105d7b4d43SMatthew Ahrens 
2115d7b4d43SMatthew Ahrens 	*dbp = &db->db;
2125d7b4d43SMatthew Ahrens 	return (err);
2135d7b4d43SMatthew Ahrens }
2145d7b4d43SMatthew Ahrens 
21579d72832SMatthew Ahrens int
dmu_buf_hold_by_dnode(dnode_t * dn,uint64_t offset,void * tag,dmu_buf_t ** dbp,int flags)21679d72832SMatthew Ahrens dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
21779d72832SMatthew Ahrens     void *tag, dmu_buf_t **dbp, int flags)
21879d72832SMatthew Ahrens {
21979d72832SMatthew Ahrens 	int err;
22079d72832SMatthew Ahrens 	int db_flags = DB_RF_CANFAIL;
22179d72832SMatthew Ahrens 
22279d72832SMatthew Ahrens 	if (flags & DMU_READ_NO_PREFETCH)
22379d72832SMatthew Ahrens 		db_flags |= DB_RF_NOPREFETCH;
224eb633035STom Caputi 	if (flags & DMU_READ_NO_DECRYPT)
225eb633035STom Caputi 		db_flags |= DB_RF_NO_DECRYPT;
22679d72832SMatthew Ahrens 
22779d72832SMatthew Ahrens 	err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp);
22879d72832SMatthew Ahrens 	if (err == 0) {
22979d72832SMatthew Ahrens 		dmu_buf_impl_t *db = (dmu_buf_impl_t *)(*dbp);
23079d72832SMatthew Ahrens 		err = dbuf_read(db, NULL, db_flags);
23179d72832SMatthew Ahrens 		if (err != 0) {
23279d72832SMatthew Ahrens 			dbuf_rele(db, tag);
23379d72832SMatthew Ahrens 			*dbp = NULL;
23479d72832SMatthew Ahrens 		}
23579d72832SMatthew Ahrens 	}
23679d72832SMatthew Ahrens 
23779d72832SMatthew Ahrens 	return (err);
23879d72832SMatthew Ahrens }
23979d72832SMatthew Ahrens 
2405d7b4d43SMatthew Ahrens int
dmu_buf_hold(objset_t * os,uint64_t object,uint64_t offset,void * tag,dmu_buf_t ** dbp,int flags)2415d7b4d43SMatthew Ahrens dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
2425d7b4d43SMatthew Ahrens     void *tag, dmu_buf_t **dbp, int flags)
2435d7b4d43SMatthew Ahrens {
2445d7b4d43SMatthew Ahrens 	int err;
2455d7b4d43SMatthew Ahrens 	int db_flags = DB_RF_CANFAIL;
2465d7b4d43SMatthew Ahrens 
2475d7b4d43SMatthew Ahrens 	if (flags & DMU_READ_NO_PREFETCH)
2485d7b4d43SMatthew Ahrens 		db_flags |= DB_RF_NOPREFETCH;
249eb633035STom Caputi 	if (flags & DMU_READ_NO_DECRYPT)
250eb633035STom Caputi 		db_flags |= DB_RF_NO_DECRYPT;
2515d7b4d43SMatthew Ahrens 
2525d7b4d43SMatthew Ahrens 	err = dmu_buf_hold_noread(os, object, offset, tag, dbp);
2535d7b4d43SMatthew Ahrens 	if (err == 0) {
2545d7b4d43SMatthew Ahrens 		dmu_buf_impl_t *db = (dmu_buf_impl_t *)(*dbp);
25547cb52daSJeff Bonwick 		err = dbuf_read(db, NULL, db_flags);
2565d7b4d43SMatthew Ahrens 		if (err != 0) {
257ea8dc4b6Seschrock 			dbuf_rele(db, tag);
2585d7b4d43SMatthew Ahrens 			*dbp = NULL;
259ea8dc4b6Seschrock 		}
260ea8dc4b6Seschrock 	}
262ea8dc4b6Seschrock 	return (err);
263fa9e4066Sahrens }
265fa9e4066Sahrens int
dmu_bonus_max(void)266fa9e4066Sahrens dmu_bonus_max(void)
267fa9e4066Sahrens {
26854811da5SToomas Soome 	return (DN_OLD_MAX_BONUSLEN);
269fa9e4066Sahrens }
2711934e92fSmaybee int
dmu_set_bonus(dmu_buf_t * db_fake,int newsize,dmu_tx_t * tx)272744947dcSTom Erickson dmu_set_bonus(dmu_buf_t *db_fake, int newsize, dmu_tx_t *tx)
2731934e92fSmaybee {
274744947dcSTom Erickson 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
275744947dcSTom Erickson 	dnode_t *dn;
276744947dcSTom Erickson 	int error;
278744947dcSTom Erickson 	DB_DNODE_ENTER(db);
279744947dcSTom Erickson 	dn = DB_DNODE(db);
280744947dcSTom Erickson 
281744947dcSTom Erickson 	if (dn->dn_bonus != db) {
282be6fd75aSMatthew Ahrens 		error = SET_ERROR(EINVAL);
283744947dcSTom Erickson 	} else if (newsize < 0 || newsize > db_fake->db_size) {
284be6fd75aSMatthew Ahrens 		error = SET_ERROR(EINVAL);
285744947dcSTom Erickson 	} else {
286744947dcSTom Erickson 		dnode_setbonuslen(dn, newsize, tx);
287744947dcSTom Erickson 		error = 0;
288744947dcSTom Erickson 	}
289744947dcSTom Erickson 
290744947dcSTom Erickson 	DB_DNODE_EXIT(db);
291744947dcSTom Erickson 	return (error);
2921934e92fSmaybee }
2940a586ceaSMark Shellenbaum int
dmu_set_bonustype(dmu_buf_t * db_fake,dmu_object_type_t type,dmu_tx_t * tx)295744947dcSTom Erickson dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx)
2960a586ceaSMark Shellenbaum {
297744947dcSTom Erickson 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
298744947dcSTom Erickson 	dnode_t *dn;
299744947dcSTom Erickson 	int error;
3000a586ceaSMark Shellenbaum 
301744947dcSTom Erickson 	DB_DNODE_ENTER(db);
302744947dcSTom Erickson 	dn = DB_DNODE(db);
303744947dcSTom Erickson 
304ad135b5dSChristopher Siden 	if (!DMU_OT_IS_VALID(type)) {
305be6fd75aSMatthew Ahrens 		error = SET_ERROR(EINVAL);
306744947dcSTom Erickson 	} else if (dn->dn_bonus != db) {
307be6fd75aSMatthew Ahrens 		error = SET_ERROR(EINVAL);
308744947dcSTom Erickson 	} else {
309744947dcSTom Erickson 		dnode_setbonus_type(dn, type, tx);
310744947dcSTom Erickson 		error = 0;
311744947dcSTom Erickson 	}
3120a586ceaSMark Shellenbaum 
313744947dcSTom Erickson 	DB_DNODE_EXIT(db);
314744947dcSTom Erickson 	return (error);
315744947dcSTom Erickson }
3160a586ceaSMark Shellenbaum 
317744947dcSTom Erickson dmu_object_type_t
dmu_get_bonustype(dmu_buf_t * db_fake)318744947dcSTom Erickson dmu_get_bonustype(dmu_buf_t *db_fake)
319744947dcSTom Erickson {
320744947dcSTom Erickson 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
321744947dcSTom Erickson 	dnode_t *dn;
322744947dcSTom Erickson 	dmu_object_type_t type;
323744947dcSTom Erickson 
324744947dcSTom Erickson 	DB_DNODE_ENTER(db);
325744947dcSTom Erickson 	dn = DB_DNODE(db);
326744947dcSTom Erickson 	type = dn->dn_bonustype;
327744947dcSTom Erickson 	DB_DNODE_EXIT(db);
328744947dcSTom Erickson 
329744947dcSTom Erickson 	return (type);
3300a586ceaSMark Shellenbaum }
3310a586ceaSMark Shellenbaum 
3320a586ceaSMark Shellenbaum int
dmu_rm_spill(objset_t * os,uint64_t object,dmu_tx_t * tx)3330a586ceaSMark Shellenbaum dmu_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
3340a586ceaSMark Shellenbaum {
3350a586ceaSMark Shellenbaum 	dnode_t *dn;
3360a586ceaSMark Shellenbaum 	int error;
3370a586ceaSMark Shellenbaum 
3380a586ceaSMark Shellenbaum 	error = dnode_hold(os, object, FTAG, &dn);
3390a586ceaSMark Shellenbaum 	dbuf_rm_spill(dn, tx);
34006e0070dSMark Shellenbaum 	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
34106e0070dSMark Shellenbaum 	dnode_rm_spill(dn, tx);
34206e0070dSMark Shellenbaum 	rw_exit(&dn->dn_struct_rwlock);
3430a586ceaSMark Shellenbaum 	dnode_rele(dn, FTAG);
3440a586ceaSMark Shellenbaum 	return (error);
3450a586ceaSMark Shellenbaum }
3460a586ceaSMark Shellenbaum 
347eb633035STom Caputi /*
348eb633035STom Caputi  * Lookup and hold the bonus buffer for the provided dnode.  If the dnode
349eb633035STom Caputi  * has not yet been allocated a new bonus dbuf a will be allocated.
350eb633035STom Caputi  * Returns ENOENT, EIO, or 0.
351eb633035STom Caputi  */
dmu_bonus_hold_by_dnode(dnode_t * dn,void * tag,dmu_buf_t ** dbp,uint32_t flags)352eb633035STom Caputi int dmu_bonus_hold_by_dnode(dnode_t *dn, void *tag, dmu_buf_t **dbp,
353eb633035STom Caputi     uint32_t flags)
354eb633035STom Caputi {
355eb633035STom Caputi 	dmu_buf_impl_t *db;
356eb633035STom Caputi 	int error;
357eb633035STom Caputi 	uint32_t db_flags = DB_RF_MUST_SUCCEED;
358eb633035STom Caputi 
359eb633035STom Caputi 	if (flags & DMU_READ_NO_PREFETCH)
360eb633035STom Caputi 		db_flags |= DB_RF_NOPREFETCH;
361eb633035STom Caputi 	if (flags & DMU_READ_NO_DECRYPT)
3626d658717SJohn Levon 		db_flags |= DB_RF_NO_DECRYPT;
363eb633035STom Caputi 
364eb633035STom Caputi 	rw_enter(&dn->dn_struct_rwlock, RW_READER);
365eb633035STom Caputi 	if (dn->dn_bonus == NULL) {
366eb633035STom Caputi 		rw_exit(&dn->dn_struct_rwlock);
367eb633035STom Caputi 		rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
368eb633035STom Caputi 		if (dn->dn_bonus == NULL)
369eb633035STom Caputi 			dbuf_create_bonus(dn);
370eb633035STom Caputi 	}
371eb633035STom Caputi 	db = dn->dn_bonus;
372eb633035STom Caputi 
373eb633035STom Caputi 	/* as long as the bonus buf is held, the dnode will be held */
374eb633035STom Caputi 	if (zfs_refcount_add(&db->db_holds, tag) == 1) {
375eb633035STom Caputi 		VERIFY(dnode_add_ref(dn, db));
376eb633035STom Caputi 		atomic_inc_32(&dn->dn_dbufs_count);
377eb633035STom Caputi 	}
378eb633035STom Caputi 
379eb633035STom Caputi 	/*
380eb633035STom Caputi 	 * Wait to drop dn_struct_rwlock until after adding the bonus dbuf's
381eb633035STom Caputi 	 * hold and incrementing the dbuf count to ensure that dnode_move() sees
382eb633035STom Caputi 	 * a dnode hold for every dbuf.
383eb633035STom Caputi 	 */
384eb633035STom Caputi 	rw_exit(&dn->dn_struct_rwlock);
385eb633035STom Caputi 
386eb633035STom Caputi 	error = dbuf_read(db, NULL, db_flags);
387eb633035STom Caputi 	if (error) {
388eb633035STom Caputi 		dnode_evict_bonus(dn);
389eb633035STom Caputi 		dbuf_rele(db, tag);
390eb633035STom Caputi 		*dbp = NULL;
391eb633035STom Caputi 		return (error);
392eb633035STom Caputi 	}
393eb633035STom Caputi 
394eb633035STom Caputi 	*dbp = &db->db;
395eb633035STom Caputi 	return (0);
396eb633035STom Caputi }
397eb633035STom Caputi 
398fa9e4066Sahrens /*
399ea8dc4b6Seschrock  * returns ENOENT, EIO, or 0.
400fa9e4066Sahrens  */
401ea8dc4b6Seschrock int
dmu_bonus_hold_impl(objset_t * os,uint64_t object,void * tag,uint32_t flags,dmu_buf_t ** dbp)402eb633035STom Caputi dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, uint32_t flags,
403eb633035STom Caputi     dmu_buf_t **dbp)
404fa9e4066Sahrens {
405ea8dc4b6Seschrock 	dnode_t *dn;
406fa9e4066Sahrens 	dmu_buf_impl_t *db;
4071934e92fSmaybee 	int error;
408eb633035STom Caputi 	uint32_t db_flags = DB_RF_MUST_SUCCEED;
409eb633035STom Caputi 
410eb633035STom Caputi 	if (flags & DMU_READ_NO_PREFETCH)
411eb633035STom Caputi 		db_flags |= DB_RF_NOPREFETCH;
412eb633035STom Caputi 	if (flags & DMU_READ_NO_DECRYPT)
413eb633035STom Caputi 		db_flags |= DB_RF_NO_DECRYPT;
415503ad85cSMatthew Ahrens 	error = dnode_hold(os, object, FTAG, &dn);
4161934e92fSmaybee 	if (error)
4171934e92fSmaybee 		return (error);
419ea8dc4b6Seschrock 	rw_enter(&dn->dn_struct_rwlock, RW_READER);
420ea8dc4b6Seschrock 	if (dn->dn_bonus == NULL) {
421fa9e4066Sahrens 		rw_exit(&dn->dn_struct_rwlock);
422ea8dc4b6Seschrock 		rw_enter(&dn->dn_struct_rwlock, RW_WRITER);