2fa9e406ahrens * CDDL HEADER START
3fa9e406ahrens *
4fa9e406ahrens * The contents of this file are subject to the terms of the
5ea8dc4beschrock * Common Development and Distribution License (the "License").
6ea8dc4beschrock * You may not use this file except in compliance with the License.
7fa9e406ahrens *
8fa9e406ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e406ahrens * or http://www.opensolaris.org/os/licensing.
10fa9e406ahrens * See the License for the specific language governing permissions
11fa9e406ahrens * and limitations under the License.
12fa9e406ahrens *
13fa9e406ahrens * When distributing Covered Code, include this CDDL HEADER in each
14fa9e406ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e406ahrens * If applicable, add the following below this CDDL HEADER, with the
16fa9e406ahrens * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e406ahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e406ahrens *
19fa9e406ahrens * CDDL HEADER END
20fa9e406ahrens */
2294d1a21Tim Haley * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23857c96dIgor Kozhukhov */
24857c96dIgor Kozhukhov/*
25857c96dIgor Kozhukhov * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
266d65871John Levon * Copyright 2019 Joyent, Inc.
27857c96dIgor Kozhukhov * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
28adaec86Matthew Ahrens * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
29857c96dIgor Kozhukhov * Copyright (c) 2018 DilOS
30fa9e406ahrens */
31aad0257Saso Kiselkov
32fa9e406ahrens#include <sys/dmu.h>
33fa9e406ahrens#include <sys/dmu_impl.h>
34fa9e406ahrens#include <sys/dmu_tx.h>
35fa9e406ahrens#include <sys/dbuf.h>
36fa9e406ahrens#include <sys/dnode.h>
37fa9e406ahrens#include <sys/zfs_context.h>
38fa9e406ahrens#include <sys/dmu_objset.h>
39fa9e406ahrens#include <sys/dmu_traverse.h>
40fa9e406ahrens#include <sys/dsl_dataset.h>
41fa9e406ahrens#include <sys/dsl_dir.h>
42fa9e406ahrens#include <sys/dsl_pool.h>
431d452cfahrens#include <sys/dsl_synctask.h>
44a2eea2eahrens#include <sys/dsl_prop.h>
45fa9e406ahrens#include <sys/dmu_zfetch.h>
46fa9e406ahrens#include <sys/zfs_ioctl.h>
47fa9e406ahrens#include <sys/zap.h>
48ea8dc4beschrock#include <sys/zio_checksum.h>
4980901aeGeorge Wilson#include <sys/zio_compress.h>
500a586ceMark Shellenbaum#include <sys/sa.h>
51b8289d2Daniil Lunev#include <sys/zfeature.h>
52770499eDan Kimmel#include <sys/abd.h>
5344eda4dmaybee#ifdef _KERNEL
5444eda4dmaybee#include <sys/vmsystm.h>
550fab61bJonathan W Adams#include <sys/zfs_znode.h>
/*
 * Named 64-bit counters (KSTAT_DATA_UINT64) for xuio buffer handling.  The
 * counter names below are the only description of what each one tracks;
 * they are updated through the XUIOSTAT_* macros that follow.
 */
58857c96dIgor Kozhukhovstatic xuio_stats_t xuio_stats = {
59857c96dIgor Kozhukhov	{ "onloan_read_buf",	KSTAT_DATA_UINT64 },
60857c96dIgor Kozhukhov	{ "onloan_write_buf",	KSTAT_DATA_UINT64 },
61857c96dIgor Kozhukhov	{ "read_buf_copied",	KSTAT_DATA_UINT64 },
62857c96dIgor Kozhukhov	{ "read_buf_nocopy",	KSTAT_DATA_UINT64 },
63857c96dIgor Kozhukhov	{ "write_buf_copied",	KSTAT_DATA_UINT64 },
64857c96dIgor Kozhukhov	{ "write_buf_nocopy",	KSTAT_DATA_UINT64 }
65857c96dIgor Kozhukhov};
66857c96dIgor Kozhukhov
/* Atomically add (val) to the xuio_stats counter named by `stat'. */
67857c96dIgor Kozhukhov#define	XUIOSTAT_INCR(stat, val)	\
68857c96dIgor Kozhukhov	atomic_add_64(&xuio_stats.stat.value.ui64, (val))
/* Atomically add one to the xuio_stats counter named by `stat'. */
69857c96dIgor Kozhukhov#define	XUIOSTAT_BUMP(stat)	XUIOSTAT_INCR(stat, 1)
70857c96dIgor Kozhukhov
7180901aeGeorge Wilson/*
7280901aeGeorge Wilson * Enable/disable nopwrite feature.
7380901aeGeorge Wilson */
7480901aeGeorge Wilsonint zfs_nopwrite_enabled = 1;
7580901aeGeorge Wilson
76ff5177eAlek Pinchuk/*
77ff5177eAlek Pinchuk * Tunable to control percentage of dirtied blocks from frees in one TXG.
78ff5177eAlek Pinchuk * After this threshold is crossed, additional dirty blocks from frees
79ff5177eAlek Pinchuk * wait until the next TXG.
80ff5177eAlek Pinchuk * A value of zero will disable this throttle.
81ff5177eAlek Pinchuk */
82ff5177eAlek Pinchukuint32_t zfs_per_txg_dirty_frees_percent = 30;
83ff5177eAlek Pinchuk
845cabbc6Prashanth Sreenivasa/*
855cabbc6Prashanth Sreenivasa * This can be used for testing, to ensure that certain actions happen
865cabbc6Prashanth Sreenivasa * while in the middle of a remap (which might otherwise complete too
875cabbc6Prashanth Sreenivasa * quickly).
885cabbc6Prashanth Sreenivasa */
895cabbc6Prashanth Sreenivasaint zfs_object_remap_one_indirect_delay_ticks = 0;
905cabbc6Prashanth Sreenivasa
9152abb70Matthew Ahrens/*
9252abb70Matthew Ahrens * Limit the amount we can prefetch with one call to this amount.  This
9352abb70Matthew Ahrens * helps to limit the amount of memory that can be used by prefetching.
9452abb70Matthew Ahrens * Larger objects should be prefetched a bit at a time.
9552abb70Matthew Ahrens */
9652abb70Matthew Ahrensuint64_t dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE;
9752abb70Matthew Ahrens
/*
 * Per-object-type attribute table with DMU_OT_NUMTYPES rows.  Each row is:
 *   - a byteswap class (one of the DMU_BSWAP_* values),
 *   - three boolean flags whose meaning is defined by the members of
 *     dmu_object_type_info_t (not visible in this file;
 *     NOTE(review): presumably metadata / dbuf-metadata-cache / encrypt --
 *     confirm against the struct declaration),
 *   - a human-readable name for the object type.
 * NOTE(review): row order presumably has to match the DMU_OT_* enumeration;
 * confirm before inserting or reordering rows.
 */
98fa9e406ahrensconst dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
99eb63303Tom Caputi	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "unallocated"		},
100eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "object directory"	},
101eb63303Tom Caputi	{ DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "object array"		},
102eb63303Tom Caputi	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "packed nvlist"	},
103eb63303Tom Caputi	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "packed nvlist size"	},
104eb63303Tom Caputi	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj"	},
105eb63303Tom Caputi	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj header"		},
106eb63303Tom Caputi	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA space map header"	},
107eb63303Tom Caputi	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA space map"	},
108eb63303Tom Caputi	{ DMU_BSWAP_UINT64, TRUE,  FALSE, TRUE,  "ZIL intent log"	},
109eb63303Tom Caputi	{ DMU_BSWAP_DNODE,  TRUE,  FALSE, TRUE,  "DMU dnode"	},
110eb63303Tom Caputi	{ DMU_BSWAP_OBJSET, TRUE,  TRUE,  FALSE, "DMU objset"	},
111eb63303Tom Caputi	{ DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL directory"	},
112eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL directory child map" },
113eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dataset snap map"	},
114eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL props"	},
115eb63303Tom Caputi	{ DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL dataset"	},
116eb63303Tom Caputi	{ DMU_BSWAP_ZNODE,  TRUE,  FALSE, FALSE, "ZFS znode"	},
117eb63303Tom Caputi	{ DMU_BSWAP_OLDACL, TRUE,  FALSE, TRUE,  "ZFS V0 ACL"	},
118eb63303Tom Caputi	{ DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "ZFS plain file"	},
119eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS directory"	},
120eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "ZFS master node"	},
121eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS delete queue"	},
122eb63303Tom Caputi	{ DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "zvol object"	},
123eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "zvol prop"	},
124eb63303Tom Caputi	{ DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "other uint8[]"	},
125eb63303Tom Caputi	{ DMU_BSWAP_UINT64, FALSE, FALSE, TRUE,  "other uint64[]"	},
126eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "other ZAP"	},
127eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "persistent error log"	},
128eb63303Tom Caputi	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "SPA history"	},
129eb63303Tom Caputi	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA history offsets"	},
130eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "Pool properties"	},
131eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL permissions"	},
132eb63303Tom Caputi	{ DMU_BSWAP_ACL,    TRUE,  FALSE, TRUE,  "ZFS ACL"	},
133eb63303Tom Caputi	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "ZFS SYSACL"	},
134eb63303Tom Caputi	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "FUID table"	},
135eb63303Tom Caputi	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "FUID table size"	},
136eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dataset next clones" },
137eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "scan work queue"	},
138f67950bNasf-Fan	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS user/group/project used"},
139f67950bNasf-Fan	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS user/group/proj quota"},
140eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "snapshot refcount tags" },
141eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "DDT ZAP algorithm"	},
142eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "DDT statistics"	},
143eb63303Tom Caputi	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "System attributes"	},
144eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "SA master node"	},
145eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "SA attr registration"	},
146eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "SA attr layouts"	},
147eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "scan translations"	},
148eb63303Tom Caputi	{ DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "deduplicated block"	},
149eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL deadlist map" },
150eb63303Tom Caputi	{ DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL deadlist map hdr"	},
151eb63303Tom Caputi	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dir clones"	},
152eb63303Tom Caputi	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj subobj"		}
153ad135b5Christopher Siden};
154ad135b5Christopher Siden
/*
 * Byteswap dispatch table with DMU_BSWAP_NUMFUNCS rows; each row pairs a
 * byteswap routine with a short name for it.
 * NOTE(review): row order presumably matches the DMU_BSWAP_* enumeration so
 * that the first column of dmu_ot[] can index this table -- confirm.
 */
155ad135b5Christopher Sidenconst dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
156ad135b5Christopher Siden	{	byteswap_uint8_array,	"uint8"		},
157ad135b5Christopher Siden	{	byteswap_uint16_array,	"uint16"	},
158ad135b5Christopher Siden	{	byteswap_uint32_array,	"uint32"	},
159ad135b5Christopher Siden	{	byteswap_uint64_array,	"uint64"	},
160ad135b5Christopher Siden	{	zap_byteswap,		"zap"		},
161ad135b5Christopher Siden	{	dnode_buf_byteswap,	"dnode"		},
162ad135b5Christopher Siden	{	dmu_objset_byteswap,	"objset"	},
163ad135b5Christopher Siden	{	zfs_znode_byteswap,	"znode"		},
164ad135b5Christopher Siden	{	zfs_oldacl_byteswap,	"oldacl"	},
165ad135b5Christopher Siden	{	zfs_acl_byteswap,	"acl"		}
1663f9d6adLin Ling};
16979d7283Matthew Ahrensdmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
17079d7283Matthew Ahrens    void *tag, dmu_buf_t **dbp)
17179d7283Matthew Ahrens{
17279d7283Matthew Ahrens	uint64_t blkid;
17379d7283Matthew Ahrens	dmu_buf_impl_t *db;
17479d7283Matthew Ahrens
17579d7283Matthew Ahrens	blkid = dbuf_whichblock(dn, 0, offset);
17679d7283Matthew Ahrens	rw_enter(&dn->dn_struct_rwlock, RW_READER);
17779d7283Matthew Ahrens	db = dbuf_hold(dn, blkid, tag);
17879d7283Matthew Ahrens	rw_exit(&dn->dn_struct_rwlock);
17979d7283Matthew Ahrens
18079d7283Matthew Ahrens	if (db == NULL) {
18179d7283Matthew Ahrens		*dbp = NULL;
18279d7283Matthew Ahrens		return (SET_ERROR(EIO));
18379d7283Matthew Ahrens	}
18479d7283Matthew Ahrens
18579d7283Matthew Ahrens	*dbp = &db->db;
18679d7283Matthew Ahrens	return (0);
18779d7283Matthew Ahrens}
18879d7283Matthew Ahrensint
1895d7b4d4Matthew Ahrensdmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
1905d7b4d4Matthew Ahrens    void *tag, dmu_buf_t **dbp)
192fa9e406ahrens	dnode_t *dn;
193fa9e406ahrens	uint64_t blkid;
194fa9e406ahrens	dmu_buf_impl_t *db;
195ea8dc4beschrock	int err;
197503ad85Matthew Ahrens	err = dnode_hold(os, object, FTAG, &dn);
198ea8dc4beschrock	if (err)
199ea8dc4beschrock		return (err);
200a2cdcddPaul Dagnelie	blkid = dbuf_whichblock(dn, 0, offset);
201fa9e406ahrens	rw_enter(&dn->dn_struct_rwlock, RW_READER);
202ea8dc4beschrock	db = dbuf_hold(dn, blkid, tag);
203fa9e406ahrens	rw_exit(&dn->dn_struct_rwlock);
2045d7b4d4Matthew Ahrens	dnode_rele(dn, FTAG);
2055d7b4d4Matthew Ahrens
206ea8dc4beschrock	if (db == NULL) {
2075d7b4d4Matthew Ahrens		*dbp = NULL;
2085d7b4d4Matthew Ahrens		return (SET_ERROR(EIO));
2095d7b4d4Matthew Ahrens	}
2105d7b4d4Matthew Ahrens
2115d7b4d4Matthew Ahrens	*dbp = &db->db;
2125d7b4d4Matthew Ahrens	return (err);
2135d7b4d4Matthew Ahrens}
2145d7b4d4Matthew Ahrens
2155d7b4d4Matthew Ahrensint
21679d7283Matthew Ahrensdmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
21779d7283Matthew Ahrens    void *tag, dmu_buf_t **dbp, int flags)
21879d7283Matthew Ahrens{
21979d7283Matthew Ahrens	int err;
22079d7283Matthew Ahrens	int db_flags = DB_RF_CANFAIL;
22179d7283Matthew Ahrens
22279d7283Matthew Ahrens	if (flags & DMU_READ_NO_PREFETCH)
22379d7283Matthew Ahrens		db_flags |= DB_RF_NOPREFETCH;
224eb63303Tom Caputi	if (flags & DMU_READ_NO_DECRYPT)
225eb63303Tom Caputi		db_flags |= DB_RF_NO_DECRYPT;
22679d7283Matthew Ahrens
22779d7283Matthew Ahrens	err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp);
22879d7283Matthew Ahrens	if (err == 0) {
22979d7283Matthew Ahrens		dmu_buf_impl_t *db = (dmu_buf_impl_t *)(*dbp);
23079d7283Matthew Ahrens		err = dbuf_read(db, NULL, db_flags);
23179d7283Matthew Ahrens		if (err != 0) {
23279d7283Matthew Ahrens			dbuf_rele(db, tag);
23379d7283Matthew Ahrens			*dbp = NULL;
23479d7283Matthew Ahrens		}
23579d7283Matthew Ahrens	}
23679d7283Matthew Ahrens
23779d7283Matthew Ahrens	return (err);
23879d7283Matthew Ahrens}
23979d7283Matthew Ahrens
24079d7283Matthew Ahrensint
2415d7b4d4Matthew Ahrensdmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
2425d7b4d4Matthew Ahrens    void *tag, dmu_buf_t **dbp, int flags)
2435d7b4d4Matthew Ahrens{
2445d7b4d4Matthew Ahrens	int err;
2455d7b4d4Matthew Ahrens	int db_flags = DB_RF_CANFAIL;
2465d7b4d4Matthew Ahrens
2475d7b4d4Matthew Ahrens	if (flags & DMU_READ_NO_PREFETCH)
2485d7b4d4Matthew Ahrens		db_flags |= DB_RF_NOPREFETCH;
249eb63303Tom Caputi	if (flags & DMU_READ_NO_DECRYPT)
250eb63303Tom Caputi		db_flags |= DB_RF_NO_DECRYPT;
2515d7b4d4Matthew Ahrens
2525d7b4d4Matthew Ahrens	err = dmu_buf_hold_noread(os, object, offset, tag, dbp);
2535d7b4d4Matthew Ahrens	if (err == 0) {
2545d7b4d4Matthew Ahrens		dmu_buf_impl_t *db = (dmu_buf_impl_t *)(*dbp);
25547cb52dJeff Bonwick		err = dbuf_read(db, NULL, db_flags);
2565d7b4d4Matthew Ahrens		if (err != 0) {
257ea8dc4beschrock			dbuf_rele(db, tag);
2585d7b4d4Matthew Ahrens			*dbp = NULL;
259ea8dc4beschrock		}
260ea8dc4beschrock	}
262ea8dc4beschrock	return (err);
26854811daToomas Soome	return (DN_OLD_MAX_BONUSLEN);
272744947dTom Ericksondmu_set_bonus(dmu_buf_t *db_fake, int newsize, dmu_tx_t *tx)
274744947dTom Erickson	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
275744947dTom Erickson	dnode_t *dn;
276744947dTom Erickson	int error;
278744947dTom Erickson	DB_DNODE_ENTER(db);
279744947dTom Erickson	dn = DB_DNODE(db);
280744947dTom Erickson
281744947dTom Erickson	if (dn->dn_bonus != db) {
282be6fd75Matthew Ahrens		error = SET_ERROR(EINVAL);
283744947dTom Erickson	} else if (newsize < 0 || newsize > db_fake->db_size) {
284be6fd75Matthew Ahrens		error = SET_ERROR(EINVAL);
285744947dTom Erickson	} else {
286744947dTom Erickson		dnode_setbonuslen(dn, newsize, tx);
287744947dTom Erickson		error = 0;
288744947dTom Erickson	}
289744947dTom Erickson
290744947dTom Erickson	DB_DNODE_EXIT(db);
291744947dTom Erickson	return (error);
2940a586ceMark Shellenbaumint
295744947dTom Ericksondmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx)
2960a586ceMark Shellenbaum{
297744947dTom Erickson	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
298744947dTom Erickson	dnode_t *dn;
299744947dTom Erickson	int error;
3000a586ceMark Shellenbaum
301744947dTom Erickson	DB_DNODE_ENTER(db);
302744947dTom Erickson	dn = DB_DNODE(db);
303744947dTom Erickson
304ad135b5Christopher Siden	if (!DMU_OT_IS_VALID(type)) {
305be6fd75Matthew Ahrens		error = SET_ERROR(EINVAL);
306744947dTom Erickson	} else if (dn->dn_bonus != db) {
307be6fd75Matthew Ahrens		error = SET_ERROR(EINVAL);
308744947dTom Erickson	} else {
309744947dTom Erickson		dnode_setbonus_type(dn, type, tx);
310744947dTom Erickson		error = 0;
311744947dTom Erickson	}
3120a586ceMark Shellenbaum
313744947dTom Erickson	DB_DNODE_EXIT(db);
314744947dTom Erickson	return (error);
315744947dTom Erickson}
3160a586ceMark Shellenbaum
317744947dTom Ericksondmu_object_type_t
318744947dTom Ericksondmu_get_bonustype(dmu_buf_t *db_fake)
319744947dTom Erickson{
320744947dTom Erickson	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
321744947dTom Erickson	dnode_t *dn;
322744947dTom Erickson	dmu_object_type_t type;
323744947dTom Erickson
324744947dTom Erickson	DB_DNODE_ENTER(db);
325744947dTom Erickson	dn = DB_DNODE(db);
326744947dTom Erickson	type = dn->dn_bonustype;
327744947dTom Erickson	DB_DNODE_EXIT(db);
328744947dTom Erickson
329744947dTom Erickson	return (type);
3300a586ceMark Shellenbaum}
3310a586ceMark Shellenbaum
3320a586ceMark Shellenbaumint
3330a586ceMark Shellenbaumdmu_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
3340a586ceMark Shellenbaum{
3350a586ceMark Shellenbaum	dnode_t *dn;
3360a586ceMark Shellenbaum	int error;
3370a586ceMark Shellenbaum
3380a586ceMark Shellenbaum	error = dnode_hold(os, object, FTAG, &dn);
3390a586ceMark Shellenbaum	dbuf_rm_spill(dn, tx);
34006e0070Mark Shellenbaum	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
34106e0070Mark Shellenbaum	dnode_rm_spill(dn, tx);
34206e0070Mark Shellenbaum	rw_exit(&dn->dn_struct_rwlock);
3430a586ceMark Shellenbaum	dnode_rele(dn, FTAG);
3440a586ceMark Shellenbaum	return (error);
3450a586ceMark Shellenbaum}
3460a586ceMark Shellenbaum
348eb63303Tom Caputi * Lookup and hold the bonus buffer for the provided dnode.  If the dnode
349eb63303Tom Caputi * has not yet been allocated a new bonus dbuf a will be allocated.
350eb63303Tom Caputi * Returns ENOENT, EIO, or 0.
351eb63303Tom Caputi */
352eb63303Tom Caputiint dmu_bonus_hold_by_dnode(dnode_t *dn, void *tag, dmu_buf_t **dbp,
353eb63303Tom Caputi    uint32_t flags)
354eb63303Tom Caputi{
355eb63303Tom Caputi	dmu_buf_impl_t *db;
356eb63303Tom Caputi	int error;
357eb63303Tom Caputi	uint32_t db_flags = DB_RF_MUST_SUCCEED;
358eb63303Tom Caputi
359eb63303Tom Caputi	if (flags & DMU_READ_NO_PREFETCH)
360eb63303Tom Caputi		db_flags |= DB_RF_NOPREFETCH;
361eb63303Tom Caputi	if (flags & DMU_READ_NO_DECRYPT)
3626d65871John Levon		db_flags |= DB_RF_NO_DECRYPT;
363eb63303Tom Caputi
364eb63303Tom Caputi	rw_enter(&dn->dn_struct_rwlock, RW_READER);
365eb63303Tom Caputi	if (dn->dn_bonus == NULL) {
366eb63303Tom Caputi		rw_exit(&dn->dn_struct_rwlock);
367eb63303Tom Caputi		rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
368eb63303Tom Caputi		if (dn->dn_bonus == NULL)
369eb63303Tom Caputi			dbuf_create_bonus(dn);
370eb63303Tom Caputi	}
371eb63303Tom Caputi	db = dn->dn_bonus;
372eb63303Tom Caputi
373eb63303Tom Caputi	/* as long as the bonus buf is held, the dnode will be held */
374eb63303Tom Caputi	if (zfs_refcount_add(&db->db_holds, tag) == 1) {
375eb63303Tom Caputi		VERIFY(dnode_add_ref(dn, db));
376eb63303Tom Caputi		atomic_inc_32(&dn->dn_dbufs_count);
377eb63303Tom Caputi	}
378eb63303Tom Caputi
379eb63303Tom Caputi	/*
380eb63303Tom Caputi	 * Wait to drop dn_struct_rwlock until after adding the bonus dbuf's
381eb63303Tom Caputi	 * hold and incrementing the dbuf count to ensure that dnode_move() sees
382eb63303Tom Caputi	 * a dnode hold for every dbuf.
383eb63303Tom Caputi	 */
384eb63303Tom Caputi	rw_exit(&dn->dn_struct_rwlock);
385eb63303Tom Caputi
386eb63303Tom Caputi	error = dbuf_read(db, NULL, db_flags);
387eb63303Tom Caputi	if (error) {
388eb63303Tom Caputi		dnode_evict_bonus(dn);
389eb63303Tom Caputi		dbuf_rele(db, tag);
390eb63303Tom Caputi		*dbp = NULL;
391eb63303Tom Caputi		return (error);
392eb63303Tom Caputi	}
393eb63303Tom Caputi
394eb63303Tom Caputi	*dbp = &db->db;
395eb63303Tom Caputi	return (0);
396eb63303Tom Caputi}
397eb63303Tom Caputi
398eb63303Tom Caputi/*
399ea8dc4beschrock * returns ENOENT, EIO, or 0.
400fa9e406ahrens */
402eb63303Tom Caputidmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, uint32_t flags,
403eb63303Tom Caputi    dmu_buf_t **dbp)
405ea8dc4beschrock	dnode_t *dn;
406fa9e406ahrens	dmu_buf_impl_t *db;
4071934e92maybee	int error;
408eb63303Tom Caputi	uint32_t db_flags = DB_RF_MUST_SUCCEED;
409eb63303Tom Caputi
410eb63303Tom Caputi	if (flags & DMU_READ_NO_PREFETCH)
411eb63303Tom Caputi		db_flags |= DB_RF_NOPREFETCH;
412eb63303Tom Caputi	if (flags & DMU_READ_NO_DECRYPT)
413eb63303Tom Caputi		db_flags |= DB_RF_NO_DECRYPT;
415503ad85Matthew Ahrens	error = dnode_hold(os, object, FTAG, &dn);
4161934e92maybee	if (error)
4171934e92maybee		return (error);
419ea8dc4beschrock	rw_enter(&dn->dn_struct_rwlock, RW_READER);
420ea8dc4beschrock	if (dn->dn_bonus == NULL) {
421fa9e406ahrens		rw_exit(&dn->dn_struct_rwlock);
422ea8dc4beschrock		rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
423ea8dc4beschrock		if (dn->dn_bonus == NULL)
4241934e92maybee			dbuf_create_bonus(dn);
425fa9e406ahrens	}
426ea8dc4beschrock	db = dn->dn_bonus;
4281934e92maybee	/* as long as the bonus buf is held, the dnode will be held */
429e914aceTim Schumacher	if (zfs_refcount_add(&db->db_holds, tag) == 1) {
4301934e92maybee		VERIFY(dnode_add_ref(dn, db));
431640c167Josef 'Jeff' Sipek		atomic_inc_32(&dn->dn_dbufs_count);
432744947dTom Erickson	}
433744947dTom Erickson
434744947dTom Erickson	/*
435744947dTom Erickson	 * Wait to drop dn_struct_rwlock until after adding the bonus dbuf's
436744947dTom Erickson	 * hold and incrementing the dbuf count to ensure that dnode_move() sees
437744947dTom Erickson	 * a dnode hold for every dbuf.
438744947dTom Erickson	 */
439744947dTom Erickson	rw_exit(&dn->dn_struct_rwlock);
441fa9e406ahrens	dnode_rele(dn, FTAG);
443eb63303Tom Caputi	error = dbuf_read(db, NULL, db_flags);
444eb63303Tom Caputi	if (error) {
445eb63303Tom Caputi		dnode_evict_bonus(dn);
446eb63303Tom Caputi		dbuf_rele(db, tag);
447eb63303Tom Caputi		*dbp = NULL;
448eb63303Tom Caputi		return (error);
449eb63303Tom Caputi	}
451ea8dc4beschrock	*dbp = &db->db;
452ea8dc4beschrock	return (0);
455eb63303Tom Caputiint
456eb63303Tom Caputidmu_bonus_hold(objset_t *os, uint64_t obj, void *tag, dmu_buf_t **dbp)
457eb63303Tom Caputi{
458eb63303Tom Caputi	return (dmu_bonus_hold_impl(os, obj, tag, DMU_READ_NO_PREFETCH, dbp));
459eb63303Tom Caputi}
460eb63303Tom Caputi
4620a586ceMark Shellenbaum * returns ENOENT, EIO, or 0.
4630a586ceMark Shellenbaum *
4640a586ceMark Shellenbaum * This interface will allocate a blank spill dbuf when a spill blk
4650a586ceMark Shellenbaum * doesn't already exist on the dnode.
4660a586ceMark Shellenbaum *
4670a586ceMark Shellenbaum * if you only want to find an already existing spill db, then
4680a586ceMark Shellenbaum * dmu_spill_hold_existing() should be used.
4690a586ceMark Shellenbaum */
4700a586ceMark Shellenbaumint
4710a586ceMark Shellenbaumdmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags, void *tag, dmu_buf_t **dbp)
4720a586ceMark Shellenbaum{
4730a586ceMark Shellenbaum	dmu_buf_impl_t *db = NULL;
4740a586ceMark Shellenbaum	int err;
4750a586ceMark Shellenbaum
4760a586ceMark Shellenbaum	if ((flags & DB_RF_HAVESTRUCT) == 0)
4770a586ceMark Shellenbaum		rw_enter(&dn->dn_struct_rwlock, RW_READER);
4780a586ceMark Shellenbaum
4790a586ceMark Shellenbaum	db = dbuf_hold(dn, DMU_SPILL_BLKID, tag);
4800a586ceMark Shellenbaum
4810a586ceMark Shellenbaum	if ((flags & DB_RF_HAVESTRUCT) == 0)
4820a586ceMark Shellenbaum		rw_exit(&dn->dn_struct_rwlock);
4830a586ceMark Shellenbaum
4840a586ceMark Shellenbaum	ASSERT(db != NULL);
4851d8ccc7Mark Shellenbaum	err = dbuf_read(db, NULL, flags);
4861d8ccc7Mark Shellenbaum	if (err == 0)