xref: /illumos-gate/usr/src/uts/common/fs/zfs/dbuf.c (revision 6f43873c)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5f65e61c0Sahrens  * Common Development and Distribution License (the "License").
6f65e61c0Sahrens  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
2206e0070dSMark Shellenbaum  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
233f2366c2SGordon Ross  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
249704bf7fSPaul Dagnelie  * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
25aad02571SSaso Kiselkov  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
26810e43b2SBill Pijewski  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
27bc9014e6SJustin Gibbs  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
28c3d26abcSMatthew Ahrens  * Copyright (c) 2014 Integros [integros.com]
29fa9e4066Sahrens  */
30fa9e4066Sahrens 
31fa9e4066Sahrens #include <sys/zfs_context.h>
32fa9e4066Sahrens #include <sys/dmu.h>
332f3d8780SMatthew Ahrens #include <sys/dmu_send.h>
34fa9e4066Sahrens #include <sys/dmu_impl.h>
35fa9e4066Sahrens #include <sys/dbuf.h>
36fa9e4066Sahrens #include <sys/dmu_objset.h>
37fa9e4066Sahrens #include <sys/dsl_dataset.h>
38fa9e4066Sahrens #include <sys/dsl_dir.h>
39fa9e4066Sahrens #include <sys/dmu_tx.h>
40fa9e4066Sahrens #include <sys/spa.h>
41fa9e4066Sahrens #include <sys/zio.h>
42fa9e4066Sahrens #include <sys/dmu_zfetch.h>
430a586ceaSMark Shellenbaum #include <sys/sa.h>
440a586ceaSMark Shellenbaum #include <sys/sa_impl.h>
455d7b4d43SMatthew Ahrens #include <sys/zfeature.h>
465d7b4d43SMatthew Ahrens #include <sys/blkptr.h>
47bf16b11eSMatthew Ahrens #include <sys/range_tree.h>
48dcbf3bd6SGeorge Wilson #include <sys/callb.h>
49770499e1SDan Kimmel #include <sys/abd.h>
505cabbc6bSPrashanth Sreenivasa #include <sys/vdev.h>
513a2d8a1bSPaul Dagnelie #include <sys/cityhash.h>
52adb52d92SMatthew Ahrens #include <sys/spa_impl.h>
53dcbf3bd6SGeorge Wilson 
/*
 * Prototypes for file-local helpers that are referenced before their
 * definitions (the definitions are not part of this excerpt).
 */
543b2aab18SMatthew Ahrens static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
55088f3894Sahrens static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
56fa9e4066Sahrens 
/*
 * "extern inline" declaration of dmu_buf_init_user(), hidden from lint.
 * NOTE(review): presumably the inline definition lives in a header and
 * this declaration asks the compiler to emit the out-of-line copy in this
 * translation unit (C99 inline semantics) -- confirm against the header.
 */
57bc9014e6SJustin Gibbs #ifndef __lint
58bc9014e6SJustin Gibbs extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu,
5940510e8eSJosef 'Jeff' Sipek     dmu_buf_evict_func_t *evict_func_sync,
6040510e8eSJosef 'Jeff' Sipek     dmu_buf_evict_func_t *evict_func_async,
6140510e8eSJosef 'Jeff' Sipek     dmu_buf_t **clear_on_evict_dbufp);
62bc9014e6SJustin Gibbs #endif /* ! __lint */
63bc9014e6SJustin Gibbs 
64fa9e4066Sahrens /*
65fa9e4066Sahrens  * Global data structures and functions for the dbuf cache.
66fa9e4066Sahrens  */
/*
 * NOTE(review): presumably the kmem cache that dmu_buf_impl_t objects are
 * allocated from, using dbuf_cons()/dbuf_dest() -- its creation is not
 * visible in this excerpt.
 */
67dcbf3bd6SGeorge Wilson static kmem_cache_t *dbuf_kmem_cache;
/* Taskq on which dbuf_evict_user() dispatches asynchronous evict callbacks. */
68bc9014e6SJustin Gibbs static taskq_t *dbu_evict_taskq;
69fa9e4066Sahrens 
/*
 * NOTE(review): presumably the handle of the thread running
 * dbuf_evict_thread(); the thread creation is not visible in this excerpt.
 */
70dcbf3bd6SGeorge Wilson static kthread_t *dbuf_cache_evict_thread;
/* Held by dbuf_evict_thread() while it checks its state and sleeps. */
71dcbf3bd6SGeorge Wilson static kmutex_t dbuf_evict_lock;
/*
 * dbuf_evict_thread() sleeps on this (with a timeout) while idle and
 * broadcasts on it just before the thread exits.
 */
72dcbf3bd6SGeorge Wilson static kcondvar_t dbuf_evict_cv;
/*
 * dbuf_evict_thread() keeps running until this becomes true; the thread
 * resets it to B_FALSE on its way out.
 */
73dcbf3bd6SGeorge Wilson static boolean_t dbuf_evict_thread_exit;
74dcbf3bd6SGeorge Wilson 
75dcbf3bd6SGeorge Wilson /*
76adb52d92SMatthew Ahrens  * There are two dbuf caches; each dbuf can only be in one of them at a time.
77adb52d92SMatthew Ahrens  *
78adb52d92SMatthew Ahrens  * 1. Cache of metadata dbufs, to help make read-heavy administrative commands
79adb52d92SMatthew Ahrens  *    from /sbin/zfs run faster. The "metadata cache" specifically stores dbufs
80adb52d92SMatthew Ahrens  *    that represent the metadata that describes filesystems/snapshots/
81adb52d92SMatthew Ahrens  *    bookmarks/properties/etc. We only evict from this cache when we export a
82adb52d92SMatthew Ahrens  *    pool, to short-circuit as much I/O as possible for all administrative
83adb52d92SMatthew Ahrens  *    commands that need the metadata. There is no eviction policy for this
84adb52d92SMatthew Ahrens  *    cache, because we try to only include types in it which would occupy a
85adb52d92SMatthew Ahrens  *    very small amount of space per object but create a large impact on the
86adb52d92SMatthew Ahrens  *    performance of these commands. Instead, after it reaches a maximum size
87adb52d92SMatthew Ahrens  *    (which should only happen on very small memory systems with a very large
88adb52d92SMatthew Ahrens  *    number of filesystem objects), we stop taking new dbufs into the
89adb52d92SMatthew Ahrens  *    metadata cache, instead putting them in the normal dbuf cache.
90adb52d92SMatthew Ahrens  *
91adb52d92SMatthew Ahrens  * 2. LRU cache of dbufs. The "dbuf cache" maintains a list of dbufs that
92adb52d92SMatthew Ahrens  *    are not currently held but have been recently released. These dbufs
93adb52d92SMatthew Ahrens  *    are not eligible for arc eviction until they are aged out of the cache.
94adb52d92SMatthew Ahrens  *    Dbufs that are aged out of the cache will be immediately destroyed and
95adb52d92SMatthew Ahrens  *    become eligible for arc eviction.
96adb52d92SMatthew Ahrens  *
97adb52d92SMatthew Ahrens  * Dbufs are added to these caches once the last hold is released. If a dbuf is
98adb52d92SMatthew Ahrens  * later accessed and still exists in the dbuf cache, then it will be removed
99adb52d92SMatthew Ahrens  * from the cache and later re-added to the head of the cache.
100adb52d92SMatthew Ahrens  *
101adb52d92SMatthew Ahrens  * If a given dbuf meets the requirements for the metadata cache, it will go
102adb52d92SMatthew Ahrens  * there, otherwise it will be considered for the generic LRU dbuf cache. The
103adb52d92SMatthew Ahrens  * caches and the refcounts tracking their sizes are stored in an array indexed
104adb52d92SMatthew Ahrens  * by those caches' matching enum values (from dbuf_cached_state_t).
105dcbf3bd6SGeorge Wilson  */
/* One dbuf cache: the list of cached dbufs plus a running size count. */
106adb52d92SMatthew Ahrens typedef struct dbuf_cache {
	/* Multilist holding the dbufs that are currently in this cache. */
107adb52d92SMatthew Ahrens 	multilist_t *cache;
	/*
	 * Size accounting for the cache; dbuf_evict_one() subtracts
	 * db.db_size from it for each dbuf it removes.
	 */
108e914ace2STim Schumacher 	zfs_refcount_t size;
109adb52d92SMatthew Ahrens } dbuf_cache_t;
/* Indexed by cache kind, e.g. DB_DBUF_CACHE or DB_DBUF_METADATA_CACHE. */
110adb52d92SMatthew Ahrens dbuf_cache_t dbuf_caches[DB_CACHE_MAX];
111dcbf3bd6SGeorge Wilson 
112adb52d92SMatthew Ahrens /* Size limits for the caches */
/*
 * dbuf_cache_max_bytes is the base for the water marks computed by
 * dbuf_cache_above_hiwater() and dbuf_cache_above_lowater();
 * dbuf_metadata_cache_max_bytes is the limit checked by
 * dbuf_include_in_metadata_cache().
 * NOTE(review): both start at 0 here; presumably they are filled in
 * during initialization from the shift values below -- confirm.
 */
113adb52d92SMatthew Ahrens uint64_t dbuf_cache_max_bytes = 0;
114adb52d92SMatthew Ahrens uint64_t dbuf_metadata_cache_max_bytes = 0;
115adb52d92SMatthew Ahrens /* Set the default sizes of the caches to log2 fraction of arc size */
/* As log2 fractions, 5 corresponds to 1/32 and 6 to 1/64. */
116268bbb2aSGeorge Wilson int dbuf_cache_shift = 5;
117adb52d92SMatthew Ahrens int dbuf_metadata_cache_shift = 6;
118dcbf3bd6SGeorge Wilson 
119dcbf3bd6SGeorge Wilson /*
120adb52d92SMatthew Ahrens  * For diagnostic purposes, this is incremented whenever we can't add
121adb52d92SMatthew Ahrens  * something to the metadata cache because it's full, and instead put
122adb52d92SMatthew Ahrens  * the data in the regular dbuf cache.
123adb52d92SMatthew Ahrens  */
/*
 * Bumped with a plain "++" in dbuf_include_in_metadata_cache(), i.e. not
 * atomically, so treat the value as approximate.
 */
124adb52d92SMatthew Ahrens uint64_t dbuf_metadata_cache_overflow;
125adb52d92SMatthew Ahrens 
126adb52d92SMatthew Ahrens /*
127adb52d92SMatthew Ahrens  * The LRU dbuf cache uses a three-stage eviction policy:
128dcbf3bd6SGeorge Wilson  *	- A low water marker designates when the dbuf eviction thread
129dcbf3bd6SGeorge Wilson  *	should stop evicting from the dbuf cache.
130dcbf3bd6SGeorge Wilson  *	- When we reach the maximum size (aka mid water mark), we
131dcbf3bd6SGeorge Wilson  *	signal the eviction thread to run.
132dcbf3bd6SGeorge Wilson  *	- The high water mark indicates when the eviction thread
133dcbf3bd6SGeorge Wilson  *	is unable to keep up with the incoming load and eviction must
134dcbf3bd6SGeorge Wilson  *	happen in the context of the calling thread.
135dcbf3bd6SGeorge Wilson  *
136dcbf3bd6SGeorge Wilson  * The dbuf cache:
137dcbf3bd6SGeorge Wilson  *                                                 (max size)
138dcbf3bd6SGeorge Wilson  *                                      low water   mid water   hi water
139dcbf3bd6SGeorge Wilson  * +----------------------------------------+----------+----------+
140dcbf3bd6SGeorge Wilson  * |                                        |          |          |
141dcbf3bd6SGeorge Wilson  * |                                        |          |          |
142dcbf3bd6SGeorge Wilson  * |                                        |          |          |
143dcbf3bd6SGeorge Wilson  * |                                        |          |          |
144dcbf3bd6SGeorge Wilson  * +----------------------------------------+----------+----------+
145dcbf3bd6SGeorge Wilson  *                                        stop        signal     evict
146dcbf3bd6SGeorge Wilson  *                                      evicting     eviction   directly
147dcbf3bd6SGeorge Wilson  *                                                    thread
148dcbf3bd6SGeorge Wilson  *
149dcbf3bd6SGeorge Wilson  * The high and low water marks indicate the operating range for the eviction
150dcbf3bd6SGeorge Wilson  * thread. The low water mark is, by default, 90% of the total size of the
151dcbf3bd6SGeorge Wilson  * cache and the high water mark is at 110% (both of these percentages can be
152dcbf3bd6SGeorge Wilson  * changed by setting dbuf_cache_lowater_pct and dbuf_cache_hiwater_pct,
153dcbf3bd6SGeorge Wilson  * respectively). The eviction thread will try to ensure that the cache remains
154dcbf3bd6SGeorge Wilson  * within this range by waking up every second and checking if the cache is
155dcbf3bd6SGeorge Wilson  * above the low water mark. The thread can also be woken up by callers adding
156dcbf3bd6SGeorge Wilson  * elements into the cache if the cache is larger than the mid water (i.e. max
157dcbf3bd6SGeorge Wilson  * cache size). Once the eviction thread is woken up and eviction is required,
158dcbf3bd6SGeorge Wilson  * it will continue evicting buffers until it's able to reduce the cache size
159dcbf3bd6SGeorge Wilson  * to the low water mark. If the cache size continues to grow and hits the high
160eb633035STom Caputi  * water mark, then callers adding elements to the cache will begin to evict
161dcbf3bd6SGeorge Wilson  * directly from the cache until the cache is no longer above the high water
162dcbf3bd6SGeorge Wilson  * mark.
163dcbf3bd6SGeorge Wilson  */
164dcbf3bd6SGeorge Wilson 
165dcbf3bd6SGeorge Wilson /*
166dcbf3bd6SGeorge Wilson  * The percentage above and below the maximum cache size.
167dcbf3bd6SGeorge Wilson  */
/*
 * Both values are percentages of dbuf_cache_max_bytes: the high water
 * mark is max + hiwater_pct% (see dbuf_cache_above_hiwater()) and the low
 * water mark is max - lowater_pct% (see dbuf_cache_above_lowater()).
 */
168dcbf3bd6SGeorge Wilson uint_t dbuf_cache_hiwater_pct = 10;
169dcbf3bd6SGeorge Wilson uint_t dbuf_cache_lowater_pct = 10;
170dcbf3bd6SGeorge Wilson 
171fa9e4066Sahrens /* ARGSUSED */
172fa9e4066Sahrens static int
dbuf_cons(void * vdb,void * unused,int kmflag)173fa9e4066Sahrens dbuf_cons(void *vdb, void *unused, int kmflag)
174fa9e4066Sahrens {
175fa9e4066Sahrens 	dmu_buf_impl_t *db = vdb;
176fa9e4066Sahrens 	bzero(db, sizeof (dmu_buf_impl_t));
177fa9e4066Sahrens 
178fa9e4066Sahrens 	mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL);
1799704bf7fSPaul Dagnelie 	rw_init(&db->db_rwlock, NULL, RW_DEFAULT, NULL);
180fa9e4066Sahrens 	cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
181dcbf3bd6SGeorge Wilson 	multilist_link_init(&db->db_cache_link);
182e914ace2STim Schumacher 	zfs_refcount_create(&db->db_holds);
1830f6d88adSAlex Reece 
184fa9e4066Sahrens 	return (0);
185fa9e4066Sahrens }
186fa9e4066Sahrens 
187fa9e4066Sahrens /* ARGSUSED */
188fa9e4066Sahrens static void
dbuf_dest(void * vdb,void * unused)189fa9e4066Sahrens dbuf_dest(void *vdb, void *unused)
190fa9e4066Sahrens {
191fa9e4066Sahrens 	dmu_buf_impl_t *db = vdb;
192fa9e4066Sahrens 	mutex_destroy(&db->db_mtx);
1939704bf7fSPaul Dagnelie 	rw_destroy(&db->db_rwlock);
194fa9e4066Sahrens 	cv_destroy(&db->db_changed);
195dcbf3bd6SGeorge Wilson 	ASSERT(!multilist_link_active(&db->db_cache_link));
196e914ace2STim Schumacher 	zfs_refcount_destroy(&db->db_holds);
197fa9e4066Sahrens }
198fa9e4066Sahrens 
199fa9e4066Sahrens /*
200fa9e4066Sahrens  * dbuf hash table routines
201fa9e4066Sahrens  */
/*
 * The single, file-wide dbuf hash table.  Each chain is walked and
 * modified under the DBUF_HASH_MUTEX() for its bucket.
 */
202fa9e4066Sahrens static dbuf_hash_table_t dbuf_hash_table;
203fa9e4066Sahrens 
/*
 * Number of dbufs in the hash table; adjusted atomically by
 * dbuf_hash_insert() and dbuf_hash_remove().
 */
204fa9e4066Sahrens static uint64_t dbuf_hash_count;
205fa9e4066Sahrens 
/*
 * Hash a dbuf identity (objset pointer, object number, level, block id)
 * to a 64-bit value.  Cityhash is used because it is fast and has good
 * hash properties without requiring any large static buffers.
 */
static uint64_t
dbuf_hash(void *os, uint64_t obj, uint8_t lvl, uint64_t blkid)
{
	uintptr_t os_key = (uintptr_t)os;
	uint64_t level_key = (uint64_t)lvl;

	return (cityhash4(os_key, obj, level_key, blkid));
}
215fa9e4066Sahrens 
/*
 * True when "dbuf" has exactly the given identity, i.e. its object
 * number, objset, level and block id all match the arguments.
 */
216fa9e4066Sahrens #define	DBUF_EQUAL(dbuf, os, obj, level, blkid)		\
217fa9e4066Sahrens 	((dbuf)->db.db_object == (obj) &&		\
218fa9e4066Sahrens 	(dbuf)->db_objset == (os) &&			\
219fa9e4066Sahrens 	(dbuf)->db_level == (level) &&			\
220fa9e4066Sahrens 	(dbuf)->db_blkid == (blkid))
221fa9e4066Sahrens 
222fa9e4066Sahrens dmu_buf_impl_t *
dbuf_find(objset_t * os,uint64_t obj,uint8_t level,uint64_t blkid)223e57a022bSJustin T. Gibbs dbuf_find(objset_t *os, uint64_t obj, uint8_t level, uint64_t blkid)
224fa9e4066Sahrens {
225fa9e4066Sahrens 	dbuf_hash_table_t *h = &dbuf_hash_table;
226dcbf3bd6SGeorge Wilson 	uint64_t hv = dbuf_hash(os, obj, level, blkid);
227fa9e4066Sahrens 	uint64_t idx = hv & h->hash_table_mask;
228fa9e4066Sahrens 	dmu_buf_impl_t *db;
229fa9e4066Sahrens 
230fa9e4066Sahrens 	mutex_enter(DBUF_HASH_MUTEX(h, idx));
231fa9e4066Sahrens 	for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) {
232fa9e4066Sahrens 		if (DBUF_EQUAL(db, os, obj, level, blkid)) {
233fa9e4066Sahrens 			mutex_enter(&db->db_mtx);
234ea8dc4b6Seschrock 			if (db->db_state != DB_EVICTING) {
235fa9e4066Sahrens 				mutex_exit(DBUF_HASH_MUTEX(h, idx));
236fa9e4066Sahrens 				return (db);
237fa9e4066Sahrens 			}
238fa9e4066Sahrens 			mutex_exit(&db->db_mtx);
239fa9e4066Sahrens 		}
240fa9e4066Sahrens 	}
241fa9e4066Sahrens 	mutex_exit(DBUF_HASH_MUTEX(h, idx));
242fa9e4066Sahrens 	return (NULL);
243fa9e4066Sahrens }
244fa9e4066Sahrens 
245e57a022bSJustin T. Gibbs static dmu_buf_impl_t *
dbuf_find_bonus(objset_t * os,uint64_t object)246e57a022bSJustin T. Gibbs dbuf_find_bonus(objset_t *os, uint64_t object)
247e57a022bSJustin T. Gibbs {
248e57a022bSJustin T. Gibbs 	dnode_t *dn;
249e57a022bSJustin T. Gibbs 	dmu_buf_impl_t *db = NULL;
250e57a022bSJustin T. Gibbs 
251e57a022bSJustin T. Gibbs 	if (dnode_hold(os, object, FTAG, &dn) == 0) {
252e57a022bSJustin T. Gibbs 		rw_enter(&dn->dn_struct_rwlock, RW_READER);
253e57a022bSJustin T. Gibbs 		if (dn->dn_bonus != NULL) {
254e57a022bSJustin T. Gibbs 			db = dn->dn_bonus;
255e57a022bSJustin T. Gibbs 			mutex_enter(&db->db_mtx);
256e57a022bSJustin T. Gibbs 		}
257e57a022bSJustin T. Gibbs 		rw_exit(&dn->dn_struct_rwlock);
258e57a022bSJustin T. Gibbs 		dnode_rele(dn, FTAG);
259e57a022bSJustin T. Gibbs 	}
260e57a022bSJustin T. Gibbs 	return (db);
261e57a022bSJustin T. Gibbs }
262e57a022bSJustin T. Gibbs 
263fa9e4066Sahrens /*
264fa9e4066Sahrens  * Insert an entry into the hash table.  If there is already an element
265fa9e4066Sahrens  * equal to elem in the hash table, then the already existing element
266fa9e4066Sahrens  * will be returned and the new element will not be inserted.
267fa9e4066Sahrens  * Otherwise returns NULL.
268fa9e4066Sahrens  */
269fa9e4066Sahrens static dmu_buf_impl_t *
dbuf_hash_insert(dmu_buf_impl_t * db)270fa9e4066Sahrens dbuf_hash_insert(dmu_buf_impl_t *db)
271fa9e4066Sahrens {
272fa9e4066Sahrens 	dbuf_hash_table_t *h = &dbuf_hash_table;
273503ad85cSMatthew Ahrens 	objset_t *os = db->db_objset;
274fa9e4066Sahrens 	uint64_t obj = db->db.db_object;
275fa9e4066Sahrens 	int level = db->db_level;
276fa9e4066Sahrens 	uint64_t blkid = db->db_blkid;
277dcbf3bd6SGeorge Wilson 	uint64_t hv = dbuf_hash(os, obj, level, blkid);
278fa9e4066Sahrens 	uint64_t idx = hv & h->hash_table_mask;
279fa9e4066Sahrens 	dmu_buf_impl_t *dbf;
280fa9e4066Sahrens 
281fa9e4066Sahrens 	mutex_enter(DBUF_HASH_MUTEX(h, idx));
282fa9e4066Sahrens 	for (dbf = h->hash_table[idx]; dbf != NULL; dbf = dbf->db_hash_next) {
283fa9e4066Sahrens 		if (DBUF_EQUAL(dbf, os, obj, level, blkid)) {
284fa9e4066Sahrens 			mutex_enter(&dbf->db_mtx);
285ea8dc4b6Seschrock 			if (dbf->db_state != DB_EVICTING) {
286fa9e4066Sahrens 				mutex_exit(DBUF_HASH_MUTEX(h, idx));
287fa9e4066Sahrens 				return (dbf);
288fa9e4066Sahrens 			}
289fa9e4066Sahrens 			mutex_exit(&dbf->db_mtx);
290fa9e4066Sahrens 		}
291fa9e4066Sahrens 	}
292fa9e4066Sahrens 
293fa9e4066Sahrens 	mutex_enter(&db->db_mtx);
294fa9e4066Sahrens 	db->db_hash_next = h->hash_table[idx];
295fa9e4066Sahrens 	h->hash_table[idx] = db;
296fa9e4066Sahrens 	mutex_exit(DBUF_HASH_MUTEX(h, idx));
2971a5e258fSJosef 'Jeff' Sipek 	atomic_inc_64(&dbuf_hash_count);
298fa9e4066Sahrens 
299fa9e4066Sahrens 	return (NULL);
300fa9e4066Sahrens }
301fa9e4066Sahrens 
302fa9e4066Sahrens /*
303bbfa8ea8SMatthew Ahrens  * Remove an entry from the hash table.  It must be in the EVICTING state.
304fa9e4066Sahrens  */
305fa9e4066Sahrens static void
dbuf_hash_remove(dmu_buf_impl_t * db)306fa9e4066Sahrens dbuf_hash_remove(dmu_buf_impl_t *db)
307fa9e4066Sahrens {
308fa9e4066Sahrens 	dbuf_hash_table_t *h = &dbuf_hash_table;
309dcbf3bd6SGeorge Wilson 	uint64_t hv = dbuf_hash(db->db_objset, db->db.db_object,
310fa9e4066Sahrens 	    db->db_level, db->db_blkid);
311fa9e4066Sahrens 	uint64_t idx = hv & h->hash_table_mask;
312fa9e4066Sahrens 	dmu_buf_impl_t *dbf, **dbp;
313fa9e4066Sahrens 
314fa9e4066Sahrens 	/*
315eb633035STom Caputi 	 * We mustn't hold db_mtx to maintain lock ordering:
316fa9e4066Sahrens 	 * DBUF_HASH_MUTEX > db_mtx.
317fa9e4066Sahrens 	 */
318e914ace2STim Schumacher 	ASSERT(zfs_refcount_is_zero(&db->db_holds));
319ea8dc4b6Seschrock 	ASSERT(db->db_state == DB_EVICTING);
320fa9e4066Sahrens 	ASSERT(!MUTEX_HELD(&db->db_mtx));
321fa9e4066Sahrens 
322fa9e4066Sahrens 	mutex_enter(DBUF_HASH_MUTEX(h, idx));
323fa9e4066Sahrens 	dbp = &h->hash_table[idx];
324fa9e4066Sahrens 	while ((dbf = *dbp) != db) {
325fa9e4066Sahrens 		dbp = &dbf->db_hash_next;
326fa9e4066Sahrens 		ASSERT(dbf != NULL);
327fa9e4066Sahrens 	}
328fa9e4066Sahrens 	*dbp = db->db_hash_next;
329fa9e4066Sahrens 	db->db_hash_next = NULL;
330fa9e4066Sahrens 	mutex_exit(DBUF_HASH_MUTEX(h, idx));
3311a5e258fSJosef 'Jeff' Sipek 	atomic_dec_64(&dbuf_hash_count);
332fa9e4066Sahrens }
333fa9e4066Sahrens 
/*
 * Selects which hold-count invariant dbuf_verify_user() checks:
 * DBVU_EVICTING is passed by dbuf_evict_user() while it detaches user
 * data; DBVU_NOT_EVICTING selects the checks for every other caller.
 */
334bc9014e6SJustin Gibbs typedef enum {
335bc9014e6SJustin Gibbs 	DBVU_EVICTING,
336bc9014e6SJustin Gibbs 	DBVU_NOT_EVICTING
337bc9014e6SJustin Gibbs } dbvu_verify_type_t;
338bc9014e6SJustin Gibbs 
339bc9014e6SJustin Gibbs static void
dbuf_verify_user(dmu_buf_impl_t * db,dbvu_verify_type_t verify_type)340bc9014e6SJustin Gibbs dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type)
341bc9014e6SJustin Gibbs {
342bc9014e6SJustin Gibbs #ifdef ZFS_DEBUG
343bc9014e6SJustin Gibbs 	int64_t holds;
344bc9014e6SJustin Gibbs 
345bc9014e6SJustin Gibbs 	if (db->db_user == NULL)
346bc9014e6SJustin Gibbs 		return;
347bc9014e6SJustin Gibbs 
348bc9014e6SJustin Gibbs 	/* Only data blocks support the attachment of user data. */
349bc9014e6SJustin Gibbs 	ASSERT(db->db_level == 0);
350bc9014e6SJustin Gibbs 
351bc9014e6SJustin Gibbs 	/* Clients must resolve a dbuf before attaching user data. */
352bc9014e6SJustin Gibbs 	ASSERT(db->db.db_data != NULL);
353bc9014e6SJustin Gibbs 	ASSERT3U(db->db_state, ==, DB_CACHED);
354bc9014e6SJustin Gibbs 
355e914ace2STim Schumacher 	holds = zfs_refcount_count(&db->db_holds);
356bc9014e6SJustin Gibbs 	if (verify_type == DBVU_EVICTING) {
357bc9014e6SJustin Gibbs 		/*
358bc9014e6SJustin Gibbs 		 * Immediate eviction occurs when holds == dirtycnt.
359bc9014e6SJustin Gibbs 		 * For normal eviction buffers, holds is zero on
360bc9014e6SJustin Gibbs 		 * eviction, except when dbuf_fix_old_data() calls
361bc9014e6SJustin Gibbs 		 * dbuf_clear_data().  However, the hold count can grow
362bc9014e6SJustin Gibbs 		 * during eviction even though db_mtx is held (see
363bc9014e6SJustin Gibbs 		 * dmu_bonus_hold() for an example), so we can only
364bc9014e6SJustin Gibbs 		 * test the generic invariant that holds >= dirtycnt.
365bc9014e6SJustin Gibbs 		 */
366bc9014e6SJustin Gibbs 		ASSERT3U(holds, >=, db->db_dirtycnt);
367bc9014e6SJustin Gibbs 	} else {
368d2058105SJustin T. Gibbs 		if (db->db_user_immediate_evict == TRUE)
369bc9014e6SJustin Gibbs 			ASSERT3U(holds, >=, db->db_dirtycnt);
370bc9014e6SJustin Gibbs 		else
371bc9014e6SJustin Gibbs 			ASSERT3U(holds, >, 0);
372bc9014e6SJustin Gibbs 	}
373bc9014e6SJustin Gibbs #endif
374bc9014e6SJustin Gibbs }
375bc9014e6SJustin Gibbs 
376fa9e4066Sahrens static void
dbuf_evict_user(dmu_buf_impl_t * db)377fa9e4066Sahrens dbuf_evict_user(dmu_buf_impl_t *db)
378fa9e4066Sahrens {
379bc9014e6SJustin Gibbs 	dmu_buf_user_t *dbu = db->db_user;
380bc9014e6SJustin Gibbs 
381fa9e4066Sahrens 	ASSERT(MUTEX_HELD(&db->db_mtx));
382fa9e4066Sahrens 
383bc9014e6SJustin Gibbs 	if (dbu == NULL)
384fa9e4066Sahrens 		return;
385fa9e4066Sahrens 
386bc9014e6SJustin Gibbs 	dbuf_verify_user(db, DBVU_EVICTING);
387bc9014e6SJustin Gibbs 	db->db_user = NULL;
388bc9014e6SJustin Gibbs 
389bc9014e6SJustin Gibbs #ifdef ZFS_DEBUG
390bc9014e6SJustin Gibbs 	if (dbu->dbu_clear_on_evict_dbufp != NULL)
391bc9014e6SJustin Gibbs 		*dbu->dbu_clear_on_evict_dbufp = NULL;
392bc9014e6SJustin Gibbs #endif
393bc9014e6SJustin Gibbs 
394bc9014e6SJustin Gibbs 	/*
39540510e8eSJosef 'Jeff' Sipek 	 * There are two eviction callbacks - one that we call synchronously
39640510e8eSJosef 'Jeff' Sipek 	 * and one that we invoke via a taskq.  The async one is useful for
39740510e8eSJosef 'Jeff' Sipek 	 * avoiding lock order reversals and limiting stack depth.
39840510e8eSJosef 'Jeff' Sipek 	 *
39940510e8eSJosef 'Jeff' Sipek 	 * Note that if we have a sync callback but no async callback,
40040510e8eSJosef 'Jeff' Sipek 	 * it's likely that the sync callback will free the structure
40140510e8eSJosef 'Jeff' Sipek 	 * containing the dbu.  In that case we need to take care to not
40240510e8eSJosef 'Jeff' Sipek 	 * dereference dbu after calling the sync evict func.
403bc9014e6SJustin Gibbs 	 */
40440510e8eSJosef 'Jeff' Sipek 	boolean_t has_async = (dbu->dbu_evict_func_async != NULL);
40540510e8eSJosef 'Jeff' Sipek 
40640510e8eSJosef 'Jeff' Sipek 	if (dbu->dbu_evict_func_sync != NULL)
40740510e8eSJosef 'Jeff' Sipek 		dbu->dbu_evict_func_sync(dbu);
40840510e8eSJosef 'Jeff' Sipek 
40940510e8eSJosef 'Jeff' Sipek 	if (has_async) {
41040510e8eSJosef 'Jeff' Sipek 		taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func_async,
41140510e8eSJosef 'Jeff' Sipek 		    dbu, 0, &dbu->dbu_tqent);
41240510e8eSJosef 'Jeff' Sipek 	}
413fa9e4066Sahrens }
414fa9e4066Sahrens 
415744947dcSTom Erickson boolean_t
dbuf_is_metadata(dmu_buf_impl_t * db)416744947dcSTom Erickson dbuf_is_metadata(dmu_buf_impl_t *db)
417744947dcSTom Erickson {
418eb633035STom Caputi 	if (db->db_level > 0 || db->db_blkid == DMU_SPILL_BLKID) {
419744947dcSTom Erickson 		return (B_TRUE);
420744947dcSTom Erickson 	} else {
421744947dcSTom Erickson 		boolean_t is_metadata;
422744947dcSTom Erickson 
423744947dcSTom Erickson 		DB_DNODE_ENTER(db);
424ad135b5dSChristopher Siden 		is_metadata = DMU_OT_IS_METADATA(DB_DNODE(db)->dn_type);
425744947dcSTom Erickson 		DB_DNODE_EXIT(db);
426744947dcSTom Erickson 
427744947dcSTom Erickson 		return (is_metadata);
428744947dcSTom Erickson 	}
429744947dcSTom Erickson }
430744947dcSTom Erickson 
431adb52d92SMatthew Ahrens /*
432adb52d92SMatthew Ahrens  * This returns whether this dbuf should be stored in the metadata cache, which
433adb52d92SMatthew Ahrens  * is based on whether it's from one of the dnode types that store data related
434adb52d92SMatthew Ahrens  * to traversing dataset hierarchies.
435adb52d92SMatthew Ahrens  */
436adb52d92SMatthew Ahrens static boolean_t
dbuf_include_in_metadata_cache(dmu_buf_impl_t * db)437adb52d92SMatthew Ahrens dbuf_include_in_metadata_cache(dmu_buf_impl_t *db)
438adb52d92SMatthew Ahrens {
439adb52d92SMatthew Ahrens 	DB_DNODE_ENTER(db);
440adb52d92SMatthew Ahrens 	dmu_object_type_t type = DB_DNODE(db)->dn_type;
441adb52d92SMatthew Ahrens 	DB_DNODE_EXIT(db);
442adb52d92SMatthew Ahrens 
443adb52d92SMatthew Ahrens 	/* Check if this dbuf is one of the types we care about */
444adb52d92SMatthew Ahrens 	if (DMU_OT_IS_METADATA_CACHED(type)) {
445adb52d92SMatthew Ahrens 		/* If we hit this, then we set something up wrong in dmu_ot */
446adb52d92SMatthew Ahrens 		ASSERT(DMU_OT_IS_METADATA(type));
447adb52d92SMatthew Ahrens 
448adb52d92SMatthew Ahrens 		/*
449adb52d92SMatthew Ahrens 		 * Sanity check for small-memory systems: don't allocate too
450adb52d92SMatthew Ahrens 		 * much memory for this purpose.
451adb52d92SMatthew Ahrens 		 */
452e914ace2STim Schumacher 		if (zfs_refcount_count(
453e914ace2STim Schumacher 		    &dbuf_caches[DB_DBUF_METADATA_CACHE].size) >
454adb52d92SMatthew Ahrens 		    dbuf_metadata_cache_max_bytes) {
455adb52d92SMatthew Ahrens 			dbuf_metadata_cache_overflow++;
456adb52d92SMatthew Ahrens 			DTRACE_PROBE1(dbuf__metadata__cache__overflow,
457adb52d92SMatthew Ahrens 			    dmu_buf_impl_t *, db);
458adb52d92SMatthew Ahrens 			return (B_FALSE);
459adb52d92SMatthew Ahrens 		}
460adb52d92SMatthew Ahrens 
461adb52d92SMatthew Ahrens 		return (B_TRUE);
462adb52d92SMatthew Ahrens 	}
463adb52d92SMatthew Ahrens 
464adb52d92SMatthew Ahrens 	return (B_FALSE);
465adb52d92SMatthew Ahrens }
466adb52d92SMatthew Ahrens 
467dcbf3bd6SGeorge Wilson /*
468dcbf3bd6SGeorge Wilson  * This function *must* return indices evenly distributed between all
469dcbf3bd6SGeorge Wilson  * sublists of the multilist. This is needed due to how the dbuf eviction
470dcbf3bd6SGeorge Wilson  * code is laid out; dbuf_evict_thread() assumes dbufs are evenly
471dcbf3bd6SGeorge Wilson  * distributed between all sublists and uses this assumption when
472dcbf3bd6SGeorge Wilson  * deciding which sublist to evict from and how much to evict from it.
473dcbf3bd6SGeorge Wilson  */
474dcbf3bd6SGeorge Wilson unsigned int
dbuf_cache_multilist_index_func(multilist_t * ml,void * obj)475dcbf3bd6SGeorge Wilson dbuf_cache_multilist_index_func(multilist_t *ml, void *obj)
476ea8dc4b6Seschrock {
477dcbf3bd6SGeorge Wilson 	dmu_buf_impl_t *db = obj;
478dcbf3bd6SGeorge Wilson 
479dcbf3bd6SGeorge Wilson 	/*
480dcbf3bd6SGeorge Wilson 	 * The assumption here, is the hash value for a given
481dcbf3bd6SGeorge Wilson 	 * dmu_buf_impl_t will remain constant throughout it's lifetime
482dcbf3bd6SGeorge Wilson 	 * (i.e. it's objset, object, level and blkid fields don't change).
483dcbf3bd6SGeorge Wilson 	 * Thus, we don't need to store the dbuf's sublist index
484dcbf3bd6SGeorge Wilson 	 * on insertion, as this index can be recalculated on removal.
485dcbf3bd6SGeorge Wilson 	 *
486dcbf3bd6SGeorge Wilson 	 * Also, the low order bits of the hash value are thought to be
487dcbf3bd6SGeorge Wilson 	 * distributed evenly. Otherwise, in the case that the multilist
488dcbf3bd6SGeorge Wilson 	 * has a power of two number of sublists, each sublists' usage
489dcbf3bd6SGeorge Wilson 	 * would not be evenly distributed.
490dcbf3bd6SGeorge Wilson 	 */
491dcbf3bd6SGeorge Wilson 	return (dbuf_hash(db->db_objset, db->db.db_object,
492dcbf3bd6SGeorge Wilson 	    db->db_level, db->db_blkid) %
493dcbf3bd6SGeorge Wilson 	    multilist_get_num_sublists(ml));
494dcbf3bd6SGeorge Wilson }
495dcbf3bd6SGeorge Wilson 
496dcbf3bd6SGeorge Wilson static inline boolean_t
dbuf_cache_above_hiwater(void)497dcbf3bd6SGeorge Wilson dbuf_cache_above_hiwater(void)
498dcbf3bd6SGeorge Wilson {
499dcbf3bd6SGeorge Wilson 	uint64_t dbuf_cache_hiwater_bytes =
500dcbf3bd6SGeorge Wilson 	    (dbuf_cache_max_bytes * dbuf_cache_hiwater_pct) / 100;
501dcbf3bd6SGeorge Wilson 
502e914ace2STim Schumacher 	return (zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
503dcbf3bd6SGeorge Wilson 	    dbuf_cache_max_bytes + dbuf_cache_hiwater_bytes);
504dcbf3bd6SGeorge Wilson }
505dcbf3bd6SGeorge Wilson 
506dcbf3bd6SGeorge Wilson static inline boolean_t
dbuf_cache_above_lowater(void)507dcbf3bd6SGeorge Wilson dbuf_cache_above_lowater(void)
508dcbf3bd6SGeorge Wilson {
509dcbf3bd6SGeorge Wilson 	uint64_t dbuf_cache_lowater_bytes =
510dcbf3bd6SGeorge Wilson 	    (dbuf_cache_max_bytes * dbuf_cache_lowater_pct) / 100;
511dcbf3bd6SGeorge Wilson 
512e914ace2STim Schumacher 	return (zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
513dcbf3bd6SGeorge Wilson 	    dbuf_cache_max_bytes - dbuf_cache_lowater_bytes);
514dcbf3bd6SGeorge Wilson }
515dcbf3bd6SGeorge Wilson 
516dcbf3bd6SGeorge Wilson /*
517dcbf3bd6SGeorge Wilson  * Evict the oldest eligible dbuf from the dbuf cache.
518dcbf3bd6SGeorge Wilson  */
519dcbf3bd6SGeorge Wilson static void
dbuf_evict_one(void)520dcbf3bd6SGeorge Wilson dbuf_evict_one(void)
521dcbf3bd6SGeorge Wilson {
522adb52d92SMatthew Ahrens 	int idx = multilist_get_random_index(dbuf_caches[DB_DBUF_CACHE].cache);
523adb52d92SMatthew Ahrens 	multilist_sublist_t *mls = multilist_sublist_lock(
524adb52d92SMatthew Ahrens 	    dbuf_caches[DB_DBUF_CACHE].cache, idx);
525dcbf3bd6SGeorge Wilson 
526dcbf3bd6SGeorge Wilson 	ASSERT(!MUTEX_HELD(&dbuf_evict_lock));
527dcbf3bd6SGeorge Wilson 
528dcbf3bd6SGeorge Wilson 	dmu_buf_impl_t *db = multilist_sublist_tail(mls);
529dcbf3bd6SGeorge Wilson 	while (db != NULL && mutex_tryenter(&db->db_mtx) == 0) {
530dcbf3bd6SGeorge Wilson 		db = multilist_sublist_prev(mls, db);
531dcbf3bd6SGeorge Wilson 	}
532dcbf3bd6SGeorge Wilson 
533dcbf3bd6SGeorge Wilson 	DTRACE_PROBE2(dbuf__evict__one, dmu_buf_impl_t *, db,
534dcbf3bd6SGeorge Wilson 	    multilist_sublist_t *, mls);
535dcbf3bd6SGeorge Wilson 
536dcbf3bd6SGeorge Wilson 	if (db != NULL) {
537dcbf3bd6SGeorge Wilson 		multilist_sublist_remove(mls, db);
538dcbf3bd6SGeorge Wilson 		multilist_sublist_unlock(mls);
539e914ace2STim Schumacher 		(void) zfs_refcount_remove_many(
540e914ace2STim Schumacher 		    &dbuf_caches[DB_DBUF_CACHE].size,
541dcbf3bd6SGeorge Wilson 		    db->db.db_size, db);
542adb52d92SMatthew Ahrens 		ASSERT3U(db->db_caching_status, ==, DB_DBUF_CACHE);
543adb52d92SMatthew Ahrens 		db->db_caching_status = DB_NO_CACHE;
544dcbf3bd6SGeorge Wilson 		dbuf_destroy(db);
545dcbf3bd6SGeorge Wilson 	} else {
546dcbf3bd6SGeorge Wilson 		multilist_sublist_unlock(mls);
547dcbf3bd6SGeorge Wilson 	}
548dcbf3bd6SGeorge Wilson }
549dcbf3bd6SGeorge Wilson 
550dcbf3bd6SGeorge Wilson /*
551dcbf3bd6SGeorge Wilson  * The dbuf evict thread is responsible for aging out dbufs from the
552dcbf3bd6SGeorge Wilson  * cache. Once the cache has reached it's maximum size, dbufs are removed
553dcbf3bd6SGeorge Wilson  * and destroyed. The eviction thread will continue running until the size
554dcbf3bd6SGeorge Wilson  * of the dbuf cache is at or below the maximum size. Once the dbuf is aged
555dcbf3bd6SGeorge Wilson  * out of the cache it is destroyed and becomes eligible for arc eviction.
556dcbf3bd6SGeorge Wilson  */
5573f7978d0SAlan Somers /* ARGSUSED */
558dcbf3bd6SGeorge Wilson static void
dbuf_evict_thread(void * unused)5593f7978d0SAlan Somers dbuf_evict_thread(void *unused)
560dcbf3bd6SGeorge Wilson {
561dcbf3bd6SGeorge Wilson 	callb_cpr_t cpr;
562dcbf3bd6SGeorge Wilson 
563dcbf3bd6SGeorge Wilson 	CALLB_CPR_INIT(&cpr, &dbuf_evict_lock, callb_generic_cpr, FTAG);
564dcbf3bd6SGeorge Wilson 
565dcbf3bd6SGeorge Wilson 	mutex_enter(&dbuf_evict_lock);
566dcbf3bd6SGeorge Wilson 	while (!dbuf_evict_thread_exit) {
567dcbf3bd6SGeorge Wilson 		while (!dbuf_cache_above_lowater() && !dbuf_evict_thread_exit) {
568dcbf3bd6SGeorge Wilson 			CALLB_CPR_SAFE_BEGIN(&cpr);
569dcbf3bd6SGeorge Wilson 			(void) cv_timedwait_hires(&dbuf_evict_cv,
570dcbf3bd6SGeorge Wilson 			    &dbuf_evict_lock, SEC2NSEC(1), MSEC2NSEC(1), 0);
571dcbf3bd6SGeorge Wilson 			CALLB_CPR_SAFE_END(&cpr, &dbuf_evict_lock);
572dcbf3bd6SGeorge Wilson 		}
573dcbf3bd6SGeorge Wilson 		mutex_exit(&dbuf_evict_lock);
574dcbf3bd6SGeorge Wilson 
575dcbf3bd6SGeorge Wilson 		/*
576dcbf3bd6SGeorge Wilson 		 * Keep evicting as long as we're above the low water mark
577dcbf3bd6SGeorge Wilson 		 * for the cache. We do this without holding the locks to
578dcbf3bd6SGeorge Wilson 		 * minimize lock contention.
579dcbf3bd6SGeorge Wilson 		 */
580dcbf3bd6SGeorge Wilson 		while (dbuf_cache_above_lowater() && !dbuf_evict_thread_exit) {
581dcbf3bd6SGeorge Wilson 			dbuf_evict_one();
582dcbf3bd6SGeorge Wilson 		}
583dcbf3bd6SGeorge Wilson 
584dcbf3bd6SGeorge Wilson 		mutex_enter(&dbuf_evict_lock);
585dcbf3bd6SGeorge Wilson 	}
586ea8dc4b6Seschrock 
587dcbf3bd6SGeorge Wilson 	dbuf_evict_thread_exit = B_FALSE;
588dcbf3bd6SGeorge Wilson 	cv_broadcast(&dbuf_evict_cv);
589dcbf3bd6SGeorge Wilson 	CALLB_CPR_EXIT(&cpr);	/* drops dbuf_evict_lock */
590dcbf3bd6SGeorge Wilson 	thread_exit();
591dcbf3bd6SGeorge Wilson }
592dcbf3bd6SGeorge Wilson 
593dcbf3bd6SGeorge Wilson /*
594dcbf3bd6SGeorge Wilson  * Wake up the dbuf eviction thread if the dbuf cache is at its max size.
595dcbf3bd6SGeorge Wilson  * If the dbuf cache is at its high water mark, then evict a dbuf from the
596dcbf3bd6SGeorge Wilson  * dbuf cache using the callers context.
597dcbf3bd6SGeorge Wilson  */
598dcbf3bd6SGeorge Wilson static void
dbuf_evict_notify(void)599dcbf3bd6SGeorge Wilson dbuf_evict_notify(void)
600dcbf3bd6SGeorge Wilson {
601dbfd9f93SMatthew Ahrens 	/*
602dbfd9f93SMatthew Ahrens 	 * We check if we should evict without holding the dbuf_evict_lock,
603dbfd9f93SMatthew Ahrens 	 * because it's OK to occasionally make the wrong decision here,
604dbfd9f93SMatthew Ahrens 	 * and grabbing the lock results in massive lock contention.
605dbfd9f93SMatthew Ahrens 	 */
606e914ace2STim Schumacher 	if (zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
607adb52d92SMatthew Ahrens 	    dbuf_cache_max_bytes) {
608dbfd9f93SMatthew Ahrens 		if (dbuf_cache_above_hiwater())
609dcbf3bd6SGeorge Wilson 			dbuf_evict_one();
610dbfd9f93SMatthew Ahrens 		cv_signal(&dbuf_evict_cv);
611dcbf3bd6SGeorge Wilson 	}
612ea8dc4b6Seschrock }
613ea8dc4b6Seschrock 
614fa9e4066Sahrens void
dbuf_init(void)615fa9e4066Sahrens dbuf_init(void)
616fa9e4066Sahrens {
617ea8dc4b6Seschrock 	uint64_t hsize = 1ULL << 16;
618fa9e4066Sahrens 	dbuf_hash_table_t *h = &dbuf_hash_table;
619fa9e4066Sahrens 	int i;
620fa9e4066Sahrens 
621fa9e4066Sahrens 	/*
622fa9e4066Sahrens 	 * The hash table is big enough to fill all of physical memory
623ea8dc4b6Seschrock 	 * with an average 4K block size.  The table will take up
624ea8dc4b6Seschrock 	 * totalmem*sizeof(void*)/4K (i.e. 2MB/GB with 8-byte pointers).
625fa9e4066Sahrens 	 */
626ea8dc4b6Seschrock 	while (hsize * 4096 < physmem * PAGESIZE)
627fa9e4066Sahrens 		hsize <<= 1;
628fa9e4066Sahrens 
629ea8dc4b6Seschrock retry:
630fa9e4066Sahrens 	h->hash_table_mask = hsize - 1;
631ea8dc4b6Seschrock 	h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP);
632ea8dc4b6Seschrock 	if (h->hash_table == NULL) {
633ea8dc4b6Seschrock 		/* XXX - we should really return an error instead of assert */
634ea8dc4b6Seschrock 		ASSERT(hsize > (1ULL << 10));
635ea8dc4b6Seschrock 		hsize >>= 1;
636ea8dc4b6Seschrock 		goto retry;
637ea8dc4b6Seschrock 	}
638fa9e4066Sahrens 
639dcbf3bd6SGeorge Wilson 	dbuf_kmem_cache = kmem_cache_create("dmu_buf_impl_t",
640fa9e4066Sahrens 	    sizeof (dmu_buf_impl_t),
641fa9e4066Sahrens 	    0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
642fa9e4066Sahrens 
643fa9e4066Sahrens 	for (i = 0; i < DBUF_MUTEXES; i++)
644fa9e4066Sahrens 		mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
645bc9014e6SJustin Gibbs 
646dcbf3bd6SGeorge Wilson 	/*
647adb52d92SMatthew Ahrens 	 * Setup the parameters for the dbuf caches. We set the sizes of the
648adb52d92SMatthew Ahrens 	 * dbuf cache and the metadata cache to 1/32nd and 1/16th (default)
649adb52d92SMatthew Ahrens 	 * of the size of the ARC, respectively. If the values are set in
650adb52d92SMatthew Ahrens 	 * /etc/system and they're not greater than the size of the ARC, then
651adb52d92SMatthew Ahrens 	 * we honor that value.
652dcbf3bd6SGeorge Wilson 	 */
653