1fa9e406ahrens/*
2fa9e406ahrens * CDDL HEADER START
3fa9e406ahrens *
4fa9e406ahrens * The contents of this file are subject to the terms of the
5ea8dc4beschrock * Common Development and Distribution License (the "License").
6ea8dc4beschrock * You may not use this file except in compliance with the License.
7fa9e406ahrens *
8fa9e406ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e406ahrens * or http://www.opensolaris.org/os/licensing.
10fa9e406ahrens * See the License for the specific language governing permissions
11fa9e406ahrens * and limitations under the License.
12fa9e406ahrens *
13fa9e406ahrens * When distributing Covered Code, include this CDDL HEADER in each
14fa9e406ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e406ahrens * If applicable, add the following below this CDDL HEADER, with the
16fa9e406ahrens * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e406ahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e406ahrens *
19fa9e406ahrens * CDDL HEADER END
20fa9e406ahrens */
211c17160Kevin Crowe
22fa9e406ahrens/*
2306e0070Mark Shellenbaum * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2494c2d0eMatthew Ahrens * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
25aad0257Saso Kiselkov * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
26f67950bNasf-Fan * Copyright 2019 Joyent, Inc.
27bc9014eJustin Gibbs * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
2812380e1Arne Jansen * Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
29c3d26abMatthew Ahrens * Copyright (c) 2014 Integros [integros.com]
301c17160Kevin Crowe * Copyright 2017 Nexenta Systems, Inc.
31fa9e406ahrens */
32fa9e406ahrens
3355da60bMark J Musante/* Portions Copyright 2010 Robert Milkowski */
3455da60bMark J Musante
35f67950bNasf-Fan#include <sys/zfeature.h>
36ecd6cf8marks#include <sys/cred.h>
37fa9e406ahrens#include <sys/zfs_context.h>
38fa9e406ahrens#include <sys/dmu_objset.h>
39fa9e406ahrens#include <sys/dsl_dir.h>
40fa9e406ahrens#include <sys/dsl_dataset.h>
41fa9e406ahrens#include <sys/dsl_prop.h>
42fa9e406ahrens#include <sys/dsl_pool.h>
431d452cfahrens#include <sys/dsl_synctask.h>
44ecd6cf8marks#include <sys/dsl_deleg.h>
45fa9e406ahrens#include <sys/dnode.h>
46fa9e406ahrens#include <sys/dbuf.h>
47a2eea2eahrens#include <sys/zvol.h>
48fa9e406ahrens#include <sys/dmu_tx.h>
49fa9e406ahrens#include <sys/zap.h>
50fa9e406ahrens#include <sys/zil.h>
51fa9e406ahrens#include <sys/dmu_impl.h>
52ecd6cf8marks#include <sys/zfs_ioctl.h>
530a586ceMark Shellenbaum#include <sys/sa.h>
5499d5e17Tim Haley#include <sys/zfs_onexit.h>
553b2aab1Matthew Ahrens#include <sys/dsl_destroy.h>
5612380e1Arne Jansen#include <sys/vdev.h>
575cabbc6Prashanth Sreenivasa#include <sys/zfeature.h>
58f67950bNasf-Fan#include <sys/spa_impl.h>
59eb63303Tom Caputi#include <sys/dmu_recv.h>
60f67950bNasf-Fan#include <sys/zfs_project.h>
615ac95daSerapheim Dimitropoulos#include "zfs_namecheck.h"
62fa9e406ahrens
/*
 * Needed to close a window in dnode_move() that allows the objset to be freed
 * before it can be safely accessed.  Initialized by dmu_objset_init() and
 * destroyed by dmu_objset_fini().
 */
krwlock_t os_lock;

/*
 * Tunable to override the maximum number of threads for the parallelization
 * of dmu_objset_find_dp, needed to speed up the import of pools with many
 * datasets.
 * Default is 4 times the number of leaf vdevs.
 */
int dmu_find_threads = 0;

/*
 * Backfill lower metadnode objects after this many have been freed.
 * Backfilling negatively impacts object creation rates, so only do it
 * if there are enough holes to fill.
 */
int dmu_rescan_dnode_threshold = 131072;

/* Forward declarations of file-local (static) helpers. */
static void dmu_objset_find_dp_cb(void *arg);

static void dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb);
static void dmu_objset_upgrade_stop(objset_t *os);
88f67950bNasf-Fan
/*
 * Module setup for this file: initializes the global os_lock rwlock.
 */
void
dmu_objset_init(void)
{
	rw_init(&os_lock, NULL, RW_DEFAULT, NULL);
}
94744947dTom Erickson
/*
 * Counterpart of dmu_objset_init(): destroys the global os_lock rwlock.
 */
void
dmu_objset_fini(void)
{
	rw_destroy(&os_lock);
}
100744947dTom Erickson
101fa9e406ahrensspa_t *
102fa9e406ahrensdmu_objset_spa(objset_t *os)
103fa9e406ahrens{
104503ad85Matthew Ahrens	return (os->os_spa);
105fa9e406ahrens}
106fa9e406ahrens
107fa9e406ahrenszilog_t *
108fa9e406ahrensdmu_objset_zil(objset_t *os)
109fa9e406ahrens{
110503ad85Matthew Ahrens	return (os->os_zil);
111fa9e406ahrens}
112fa9e406ahrens
113fa9e406ahrensdsl_pool_t *
114fa9e406ahrensdmu_objset_pool(objset_t *os)
115fa9e406ahrens{
116fa9e406ahrens	dsl_dataset_t *ds;
117fa9e406ahrens
118503ad85Matthew Ahrens	if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
119fa9e406ahrens		return (ds->ds_dir->dd_pool);
120fa9e406ahrens	else
121503ad85Matthew Ahrens		return (spa_get_dsl(os->os_spa));
122fa9e406ahrens}
123fa9e406ahrens
124fa9e406ahrensdsl_dataset_t *
125fa9e406ahrensdmu_objset_ds(objset_t *os)
126fa9e406ahrens{
127503ad85Matthew Ahrens	return (os->os_dsl_dataset);
128fa9e406ahrens}
129fa9e406ahrens
130fa9e406ahrensdmu_objset_type_t
131fa9e406ahrensdmu_objset_type(objset_t *os)
132fa9e406ahrens{
133503ad85Matthew Ahrens	return (os->os_phys->os_type);
134fa9e406ahrens}
135fa9e406ahrens
136fa9e406ahrensvoid
137fa9e406ahrensdmu_objset_name(objset_t *os, char *buf)
138fa9e406ahrens{
139503ad85Matthew Ahrens	dsl_dataset_name(os->os_dsl_dataset, buf);
140fa9e406ahrens}
141fa9e406ahrens
142fa9e406ahrensuint64_t
143fa9e406ahrensdmu_objset_id(objset_t *os)
144fa9e406ahrens{
145503ad85Matthew Ahrens	dsl_dataset_t *ds = os->os_dsl_dataset;
146fa9e406ahrens
147fa9e406ahrens	return (ds ? ds->ds_object : 0);
148fa9e406ahrens}
149fa9e406ahrens
15054811daToomas Soomeuint64_t
15154811daToomas Soomedmu_objset_dnodesize(objset_t *os)
15254811daToomas Soome{
15354811daToomas Soome	return (os->os_dnodesize);
15454811daToomas Soome}
15554811daToomas Soome
156edf345eMatthew Ahrenszfs_sync_type_t
15755da60bMark J Musantedmu_objset_syncprop(objset_t *os)
15855da60bMark J Musante{
15955da60bMark J Musante	return (os->os_sync);
16055da60bMark J Musante}
16155da60bMark J Musante
162edf345eMatthew Ahrenszfs_logbias_op_t
163e09fa4dNeil Perrindmu_objset_logbias(objset_t *os)
164e09fa4dNeil Perrin{
165e09fa4dNeil Perrin	return (os->os_logbias);
166e09fa4dNeil Perrin}
167e09fa4dNeil Perrin
168fa9e406ahrensstatic void
169fa9e406ahrenschecksum_changed_cb(void *arg, uint64_t newval)
170fa9e406ahrens{
171503ad85Matthew Ahrens	objset_t *os = arg;
172fa9e406ahrens
173fa9e406ahrens	/*
174fa9e406ahrens	 * Inheritance should have been done by now.
175fa9e406ahrens	 */
176fa9e406ahrens	ASSERT(newval != ZIO_CHECKSUM_INHERIT);
177fa9e406ahrens
178503ad85Matthew Ahrens	os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
179fa9e406ahrens}
180fa9e406ahrens
181fa9e406ahrensstatic void
182fa9e406ahrenscompression_changed_cb(void *arg, uint64_t newval)
183fa9e406ahrens{
184503ad85Matthew Ahrens	objset_t *os = arg;
185fa9e406ahrens
186fa9e406ahrens	/*
187fa9e406ahrens	 * Inheritance and range checking should have been done by now.
188fa9e406ahrens	 */
189fa9e406ahrens	ASSERT(newval != ZIO_COMPRESS_INHERIT);
190fa9e406ahrens
191db1741fJustin T. Gibbs	os->os_compress = zio_compress_select(os->os_spa, newval,
192db1741fJustin T. Gibbs	    ZIO_COMPRESS_ON);
193fa9e406ahrens}
194fa9e406ahrens
/*
 * Property callback for ZFS_PROP_COPIES (registered in
 * dmu_objset_open_impl()).  arg is the objset_t; newval is cached
 * unchanged in os_copies.
 */
static void
copies_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 * The value must be at least 1 and no larger than what
	 * spa_max_replication() reports for this pool.
	 */
	ASSERT(newval > 0);
	ASSERT(newval <= spa_max_replication(os->os_spa));

	os->os_copies = newval;
}
208d0ad202ahrens
2093baa08fekstatic void
210b24ab67Jeff Bonwickdedup_changed_cb(void *arg, uint64_t newval)
211b24ab67Jeff Bonwick{
212b24ab67Jeff Bonwick	objset_t *os = arg;
213b24ab67Jeff Bonwick	spa_t *spa = os->os_spa;
214b24ab67Jeff Bonwick	enum zio_checksum checksum;
215b24ab67Jeff Bonwick
216b24ab67Jeff Bonwick	/*
217b24ab67Jeff Bonwick	 * Inheritance should have been done by now.
218b24ab67Jeff Bonwick	 */
219b24ab67Jeff Bonwick	ASSERT(newval != ZIO_CHECKSUM_INHERIT);
220b24ab67Jeff Bonwick
221b24ab67Jeff Bonwick	checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF);
222b24ab67Jeff Bonwick
223b24ab67Jeff Bonwick	os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK;
224b24ab67Jeff Bonwick	os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY);
225b24ab67Jeff Bonwick}
226b24ab67Jeff Bonwick
227b24ab67Jeff Bonwickstatic void
2283baa08fekprimary_cache_changed_cb(void *arg, uint64_t newval)
2293baa08fek{
230503ad85Matthew Ahrens	objset_t *os = arg;
2313baa08fek
2323baa08fek	/*
2333baa08fek	 * Inheritance and range checking should have been done by now.
2343baa08fek	 */
2353baa08fek	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
2363baa08fek	    newval == ZFS_CACHE_METADATA);
2373baa08fek
238503ad85Matthew Ahrens	os->os_primary_cache = newval;
2393baa08fek}
2403baa08fek
2413baa08fekstatic void
2423baa08feksecondary_cache_changed_cb(void *arg, uint64_t newval)
2433baa08fek{
244503ad85Matthew Ahrens	objset_t *os = arg;
2453baa08fek
2463baa08fek	/*
2473baa08fek	 * Inheritance and range checking should have been done by now.
2483baa08fek	 */
2493baa08fek	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
2503baa08fek	    newval == ZFS_CACHE_METADATA);
2513baa08fek
252503ad85Matthew Ahrens	os->os_secondary_cache = newval;
2533baa08fek}
2543baa08fek
255e09fa4dNeil Perrinstatic void
25655da60bMark J Musantesync_changed_cb(void *arg, uint64_t newval)
25755da60bMark J Musante{
25855da60bMark J Musante	objset_t *os = arg;
25955da60bMark J Musante
26055da60bMark J Musante	/*
26155da60bMark J Musante	 * Inheritance and range checking should have been done by now.
26255da60bMark J Musante	 */
26355da60bMark J Musante	ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS ||
26455da60bMark J Musante	    newval == ZFS_SYNC_DISABLED);
26555da60bMark J Musante
26655da60bMark J Musante	os->os_sync = newval;
26755da60bMark J Musante	if (os->os_zil)
26855da60bMark J Musante		zil_set_sync(os->os_zil, newval);
26955da60bMark J Musante}
27055da60bMark J Musante
27155da60bMark J Musantestatic void
272edf345eMatthew Ahrensredundant_metadata_changed_cb(void *arg, uint64_t newval)
273edf345eMatthew Ahrens{
274edf345eMatthew Ahrens	objset_t *os = arg;
275edf345eMatthew Ahrens
276edf345eMatthew Ahrens	/*
277edf345eMatthew Ahrens	 * Inheritance and range checking should have been done by now.
278edf345eMatthew Ahrens	 */
279edf345eMatthew Ahrens	ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
280edf345eMatthew Ahrens	    newval == ZFS_REDUNDANT_METADATA_MOST);
281edf345eMatthew Ahrens
282edf345eMatthew Ahrens	os->os_redundant_metadata = newval;
283edf345eMatthew Ahrens}
284edf345eMatthew Ahrens
285edf345eMatthew Ahrensstatic void
28654811daToomas Soomednodesize_changed_cb(void *arg, uint64_t newval)
28754811daToomas Soome{
28854811daToomas Soome	objset_t *os = arg;
28954811daToomas Soome
29054811daToomas Soome	switch (newval) {
29154811daToomas Soome	case ZFS_DNSIZE_LEGACY:
29254811daToomas Soome		os->os_dnodesize = DNODE_MIN_SIZE;
29354811daToomas Soome		break;
29454811daToomas Soome	case ZFS_DNSIZE_AUTO:
29554811daToomas Soome		/*
29654811daToomas Soome		 * Choose a dnode size that will work well for most
29754811daToomas Soome		 * workloads if the user specified "auto". Future code
29854811daToomas Soome		 * improvements could dynamically select a dnode size
29954811daToomas Soome		 * based on observed workload patterns.
30054811daToomas Soome		 */
30154811daToomas Soome		os->os_dnodesize = DNODE_MIN_SIZE * 2;
30254811daToomas Soome		break;
30354811daToomas Soome	case ZFS_DNSIZE_1K:
30454811daToomas Soome	case ZFS_DNSIZE_2K:
30554811daToomas Soome	case ZFS_DNSIZE_4K:
30654811daToomas Soome	case ZFS_DNSIZE_8K:
30754811daToomas Soome	case ZFS_DNSIZE_16K:
30854811daToomas Soome		os->os_dnodesize = newval;
30954811daToomas Soome		break;
31054811daToomas Soome	}
31154811daToomas Soome}
31254811daToomas Soome
31354811daToomas Soomestatic void
314663207aDon Bradysmallblk_changed_cb(void *arg, uint64_t newval)
315663207aDon Brady{
316663207aDon Brady	objset_t *os = arg;
317663207aDon Brady
318663207aDon Brady	/*
319663207aDon Brady	 * Inheritance and range checking should have been done by now.
320663207aDon Brady	 */
321663207aDon Brady	ASSERT(newval <= SPA_OLD_MAXBLOCKSIZE);
322663207aDon Brady	ASSERT(ISP2(newval));
323663207aDon Brady
324663207aDon Brady	os->os_zpl_special_smallblock = newval;
325663207aDon Brady}
326663207aDon Brady
327663207aDon Bradystatic void
328e09fa4dNeil Perrinlogbias_changed_cb(void *arg, uint64_t newval)
329e09fa4dNeil Perrin{
330e09fa4dNeil Perrin	objset_t *os = arg;
331e09fa4dNeil Perrin
332e09fa4dNeil Perrin	ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
333e09fa4dNeil Perrin	    newval == ZFS_LOGBIAS_THROUGHPUT);
334e09fa4dNeil Perrin	os->os_logbias = newval;
335e09fa4dNeil Perrin	if (os->os_zil)
336e09fa4dNeil Perrin		zil_set_logbias(os->os_zil, newval);
337e09fa4dNeil Perrin}
338e09fa4dNeil Perrin
339b515258Matthew Ahrensstatic void
340b515258Matthew Ahrensrecordsize_changed_cb(void *arg, uint64_t newval)
341b515258Matthew Ahrens{
342b515258Matthew Ahrens	objset_t *os = arg;
343b515258Matthew Ahrens
344b515258Matthew Ahrens	os->os_recordsize = newval;
345b515258Matthew Ahrens}
346b515258Matthew Ahrens
347fa9e406ahrensvoid
348fa9e406ahrensdmu_objset_byteswap(void *buf, size_t size)
349fa9e406ahrens{
350fa9e406ahrens	objset_phys_t *osp = buf;
351fa9e406ahrens
352f67950bNasf-Fan	ASSERT(size == OBJSET_PHYS_SIZE_V1 || size == OBJSET_PHYS_SIZE_V2 ||
353f67950bNasf-Fan	    size == sizeof (objset_phys_t));
354fa9e406ahrens	dnode_byteswap(&osp->os_meta_dnode);
355fa9e406ahrens	byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
356fa9e406ahrens	osp->os_type = BSWAP_64(osp->os_type);
3571484342Matthew Ahrens	osp->os_flags = BSWAP_64(osp->os_flags);
358f67950bNasf-Fan	if (size >= OBJSET_PHYS_SIZE_V2) {
3591484342Matthew Ahrens		dnode_byteswap(&osp->os_userused_dnode);
3601484342Matthew Ahrens		dnode_byteswap(&osp->os_groupused_dnode);
361f67950bNasf-Fan		if (size >= sizeof (objset_phys_t))
362f67950bNasf-Fan			dnode_byteswap(&osp->os_projectused_dnode);
3631484342Matthew Ahrens	}
364fa9e406ahrens}
365fa9e406ahrens
36694c2d0eMatthew Ahrens/*
36794c2d0eMatthew Ahrens * The hash is a CRC-based hash of the objset_t pointer and the object number.
36894c2d0eMatthew Ahrens */
36994c2d0eMatthew Ahrensstatic uint64_t
37094c2d0eMatthew Ahrensdnode_hash(const objset_t *os, uint64_t obj)
37194c2d0eMatthew Ahrens{
37294c2d0eMatthew Ahrens	uintptr_t osv = (uintptr_t)os;
37394c2d0eMatthew Ahrens	uint64_t crc = -1ULL;
37494c2d0eMatthew Ahrens
37594c2d0eMatthew Ahrens	ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
37694c2d0eMatthew Ahrens	/*
37794c2d0eMatthew Ahrens	 * The low 6 bits of the pointer don't have much entropy, because
37894c2d0eMatthew Ahrens	 * the objset_t is larger than 2^6 bytes long.
37994c2d0eMatthew Ahrens	 */
38094c2d0eMatthew Ahrens	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 6)) & 0xFF];
38194c2d0eMatthew Ahrens	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF];
38294c2d0eMatthew Ahrens	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF];
38394c2d0eMatthew Ahrens	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 16)) & 0xFF];
38494c2d0eMatthew Ahrens
38594c2d0eMatthew Ahrens	crc ^= (osv>>14) ^ (obj>>24);
38694c2d0eMatthew Ahrens
38794c2d0eMatthew Ahrens	return (crc);
38894c2d0eMatthew Ahrens}
38994c2d0eMatthew Ahrens
39094c2d0eMatthew Ahrensunsigned int
39194c2d0eMatthew Ahrensdnode_multilist_index_func(multilist_t *ml, void *obj)
39294c2d0eMatthew Ahrens{
39394c2d0eMatthew Ahrens	dnode_t *dn = obj;
39494c2d0eMatthew Ahrens	return (dnode_hash(dn->dn_objset, dn->dn_object) %
39594c2d0eMatthew Ahrens	    multilist_get_num_sublists(ml));
39694c2d0eMatthew Ahrens}
39794c2d0eMatthew Ahrens
/*
 * Instantiates the objset_t in-memory structure corresponding to the
 * objset_phys_t that's pointed to by the specified blkptr_t.
 *
 *	spa	pool the objset belongs to
 *	ds	owning dataset, or NULL for the meta-objset; when non-NULL
 *		the caller must hold ds->ds_opening_lock
 *	bp	root block pointer; the pointer itself is stored in
 *		os_rootbp
 *	osp	on success, receives the newly allocated objset_t
 *
 * Returns 0 on success.  On failure returns the error from arc_read()
 * (with ECKSUM converted to EIO) or from dsl_prop_register(); in both
 * cases the objset_t allocated here is freed and *osp is not written.
 */
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_t **osp)
{
	objset_t *os;
	int i, err;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	/* NOTE(review): the check below is compiled out; reason not shown. */
#if 0
	/*
	 * The $ORIGIN dataset (if it exists) doesn't have an associated
	 * objset, so there's no reason to open it. The $ORIGIN dataset
	 * will not exist on pools older than SPA_VERSION_ORIGIN.
	 */
	if (ds != NULL && spa_get_dsl(spa) != NULL &&
	    spa_get_dsl(spa)->dp_origin_snap != NULL) {
		ASSERT3P(ds->ds_dir, !=,
		    spa_get_dsl(spa)->dp_origin_snap->ds_dir);
	}
#endif

	os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
	os->os_dsl_dataset = ds;
	os->os_spa = spa;
	os->os_rootbp = bp;
	if (!BP_IS_HOLE(os->os_rootbp)) {
		/* Existing objset: read its objset_phys_t through the ARC. */
		arc_flags_t aflags = ARC_FLAG_WAIT;
		zbookmark_phys_t zb;
		int size;
		enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

		if (DMU_OS_IS_L2CACHEABLE(os))
			aflags |= ARC_FLAG_L2CACHE;

		/*
		 * A dataset whose dsl_dir has a crypto object is read with
		 * ZIO_FLAG_RAW.
		 */
		if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
			ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
			ASSERT(BP_IS_AUTHENTICATED(bp));
			zio_flags |= ZIO_FLAG_RAW;
		}

		dprintf_bp(os->os_rootbp, "reading %s", "");
		err = arc_read(NULL, spa, os->os_rootbp,
		    arc_getbuf_func, &os->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
		if (err != 0) {
			kmem_free(os, sizeof (objset_t));
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = SET_ERROR(EIO);
			return (err);
		}

		/*
		 * Pick the objset_phys_t size this pool is allowed to use,
		 * based on the pool version and the project quota feature.
		 */
		if (spa_version(spa) < SPA_VERSION_USERSPACE)
			size = OBJSET_PHYS_SIZE_V1;
		else if (!spa_feature_is_enabled(spa,
		    SPA_FEATURE_PROJECT_QUOTA))
			size = OBJSET_PHYS_SIZE_V2;
		else
			size = sizeof (objset_phys_t);

		/* Increase the blocksize if we are permitted. */
		if (arc_buf_size(os->os_phys_buf) < size) {
			/*
			 * Allocate a larger zero-filled buffer, copy the
			 * data that was read into its front, and swap it in
			 * for the original buffer.
			 */
			arc_buf_t *buf = arc_alloc_buf(spa, &os->os_phys_buf,
			    ARC_BUFC_METADATA, size);
			bzero(buf->b_data, size);
			bcopy(os->os_phys_buf->b_data, buf->b_data,
			    arc_buf_size(os->os_phys_buf));
			arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
			os->os_phys_buf = buf;
		}

		os->os_phys = os->os_phys_buf->b_data;
		os->os_flags = os->os_phys->os_flags;
	} else {
		/* Hole root bp: start from a zero-filled objset_phys_t. */
		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
		    sizeof (objset_phys_t) : OBJSET_PHYS_SIZE_V1;
		os->os_phys_buf = arc_alloc_buf(spa, &os->os_phys_buf,
		    ARC_BUFC_METADATA, size);
		os->os_phys = os->os_phys_buf->b_data;
		bzero(os->os_phys, size);
	}

	/*
	 * Note: the changed_cb will be called once before the register
	 * func returns, thus changing the checksum/compression from the
	 * default (fletcher2/off).  Snapshots don't need to know about
	 * checksum/compression/copies.
	 */
	if (ds != NULL) {
		boolean_t needlock = B_FALSE;

		os->os_encrypted = (ds->ds_dir->dd_crypto_obj != 0);

		/*
		 * Note: it's valid to open the objset if the dataset is
		 * long-held, in which case the pool_config lock will not
		 * be held.
		 */
		if (!dsl_pool_config_held(dmu_objset_pool(os))) {
			needlock = B_TRUE;
			dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
		}

		/*
		 * Register the property callbacks one at a time; once one
		 * registration fails the remaining ones are skipped and err
		 * is handled after the config lock is dropped.  The two
		 * cache properties apply to snapshots as well; the rest are
		 * registered only for non-snapshot datasets.
		 */
		err = dsl_prop_register(ds,
		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
		    primary_cache_changed_cb, os);
		if (err == 0) {
			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
			    secondary_cache_changed_cb, os);
		}
		if (!ds->ds_is_snapshot) {
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
				    checksum_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    compression_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COPIES),
				    copies_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_DEDUP),
				    dedup_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
				    logbias_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_SYNC),
				    sync_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(
				    ZFS_PROP_REDUNDANT_METADATA),
				    redundant_metadata_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
				    recordsize_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_DNODESIZE),
				    dnodesize_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(
				    ZFS_PROP_SPECIAL_SMALL_BLOCKS),
				    smallblk_changed_cb, os);
			}
		}
		if (needlock)
			dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
		if (err != 0) {
			/*
			 * NOTE(review): callbacks registered before the
			 * failure still hold 'os' as their argument and are
			 * not unregistered here -- confirm they are cleaned
			 * up elsewhere before 'os' is freed.
			 */
			arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
			kmem_free(os, sizeof (objset_t));
			return (err);
		}
	} else {
		/* It's the meta-objset. */
		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		os->os_compress = ZIO_COMPRESS_ON;
		os->os_encrypted = B_FALSE;
		os->os_copies = spa_max_replication(spa);
		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
		os->os_dedup_verify = B_FALSE;
		os->os_logbias = ZFS_LOGBIAS_LATENCY;
		os->os_sync = ZFS_SYNC_STANDARD;
		os->os_primary_cache = ZFS_CACHE_ALL;
		os->os_secondary_cache = ZFS_CACHE_ALL;
		os->os_dnodesize = DNODE_MIN_SIZE;
	}
	/*
	 * These properties will be filled in by the logic in zfs_get_zplprop()
	 * when they are queried for the first time.
	 */
	os->os_version = OBJSET_PROP_UNINITIALIZED;
	os->os_normalization = OBJSET_PROP_UNINITIALIZED;
	os->os_utf8only = OBJSET_PROP_UNINITIALIZED;
	os->os_casesensitivity = OBJSET_PROP_UNINITIALIZED;

	/*
	 * Snapshots do not copy the on-disk ZIL header; for them
	 * os_zil_header keeps the contents it got from kmem_zalloc().
	 */
	if (ds == NULL || !ds->ds_is_snapshot)
		os->os_zil_header = os->os_phys->os_zil_header;
	os->os_zil = zil_alloc(os, &os->os_zil_header);

	/* One dirty-dnode multilist per TXG_SIZE slot. */
	for (i = 0; i < TXG_SIZE; i++) {
		os->os_dirty_dnodes[i] = multilist_create(sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]),
		    dnode_multilist_index_func);
	}
	list_create(&os->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_userused_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
	/* os_obj_next_percpu has one zeroed entry per boot_ncpus. */
	os->os_obj_next_percpu_len = boot_ncpus;
	os->os_obj_next_percpu = kmem_zalloc(os->os_obj_next_percpu_len *
	    sizeof (os->os_obj_next_percpu[0]), KM_SLEEP);

	/*
	 * Open the special dnodes embedded in the objset_phys_t.  The
	 * userused/groupused/projectused dnodes are opened only when the
	 * phys buffer is large enough to contain them.
	 */
	dnode_special_open(os, &os->os_phys->os_meta_dnode,
	    DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
	if (OBJSET_BUF_HAS_USERUSED(os->os_phys_buf)) {
		dnode_special_open(os, &os->os_phys->os_userused_dnode,
		    DMU_USERUSED_OBJECT, &os->os_userused_dnode);
		dnode_special_open(os, &os->os_phys->os_groupused_dnode,
		    DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
		if (OBJSET_BUF_HAS_PROJECTUSED(os->os_phys_buf))
			dnode_special_open(os,
			    &os->os_phys->os_projectused_dnode,
			    DMU_PROJECTUSED_OBJECT, &os->os_projectused_dnode);
	}

	mutex_init(&os->os_upgrade_lock, NULL, MUTEX_DEFAULT, NULL);

	*osp = os;
	return (0);
}
640fa9e406ahrens
641503ad85Matthew Ahrensint
642503ad85Matthew Ahrensdmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
6433cb34c6ahrens{
644503ad85Matthew Ahrens	int err = 0;
6453cb34c6ahrens
6469c3fd12Matthew Ahrens	/*
6479c3fd12Matthew Ahrens	 * We shouldn't be doing anything with dsl_dataset_t's unless the
6489c3fd12Matthew Ahrens	 * pool_config lock is held, or the dataset is long-held.
6499c3fd12Matthew Ahrens	 */
6509c3fd12Matthew Ahrens	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool) ||
6519c3fd12Matthew Ahrens	    dsl_dataset_long_held(ds));
6529c3fd12Matthew Ahrens
6533cb34c6ahrens	mutex_enter(&ds->ds_opening_lock);
6545d7b4d4Matthew Ahrens	if (ds->ds_objset == NULL) {
6555d7b4d4Matthew Ahrens		objset_t *os;
656c166b69Paul Dagnelie		rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
6573cb34c6ahrens		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
6585d7b4d4Matthew Ahrens		    ds, dsl_dataset_get_blkptr(ds), &os);
659c166b69Paul Dagnelie		rrw_exit(&ds->ds_bp_rwlock, FTAG);
6605d7b4d4Matthew Ahrens
6615d7b4d4Matthew Ahrens		if (err == 0) {
6625d7b4d4Matthew Ahrens			mutex_enter(&ds->ds_lock);
6635d7b4d4Matthew Ahrens			ASSERT(ds->ds_objset == NULL);
6645d7b4d4Matthew Ahrens			ds->ds_objset = os;
6655d7b4d4Matthew Ahrens			mutex_exit(&ds->ds_lock);
6665d7b4d4Matthew Ahrens		}
6673cb34c6ahrens	}
6685d7b4d4Matthew Ahrens	*osp = ds->ds_objset;
6693cb34c6ahrens	mutex_exit(&ds->ds_opening_lock);
670503ad85Matthew Ahrens	return (err);
6713cb34c6ahrens}
6723cb34c6ahrens
6733b2aab1Matthew Ahrens/*
6743b2aab1Matthew Ahrens * Holds the pool while the objset is held.  Therefore only one objset
6753b2aab1Matthew Ahrens * can be held at a time.
6763b2aab1Matthew Ahrens */
6773cb34c6ahrensint
678eb63303Tom Caputidmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
679eb63303Tom Caputi    objset_t **osp)
6803cb34c6ahrens{
6813b2aab1Matthew Ahrens	dsl_pool_t *dp;
682503ad85Matthew Ahrens	dsl_dataset_t *ds;
6833cb34c6ahrens	int err;
684eb63303Tom Caputi	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
6853cb34c6ahrens
6863b2aab1Matthew Ahrens	err = dsl_pool_hold(name, tag, &dp);
6873b2aab1Matthew Ahrens	if (err != 0)
6883b2aab1Matthew Ahrens		return (err);
689eb63303Tom Caputi	err = dsl_dataset_hold_flags(dp, name, flags, tag, &ds);
6903b2aab1Matthew Ahrens	if (err != 0) {
6913b2aab1Matthew Ahrens		dsl_pool_rele(dp, tag);
692503ad85Matthew Ahrens		return (err);
6933b2aab1Matthew Ahrens	}
694503ad85Matthew Ahrens
695503ad85Matthew Ahrens	err = dmu_objset_from_ds(ds, osp);
6963b2aab1Matthew Ahrens	if (err != 0) {
697503ad85Matthew Ahrens		dsl_dataset_rele(ds, tag);
6983b2aab1Matthew Ahrens		dsl_pool_rele(dp, tag);
6993b2aab1Matthew Ahrens	}
700503ad85Matthew Ahrens
7013cb34c6ahrens	return (err);
7023cb34c6ahrens}
7033cb34c6ahrens
704eb63303Tom Caputiint
705eb63303Tom Caputidmu_objset_hold(const char *name, void *tag, objset_t **osp)
706eb63303Tom Caputi{
707eb63303Tom Caputi	return (dmu_objset_hold_flags(name, B_FALSE, tag, osp));
708eb63303Tom Caputi}
709eb63303Tom Caputi
710eb63303Tom Caputi/* ARGSUSED */
71112380e1Arne Jansenstatic int
71212380e1Arne Jansendmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
713eb63303Tom Caputi    boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
71412380e1Arne Jansen{
71512380e1Arne Jansen	int err;
71612380e1Arne Jansen
71712380e1Arne Jansen	err = dmu_objset_from_ds(ds, osp);
71812380e1Arne Jansen	if (err != 0) {
719eb63303Tom Caputi		return (err);
72012380e1Arne Jansen	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
72112380e1Arne Jansen		return (SET_ERROR(EINVAL));
72212380e1Arne Jansen	} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
723eb63303Tom Caputi		return (SET_ERROR(EROFS));
724eb63303Tom Caputi	} else if (!readonly && decrypt &&
725eb63303Tom Caputi	    dsl_dir_incompatible_encryption_version(ds->ds_dir)) {
72612380e1Arne Jansen		return (SET_ERROR(EROFS));
72712380e1Arne Jansen	}
728eb63303Tom Caputi
729eb63303Tom Caputi	/* if we are decrypting, we can now check MACs in os->os_phys_buf */
730eb63303Tom Caputi	if (decrypt && arc_is_unauthenticated((*osp)->os_phys_buf)) {
731eb63303Tom Caputi		zbookmark_phys_t zb;
732eb63303Tom Caputi
733eb63303Tom Caputi		SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT,
734